Skip to main content

Filter

Select a subset of your data based on useful filters


You can filter and customize your search results to find exactly what you want. Filtering helps you reduce the amount of data returned in search results by removing irrelevant content, or by allowing you to select a specific subset of your data.

In annotation search, Filter is a list of Annotation objects.

info

Before using the Python SDK, Node.js SDK, or any of our gRPC clients, ensure they are properly installed on your machine. Refer to their respective installation guides for instructions on how to install and initialize them.

tip

You can learn how to paginate your API request results here.

By Input Type

You can specify that search results should only include inputs of a particular data type, such as images, videos, or text.

# Example: upload a few images, then search with a filter that only returns
# inputs whose type is "image".
from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display

# Fill in your user ID, app ID, and personal access token (PAT)
USER_ID = ''
APP_ID = ''
PAT = ''

# Initialize a User object with your credentials
client = User(user_id=USER_ID, pat=PAT)

# Create an application with the specified app ID and base workflow
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# URLs of images to be uploaded
urls = [
    "https://images.pexels.com/photos/139257/pexels-photo-139257.jpeg",
    "https://images.pexels.com/photos/1879386/pexels-photo-1879386.jpeg",
    "https://images.pexels.com/photos/1071882/pexels-photo-1071882.jpeg",
]

# Initialize an Inputs object for uploading images and a Search object for searching
input_obj = app.inputs()
search = app.search(top_k=2)

# Upload images from the provided URLs, giving each a predictable input ID
for i, url in enumerate(urls):
    input_obj.upload_from_url(input_id=f"input{i}", image_url=url)

# Perform a search query to find images (filters for images only)
res = search.query(filters=[{'input_types': ['image']}])

# Only the first page of results is needed; fail with a clear message instead
# of a NameError/IndexError when the search comes back empty.
first_page = next(iter(res), None)
if first_page is None or not first_page.hits:
    raise RuntimeError("Search returned no hits; check that the inputs were uploaded and indexed.")

# Retrieve the URL of the first image from the search results
hit = first_page.hits[0].input.data.image.url

# Print the URL of the found image
print(hit)

# Display the found image
hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
display(hit_img)

By Custom Concepts

After you annotate inputs with custom concepts, you can filter by concepts.

train a model

When performing a search with custom concepts, ensure that these concepts are first trained using an embedding-classifier model (transfer-learning model). Without this training, the search query will result in an error.

Training a model generates embeddings for each custom concept. These concept embeddings are then utilized in the search process.

################################################################################
# In this section, we set the user authentication, app ID, and the concept
# we want to filter by. Change these strings to run your own example.
################################################################################

USER_ID = 'YOUR_USER_ID_HERE'
# Your PAT (Personal Access Token) can be found in the Account's Security section
PAT = 'YOUR_PAT_HERE'
APP_ID = 'YOUR_APP_ID_HERE'
# Change this to filter by your own concept
CONCEPT_ID = 'people'

##########################################################################
# YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
##########################################################################

from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)

# The PAT is sent as gRPC call metadata on every request
metadata = (('authorization', 'Key ' + PAT),)

# The userDataObject is required when using a PAT
userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)

# Search annotations, filtered to those carrying the given concept
post_annotations_searches_response = stub.PostAnnotationsSearches(
    service_pb2.PostAnnotationsSearchesRequest(
        user_app_id=userDataObject,
        searches=[
            resources_pb2.Search(
                query=resources_pb2.Query(
                    filters=[
                        resources_pb2.Filter(
                            annotation=resources_pb2.Annotation(
                                data=resources_pb2.Data(
                                    concepts=[  # You can search by multiple concepts
                                        resources_pb2.Concept(
                                            id=CONCEPT_ID,  # You could search by concept Name as well
                                            value=1  # Value of 0 will search for images that don't have the concept
                                        )
                                    ]
                                )
                            )
                        )
                    ]
                )
            )
        ],
        pagination=service_pb2.Pagination(per_page=2, page=1)
    ),
    metadata=metadata
)

# Stop early with the server's own description if the call did not succeed
if post_annotations_searches_response.status.code != status_code_pb2.SUCCESS:
    raise Exception("Post searches failed, status: " + post_annotations_searches_response.status.description)

print("Search result:")
for hit in post_annotations_searches_response.hits:
    print("\tScore %.2f for annotation: %s off input: %s" % (hit.score, hit.annotation.id, hit.input.id))

By User ID

If you have collaborators in your app and they helped you annotate your inputs, you can also filter annotations by their user ID.

#############################################################################
# In this section, we set the user authentication, app ID, and the ID of a
# collaborator. Change these strings to run your own example.
#############################################################################

USER_ID = 'YOUR_USER_ID_HERE'
# Your PAT (Personal Access Token) can be found in the Account's Security section
PAT = 'YOUR_PAT_HERE'
APP_ID = 'YOUR_APP_ID_HERE'
# Change this to filter by user ID
USER_ID_2 = 'COLLABORATOR_ID_HERE'

##########################################################################
# YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
##########################################################################

from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)

# The PAT is sent as gRPC call metadata on every request
metadata = (('authorization', 'Key ' + PAT),)

# The userDataObject is required when using a PAT
userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)

# Search annotations, filtered to those created by the collaborator USER_ID_2
post_annotations_searches_response = stub.PostAnnotationsSearches(
    service_pb2.PostAnnotationsSearchesRequest(
        user_app_id=userDataObject,
        searches=[
            resources_pb2.Search(
                query=resources_pb2.Query(
                    filters=[
                        resources_pb2.Filter(
                            annotation=resources_pb2.Annotation(
                                user_id=USER_ID_2
                            )
                        )
                    ]
                )
            )
        ],
        pagination=service_pb2.Pagination(per_page=2, page=1)
    ),
    metadata=metadata
)

# Stop early with the server's own description if the call did not succeed
if post_annotations_searches_response.status.code != status_code_pb2.SUCCESS:
    raise Exception("Post searches failed, status: " + post_annotations_searches_response.status.description)

print("Search result:")
for hit in post_annotations_searches_response.hits:
    print("\tScore %.2f for annotation: %s off input: %s" % (hit.score, hit.annotation.id, hit.input.id))

By Dataset ID

You can include a dataset ID in the filter to narrow the search results to only inputs or data entries within that specific dataset.

# Example: create a dataset, upload images into it from a CSV file, then
# search with a filter that only returns inputs belonging to that dataset.

# Import necessary modules
from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display
import pandas as pd

# Specify user credentials
USER_ID = ''
APP_ID = ''
PAT = ''

# Initialize Clarifai client (pass the PAT explicitly so the example does not
# depend on credentials being configured elsewhere)
client = User(user_id=USER_ID, pat=PAT)

# Create an application with specified parameters
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# Create a dataset within the application
dataset = app.create_dataset(dataset_id="demo_dataset")

# Define a list of image URLs
urls = [
    "https://images.pexels.com/photos/139257/pexels-photo-139257.jpeg",
    "https://images.pexels.com/photos/1879386/pexels-photo-1879386.jpeg",
    "https://images.pexels.com/photos/1071882/pexels-photo-1071882.jpeg",
]

# Convert the list of URLs to a DataFrame and save it as a CSV file
df = pd.DataFrame(urls, columns=['input'])
df.to_csv("images.csv", index=False)

# Upload images to the dataset from the CSV file
dataset.upload_from_csv(csv_path='images.csv', input_type='image', csv_type='url', labels=False)

# Initialize a search instance for the application
search = app.search(top_k=2)

# Query the search with filters based on the dataset
response = search.query(filters=[{"input_dataset_ids": [dataset.id]}])

# Only the first page of results is needed, so take it directly instead of
# materializing every page; fail clearly when the search comes back empty.
first_page = next(iter(response), None)
if first_page is None or not first_page.hits:
    raise RuntimeError("Search returned no hits; check that the dataset inputs were uploaded and indexed.")

# Retrieve and display the first image hit from the search response
hit = first_page.hits[0].input.data.image.url
print(hit)
hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
display(hit_img)

By Status Code

You can refine search results based on the status of the input data, such as whether it has been successfully processed, is pending processing, or has encountered errors.

You can also filter the annotations by their status.

# Example: upload an image, then search with a filter on the input status code.
from google.protobuf.struct_pb2 import Struct
from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display

# Replace with your Clarifai user ID, app ID, and personal access token (PAT)
USER_ID = ''
APP_ID = ''
PAT = ''

# Initialize Clarifai user with specified user ID (pass the PAT explicitly so
# the example does not depend on credentials being configured elsewhere)
client = User(user_id=USER_ID, pat=PAT)

# Create a Clarifai application with the provided app ID, using the 'Universal' base workflow
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# Initialize an Inputs object for uploading and a search object limited to 2 results
input_obj = app.inputs()
search = app.search(top_k=2)

# URL of the image to be uploaded for search
url = "https://samples.clarifai.com/XiJinping.jpg"

# Upload the image from the specified URL with an input ID of 'geo'
input_obj.upload_from_url(input_id="geo", image_url=url)

# Query the application's search with a filter to retrieve inputs with a status code of 30000
# NOTE(review): 30000 is presumably Clarifai's INPUT_DOWNLOAD_SUCCESS code — confirm against the status code reference
response = search.query(filters=[{'input_status_code': 30000}])

# Only the first page of results is needed; fail with a clear message instead
# of a NameError/IndexError when the search comes back empty.
first_page = next(iter(response), None)
if first_page is None or not first_page.hits:
    raise RuntimeError("Search returned no hits; check that the input was uploaded and indexed.")

# Retrieve the URL of the first hit input
hit = first_page.hits[0].input.data.image.url

# Print the URL of the hit input
print(hit)

# Open and display the image corresponding to the hit input URL
hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
display(hit_img)

By Geo Location

Search by geo location allows you to restrict your search results to a bounding box based on longitude and latitude points. There are two ways you can provide longitude/latitude points. You can provide one point and a radius or you can provide two points.

It is important to note that a search by geo location acts as a filter and returns results ranked by any other provided search criteria, whether that is a visual search, concept search, or something else. If no other criterion is provided, results are returned in the order the inputs were created, NOT by their distance to the center of the search area.

If you are providing one point and a radius, the radius can be in "mile", "kilometer", "degree", or "radian", marked by keywords withinMiles, withinKilometers, withinDegrees, or withinRadians respectively.

If you are providing two points, a box will be drawn from the uppermost point to the lowermost point, and the leftmost point to the rightmost point.

Before you perform a search by geo location, make sure you have added inputs with longitude and latitude points.

Add Inputs With Longitude and Latitude Points

Provide a geo point to an input. The geo point is a JSON object consisting of a longitude and a latitude in the GPS coordinate system (SRID 4326). Each input can have at most one geo point associated with it.

# Example: upload an image tagged with a geo point, then search with a
# geo-point filter centered on that same location.
from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display

USER_ID = ''  # Specify your Clarifai user ID
APP_ID = ''  # Specify your Clarifai application ID
PAT = ''  # Specify your Clarifai personal access token

# Longitude/latitude of the uploaded input. The same values are reused for the
# search so the filter is centered on the point that was actually stored.
LONGITUDE = -30.0
LATITUDE = 40.0

# Create a User instance with the specified user ID and personal access token
client = User(user_id=USER_ID, pat=PAT)

# Create an application instance using the specified application ID and base workflow ('Universal')
app = client.create_app(app_id=APP_ID, base_workflow="Universal")

# Initialize an Inputs object for uploading and a search object limited to 2 results
input_obj = app.inputs()
search = app.search(top_k=2)

# Define the URL of the image to be uploaded for geolocation-based filtering
url = "https://samples.clarifai.com/XiJinping.jpg"

# Upload the image from the specified URL with associated geolocation information.
# geo_info is ordered as [longitude, latitude].
input_obj.upload_from_url(input_id="geo", image_url=url, geo_info=[LONGITUDE, LATITUDE])

# Execute a search query with a filter based on geolocation information (longitude, latitude, and radius)
# NOTE(review): geo_limit is the radius around the point; unit presumably kilometers — confirm against the SDK
response = search.query(
    filters=[{"geo_point": {'longitude': LONGITUDE, 'latitude': LATITUDE, 'geo_limit': 100}}]
)

# Only the first page of results is needed; fail with a clear message instead
# of a NameError/IndexError when the search comes back empty.
first_page = next(iter(response), None)
if first_page is None or not first_page.hits:
    raise RuntimeError("Search returned no hits; check that the input was uploaded and indexed.")

# Retrieve the URL of the first image hit
hit = first_page.hits[0].input.data.image.url

# Print the URL of the hit image
print(hit)

# Open the hit image using requests, resize it, and display it
hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
display(hit_img)

Perform a Search With One Geo Point and Radius in Kilometers

#####################################################################################
# In this section, we set the user authentication, app ID, and details of the point
# we want to perform a search with. Change these strings to run your own example.
#####################################################################################

USER_ID = 'YOUR_USER_ID_HERE'
# Your PAT (Personal Access Token) can be found in the Account's Security section
PAT = 'YOUR_PAT_HERE'
APP_ID = 'YOUR_APP_ID_HERE'
# Change these to perform your own search
LONGITUDE = -29.0
LATITUDE = 40.0

##########################################################################
# YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
##########################################################################

from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)

# The PAT is sent as gRPC call metadata on every request
metadata = (('authorization', 'Key ' + PAT),)

# The userDataObject is required when using a PAT
userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)

# Search annotations whose geo point lies within 150 km of (LONGITUDE, LATITUDE)
post_annotations_searches_response = stub.PostAnnotationsSearches(
    service_pb2.PostAnnotationsSearchesRequest(
        user_app_id=userDataObject,
        searches=[
            resources_pb2.Search(
                query=resources_pb2.Query(
                    filters=[
                        resources_pb2.Filter(
                            annotation=resources_pb2.Annotation(
                                data=resources_pb2.Data(
                                    geo=resources_pb2.Geo(
                                        geo_point=resources_pb2.GeoPoint(
                                            longitude=LONGITUDE,
                                            latitude=LATITUDE,
                                        ),
                                        geo_limit=resources_pb2.GeoLimit(
                                            type="withinKilometers",
                                            value=150.0
                                        )
                                    )
                                )
                            )
                        )
                    ]
                )
            )
        ],
        pagination=service_pb2.Pagination(per_page=2, page=1)
    ),
    metadata=metadata
)

# Stop early with the server's own description if the call did not succeed
if post_annotations_searches_response.status.code != status_code_pb2.SUCCESS:
    raise Exception("Post searches failed, status: " + post_annotations_searches_response.status.description)

print("Search result:")
for hit in post_annotations_searches_response.hits:
    print("\tScore %.2f for annotation: %s off input: %s" % (hit.score, hit.annotation.id, hit.input.id))

Perform a Search With Two Geo Points

#####################################################################################
# In this section, we set the user authentication, app ID, and details of the points
# we want to perform a search with. Change these strings to run your own example.
#####################################################################################

USER_ID = 'YOUR_USER_ID_HERE'
# Your PAT (Personal Access Token) can be found in the Account's Security section
PAT = 'YOUR_PAT_HERE'
APP_ID = 'YOUR_APP_ID_HERE'
# Change these to perform your own search
LONGITUDE_1 = -31.0
LATITUDE_1 = 42.0
LONGITUDE_2 = -29.0
LATITUDE_2 = 39.0

##########################################################################
# YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
##########################################################################

from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)

# The PAT is sent as gRPC call metadata on every request
metadata = (('authorization', 'Key ' + PAT),)

# The userDataObject is required when using a PAT
userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)

# Search annotations whose geo point falls inside the box spanned by the two points
post_annotations_searches_response = stub.PostAnnotationsSearches(
    service_pb2.PostAnnotationsSearchesRequest(
        user_app_id=userDataObject,
        searches=[
            resources_pb2.Search(
                query=resources_pb2.Query(
                    filters=[
                        resources_pb2.Filter(
                            annotation=resources_pb2.Annotation(
                                data=resources_pb2.Data(
                                    geo=resources_pb2.Geo(
                                        geo_box=[
                                            resources_pb2.GeoBoxedPoint(
                                                geo_point=resources_pb2.GeoPoint(
                                                    longitude=LONGITUDE_1,
                                                    latitude=LATITUDE_1
                                                ),
                                            ),
                                            resources_pb2.GeoBoxedPoint(
                                                geo_point=resources_pb2.GeoPoint(
                                                    longitude=LONGITUDE_2,
                                                    latitude=LATITUDE_2
                                                ),
                                            ),
                                        ]
                                    )
                                )
                            )
                        )
                    ]
                )
            )
        ],
        pagination=service_pb2.Pagination(per_page=2, page=1)
    ),
    metadata=metadata
)

# Stop early with the server's own description if the call did not succeed
if post_annotations_searches_response.status.code != status_code_pb2.SUCCESS:
    raise Exception("Post searches failed, status: " + post_annotations_searches_response.status.description)

print("Search result:")
for hit in post_annotations_searches_response.hits:
    print("\tScore %.2f for annotation: %s off input: %s" % (hit.score, hit.annotation.id, hit.input.id))

By Custom Metadata

After you have added inputs with custom metadata, you can search by that metadata.

Below is an example of searching over custom metadata. You can exact match any key: value pair no matter how nested it is.

For example, if the metadata on an input is:

{
"keyname": "value1",
"somelist": [1,2,3],
"somenesting": {
"keyname2":"value2",
"list2":[4,5]
}
}

Then the following searches will find this:

{
"keyname": "value1"
}
{
"somelist": [1,2,3]
}
{
"somelist": [1,2]
}
{
"somenesting": {"keyname2":"value2"}
}
{
"somenesting": {"list2":[5]}
}

How to perform searches:

# Example: upload an image with custom metadata, then search with a filter
# that matches on one of the metadata key/value pairs.
from google.protobuf.struct_pb2 import Struct
from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display

USER_ID = ''  # Fill in your user ID
APP_ID = ''  # Fill in your app ID
PAT = ''  # Fill in your personal access token

# Initialize a Clarifai user with provided credentials
client = User(user_id=USER_ID, pat=PAT)

# Create a Clarifai application with provided ID and base workflow
# Replace 'Universal' with 'General' or other appropriate workflow if needed
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# Initialize an Inputs object for uploading and a search object with top-k results set to 2
input_obj = app.inputs()
search = app.search(top_k=2)

# Define metadata for the image
metadata = Struct()
metadata.update({"filename": "XiJinping.jpg", "split": "train"})

# Specify the URL of the image to be uploaded
url = "https://samples.clarifai.com/XiJinping.jpg"

# Upload the image from the URL with associated metadata
input_obj.upload_from_url(input_id="metadata", image_url=url, metadata=metadata)

# Define metadata filter for the search query (kept under its own name so the
# uploaded Struct above is not silently replaced by a plain dict)
metadata_filter = {"filename": "XiJinping.jpg"}

# Execute the search query with the specified metadata filter
response = search.query(filters=[{"metadata": metadata_filter}])

# Only the first page of results is needed; fail with a clear message instead
# of a NameError/IndexError when the search comes back empty.
first_page = next(iter(response), None)
if first_page is None or not first_page.hits:
    raise RuntimeError("Search returned no hits; check that the input was uploaded and indexed.")

# Retrieve the URL of the first hit
hit = first_page.hits[0].input.data.image.url

# Print the URL of the retrieved image and display it
print(hit)
hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
display(hit_img)

By Annotation Info

Each annotation has annotation info. Similar to metadata, you have full control of this field, and it can be any arbitrary JSON.

################################################################
# In this section, we set the user authentication and app ID.
# Change these strings to run your own example.
################################################################

USER_ID = 'YOUR_USER_ID_HERE'
# Your PAT (Personal Access Token) can be found in the Account's Security section
PAT = 'YOUR_PAT_HERE'
APP_ID = 'YOUR_APP_ID_HERE'

##########################################################################
# YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
##########################################################################

from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2
from google.protobuf.struct_pb2 import Struct

channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)

# The PAT is sent as gRPC call metadata on every request
metadata = (('authorization', 'Key ' + PAT),)

# The userDataObject is required when using a PAT
userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)

# The annotation info key/value pair to match on
search_annotation_info = Struct()
search_annotation_info.update({"type": "animal"})

# Search annotations, filtered to those whose annotation info matches the Struct above
post_annotations_searches_response = stub.PostAnnotationsSearches(
    service_pb2.PostAnnotationsSearchesRequest(
        user_app_id=userDataObject,
        searches=[
            resources_pb2.Search(
                query=resources_pb2.Query(
                    filters=[
                        resources_pb2.Filter(
                            annotation=resources_pb2.Annotation(
                                annotation_info=search_annotation_info
                            )
                        )
                    ]
                )
            )
        ],
        pagination=service_pb2.Pagination(per_page=2, page=1)
    ),
    metadata=metadata
)

# Stop early with the server's own description if the call did not succeed
if post_annotations_searches_response.status.code != status_code_pb2.SUCCESS:
    raise Exception("Post searches failed, status: " + post_annotations_searches_response.status.description)

print("Search result:")
for hit in post_annotations_searches_response.hits:
    print("\tScore %.2f for annotation: %s off input: %s" % (hit.score, hit.annotation.id, hit.input.id))

Multimodal Filtering

Multimodal filtering allows you to refine search results based on a combination of visual and textual criteria.

Unlike unimodal filtering, which limits filtering to a single data type — such as images or text — multimodal filtering enables more comprehensive queries by considering multiple modalities at once.

# Example: upload one image and one text input, then search with a filter
# that accepts both input types at once.

# Importing necessary modules
from google.protobuf.struct_pb2 import Struct
from clarifai.client.user import User
from clarifai.client.search import Search
# Needed to download and display the image hit at the end of the example
from PIL import Image
import requests
from IPython.display import display

# Replace with your Clarifai account information
USER_ID = ''  # Your user ID
APP_ID = ''  # Your app ID
PAT = ''  # Your personal access token

# Creating a User object with your credentials (pass the PAT explicitly so the
# example does not depend on credentials being configured elsewhere)
client = User(user_id=USER_ID, pat=PAT)

# Creating an app with specified settings
# (Example Workflows: 'Universal', 'General')
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# Initializing a Search object
s = Search(user_id=USER_ID, app_id=APP_ID, pat=PAT)

# URL of the image and raw text to be uploaded
img_url = "https://images.pexels.com/photos/139257/pexels-photo-139257.jpeg"
raw_text = "This is earth."

# Uploading the image and raw text as inputs
input_obj = app.inputs()
input_obj.upload_from_url(input_id="img_input", image_url=img_url)
input_obj.upload_text(input_id="txt_input", raw_text=raw_text)

# Querying the search with filters to retrieve results
res = s.query(filters=[{'input_types': ['image', 'text']}])

# Only the first page of results is needed; both hits are indexed below, so
# fail with a clear message when fewer than two come back.
first_page = next(iter(res), None)
if first_page is None or len(first_page.hits) < 2:
    raise RuntimeError("Expected at least two search hits (one text, one image); check that both inputs were uploaded and indexed.")

# Extracting text and image URL from the search results
# NOTE(review): assumes the text input is returned first and the image second — confirm the ordering
text_hit = first_page.hits[0].input.data.text
image_hit = first_page.hits[1].input.data.image.url

# Printing the extracted text and image URL
print(text_hit)
print(image_hit)

# Displaying the image using its URL
hit_img = Image.open(requests.get(image_hit, stream=True).raw).resize((300, 250))
display(hit_img)