Skip to main content

Filter

Learn how to perform searches with filters using the Clarifai Python SDK


The filter feature in Clarifai's search functionality allows users to narrow down search results based on specific criteria or conditions. Filters act as constraints that refine the set of results returned by a search query. Users can specify various filter parameters to tailor the search results to their needs. These parameters might include attributes such as input type, status code, metadata, or other properties associated with the data being searched.

Click here to learn more about filters.

Filter with Input Type

Filtering with input type in Clarifai enables users to narrow down search results based on the type of data input. For example, users can choose to retrieve only results of a particular input type, such as images, videos, or text. This functionality allows for more targeted searches, ensuring that search results align with the desired data format or content type.

from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display

# Example: restrict a search to inputs of a given type (images only).

# Fill in your user ID, app ID, and personal access token (PAT)
USER_ID = ''
APP_ID = ''
PAT = ''

# Initialize a User object with your credentials
client = User(user_id=USER_ID, pat=PAT)

# Create an application with the specified app ID and base workflow
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# URLs of images to be uploaded
urls = [
    "https://images.pexels.com/photos/139257/pexels-photo-139257.jpeg",
    "https://images.pexels.com/photos/1879386/pexels-photo-1879386.jpeg",
    "https://images.pexels.com/photos/1071882/pexels-photo-1071882.jpeg",
]

# Inputs object for uploading, Search object (top 2 results) for querying
input_obj = app.inputs()
search = app.search(top_k=2)

# Upload every image under its own input ID: input0, input1, ...
for i, url in enumerate(urls):
    input_obj.upload_from_url(input_id=f"input{i}", image_url=url)

# Perform a search query that keeps only image inputs
res = search.query(filters=[{'input_types': ['image']}])

# Take the URL of the first hit. `hit` stays None when the search returns
# nothing, so the code below never touches an undefined name.
hit = None
for r in res:
    if r.hits:
        hit = r.hits[0].input.data.image.url
        break

if hit is None:
    print("No inputs matched the filter.")
else:
    # Print the URL of the found image
    print(hit)

    # Download the found image, resize it, and display it
    hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
    display(hit_img)
Output
https://images.pexels.com/photos/139257/pexels-photo-139257.jpeg

Filter with Metadata

The filter feature in Clarifai's search functionality allows users to narrow down search results based on specific criteria or conditions. Filters act as constraints that refine the set of results returned by a search query. In the following example, we filter the search results by metadata: an image is uploaded with metadata attached and is then retrieved by matching on its `filename` field.

from google.protobuf.struct_pb2 import Struct
from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display

# Example: retrieve an input by filtering on the metadata attached to it.

USER_ID = ''  # Fill in your user ID
APP_ID = ''  # Fill in your app ID
PAT = ''  # Fill in your personal access token

# Initialize a Clarifai user with provided credentials
client = User(user_id=USER_ID, pat=PAT)

# Create a Clarifai application with provided ID and base workflow
# Replace 'Universal' with 'General' or other appropriate workflow if needed
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# Inputs object used for the upload below; it must be created from the app
# before calling upload_from_url
input_obj = app.inputs()

# Initialize a search object for the created application with top-k results set to 2
search = app.search(top_k=2)

# Metadata stored with the image (a protobuf Struct)
metadata = Struct()
metadata.update({"filename": "XiJinping.jpg", "split": "train"})

# Specify the URL of the image to be uploaded
url = "https://samples.clarifai.com/XiJinping.jpg"

# Upload the image from the URL with associated metadata
input_obj.upload_from_url(input_id="metadata", image_url=url, metadata=metadata)

# Metadata to match in the search. Kept under its own name so it is not
# confused with the Struct that was uploaded above.
metadata_filter = {"filename": "XiJinping.jpg"}

# Execute the search query with the specified metadata filter
response = search.query(filters=[{"metadata": metadata_filter}])

# Take the URL of the first hit; `hit` stays None when nothing matched
hit = None
for r in response:
    if r.hits:
        hit = r.hits[0].input.data.image.url
        break

if hit is None:
    print("No inputs matched the filter.")
else:
    # Print the URL of the retrieved image and display it
    print(hit)
    hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
    display(hit_img)
Output
https://samples.clarifai.com/XiJinping.jpg

Filter with Geopoint

Filtering with geopoint in Clarifai allows users to refine search results based on geographic location data associated with the inputs. This feature enables users to specify geographical coordinates such as latitude and longitude to filter inputs that are within a certain proximity or region. By incorporating geopoint filters, users can perform location-based searches, facilitating tasks such as retrieving content relevant to specific geographic areas or analyzing data within a particular region.

from clarifai.client.user import User 
from PIL import Image
import requests
from IPython.display import display

# Example: retrieve an input by filtering on its geographic location.

USER_ID = ''  # Specify your Clarifai user ID
APP_ID = ''  # Specify your Clarifai application ID
PAT = ''  # Specify your Clarifai personal access token

# Create a User instance with the specified user ID and personal access token
client = User(user_id=USER_ID, pat=PAT)

# Create an application instance using the specified application ID and base workflow ('Universal')
app = client.create_app(app_id=APP_ID, base_workflow="Universal")

# Inputs object used for the upload below; it must be created from the app
# before calling upload_from_url
input_obj = app.inputs()

# Initialize a search object associated with the created application, specifying the maximum number of results to retrieve
search = app.search(top_k=2)

# Define the URL of the image to be uploaded for geolocation-based filtering
url = "https://samples.clarifai.com/XiJinping.jpg"

# Coordinates shared by the upload and the query, so both refer to the same
# point. geo_info is ordered [longitude, latitude].
LONGITUDE = -30.0
LATITUDE = 40.0

# Upload the image from the specified URL with associated geolocation information (longitude and latitude)
input_obj.upload_from_url(input_id="geo", image_url=url, geo_info=[LONGITUDE, LATITUDE])

# Execute a search query with a filter based on geolocation information
# (longitude, latitude, and radius). The longitude/latitude must match the
# uploaded point; swapping them searches a different location.
# NOTE(review): geo_limit is presumably a radius in kilometres - confirm
# against the installed SDK version.
response = search.query(
    filters=[{"geo_point": {'longitude': LONGITUDE, 'latitude': LATITUDE, 'geo_limit': 100}}]
)

# Take the URL of the first image hit; `hit` stays None when nothing matched
hit = None
for r in response:
    if r.hits:
        hit = r.hits[0].input.data.image.url
        break

if hit is None:
    print("No inputs matched the filter.")
else:
    # Print the URL of the hit image
    print(hit)

    # Open the hit image using requests, resize it, and display it
    hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
    display(hit_img)
Output
https://samples.clarifai.com/XiJinping.jpg

Filter with Input Status Code

Filtering with input status code in Clarifai allows users to refine search results based on the status of the input data. This feature enables users to specify criteria related to the status of the input data, such as whether the data was successfully processed, is pending processing, or encountered errors.

from google.protobuf.struct_pb2 import Struct
from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display

# Example: retrieve inputs by filtering on their status code.

# Replace with your Clarifai user ID, app ID, and personal access token (PAT)
USER_ID = ''
APP_ID = ''
PAT = ''

# Initialize Clarifai user. The PAT is passed explicitly so the snippet does
# not depend on credentials configured outside of it.
client = User(user_id=USER_ID, pat=PAT)

# Create a Clarifai application with the provided app ID, using the 'Universal' base workflow
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# Inputs object used for the upload below; it must be created from the app
# before calling upload_from_url
input_obj = app.inputs()

# Initialize a search object for the application with a maximum of 2 results
search = app.search(top_k=2)

# URL of the image to be uploaded for search
url = "https://samples.clarifai.com/XiJinping.jpg"

# Upload the image from the specified URL with an input ID of 'geo'
input_obj.upload_from_url(input_id="geo", image_url=url)

# Query the application's search with a filter to retrieve inputs with a status code of 30000
# NOTE(review): 30000 is presumably the "input download success" status -
# confirm against Clarifai's status-code list.
response = search.query(filters=[{'input_status_code': 30000}])

# Take the URL of the first hit input; `hit` stays None when nothing matched
hit = None
for r in response:
    if r.hits:
        hit = r.hits[0].input.data.image.url
        break

if hit is None:
    print("No inputs matched the filter.")
else:
    # Print the URL of the hit input
    print(hit)

    # Open and display the image corresponding to the hit input URL
    hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
    display(hit_img)
Output
https://samples.clarifai.com/XiJinping.jpg

Filter with Dataset-ID

Filtering with dataset ID in Clarifai allows users to narrow down search results based on specific criteria within a designated dataset. By specifying a dataset ID in the filter, users can refine their search to only consider inputs or data entries within that particular dataset.

# Import necessary modules
from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display
import pandas as pd

# Example: restrict a search to the inputs that belong to one dataset.

# Specify user credentials
USER_ID = ''
APP_ID = ''
PAT = ''

# Initialize Clarifai client. The PAT is passed explicitly so the snippet
# does not depend on credentials configured outside of it.
client = User(user_id=USER_ID, pat=PAT)

# Create an application with specified parameters
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# Create a dataset within the application
dataset = app.create_dataset(dataset_id="demo_dataset")

# Define a list of image URLs
urls = [
    "https://images.pexels.com/photos/139257/pexels-photo-139257.jpeg",
    "https://images.pexels.com/photos/1879386/pexels-photo-1879386.jpeg",
    "https://images.pexels.com/photos/1071882/pexels-photo-1071882.jpeg",
]

# Convert the list of URLs to a single-column ('input') DataFrame and save it as a CSV file
df = pd.DataFrame(urls, columns=['input'])
df.to_csv("images.csv", index=False)

# Upload images to the dataset from the CSV file (URLs only, no labels)
dataset.upload_from_csv(csv_path='images.csv', input_type='image', csv_type='url', labels=False)

# Initialize a search instance for the application
search = app.search(top_k=2)

# Query the search with a filter that keeps only inputs of this dataset
response = search.query(filters=[{"input_dataset_ids": [dataset.id]}])

# Take the URL of the first image hit; `hit` stays None when nothing matched
hit = None
for r in response:
    if r.hits:
        hit = r.hits[0].input.data.image.url
        break

if hit is None:
    print("No inputs matched the filter.")
else:
    # Print the URL of the hit image, then download, resize and display it
    print(hit)
    hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
    display(hit_img)
Output
https://images.pexels.com/photos/1879386/pexels-photo-1879386.jpeg

MultiModal Filtering

Multimodal filtering in Clarifai refers to the ability to refine search results based on criteria related to both visual and textual content. Unlike unimodal filtering, which focuses solely on one type of data (e.g., images or text), multimodal filtering considers multiple modalities simultaneously.

# Example: one search whose filter admits both image and text inputs.

# Importing necessary modules
from google.protobuf.struct_pb2 import Struct
from clarifai.client.user import User
from clarifai.client.search import Search
# Required by the download/display step at the end of this example
from PIL import Image
import requests
from IPython.display import display

# Replace with your Clarifai account information
USER_ID = ''  # Your user ID
APP_ID = ''  # Your app ID
PAT = ''  # Your personal access token

# Creating a User object with your credentials. The PAT is passed explicitly
# so the snippet does not depend on credentials configured outside of it.
client = User(user_id=USER_ID, pat=PAT)

# Creating an app with specified settings
# (Example Workflows: 'Universal', 'General')
app = client.create_app(app_id=APP_ID, base_workflow="Universal", pat=PAT)

# Initializing a Search object
s = Search(user_id=USER_ID, app_id=APP_ID, pat=PAT)

# URL of the image and raw text to be uploaded
img_url = "https://images.pexels.com/photos/139257/pexels-photo-139257.jpeg"
raw_text = "This is earth."

# Uploading the image and raw text as inputs
input_obj = app.inputs()
input_obj.upload_from_url(input_id="img_input", image_url=img_url)
input_obj.upload_text(input_id="txt_input", raw_text=raw_text)

# Querying the search with filters to retrieve both image and text inputs
res = s.query(filters=[{'input_types': ['image', 'text']}])

# Pick the first text hit and the first image hit by looking at which data
# field each hit actually carries, rather than assuming the text is hits[0]
# and the image is hits[1].
text_hit = None
image_hit = None
for r in res:
    for hit in r.hits:
        data = hit.input.data
        if text_hit is None and data.HasField("text"):
            text_hit = data.text
        elif image_hit is None and data.HasField("image"):
            image_hit = data.image.url
    if text_hit is not None and image_hit is not None:
        break  # Both modalities found; no need to read further results

# Printing the extracted text and image URL
if text_hit is None:
    print("No text input matched the filter.")
else:
    print(text_hit)

if image_hit is None:
    print("No image input matched the filter.")
else:
    print(image_hit)

    # Displaying the image using its URL
    hit_img = Image.open(requests.get(image_hit, stream=True).raw).resize((300, 250))
    display(hit_img)
Output
url: "https://data.clarifai.com/orig/users/8tzpjy1a841y/apps/multimodal/inputs/text/bed3a836aea9e11c141fdc45ab741778"

hosted {

prefix: "https://data.clarifai.com"

suffix: "users/8tzpjy1a841y/apps/multimodal/inputs/text/bed3a836aea9e11c141fdc45ab741778"

sizes: "orig"

crossorigin: "use-credentials"

}

text_info {

char_count: 14

encoding: "UTF8"

}

https://images.pexels.com/photos/139257/pexels-photo-139257.jpeg