Skip to main content

Advanced Search Operations

Learn how to perform advanced search operations using the Clarifai SDKs


In this section we are showcasing Clarifai’s powerful capabilities for refining search results based on complex criteria. The use of AND and OR operations allows users to perform more nuanced searches, combining multiple conditions to precisely filter content. With AND, users can narrow down results to items that meet all specified criteria simultaneously, while OR expands the search scope to include items that meet any of the provided conditions. Additionally, leveraging custom concepts enables users to define their own labels or categories, further enhancing search flexibility.

Custom Concepts

Searching with custom concepts in Clarifai involves associating user-defined labels or concepts with data, enabling more targeted searches. By specifying custom concepts like "dog," users can retrieve relevant results efficiently. This approach streamlines tasks such as content organization and classification, enhancing applications like image recognition and content moderation.

Click here to know more about Custom Concept search.

from clarifai.client.search import Search
from clarifai.client.input import Inputs
from clarifai.client.user import User
from PIL import Image
import requests
from IPython.display import display

# Define your Clarifai credentials
USER_ID = ''
APP_ID = ''
PAT = ''

# Initialize the Clarifai client with your credentials.
# The PAT is supplied to the client itself, so it is not passed to create_app().
client = User(user_id=USER_ID, pat=PAT)

# Create an application within Clarifai with the specified ID and base workflow
# The 'Universal' workflow is a general-purpose workflow that can handle various types of data
app = client.create_app(app_id=APP_ID, base_workflow="Universal")

# Initialize a search object for the specified user, application, and access token
s = Search(user_id=USER_ID, app_id=APP_ID, pat=PAT)

# Initialize an Inputs object for the specified user, application, and access token
inp_obj = Inputs(user_id=USER_ID, app_id=APP_ID, pat=PAT)

# Prepare an input protobuf message from the provided image URL
input_proto = inp_obj.get_input_from_url(
    input_id="dog-tiff",
    image_url="https://samples.clarifai.com/dog.tiff",
    labels=["dog"],
    geo_info=[-30.0, 40.0],  # longitude, latitude
)

# Upload the prepared input protobuf message to the Clarifai application
inp_obj.upload_inputs([input_proto])

# Perform a search query: rank by visual similarity to the image and keep
# only inputs labeled with the custom concept "dog"
response = s.query(
    ranks=[{"image_url": "https://samples.clarifai.com/dog.tiff"}],
    filters=[{"concepts": [{'name': 'dog', 'value': 1}]}],
)

# Extract the URL of the first matching image.
# `hit` stays None when the search returns no hits, so the code below
# never touches an undefined name or indexes an empty hit list.
hit = None
for r in response:
    if r.hits:
        hit = r.hits[0].input.data.image.url
        break

if hit is None:
    print("No matching inputs found")
else:
    # Print the URL of the matched image
    print(hit)

    # Open the matched image URL, resize it, and display it
    hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
    display(hit_img)
Output
https://samples.clarifai.com/dog.tiff

OR

In Clarifai, the OR search operation provides users with a flexible means to retrieve search results that satisfy one or more specified criteria. By employing the OR filter, users can broaden their search scope to include items that meet any of the provided conditions.

Click here to know more about OR search.

# Import necessary modules
from clarifai.client.search import Search
from clarifai.client.user import User
from google.protobuf import struct_pb2
from PIL import Image
import requests
from IPython.display import display

# User-specific credentials
USER_ID = ''  # User ID
APP_ID = ''  # Application ID
PAT = ''  # Personal Access Token

# Define dataset and image URL
CREATE_DATASET_ID = "ci_search_dataset"
DOG_IMG_URL = "https://samples.clarifai.com/dog.tiff"

# Create Clarifai application
app_obj = User(user_id=USER_ID, pat=PAT).create_app(app_id=APP_ID, base_workflow="General")

# Create a dataset
dataset_obj = app_obj.create_dataset(CREATE_DATASET_ID)

# Initialize inputs object
inp_obj = app_obj.inputs()

# Define metadata for the image
metadata = struct_pb2.Struct()
metadata.update({"Breed": "Saint Bernard"})

# Get input from URL and upload it
input_proto = inp_obj.get_input_from_url(
    dataset_id=CREATE_DATASET_ID,
    input_id="dog-tiff",
    image_url=DOG_IMG_URL,
    labels=["dog"],
    geo_info=[-30.0, 40.0],  # longitude, latitude
    metadata=metadata,
)
inp_obj.upload_inputs([input_proto])

# Define OR filter: several concepts inside ONE filter entry are OR-ed,
# so an input labeled "deer" or "dog" matches
or_filter = [{  # OR
    "concepts": [
        {"name": "deer", "value": 1},
        {"name": "dog", "value": 1},
    ]
}]

# Perform search with OR filter, ranked by similarity to the dog image
search = app_obj.search()
res = search.query(ranks=[{"image_url": DOG_IMG_URL}], filters=or_filter)

# Process search results: take the URL of the first hit.
# `hit` stays None when nothing matches, avoiding a NameError/IndexError.
hit = None
for r in res:
    if r.hits:
        hit = r.hits[0].input.data.image.url
        break

# Display the image
if hit is None:
    print("No matching inputs found")
else:
    print(hit)
    hit_img = Image.open(requests.get(hit, stream=True).raw).resize((300, 250))
    display(hit_img)
Output
https://samples.clarifai.com/dog.tiff

AND

In Clarifai, the AND operation allows users to refine search results by specifying multiple criteria that must all be met simultaneously. For example, if a user searches for images containing both the "dog" and "deer" concepts, only images labeled with both concepts will be returned.

Click here to know more about AND search.

# Import necessary modules
from clarifai.client.search import Search
from clarifai.client.user import User
from google.protobuf import struct_pb2
from PIL import Image
import requests
from IPython.display import display

# Credentials for the account that owns the app
USER_ID = ''
APP_ID = ''
PAT = ''

# Dataset name and sample image used throughout this example
CREATE_DATASET_ID = "ci_search_dataset"
DOG_IMG_URL = "https://samples.clarifai.com/dog.tiff"

# Create the application through an authenticated user client
app_obj = User(user_id=USER_ID, pat=PAT).create_app(
    app_id=APP_ID,
    base_workflow="General",
)

# Create the dataset that will hold the uploaded input
dataset_obj = app_obj.create_dataset(CREATE_DATASET_ID)

# Inputs helper used to build and upload data
inp_obj = app_obj.inputs()

# Attach custom metadata to the input
metadata = struct_pb2.Struct()
metadata.update({"Breed": "Saint Bernard"})

# Build the input from its URL, then upload it to the dataset
input_proto = inp_obj.get_input_from_url(
    dataset_id=CREATE_DATASET_ID,
    input_id="dog-tiff",
    image_url=DOG_IMG_URL,
    labels=["dog"],
    geo_info=[-30.0, 40.0],  # longitude, latitude
    metadata=metadata,
)
inp_obj.upload_inputs([input_proto])

# AND filter: each concept sits in its OWN filter entry, so an input
# must carry both "dog" and "deer" to match
and_filter = [
    {"concepts": [{"name": "dog", "value": 1}]},
    {"concepts": [{"name": "deer", "value": 1}]},
]

# Create a search object
search = app_obj.search()

# Rank by similarity to the dog image while applying the AND filter
res = search.query(
    ranks=[{"image_url": "https://samples.clarifai.com/dog.tiff"}],
    filters=and_filter,
)

# Materialize the search results
resp = list(res)

# The uploaded input is labeled only "dog", so nothing satisfies both filters
print(len(resp))  # Should be zero
Output
0

Pagination

With the Clarifai SDKs, users can set pagination parameters such as per_page and page_no to control how search results are returned.

from clarifai.client.user import User

# Replace "user_id" with your own user ID.
# NOTE(review): no PAT is passed here, so the SDK must obtain credentials some
# other way (presumably the CLARIFAI_PAT environment variable - confirm).
client = User(user_id="user_id")

# (Example Workflows: 'Universal', 'Empty', 'General')
app = client.create_app(app_id="app_id", base_workflow="Universal")

# Set the input URLs
urls = [
    "https://images.pexels.com/photos/139257/pexels-photo-139257.jpeg",
    "https://images.pexels.com/photos/1879386/pexels-photo-1879386.jpeg",
    "https://images.pexels.com/photos/1071882/pexels-photo-1071882.jpeg",
]

input_obj = app.inputs()
# Add inputs to the app, one upload per URL
for i, url in enumerate(urls):
    input_obj.upload_from_url(input_id=f"input{i}", image_url=url)

# Perform search with pagination: request page 1 with 2 results per page
search = app.search(pagination=True)
response = search.query(
    ranks=[{"text_raw": "Red pineapples on the beach."}],
    per_page=2,
    page_no=1,
)
resp = list(response)

# Number of hits on the returned page; report 0 instead of raising
# IndexError when the search yields no pages at all
print(len(resp[0].hits) if resp else 0)
Output
2