Audio
Make predictions on audio inputs
Input: Audio
Output: Text
To get predictions for a given audio input, you need to supply the audio along with the specific model from which you wish to receive predictions. You can supply the input via a publicly accessible URL or by directly sending bytes.
Specify the model you want to use for prediction via the MODEL_ID parameter.
The file size of each audio input should be under 5MB. This is typically suitable for a 48kHz audio file lasting up to 60 seconds, recorded with 16-bit audio quality. If your file exceeds this limit, you will need to split it into smaller chunks.
The initialization code used in the following examples is outlined in detail on the client installation page.
Predict via URL
Below is an example of how you would use the asr-wav2vec2-base-960h-english audio transcription model to convert English speech audio, sent via a URL, into English text.
- Python
- JavaScript (REST)
- NodeJS
- Java
- PHP
- cURL
#########################################################################################
# In this section, we set the user authentication, user and app ID, model ID, and
# audio URL. Change these strings to run your own example.
#########################################################################################
# Your PAT (Personal Access Token) can be found in the Account's Security section
PAT = "YOUR_PAT_HERE"
# Specify the correct user_id/app_id pairings
# Since you're making inferences outside your app's scope
USER_ID = "facebook"
APP_ID = "asr"
# Change these to make your own predictions
MODEL_ID = "asr-wav2vec2-base-960h-english"
AUDIO_URL = "https://samples.clarifai.com/negative_sentence_1.wav"
##########################################################################
# YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
##########################################################################
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

# Open a gRPC channel to the Clarifai API and create the V2 service stub
channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)

# This metadata tuple authenticates every endpoint call with the PAT
metadata = (("authorization", "Key " + PAT),)

userDataObject = resources_pb2.UserAppIDSet(
    user_id=USER_ID, app_id=APP_ID
)  # The userDataObject is required when using a PAT

post_model_outputs_response = stub.PostModelOutputs(
    service_pb2.PostModelOutputsRequest(
        user_app_id=userDataObject,
        model_id=MODEL_ID,
        inputs=[
            resources_pb2.Input(
                data=resources_pb2.Data(audio=resources_pb2.Audio(url=AUDIO_URL))
            )
        ],
    ),
    metadata=metadata,
)

if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
    print(post_model_outputs_response.status)
    # This request is a PostModelOutputs call, so the error message says so
    # (the previous "Post workflow results" text was copied from the workflow example)
    raise Exception(
        "Post model outputs failed, status: "
        + post_model_outputs_response.status.description
    )

# Since we have one input, one output will exist here
output = post_model_outputs_response.outputs[0]
# Print the output
print(output.data.text.raw)
<!--index.html file-->
<script>
////////////////////////////////////////////////////////////////////////////////////////
// In this section, we set the user authentication, user and app ID, model ID, and
// audio URL. Change these strings to run your own example.
///////////////////////////////////////////////////////////////////////////////////////
// Your PAT (Personal Access Token) can be found in the Account's Security section
const PAT = "YOUR_PAT_HERE";
// Specify the correct user_id/app_id pairings
// Since you're making inferences outside your app's scope
const USER_ID = "facebook";
const APP_ID = "asr";
// Change these to make your own predictions
const MODEL_ID = "asr-wav2vec2-base-960h-english";
const AUDIO_URL = "https://samples.clarifai.com/negative_sentence_1.wav";
///////////////////////////////////////////////////////////////////////////////////
// YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
///////////////////////////////////////////////////////////////////////////////////
const raw = JSON.stringify({
"user_app_id": {
"user_id": USER_ID,
"app_id": APP_ID
},
"inputs": [
{
"data": {
"audio": {
"url": AUDIO_URL
}
}
}
]
});
const requestOptions = {
method: 'POST',
headers: {
'Accept': 'application/json',
'Authorization': 'Key ' + PAT
},
body: raw
};
fetch(`https://api.clarifai.com/v2/models/${MODEL_ID}/outputs`, requestOptions)
.then(response => response.text())
.then(result => console.log(result))
.catch(error => console.log('error', error));
</script>
//index.js file
////////////////////////////////////////////////////////////////////////////////////////
// In this section, we set the user authentication, user and app ID, model ID, and
// audio URL. Change these strings to run your own example.
////////////////////////////////////////////////////////////////////////////////////////
// Your PAT (Personal Access Token) can be found in the Account's Security section
const PAT = "YOUR_PAT_HERE";
// Specify the correct user_id/app_id pairings
// Since you're making inferences outside your app's scope
const USER_ID = "facebook";
const APP_ID = "asr";
// Change these to make your own predictions
const MODEL_ID = "asr-wav2vec2-base-960h-english";
const AUDIO_URL = "https://samples.clarifai.com/negative_sentence_1.wav";
/////////////////////////////////////////////////////////////////////////////
// YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
/////////////////////////////////////////////////////////////////////////////
const { ClarifaiStub, grpc } = require("clarifai-nodejs-grpc");

const stub = ClarifaiStub.grpc();

// This will be used by every Clarifai endpoint call
const metadata = new grpc.Metadata();
metadata.set("authorization", "Key " + PAT);

stub.PostModelOutputs(
  {
    user_app_id: {
      "user_id": USER_ID,
      "app_id": APP_ID,
    },
    model_id: MODEL_ID,
    inputs: [{ data: { audio: { url: AUDIO_URL } } }],
  },
  metadata,
  (err, response) => {
    if (err) {
      // Rethrow the original gRPC error as-is; wrapping it in new Error(err)
      // would stringify it and lose the stack trace and status details
      throw err;
    }

    // 10000 is the Clarifai SUCCESS status code
    if (response.status.code !== 10000) {
      throw new Error(
        "Post model outputs failed, status: " + response.status.description
      );
    }

    // Since we have one input, one output will exist here
    const output = response.outputs[0];
    // Print the output
    console.log(output.data.text.raw);
  }
);
package com.clarifai.example;

import com.clarifai.channel.ClarifaiChannel;
import com.clarifai.credentials.ClarifaiCallCredentials;
import com.clarifai.grpc.api.*;
import com.clarifai.grpc.api.status.StatusCode;

public class ClarifaiExample {
    ///////////////////////////////////////////////////////////////////////////////////
    // In this section, we set the user authentication, user and app ID, model ID, and
    // audio URL. Change these strings to run your own example.
    ///////////////////////////////////////////////////////////////////////////////////
    // Your PAT (Personal Access Token) can be found in the portal under Authentication
    static final String PAT = "YOUR_PAT_HERE";
    // Specify the correct user_id/app_id pairings
    // Since you're making inferences outside your app's scope
    static final String USER_ID = "facebook";
    static final String APP_ID = "asr";
    // Change these to make your own predictions
    static final String MODEL_ID = "asr-wav2vec2-base-960h-english";
    static final String AUDIO_URL = "https://samples.clarifai.com/negative_sentence_1.wav";
    ///////////////////////////////////////////////////////////////////////////////////
    // YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
    ///////////////////////////////////////////////////////////////////////////////////

    public static void main(String[] args) {
        // Open a gRPC channel and authenticate every call with the PAT
        V2Grpc.V2BlockingStub stub = V2Grpc.newBlockingStub(ClarifaiChannel.INSTANCE.getGrpcChannel())
            .withCallCredentials(new ClarifaiCallCredentials(PAT));

        MultiOutputResponse postModelOutputsResponse = stub.postModelOutputs(
            PostModelOutputsRequest.newBuilder()
                .setUserAppId(UserAppIDSet.newBuilder().setUserId(USER_ID).setAppId(APP_ID))
                .setModelId(MODEL_ID)
                .addInputs(
                    Input.newBuilder().setData(
                        Data.newBuilder().setAudio(
                            Audio.newBuilder().setUrl(AUDIO_URL)
                        )
                    )
                )
                .build()
        );

        if (postModelOutputsResponse.getStatus().getCode() != StatusCode.SUCCESS) {
            System.out.println(postModelOutputsResponse.getStatus());
            // This request is a PostModelOutputs call, so the error message says so
            // (the previous "Post workflow results" text was copied from the workflow example)
            throw new RuntimeException("Post model outputs failed, status: " + postModelOutputsResponse.getStatus().getDescription());
        }

        // Since we have one input, one output will exist here
        Output output = postModelOutputsResponse.getOutputs(0);
        // Print the output
        System.out.println(output.getData().getText().getRaw());
    }
}
<?php

require __DIR__ . "/vendor/autoload.php";

///////////////////////////////////////////////////////////////////////////////////
// In this section, we set the user authentication, user and app ID, model ID, and
// audio URL. Change these strings to run your own example.
///////////////////////////////////////////////////////////////////////////////////
// Your PAT (Personal Access Token) can be found in the Account's Security section
$PAT = "YOUR_PAT_HERE";
// Specify the correct user_id/app_id pairings
// Since you're making inferences outside your app's scope
$USER_ID = "facebook";
$APP_ID = "asr";
// Change these to make your own predictions
$MODEL_ID = "asr-wav2vec2-base-960h-english";
$AUDIO_URL = "https://samples.clarifai.com/negative_sentence_1.wav";
///////////////////////////////////////////////////////////////////////////////////
// YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
///////////////////////////////////////////////////////////////////////////////////

use Clarifai\Api\Audio;
use Clarifai\ClarifaiClient;
use Clarifai\Api\PostModelOutputsRequest;
use Clarifai\Api\Input;
use Clarifai\Api\Data;
use Clarifai\Api\Status\StatusCode;
use Clarifai\Api\UserAppIDSet;

$client = ClarifaiClient::grpc();

// This metadata authenticates every endpoint call with the PAT
$metadata = ["Authorization" => ["Key " . $PAT]];

$userDataObject = new UserAppIDSet([
    "user_id" => $USER_ID,
    "app_id" => $APP_ID
]);

// Let's make a RPC call to the Clarifai platform. It uses the opened gRPC client channel to communicate a
// request and then wait for the response
[$response, $status] = $client->PostModelOutputs(
    // The request object carries the request along with the request status and other metadata related to the request itself
    new PostModelOutputsRequest([
        "user_app_id" => $userDataObject,
        "model_id" => $MODEL_ID,
        "inputs" => [
            new Input([
                "data" => new Data([
                    "audio" => new Audio([
                        "url" => $AUDIO_URL
                    ])
                ])
            ])
        ]
    ]),
    $metadata
)->wait();

// A response is returned and the first thing we do is check the status of it
// A successful response will have a status code of 0; otherwise, there is some error
if ($status->code !== 0) {
    throw new Exception("Error: {$status->details}");
}

// In addition to the RPC response status, there is a Clarifai API status that reports if the operation was a success or failure
// (not just that the communication was successful)
// Use strict comparison (!==) here, consistent with the gRPC status check above
if ($response->getStatus()->getCode() !== StatusCode::SUCCESS) {
    print $response->getStatus()->getDetails();
    throw new Exception("Failure response: " . $response->getStatus()->getDescription());
}

// We'll get one output for each input we used above. Because of one input, we have here one output
$output = $response->getOutputs()[0];
// Print the output
echo $output->getData()->getText()->getRaw();
?>
curl --request POST "https://api.clarifai.com/v2/users/facebook/apps/asr/models/asr-wav2vec2-base-960h-english/outputs" \
  --header "authorization: Key YOUR_PAT_HERE" \
  --header "content-type: application/json" \
  --data '{
    "inputs": [
      {
        "data": {
          "audio": {
            "url": "https://samples.clarifai.com/negative_sentence_1.wav"
          }
        }
      }
    ]
  }'
Text Output Example
I AM NOT FLYING TO ENGLAND
Predict via Bytes
Below is an example of how you would use the asr-wav2vec2-base-960h-english audio transcription model to convert English speech audio, sent as bytes, into English text.
- Python
- JavaScript (REST)
- NodeJS
- Java
- PHP
- cURL
#########################################################################################
# In this section, we set the user authentication, user and app ID, model ID, and
# audio file location. Change these strings to run your own example.
#########################################################################################
# Your PAT (Personal Access Token) can be found in the Account's Security section
PAT = "YOUR_PAT_HERE"
# Specify the correct user_id/app_id pairings
# Since you're making inferences outside your app's scope
USER_ID = "facebook"
APP_ID = "asr"
# Change these to make your own predictions
MODEL_ID = "asr-wav2vec2-base-960h-english"
AUDIO_FILE_LOCATION = "YOUR_AUDIO_FILE_LOCATION_HERE"
##########################################################################
# YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
##########################################################################
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

# Open a gRPC channel to the Clarifai API and create the V2 service stub
channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)

# This metadata tuple authenticates every endpoint call with the PAT
metadata = (("authorization", "Key " + PAT),)

userDataObject = resources_pb2.UserAppIDSet(
    user_id=USER_ID, app_id=APP_ID
)  # The userDataObject is required when using a PAT

# Read the raw audio bytes from disk ("rb" keeps the data binary-safe)
with open(AUDIO_FILE_LOCATION, "rb") as f:
    file_bytes = f.read()

post_model_outputs_response = stub.PostModelOutputs(
    service_pb2.PostModelOutputsRequest(
        user_app_id=userDataObject,
        model_id=MODEL_ID,
        inputs=[
            resources_pb2.Input(
                data=resources_pb2.Data(audio=resources_pb2.Audio(base64=file_bytes))
            )
        ],
    ),
    metadata=metadata,
)

if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
    print(post_model_outputs_response.status)
    # This request is a PostModelOutputs call, so the error message says so
    # (the previous "Post workflow results" text was copied from the workflow example)
    raise Exception(
        "Post model outputs failed, status: "
        + post_model_outputs_response.status.description
    )

# Since we have one input, one output will exist here
output = post_model_outputs_response.outputs[0]
# Print the output
print(output.data.text.raw)
<!--index.html file-->
<script>
//////////////////////////////////////////////////////////////////////////////////////////////
// In this section, we set the user authentication, user and app ID, model ID, and bytes
// of the audio we want as an input. Change these strings to run your own example.
/////////////////////////////////////////////////////////////////////////////////////////////
// Your PAT (Personal Access Token) can be found in the Account's Security section
const PAT = "YOUR_PAT_HERE";
// Specify the correct user_id/app_id pairings
// Since you're making inferences outside your app's scope
const USER_ID = "facebook";
const APP_ID = "asr";
// Change these to make your own predictions
const MODEL_ID = "asr-wav2vec2-base-960h-english";
const AUDIO_BYTES_STRING = "YOUR_BYTES_STRING_HERE";
///////////////////////////////////////////////////////////////////////////////////
// YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
///////////////////////////////////////////////////////////////////////////////////
const raw = JSON.stringify({
"user_app_id": {
"user_id": USER_ID,
"app_id": APP_ID
},
"inputs": [
{
"data": {
"audio": {
"base64": AUDIO_BYTES_STRING
}
}
}
]
});
const requestOptions = {
method: 'POST',
headers: {
'Accept': 'application/json',
'Authorization': 'Key ' + PAT
},
body: raw
};
fetch(`https://api.clarifai.com/v2/models/${MODEL_ID}/outputs`, requestOptions)
.then(response => response.text())
.then(result => console.log(result))
.catch(error => console.log('error', error));
</script>
//index.js file
////////////////////////////////////////////////////////////////////////////////////////
// In this section, we set the user authentication, user and app ID, model ID, and
// audio file location. Change these strings to run your own example.
////////////////////////////////////////////////////////////////////////////////////////
// Your PAT (Personal Access Token) can be found in the Account's Security section
const PAT = "YOUR_PAT_HERE";
// Specify the correct user_id/app_id pairings
// Since you're making inferences outside your app's scope
const USER_ID = "facebook";
const APP_ID = "asr";
// Change these to make your own predictions
const MODEL_ID = "asr-wav2vec2-base-960h-english";
const AUDIO_FILE_LOCATION = "YOUR_AUDIO_FILE_LOCATION_HERE";
/////////////////////////////////////////////////////////////////////////////
// YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
/////////////////////////////////////////////////////////////////////////////
const { ClarifaiStub, grpc } = require("clarifai-nodejs-grpc");

const stub = ClarifaiStub.grpc();

// This will be used by every Clarifai endpoint call
const metadata = new grpc.Metadata();
metadata.set("authorization", "Key " + PAT);

// Read the raw audio bytes from disk (returns a Buffer)
const fs = require("fs");
const audioBytes = fs.readFileSync(AUDIO_FILE_LOCATION);

stub.PostModelOutputs(
  {
    user_app_id: {
      "user_id": USER_ID,
      "app_id": APP_ID,
    },
    model_id: MODEL_ID,
    inputs: [{ data: { audio: { base64: audioBytes } } }],
  },
  metadata,
  (err, response) => {
    if (err) {
      // Rethrow the original gRPC error as-is; wrapping it in new Error(err)
      // would stringify it and lose the stack trace and status details
      throw err;
    }

    // 10000 is the Clarifai SUCCESS status code
    if (response.status.code !== 10000) {
      throw new Error(
        "Post model outputs failed, status: " + response.status.description
      );
    }

    // Since we have one input, one output will exist here
    const output = response.outputs[0];
    // Print the output
    console.log(output.data.text.raw);
  }
);
package com.clarifai.example;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import com.clarifai.channel.ClarifaiChannel;
import com.clarifai.credentials.ClarifaiCallCredentials;
import com.clarifai.grpc.api.*;
import com.clarifai.grpc.api.status.StatusCode;
import com.google.protobuf.ByteString;

public class ClarifaiExample {
    ///////////////////////////////////////////////////////////////////////////////////
    // In this section, we set the user authentication, user and app ID, model ID, and
    // audio file location. Change these strings to run your own example.
    ///////////////////////////////////////////////////////////////////////////////////
    // Your PAT (Personal Access Token) can be found in the portal under Authentication
    static final String PAT = "YOUR_PAT_HERE";
    // Specify the correct user_id/app_id pairings
    // Since you're making inferences outside your app's scope
    static final String USER_ID = "facebook";
    static final String APP_ID = "asr";
    // Change these to make your own predictions
    static final String MODEL_ID = "asr-wav2vec2-base-960h-english";
    static final String AUDIO_FILE_LOCATION = "YOUR_AUDIO_FILE_LOCATION_HERE";
    ///////////////////////////////////////////////////////////////////////////////////
    // YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
    ///////////////////////////////////////////////////////////////////////////////////

    public static void main(String[] args) throws IOException {
        // Open a gRPC channel and authenticate every call with the PAT
        V2Grpc.V2BlockingStub stub = V2Grpc.newBlockingStub(ClarifaiChannel.INSTANCE.getGrpcChannel())
            .withCallCredentials(new ClarifaiCallCredentials(PAT));

        MultiOutputResponse postModelOutputsResponse = stub.postModelOutputs(
            PostModelOutputsRequest.newBuilder()
                .setUserAppId(UserAppIDSet.newBuilder().setUserId(USER_ID).setAppId(APP_ID))
                .setModelId(MODEL_ID)
                .addInputs(
                    Input.newBuilder().setData(
                        Data.newBuilder().setAudio(
                            // Read the raw audio bytes from disk into the request
                            Audio.newBuilder().setBase64(ByteString.copyFrom(Files.readAllBytes(
                                new File(AUDIO_FILE_LOCATION).toPath()
                            )))
                        )
                    )
                )
                .build()
        );

        if (postModelOutputsResponse.getStatus().getCode() != StatusCode.SUCCESS) {
            System.out.println(postModelOutputsResponse.getStatus());
            // This request is a PostModelOutputs call, so the error message says so
            // (the previous "Post workflow results" text was copied from the workflow example)
            throw new RuntimeException("Post model outputs failed, status: " + postModelOutputsResponse.getStatus().getDescription());
        }

        // Since we have one input, one output will exist here
        Output output = postModelOutputsResponse.getOutputs(0);
        // Print the output
        System.out.println(output.getData().getText().getRaw());
    }
}
<?php

require __DIR__ . "/vendor/autoload.php";

///////////////////////////////////////////////////////////////////////////////////
// In this section, we set the user authentication, user and app ID, model ID, and
// audio file location. Change these strings to run your own example.
///////////////////////////////////////////////////////////////////////////////////
// Your PAT (Personal Access Token) can be found in the Account's Security section
$PAT = "YOUR_PAT_HERE";
// Specify the correct user_id/app_id pairings
// Since you're making inferences outside your app's scope
$USER_ID = "facebook";
$APP_ID = "asr";
// Change these to make your own predictions
$MODEL_ID = "asr-wav2vec2-base-960h-english";
$AUDIO_FILE_LOCATION = "YOUR_AUDIO_FILE_LOCATION_HERE";
///////////////////////////////////////////////////////////////////////////////////
// YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
///////////////////////////////////////////////////////////////////////////////////

use Clarifai\Api\Audio;
use Clarifai\ClarifaiClient;
use Clarifai\Api\PostModelOutputsRequest;
use Clarifai\Api\Input;
use Clarifai\Api\Data;
use Clarifai\Api\Status\StatusCode;
use Clarifai\Api\UserAppIDSet;

$client = ClarifaiClient::grpc();

// This metadata authenticates every endpoint call with the PAT
$metadata = ["Authorization" => ["Key " . $PAT]];

$userDataObject = new UserAppIDSet([
    "user_id" => $USER_ID,
    "app_id" => $APP_ID
]);

$audioData = file_get_contents($AUDIO_FILE_LOCATION); // Get the audio bytes data from the location

// Let's make a RPC call to the Clarifai platform. It uses the opened gRPC client channel to communicate a
// request and then wait for the response
[$response, $status] = $client->PostModelOutputs(
    // The request object carries the request along with the request status and other metadata related to the request itself
    new PostModelOutputsRequest([
        "user_app_id" => $userDataObject,
        "model_id" => $MODEL_ID,
        "inputs" => [
            new Input([
                "data" => new Data([
                    "audio" => new Audio([
                        "base64" => $audioData
                    ])
                ])
            ])
        ]
    ]),
    $metadata
)->wait();

// A response is returned and the first thing we do is check the status of it
// A successful response will have a status code of 0; otherwise, there is some error
if ($status->code !== 0) {
    throw new Exception("Error: {$status->details}");
}

// In addition to the RPC response status, there is a Clarifai API status that reports if the operation was a success or failure
// (not just that the communication was successful)
// Use strict comparison (!==) here, consistent with the gRPC status check above
if ($response->getStatus()->getCode() !== StatusCode::SUCCESS) {
    print $response->getStatus()->getDetails();
    throw new Exception("Failure response: " . $response->getStatus()->getDescription());
}

// We'll get one output for each input we used above. Because of one input, we have here one output
$output = $response->getOutputs()[0];
// Print the output
echo $output->getData()->getText()->getRaw();
?>
curl --request POST "https://api.clarifai.com/v2/users/facebook/apps/asr/models/asr-wav2vec2-base-960h-english/outputs" \
  --header "authorization: Key YOUR_PAT_HERE" \
  --header "content-type: application/json" \
  --data '{
    "inputs": [
      {
        "data": {
          "audio": {
            "base64": "YOUR_BYTES_STRING_HERE"
          }
        }
      }
    ]
  }'