This directory contains examples of how to perform model inference using different SDKs and model types. The examples demonstrate both streaming and non-streaming modes, as well as tool calling capabilities.
- Clarifai SDK
- OpenAI Client
- LiteLLM
# Install Clarifai SDK
pip install clarifai
# Install OpenAI SDK
pip install openai
# Install LiteLLM
pip install litellm

Set your Clarifai Personal Access Token (PAT) as an environment variable:
export CLARIFAI_PAT=your_pat_here

- clarifai_llm.py: Basic inference
- clarifai_llm_stream.py: Streaming inference
- clarifai_llm_tools.py: Tool calling example
- clarifai_llm_async_predict.py: Asynchronous predict example
- clarifai_llm_async_generate.py: Asynchronous generate example
- openai_llm.py: Basic inference
- openai_llm_stream.py: Streaming inference
- openai_llm_tools.py: Tool calling example
- litellm_llm.py: Basic inference
- litellm_llm_stream.py: Streaming inference
- litellm_llm_tools.py: Tool calling example
- clarifai_multimodal.py: Basic inference
- clarifai_multimodal_stream.py: Streaming inference
- clarifai_multimodal_tools.py: Tool calling example
- openai_multimodal.py: Basic inference
- openai_multimodal_stream.py: Streaming inference
- openai_multimodal_tools.py: Tool calling example
- litellm_multimodal.py: Basic inference
- litellm_multimodal_stream.py: Streaming inference
- litellm_multimodal_tools.py: Tool calling example
# Using Clarifai SDK
# NOTE: requires CLARIFAI_PAT to be set in the environment (see setup above).
from clarifai.client import Model
# Reference a hosted model by its full Clarifai URL.
model = Model(url="https://clarifai.com/qwen/qwenLM/models/QwQ-32B-AWQ")
# Blocking, non-streaming inference call.
response = model.predict("What is the capital of France?")
print(response)
# Using OpenAI Client
# Any OpenAI-compatible client can reach Clarifai through its OpenAI-compatible endpoint.
from openai import OpenAI
client = OpenAI(
# Clarifai's OpenAI-compatible base URL.
base_url="https://api.clarifai.com/v2/ext/openai/v1",
# Your Clarifai Personal Access Token (PAT) acts as the API key.
api_key="YOUR_PAT"
)
response = client.chat.completions.create(
# Pass the full Clarifai model URL as the model name.
model="CLARIFAI_MODEL_URL",
messages=[{"role": "user", "content": "What is the capital of France?"}]
)
print(response.choices[0].message.content)
# Using LiteLLM
# LiteLLM routes through Clarifai's OpenAI-compatible endpoint, so the model
# name is prefixed with "openai/" and the PAT is passed as the API key.
import litellm

response = litellm.completion(
    model="openai/CLARIFAI_MODEL_URL",
    api_key="YOUR_PAT",
    api_base="https://api.clarifai.com/v2/ext/openai/v1",
    messages=[{"role": "user", "content": "What is the capital of France?"}]
)
print(response.choices[0].message.content)

# Using Clarifai SDK
# Reuses the `model` object created in the basic-inference example above.
# generate() yields the response incrementally instead of returning it whole.
response_stream = model.generate("Tell me a story")
for chunk in response_stream:
    # Print tokens as they arrive; flush so output appears immediately.
    print(chunk, end="", flush=True)
# Using OpenAI Client
# Reuses the `client` object created in the basic-inference example above.
stream = client.chat.completions.create(
    model="CLARIFAI_MODEL_URL",
    messages=[{"role": "user", "content": "Tell me a story"}],
    stream=True
)
for chunk in stream:
    # delta.content can be None on role/finish chunks; guard so we don't print "None".
    print(chunk.choices[0].delta.content or "", end="", flush=True)
# Using LiteLLM
# Streaming variant: litellm.completion(..., stream=True) returns an iterator
# of OpenAI-style chunks.
for chunk in litellm.completion(
    model="openai/CLARIFAI_MODEL_URL",
    api_key="YOUR_PAT",
    api_base="https://api.clarifai.com/v2/ext/openai/v1",
    messages=[{"role": "user", "content": "Tell me a story"}],
    stream=True
):
    # delta.content can be None on role/finish chunks; guard so we don't print "None".
    print(chunk.choices[0].delta.content or "", end="", flush=True)

# Example tool definition
# OpenAI-style function-tool schema shared by the tool-calling examples below.
# The JSON-Schema `parameters` object is built separately for readability.
_weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "City and country e.g. Tokyo, Japan",
        },
    },
    "required": ["location"],
}

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current temperature for a given location.",
            "parameters": _weather_parameters,
        },
    },
]
# Using Clarifai SDK
# Reuses the `model` and `tools` objects defined in the examples above.
response = model.predict(
prompt="What's the weather in Tokyo?",
tools=tools,
# 'auto' leaves it to the model whether to call get_weather.
tool_choice='auto'
)
# Using OpenAI Client
# Reuses the `client` and `tools` objects defined in the examples above.
response = client.chat.completions.create(
model="CLARIFAI_MODEL_URL",
messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
tools=tools
)
# Using LiteLLM
# Reuses the `tools` schema defined above; same OpenAI-compatible routing
# as the earlier LiteLLM examples.
response = litellm.completion(
    model="openai/CLARIFAI_MODEL_URL",
    api_key="YOUR_PAT",
    api_base="https://api.clarifai.com/v2/ext/openai/v1",
    messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
    tools=tools
)

# Example block to call async_predict from notebook cells
from clarifai.client.model import Model
async def main():
model = Model(url="https://clarifai.com/qwen/qwenLM/models/QwQ-32B-AWQ")
response = await model.async_predict(prompt= "what is the value of pi?",
max_tokens=100)
return response
await main()# Example block to call async_generate from notebook cells
from clarifai.client.model import Model
async def main():
model = Model(url="https://clarifai.com/qwen/qwenLM/models/QwQ-32B-AWQ")
response = await model.async_generate(prompt= "what is the value of pi?",
max_tokens=100)
return response
# iterate the response over async generator
response = await main()
for res in response:
print(res)- Always ensure your Clarifai PAT is set in the environment variables
- For multimodal models, provide both text and image inputs as required
- Tool calling support may vary depending on the model capabilities
- Streaming responses are token-by-token and may have different formatting across SDKs
- Error handling and retry logic should be implemented in production environments
