2 changes: 1 addition & 1 deletion .github/workflows/code_style.yml
@@ -8,7 +8,7 @@ on: [push, pull_request]
 
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
2 changes: 1 addition & 1 deletion .github/workflows/unit_tests.yml
@@ -8,7 +8,7 @@ on: [push, pull_request]
 
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
3 changes: 3 additions & 0 deletions CHANGELOG.rst
@@ -1,6 +1,9 @@
 Changelog
 =========
 
+* Added example for calling the inference endpoint with a minimal client
+* Added missing doc generation for inference examples
+
 v1.10.0 (2025-04-17)
 --------------------
 
36 changes: 36 additions & 0 deletions datacrunch/InferenceClient/inference_client.py
@@ -6,16 +6,19 @@
 from urllib.parse import urlparse
 from enum import Enum
 
+
 class InferenceClientError(Exception):
     """Base exception for InferenceClient errors."""
     pass
 
+
 class AsyncStatus(int, Enum):
     Initialized = 0
     Queue = 1
     Inference = 2
     Completed = 3
 
+
 @dataclass_json(undefined=Undefined.EXCLUDE)
 @dataclass
 class InferenceResponse:
Expand Down Expand Up @@ -222,6 +225,22 @@ def _make_request(self, method: str, path: str, **kwargs) -> requests.Response:
raise InferenceClientError(f"Request to {path} failed: {str(e)}")

def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", stream: bool = False):
"""Make a synchronous request to the inference endpoint.

Args:
data: The data payload to send with the request
path: API endpoint path. Defaults to empty string.
timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
headers: Optional headers to include in the request
http_method: HTTP method to use. Defaults to "POST".
stream: Whether to stream the response. Defaults to False.

Returns:
InferenceResponse: Object containing the response data.

Raises:
InferenceClientError: If the request fails
"""
response = self._make_request(
http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers, stream=stream)

@@ -233,6 +252,23 @@ def run_sync(self, data: Dict[str, Any], timeout_seconds: int =
         )
 
     def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", no_response: bool = False):
+        """Make an asynchronous request to the inference endpoint.
+
+        Args:
+            data: The data payload to send with the request
+            path: API endpoint path. Defaults to empty string.
+            timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
+            headers: Optional headers to include in the request
+            http_method: HTTP method to use. Defaults to "POST".
+            no_response: If True, don't wait for response. Defaults to False.
+
+        Returns:
+            AsyncInferenceExecution: Object to track the async execution status.
+            If no_response is True, returns None.
+
+        Raises:
+            InferenceClientError: If the request fails
+        """
         # Add relevant headers to the request, to indicate that the request is async
         headers = headers or {}
         if no_response:
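None of the examples in this change exercise the no_response flag documented above. A minimal fire-and-forget sketch, assuming an InferenceClient constructed the same way as the examples below construct theirs (the v1/completions path mirrors the SGLang deployments they target):

import os
from datacrunch.InferenceClient import InferenceClient

# Assumes the same environment variables the examples below rely on
inference_client = InferenceClient(
    inference_key=os.environ.get('DATACRUNCH_INFERENCE_KEY'),
    endpoint_base_url=os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
)

# Fire-and-forget: per the docstring above, run() does not wait for a
# response when no_response=True and returns None, so there is nothing
# to poll afterwards.
inference_client.run(
    data={"model": "deepseek-ai/deepseek-llm-7b-chat", "prompt": "ping", "max_tokens": 8},
    path='v1/completions',
    no_response=True
)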
7 changes: 5 additions & 2 deletions docs/source/examples/containers/index.rst
@@ -7,10 +7,13 @@ This section contains examples demonstrating how to work with containers in Data
    :maxdepth: 1
    :caption: Contents:
 
-   compute_resources
    deployments
+   compute_resources
    environment_variables
    registry_credentials
    secrets
    sglang
-   scaling
+   scaling
+   inference_async
+   inference_sync
+   inference_minimal
8 changes: 8 additions & 0 deletions docs/source/examples/containers/inference_async.rst
@@ -0,0 +1,8 @@
+Calling the inference endpoint in async mode
+============================================
+
+This example demonstrates how to call the inference endpoint in async mode.
+
+.. literalinclude:: ../../../../examples/containers/calling_the_inference_endpoint_in_async_mode.py
+   :language: python
+   :caption: Calling the inference endpoint in async mode
8 changes: 8 additions & 0 deletions docs/source/examples/containers/inference_minimal.rst
@@ -0,0 +1,8 @@
+Calling the inference endpoint using a minimal client
+=====================================================
+
+This example demonstrates how to call the inference endpoint using a minimal client that uses only an inference key (no client credentials needed).
+
+.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key.py
+   :language: python
+   :caption: Calling the inference endpoint using a minimal client
8 changes: 8 additions & 0 deletions docs/source/examples/containers/inference_minimal_async.rst
@@ -0,0 +1,8 @@
+Calling the inference async endpoint using a minimal client
+===========================================================
+
+This example demonstrates how to call the inference async endpoint using a minimal client that uses only an inference key (no client credentials needed).
+
+.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key_async.py
+   :language: python
+   :caption: Calling the inference async endpoint using a minimal client
2 changes: 1 addition & 1 deletion docs/source/examples/containers/sglang.rst
@@ -5,4 +5,4 @@ This example demonstrates how to deploy and manage SGLang applications in DataCr
 
 .. literalinclude:: ../../../../examples/containers/sglang_deployment_example.py
    :language: python
-   :caption: SGLang Deployment
+   :caption: SGLang Deployment Example
5 changes: 3 additions & 2 deletions examples/containers/calling_the_endpoint_asynchronously.py
@@ -4,15 +4,16 @@
 from datacrunch.InferenceClient.inference_client import AsyncStatus
 
 # Configuration - replace with your deployment name
-DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
+DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
 
 # Get client secret and id from environment variables
 DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
 DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
 DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
 
 # DataCrunch client instance
-datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
+datacrunch = DataCrunchClient(
+    DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
 
 # Get the deployment
 deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
5 changes: 3 additions & 2 deletions examples/containers/calling_the_endpoint_synchronously.py
@@ -2,15 +2,16 @@
 from datacrunch import DataCrunchClient
 
 # Configuration - replace with your deployment name
-DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
+DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
 
 # Get client secret and id from environment variables
 DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
 DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
 DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
 
 # DataCrunch client instance
-datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
+datacrunch = DataCrunchClient(
+    DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
 
 # Get the deployment
 deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
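Both examples now read their configuration from environment variables, and os.environ.get silently returns None for any variable that is unset, which only surfaces later as a confusing failure. A defensive sketch (not part of this change) that fails fast instead:

import os

# Hypothetical guard: verify the variables the examples depend on are set
REQUIRED = ('DATACRUNCH_DEPLOYMENT_NAME', 'DATACRUNCH_CLIENT_ID',
            'DATACRUNCH_CLIENT_SECRET', 'DATACRUNCH_INFERENCE_KEY')
missing = [name for name in REQUIRED if not os.environ.get(name)]
if missing:
    raise SystemExit(f"Missing environment variables: {', '.join(missing)}")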
27 changes: 27 additions & 0 deletions examples/containers/calling_the_endpoint_with_inference_key.py
@@ -0,0 +1,27 @@
+import os
+from datacrunch.InferenceClient import InferenceClient
+
+# Get inference key and endpoint base url from environment variables
+DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
+DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
+
+# Create an inference client that uses only the inference key, without client credentials
+inference_client = InferenceClient(
+    inference_key=DATACRUNCH_INFERENCE_KEY,
+    endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
+)
+
+# Make a synchronous request to the endpoint.
+# This example demonstrates calling an SGLang deployment which serves LLMs using an OpenAI-compatible API format
+data = {
+    "model": "deepseek-ai/deepseek-llm-7b-chat",
+    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
+    "max_tokens": 128,
+    "temperature": 0.7,
+    "top_p": 0.9
+}
+
+response = inference_client.run_sync(data=data, path='v1/completions')
+
+# Print the response
+print(response.output())
36 changes: 36 additions & 0 deletions examples/containers/calling_the_endpoint_with_inference_key_async.py
@@ -0,0 +1,36 @@
+import os
+from time import sleep
+from datacrunch.InferenceClient import InferenceClient
+from datacrunch.InferenceClient.inference_client import AsyncStatus
+
+# Get inference key and endpoint base url from environment variables
+DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
+DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
+
+# Create an inference client that uses only the inference key, without client credentials
+inference_client = InferenceClient(
+    inference_key=DATACRUNCH_INFERENCE_KEY,
+    endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
+)
+
+# Make an asynchronous request to the endpoint
+# This example demonstrates calling an SGLang deployment which serves LLMs using an OpenAI-compatible API format
+data = {
+    "model": "deepseek-ai/deepseek-llm-7b-chat",
+    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
+    "max_tokens": 128,
+    "temperature": 0.7,
+    "top_p": 0.9
+}
+
+# Run the request asynchronously using the inference client
+async_inference_execution = inference_client.run(
+    data=data, path='v1/completions')
+
+# Poll for status until completion
+while async_inference_execution.status() != AsyncStatus.Completed:
+    print(async_inference_execution.status_json())
+    sleep(1)
+
+# Print the response
+print(async_inference_execution.output())
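The polling loop above spins forever if the execution never reaches Completed. A sketch of the same loop with a deadline, using only the calls already shown in this example and assuming the async_inference_execution object from above (the five-minute limit is an arbitrary choice):

import time

# Give the execution five minutes before giving up
deadline = time.monotonic() + 300
while async_inference_execution.status() != AsyncStatus.Completed:
    if time.monotonic() > deadline:
        raise TimeoutError("inference execution did not complete in time")
    print(async_inference_execution.status_json())
    sleep(1)

print(async_inference_execution.output())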