From d8c99d2b4c28270279bd3a42166a6e86f0a0e2a5 Mon Sep 17 00:00:00 2001
From: Tamir
Date: Thu, 24 Apr 2025 22:27:52 +0300
Subject: [PATCH 1/4] added example for making an inference call with minimal client

---
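Notes: the new example relies on run_sync raising on failure. A minimal
error-handling sketch around the same call (not part of the diff below; it
assumes only the InferenceClientError exception and the run_sync signature
shown in this series):

    import os
    from datacrunch.InferenceClient import InferenceClient
    from datacrunch.InferenceClient.inference_client import InferenceClientError

    inference_client = InferenceClient(
        inference_key=os.environ.get('DATACRUNCH_INFERENCE_KEY'),
        endpoint_base_url=os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
    )

    try:
        # run_sync wraps the HTTP call; the client raises
        # InferenceClientError when the request fails
        response = inference_client.run_sync(
            data={"model": "deepseek-ai/deepseek-llm-7b-chat",
                  "prompt": "Hello", "max_tokens": 16},
            path='v1/completions')
        print(response.output())
    except InferenceClientError as e:
        print(f"Inference request failed: {e}")
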
 CHANGELOG.rst                                 |  3 +++
 docs/source/examples/containers/index.rst     |  7 +++--
 .../examples/containers/inference_async.rst   |  8 ++++++
 .../examples/containers/inference_minimal.rst |  8 ++++++
 docs/source/examples/containers/sglang.rst    |  2 +-
 ...calling_the_endpoint_with_inference_key.py | 27 +++++++++++++++++++
 6 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 docs/source/examples/containers/inference_async.rst
 create mode 100644 docs/source/examples/containers/inference_minimal.rst
 create mode 100644 examples/containers/calling_the_endpoint_with_inference_key.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 900c50d..cb6c0ab 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,9 @@
 Changelog
 =========
 
+* Added example for calling the inference endpoint with a minimal client
+* Added missing doc generation for inference examples
+
 v1.10.0 (2025-04-17)
 --------------------
 
diff --git a/docs/source/examples/containers/index.rst b/docs/source/examples/containers/index.rst
index 2cdfdc7..b87bb9c 100644
--- a/docs/source/examples/containers/index.rst
+++ b/docs/source/examples/containers/index.rst
@@ -7,10 +7,13 @@ This section contains examples demonstrating how to work with containers in Data
    :maxdepth: 1
    :caption: Contents:
 
-   compute_resources
    deployments
+   compute_resources
    environment_variables
    registry_credentials
    secrets
    sglang
-   scaling
\ No newline at end of file
+   scaling
+   inference_async
+   inference_sync
+   inference_minimal
\ No newline at end of file
diff --git a/docs/source/examples/containers/inference_async.rst b/docs/source/examples/containers/inference_async.rst
new file mode 100644
index 0000000..c4d479a
--- /dev/null
+++ b/docs/source/examples/containers/inference_async.rst
@@ -0,0 +1,8 @@
+Calling the inference endpoint in async mode
+============================================
+
+This example demonstrates how to call the inference endpoint in async mode.
+
+.. literalinclude:: ../../../../examples/containers/calling_the_inference_endpoint_in_async_mode.py
+   :language: python
+   :caption: Calling the inference endpoint in async mode
\ No newline at end of file
diff --git a/docs/source/examples/containers/inference_minimal.rst b/docs/source/examples/containers/inference_minimal.rst
new file mode 100644
index 0000000..abee42c
--- /dev/null
+++ b/docs/source/examples/containers/inference_minimal.rst
@@ -0,0 +1,8 @@
+Calling the inference endpoint using a minimal client
+=====================================================
+
+This example demonstrates how to call the inference endpoint using a minimal client that uses only an inference key (no client credentials).
+
+.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key.py
+   :language: python
+   :caption: Calling the inference endpoint using a minimal client
\ No newline at end of file
diff --git a/docs/source/examples/containers/sglang.rst b/docs/source/examples/containers/sglang.rst
index d228a66..c204115 100644
--- a/docs/source/examples/containers/sglang.rst
+++ b/docs/source/examples/containers/sglang.rst
@@ -5,4 +5,4 @@ This example demonstrates how to deploy and manage SGLang applications in DataCr
 
 .. literalinclude:: ../../../../examples/containers/sglang_deployment_example.py
    :language: python
-   :caption: SGLang Deployment
\ No newline at end of file
+   :caption: SGLang Deployment Example
\ No newline at end of file
diff --git a/examples/containers/calling_the_endpoint_with_inference_key.py b/examples/containers/calling_the_endpoint_with_inference_key.py
new file mode 100644
index 0000000..da7e7e2
--- /dev/null
+++ b/examples/containers/calling_the_endpoint_with_inference_key.py
@@ -0,0 +1,27 @@
+import os
+from datacrunch.InferenceClient import InferenceClient
+
+# Get the inference key and endpoint base URL from environment variables
+DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
+DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
+
+# Create an inference client that uses only the inference key, without client credentials
+inference_client = InferenceClient(
+    inference_key=DATACRUNCH_INFERENCE_KEY,
+    endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
+)
+
+# Make a synchronous request to the endpoint.
+# This example demonstrates calling an SGLang deployment which serves LLMs using an OpenAI-compatible API format
+data = {
+    "model": "deepseek-ai/deepseek-llm-7b-chat",
+    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
+    "max_tokens": 128,
+    "temperature": 0.7,
+    "top_p": 0.9
+}
+
+response = inference_client.run_sync(data=data, path='v1/completions')
+
+# Print the response
+print(response.output())

From 37d43eae5319bf12317f634026ac9b86e1f0bb05 Mon Sep 17 00:00:00 2001
From: Tamir
Date: Fri, 25 Apr 2025 08:09:28 +0300
Subject: [PATCH 2/4] added async minimal client example

---
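Notes: the new async example polls status() until AsyncStatus.Completed with
no upper bound. A deadline-guarded variant of the same loop (a sketch, not
part of the diff; it uses only the status(), status_json() and output()
calls that appear in the example):

    from time import monotonic, sleep

    # Give the execution up to five minutes before bailing out
    deadline = monotonic() + 5 * 60

    while async_inference_execution.status() != AsyncStatus.Completed:
        if monotonic() > deadline:
            raise TimeoutError("async inference did not complete within 5 minutes")
        print(async_inference_execution.status_json())
        sleep(1)

    print(async_inference_execution.output())
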
 .../InferenceClient/inference_client.py       | 36 +++++++++++++++++++
 .../calling_the_endpoint_asynchronously.py    |  5 +--
 .../calling_the_endpoint_synchronously.py     |  5 +--
 ...g_the_endpoint_with_inference_key_async.py | 36 +++++++++++++++++++
 4 files changed, 78 insertions(+), 4 deletions(-)
 create mode 100644 examples/containers/calling_the_endpoint_with_inference_key_async.py

diff --git a/datacrunch/InferenceClient/inference_client.py b/datacrunch/InferenceClient/inference_client.py
index 7835d35..216a057 100644
--- a/datacrunch/InferenceClient/inference_client.py
+++ b/datacrunch/InferenceClient/inference_client.py
@@ -6,16 +6,19 @@
 from urllib.parse import urlparse
 from enum import Enum
 
+
 class InferenceClientError(Exception):
     """Base exception for InferenceClient errors."""
     pass
 
+
 class AsyncStatus(int, Enum):
     Initialized = 0
     Queue = 1
     Inference = 2
     Completed = 3
 
+
 @dataclass_json(undefined=Undefined.EXCLUDE)
 @dataclass
 class InferenceResponse:
@@ -222,6 +225,22 @@ def _make_request(self, method: str, path: str, **kwargs) -> requests.Response:
             raise InferenceClientError(f"Request to {path} failed: {str(e)}")
 
     def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", stream: bool = False):
+        """Make a synchronous request to the inference endpoint.
+
+        Args:
+            data: The data payload to send with the request
+            path: API endpoint path. Defaults to empty string.
+            timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
+            headers: Optional headers to include in the request
+            http_method: HTTP method to use. Defaults to "POST".
+            stream: Whether to stream the response. Defaults to False.
+
+        Returns:
+            InferenceResponse: Object containing the response data.
+
+        Raises:
+            InferenceClientError: If the request fails
+        """
         response = self._make_request(
             http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers, stream=stream)
 
@@ -233,6 +252,23 @@
         )
 
     def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", no_response: bool = False):
+        """Make an asynchronous request to the inference endpoint.
+
+        Args:
+            data: The data payload to send with the request
+            path: API endpoint path. Defaults to empty string.
+            timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
+            headers: Optional headers to include in the request
+            http_method: HTTP method to use. Defaults to "POST".
+            no_response: If True, don't wait for response. Defaults to False.
+
+        Returns:
+            AsyncInferenceExecution: Object to track the async execution status.
+                If no_response is True, returns None.
+
+        Raises:
+            InferenceClientError: If the request fails
+        """
         # Add relevant headers to the request, to indicate that the request is async
         headers = headers or {}
         if no_response:
diff --git a/examples/containers/calling_the_endpoint_asynchronously.py b/examples/containers/calling_the_endpoint_asynchronously.py
index 7e713f8..27018f6 100644
--- a/examples/containers/calling_the_endpoint_asynchronously.py
+++ b/examples/containers/calling_the_endpoint_asynchronously.py
@@ -4,7 +4,7 @@
 from datacrunch.InferenceClient.inference_client import AsyncStatus
 
 # Configuration - replace with your deployment name
-DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
+DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
 
 # Get client secret and id from environment variables
 DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
@@ -12,7 +12,8 @@
 DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
 
 # DataCrunch client instance
-datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
+datacrunch = DataCrunchClient(
+    DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
 
 # Get the deployment
 deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
diff --git a/examples/containers/calling_the_endpoint_synchronously.py b/examples/containers/calling_the_endpoint_synchronously.py
index c65cca3..72ea5ff 100644
--- a/examples/containers/calling_the_endpoint_synchronously.py
+++ b/examples/containers/calling_the_endpoint_synchronously.py
@@ -2,7 +2,7 @@
 from datacrunch import DataCrunchClient
 
 # Configuration - replace with your deployment name
-DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
+DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
 
 # Get client secret and id from environment variables
 DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
@@ -10,7 +10,8 @@
 DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
 
 # DataCrunch client instance
-datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
+datacrunch = DataCrunchClient(
+    DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
 
 # Get the deployment
 deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
diff --git a/examples/containers/calling_the_endpoint_with_inference_key_async.py b/examples/containers/calling_the_endpoint_with_inference_key_async.py
new file mode 100644
index 0000000..0a385ea
--- /dev/null
+++ b/examples/containers/calling_the_endpoint_with_inference_key_async.py
@@ -0,0 +1,36 @@
+import os
+from time import sleep
+from datacrunch.InferenceClient import InferenceClient
+from datacrunch.InferenceClient.inference_client import AsyncStatus
+
+# Get the inference key and endpoint base URL from environment variables
+DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
+DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
+
+# Create an inference client that uses only the inference key, without client credentials
+inference_client = InferenceClient(
+    inference_key=DATACRUNCH_INFERENCE_KEY,
+    endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
+)
+
+# Make an asynchronous request to the endpoint.
+# This example demonstrates calling an SGLang deployment which serves LLMs using an OpenAI-compatible API format
+data = {
+    "model": "deepseek-ai/deepseek-llm-7b-chat",
+    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
+    "max_tokens": 128,
+    "temperature": 0.7,
+    "top_p": 0.9
+}
+
+# Run the request asynchronously using the inference client
+async_inference_execution = inference_client.run(
+    data=data, path='v1/completions')
+
+# Poll for status until completion
+while async_inference_execution.status() != AsyncStatus.Completed:
+    print(async_inference_execution.status_json())
+    sleep(1)
+
+# Print the response
+print(async_inference_execution.output())

From 69a726901be16c8082d65002eff06914b5679791 Mon Sep 17 00:00:00 2001
From: Tamir
Date: Fri, 25 Apr 2025 12:49:21 +0300
Subject: [PATCH 3/4] add doc example generation template

---
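Notes: the run() docstring added in patch 2 also documents a fire-and-forget
mode; a sketch of it (assuming no_response behaves as documented there, i.e.
the call returns None and there is no execution object to poll):

    # Don't wait for a response; run() returns None in this mode
    inference_client.run(data=data, path='v1/completions', no_response=True)
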
 docs/source/examples/containers/inference_minimal.rst | 2 +-
 .../examples/containers/inference_minimal_async.rst   | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 docs/source/examples/containers/inference_minimal_async.rst

diff --git a/docs/source/examples/containers/inference_minimal.rst b/docs/source/examples/containers/inference_minimal.rst
index abee42c..eb6fcc2 100644
--- a/docs/source/examples/containers/inference_minimal.rst
+++ b/docs/source/examples/containers/inference_minimal.rst
@@ -1,7 +1,7 @@
 Calling the inference endpoint using a minimal client
 =====================================================
 
-This example demonstrates how to call the inference endpoint using a minimal client that uses only an inference key (no client credentials).
+This example demonstrates how to call the inference endpoint using a minimal client that uses only an inference key (no client credentials needed).
 
 .. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key.py
    :language: python
diff --git a/docs/source/examples/containers/inference_minimal_async.rst b/docs/source/examples/containers/inference_minimal_async.rst
new file mode 100644
index 0000000..c5a5231
--- /dev/null
+++ b/docs/source/examples/containers/inference_minimal_async.rst
@@ -0,0 +1,8 @@
+Calling the inference async endpoint using a minimal client
+===========================================================
+
+This example demonstrates how to call the inference endpoint in async mode using a minimal client that uses only an inference key (no client credentials needed).
+
+.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key_async.py
+   :language: python
+   :caption: Calling the inference async endpoint using a minimal client
\ No newline at end of file

From d493093c5472f554bd23f9db69793ee0827f0b3d Mon Sep 17 00:00:00 2001
From: Tamir
Date: Fri, 25 Apr 2025 12:59:00 +0300
Subject: [PATCH 4/4] upgrade actions ubuntu version

---
 .github/workflows/code_style.yml | 2 +-
 .github/workflows/unit_tests.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml
index f0995ff..2d36207 100644
--- a/.github/workflows/code_style.yml
+++ b/.github/workflows/code_style.yml
@@ -8,7 +8,7 @@ on: [push, pull_request]
 
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index cdaf609..850e765 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -8,7 +8,7 @@ on: [push, pull_request]
 
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']