From d8c99d2b4c28270279bd3a42166a6e86f0a0e2a5 Mon Sep 17 00:00:00 2001
From: Tamir
Date: Thu, 24 Apr 2025 22:27:52 +0300
Subject: [PATCH 1/4] added example for making an inference call with minimal client

---
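Notes: the new example relies on run_sync raising on failure. A minimal
error-handling sketch around the same call (not part of the diff below; it
assumes only the InferenceClientError exception and the run_sync signature
shown in this series):

    import os
    from datacrunch.InferenceClient import InferenceClient
    from datacrunch.InferenceClient.inference_client import InferenceClientError

    inference_client = InferenceClient(
        inference_key=os.environ.get('DATACRUNCH_INFERENCE_KEY'),
        endpoint_base_url=os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
    )

    try:
        # run_sync wraps the HTTP call; the client raises
        # InferenceClientError when the request fails
        response = inference_client.run_sync(
            data={"model": "deepseek-ai/deepseek-llm-7b-chat",
                  "prompt": "Hello", "max_tokens": 16},
            path='v1/completions')
        print(response.output())
    except InferenceClientError as e:
        print(f"Inference request failed: {e}")
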
 CHANGELOG.rst                                 |  3 +++
 docs/source/examples/containers/index.rst     |  7 +++--
 .../examples/containers/inference_async.rst   |  8 ++++++
 .../examples/containers/inference_minimal.rst |  8 ++++++
 docs/source/examples/containers/sglang.rst    |  2 +-
 ...calling_the_endpoint_with_inference_key.py | 27 +++++++++++++++++++
 6 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 docs/source/examples/containers/inference_async.rst
 create mode 100644 docs/source/examples/containers/inference_minimal.rst
 create mode 100644 examples/containers/calling_the_endpoint_with_inference_key.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 900c50d..cb6c0ab 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,9 @@
 Changelog
 =========
 
+* Added example for calling the inference endpoint with a minimal client
+* Added missing doc generation for inference examples
+
 v1.10.0 (2025-04-17)
 --------------------
 
diff --git a/docs/source/examples/containers/index.rst b/docs/source/examples/containers/index.rst
index 2cdfdc7..b87bb9c 100644
--- a/docs/source/examples/containers/index.rst
+++ b/docs/source/examples/containers/index.rst
@@ -7,10 +7,13 @@ This section contains examples demonstrating how to work with containers in Data
    :maxdepth: 1
    :caption: Contents:
 
-   compute_resources
    deployments
+   compute_resources
    environment_variables
    registry_credentials
    secrets
    sglang
-   scaling
\ No newline at end of file
+   scaling
+   inference_async
+   inference_sync
+   inference_minimal
\ No newline at end of file
diff --git a/docs/source/examples/containers/inference_async.rst b/docs/source/examples/containers/inference_async.rst
new file mode 100644
index 0000000..c4d479a
--- /dev/null
+++ b/docs/source/examples/containers/inference_async.rst
@@ -0,0 +1,8 @@
+Calling the inference endpoint in async mode
+============================================
+
+This example demonstrates how to call the inference endpoint in async mode.
+
+.. literalinclude:: ../../../../examples/containers/calling_the_inference_endpoint_in_async_mode.py
+   :language: python
+   :caption: Calling the inference endpoint in async mode
\ No newline at end of file
diff --git a/docs/source/examples/containers/inference_minimal.rst b/docs/source/examples/containers/inference_minimal.rst
new file mode 100644
index 0000000..abee42c
--- /dev/null
+++ b/docs/source/examples/containers/inference_minimal.rst
@@ -0,0 +1,8 @@
+Calling the inference endpoint using a minimal client
+=====================================================
+
+This example demonstrates how to call the inference endpoint using a minimal client that uses only an inference key (no client credentials).
+
+.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key.py
+   :language: python
+   :caption: Calling the inference endpoint using a minimal client
\ No newline at end of file
diff --git a/docs/source/examples/containers/sglang.rst b/docs/source/examples/containers/sglang.rst
index d228a66..c204115 100644
--- a/docs/source/examples/containers/sglang.rst
+++ b/docs/source/examples/containers/sglang.rst
@@ -5,4 +5,4 @@ This example demonstrates how to deploy and manage SGLang applications in DataCr
 
 .. literalinclude:: ../../../../examples/containers/sglang_deployment_example.py
    :language: python
-   :caption: SGLang Deployment
\ No newline at end of file
+   :caption: SGLang Deployment Example
\ No newline at end of file
diff --git a/examples/containers/calling_the_endpoint_with_inference_key.py b/examples/containers/calling_the_endpoint_with_inference_key.py
new file mode 100644
index 0000000..da7e7e2
--- /dev/null
+++ b/examples/containers/calling_the_endpoint_with_inference_key.py
@@ -0,0 +1,27 @@
+import os
+from datacrunch.InferenceClient import InferenceClient
+
+# Get the inference key and endpoint base URL from environment variables
+DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
+DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
+
+# Create an inference client that uses only the inference key, without client credentials
+inference_client = InferenceClient(
+    inference_key=DATACRUNCH_INFERENCE_KEY,
+    endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
+)
+
+# Make a synchronous request to the endpoint.
+# This example demonstrates calling an SGLang deployment which serves LLMs using an OpenAI-compatible API format
+data = {
+    "model": "deepseek-ai/deepseek-llm-7b-chat",
+    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
+    "max_tokens": 128,
+    "temperature": 0.7,
+    "top_p": 0.9
+}
+
+response = inference_client.run_sync(data=data, path='v1/completions')
+
+# Print the response
+print(response.output())

From 37d43eae5319bf12317f634026ac9b86e1f0bb05 Mon Sep 17 00:00:00 2001
From: Tamir
Date: Fri, 25 Apr 2025 08:09:28 +0300
Subject: [PATCH 2/4] added async minimal client example

---
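Notes: the new async example polls status() until AsyncStatus.Completed with
no upper bound. A deadline-guarded variant of the same loop (a sketch, not
part of the diff; it uses only the status(), status_json() and output()
calls that appear in the example):

    from time import monotonic, sleep

    # Give the execution up to five minutes before bailing out
    deadline = monotonic() + 5 * 60

    while async_inference_execution.status() != AsyncStatus.Completed:
        if monotonic() > deadline:
            raise TimeoutError("async inference did not complete within 5 minutes")
        print(async_inference_execution.status_json())
        sleep(1)

    print(async_inference_execution.output())
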
 .../InferenceClient/inference_client.py       | 36 +++++++++++++++++++
 .../calling_the_endpoint_asynchronously.py    |  5 +--
 .../calling_the_endpoint_synchronously.py     |  5 +--
 ...g_the_endpoint_with_inference_key_async.py | 36 +++++++++++++++++++
 4 files changed, 78 insertions(+), 4 deletions(-)
 create mode 100644 examples/containers/calling_the_endpoint_with_inference_key_async.py

diff --git a/datacrunch/InferenceClient/inference_client.py b/datacrunch/InferenceClient/inference_client.py
index 7835d35..216a057 100644
--- a/datacrunch/InferenceClient/inference_client.py
+++ b/datacrunch/InferenceClient/inference_client.py
@@ -6,16 +6,19 @@
 from urllib.parse import urlparse
 from enum import Enum
 
+
 class InferenceClientError(Exception):
     """Base exception for InferenceClient errors."""
     pass
 
+
 class AsyncStatus(int, Enum):
     Initialized = 0
     Queue = 1
     Inference = 2
     Completed = 3
 
+
 @dataclass_json(undefined=Undefined.EXCLUDE)
 @dataclass
 class InferenceResponse:
@@ -222,6 +225,22 @@ def _make_request(self, method: str, path: str, **kwargs) -> requests.Response:
             raise InferenceClientError(f"Request to {path} failed: {str(e)}")
 
     def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", stream: bool = False):
+        """Make a synchronous request to the inference endpoint.
+
+        Args:
+            data: The data payload to send with the request
+            path: API endpoint path. Defaults to empty string.
+            timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
+            headers: Optional headers to include in the request
+            http_method: HTTP method to use. Defaults to "POST".
+            stream: Whether to stream the response. Defaults to False.
+
+        Returns:
+            InferenceResponse: Object containing the response data.
+
+        Raises:
+            InferenceClientError: If the request fails
+        """
         response = self._make_request(
             http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers, stream=stream)
 
@@ -233,6 +252,23 @@
         )
 
     def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", no_response: bool = False):
+        """Make an asynchronous request to the inference endpoint.
+
+        Args:
+            data: The data payload to send with the request
+            path: API endpoint path. Defaults to empty string.
+            timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
+            headers: Optional headers to include in the request
+            http_method: HTTP method to use. Defaults to "POST".
+            no_response: If True, don't wait for response. Defaults to False.
+
+        Returns:
+            AsyncInferenceExecution: Object to track the async execution status.
+                If no_response is True, returns None.
+
+        Raises:
+            InferenceClientError: If the request fails
+        """
         # Add relevant headers to the request, to indicate that the request is async
         headers = headers or {}
         if no_response:
diff --git a/examples/containers/calling_the_endpoint_asynchronously.py b/examples/containers/calling_the_endpoint_asynchronously.py
index 7e713f8..27018f6 100644
--- a/examples/containers/calling_the_endpoint_asynchronously.py
+++ b/examples/containers/calling_the_endpoint_asynchronously.py
@@ -4,7 +4,7 @@
 from datacrunch.InferenceClient.inference_client import AsyncStatus
 
 # Configuration - replace with your deployment name
-DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
+DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
 
 # Get client secret and id from environment variables
 DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
@@ -12,7 +12,8 @@
 DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
 
 # DataCrunch client instance
-datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
+datacrunch = DataCrunchClient(
+    DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
 
 # Get the deployment
 deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
diff --git a/examples/containers/calling_the_endpoint_synchronously.py b/examples/containers/calling_the_endpoint_synchronously.py
index c65cca3..72ea5ff 100644
--- a/examples/containers/calling_the_endpoint_synchronously.py
+++ b/examples/containers/calling_the_endpoint_synchronously.py
@@ -2,7 +2,7 @@
 from datacrunch import DataCrunchClient
 
 # Configuration - replace with your deployment name
-DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
+DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
 
 # Get client secret and id from environment variables
 DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
@@ -10,7 +10,8 @@
 DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
 
 # DataCrunch client instance
-datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
+datacrunch = DataCrunchClient(
+    DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
 
 # Get the deployment
 deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
diff --git a/examples/containers/calling_the_endpoint_with_inference_key_async.py b/examples/containers/calling_the_endpoint_with_inference_key_async.py
new file mode 100644
index 0000000..0a385ea
--- /dev/null
+++ b/examples/containers/calling_the_endpoint_with_inference_key_async.py
@@ -0,0 +1,36 @@
+import os
+from time import sleep
+from datacrunch.InferenceClient import InferenceClient
+from datacrunch.InferenceClient.inference_client import AsyncStatus
+
+# Get the inference key and endpoint base URL from environment variables
+DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
+DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
+
+# Create an inference client that uses only the inference key, without client credentials
+inference_client = InferenceClient(
+    inference_key=DATACRUNCH_INFERENCE_KEY,
+    endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
+)
+
+# Make an asynchronous request to the endpoint.
+# This example demonstrates calling an SGLang deployment which serves LLMs using an OpenAI-compatible API format
+data = {
+    "model": "deepseek-ai/deepseek-llm-7b-chat",
+    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
+    "max_tokens": 128,
+    "temperature": 0.7,
+    "top_p": 0.9
+}
+
+# Run the request asynchronously using the inference client
+async_inference_execution = inference_client.run(
+    data=data, path='v1/completions')
+
+# Poll for status until completion
+while async_inference_execution.status() != AsyncStatus.Completed:
+    print(async_inference_execution.status_json())
+    sleep(1)
+
+# Print the response
+print(async_inference_execution.output())

From 69a726901be16c8082d65002eff06914b5679791 Mon Sep 17 00:00:00 2001
From: Tamir
Date: Fri, 25 Apr 2025 12:49:21 +0300
Subject: [PATCH 3/4] add doc example generation template

---
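Notes: the run() docstring added in patch 2 also documents a fire-and-forget
mode; a sketch of it (assuming no_response behaves as documented there, i.e.
the call returns None and there is no execution object to poll):

    # Don't wait for a response; run() returns None in this mode
    inference_client.run(data=data, path='v1/completions', no_response=True)
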
 docs/source/examples/containers/inference_minimal.rst | 2 +-
 .../examples/containers/inference_minimal_async.rst   | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 docs/source/examples/containers/inference_minimal_async.rst

diff --git a/docs/source/examples/containers/inference_minimal.rst b/docs/source/examples/containers/inference_minimal.rst
index abee42c..eb6fcc2 100644
--- a/docs/source/examples/containers/inference_minimal.rst
+++ b/docs/source/examples/containers/inference_minimal.rst
@@ -1,7 +1,7 @@
 Calling the inference endpoint using a minimal client
 =====================================================
 
-This example demonstrates how to call the inference endpoint using a minimal client that uses only an inference key (no client credentials).
+This example demonstrates how to call the inference endpoint using a minimal client that uses only an inference key (no client credentials needed).
 
 .. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key.py
    :language: python
diff --git a/docs/source/examples/containers/inference_minimal_async.rst b/docs/source/examples/containers/inference_minimal_async.rst
new file mode 100644
index 0000000..c5a5231
--- /dev/null
+++ b/docs/source/examples/containers/inference_minimal_async.rst
@@ -0,0 +1,8 @@
+Calling the inference async endpoint using a minimal client
+===========================================================
+
+This example demonstrates how to call the inference endpoint in async mode using a minimal client that uses only an inference key (no client credentials needed).
+
+.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key_async.py
+   :language: python
+   :caption: Calling the inference async endpoint using a minimal client
\ No newline at end of file

From d493093c5472f554bd23f9db69793ee0827f0b3d Mon Sep 17 00:00:00 2001
From: Tamir
Date: Fri, 25 Apr 2025 12:59:00 +0300
Subject: [PATCH 4/4] upgrade actions ubuntu version

---
 .github/workflows/code_style.yml | 2 +-
 .github/workflows/unit_tests.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml
index f0995ff..2d36207 100644
--- a/.github/workflows/code_style.yml
+++ b/.github/workflows/code_style.yml
@@ -8,7 +8,7 @@ on: [push, pull_request]
 
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index cdaf609..850e765 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -8,7 +8,7 @@ on: [push, pull_request]
 
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']