From 52deff17d017e236a93716c859ba0d539e4cfa5a Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Mon, 12 Jan 2026 14:53:23 -0800 Subject: [PATCH 1/5] link to notebook instead of README.md Signed-off-by: Aydin Abiar --- doc/source/conf.py | 1 - doc/source/serve/examples.yml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index d1641e665063..ec5e1192a680 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -242,7 +242,6 @@ def __init__(self, version: str): # Other misc files (overviews, console-only examples, etc) "ray-overview/examples/llamafactory-llm-fine-tune/README.ipynb", "ray-overview/examples/llamafactory-llm-fine-tune/**/*.ipynb", - "serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb", # Legacy/backward compatibility "ray-overview/examples/**/README.md", "train/examples/**/README.md", diff --git a/doc/source/serve/examples.yml b/doc/source/serve/examples.yml index 005794eb3c1c..397bf5ce7a61 100644 --- a/doc/source/serve/examples.yml +++ b/doc/source/serve/examples.yml @@ -153,7 +153,7 @@ examples: skill_level: advanced use_cases: - generative ai - link: tutorials/asynchronous-inference/content/README + link: tutorials/asynchronous-inference/content/asynchronous-inference related_technology: integrations - title: Integrate with MLflow Model Registry skill_level: intermediate From 7d0eca1efe7890169e59a27abfc08959422a995a Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Mon, 12 Jan 2026 16:03:00 -0800 Subject: [PATCH 2/5] fix lexer issue + nbconvert to sync README and source of truth Signed-off-by: Aydin Abiar --- .../asynchronous-inference/content/README.md | 37 +++++++++++++------ .../content/asynchronous-inference.ipynb | 4 +- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/README.md b/doc/source/serve/tutorials/asynchronous-inference/content/README.md index fb85c632099b..26e1602b7c0c 100644 --- 
a/doc/source/serve/tutorials/asynchronous-inference/content/README.md +++ b/doc/source/serve/tutorials/asynchronous-inference/content/README.md @@ -1,6 +1,3 @@ ---- -orphan: true ---- # Asynchronous Inference with Ray Serve **⏱️ Time to complete:** 30 minutes @@ -57,14 +54,14 @@ Redis serves as both the message broker (task queue) and result backend. **Install and start Redis (Google Colab compatible):** -```bash +```python # Install and start Redis server -sudo apt-get update -qq -sudo apt-get install -y redis-server -redis-server --port 6399 --save "" --appendonly no --daemonize yes +!sudo apt-get update -qq +!sudo apt-get install -y redis-server +!redis-server --port 6399 --save "" --appendonly no --daemonize yes # Verify Redis is running -redis-cli -p 6399 ping +!redis-cli -p 6399 ping ``` **Alternative methods:** @@ -73,11 +70,16 @@ redis-cli -p 6399 ping - **Docker:** `docker run -d -p 6379:6379 redis:latest` - **Other platforms:** [Official Redis Installation Guide](https://redis.io/docs/getting-started/installation/) +If you're using a hosted Redis instance, ensure that your Ray Serve cluster can access it. For example, when using AWS ElastiCache for Redis: + +- Launch the ElastiCache instance in the same VPC that's attached to your Anyscale cloud. +- Attach IAM roles with read/write access to ElastiCache to your cluster instances. 
+ ## Step 2: Install Dependencies ```python -pip install -q ray[serve-async-inference]>=2.50.0 requests>=2.31.0 PyPDF2>=3.0.0 celery[redis] +!pip install -q ray[serve-async-inference]>=2.50.0 requests>=2.31.0 PyPDF2>=3.0.0 celery[redis] ``` ## Step 3: Start the Ray Serve Application @@ -368,15 +370,28 @@ def get_task_status(task_id: str) -> Dict[str, Any]: response.raise_for_status() return response.json() +def wait_for_task_completion(task_id: str, timeout: int = 120, poll_interval: float = 2.0) -> Dict[str, Any]: + """Poll for task completion with timeout.""" + start_time = time.time() + while time.time() - start_time < timeout: + result = get_task_status(task_id) + status = result.get("status") + if status in ("SUCCESS", "FAILURE"): + return result + print(f" ⏳ Status: {status}, waiting...") + time.sleep(poll_interval) + raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds") + for i, (task_id, url) in enumerate(task_ids, 1): print(f"\nTask {i} ({url.split('/')[-1]}):") - result = get_task_status(task_id) + result = wait_for_task_completion(task_id) res = result.get("result") if res: print(f" ✓ Complete: {res.get('page_count')} pages, {res.get('word_count')} words") print(f" ✓ Processing time: {res.get('processing_time_seconds')}s") else: - print(" ✗ No result payload found in response.") + error = result.get("error") + print(f" ✗ Task failed: {error}" if error else " ✗ No result payload found in response.") ``` ## Deploy to Anyscale diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb index 4ff5a163e0d1..8b54a973f8ba 100644 --- a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb +++ b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb @@ -129,12 +129,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "```note\n", "If 
you're using a hosted Redis instance, ensure that your Ray Serve cluster can access it. For example, when using AWS ElastiCache for Redis:\n", "\n", "- Launch the ElastiCache instance in the same VPC that's attached to your Anyscale cloud.\n", - "- Attach IAM roles with read/write access to ElastiCache to your cluster instances.\n", - "```" + "- Attach IAM roles with read/write access to ElastiCache to your cluster instances." ] }, { From 3bf835606bc9095864a236c81894090bcda04347 Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Mon, 12 Jan 2026 16:06:57 -0800 Subject: [PATCH 3/5] add ipython3 lexer Signed-off-by: Aydin Abiar --- .../content/asynchronous-inference.ipynb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb index 8b54a973f8ba..7f58bca06c0f 100644 --- a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb +++ b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb @@ -589,9 +589,7 @@ "name": "python3" }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 + "pygments_lexer": "ipython3" }, "file_extension": ".py", "mimetype": "text/x-python", @@ -599,8 +597,7 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.12" - } - }, + }, "nbformat": 4, "nbformat_minor": 0 } From d6f0411560d054354b00d2abfff4013d7888570d Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Tue, 13 Jan 2026 10:30:10 -0800 Subject: [PATCH 4/5] fix notebook metadata layout Signed-off-by: Aydin Abiar --- .../content/asynchronous-inference.ipynb | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb 
b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb index 7f58bca06c0f..2164a388accd 100644 --- a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb +++ b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb @@ -590,14 +590,8 @@ }, "language_info": { "pygments_lexer": "ipython3" - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.12" - }, + } + }, "nbformat": 4, "nbformat_minor": 0 } From ace54fa95664cc1d58b70ffe4b2b5f8fe6e86d31 Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Thu, 15 Jan 2026 10:37:00 -0800 Subject: [PATCH 5/5] fix model registry mlflow example link Signed-off-by: Aydin Abiar --- doc/source/serve/examples.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/serve/examples.yml b/doc/source/serve/examples.yml index a6935dc0a160..cccd8ff6afcc 100644 --- a/doc/source/serve/examples.yml +++ b/doc/source/serve/examples.yml @@ -163,5 +163,5 @@ examples: related_technology: ml applications - title: Integrate with MLflow Model Registry skill_level: intermediate - link: mlflow-serving-intig + link: model-registries related_technology: integrations