From 52deff17d017e236a93716c859ba0d539e4cfa5a Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Mon, 12 Jan 2026 14:53:23 -0800 Subject: [PATCH 1/5] link to notebook instead of README.md Signed-off-by: Aydin Abiar --- doc/source/conf.py | 1 - doc/source/serve/examples.yml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index d1641e665063..ec5e1192a680 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -242,7 +242,6 @@ def __init__(self, version: str): # Other misc files (overviews, console-only examples, etc) "ray-overview/examples/llamafactory-llm-fine-tune/README.ipynb", "ray-overview/examples/llamafactory-llm-fine-tune/**/*.ipynb", - "serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb", # Legacy/backward compatibility "ray-overview/examples/**/README.md", "train/examples/**/README.md", diff --git a/doc/source/serve/examples.yml b/doc/source/serve/examples.yml index 005794eb3c1c..397bf5ce7a61 100644 --- a/doc/source/serve/examples.yml +++ b/doc/source/serve/examples.yml @@ -153,7 +153,7 @@ examples: skill_level: advanced use_cases: - generative ai - link: tutorials/asynchronous-inference/content/README + link: tutorials/asynchronous-inference/content/asynchronous-inference related_technology: integrations - title: Integrate with MLflow Model Registry skill_level: intermediate From 7d0eca1efe7890169e59a27abfc08959422a995a Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Mon, 12 Jan 2026 16:03:00 -0800 Subject: [PATCH 2/5] fix lexer issue + nbconvert to sync README and source of truth Signed-off-by: Aydin Abiar --- .../asynchronous-inference/content/README.md | 37 +++++++++++++------ .../content/asynchronous-inference.ipynb | 4 +- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/README.md b/doc/source/serve/tutorials/asynchronous-inference/content/README.md index fb85c632099b..26e1602b7c0c 100644 --- 
a/doc/source/serve/tutorials/asynchronous-inference/content/README.md +++ b/doc/source/serve/tutorials/asynchronous-inference/content/README.md @@ -1,6 +1,3 @@ ---- -orphan: true ---- # Asynchronous Inference with Ray Serve **⏱️ Time to complete:** 30 minutes @@ -57,14 +54,14 @@ Redis serves as both the message broker (task queue) and result backend. **Install and start Redis (Google Colab compatible):** -```bash +```python # Install and start Redis server -sudo apt-get update -qq -sudo apt-get install -y redis-server -redis-server --port 6399 --save "" --appendonly no --daemonize yes +!sudo apt-get update -qq +!sudo apt-get install -y redis-server +!redis-server --port 6399 --save "" --appendonly no --daemonize yes # Verify Redis is running -redis-cli -p 6399 ping +!redis-cli -p 6399 ping ``` **Alternative methods:** @@ -73,11 +70,16 @@ redis-cli -p 6399 ping - **Docker:** `docker run -d -p 6379:6379 redis:latest` - **Other platforms:** [Official Redis Installation Guide](https://redis.io/docs/getting-started/installation/) +If you're using a hosted Redis instance, ensure that your Ray Serve cluster can access it. For example, when using AWS ElastiCache for Redis: + +- Launch the ElastiCache instance in the same VPC that's attached to your Anyscale cloud. +- Attach IAM roles with read/write access to ElastiCache to your cluster instances. 
+ ## Step 2: Install Dependencies ```python -pip install -q ray[serve-async-inference]>=2.50.0 requests>=2.31.0 PyPDF2>=3.0.0 celery[redis] +!pip install -q ray[serve-async-inference]>=2.50.0 requests>=2.31.0 PyPDF2>=3.0.0 celery[redis] ``` ## Step 3: Start the Ray Serve Application @@ -368,15 +370,28 @@ def get_task_status(task_id: str) -> Dict[str, Any]: response.raise_for_status() return response.json() +def wait_for_task_completion(task_id: str, timeout: int = 120, poll_interval: float = 2.0) -> Dict[str, Any]: + """Poll for task completion with timeout.""" + start_time = time.time() + while time.time() - start_time < timeout: + result = get_task_status(task_id) + status = result.get("status") + if status in ("SUCCESS", "FAILURE"): + return result + print(f" ⏳ Status: {status}, waiting...") + time.sleep(poll_interval) + raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds") + for i, (task_id, url) in enumerate(task_ids, 1): print(f"\nTask {i} ({url.split('/')[-1]}):") - result = get_task_status(task_id) + result = wait_for_task_completion(task_id) res = result.get("result") if res: print(f" ✓ Complete: {res.get('page_count')} pages, {res.get('word_count')} words") print(f" ✓ Processing time: {res.get('processing_time_seconds')}s") else: - print(" ✗ No result payload found in response.") + error = result.get("error") + print(f" ✗ Task failed: {error}" if error else " ✗ No result payload found in response.") ``` ## Deploy to Anyscale diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb index 4ff5a163e0d1..8b54a973f8ba 100644 --- a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb +++ b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb @@ -129,12 +129,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "```note\n", "If 
you're using a hosted Redis instance, ensure that your Ray Serve cluster can access it. For example, when using AWS ElastiCache for Redis:\n", "\n", "- Launch the ElastiCache instance in the same VPC that's attached to your Anyscale cloud.\n", - "- Attach IAM roles with read/write access to ElastiCache to your cluster instances.\n", - "```" + "- Attach IAM roles with read/write access to ElastiCache to your cluster instances." ] }, { From 3bf835606bc9095864a236c81894090bcda04347 Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Mon, 12 Jan 2026 16:06:57 -0800 Subject: [PATCH 3/5] add ipython3 lexer Signed-off-by: Aydin Abiar --- .../content/asynchronous-inference.ipynb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb index 8b54a973f8ba..7f58bca06c0f 100644 --- a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb +++ b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb @@ -589,9 +589,7 @@ "name": "python3" }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 + "pygments_lexer": "ipython3" }, "file_extension": ".py", "mimetype": "text/x-python", @@ -599,8 +597,7 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.12" - } - }, + }, "nbformat": 4, "nbformat_minor": 0 } From d6f0411560d054354b00d2abfff4013d7888570d Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Tue, 13 Jan 2026 10:30:10 -0800 Subject: [PATCH 4/5] fix notebook metadata layout Signed-off-by: Aydin Abiar --- .../content/asynchronous-inference.ipynb | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb 
b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb index 7f58bca06c0f..2164a388accd 100644 --- a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb +++ b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb @@ -590,14 +590,8 @@ }, "language_info": { "pygments_lexer": "ipython3" - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.12" - }, + } + }, "nbformat": 4, "nbformat_minor": 0 } From ace54fa95664cc1d58b70ffe4b2b5f8fe6e86d31 Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Thu, 15 Jan 2026 10:37:00 -0800 Subject: [PATCH 5/5] fix model registry mlflow example link Signed-off-by: Aydin Abiar --- doc/source/serve/examples.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/serve/examples.yml b/doc/source/serve/examples.yml index a6935dc0a160..cccd8ff6afcc 100644 --- a/doc/source/serve/examples.yml +++ b/doc/source/serve/examples.yml @@ -163,5 +163,5 @@ examples: related_technology: ml applications - title: Integrate with MLflow Model Registry skill_level: intermediate - link: mlflow-serving-intig + link: model-registries related_technology: integrations