From 68574fbcc335946e62c03dc3b6f71dbdf6607a18 Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Sat, 7 Jun 2025 19:19:56 -0400
Subject: [PATCH 01/13] warmup script that sends dummy req via warmup endpoint,
 integrated into imageup

---
 .../content-categoriser/categoriser.py        | 38 ++++++++++++++
 preprocessors/graphic-caption/caption.py      | 38 ++++++++++++++
 preprocessors/text-followup/text-followup.py  | 38 ++++++++++++++
 scripts/imageup                               |  3 ++
 scripts/warmup                                | 51 +++++++++++++++++++
 5 files changed, 168 insertions(+)
 create mode 100755 scripts/warmup

diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py
index b28237f7c..6ec1ad48f 100644
--- a/preprocessors/content-categoriser/categoriser.py
+++ b/preprocessors/content-categoriser/categoriser.py
@@ -198,5 +198,43 @@ def health():
     }), 200
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    """
+    Trigger a warmup call to load the Ollama LLM into memory.
+    This avoids first-request latency by sending a dummy request.
+    """
+    try:
+        # construct the target Ollama endpoint for chat
+        api_url = f"{os.environ['OLLAMA_URL']}/chat"
+
+        # authorization headers with API key
+        headers = {
+            "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}"
+        }
+
+        # prepare the warmup request data using the configured model
+        data = {
+            "model": os.environ["OLLAMA_MODEL"],
+            "messages": [{"role": "user", "content": "warmup"}],
+            "stream": False
+        }
+
+        logging.info("[WARMUP] Warmup endpoint triggered.")
+        logging.debug(
+            "[Warmup] Posting to %s with model %s", api_url, data["model"]
+        )
+
+        # send warmup request (with timeout)
+        r = requests.post(api_url, headers=headers, json=data, timeout=60)
+        r.raise_for_status()
+
+        return jsonify({"status": "warmed"}), 200
+
+    except Exception as e:
+        logging.exception("[WARMUP] Warmup failed.")
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
 if __name__ == "__main__":
     app.run(host='0.0.0.0', port=5000, debug=True)
diff --git a/preprocessors/graphic-caption/caption.py b/preprocessors/graphic-caption/caption.py
index 835b62445..169811f61 100644
--- a/preprocessors/graphic-caption/caption.py
+++ b/preprocessors/graphic-caption/caption.py
@@ -173,5 +173,43 @@ def health():
     }), 200
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    """
+    Trigger a warmup call to load the Ollama LLM into memory.
+    This avoids first-request latency by sending a dummy request.
+    """
+    try:
+        # construct the target Ollama endpoint for chat
+        api_url = f"{os.environ['OLLAMA_URL']}/chat"
+
+        # authorization headers with API key
+        headers = {
+            "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}"
+        }
+
+        # prepare the warmup request data using the configured model
+        data = {
+            "model": os.environ["OLLAMA_MODEL"],
+            "messages": [{"role": "user", "content": "warmup"}],
+            "stream": False
+        }
+
+        logging.info("[WARMUP] Warmup endpoint triggered.")
+        logging.debug(
+            "[Warmup] Posting to %s with model %s", api_url, data["model"]
+        )
+
+        # send warmup request (with timeout)
+        r = requests.post(api_url, headers=headers, json=data, timeout=60)
+        r.raise_for_status()
+
+        return jsonify({"status": "warmed"}), 200
+
+    except Exception as e:
+        logging.exception("[WARMUP] Warmup failed.")
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
 if __name__ == "__main__":
     app.run(host='0.0.0.0', port=5000, debug=True)
diff --git a/preprocessors/text-followup/text-followup.py b/preprocessors/text-followup/text-followup.py
index 2e674557e..4d6cb49c8 100644
--- a/preprocessors/text-followup/text-followup.py
+++ b/preprocessors/text-followup/text-followup.py
@@ -401,5 +401,43 @@ def health():
     }), 200
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    """
+    Trigger a warmup call to load the Ollama LLM into memory.
+    This avoids first-request latency by sending a dummy request.
+    """
+    try:
+        # construct the target Ollama endpoint for chat
+        api_url = f"{os.environ['OLLAMA_URL']}/chat"
+
+        # authorization headers with API key
+        headers = {
+            "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}"
+        }
+
+        # prepare the warmup request data using the configured model
+        data = {
+            "model": os.environ["OLLAMA_MODEL"],
+            "messages": [{"role": "user", "content": "warmup"}],
+            "stream": False
+        }
+
+        logging.info("[WARMUP] Warmup endpoint triggered.")
+        logging.debug(
+            "[Warmup] Posting to %s with model %s", api_url, data["model"]
+        )
+
+        # send warmup request (with timeout)
+        r = requests.post(api_url, headers=headers, json=data, timeout=60)
+        r.raise_for_status()
+
+        return jsonify({"status": "warmed"}), 200
+
+    except Exception as e:
+        logging.exception("[WARMUP] Warmup failed.")
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
 if __name__ == "__main__":
     app.run(host='0.0.0.0', port=5000, debug=True)
diff --git a/scripts/imageup b/scripts/imageup
index b2bf80156..dd5927ce0 100755
--- a/scripts/imageup
+++ b/scripts/imageup
@@ -104,4 +104,7 @@ docker network rm -f image || true
 # Bring all the containers back up
 docker compose --env-file "$COMPOSE_ENV_FILE" up -d --force-recreate
 
+echo "Running warmup for critical services..."
+"${SCRIPT_DIR}/warmup"
+
 cd -
\ No newline at end of file
diff --git a/scripts/warmup b/scripts/warmup
new file mode 100755
index 000000000..5b60f5191
--- /dev/null
+++ b/scripts/warmup
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Locate this script's directory
+SCRIPT_DIR="$(dirname "$(realpath "$0")")"
+
+# Load env file
+ENV_FILE="$SCRIPT_DIR/../config/warmup.env"
+if [ -f "$ENV_FILE" ]; then
+  source "$ENV_FILE"
+else
+  echo "Missing $ENV_FILE" >&2
+  exit 1
+fi
+
+# Setup log file (with date)
+timestamp=$(date +"%Y%m%d_%H%M%S")
+logfile="${WARMUP_LOG_DIR}/warmup_${timestamp}.log"
+mkdir -p "$WARMUP_LOG_DIR"
+
+echo "[Warmup] $(date) Starting warmup..." | tee -a "$logfile"
+
+# Wait for health and warm up
+for container in "${!WARMUP_TARGETS[@]}"; do
+  endpoint="${WARMUP_TARGETS[$container]}"
+
+  if ! docker inspect "$container" &>/dev/null; then
+    echo "[Warmup] Container $container not found. Skipping." | tee -a "$logfile"
+    continue
+  fi
+
+  echo "[Warmup] Waiting for $container to be healthy..." | tee -a "$logfile"
+  until [[ "$(docker inspect -f '{{.State.Health.Status}}' "$container")" == "healthy" ]]; do
+    sleep 5
+  done
+
+  echo "[Warmup] $container marked healthy. Waiting 10s before hitting warmup..." | tee -a "$logfile"
+  # Wait briefly after container is marked healthy to ensure internal models are fully initialized before warmup.
+  # prevents race conditions where healthcheck passes but model isnt ready
+  sleep 10
+
+  echo "[Warmup] Hitting warmup endpoint on $container..." | tee -a "$logfile"
+  resp=$(docker exec "$container" curl -s -w "%{http_code}" -o /tmp/warmup_resp.txt "$endpoint")
+  if [[ "$resp" == "200" ]]; then
+    echo "[Warmup] $container warmed successfully." | tee -a "$logfile"
+  else
+    echo "[Warmup] $container warmup failed with HTTP $resp. Response was:" | tee -a "$logfile"
+    docker exec "$container" cat /tmp/warmup_resp.txt | tee -a "$logfile" || echo "[Warmup] (no response body)" | tee -a "$logfile"
+  fi
+done
+
+echo "[Warmup] Completed at $(date)! " | tee -a "$logfile"
\ No newline at end of file

From 6e5b26c7dba9de5b8177ed9afd41981ffee72cdc Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Sat, 7 Jun 2025 19:41:46 -0400
Subject: [PATCH 02/13] text2speech warmup endpoints

---
 services/espnet-tts-fr/src/app.py | 16 ++++++++++++++++
 services/espnet-tts/src/app.py    | 16 ++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py
index e54ca902c..b5e4a60e4 100644
--- a/services/espnet-tts-fr/src/app.py
+++ b/services/espnet-tts-fr/src/app.py
@@ -172,6 +172,22 @@ def segment_tts():
         empty_cache()
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    """
+    Trigger a dummy call to warm up the model and pre-load it into GPU memory.
+    to reduce first-request latency by avoiding model load.
+    """
+    try:
+        logger.info("[WARMUP] Warmup endpoint triggered.")
+        # Run inference on a short dummy input
+        _ = tts("warmup")
+        return jsonify({"status": "warmed"}), 200
+    except Exception as e:
+        logger.exception("[WARMUP] Warmup failed.")
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
 @app.route("/health", methods=["GET"])
 def health():
     """
diff --git a/services/espnet-tts/src/app.py b/services/espnet-tts/src/app.py
index a825776d5..347922560 100644
--- a/services/espnet-tts/src/app.py
+++ b/services/espnet-tts/src/app.py
@@ -115,6 +115,22 @@ def segment_tts():
         empty_cache()
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    """
+    Trigger a dummy call to warm up the model and pre-load it into GPU memory.
+    to reduce first-request latency by avoiding model load.
+    """
+    try:
+        logger.info("[WARMUP] Warmup endpoint triggered.")
+        # Run inference on a short dummy input
+        _ = tts("warmup")
+        return jsonify({"status": "warmed"}), 200
+    except Exception as e:
+        logger.exception("[WARMUP] Warmup failed.")
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
 @app.route("/health", methods=["GET"])
 def health():
     """

From ae67f1755968df5ce08da945bc914f80a205bb5f Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Sun, 8 Jun 2025 00:21:39 -0400
Subject: [PATCH 03/13] semantic segmentation warmup endpoint

---
 preprocessors/mmsemseg/segment.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/preprocessors/mmsemseg/segment.py b/preprocessors/mmsemseg/segment.py
index 1e5c86236..c457f399f 100644
--- a/preprocessors/mmsemseg/segment.py
+++ b/preprocessors/mmsemseg/segment.py
@@ -320,5 +320,33 @@ def gpu_driver_health_check():
         }), 500
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    """
+    Warms up the segmentation model by running a dummy inference.
+    """
+    try:
+        # dummy black image (512×512)
+        dummy_img = np.zeros((512, 512, 3), dtype=np.uint8)
+
+        # runs inference_segmentor(): model weight loading/memory allocation
+        model = init_segmentor(BEIT_CONFIG, BEIT_CHECKPOINT, device='cuda:0')
+        _ = inference_segmentor(model, dummy_img)
+
+        torch.cuda.empty_cache()
+
+        return jsonify({
+            "status": "warmup successful",
+            "timestamp": datetime.now().isoformat()
+        }), 200
+
+    except Exception as e:
+        logging.exception("Warmup failed")
+        return jsonify({
+            "status": "warmup failed",
+            "message": str(e)
+        }), 500
+
+
 if __name__ == "__main__":
     app.run(host='0.0.0.0', port=5000, debug=True)

From ad73553272c2808fcebf9dbd5e31763696ba744a Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Sun, 8 Jun 2025 00:31:54 -0400
Subject: [PATCH 04/13] depth-map-gen warmup endpoint

---
 .../depth-map-gen/depth-map-generator.py      | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/preprocessors/depth-map-gen/depth-map-generator.py b/preprocessors/depth-map-gen/depth-map-generator.py
index eff12c28d..cc88835e3 100644
--- a/preprocessors/depth-map-gen/depth-map-generator.py
+++ b/preprocessors/depth-map-gen/depth-map-generator.py
@@ -223,6 +223,28 @@ def health():
     }), 200
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    try:
+        model = RelDepthModel(backbone='resnext101').eval().cuda()
+        model.load_state_dict(
+            strip_prefix_if_present(
+                torch.load("/app/res101.pth")['depth_model'], "module."),
+            strict=True
+        )
+
+        # simulating a single RGB image input to the model
+        # 1: one image; 3: RGB; 448 and 448: height and width
+        dummy = torch.ones((1, 3, 448, 448), dtype=torch.float32).cuda()
+        _ = model.inference(dummy)
+        return jsonify({"status": "warmed"}), 200
+
+    except Exception as e:
+        logging.error("Warmup failed")
+        logging.pii(f"Warmup error: {e}")
+        return jsonify({"status": "warmup failed"}), 500
+
+
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=5000, debug=True)
     depthgenerator()

From 669697f4e88f0617b83faf9bd84e186c95108cbd Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Sun, 8 Jun 2025 00:41:59 -0400
Subject: [PATCH 05/13] object-detection warmup endpoint

---
 preprocessors/yolo/detect.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/preprocessors/yolo/detect.py b/preprocessors/yolo/detect.py
index 0c83b8470..a387bed00 100644
--- a/preprocessors/yolo/detect.py
+++ b/preprocessors/yolo/detect.py
@@ -239,5 +239,29 @@ def health():
     }), 200
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    try:
+        # create a blank dummy image (640x640)
+        dummy_image = Image.new("RGB", (640, 640), color=(0, 0, 0))
+
+        # Run YOLO inference with dummy image
+        with torch.no_grad():
+            _ = model.predict(
+                dummy_image,
+                device=device,
+                conf=CONF_THRESHOLD,
+                imgsz=MAX_IMAGE_SIZE,
+                verbose=False
+            )
+
+        logging.info("YOLO warmup completed successfully")
+        return jsonify({"status": "ok"}), 200
+    except Exception as e:
+        logging.error(f"YOLO warmup failed: {str(e)}")
+        logging.pii(traceback.format_exc())
+        return jsonify({"status": "error", "error": str(e)}), 500
+
+
 if __name__ == "__main__":
     app.run(debug=True)

From d6ab4d27b2ba1fdb99b9f8d8f1bb095f9ee9e3ae Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Sun, 8 Jun 2025 20:59:02 -0400
Subject: [PATCH 06/13] adding pii

---
 .../content-categoriser/categoriser.py        |  7 +--
 .../depth-map-gen/depth-map-generator.py      |  2 +-
 preprocessors/graphic-caption/caption.py      |  9 ++--
 preprocessors/mmsemseg/segment.py             |  1 +
 .../multistage-diagram-segmentation.py        | 43 +++++++++++++++++++
 preprocessors/text-followup/text-followup.py  |  7 +--
 6 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py
index 6ec1ad48f..f05626a4f 100644
--- a/preprocessors/content-categoriser/categoriser.py
+++ b/preprocessors/content-categoriser/categoriser.py
@@ -221,8 +221,8 @@ def warmup():
         }
 
         logging.info("[WARMUP] Warmup endpoint triggered.")
-        logging.debug(
-            "[Warmup] Posting to %s with model %s", api_url, data["model"]
+        logging.pii(
+            f"[WARMUP] Posting to {api_url} with model {data['model']}"
         )
 
         # send warmup request (with timeout)
@@ -232,7 +232,8 @@ def warmup():
         return jsonify({"status": "warmed"}), 200
 
     except Exception as e:
-        logging.exception("[WARMUP] Warmup failed.")
+        logging.pii(f"[WARMUP] Warmup failed: {str(e)}")
+        logging.exception("[WARMUP] Exception details:")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
diff --git a/preprocessors/depth-map-gen/depth-map-generator.py b/preprocessors/depth-map-gen/depth-map-generator.py
index cc88835e3..8913475a1 100644
--- a/preprocessors/depth-map-gen/depth-map-generator.py
+++ b/preprocessors/depth-map-gen/depth-map-generator.py
@@ -36,7 +36,6 @@
 configure_logging()
 
 app = Flask(__name__)
-logging.basicConfig(level=logging.DEBUG)
 
 
 def parse_args():
@@ -226,6 +225,7 @@ def health():
 @app.route("/warmup", methods=["GET"])
 def warmup():
     try:
+        logging.pii("[WARMUP] Initializing RelDepthModel with resnext101 backbone and loading weights from /app/res101.pth")
         model = RelDepthModel(backbone='resnext101').eval().cuda()
         model.load_state_dict(
             strip_prefix_if_present(
diff --git a/preprocessors/graphic-caption/caption.py b/preprocessors/graphic-caption/caption.py
index 169811f61..bd1fad13b 100644
--- a/preprocessors/graphic-caption/caption.py
+++ b/preprocessors/graphic-caption/caption.py
@@ -28,7 +28,6 @@
 configure_logging()
 
 app = Flask(__name__)
-logging.basicConfig(level=logging.DEBUG)
 
 PROMPT = """Describe this image to a person who cannot see it.
     Use simple, descriptive, clear, and concise language.
@@ -196,8 +195,9 @@ def warmup():
         }
 
         logging.info("[WARMUP] Warmup endpoint triggered.")
-        logging.debug(
-            "[Warmup] Posting to %s with model %s", api_url, data["model"]
+        logging.pii(
+            f"[WARMUP] Sending warmup request to {api_url} with model: \
+                {data['model']}"
         )
 
         # send warmup request (with timeout)
@@ -207,7 +207,8 @@ def warmup():
         return jsonify({"status": "warmed"}), 200
 
     except Exception as e:
-        logging.exception("[WARMUP] Warmup failed.")
+        logging.pii(f"[WARMUP] Warmup failed: {e}")
+        logging.exception("[WARMUP] Exception details:")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
diff --git a/preprocessors/mmsemseg/segment.py b/preprocessors/mmsemseg/segment.py
index c457f399f..26163a266 100644
--- a/preprocessors/mmsemseg/segment.py
+++ b/preprocessors/mmsemseg/segment.py
@@ -341,6 +341,7 @@ def warmup():
         }), 200
 
     except Exception as e:
+        logging.pii(f"[WARMUP] Warmup failed: {e}")
         logging.exception("Warmup failed")
         return jsonify({
             "status": "warmup failed",
diff --git a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py
index 8b1870363..49d1c8945 100644
--- a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py
+++ b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py
@@ -819,5 +819,48 @@ def health():
     }), 200
 
 
+@app.route("/warmup", methods=["GET"])
+def warmup():
+    """
+    Warms up Gemini & SAM
+    by running dummy inferences to load models into memory.
+    """
+    try:
+        logging.info("Starting warmup routine...")
+
+        # 1. warm up Gemini by sending a dummy prompt with a blank image
+        dummy_img = Image.new("RGB", (512, 512), color="white")
+        response = client.models.generate_content(
+            model=GEMINI_MODEL,
+            contents=["Return an empty JSON object.", dummy_img],
+            config=types.GenerateContentConfig(
+                temperature=0.1,
+                safety_settings=safety_settings,
+                response_mime_type='application/json',
+                response_schema=BASE_SCHEMA_GEMINI,
+            )
+        )
+        _ = validate_gemini_response(response)
+
+        # 2. warm up SAM with a dummy box
+        dummy_cv2 = np.zeros((512, 512, 3), dtype=np.uint8)
+        dummy_pil = Image.fromarray(dummy_cv2)
+        dummy_bbox = [[100, 100, 200, 200]]  # [x1, y1, x2, y2]
+        _ = sam_model(dummy_pil, bboxes=dummy_bbox)
+
+        logging.info("Warmup completed successfully.")
+        return jsonify({
+            "status": "warmup successful",
+            "timestamp": datetime.now().isoformat()
+        }), 200
+
+    except Exception as e:
+        logging.exception("Warmup failed")
+        return jsonify({
+            "status": "warmup failed",
+            "message": str(e)
+        }), 500
+
+
 if __name__ == "__main__":
     app.run(host='0.0.0.0', port=5000, debug=True)
diff --git a/preprocessors/text-followup/text-followup.py b/preprocessors/text-followup/text-followup.py
index 4d6cb49c8..1aefd135e 100644
--- a/preprocessors/text-followup/text-followup.py
+++ b/preprocessors/text-followup/text-followup.py
@@ -424,8 +424,8 @@ def warmup():
         }
 
         logging.info("[WARMUP] Warmup endpoint triggered.")
-        logging.debug(
-            "[Warmup] Posting to %s with model %s", api_url, data["model"]
+        logging.pii(
+            f"[WARMUP] Posting to {api_url} with model {data['model']}"
         )
 
         # send warmup request (with timeout)
@@ -435,7 +435,8 @@ def warmup():
         return jsonify({"status": "warmed"}), 200
 
     except Exception as e:
-        logging.exception("[WARMUP] Warmup failed.")
+        logging.pii(f"[WARMUP] Warmup failed: {str(e)}")
+        logging.exception("[WARMUP] Exception details:")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 

From 3ed541c12e1ced6d1693beefd6b97111a8ab5d8b Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Sun, 8 Jun 2025 21:01:19 -0400
Subject: [PATCH 07/13] added warmup endpoint to
 preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py
 with pii

---
 .../multistage-diagram-segmentation.py        | 29 +++++--------------
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py
index 49d1c8945..9e94a6c4e 100644
--- a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py
+++ b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py
@@ -821,18 +821,14 @@ def health():
 
 @app.route("/warmup", methods=["GET"])
 def warmup():
-    """
-    Warms up Gemini & SAM
-    by running dummy inferences to load models into memory.
-    """
     try:
-        logging.info("Starting warmup routine...")
+        logging.info("Warming up Gemini and SAM...")
 
-        # 1. warm up Gemini by sending a dummy prompt with a blank image
+        # Gemini: dummy image + prompt
         dummy_img = Image.new("RGB", (512, 512), color="white")
         response = client.models.generate_content(
             model=GEMINI_MODEL,
-            contents=["Return an empty JSON object.", dummy_img],
+            contents=["{}", dummy_img],
             config=types.GenerateContentConfig(
                 temperature=0.1,
                 safety_settings=safety_settings,
@@ -842,24 +838,15 @@ def warmup():
         )
         _ = validate_gemini_response(response)
 
-        # 2. warm up SAM with a dummy box
+        # SAM: dummy box
         dummy_cv2 = np.zeros((512, 512, 3), dtype=np.uint8)
         dummy_pil = Image.fromarray(dummy_cv2)
-        dummy_bbox = [[100, 100, 200, 200]]  # [x1, y1, x2, y2]
-        _ = sam_model(dummy_pil, bboxes=dummy_bbox)
-
-        logging.info("Warmup completed successfully.")
-        return jsonify({
-            "status": "warmup successful",
-            "timestamp": datetime.now().isoformat()
-        }), 200
+        _ = sam_model(dummy_pil, bboxes=[[100, 100, 200, 200]])
 
+        return jsonify({"status": "ok"}), 200
     except Exception as e:
-        logging.exception("Warmup failed")
-        return jsonify({
-            "status": "warmup failed",
-            "message": str(e)
-        }), 500
+        logging.pii(f"Warmup failed: {str(e)}")
+        return jsonify({"status": "error", "message": str(e)}), 500
 
 
 if __name__ == "__main__":

From afdd70802f0791c75fea3b95368a17e882686c84 Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Sun, 8 Jun 2025 21:02:53 -0400
Subject: [PATCH 08/13] pep8

---
 preprocessors/depth-map-gen/depth-map-generator.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/preprocessors/depth-map-gen/depth-map-generator.py b/preprocessors/depth-map-gen/depth-map-generator.py
index 8913475a1..5424523d1 100644
--- a/preprocessors/depth-map-gen/depth-map-generator.py
+++ b/preprocessors/depth-map-gen/depth-map-generator.py
@@ -225,7 +225,8 @@ def health():
 @app.route("/warmup", methods=["GET"])
 def warmup():
     try:
-        logging.pii("[WARMUP] Initializing RelDepthModel with resnext101 backbone and loading weights from /app/res101.pth")
+        logging.pii("[WARMUP] Initializing RelDepthModel with resnext101 "
+                    "and loading weights from /app/res101.pth")
         model = RelDepthModel(backbone='resnext101').eval().cuda()
         model.load_state_dict(
             strip_prefix_if_present(

From 7c5c76252453920eff41041d521ee0afc129ba4c Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Mon, 16 Jun 2025 13:12:21 -0400
Subject: [PATCH 09/13] flagging services with WARMUP_ENABLED=true to hit
 warmup endpoint, modified the warmup script to read accordingly

---
 docker-compose.yml | 11 +++++++++++
 scripts/warmup     | 40 ++++++++++++++++++++++------------------
 2 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 02c2fdab7..231ce87ba 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -44,6 +44,7 @@ services:
     restart: "no"
     environment:
       - TORCH_DEVICE=cuda
+      - WARMUP_ENABLED=true
     labels:
       ca.mcgill.a11y.image.cacheTimeout: 3600
     deploy:
@@ -59,6 +60,7 @@ services:
     restart: "no"
     environment:
       - TORCH_DEVICE=cuda
+      - WARMUP_ENABLED=true
     labels:
       ca.mcgill.a11y.image.cacheTimeout: 3600
     deploy:
@@ -80,6 +82,8 @@ services:
           devices:
             - driver: nvidia
               capabilities: ["gpu", "compute", "utility"]
+    environment:
+      - WARMUP_ENABLED=true
 
   autour-preprocessor:
     profiles: [production, test, default]
@@ -110,6 +114,7 @@ services:
       ./config/ollama.env
     environment:
       - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
+      - WARMUP_ENABLED=true
 
   graphic-caption:
     profiles: [production, test, default]
@@ -125,6 +130,7 @@ services:
       ./config/ollama.env
     environment:
       - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
+      - WARMUP_ENABLED=true
 
   text-followup:
     profiles: [production, test, default]
@@ -134,6 +140,7 @@ services:
       - MAX_HISTORY_LENGTH=100
       - HISTORY_EXPIRY=3600
       - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
+      - WARMUP_ENABLED=true
     labels:
       ca.mcgill.a11y.image.preprocessor: 1
       ca.mcgill.a11y.image.port: 5000
@@ -179,6 +186,7 @@ services:
       - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
       - YOLO_MODEL_PATH=/usr/src/app/models/yolo11x.pt
       - CONF_THRESHOLD=0.75
+      - WARMUP_ENABLED=true
 
   multistage-diagram-segmentation:
     profiles: [production, test, default]
@@ -201,6 +209,7 @@ services:
       - SAM_MODEL_PATH=/usr/src/app/models/sam2.1_l.pt
       - GEMINI_MODEL=gemini-2.5-pro-preview-06-05
       - BASE_SCHEMA=/usr/src/app/base_schema.json
+      - WARMUP_ENABLED=true
     env_file:
       ./config/gemini.env   
 
@@ -263,6 +272,7 @@ services:
       ca.mcgill.a11y.image.optional_dependencies: "content-categoriser,graphic-tagger"
     environment:
       - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
+      - WARMUP_ENABLED=true
   
   supercollider:
     profiles: [production, test, default]
@@ -376,6 +386,7 @@ services:
         ca.mcgill.a11y.image.optional_dependencies: ""
     environment:
       - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED}
+      - WARMUP_ENABLED=true
 
   svg-depth-map:
     profiles: [production, test, default]
diff --git a/scripts/warmup b/scripts/warmup
index 5b60f5191..ef0136d2d 100755
--- a/scripts/warmup
+++ b/scripts/warmup
@@ -3,43 +3,47 @@
 # Locate this script's directory
 SCRIPT_DIR="$(dirname "$(realpath "$0")")"
 
-# Load env file
-ENV_FILE="$SCRIPT_DIR/../config/warmup.env"
-if [ -f "$ENV_FILE" ]; then
-  source "$ENV_FILE"
-else
-  echo "Missing $ENV_FILE" >&2
-  exit 1
-fi
-
-# Setup log file (with date)
+WARMUP_LOG_DIR="/var/docker/image/testing/warmup"
 timestamp=$(date +"%Y%m%d_%H%M%S")
 logfile="${WARMUP_LOG_DIR}/warmup_${timestamp}.log"
 mkdir -p "$WARMUP_LOG_DIR"
 
 echo "[Warmup] $(date) Starting warmup..." | tee -a "$logfile"
 
-# Wait for health and warm up
-for container in "${!WARMUP_TARGETS[@]}"; do
-  endpoint="${WARMUP_TARGETS[$container]}"
+# Get all running containers
+containers=$(docker ps --format '{{.Names}}')
 
-  if ! docker inspect "$container" &>/dev/null; then
-    echo "[Warmup] Container $container not found. Skipping." | tee -a "$logfile"
+for container in $containers; do
+  # Check if WARMUP_ENABLED=true is present in the environment
+  if ! docker inspect -f '{{range .Config.Env}}{{println .}}{{end}}' "$container" | grep -q "^WARMUP_ENABLED=true$"; then
     continue
   fi
 
+  # Get EXPOSED port (assume first one is the correct one)
+  exposed_port=$(docker inspect -f '{{range $p, $_ := .Config.ExposedPorts}}{{println $p}}{{end}}' "$container" | head -n1 | cut -d'/' -f1)
+  if [ -z "$exposed_port" ]; then
+    echo "[Warmup] $container has no EXPOSEd port. Skipping." | tee -a "$logfile"
+    continue
+  fi
+
+  endpoint="http://localhost:${exposed_port}/warmup"
+
   echo "[Warmup] Waiting for $container to be healthy..." | tee -a "$logfile"
   until [[ "$(docker inspect -f '{{.State.Health.Status}}' "$container")" == "healthy" ]]; do
-    sleep 5
+    sleep 2
   done
 
   echo "[Warmup] $container marked healthy. Waiting 10s before hitting warmup..." | tee -a "$logfile"
   # Wait briefly after container is marked healthy to ensure internal models are fully initialized before warmup.
   # prevents race conditions where healthcheck passes but model isnt ready
-  sleep 10
 
-  echo "[Warmup] Hitting warmup endpoint on $container..." | tee -a "$logfile"
+  # add random jitter to stagger warmups (addresses potential resource spike if all hit at once)
+  jitter=$((RANDOM % 5))
+  sleep $((10 + jitter))
+
+  echo "[Warmup] Hitting warmup endpoint at $endpoint..." | tee -a "$logfile"
   resp=$(docker exec "$container" curl -s -w "%{http_code}" -o /tmp/warmup_resp.txt "$endpoint")
+
   if [[ "$resp" == "200" ]]; then
     echo "[Warmup] $container warmed successfully." | tee -a "$logfile"
   else

From 05e69a49b3d2ff05e358ce0d80539f657faa3cec Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Mon, 16 Jun 2025 23:01:59 -0400
Subject: [PATCH 10/13] updating warmup endpoint for yolo

---
 preprocessors/yolo/detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preprocessors/yolo/detect.py b/preprocessors/yolo/detect.py
index a387bed00..1b38466cd 100644
--- a/preprocessors/yolo/detect.py
+++ b/preprocessors/yolo/detect.py
@@ -251,7 +251,7 @@ def warmup():
                 dummy_image,
                 device=device,
                 conf=CONF_THRESHOLD,
-                imgsz=MAX_IMAGE_SIZE,
+                # imgsz=MAX_IMAGE_SIZE,
                 verbose=False
             )
 

From 84fae39c369358d18086576c4c3731c342107190 Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Tue, 17 Jun 2025 18:57:40 -0400
Subject: [PATCH 11/13] modifying preprocessors warmup endpoints

---
 .../content-categoriser/categoriser.py        | 20 +++++++++---------
 preprocessors/graphic-caption/caption.py      | 21 +++++++++----------
 preprocessors/mmsemseg/Dockerfile             |  4 ++--
 preprocessors/mmsemseg/segment.py             |  9 +++++---
 preprocessors/yolo/detect.py                  |  4 ++--
 5 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py
index f05626a4f..6ec1b9c7c 100644
--- a/preprocessors/content-categoriser/categoriser.py
+++ b/preprocessors/content-categoriser/categoriser.py
@@ -205,25 +205,26 @@ def warmup():
     This avoids first-request latency by sending a dummy request.
     """
     try:
-        # construct the target Ollama endpoint for chat
-        api_url = f"{os.environ['OLLAMA_URL']}/chat"
+        # construct the target Ollama endpoint for generate
+        api_url = f"{os.environ['OLLAMA_URL']}/generate"
 
         # authorization headers with API key
         headers = {
-            "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}"
+            "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}",
+            "Content-Type": "application/json"
         }
 
         # prepare the warmup request data using the configured model
         data = {
             "model": os.environ["OLLAMA_MODEL"],
-            "messages": [{"role": "user", "content": "warmup"}],
-            "stream": False
+            "prompt": "ping",
+            "stream": False,
+            "keep_alive": -1  # instruct Ollama to keep the model in memory
         }
 
         logging.info("[WARMUP] Warmup endpoint triggered.")
-        logging.pii(
-            f"[WARMUP] Posting to {api_url} with model {data['model']}"
-        )
+        logging.pii(f"[WARMUP] Posting to {api_url} "
+                    f"with model {data['model']}")
 
         # send warmup request (with timeout)
         r = requests.post(api_url, headers=headers, json=data, timeout=60)
@@ -232,8 +233,7 @@ def warmup():
         return jsonify({"status": "warmed"}), 200
 
     except Exception as e:
-        logging.pii(f"[WARMUP] Warmup failed: {str(e)}")
-        logging.exception("[WARMUP] Exception details:")
+        logging.exception(f"[WARMUP] Exception details: {str(e)}")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
diff --git a/preprocessors/graphic-caption/caption.py b/preprocessors/graphic-caption/caption.py
index bd1fad13b..64979dbe2 100644
--- a/preprocessors/graphic-caption/caption.py
+++ b/preprocessors/graphic-caption/caption.py
@@ -179,26 +179,26 @@ def warmup():
     This avoids first-request latency by sending a dummy request.
     """
     try:
-        # construct the target Ollama endpoint for chat
-        api_url = f"{os.environ['OLLAMA_URL']}/chat"
+        # construct the target Ollama endpoint for generate
+        api_url = f"{os.environ['OLLAMA_URL']}/generate"
 
         # authorization headers with API key
         headers = {
-            "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}"
+            "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}",
+            "Content-Type": "application/json"
         }
 
         # prepare the warmup request data using the configured model
         data = {
             "model": os.environ["OLLAMA_MODEL"],
-            "messages": [{"role": "user", "content": "warmup"}],
-            "stream": False
+            "prompt": "ping",
+            "stream": False,
+            "keep_alive": -1  # instruct Ollama to keep the model in memory
         }
 
         logging.info("[WARMUP] Warmup endpoint triggered.")
-        logging.pii(
-            f"[WARMUP] Sending warmup request to {api_url} with model: \
-                {data['model']}"
-        )
+        logging.pii(f"[WARMUP] Posting to {api_url} "
+                    f"with model {data['model']}")
 
         # send warmup request (with timeout)
         r = requests.post(api_url, headers=headers, json=data, timeout=60)
@@ -207,8 +207,7 @@ def warmup():
         return jsonify({"status": "warmed"}), 200
 
     except Exception as e:
-        logging.pii(f"[WARMUP] Warmup failed: {e}")
-        logging.exception("[WARMUP] Exception details:")
+        logging.exception(f"[WARMUP] Exception details: {str(e)}")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
diff --git a/preprocessors/mmsemseg/Dockerfile b/preprocessors/mmsemseg/Dockerfile
index 99d0a4c1d..ab4f536d3 100644
--- a/preprocessors/mmsemseg/Dockerfile
+++ b/preprocessors/mmsemseg/Dockerfile
@@ -52,7 +52,7 @@ EXPOSE 5000
 ENV FLASK_APP=segment.py
 USER python
 
-HEALTHCHECK --interval=60s --timeout=10s --start-period=120s --retries=5 CMD curl -f http://localhost:5000/health || exit 1
-HEALTHCHECK --interval=3600s --timeout=30s --start-period=120s --retries=3 CMD curl -f http://localhost:5000/health/gpu || exit 1
+HEALTHCHECK --interval=60s --timeout=10s --start-period=120s --retries=5 \
+  CMD curl -f http://localhost:5000/health && curl -f http://localhost:5000/health/gpu || exit 1
 
 CMD [ "gunicorn", "segment:app", "-b", "0.0.0.0:5000", "--capture-output", "--log-level=debug" ]
\ No newline at end of file
diff --git a/preprocessors/mmsemseg/segment.py b/preprocessors/mmsemseg/segment.py
index 26163a266..302722a7a 100644
--- a/preprocessors/mmsemseg/segment.py
+++ b/preprocessors/mmsemseg/segment.py
@@ -284,10 +284,13 @@ def gpu_driver_health_check():
     try:
         # Get installed NVIDIA driver version from nvidia-smi
         nvidia_smi_version = subprocess.check_output(
-            ["nvidia-smi", "--query-gpu=driver_version",
-             "--format=csv,noheader"],
+            [
+                "nvidia-smi", 
+                "--query-gpu=driver_version",
+                "--format=csv,noheader"
+            ],
             text=True
-        ).strip()
+        ).strip().split("\n")[0]
 
         # Get loaded driver version from /proc/driver/nvidia/version
         loaded_driver_version = subprocess.check_output(
diff --git a/preprocessors/yolo/detect.py b/preprocessors/yolo/detect.py
index 1b38466cd..83cfea757 100644
--- a/preprocessors/yolo/detect.py
+++ b/preprocessors/yolo/detect.py
@@ -243,7 +243,7 @@ def health():
 def warmup():
     try:
         # create a blank dummy image (640x640)
-        dummy_image = Image.new("RGB", (640, 640), color=(0, 0, 0))
+        dummy_image = Image.new("RGB", (8, 8), color=(0, 0, 0))
 
         # Run YOLO inference with dummy image
         with torch.no_grad():
@@ -255,7 +255,7 @@ def warmup():
                 verbose=False
             )
 
-        logging.info("YOLO warmup completed successfully")
+        logging.info("YOLO warmup completed successfully with 8x8 image.")
         return jsonify({"status": "ok"}), 200
     except Exception as e:
         logging.error(f"YOLO warmup failed: {str(e)}")

From 69dd531c22fc8b5b64a0e0cdae779ccb832bba63 Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Tue, 17 Jun 2025 19:00:34 -0400
Subject: [PATCH 12/13] removing trailing space

---
 preprocessors/mmsemseg/segment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preprocessors/mmsemseg/segment.py b/preprocessors/mmsemseg/segment.py
index 302722a7a..615a99458 100644
--- a/preprocessors/mmsemseg/segment.py
+++ b/preprocessors/mmsemseg/segment.py
@@ -285,7 +285,7 @@ def gpu_driver_health_check():
         # Get installed NVIDIA driver version from nvidia-smi
         nvidia_smi_version = subprocess.check_output(
             [
-                "nvidia-smi", 
+                "nvidia-smi",
                 "--query-gpu=driver_version",
                 "--format=csv,noheader"
             ],

From 0ac63b122bb45d312bb893f68eaeccc7005f5232 Mon Sep 17 00:00:00 2001
From: Shahd Yousef <shahdyousefak@gmail.com>
Date: Tue, 17 Jun 2025 19:05:38 -0400
Subject: [PATCH 13/13] Restrict to containers on the 'image' Docker network

---
 scripts/warmup | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/scripts/warmup b/scripts/warmup
index ef0136d2d..909ab4f20 100755
--- a/scripts/warmup
+++ b/scripts/warmup
@@ -11,7 +11,13 @@ mkdir -p "$WARMUP_LOG_DIR"
 echo "[Warmup] $(date) Starting warmup..." | tee -a "$logfile"
 
 # Get all running containers
-containers=$(docker ps --format '{{.Names}}')
+# Restrict to containers on the 'image' Docker network
+if docker network inspect image &> /dev/null; then
+  containers=$(docker network inspect image --format '{{range .Containers}}{{.Name}} {{end}}')
+else
+  echo "[Warmup] No Docker network named 'image' found. Aborting." | tee -a "$logfile"
+  exit 1
+fi
 
 for container in $containers; do
   # Check if WARMUP_ENABLED=true is present in the environment
@@ -38,6 +44,7 @@ for container in $containers; do
   # prevents race conditions where healthcheck passes but model isnt ready
 
   # add random jitter to stagger warmups (addresses potential resource spike if all hit at once)
+  # note: even if some warmups fail, most models will still be partially/fully loaded, so the first real request is likely to succeed or respond faster than a cold start.
   jitter=$((RANDOM % 5))
   sleep $((10 + jitter))