From 68574fbcc335946e62c03dc3b6f71dbdf6607a18 Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Sat, 7 Jun 2025 19:19:56 -0400 Subject: [PATCH 01/13] warmup script that sends dummy req via warmup endpoint , integrated into imageup --- .../content-categoriser/categoriser.py | 38 ++++++++++++++ preprocessors/graphic-caption/caption.py | 38 ++++++++++++++ preprocessors/text-followup/text-followup.py | 38 ++++++++++++++ scripts/imageup | 3 ++ scripts/warmup | 51 +++++++++++++++++++ 5 files changed, 168 insertions(+) create mode 100755 scripts/warmup diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index b28237f7c..6ec1ad48f 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -198,5 +198,43 @@ def health(): }), 200 +@app.route("/warmup", methods=["GET"]) +def warmup(): + """ + Trigger a warmup call to load the Ollama LLM into memory. + This avoids first-request latency by sending a dummy request. 
+ """ + try: + # construct the target Ollama endpoint for chat + api_url = f"{os.environ['OLLAMA_URL']}/chat" + + # authorization headers with API key + headers = { + "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}" + } + + # prepare the warmup request data using the configured model + data = { + "model": os.environ["OLLAMA_MODEL"], + "messages": [{"role": "user", "content": "warmup"}], + "stream": False + } + + logging.info("[WARMUP] Warmup endpoint triggered.") + logging.debug( + "[Warmup] Posting to %s with model %s", api_url, data["model"] + ) + + # send warmup request (with timeout) + r = requests.post(api_url, headers=headers, json=data, timeout=60) + r.raise_for_status() + + return jsonify({"status": "warmed"}), 200 + + except Exception as e: + logging.exception("[WARMUP] Warmup failed.") + return jsonify({"status": "error", "message": str(e)}), 500 + + if __name__ == "__main__": app.run(host='0.0.0.0', port=5000, debug=True) diff --git a/preprocessors/graphic-caption/caption.py b/preprocessors/graphic-caption/caption.py index 835b62445..169811f61 100644 --- a/preprocessors/graphic-caption/caption.py +++ b/preprocessors/graphic-caption/caption.py @@ -173,5 +173,43 @@ def health(): }), 200 +@app.route("/warmup", methods=["GET"]) +def warmup(): + """ + Trigger a warmup call to load the Ollama LLM into memory. + This avoids first-request latency by sending a dummy request. 
+ """ + try: + # construct the target Ollama endpoint for chat + api_url = f"{os.environ['OLLAMA_URL']}/chat" + + # authorization headers with API key + headers = { + "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}" + } + + # prepare the warmup request data using the configured model + data = { + "model": os.environ["OLLAMA_MODEL"], + "messages": [{"role": "user", "content": "warmup"}], + "stream": False + } + + logging.info("[WARMUP] Warmup endpoint triggered.") + logging.debug( + "[Warmup] Posting to %s with model %s", api_url, data["model"] + ) + + # send warmup request (with timeout) + r = requests.post(api_url, headers=headers, json=data, timeout=60) + r.raise_for_status() + + return jsonify({"status": "warmed"}), 200 + + except Exception as e: + logging.exception("[WARMUP] Warmup failed.") + return jsonify({"status": "error", "message": str(e)}), 500 + + if __name__ == "__main__": app.run(host='0.0.0.0', port=5000, debug=True) diff --git a/preprocessors/text-followup/text-followup.py b/preprocessors/text-followup/text-followup.py index 2e674557e..4d6cb49c8 100644 --- a/preprocessors/text-followup/text-followup.py +++ b/preprocessors/text-followup/text-followup.py @@ -401,5 +401,43 @@ def health(): }), 200 +@app.route("/warmup", methods=["GET"]) +def warmup(): + """ + Trigger a warmup call to load the Ollama LLM into memory. + This avoids first-request latency by sending a dummy request. 
+ """ + try: + # construct the target Ollama endpoint for chat + api_url = f"{os.environ['OLLAMA_URL']}/chat" + + # authorization headers with API key + headers = { + "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}" + } + + # prepare the warmup request data using the configured model + data = { + "model": os.environ["OLLAMA_MODEL"], + "messages": [{"role": "user", "content": "warmup"}], + "stream": False + } + + logging.info("[WARMUP] Warmup endpoint triggered.") + logging.debug( + "[Warmup] Posting to %s with model %s", api_url, data["model"] + ) + + # send warmup request (with timeout) + r = requests.post(api_url, headers=headers, json=data, timeout=60) + r.raise_for_status() + + return jsonify({"status": "warmed"}), 200 + + except Exception as e: + logging.exception("[WARMUP] Warmup failed.") + return jsonify({"status": "error", "message": str(e)}), 500 + + if __name__ == "__main__": app.run(host='0.0.0.0', port=5000, debug=True) diff --git a/scripts/imageup b/scripts/imageup index b2bf80156..dd5927ce0 100755 --- a/scripts/imageup +++ b/scripts/imageup @@ -104,4 +104,7 @@ docker network rm -f image || true # Bring all the containers back up docker compose --env-file "$COMPOSE_ENV_FILE" up -d --force-recreate +echo "Running warmup for critical services..." +"${SCRIPT_DIR}/warmup" + cd - \ No newline at end of file diff --git a/scripts/warmup b/scripts/warmup new file mode 100755 index 000000000..5b60f5191 --- /dev/null +++ b/scripts/warmup @@ -0,0 +1,51 @@ +#!/bin/bash + +# Locate this script's directory +SCRIPT_DIR="$(dirname "$(realpath "$0")")" + +# Load env file +ENV_FILE="$SCRIPT_DIR/../config/warmup.env" +if [ -f "$ENV_FILE" ]; then + source "$ENV_FILE" +else + echo "Missing $ENV_FILE" >&2 + exit 1 +fi + +# Setup log file (with date) +timestamp=$(date +"%Y%m%d_%H%M%S") +logfile="${WARMUP_LOG_DIR}/warmup_${timestamp}.log" +mkdir -p "$WARMUP_LOG_DIR" + +echo "[Warmup] $(date) Starting warmup..." 
| tee -a "$logfile" + +# Wait for health and warm up +for container in "${!WARMUP_TARGETS[@]}"; do + endpoint="${WARMUP_TARGETS[$container]}" + + if ! docker inspect "$container" &>/dev/null; then + echo "[Warmup] Container $container not found. Skipping." | tee -a "$logfile" + continue + fi + + echo "[Warmup] Waiting for $container to be healthy..." | tee -a "$logfile" + until [[ "$(docker inspect -f '{{.State.Health.Status}}' "$container")" == "healthy" ]]; do + sleep 5 + done + + echo "[Warmup] $container marked healthy. Waiting 10s before hitting warmup..." | tee -a "$logfile" + # Wait briefly after container is marked healthy to ensure internal models are fully initialized before warmup. + # prevents race conditions where healthcheck passes but model isnt ready + sleep 10 + + echo "[Warmup] Hitting warmup endpoint on $container..." | tee -a "$logfile" + resp=$(docker exec "$container" curl -s -w "%{http_code}" -o /tmp/warmup_resp.txt "$endpoint") + if [[ "$resp" == "200" ]]; then + echo "[Warmup] $container warmed successfully." | tee -a "$logfile" + else + echo "[Warmup] $container warmup failed with HTTP $resp. Response was:" | tee -a "$logfile" + docker exec "$container" cat /tmp/warmup_resp.txt | tee -a "$logfile" || echo "[Warmup] (no response body)" | tee -a "$logfile" + fi +done + +echo "[Warmup] Completed at $(date)! 
" | tee -a "$logfile" \ No newline at end of file From 6e5b26c7dba9de5b8177ed9afd41981ffee72cdc Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Sat, 7 Jun 2025 19:41:46 -0400 Subject: [PATCH 02/13] text2speech warmup endpoints --- services/espnet-tts-fr/src/app.py | 16 ++++++++++++++++ services/espnet-tts/src/app.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/services/espnet-tts-fr/src/app.py b/services/espnet-tts-fr/src/app.py index e54ca902c..b5e4a60e4 100644 --- a/services/espnet-tts-fr/src/app.py +++ b/services/espnet-tts-fr/src/app.py @@ -172,6 +172,22 @@ def segment_tts(): empty_cache() +@app.route("/warmup", methods=["GET"]) +def warmup(): + """ + Trigger a dummy call to warm up the model and pre-load it into GPU memory. + to reduce first-request latency by avoiding model load. + """ + try: + logger.info("[WARMUP] Warmup endpoint triggered.") + # Run inference on a short dummy input + _ = tts("warmup") + return jsonify({"status": "warmed"}), 200 + except Exception as e: + logger.exception("[WARMUP] Warmup failed.") + return jsonify({"status": "error", "message": str(e)}), 500 + + @app.route("/health", methods=["GET"]) def health(): """ diff --git a/services/espnet-tts/src/app.py b/services/espnet-tts/src/app.py index a825776d5..347922560 100644 --- a/services/espnet-tts/src/app.py +++ b/services/espnet-tts/src/app.py @@ -115,6 +115,22 @@ def segment_tts(): empty_cache() +@app.route("/warmup", methods=["GET"]) +def warmup(): + """ + Trigger a dummy call to warm up the model and pre-load it into GPU memory. + to reduce first-request latency by avoiding model load. 
+ """ + try: + logger.info("[WARMUP] Warmup endpoint triggered.") + # Run inference on a short dummy input + _ = tts("warmup") + return jsonify({"status": "warmed"}), 200 + except Exception as e: + logger.exception("[WARMUP] Warmup failed.") + return jsonify({"status": "error", "message": str(e)}), 500 + + @app.route("/health", methods=["GET"]) def health(): """ From ae67f1755968df5ce08da945bc914f80a205bb5f Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Sun, 8 Jun 2025 00:21:39 -0400 Subject: [PATCH 03/13] semantic segmentation warmup endpoint --- preprocessors/mmsemseg/segment.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/preprocessors/mmsemseg/segment.py b/preprocessors/mmsemseg/segment.py index 1e5c86236..c457f399f 100644 --- a/preprocessors/mmsemseg/segment.py +++ b/preprocessors/mmsemseg/segment.py @@ -320,5 +320,33 @@ def gpu_driver_health_check(): }), 500 +@app.route("/warmup", methods=["GET"]) +def warmup(): + """ + Warms up the segmentation model by running a dummy inference. 
+ """ + try: + # dummy black image (512×512) + dummy_img = np.zeros((512, 512, 3), dtype=np.uint8) + + # runs inference_segmentor(): model weight loading/memory allocation + model = init_segmentor(BEIT_CONFIG, BEIT_CHECKPOINT, device='cuda:0') + _ = inference_segmentor(model, dummy_img) + + torch.cuda.empty_cache() + + return jsonify({ + "status": "warmup successful", + "timestamp": datetime.now().isoformat() + }), 200 + + except Exception as e: + logging.exception("Warmup failed") + return jsonify({ + "status": "warmup failed", + "message": str(e) + }), 500 + + if __name__ == "__main__": app.run(host='0.0.0.0', port=5000, debug=True) From ad73553272c2808fcebf9dbd5e31763696ba744a Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Sun, 8 Jun 2025 00:31:54 -0400 Subject: [PATCH 04/13] depth-map-gen warmup endpoint --- .../depth-map-gen/depth-map-generator.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/preprocessors/depth-map-gen/depth-map-generator.py b/preprocessors/depth-map-gen/depth-map-generator.py index eff12c28d..cc88835e3 100644 --- a/preprocessors/depth-map-gen/depth-map-generator.py +++ b/preprocessors/depth-map-gen/depth-map-generator.py @@ -223,6 +223,28 @@ def health(): }), 200 +@app.route("/warmup", methods=["GET"]) +def warmup(): + try: + model = RelDepthModel(backbone='resnext101').eval().cuda() + model.load_state_dict( + strip_prefix_if_present( + torch.load("/app/res101.pth")['depth_model'], "module."), + strict=True + ) + + # simulating a single RGB image input to the model + # 1: one image; 3: RGB; 448 and 448: height and width + dummy = torch.ones((1, 3, 448, 448), dtype=torch.float32).cuda() + _ = model.inference(dummy) + return jsonify({"status": "warmed"}), 200 + + except Exception as e: + logging.error("Warmup failed") + logging.pii(f"Warmup error: {e}") + return jsonify({"status": "warmup failed"}), 500 + + if __name__ == '__main__': app.run(host='0.0.0.0', port=5000, debug=True) depthgenerator() From 
669697f4e88f0617b83faf9bd84e186c95108cbd Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Sun, 8 Jun 2025 00:41:59 -0400 Subject: [PATCH 05/13] object-detection warmup endpoint --- preprocessors/yolo/detect.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/preprocessors/yolo/detect.py b/preprocessors/yolo/detect.py index 0c83b8470..a387bed00 100644 --- a/preprocessors/yolo/detect.py +++ b/preprocessors/yolo/detect.py @@ -239,5 +239,29 @@ def health(): }), 200 +@app.route("/warmup", methods=["GET"]) +def warmup(): + try: + # create a blank dummy image (640x640) + dummy_image = Image.new("RGB", (640, 640), color=(0, 0, 0)) + + # Run YOLO inference with dummy image + with torch.no_grad(): + _ = model.predict( + dummy_image, + device=device, + conf=CONF_THRESHOLD, + imgsz=MAX_IMAGE_SIZE, + verbose=False + ) + + logging.info("YOLO warmup completed successfully") + return jsonify({"status": "ok"}), 200 + except Exception as e: + logging.error(f"YOLO warmup failed: {str(e)}") + logging.pii(traceback.format_exc()) + return jsonify({"status": "error", "error": str(e)}), 500 + + if __name__ == "__main__": app.run(debug=True) From d6ab4d27b2ba1fdb99b9f8d8f1bb095f9ee9e3ae Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Sun, 8 Jun 2025 20:59:02 -0400 Subject: [PATCH 06/13] adding pii --- .../content-categoriser/categoriser.py | 7 +-- .../depth-map-gen/depth-map-generator.py | 2 +- preprocessors/graphic-caption/caption.py | 9 ++-- preprocessors/mmsemseg/segment.py | 1 + .../multistage-diagram-segmentation.py | 43 +++++++++++++++++++ preprocessors/text-followup/text-followup.py | 7 +-- 6 files changed, 58 insertions(+), 11 deletions(-) diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index 6ec1ad48f..f05626a4f 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -221,8 +221,8 @@ def warmup(): } logging.info("[WARMUP] 
Warmup endpoint triggered.") - logging.debug( - "[Warmup] Posting to %s with model %s", api_url, data["model"] + logging.pii( + f"[WARMUP] Posting to {api_url} with model {data['model']}" ) # send warmup request (with timeout) @@ -232,7 +232,8 @@ def warmup(): return jsonify({"status": "warmed"}), 200 except Exception as e: - logging.exception("[WARMUP] Warmup failed.") + logging.pii(f"[WARMUP] Warmup failed: {str(e)}") + logging.exception("[WARMUP] Exception details:") return jsonify({"status": "error", "message": str(e)}), 500 diff --git a/preprocessors/depth-map-gen/depth-map-generator.py b/preprocessors/depth-map-gen/depth-map-generator.py index cc88835e3..8913475a1 100644 --- a/preprocessors/depth-map-gen/depth-map-generator.py +++ b/preprocessors/depth-map-gen/depth-map-generator.py @@ -36,7 +36,6 @@ configure_logging() app = Flask(__name__) -logging.basicConfig(level=logging.DEBUG) def parse_args(): @@ -226,6 +225,7 @@ def health(): @app.route("/warmup", methods=["GET"]) def warmup(): try: + logging.pii("[WARMUP] Initializing RelDepthModel with resnext101 backbone and loading weights from /app/res101.pth") model = RelDepthModel(backbone='resnext101').eval().cuda() model.load_state_dict( strip_prefix_if_present( diff --git a/preprocessors/graphic-caption/caption.py b/preprocessors/graphic-caption/caption.py index 169811f61..bd1fad13b 100644 --- a/preprocessors/graphic-caption/caption.py +++ b/preprocessors/graphic-caption/caption.py @@ -28,7 +28,6 @@ configure_logging() app = Flask(__name__) -logging.basicConfig(level=logging.DEBUG) PROMPT = """Describe this image to a person who cannot see it. Use simple, descriptive, clear, and concise language. 
@@ -196,8 +195,9 @@ def warmup(): } logging.info("[WARMUP] Warmup endpoint triggered.") - logging.debug( - "[Warmup] Posting to %s with model %s", api_url, data["model"] + logging.pii( + f"[WARMUP] Sending warmup request to {api_url} with model: \ + {data['model']}" ) # send warmup request (with timeout) @@ -207,7 +207,8 @@ def warmup(): return jsonify({"status": "warmed"}), 200 except Exception as e: - logging.exception("[WARMUP] Warmup failed.") + logging.pii(f"[WARMUP] Warmup failed: {e}") + logging.exception("[WARMUP] Exception details:") return jsonify({"status": "error", "message": str(e)}), 500 diff --git a/preprocessors/mmsemseg/segment.py b/preprocessors/mmsemseg/segment.py index c457f399f..26163a266 100644 --- a/preprocessors/mmsemseg/segment.py +++ b/preprocessors/mmsemseg/segment.py @@ -341,6 +341,7 @@ def warmup(): }), 200 except Exception as e: + logging.pii(f"[WARMUP] Warmup failed: {e}") logging.exception("Warmup failed") return jsonify({ "status": "warmup failed", diff --git a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py index 8b1870363..49d1c8945 100644 --- a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py +++ b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py @@ -819,5 +819,48 @@ def health(): }), 200 +@app.route("/warmup", methods=["GET"]) +def warmup(): + """ + Warms up Gemini & SAM + by running dummy inferences to load models into memory. + """ + try: + logging.info("Starting warmup routine...") + + # 1. 
warm up Gemini by sending a dummy prompt with a blank image + dummy_img = Image.new("RGB", (512, 512), color="white") + response = client.models.generate_content( + model=GEMINI_MODEL, + contents=["Return an empty JSON object.", dummy_img], + config=types.GenerateContentConfig( + temperature=0.1, + safety_settings=safety_settings, + response_mime_type='application/json', + response_schema=BASE_SCHEMA_GEMINI, + ) + ) + _ = validate_gemini_response(response) + + # 2. warm up SAM with a dummy box + dummy_cv2 = np.zeros((512, 512, 3), dtype=np.uint8) + dummy_pil = Image.fromarray(dummy_cv2) + dummy_bbox = [[100, 100, 200, 200]] # [x1, y1, x2, y2] + _ = sam_model(dummy_pil, bboxes=dummy_bbox) + + logging.info("Warmup completed successfully.") + return jsonify({ + "status": "warmup successful", + "timestamp": datetime.now().isoformat() + }), 200 + + except Exception as e: + logging.exception("Warmup failed") + return jsonify({ + "status": "warmup failed", + "message": str(e) + }), 500 + + if __name__ == "__main__": app.run(host='0.0.0.0', port=5000, debug=True) diff --git a/preprocessors/text-followup/text-followup.py b/preprocessors/text-followup/text-followup.py index 4d6cb49c8..1aefd135e 100644 --- a/preprocessors/text-followup/text-followup.py +++ b/preprocessors/text-followup/text-followup.py @@ -424,8 +424,8 @@ def warmup(): } logging.info("[WARMUP] Warmup endpoint triggered.") - logging.debug( - "[Warmup] Posting to %s with model %s", api_url, data["model"] + logging.pii( + f"[WARMUP] Posting to {api_url} with model {data['model']}" ) # send warmup request (with timeout) @@ -435,7 +435,8 @@ def warmup(): return jsonify({"status": "warmed"}), 200 except Exception as e: - logging.exception("[WARMUP] Warmup failed.") + logging.pii(f"[WARMUP] Warmup failed: {str(e)}") + logging.exception("[WARMUP] Exception details:") return jsonify({"status": "error", "message": str(e)}), 500 From 3ed541c12e1ced6d1693beefd6b97111a8ab5d8b Mon Sep 17 00:00:00 2001 From: Shahd Yousef 
Date: Sun, 8 Jun 2025 21:01:19 -0400 Subject: [PATCH 07/13] added warmup endpoint to preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py with pii --- .../multistage-diagram-segmentation.py | 29 +++++-------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py index 49d1c8945..9e94a6c4e 100644 --- a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py +++ b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py @@ -821,18 +821,14 @@ def health(): @app.route("/warmup", methods=["GET"]) def warmup(): - """ - Warms up Gemini & SAM - by running dummy inferences to load models into memory. - """ try: - logging.info("Starting warmup routine...") + logging.info("Warming up Gemini and SAM...") - # 1. warm up Gemini by sending a dummy prompt with a blank image + # Gemini: dummy image + prompt dummy_img = Image.new("RGB", (512, 512), color="white") response = client.models.generate_content( model=GEMINI_MODEL, - contents=["Return an empty JSON object.", dummy_img], + contents=["{}", dummy_img], config=types.GenerateContentConfig( temperature=0.1, safety_settings=safety_settings, @@ -842,24 +838,15 @@ def warmup(): ) _ = validate_gemini_response(response) - # 2. 
warm up SAM with a dummy box + # SAM: dummy box dummy_cv2 = np.zeros((512, 512, 3), dtype=np.uint8) dummy_pil = Image.fromarray(dummy_cv2) - dummy_bbox = [[100, 100, 200, 200]] # [x1, y1, x2, y2] - _ = sam_model(dummy_pil, bboxes=dummy_bbox) - - logging.info("Warmup completed successfully.") - return jsonify({ - "status": "warmup successful", - "timestamp": datetime.now().isoformat() - }), 200 + _ = sam_model(dummy_pil, bboxes=[[100, 100, 200, 200]]) + return jsonify({"status": "ok"}), 200 except Exception as e: - logging.exception("Warmup failed") - return jsonify({ - "status": "warmup failed", - "message": str(e) - }), 500 + logging.pii(f"Warmup failed: {str(e)}") + return jsonify({"status": "error", "message": str(e)}), 500 if __name__ == "__main__": From afdd70802f0791c75fea3b95368a17e882686c84 Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Sun, 8 Jun 2025 21:02:53 -0400 Subject: [PATCH 08/13] pep8 --- preprocessors/depth-map-gen/depth-map-generator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/preprocessors/depth-map-gen/depth-map-generator.py b/preprocessors/depth-map-gen/depth-map-generator.py index 8913475a1..5424523d1 100644 --- a/preprocessors/depth-map-gen/depth-map-generator.py +++ b/preprocessors/depth-map-gen/depth-map-generator.py @@ -225,7 +225,8 @@ def health(): @app.route("/warmup", methods=["GET"]) def warmup(): try: - logging.pii("[WARMUP] Initializing RelDepthModel with resnext101 backbone and loading weights from /app/res101.pth") + logging.pii("[WARMUP] Initializing RelDepthModel with resnext101 \ + and loading weights from /app/res101.pth") model = RelDepthModel(backbone='resnext101').eval().cuda() model.load_state_dict( strip_prefix_if_present( From 7c5c76252453920eff41041d521ee0afc129ba4c Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Mon, 16 Jun 2025 13:12:21 -0400 Subject: [PATCH 09/13] flagging services with WARMUP_ENABLED=true to hit warmup endpoint, modified the warmup script to read accordingly --- 
docker-compose.yml | 11 +++++++++++ scripts/warmup | 40 ++++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 02c2fdab7..231ce87ba 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -44,6 +44,7 @@ services: restart: "no" environment: - TORCH_DEVICE=cuda + - WARMUP_ENABLED=true labels: ca.mcgill.a11y.image.cacheTimeout: 3600 deploy: @@ -59,6 +60,7 @@ services: restart: "no" environment: - TORCH_DEVICE=cuda + - WARMUP_ENABLED=true labels: ca.mcgill.a11y.image.cacheTimeout: 3600 deploy: @@ -80,6 +82,8 @@ services: devices: - driver: nvidia capabilities: ["gpu", "compute", "utility"] + environment: + - WARMUP_ENABLED=true autour-preprocessor: profiles: [production, test, default] @@ -110,6 +114,7 @@ services: ./config/ollama.env environment: - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED} + - WARMUP_ENABLED=true graphic-caption: profiles: [production, test, default] @@ -125,6 +130,7 @@ services: ./config/ollama.env environment: - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED} + - WARMUP_ENABLED=true text-followup: profiles: [production, test, default] @@ -134,6 +140,7 @@ services: - MAX_HISTORY_LENGTH=100 - HISTORY_EXPIRY=3600 - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED} + - WARMUP_ENABLED=true labels: ca.mcgill.a11y.image.preprocessor: 1 ca.mcgill.a11y.image.port: 5000 @@ -179,6 +186,7 @@ services: - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED} - YOLO_MODEL_PATH=/usr/src/app/models/yolo11x.pt - CONF_THRESHOLD=0.75 + - WARMUP_ENABLED=true multistage-diagram-segmentation: profiles: [production, test, default] @@ -201,6 +209,7 @@ services: - SAM_MODEL_PATH=/usr/src/app/models/sam2.1_l.pt - GEMINI_MODEL=gemini-2.5-pro-preview-06-05 - BASE_SCHEMA=/usr/src/app/base_schema.json + - WARMUP_ENABLED=true env_file: ./config/gemini.env @@ -263,6 +272,7 @@ services: ca.mcgill.a11y.image.optional_dependencies: "content-categoriser,graphic-tagger" environment: - 
PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED} + - WARMUP_ENABLED=true supercollider: profiles: [production, test, default] @@ -376,6 +386,7 @@ services: ca.mcgill.a11y.image.optional_dependencies: "" environment: - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED} + - WARMUP_ENABLED=true svg-depth-map: profiles: [production, test, default] diff --git a/scripts/warmup b/scripts/warmup index 5b60f5191..ef0136d2d 100755 --- a/scripts/warmup +++ b/scripts/warmup @@ -3,43 +3,47 @@ # Locate this script's directory SCRIPT_DIR="$(dirname "$(realpath "$0")")" -# Load env file -ENV_FILE="$SCRIPT_DIR/../config/warmup.env" -if [ -f "$ENV_FILE" ]; then - source "$ENV_FILE" -else - echo "Missing $ENV_FILE" >&2 - exit 1 -fi - -# Setup log file (with date) +WARMUP_LOG_DIR="/var/docker/image/testing/warmup" timestamp=$(date +"%Y%m%d_%H%M%S") logfile="${WARMUP_LOG_DIR}/warmup_${timestamp}.log" mkdir -p "$WARMUP_LOG_DIR" echo "[Warmup] $(date) Starting warmup..." | tee -a "$logfile" -# Wait for health and warm up -for container in "${!WARMUP_TARGETS[@]}"; do - endpoint="${WARMUP_TARGETS[$container]}" +# Get all running containers +containers=$(docker ps --format '{{.Names}}') - if ! docker inspect "$container" &>/dev/null; then - echo "[Warmup] Container $container not found. Skipping." | tee -a "$logfile" +for container in $containers; do + # Check if WARMUP_ENABLED=true is present in the environment + if ! docker inspect -f '{{range .Config.Env}}{{println .}}{{end}}' "$container" | grep -q "^WARMUP_ENABLED=true$"; then continue fi + # Get EXPOSED port (assume first one is the correct one) + exposed_port=$(docker inspect -f '{{range $p, $_ := .Config.ExposedPorts}}{{println $p}}{{end}}' "$container" | head -n1 | cut -d'/' -f1) + if [ -z "$exposed_port" ]; then + echo "[Warmup] $container has no EXPOSEd port. Skipping." | tee -a "$logfile" + continue + fi + + endpoint="http://localhost:${exposed_port}/warmup" + echo "[Warmup] Waiting for $container to be healthy..." 
| tee -a "$logfile" until [[ "$(docker inspect -f '{{.State.Health.Status}}' "$container")" == "healthy" ]]; do - sleep 5 + sleep 2 done echo "[Warmup] $container marked healthy. Waiting 10s before hitting warmup..." | tee -a "$logfile" # Wait briefly after container is marked healthy to ensure internal models are fully initialized before warmup. # prevents race conditions where healthcheck passes but model isnt ready - sleep 10 - echo "[Warmup] Hitting warmup endpoint on $container..." | tee -a "$logfile" + # add random jitter to stagger warmups (addresses potential resource spike if all hit at once) + jitter=$((RANDOM % 5)) + sleep $((10 + jitter)) + + echo "[Warmup] Hitting warmup endpoint at $endpoint..." | tee -a "$logfile" resp=$(docker exec "$container" curl -s -w "%{http_code}" -o /tmp/warmup_resp.txt "$endpoint") + if [[ "$resp" == "200" ]]; then echo "[Warmup] $container warmed successfully." | tee -a "$logfile" else From 05e69a49b3d2ff05e358ce0d80539f657faa3cec Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Mon, 16 Jun 2025 23:01:59 -0400 Subject: [PATCH 10/13] updating warmup endpoint for yolo --- preprocessors/yolo/detect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessors/yolo/detect.py b/preprocessors/yolo/detect.py index a387bed00..1b38466cd 100644 --- a/preprocessors/yolo/detect.py +++ b/preprocessors/yolo/detect.py @@ -251,7 +251,7 @@ def warmup(): dummy_image, device=device, conf=CONF_THRESHOLD, - imgsz=MAX_IMAGE_SIZE, + # imgsz=MAX_IMAGE_SIZE, verbose=False ) From 84fae39c369358d18086576c4c3731c342107190 Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Tue, 17 Jun 2025 18:57:40 -0400 Subject: [PATCH 11/13] modifying preprocessors warmup endpoints --- .../content-categoriser/categoriser.py | 20 +++++++++--------- preprocessors/graphic-caption/caption.py | 21 +++++++++---------- preprocessors/mmsemseg/Dockerfile | 4 ++-- preprocessors/mmsemseg/segment.py | 9 +++++--- preprocessors/yolo/detect.py | 4 ++-- 5 files 
changed, 30 insertions(+), 28 deletions(-) diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index f05626a4f..6ec1b9c7c 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -205,25 +205,26 @@ def warmup(): This avoids first-request latency by sending a dummy request. """ try: - # construct the target Ollama endpoint for chat - api_url = f"{os.environ['OLLAMA_URL']}/chat" + # construct the target Ollama endpoint for generate + api_url = f"{os.environ['OLLAMA_URL']}/generate" # authorization headers with API key headers = { - "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}" + "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}", + "Content-Type": "application/json" } # prepare the warmup request data using the configured model data = { "model": os.environ["OLLAMA_MODEL"], - "messages": [{"role": "user", "content": "warmup"}], - "stream": False + "prompt": "ping", + "stream": False, + "keep_alive": -1 # instruct Ollama to keep the model in memory } logging.info("[WARMUP] Warmup endpoint triggered.") - logging.pii( - f"[WARMUP] Posting to {api_url} with model {data['model']}" - ) + logging.pii(f"[WARMUP] Posting to {api_url} with model \ + {data['model']}") # send warmup request (with timeout) r = requests.post(api_url, headers=headers, json=data, timeout=60) @@ -232,8 +233,7 @@ def warmup(): return jsonify({"status": "warmed"}), 200 except Exception as e: - logging.pii(f"[WARMUP] Warmup failed: {str(e)}") - logging.exception("[WARMUP] Exception details:") + logging.exception(f"[WARMUP] Exception details: {str(e)}") return jsonify({"status": "error", "message": str(e)}), 500 diff --git a/preprocessors/graphic-caption/caption.py b/preprocessors/graphic-caption/caption.py index bd1fad13b..64979dbe2 100644 --- a/preprocessors/graphic-caption/caption.py +++ b/preprocessors/graphic-caption/caption.py @@ -179,26 +179,26 @@ def 
warmup(): This avoids first-request latency by sending a dummy request. """ try: - # construct the target Ollama endpoint for chat - api_url = f"{os.environ['OLLAMA_URL']}/chat" + # construct the target Ollama endpoint for generate + api_url = f"{os.environ['OLLAMA_URL']}/generate" # authorization headers with API key headers = { - "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}" + "Authorization": f"Bearer {os.environ['OLLAMA_API_KEY']}", + "Content-Type": "application/json" } # prepare the warmup request data using the configured model data = { "model": os.environ["OLLAMA_MODEL"], - "messages": [{"role": "user", "content": "warmup"}], - "stream": False + "prompt": "ping", + "stream": False, + "keep_alive": -1 # instruct Ollama to keep the model in memory } logging.info("[WARMUP] Warmup endpoint triggered.") - logging.pii( - f"[WARMUP] Sending warmup request to {api_url} with model: \ - {data['model']}" - ) + logging.pii(f"[WARMUP] Posting to {api_url} with model \ + {data['model']}") # send warmup request (with timeout) r = requests.post(api_url, headers=headers, json=data, timeout=60) @@ -207,8 +207,7 @@ def warmup(): return jsonify({"status": "warmed"}), 200 except Exception as e: - logging.pii(f"[WARMUP] Warmup failed: {e}") - logging.exception("[WARMUP] Exception details:") + logging.exception(f"[WARMUP] Exception details: {str(e)}") return jsonify({"status": "error", "message": str(e)}), 500 diff --git a/preprocessors/mmsemseg/Dockerfile b/preprocessors/mmsemseg/Dockerfile index 99d0a4c1d..ab4f536d3 100644 --- a/preprocessors/mmsemseg/Dockerfile +++ b/preprocessors/mmsemseg/Dockerfile @@ -52,7 +52,7 @@ EXPOSE 5000 ENV FLASK_APP=segment.py USER python -HEALTHCHECK --interval=60s --timeout=10s --start-period=120s --retries=5 CMD curl -f http://localhost:5000/health || exit 1 -HEALTHCHECK --interval=3600s --timeout=30s --start-period=120s --retries=3 CMD curl -f http://localhost:5000/health/gpu || exit 1 +HEALTHCHECK --interval=60s --timeout=10s 
--start-period=120s --retries=5 \ + CMD curl -f http://localhost:5000/health && curl -f http://localhost:5000/health/gpu || exit 1 CMD [ "gunicorn", "segment:app", "-b", "0.0.0.0:5000", "--capture-output", "--log-level=debug" ] \ No newline at end of file diff --git a/preprocessors/mmsemseg/segment.py b/preprocessors/mmsemseg/segment.py index 26163a266..302722a7a 100644 --- a/preprocessors/mmsemseg/segment.py +++ b/preprocessors/mmsemseg/segment.py @@ -284,10 +284,13 @@ def gpu_driver_health_check(): try: # Get installed NVIDIA driver version from nvidia-smi nvidia_smi_version = subprocess.check_output( - ["nvidia-smi", "--query-gpu=driver_version", - "--format=csv,noheader"], + [ + "nvidia-smi", + "--query-gpu=driver_version", + "--format=csv,noheader" + ], text=True - ).strip() + ).strip().split("\n")[0] # Get loaded driver version from /proc/driver/nvidia/version loaded_driver_version = subprocess.check_output( diff --git a/preprocessors/yolo/detect.py b/preprocessors/yolo/detect.py index 1b38466cd..83cfea757 100644 --- a/preprocessors/yolo/detect.py +++ b/preprocessors/yolo/detect.py @@ -243,7 +243,7 @@ def health(): def warmup(): try: # create a blank dummy image (640x640) - dummy_image = Image.new("RGB", (640, 640), color=(0, 0, 0)) + dummy_image = Image.new("RGB", (8, 8), color=(0, 0, 0)) # Run YOLO inference with dummy image with torch.no_grad(): @@ -255,7 +255,7 @@ def warmup(): verbose=False ) - logging.info("YOLO warmup completed successfully") + logging.info("YOLO warmup completed successfully with 8x8 image.") return jsonify({"status": "ok"}), 200 except Exception as e: logging.error(f"YOLO warmup failed: {str(e)}") From 69dd531c22fc8b5b64a0e0cdae779ccb832bba63 Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Tue, 17 Jun 2025 19:00:34 -0400 Subject: [PATCH 12/13] removing trailing space --- preprocessors/mmsemseg/segment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessors/mmsemseg/segment.py 
b/preprocessors/mmsemseg/segment.py index 302722a7a..615a99458 100644 --- a/preprocessors/mmsemseg/segment.py +++ b/preprocessors/mmsemseg/segment.py @@ -285,7 +285,7 @@ def gpu_driver_health_check(): # Get installed NVIDIA driver version from nvidia-smi nvidia_smi_version = subprocess.check_output( [ - "nvidia-smi", + "nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader" ], From 0ac63b122bb45d312bb893f68eaeccc7005f5232 Mon Sep 17 00:00:00 2001 From: Shahd Yousef Date: Tue, 17 Jun 2025 19:05:38 -0400 Subject: [PATCH 13/13] Restrict to containers on the 'image' Docker network --- scripts/warmup | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/warmup b/scripts/warmup index ef0136d2d..909ab4f20 100755 --- a/scripts/warmup +++ b/scripts/warmup @@ -11,7 +11,13 @@ mkdir -p "$WARMUP_LOG_DIR" echo "[Warmup] $(date) Starting warmup..." | tee -a "$logfile" # Get all running containers -containers=$(docker ps --format '{{.Names}}') +# Restrict to containers on the 'image' Docker network +if docker network inspect image &> /dev/null; then + containers=$(docker network inspect image | jq -r '.[0].Containers | to_entries[] | .value.Name') +else + echo "[Warmup] No Docker network named 'image' found. Aborting." | tee -a "$logfile" + exit 1 +fi for container in $containers; do # Check if WARMUP_ENABLED=true is present in the environment @@ -38,6 +44,7 @@ for container in $containers; do # prevents race conditions where healthcheck passes but model isnt ready # add random jitter to stagger warmups (addresses potential resource spike if all hit at once) + # note: even if some warmups fail, most models will still be partially/fully loaded, so the first real request is likely to succeed or respond faster than a cold start. jitter=$((RANDOM % 5)) sleep $((10 + jitter))