From 514e0ab29d524456f58962c6b00ba702fbc7c5b0 Mon Sep 17 00:00:00 2001 From: Mike Gvozdev Date: Sun, 23 Nov 2025 20:22:28 -0500 Subject: [PATCH] update multistage-diagram-segmentation to work with new bounding box coordinates --- utils/llm/prompts.py | 2 ++ utils/segmentation/sam_processor.py | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py index ce686ce7..5eb36b0b 100644 --- a/utils/llm/prompts.py +++ b/utils/llm/prompts.py @@ -142,6 +142,8 @@ Output a only JSON list of bounding boxes where each entry contains the 2D bounding box in the key "box_2d", and the stage name in the key "label". +Include in the bounding boxes only the illustrations of the objects themselves, +not any surrounding text or arrows. """ diff --git a/utils/segmentation/sam_processor.py b/utils/segmentation/sam_processor.py index a4fa05a5..4d2eca2d 100644 --- a/utils/segmentation/sam_processor.py +++ b/utils/segmentation/sam_processor.py @@ -136,8 +136,25 @@ def segment_with_boxes( ) continue - logging.pii(f"Processing bounding box for label: '{label}'") - bboxes.append(bbox) + logging.pii( + f"Processing bounding box for label: '{label}' " + f"(normalized coords: {bbox})" + ) + + # Convert normalized coordinates (0-1000) received from Qwen 3 + # to pixel coordinates + bbox_pixels = [ + (bbox[0] / 1000.0) * width, + (bbox[1] / 1000.0) * height, + (bbox[2] / 1000.0) * width, + (bbox[3] / 1000.0) * height + ] + + logging.pii( + f"Converted to pixel coords: {bbox_pixels}" + ) + + bboxes.append(bbox_pixels) labels.append(label) if not bboxes: