diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py index ce686ce7..5eb36b0b 100644 --- a/utils/llm/prompts.py +++ b/utils/llm/prompts.py @@ -142,6 +142,8 @@ Output a only JSON list of bounding boxes where each entry contains the 2D bounding box in the key "box_2d", and the stage name in the key "label". +Include in the bounding boxes only the illustrations of the objects themselves, +not any surrounding text or arrows. """ diff --git a/utils/segmentation/sam_processor.py b/utils/segmentation/sam_processor.py index a4fa05a5..4d2eca2d 100644 --- a/utils/segmentation/sam_processor.py +++ b/utils/segmentation/sam_processor.py @@ -136,8 +136,25 @@ def segment_with_boxes( ) continue - logging.pii(f"Processing bounding box for label: '{label}'") - bboxes.append(bbox) + logging.pii( + f"Processing bounding box for label: '{label}' " + f"(normalized coords: {bbox})" + ) + + # Convert normalized coordinates (0-1000) received from Qwen 3 + # to pixel coordinates + bbox_pixels = [ + (bbox[0] / 1000.0) * width, + (bbox[1] / 1000.0) * height, + (bbox[2] / 1000.0) * width, + (bbox[3] / 1000.0) * height + ] + + logging.pii( + f"Converted to pixel coords: {bbox_pixels}" + ) + + bboxes.append(bbox_pixels) labels.append(label) if not bboxes: