From 3a14c89c054119f99675b8ff16422910238899e6 Mon Sep 17 00:00:00 2001 From: Mike Gvozdev Date: Tue, 19 Aug 2025 14:04:59 -0400 Subject: [PATCH 1/7] update categorisation process rebase onto main --- .../content-categoriser/categoriser.py | 5 +++- schemas | 2 +- utils/llm/prompts.py | 29 +++++++++++++++---- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index 96defc40..c0d998a0 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -74,13 +74,16 @@ def categorise(): base64_image = source.split(",")[1] graphic_category = llm_client.chat_completion( - prompt=CATEGORISER_PROMPT + POSSIBLE_CATEGORIES, + prompt=f"{CATEGORISER_PROMPT} {POSSIBLE_CATEGORIES}", image_base64=base64_image, temperature=0.0, json_schema=CATEGORISER_RESPONSE_SCHEMA, parse_json=True ) + logging.debug(f"PROMPT: {CATEGORISER_PROMPT} {POSSIBLE_CATEGORIES}") + logging.debug(f"Schema: {CATEGORISER_RESPONSE_SCHEMA}") + if graphic_category is None: logging.error("Failed to receive response from LLM.") return jsonify( diff --git a/schemas b/schemas index 768f9357..2a790c8f 160000 --- a/schemas +++ b/schemas @@ -1 +1 @@ -Subproject commit 768f935733686d7a56c7075cf4d412ade3fe9eeb +Subproject commit 2a790c8fe10aa895b5f4cb13846140d80db339b2 diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py index 12b264cc..3e5fb12b 100644 --- a/utils/llm/prompts.py +++ b/utils/llm/prompts.py @@ -21,14 +21,33 @@ ### # Content categoriser +# CATEGORISER_PROMPT = """ +# Answer only in JSON with the following format: +# '{"category": "YOUR_ANSWER"}.' +# Which of the following categories best +# describes this image, selecting from this enum: +# """ + +# POSSIBLE_CATEGORIES = "photograph, chart, text, other" + CATEGORISER_PROMPT = """ -Answer only in JSON with the following format: -'{"category": "YOUR_ANSWER"}.' -Which of the following categories best -describes this image, selecting from this enum: +Your task is to categorise the content of an image. +Answer only in JSON. +Assign boolean values (true or false) to each of the following categories: """ -POSSIBLE_CATEGORIES = "photograph, chart, text, other" + +POSSIBLE_CATEGORIES = [ + "photo", + "diagram", + "flow_diagram", + "contains_text", + "people", + "animals", + "collage", + "chart_or_graph", + "illustration" + ] ### # Followup From b2f23b513d54f40b7e8e18a2bf983e1380747087 Mon Sep 17 00:00:00 2001 From: Mike Gvozdev Date: Tue, 26 Aug 2025 13:51:54 -0400 Subject: [PATCH 2/7] parse categories based on updated schema --- preprocessors/content-categoriser/categoriser.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index c0d998a0..893287b3 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -36,6 +36,16 @@ with open(DATA_SCHEMA, 'r') as f: CATEGORISER_RESPONSE_SCHEMA = json.load(f) +categories_properties = ( + CATEGORISER_RESPONSE_SCHEMA.get("properties", {}) + .get("categories", {}) + .get("properties", {}) +) +POSSIBLE_CATEGORIES = list(categories_properties.keys()) + +logging.debug(f"Data schema: {CATEGORISER_RESPONSE_SCHEMA}") +logging.debug(f"Possible categories: {POSSIBLE_CATEGORIES}") + PREPROCESSOR_NAME = "ca.mcgill.a11y.image.preprocessor.contentCategoriser" try: @@ -73,7 +83,7 @@ def categorise(): source = content["graphic"] base64_image = source.split(",")[1] - graphic_category = llm_client.chat_completion( + graphic_categories = llm_client.chat_completion( prompt=f"{CATEGORISER_PROMPT} {POSSIBLE_CATEGORIES}", image_base64=base64_image, temperature=0.0, @@ -84,7 +94,7 @@ def categorise(): logging.debug(f"PROMPT: {CATEGORISER_PROMPT} {POSSIBLE_CATEGORIES}") logging.debug(f"Schema: {CATEGORISER_RESPONSE_SCHEMA}") - if graphic_category is None: + if graphic_categories is None: logging.error("Failed to receive response from LLM.") return jsonify( {"error": "Failed to get graphic category from LLM"} From 9c73c4c7231252585d77133a53a3dade308a2e61 Mon Sep 17 00:00:00 2001 From: Mike Gvozdev Date: Tue, 26 Aug 2025 16:27:02 -0400 Subject: [PATCH 3/7] revert schemas --- schemas | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schemas b/schemas index 2a790c8f..768f9357 160000 --- a/schemas +++ b/schemas @@ -1 +1 @@ -Subproject commit 2a790c8fe10aa895b5f4cb13846140d80db339b2 +Subproject commit 768f935733686d7a56c7075cf4d412ade3fe9eeb From 1a956d57f90eef6b418f15cac6c8eab384f5108a Mon Sep 17 00:00:00 2001 From: Mike Gvozdev Date: Tue, 26 Aug 2025 16:37:12 -0400 Subject: [PATCH 4/7] remove unused prompts and excessive logging --- .../content-categoriser/categoriser.py | 5 ---- utils/llm/prompts.py | 24 ------------------- 2 files changed, 29 deletions(-) diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index 893287b3..4e4f49f2 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -42,8 +42,6 @@ .get("properties", {}) ) POSSIBLE_CATEGORIES = list(categories_properties.keys()) - -logging.debug(f"Data schema: {CATEGORISER_RESPONSE_SCHEMA}") logging.debug(f"Possible categories: {POSSIBLE_CATEGORIES}") PREPROCESSOR_NAME = "ca.mcgill.a11y.image.preprocessor.contentCategoriser" @@ -91,9 +89,6 @@ def categorise(): parse_json=True ) - logging.debug(f"PROMPT: {CATEGORISER_PROMPT} {POSSIBLE_CATEGORIES}") - logging.debug(f"Schema: {CATEGORISER_RESPONSE_SCHEMA}") - if graphic_categories is None: logging.error("Failed to receive response from LLM.") return jsonify( diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py index 3e5fb12b..f9ecdbf0 100644 --- a/utils/llm/prompts.py +++ b/utils/llm/prompts.py @@ -18,36 +18,12 @@ the graphic type is significant (like oil painting or aerial photo). Instead, start describing the graphic right away. """ -### - # Content categoriser -# CATEGORISER_PROMPT = """ -# Answer only in JSON with the following format: -# '{"category": "YOUR_ANSWER"}.' -# Which of the following categories best -# describes this image, selecting from this enum: -# """ - -# POSSIBLE_CATEGORIES = "photograph, chart, text, other" - CATEGORISER_PROMPT = """ Your task is to categorise the content of an image. Answer only in JSON. Assign boolean values (true or false) to each of the following categories: """ - - -POSSIBLE_CATEGORIES = [ - "photo", - "diagram", - "flow_diagram", - "contains_text", - "people", - "animals", - "collage", - "chart_or_graph", - "illustration" - ] ### # Followup From 1643f7dc3e8d68dcb44976d884e5afc4c78f064d Mon Sep 17 00:00:00 2001 From: Mike Gvozdev Date: Thu, 11 Sep 2025 13:05:49 -0400 Subject: [PATCH 5/7] cleanup after rebase --- preprocessors/content-categoriser/categoriser.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index 4e4f49f2..8c59752f 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -20,11 +20,8 @@ import sys from datetime import datetime from config.logging_utils import configure_logging -from utils.llm import ( - LLMClient, - CATEGORISER_PROMPT, - POSSIBLE_CATEGORIES -) +from utils.llm import LLMClient, CATEGORISER_PROMPT + from utils.validation import Validator import json @@ -81,7 +78,7 @@ def categorise(): source = content["graphic"] base64_image = source.split(",")[1] - graphic_categories = llm_client.chat_completion( + graphic_category = llm_client.chat_completion( prompt=f"{CATEGORISER_PROMPT} {POSSIBLE_CATEGORIES}", image_base64=base64_image, temperature=0.0, @@ -89,7 +86,7 @@ def categorise(): parse_json=True ) - if graphic_categories is None: + if graphic_category is None: logging.error("Failed to receive response from LLM.") return jsonify( {"error": "Failed to get graphic category from LLM"} From f1c697e6f253db40ac7c52e920a3868be6be82ee Mon Sep 17 00:00:00 2001 From: Mike Gvozdev Date: Thu, 11 Sep 2025 13:09:40 -0400 Subject: [PATCH 6/7] remove fixed categories from the utility module --- utils/llm/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/llm/__init__.py b/utils/llm/__init__.py index c3173a37..4588284d 100644 --- a/utils/llm/__init__.py +++ b/utils/llm/__init__.py @@ -9,7 +9,6 @@ BOUNDING_BOX_PROMPT_EXAMPLE, GRAPHIC_CAPTION_PROMPT, CATEGORISER_PROMPT, - POSSIBLE_CATEGORIES, FOLLOWUP_PROMPT ) @@ -20,6 +19,5 @@ 'BOUNDING_BOX_PROMPT_EXAMPLE', 'GRAPHIC_CAPTION_PROMPT', 'CATEGORISER_PROMPT', - 'POSSIBLE_CATEGORIES', 'FOLLOWUP_PROMPT' ] From 28be6aed67e602570b825fe6625d3d1e827dca02 Mon Sep 17 00:00:00 2001 From: Mike Gvozdev Date: Thu, 11 Sep 2025 14:34:42 -0400 Subject: [PATCH 7/7] add category check --- docker-compose.yml | 2 +- preprocessors/content-categoriser/categoriser.py | 2 -- .../multistage-diagram-segmentation.py | 11 +++++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index ca9cf249..2f0d5fbf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -198,7 +198,7 @@ services: ca.mcgill.a11y.image.preprocessor: 1 ca.mcgill.a11y.image.port: 5000 ca.mcgill.a11y.image.cacheTimeout: 3600 - ca.mcgill.a11y.image.required_dependencies: "" + ca.mcgill.a11y.image.required_dependencies: "content-categoriser" ca.mcgill.a11y.image.optional_dependencies: "" deploy: resources: diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index 8c59752f..92cfb8bc 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -92,8 +92,6 @@ def categorise(): {"error": "Failed to get graphic category from LLM"} ), 500 - logging.pii(f"Graphic category JSON: {graphic_category}") - # data schema validation ok, _ = validator.check_data(graphic_category) if not ok: diff --git a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py index a7a78e5c..2d9fc2cb 100644 --- a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py +++ b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py @@ -97,6 +97,17 @@ def process_diagram(): if not ok: return jsonify({"error": "Invalid Preprocessor JSON format"}), 400 + # Determine if the content is a multistage diagram + # based on the categoriser output + preprocess_output = content["preprocessors"] + categoriser = "ca.mcgill.a11y.image.preprocessor.contentCategoriser" + if categoriser in preprocess_output: + categoriser_output = preprocess_output[categoriser] + categoriser_tags = categoriser_output["categories"] + if not categoriser_tags["multistage_diagram"]: + logging.info("Not a multistage diagram. Skipping...") + return "", 204 + request_uuid = content["request_uuid"] timestamp = time.time()