Shared-Reality-Lab · jaydeepsingh25 · Sep 26, 2025 · Aug 19, 2025 · Aug 26, 2025 · Aug 26, 2025
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -198,7 +198,7 @@ services:
       ca.mcgill.a11y.image.preprocessor: 1
       ca.mcgill.a11y.image.port: 5000
       ca.mcgill.a11y.image.cacheTimeout: 3600
-      ca.mcgill.a11y.image.required_dependencies: ""
+      ca.mcgill.a11y.image.required_dependencies: "content-categoriser"
       ca.mcgill.a11y.image.optional_dependencies: ""
     deploy:
       resources:

diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py
@@ -20,11 +20,8 @@
 import sys
 from datetime import datetime
 from config.logging_utils import configure_logging
-from utils.llm import (
-    LLMClient,
-    CATEGORISER_PROMPT,
-    POSSIBLE_CATEGORIES
-)
+from utils.llm import LLMClient, CATEGORISER_PROMPT
+
 from utils.validation import Validator
 import json
 
@@ -36,6 +33,14 @@
 with open(DATA_SCHEMA, 'r') as f:
     CATEGORISER_RESPONSE_SCHEMA = json.load(f)
 
+# Category names are the property keys of the schema's "categories" object.
+categories_properties = (
+    CATEGORISER_RESPONSE_SCHEMA.get("properties", {})
+    .get("categories", {}).get("properties", {})
+)
+POSSIBLE_CATEGORIES = list(categories_properties)
+logging.debug(f"Possible categories: {POSSIBLE_CATEGORIES}")
+
 PREPROCESSOR_NAME = "ca.mcgill.a11y.image.preprocessor.contentCategoriser"
 
 try:
@@ -74,7 +79,7 @@ def categorise():
     base64_image = source.split(",")[1]
 
     graphic_category = llm_client.chat_completion(
-        prompt=CATEGORISER_PROMPT + POSSIBLE_CATEGORIES,
+        prompt=f"{CATEGORISER_PROMPT} {POSSIBLE_CATEGORIES}",
         image_base64=base64_image,
         temperature=0.0,
         json_schema=CATEGORISER_RESPONSE_SCHEMA,
@@ -87,8 +92,6 @@ def categorise():
             {"error": "Failed to get graphic category from LLM"}
         ), 500
 
-    logging.pii(f"Graphic category JSON: {graphic_category}")
-
     # data schema validation
     ok, _ = validator.check_data(graphic_category)
     if not ok:

diff --git a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py
@@ -97,6 +97,17 @@ def process_diagram():
     if not ok:
         return jsonify({"error": "Invalid Preprocessor JSON format"}), 400
 
+    # Skip this request (HTTP 204) when the content categoriser's output is
+    # present and does not flag the graphic as a multistage diagram.
+    preprocess_output = content["preprocessors"]
+    categoriser = "ca.mcgill.a11y.image.preprocessor.contentCategoriser"
+    if categoriser in preprocess_output:
+        categoriser_output = preprocess_output[categoriser]
+        categoriser_tags = categoriser_output["categories"]
+        if not categoriser_tags["multistage_diagram"]:
+            logging.info("Not a multistage diagram. Skipping...")
+            return "", 204
+
     request_uuid = content["request_uuid"]
     timestamp = time.time()
 

diff --git a/utils/llm/__init__.py b/utils/llm/__init__.py
@@ -9,7 +9,6 @@
     BOUNDING_BOX_PROMPT_EXAMPLE,
     GRAPHIC_CAPTION_PROMPT,
     CATEGORISER_PROMPT,
-    POSSIBLE_CATEGORIES,
     FOLLOWUP_PROMPT
 )
 
@@ -20,6 +19,5 @@
     'BOUNDING_BOX_PROMPT_EXAMPLE',
     'GRAPHIC_CAPTION_PROMPT',
     'CATEGORISER_PROMPT',
-    'POSSIBLE_CATEGORIES',
     'FOLLOWUP_PROMPT'
 ]
diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py
@@ -18,17 +18,12 @@
 the graphic type is significant (like oil painting or aerial photo).
 Instead, start describing the graphic right away.
 """
-###
-
 # Content categoriser
 CATEGORISER_PROMPT = """
-Answer only in JSON with the following format:
-'{"category": "YOUR_ANSWER"}.'
-Which of the following categories best
-describes this image, selecting from this enum:
+Your task is to categorise the content of an image.
+Answer only in JSON.
+Assign boolean values (true or false) to each of the following categories:
 """
-
-POSSIBLE_CATEGORIES = "photograph, chart, text, other"
 ###
 
 # Followup