diff --git a/docker-compose.yml b/docker-compose.yml index ca9cf249..2f0d5fbf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -198,7 +198,7 @@ services: ca.mcgill.a11y.image.preprocessor: 1 ca.mcgill.a11y.image.port: 5000 ca.mcgill.a11y.image.cacheTimeout: 3600 - ca.mcgill.a11y.image.required_dependencies: "" + ca.mcgill.a11y.image.required_dependencies: "content-categoriser" ca.mcgill.a11y.image.optional_dependencies: "" deploy: resources: diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index 96defc40..92cfb8bc 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -20,11 +20,8 @@ import sys from datetime import datetime from config.logging_utils import configure_logging -from utils.llm import ( - LLMClient, - CATEGORISER_PROMPT, - POSSIBLE_CATEGORIES -) +from utils.llm import LLMClient, CATEGORISER_PROMPT + from utils.validation import Validator import json @@ -36,6 +33,14 @@ with open(DATA_SCHEMA, 'r') as f: CATEGORISER_RESPONSE_SCHEMA = json.load(f) +categories_properties = ( + CATEGORISER_RESPONSE_SCHEMA.get("properties", {}) + .get("categories", {}) + .get("properties", {}) +) +POSSIBLE_CATEGORIES = list(categories_properties.keys()) +logging.debug(f"Possible categories: {POSSIBLE_CATEGORIES}") + PREPROCESSOR_NAME = "ca.mcgill.a11y.image.preprocessor.contentCategoriser" try: @@ -74,7 +79,7 @@ def categorise(): base64_image = source.split(",")[1] graphic_category = llm_client.chat_completion( - prompt=CATEGORISER_PROMPT + POSSIBLE_CATEGORIES, + prompt=f"{CATEGORISER_PROMPT} {POSSIBLE_CATEGORIES}", image_base64=base64_image, temperature=0.0, json_schema=CATEGORISER_RESPONSE_SCHEMA, @@ -87,8 +92,6 @@ def categorise(): {"error": "Failed to get graphic category from LLM"} ), 500 - logging.pii(f"Graphic category JSON: {graphic_category}") - # data schema validation ok, _ = validator.check_data(graphic_category) if not ok: diff --git a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py index a7a78e5c..2d9fc2cb 100644 --- a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py +++ b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py @@ -97,6 +97,17 @@ def process_diagram(): if not ok: return jsonify({"error": "Invalid Preprocessor JSON format"}), 400 + # Determine if the content is a multistage diagram + # based on the categoriser output + preprocess_output = content["preprocessors"] + categoriser = "ca.mcgill.a11y.image.preprocessor.contentCategoriser" + if categoriser in preprocess_output: + categoriser_output = preprocess_output[categoriser] + categoriser_tags = categoriser_output["categories"] + if not categoriser_tags["multistage_diagram"]: + logging.info("Not a multistage diagram. Skipping...") + return "", 204 + request_uuid = content["request_uuid"] timestamp = time.time() diff --git a/utils/llm/__init__.py b/utils/llm/__init__.py index c3173a37..4588284d 100644 --- a/utils/llm/__init__.py +++ b/utils/llm/__init__.py @@ -9,7 +9,6 @@ BOUNDING_BOX_PROMPT_EXAMPLE, GRAPHIC_CAPTION_PROMPT, CATEGORISER_PROMPT, - POSSIBLE_CATEGORIES, FOLLOWUP_PROMPT ) @@ -20,6 +19,5 @@ 'BOUNDING_BOX_PROMPT_EXAMPLE', 'GRAPHIC_CAPTION_PROMPT', 'CATEGORISER_PROMPT', - 'POSSIBLE_CATEGORIES', 'FOLLOWUP_PROMPT' ] diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py index 12b264cc..f9ecdbf0 100644 --- a/utils/llm/prompts.py +++ b/utils/llm/prompts.py @@ -18,17 +18,12 @@ the graphic type is significant (like oil painting or aerial photo). Instead, start describing the graphic right away. """ -### - # Content categoriser CATEGORISER_PROMPT = """ -Answer only in JSON with the following format: -'{"category": "YOUR_ANSWER"}.' -Which of the following categories best -describes this image, selecting from this enum: +Your task is to categorise the content of an image. +Answer only in JSON. +Assign boolean values (true or false) to each of the following categories: """ - -POSSIBLE_CATEGORIES = "photograph, chart, text, other" ### # Followup