Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ services:
ca.mcgill.a11y.image.preprocessor: 1
ca.mcgill.a11y.image.port: 5000
ca.mcgill.a11y.image.cacheTimeout: 3600
ca.mcgill.a11y.image.required_dependencies: ""
ca.mcgill.a11y.image.required_dependencies: "content-categoriser"
ca.mcgill.a11y.image.optional_dependencies: ""
deploy:
resources:
Expand Down
19 changes: 11 additions & 8 deletions preprocessors/content-categoriser/categoriser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@
import sys
from datetime import datetime
from config.logging_utils import configure_logging
from utils.llm import (
LLMClient,
CATEGORISER_PROMPT,
POSSIBLE_CATEGORIES
)
from utils.llm import LLMClient, CATEGORISER_PROMPT

from utils.validation import Validator
import json

Expand All @@ -36,6 +33,14 @@
with open(DATA_SCHEMA, 'r') as f:
CATEGORISER_RESPONSE_SCHEMA = json.load(f)

# The category names are not hard-coded: they are the property names the
# response schema declares under properties -> categories -> properties.
# Each lookup falls back to an empty dict, so a schema missing any of these
# levels yields an empty category list instead of raising.
categories_properties = CATEGORISER_RESPONSE_SCHEMA.get("properties", {})
categories_properties = categories_properties.get("categories", {})
categories_properties = categories_properties.get("properties", {})

# Iterating a dict yields its keys in insertion order.
POSSIBLE_CATEGORIES = list(categories_properties)
logging.debug(f"Possible categories: {POSSIBLE_CATEGORIES}")

PREPROCESSOR_NAME = "ca.mcgill.a11y.image.preprocessor.contentCategoriser"

try:
Expand Down Expand Up @@ -74,7 +79,7 @@ def categorise():
base64_image = source.split(",")[1]

graphic_category = llm_client.chat_completion(
prompt=CATEGORISER_PROMPT + POSSIBLE_CATEGORIES,
prompt=f"{CATEGORISER_PROMPT} {POSSIBLE_CATEGORIES}",
image_base64=base64_image,
temperature=0.0,
json_schema=CATEGORISER_RESPONSE_SCHEMA,
Expand All @@ -87,8 +92,6 @@ def categorise():
{"error": "Failed to get graphic category from LLM"}
), 500

logging.pii(f"Graphic category JSON: {graphic_category}")

# data schema validation
ok, _ = validator.check_data(graphic_category)
if not ok:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,17 @@ def process_diagram():
if not ok:
return jsonify({"error": "Invalid Preprocessor JSON format"}), 400

# Determine if the content is a multistage diagram
# based on the categoriser output
preprocess_output = content["preprocessors"]
categoriser = "ca.mcgill.a11y.image.preprocessor.contentCategoriser"
if categoriser in preprocess_output:
categoriser_output = preprocess_output[categoriser]
categoriser_tags = categoriser_output["categories"]
if not categoriser_tags["multistage_diagram"]:
logging.info("Not a multistage diagram. Skipping...")
return "", 204

request_uuid = content["request_uuid"]
timestamp = time.time()

Expand Down
2 changes: 0 additions & 2 deletions utils/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
BOUNDING_BOX_PROMPT_EXAMPLE,
GRAPHIC_CAPTION_PROMPT,
CATEGORISER_PROMPT,
POSSIBLE_CATEGORIES,
FOLLOWUP_PROMPT
)

Expand All @@ -20,6 +19,5 @@
'BOUNDING_BOX_PROMPT_EXAMPLE',
'GRAPHIC_CAPTION_PROMPT',
'CATEGORISER_PROMPT',
'POSSIBLE_CATEGORIES',
'FOLLOWUP_PROMPT'
]
11 changes: 3 additions & 8 deletions utils/llm/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,12 @@
the graphic type is significant (like oil painting or aerial photo).
Instead, start describing the graphic right away.
"""
###

# Content categoriser
CATEGORISER_PROMPT = """
Answer only in JSON with the following format:
'{"category": "YOUR_ANSWER"}.'
Which of the following categories best
describes this image, selecting from this enum:
Your task is to categorise the content of an image.
Answer only in JSON.
Assign boolean values (true or false) to each of the following categories:
"""

POSSIBLE_CATEGORIES = "photograph, chart, text, other"
###

# Followup
Expand Down