From 7f191e1a576deb8b6930f65ae480c7a474dfbc46 Mon Sep 17 00:00:00 2001
From: Jeffrey Blum
Date: Sun, 14 Dec 2025 11:52:29 -0500
Subject: [PATCH 1/2] fixes for text-followup not working with qwen3-vl

---
 preprocessors/text-followup/text-followup.py | 19 ++++++++++++-------
 utils/llm/client.py                          |  6 +++++-
 utils/llm/prompts.py                         |  3 ++-
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/preprocessors/text-followup/text-followup.py b/preprocessors/text-followup/text-followup.py
index bba395ab..6b2d8be2 100644
--- a/preprocessors/text-followup/text-followup.py
+++ b/preprocessors/text-followup/text-followup.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 IMAGE Project, Shared Reality Lab, McGill University
+# Copyright (c) 2025 IMAGE Project, Shared Reality Lab, McGill University
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -452,13 +452,16 @@ def followup():
             {"error": "Failed to process focus area on image"}
         ), 500
 
+    # get followup prompt from env as an override if it exists
+    followup_prompt = os.getenv('FOLLOWUP_PROMPT_OVERRIDE', FOLLOWUP_PROMPT)
+
     if not focus:
-        system_prompt = FOLLOWUP_PROMPT
+        system_prompt = followup_prompt
     else:
-        system_prompt = FOLLOWUP_PROMPT + FOLLOWUP_PROMPT_FOCUS
+        system_prompt = followup_prompt + FOLLOWUP_PROMPT_FOCUS
 
     system_message = {
-        "role": "developer",
+        "role": "system",
         "content": system_prompt
     }
 
@@ -469,7 +472,7 @@ def followup():
         user_message = create_multimodal_message(user_prompt, graphic_b64)
 
         conversation_history[request_uuid] = {
-            'messages': [system_message, user_message],
+            'messages': [system_message,user_message],
             'last_updated': timestamp,
             'focus': focus if focus else None
         }
@@ -508,7 +511,9 @@ def followup():
 
     followup_response_json = llm_client.chat_completion(
         prompt="",  # Empty since we're using full messages via kwargs
-        json_schema=FOLLOWUP_RESPONSE_SCHEMA,
+        system_prompt=system_prompt,
+        json_schema=None, # qwen3 wants json_object not rigid schema
+        response_format={"type": "json_object"},
         temperature=0.0,
         messages=messages,  # Pass full conversation history via kwargs
         parse_json=True,
@@ -518,7 +523,7 @@ def followup():
     if followup_response_json is None:
         logging.error("Failed to receive response from LLM.")
         return jsonify(
-            {"error": "Failed to get graphic caption from LLM"}
+            {"error": "Failed to receive response from LLM"}
         ), 500
 
     response_text, token_usage = followup_response_json
diff --git a/utils/llm/client.py b/utils/llm/client.py
index 5e38650a..90640eaa 100644
--- a/utils/llm/client.py
+++ b/utils/llm/client.py
@@ -121,7 +121,7 @@ def chat_completion(
         # Add system prompt if provided
         if system_prompt:
             messages.append(
-                {"role": "developer",
+                {"role": "system",
                  "content": system_prompt}
             )
 
@@ -140,6 +140,8 @@
             messages.append({"role": "user", "content": user_content})
 
+        logging.pii(messages)
+
         # Build API call parameters
         params = {
             "model": self.model,
@@ -157,6 +159,8 @@
         params.update(kwargs)
 
         logging.debug(f"Making LLM request to model: {self.model}")
+        #logging.pii(f"LLM request params: {' '.join('GRAPHIC_HIDDEN' if item == 'graphic' else item for item in params)}")
+        #logging.pii(params)
         response = self.client.chat.completions.create(**params)
 
         # Validate and extract response
diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py
index 586cade9..8e1dbd89 100644
--- a/utils/llm/prompts.py
+++ b/utils/llm/prompts.py
@@ -101,7 +101,8 @@
     "response_brief": "One sentence response to the user request.",
     "response_full": "Further details. Maximum three sentences."
 }
-
+"""
+OLD_END_OF_FOLLOWUP_PROMPT = """
 The user may add a note to focus on a specific part of the image
 and an updated picture with the area of interest marked with a red
 rectangle. In this case, answer the question ONLY about the contents
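
A note on the FOLLOWUP_PROMPT_OVERRIDE lookup added above: os.getenv returns
the environment value when the variable is set and falls back to the built-in
FOLLOWUP_PROMPT otherwise, so a deployment can swap the follow-up prompt
without a code change. A minimal standalone sketch of that behavior; the
prompt strings here are invented stand-ins, not values from the repository:

    import os

    # Stand-in for the real default prompt (illustrative only).
    FOLLOWUP_PROMPT = "Describe the graphic."

    # Env var unset: the built-in default is returned unchanged.
    prompt = os.getenv('FOLLOWUP_PROMPT_OVERRIDE', FOLLOWUP_PROMPT)
    assert prompt == FOLLOWUP_PROMPT

    # Env var set: the deployment-specific prompt wins.
    os.environ['FOLLOWUP_PROMPT_OVERRIDE'] = "Describe the graphic briefly."
    prompt = os.getenv('FOLLOWUP_PROMPT_OVERRIDE', FOLLOWUP_PROMPT)
    assert prompt == "Describe the graphic briefly."
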
From 81ec1f10a508554d86656e2a57367bf9e3e24657 Mon Sep 17 00:00:00 2001
From: Jeffrey Blum
Date: Sun, 14 Dec 2025 12:15:38 -0500
Subject: [PATCH 2/2] pep8 and remove old logging

---
 preprocessors/text-followup/text-followup.py | 4 ++--
 utils/llm/client.py                          | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/preprocessors/text-followup/text-followup.py b/preprocessors/text-followup/text-followup.py
index 6b2d8be2..13a0a8a4 100644
--- a/preprocessors/text-followup/text-followup.py
+++ b/preprocessors/text-followup/text-followup.py
@@ -472,7 +472,7 @@ def followup():
         user_message = create_multimodal_message(user_prompt, graphic_b64)
 
         conversation_history[request_uuid] = {
-            'messages': [system_message,user_message],
+            'messages': [system_message, user_message],
             'last_updated': timestamp,
             'focus': focus if focus else None
         }
@@ -512,7 +512,7 @@ def followup():
     followup_response_json = llm_client.chat_completion(
         prompt="",  # Empty since we're using full messages via kwargs
         system_prompt=system_prompt,
-        json_schema=None, # qwen3 wants json_object not rigid schema
+        json_schema=None,  # qwen3 wants json_object not rigid schema
         response_format={"type": "json_object"},
         temperature=0.0,
         messages=messages,  # Pass full conversation history via kwargs
diff --git a/utils/llm/client.py b/utils/llm/client.py
index 90640eaa..a0019237 100644
--- a/utils/llm/client.py
+++ b/utils/llm/client.py
@@ -159,8 +159,6 @@
         params.update(kwargs)
 
         logging.debug(f"Making LLM request to model: {self.model}")
-        #logging.pii(f"LLM request params: {' '.join('GRAPHIC_HIDDEN' if item == 'graphic' else item for item in params)}")
-        #logging.pii(params)
         response = self.client.chat.completions.create(**params)
 
         # Validate and extract response
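
For reference, the response_format change in these patches requests generic
JSON mode rather than a server-enforced schema, and the required keys
("response_brief", "response_full") are then enforced by the system prompt.
A minimal sketch of the request this produces against an OpenAI-compatible
endpoint serving qwen3-vl; the base URL, API key, model id, and message text
below are illustrative assumptions, not values from this repository:

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1",  # assumed endpoint
                    api_key="unused")                     # assumed key

    response = client.chat.completions.create(
        model="qwen3-vl",  # assumed model id
        messages=[
            {"role": "system",
             "content": "Reply in JSON with keys 'response_brief' and "
                        "'response_full'."},
            {"role": "user", "content": "What does the graphic show?"},
        ],
        # Generic JSON mode: the server guarantees valid JSON, while the
        # expected keys come from the prompt, not a rigid schema.
        response_format={"type": "json_object"},
        temperature=0.0,
    )
    print(response.choices[0].message.content)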