diff --git a/.vscode/settings.json b/.vscode/settings.json
index bb92a125e..9705e5741 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -21,4 +21,5 @@
   "python.testing.cwd": "${workspaceFolder}/code",
   "python.testing.unittestEnabled": false,
   "python.testing.pytestEnabled": true,
+  "pylint.cwd": "${workspaceFolder}/code",
 }
diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py
index e400a7f52..5e790b684 100644
--- a/code/backend/batch/utilities/helpers/env_helper.py
+++ b/code/backend/batch/utilities/helpers/env_helper.py
@@ -48,7 +48,7 @@ def __load_config(self, **kwargs) -> None:
             "AZURE_SEARCH_INDEX_IS_PRECHUNKED", ""
         )
         self.AZURE_SEARCH_FILTER = os.getenv("AZURE_SEARCH_FILTER", "")
-        self.AZURE_SEARCH_TOP_K = int(os.getenv("AZURE_SEARCH_TOP_K", "5"))
+        self.AZURE_SEARCH_TOP_K = self.get_env_var_int("AZURE_SEARCH_TOP_K", 5)
         self.AZURE_SEARCH_ENABLE_IN_DOMAIN = (
             os.getenv("AZURE_SEARCH_ENABLE_IN_DOMAIN", "true").lower() == "true"
         )
@@ -114,6 +114,9 @@ def __load_config(self, **kwargs) -> None:
         self.USE_ADVANCED_IMAGE_PROCESSING = self.get_env_var_bool(
             "USE_ADVANCED_IMAGE_PROCESSING", "False"
         )
+        self.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = self.get_env_var_int(
+            "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1
+        )
         self.AZURE_COMPUTER_VISION_ENDPOINT = os.getenv(
             "AZURE_COMPUTER_VISION_ENDPOINT"
         )
@@ -244,7 +247,10 @@ def get_env_var_bool(self, var_name: str, default: str = "True") -> bool:
     def get_env_var_array(self, var_name: str, default: str = ""):
         return os.getenv(var_name, default).split(",")
 
-    def get_env_var_float(self, var_name: str, default: int):
+    def get_env_var_int(self, var_name: str, default: int):
+        return int(os.getenv(var_name, default))
+
+    def get_env_var_float(self, var_name: str, default: float):
         return float(os.getenv(var_name, default))
 
     def is_auth_type_keys(self):
diff --git a/code/backend/batch/utilities/tools/question_answer_tool.py b/code/backend/batch/utilities/tools/question_answer_tool.py
index 4485a2fdd..86baa7691 100644
--- a/code/backend/batch/utilities/tools/question_answer_tool.py
+++ b/code/backend/batch/utilities/tools/question_answer_tool.py
@@ -186,7 +186,7 @@ def create_image_url_list(self, source_documents):
             doc.source.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas)
             for doc in source_documents
             if doc.title is not None and doc.title.split(".")[-1] in image_types
-        ]
+        ][: self.env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES]
 
         return image_urls
diff --git a/code/tests/functional/app_config.py b/code/tests/functional/app_config.py
index 0bad2ac93..d8d2e056f 100644
--- a/code/tests/functional/app_config.py
+++ b/code/tests/functional/app_config.py
@@ -74,6 +74,7 @@ class AppConfig:
         "AZURE_SPEECH_RECOGNIZER_LANGUAGES": "en-US,es-ES",
         "TIKTOKEN_CACHE_DIR": f"{os.path.dirname(os.path.realpath(__file__))}/resources",
         "USE_ADVANCED_IMAGE_PROCESSING": "False",
+        "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "1",
         "USE_KEY_VAULT": "False",
         # These values are set directly within EnvHelper, adding them here ensures
         # that they are removed from the environment when remove_from_environment() runs
diff --git a/code/tests/utilities/tools/test_question_answer_tool.py b/code/tests/utilities/tools/test_question_answer_tool.py
index b4411cfde..72e36af9c 100644
--- a/code/tests/utilities/tools/test_question_answer_tool.py
+++ b/code/tests/utilities/tools/test_question_answer_tool.py
@@ -42,6 +42,7 @@ def env_helper_mock():
     env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION = False
     env_helper.USE_ADVANCED_IMAGE_PROCESSING = False
     env_helper.AZURE_OPENAI_VISION_MODEL = "mock vision model"
+    env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = 1
 
     yield env_helper
 
@@ -83,7 +84,7 @@ def search_handler_mock():
 
 
 @pytest.fixture(autouse=True)
-def source_documents_mock():
+def get_source_documents_mock():
     with patch(
         "backend.batch.utilities.tools.question_answer_tool.Search.get_source_documents"
     ) as mock:
@@ -106,11 +107,11 @@ def source_documents_mock():
             ),
         ]
         mock.return_value = documents
-        yield documents
+        yield mock
 
 
 def test_answer_question_returns_source_documents(
-    source_documents_mock: list[SourceDocument],
+    get_source_documents_mock: MagicMock,
 ):
     # given
     tool = QuestionAnswerTool()
@@ -121,7 +122,7 @@ def test_answer_question_returns_source_documents(
     # then
     assert len(answer.source_documents) == 2
     assert isinstance(answer.source_documents[0], SourceDocument)
-    assert answer.source_documents == source_documents_mock
+    assert answer.source_documents == get_source_documents_mock.return_value
 
 
 def test_answer_question_returns_answer():
@@ -350,3 +351,71 @@ def test_use_advanced_vision_processing(env_helper_mock, llm_helper_mock):
     assert isinstance(answer, Answer)
     assert answer.question == "mock question"
     assert answer.answer == "mock content"
+
+
+def test_limit_number_of_images_passed_to_llm(
+    get_source_documents_mock: MagicMock,
+    env_helper_mock: MagicMock,
+    llm_helper_mock: MagicMock,
+):
+    # given
+    get_source_documents_mock.return_value = [
+        SourceDocument(
+            id="mock id",
+            content="mock content",
+            title="mock title",
+            source="mock source",
+            chunk=123,
+            offset=123,
+            page_number=123,
+        ),
+        SourceDocument(
+            id="mock id 2",
+            content="mock content 2",
+            title="mock title 2.jpg",
+            source="mock source 2_SAS_TOKEN_PLACEHOLDER_",
+            chunk_id="mock chunk id 2",
+        ),
+        SourceDocument(
+            id="mock id 3",
+            content="mock content 3",
+            title="mock title 3.jpg",
+            source="mock source 3_SAS_TOKEN_PLACEHOLDER_",
+            chunk_id="mock chunk id 3",
+        ),
+    ]
+    env_helper_mock.USE_ADVANCED_IMAGE_PROCESSING = True
+    tool = QuestionAnswerTool()
+
+    # when
+    tool.answer_question("mock question", [])
+
+    # then
+    llm_helper_mock.get_chat_completion.assert_called_once_with(
+        [
+            {"content": "mock answering system prompt", "role": "system"},
+            {
+                "content": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock example content"}}]}, Question: mock example user question',
+                "name": "example_user",
+                "role": "system",
+            },
+            {
+                "content": "mock example answer",
+                "name": "example_assistant",
+                "role": "system",
+            },
+            {"content": "mock azure openai system message", "role": "system"},
+            {
+                "content": [
+                    {
+                        "type": "text",
+                        "text": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}},{"[doc3]":{"content":"mock content 3"}}]}, Question: mock question',
+                    },
+                    {"type": "image_url", "image_url": "mock source 2mock sas"},
+                ],
+                "role": "user",
+            },
+        ],
+        model="mock vision model",
+        temperature=0,
+    )
diff --git a/docs/advanced_image_processing.md b/docs/advanced_image_processing.md
index d57be4667..d2cf7cc66 100644
--- a/docs/advanced_image_processing.md
+++ b/docs/advanced_image_processing.md
@@ -38,4 +38,11 @@ Once enabled, advanced image processing will be enabled for all supported image
 
 ![image](./images/enable_advanced_image_processing.png)
 
+The `ADVANCED_IMAGE_PROCESSING_MAX_IMAGES` environment variable can be used to control the maximum number of images passed to GPT-4 vision in a single request (default is `1`).
+Increasing the number of images consumes more tokens and may result in throttled requests.
+
+```bash
+azd env set ADVANCED_IMAGE_PROCESSING_MAX_IMAGES 2
+```
+
 Advanced image processing is only used in the `custom` conversation flow and not the `byod` flow, as Azure OpenAI On Your Data only supports Ada embeddings. It is currently not possible to use advanced image processing when integrated vectorization is enabled.
diff --git a/infra/main.bicep b/infra/main.bicep
index aefaa9c27..a762bb66f 100644
--- a/infra/main.bicep
+++ b/infra/main.bicep
@@ -110,6 +110,9 @@ param azureOpenAIModelCapacity int = 30
 @description('Enables the use of a vision LLM and Computer Vision for embedding images')
 param useAdvancedImageProcessing bool = false
 
+@description('The maximum number of images to pass to the vision model in a single request')
+param advancedImageProcessingMaxImages int = 1
+
 @description('Azure OpenAI Vision Model Deployment Name')
 param azureOpenAIVisionModel string = 'gpt-4'
 
@@ -554,6 +557,7 @@ module web './app/web.bicep' = if (hostingModel == 'code') {
       AZURE_SPEECH_SERVICE_REGION: location
      AZURE_SPEECH_RECOGNIZER_LANGUAGES: recognizedLanguages
       USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing
+      ADVANCED_IMAGE_PROCESSING_MAX_IMAGES: advancedImageProcessingMaxImages
       ORCHESTRATION_STRATEGY: orchestrationStrategy
       CONVERSATION_FLOW: conversationFlow
       LOGLEVEL: logLevel
@@ -627,6 +631,7 @@ module web_docker './app/web.bicep' = if (hostingModel == 'container') {
      AZURE_SPEECH_SERVICE_REGION: location
      AZURE_SPEECH_RECOGNIZER_LANGUAGES: recognizedLanguages
       USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing
+      ADVANCED_IMAGE_PROCESSING_MAX_IMAGES: advancedImageProcessingMaxImages
       ORCHESTRATION_STRATEGY: orchestrationStrategy
       CONVERSATION_FLOW: conversationFlow
       LOGLEVEL: logLevel
@@ -1097,3 +1102,5 @@ output ADMIN_WEBSITE_NAME string = hostingModel == 'code'
   : adminweb_docker.outputs.WEBSITE_ADMIN_URI
 output LOGLEVEL string = logLevel
 output CONVERSATION_FLOW string = conversationFlow
+output USE_ADVANCED_IMAGE_PROCESSING bool = useAdvancedImageProcessing
+output ADVANCED_IMAGE_PROCESSING_MAX_IMAGES int = advancedImageProcessingMaxImages
diff --git a/infra/main.bicepparam b/infra/main.bicepparam
index d69812bec..5d7156610 100644
--- a/infra/main.bicepparam
+++ b/infra/main.bicepparam
@@ -26,6 +26,7 @@ param azureOpenAIModelName = readEnvironmentVariable('AZURE_OPENAI_MODEL_NAME',
 param azureOpenAIModelVersion = readEnvironmentVariable('AZURE_OPENAI_MODEL_VERSION', '0613')
 param azureOpenAIModelCapacity = int(readEnvironmentVariable('AZURE_OPENAI_MODEL_CAPACITY', '30'))
 param useAdvancedImageProcessing = bool(readEnvironmentVariable('USE_ADVANCED_IMAGE_PROCESSING', 'false'))
+param advancedImageProcessingMaxImages = int(readEnvironmentVariable('ADVANCED_IMAGE_PROCESSING_MAX_IMAGES', '1'))
 param azureOpenAIVisionModel = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL', 'gpt-4')
 param azureOpenAIVisionModelName = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_NAME', 'gpt-4')
 param azureOpenAIVisionModelVersion = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_VERSION', 'vision-preview')
diff --git a/infra/main.json b/infra/main.json
index 7973062ff..75cf34577 100644
--- a/infra/main.json
+++ b/infra/main.json
@@ -5,7 +5,7 @@
     "_generator": {
       "name": "bicep",
       "version": "0.27.1.19265",
-      "templateHash": "6027201902589320671"
+      "templateHash": "10484197901623589764"
     }
   },
   "parameters": {
@@ -229,6 +229,13 @@
         "description": "Enables the use of a vision LLM and Computer Vision for embedding images"
       }
     },
+    "advancedImageProcessingMaxImages": {
+      "type": "int",
+      "defaultValue": 1,
+      "metadata": {
+        "description": "The maximum number of images to pass to the vision model in a single request"
+      }
+    },
     "azureOpenAIVisionModel": {
       "type": "string",
       "defaultValue": "gpt-4",
@@ -2031,6 +2038,7 @@
             "AZURE_SPEECH_SERVICE_REGION": "[parameters('location')]",
             "AZURE_SPEECH_RECOGNIZER_LANGUAGES": "[parameters('recognizedLanguages')]",
             "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]",
+            "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "[parameters('advancedImageProcessingMaxImages')]",
             "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]",
             "CONVERSATION_FLOW": "[parameters('conversationFlow')]",
             "LOGLEVEL": "[parameters('logLevel')]"
@@ -2984,6 +2992,7 @@
             "AZURE_SPEECH_SERVICE_REGION": "[parameters('location')]",
             "AZURE_SPEECH_RECOGNIZER_LANGUAGES": "[parameters('recognizedLanguages')]",
             "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]",
+            "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "[parameters('advancedImageProcessingMaxImages')]",
             "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]",
             "CONVERSATION_FLOW": "[parameters('conversationFlow')]",
             "LOGLEVEL": "[parameters('logLevel')]"
@@ -11102,6 +11111,14 @@
     "CONVERSATION_FLOW": {
       "type": "string",
       "value": "[parameters('conversationFlow')]"
+    },
+    "USE_ADVANCED_IMAGE_PROCESSING": {
+      "type": "bool",
+      "value": "[parameters('useAdvancedImageProcessing')]"
+    },
+    "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": {
+      "type": "int",
+      "value": "[parameters('advancedImageProcessingMaxImages')]"
     }
   }
 }
\ No newline at end of file
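
For reference, the capping behaviour this change introduces in `create_image_url_list` can be summarised in a small standalone sketch. This is a simplified illustration rather than the actual `QuestionAnswerTool` code: the `Doc` dataclass, the `image_types` set, and the `max_images` parameter (standing in for `env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES`) are assumptions made for the example.

```python
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Doc:
    """Illustrative stand-in for SourceDocument; only the fields used here."""

    title: Optional[str]
    source: str


def create_image_url_list(
    docs: List[Doc], container_sas: str, max_images: int = 1
) -> List[str]:
    """Return SAS-resolved URLs for image documents, capped at max_images."""
    image_types = {"jpg", "jpeg", "png"}  # assumed set of supported image extensions
    urls = [
        doc.source.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas)
        for doc in docs
        if doc.title is not None and doc.title.split(".")[-1] in image_types
    ]
    # Same pattern as the diff: build the full list of image URLs first,
    # then slice so at most max_images of them reach the vision model.
    return urls[:max_images]


if __name__ == "__main__":
    docs = [
        Doc(title="report.pdf", source="https://storage/report.pdf"),
        Doc(title="photo1.jpg", source="https://storage/photo1.jpg_SAS_TOKEN_PLACEHOLDER_"),
        Doc(title="photo2.jpg", source="https://storage/photo2.jpg_SAS_TOKEN_PLACEHOLDER_"),
    ]
    # With the default cap of 1, only the first matching image is returned.
    print(create_image_url_list(docs, "?sv=mock-sas"))
```

With the default cap of `1`, only the first matching image URL survives the slice, which mirrors the expectation in `test_limit_number_of_images_passed_to_llm` above, where only the image derived from "mock source 2" is passed to the vision model.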