feat: Set limit for advanced image processing images (#978)
cecheta authored May 28, 2024
1 parent 671da33 commit f604655
Showing 9 changed files with 117 additions and 8 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
@@ -21,4 +21,5 @@
"python.testing.cwd": "${workspaceFolder}/code",
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"pylint.cwd": "${workspaceFolder}/code",
}
10 changes: 8 additions & 2 deletions code/backend/batch/utilities/helpers/env_helper.py
@@ -48,7 +48,7 @@ def __load_config(self, **kwargs) -> None:
"AZURE_SEARCH_INDEX_IS_PRECHUNKED", ""
)
self.AZURE_SEARCH_FILTER = os.getenv("AZURE_SEARCH_FILTER", "")
self.AZURE_SEARCH_TOP_K = int(os.getenv("AZURE_SEARCH_TOP_K", "5"))
self.AZURE_SEARCH_TOP_K = self.get_env_var_int("AZURE_SEARCH_TOP_K", 5)
self.AZURE_SEARCH_ENABLE_IN_DOMAIN = (
os.getenv("AZURE_SEARCH_ENABLE_IN_DOMAIN", "true").lower() == "true"
)
@@ -114,6 +114,9 @@ def __load_config(self, **kwargs) -> None:
self.USE_ADVANCED_IMAGE_PROCESSING = self.get_env_var_bool(
"USE_ADVANCED_IMAGE_PROCESSING", "False"
)
self.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = self.get_env_var_int(
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1
)
self.AZURE_COMPUTER_VISION_ENDPOINT = os.getenv(
"AZURE_COMPUTER_VISION_ENDPOINT"
)
@@ -244,7 +247,10 @@ def get_env_var_bool(self, var_name: str, default: str = "True") -> bool:
def get_env_var_array(self, var_name: str, default: str = ""):
return os.getenv(var_name, default).split(",")

def get_env_var_float(self, var_name: str, default: int):
def get_env_var_int(self, var_name: str, default: int):
return int(os.getenv(var_name, default))

def get_env_var_float(self, var_name: str, default: float):
return float(os.getenv(var_name, default))

def is_auth_type_keys(self):
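
As an aside (not part of the commit), a minimal sketch of how the new `get_env_var_int` helper behaves with the `ADVANCED_IMAGE_PROCESSING_MAX_IMAGES` variable introduced above — the set/unset values shown are illustrative:

```python
import os


def get_env_var_int(var_name: str, default: int) -> int:
    # Same logic as the helper added in env_helper.py: read the variable and
    # coerce it to an int, falling back to the default when it is unset.
    # (A non-numeric value would raise ValueError.)
    return int(os.getenv(var_name, default))


os.environ["ADVANCED_IMAGE_PROCESSING_MAX_IMAGES"] = "3"
print(get_env_var_int("ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1))  # -> 3

del os.environ["ADVANCED_IMAGE_PROCESSING_MAX_IMAGES"]
print(get_env_var_int("ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1))  # -> 1 (default)
```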
2 changes: 1 addition & 1 deletion code/backend/batch/utilities/tools/question_answer_tool.py
@@ -186,7 +186,7 @@ def create_image_url_list(self, source_documents):
doc.source.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas)
for doc in source_documents
if doc.title is not None and doc.title.split(".")[-1] in image_types
]
][: self.env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES]

return image_urls

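Illustrative only — a simplified, standalone version of the capping behaviour added above, using plain `(title, url)` pairs instead of the repo's `SourceDocument` objects (the `IMAGE_TYPES` set below is assumed):

```python
IMAGE_TYPES = {"jpg", "jpeg", "png"}  # assumed; the real extension list lives elsewhere in the repo


def create_image_url_list(sources: list[tuple[str, str]], max_images: int) -> list[str]:
    # Collect URLs whose title has an image extension, then truncate to at
    # most `max_images`, mirroring the new slice in question_answer_tool.py.
    urls = [
        url
        for title, url in sources
        if title is not None and title.split(".")[-1] in IMAGE_TYPES
    ]
    return urls[:max_images]


sources = [
    ("notes.pdf", "https://example.com/notes.pdf"),
    ("diagram.jpg", "https://example.com/diagram.jpg"),
    ("photo.png", "https://example.com/photo.png"),
]
print(create_image_url_list(sources, max_images=1))  # -> ['https://example.com/diagram.jpg']
```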
1 change: 1 addition & 0 deletions code/tests/functional/app_config.py
@@ -74,6 +74,7 @@ class AppConfig:
"AZURE_SPEECH_RECOGNIZER_LANGUAGES": "en-US,es-ES",
"TIKTOKEN_CACHE_DIR": f"{os.path.dirname(os.path.realpath(__file__))}/resources",
"USE_ADVANCED_IMAGE_PROCESSING": "False",
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "1",
"USE_KEY_VAULT": "False",
# These values are set directly within EnvHelper, adding them here ensures
# that they are removed from the environment when remove_from_environment() runs
77 changes: 73 additions & 4 deletions code/tests/utilities/tools/test_question_answer_tool.py
@@ -42,6 +42,7 @@ def env_helper_mock():
env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION = False
env_helper.USE_ADVANCED_IMAGE_PROCESSING = False
env_helper.AZURE_OPENAI_VISION_MODEL = "mock vision model"
env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = 1

yield env_helper

@@ -83,7 +84,7 @@ def search_handler_mock():


@pytest.fixture(autouse=True)
def source_documents_mock():
def get_source_documents_mock():
with patch(
"backend.batch.utilities.tools.question_answer_tool.Search.get_source_documents"
) as mock:
@@ -106,11 +107,11 @@ def source_documents_mock():
),
]
mock.return_value = documents
yield documents
yield mock


def test_answer_question_returns_source_documents(
source_documents_mock: list[SourceDocument],
get_source_documents_mock: MagicMock,
):
# given
tool = QuestionAnswerTool()
@@ -121,7 +122,7 @@ def test_answer_question_returns_source_documents(
# then
assert len(answer.source_documents) == 2
assert isinstance(answer.source_documents[0], SourceDocument)
assert answer.source_documents == source_documents_mock
assert answer.source_documents == get_source_documents_mock.return_value


def test_answer_question_returns_answer():
@@ -350,3 +351,71 @@ def test_use_advanced_vision_processing(env_helper_mock, llm_helper_mock):
assert isinstance(answer, Answer)
assert answer.question == "mock question"
assert answer.answer == "mock content"


def test_limit_number_of_images_passed_to_llm(
get_source_documents_mock: MagicMock,
env_helper_mock: MagicMock,
llm_helper_mock: MagicMock,
):
# given
get_source_documents_mock.return_value = [
SourceDocument(
id="mock id",
content="mock content",
title="mock title",
source="mock source",
chunk=123,
offset=123,
page_number=123,
),
SourceDocument(
id="mock id 2",
content="mock content 2",
title="mock title 2.jpg",
source="mock source 2_SAS_TOKEN_PLACEHOLDER_",
chunk_id="mock chunk id 2",
),
SourceDocument(
id="mock id 3",
content="mock content 3",
title="mock title 3.jpg",
source="mock source 3_SAS_TOKEN_PLACEHOLDER_",
chunk_id="mock chunk id 3",
),
]
env_helper_mock.USE_ADVANCED_IMAGE_PROCESSING = True
tool = QuestionAnswerTool()

# when
tool.answer_question("mock question", [])

# then
llm_helper_mock.get_chat_completion.assert_called_once_with(
[
{"content": "mock answering system prompt", "role": "system"},
{
"content": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock example content"}}]}, Question: mock example user question',
"name": "example_user",
"role": "system",
},
{
"content": "mock example answer",
"name": "example_assistant",
"role": "system",
},
{"content": "mock azure openai system message", "role": "system"},
{
"content": [
{
"type": "text",
"text": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}},{"[doc3]":{"content":"mock content 3"}}]}, Question: mock question',
},
{"type": "image_url", "image_url": "mock source 2mock sas"},
],
"role": "user",
},
],
model="mock vision model",
temperature=0,
)
7 changes: 7 additions & 0 deletions docs/advanced_image_processing.md
@@ -38,4 +38,11 @@ Once enabled, advanced image processing will be enabled for all supported image

![image](./images/enable_advanced_image_processing.png)

The `ADVANCED_IMAGE_PROCESSING_MAX_IMAGES` environment variable can be used to control the maximum number of images passed to GPT-4 vision in a single request (default is `1`).
Increasing the number of images consumes more tokens and may result in throttled requests.

```bash
azd env set ADVANCED_IMAGE_PROCESSING_MAX_IMAGES 2
```

Advanced image processing is only used in the `custom` conversation flow and not the `byod` flow, as Azure OpenAI On Your Data only supports Ada embeddings. It is currently not possible to use advanced image processing when integrated vectorization is enabled.
7 changes: 7 additions & 0 deletions infra/main.bicep
@@ -110,6 +110,9 @@ param azureOpenAIModelCapacity int = 30
@description('Enables the use of a vision LLM and Computer Vision for embedding images')
param useAdvancedImageProcessing bool = false

@description('The maximum number of images to pass to the vision model in a single request')
param advancedImageProcessingMaxImages int = 1

@description('Azure OpenAI Vision Model Deployment Name')
param azureOpenAIVisionModel string = 'gpt-4'

@@ -554,6 +557,7 @@ module web './app/web.bicep' = if (hostingModel == 'code') {
AZURE_SPEECH_SERVICE_REGION: location
AZURE_SPEECH_RECOGNIZER_LANGUAGES: recognizedLanguages
USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing
ADVANCED_IMAGE_PROCESSING_MAX_IMAGES: advancedImageProcessingMaxImages
ORCHESTRATION_STRATEGY: orchestrationStrategy
CONVERSATION_FLOW: conversationFlow
LOGLEVEL: logLevel
@@ -627,6 +631,7 @@ module web_docker './app/web.bicep' = if (hostingModel == 'container') {
AZURE_SPEECH_SERVICE_REGION: location
AZURE_SPEECH_RECOGNIZER_LANGUAGES: recognizedLanguages
USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing
ADVANCED_IMAGE_PROCESSING_MAX_IMAGES: advancedImageProcessingMaxImages
ORCHESTRATION_STRATEGY: orchestrationStrategy
CONVERSATION_FLOW: conversationFlow
LOGLEVEL: logLevel
@@ -1097,3 +1102,5 @@ output ADMIN_WEBSITE_NAME string = hostingModel == 'code'
: adminweb_docker.outputs.WEBSITE_ADMIN_URI
output LOGLEVEL string = logLevel
output CONVERSATION_FLOW string = conversationFlow
output USE_ADVANCED_IMAGE_PROCESSING bool = useAdvancedImageProcessing
output ADVANCED_IMAGE_PROCESSING_MAX_IMAGES int = advancedImageProcessingMaxImages
1 change: 1 addition & 0 deletions infra/main.bicepparam
@@ -26,6 +26,7 @@ param azureOpenAIModelName = readEnvironmentVariable('AZURE_OPENAI_MODEL_NAME',
param azureOpenAIModelVersion = readEnvironmentVariable('AZURE_OPENAI_MODEL_VERSION', '0613')
param azureOpenAIModelCapacity = int(readEnvironmentVariable('AZURE_OPENAI_MODEL_CAPACITY', '30'))
param useAdvancedImageProcessing = bool(readEnvironmentVariable('USE_ADVANCED_IMAGE_PROCESSING', 'false'))
param advancedImageProcessingMaxImages = int(readEnvironmentVariable('ADVANCED_IMAGE_PROCESSING_MAX_IMAGES', '1'))
param azureOpenAIVisionModel = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL', 'gpt-4')
param azureOpenAIVisionModelName = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_NAME', 'gpt-4')
param azureOpenAIVisionModelVersion = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_VERSION', 'vision-preview')
19 changes: 18 additions & 1 deletion infra/main.json
@@ -5,7 +5,7 @@
"_generator": {
"name": "bicep",
"version": "0.27.1.19265",
"templateHash": "6027201902589320671"
"templateHash": "10484197901623589764"
}
},
"parameters": {
@@ -229,6 +229,13 @@
"description": "Enables the use of a vision LLM and Computer Vision for embedding images"
}
},
"advancedImageProcessingMaxImages": {
"type": "int",
"defaultValue": 1,
"metadata": {
"description": "The maximum number of images to pass to the vision model in a single request"
}
},
"azureOpenAIVisionModel": {
"type": "string",
"defaultValue": "gpt-4",
@@ -2031,6 +2038,7 @@
"AZURE_SPEECH_SERVICE_REGION": "[parameters('location')]",
"AZURE_SPEECH_RECOGNIZER_LANGUAGES": "[parameters('recognizedLanguages')]",
"USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]",
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "[parameters('advancedImageProcessingMaxImages')]",
"ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]",
"CONVERSATION_FLOW": "[parameters('conversationFlow')]",
"LOGLEVEL": "[parameters('logLevel')]"
@@ -2984,6 +2992,7 @@
"AZURE_SPEECH_SERVICE_REGION": "[parameters('location')]",
"AZURE_SPEECH_RECOGNIZER_LANGUAGES": "[parameters('recognizedLanguages')]",
"USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]",
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "[parameters('advancedImageProcessingMaxImages')]",
"ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]",
"CONVERSATION_FLOW": "[parameters('conversationFlow')]",
"LOGLEVEL": "[parameters('logLevel')]"
@@ -11102,6 +11111,14 @@
"CONVERSATION_FLOW": {
"type": "string",
"value": "[parameters('conversationFlow')]"
},
"USE_ADVANCED_IMAGE_PROCESSING": {
"type": "bool",
"value": "[parameters('useAdvancedImageProcessing')]"
},
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": {
"type": "int",
"value": "[parameters('advancedImageProcessingMaxImages')]"
}
}
}
