diff --git a/code/backend/batch/utilities/helpers/azure_computer_vision_client.py b/code/backend/batch/utilities/helpers/azure_computer_vision_client.py new file mode 100644 index 000000000..c20b339eb --- /dev/null +++ b/code/backend/batch/utilities/helpers/azure_computer_vision_client.py @@ -0,0 +1,82 @@ +import logging +from typing import List +from urllib.parse import urljoin +from azure.identity import DefaultAzureCredential, get_bearer_token_provider + +import requests +from requests import Response + +from .env_helper import EnvHelper + +logger = logging.getLogger(__name__) + + +class AzureComputerVisionClient: + + __TOKEN_SCOPE = "https://cognitiveservices.azure.com/.default" + __VECTORIZE_IMAGE_PATH = "computervision/retrieval:vectorizeImage" + __RESPONSE_VECTOR_KEY = "vector" + + def __init__(self, env_helper: EnvHelper) -> None: + self.host = env_helper.AZURE_COMPUTER_VISION_ENDPOINT + self.timeout = env_helper.AZURE_COMPUTER_VISION_TIMEOUT + self.key = env_helper.AZURE_COMPUTER_VISION_KEY + self.use_keys = env_helper.is_auth_type_keys() + self.api_version = env_helper.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION + self.model_version = ( + env_helper.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION + ) + + def vectorize_image(self, image_url: str) -> List[float]: + logger.info(f"Making call to computer vision to vectorize image: {image_url}") + response = self.__make_request(image_url) + self.__validate_response(response) + + response_json = self.__get_json_body(response) + return self.__get_vectors(response_json) + + def __make_request(self, image_url: str) -> Response: + try: + headers = {} + if self.use_keys: + headers["Ocp-Apim-Subscription-Key"] = self.key + else: + token_provider = get_bearer_token_provider( + DefaultAzureCredential(), self.__TOKEN_SCOPE + ) + headers["Authorization"] = "Bearer " + token_provider() + + return requests.post( + url=urljoin(self.host, self.__VECTORIZE_IMAGE_PATH), + params={ + "api-version": self.api_version, + 
"model-version": self.model_version, + }, + json={"url": image_url}, + headers=headers, + timeout=self.timeout, + ) + except Exception as e: + raise Exception(f"Call to vectorize image failed: {image_url}") from e + + def __validate_response(self, response: Response): + if response.status_code != 200: + raise Exception( + f"Call to vectorize image failed with status: {response.status_code} body: {response.text}" + ) + + def __get_json_body(self, response: Response) -> dict: + try: + return response.json() + except Exception as e: + raise Exception( + f"Call to vectorize image returned malformed response body: {response.text}", + ) from e + + def __get_vectors(self, response_json: dict) -> List[float]: + if self.__RESPONSE_VECTOR_KEY in response_json: + return response_json[self.__RESPONSE_VECTOR_KEY] + else: + raise Exception( + f"Call to vectorize image returned no vector: {response_json}" + ) diff --git a/code/backend/batch/utilities/helpers/config/config_helper.py b/code/backend/batch/utilities/helpers/config/config_helper.py index 5352f7b66..5bc1e0563 100644 --- a/code/backend/batch/utilities/helpers/config/config_helper.py +++ b/code/backend/batch/utilities/helpers/config/config_helper.py @@ -13,6 +13,7 @@ CONFIG_CONTAINER_NAME = "config" CONFIG_FILE_NAME = "active.json" +ADVANCED_IMAGE_PROCESSING_FILE_TYPES = ["jpeg", "jpg", "png", "tiff", "bmp"] logger = logging.getLogger(__name__) @@ -54,8 +55,8 @@ def __init__(self, config: dict): else None ) - def get_available_document_types(self): - document_types = [ + def get_available_document_types(self) -> list[str]: + document_types = { "txt", "pdf", "url", @@ -65,12 +66,15 @@ def get_available_document_types(self): "jpg", "png", "docx", - ] + } if self.env_helper.USE_ADVANCED_IMAGE_PROCESSING: - document_types.extend(["tiff", "bmp"]) + document_types.update(ADVANCED_IMAGE_PROCESSING_FILE_TYPES) return sorted(document_types) + def get_advanced_image_processing_image_types(self): + return 
ADVANCED_IMAGE_PROCESSING_FILE_TYPES + def get_available_chunking_strategies(self): return [c.value for c in ChunkingStrategy] @@ -180,6 +184,7 @@ def get_active_config_or_default(): @staticmethod def save_config_as_active(config): + ConfigHelper.validate_config(config) blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME) blob_client = blob_client.upload_file( json.dumps(config, indent=2), @@ -187,6 +192,21 @@ def save_config_as_active(config): content_type="application/json", ) + @staticmethod + def validate_config(config: dict): + for document_processor in config.get("document_processors"): + document_type = document_processor.get("document_type") + unsupported_advanced_image_processing_file_type = ( + document_type not in ADVANCED_IMAGE_PROCESSING_FILE_TYPES + ) + if ( + document_processor.get("use_advanced_image_processing") + and unsupported_advanced_image_processing_file_type + ): + raise Exception( + f"Advanced image processing has been enabled for document type {document_type}, but only {ADVANCED_IMAGE_PROCESSING_FILE_TYPES} file types are supported." 
+ ) + @staticmethod def get_default_config(): if ConfigHelper._default_config is None: diff --git a/code/backend/batch/utilities/helpers/embedders/embedder_factory.py b/code/backend/batch/utilities/helpers/embedders/embedder_factory.py index 354c698f6..3a2336b99 100644 --- a/code/backend/batch/utilities/helpers/embedders/embedder_factory.py +++ b/code/backend/batch/utilities/helpers/embedders/embedder_factory.py @@ -12,4 +12,4 @@ def create(env_helper: EnvHelper): if env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: return IntegratedVectorizationEmbedder(env_helper) else: - return PushEmbedder(AzureBlobStorageClient()) + return PushEmbedder(AzureBlobStorageClient(), env_helper) diff --git a/code/backend/batch/utilities/helpers/embedders/push_embedder.py b/code/backend/batch/utilities/helpers/embedders/push_embedder.py index 9f793e150..e6001d7ce 100644 --- a/code/backend/batch/utilities/helpers/embedders/push_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/push_embedder.py @@ -3,6 +3,8 @@ from typing import List from ...helpers.llm_helper import LLMHelper +from ...helpers.env_helper import EnvHelper +from ..azure_computer_vision_client import AzureComputerVisionClient from ..azure_blob_storage_client import AzureBlobStorageClient @@ -19,30 +21,48 @@ class PushEmbedder(EmbedderBase): - def __init__(self, blob_client: AzureBlobStorageClient): + def __init__(self, blob_client: AzureBlobStorageClient, env_helper: EnvHelper): self.llm_helper = LLMHelper() self.azure_search_helper = AzureSearchHelper() + self.azure_computer_vision_client = AzureComputerVisionClient(env_helper) self.document_loading = DocumentLoading() self.document_chunking = DocumentChunking() self.blob_client = blob_client - config = ConfigHelper.get_active_config_or_default() + self.config = ConfigHelper.get_active_config_or_default() self.embedding_configs = {} - for processor in config.document_processors: + for processor in self.config.document_processors: ext = 
processor.document_type.lower() self.embedding_configs[ext] = processor def embed_file(self, source_url: str, file_name: str): file_extension = file_name.split(".")[-1] embedding_config = self.embedding_configs.get(file_extension) - self.__embed(source_url=source_url, embedding_config=embedding_config) + self.__embed( + source_url=source_url, + file_extension=file_extension, + embedding_config=embedding_config, + ) if file_extension != "url": self.blob_client.upsert_blob_metadata( file_name, {"embeddings_added": "true"} ) - def __embed(self, source_url: str, embedding_config: EmbeddingConfig): + def __embed( + self, source_url: str, file_extension: str, embedding_config: EmbeddingConfig + ): documents_to_upload: List[SourceDocument] = [] - if not embedding_config.use_advanced_image_processing: + if ( + embedding_config.use_advanced_image_processing + and file_extension + in self.config.get_advanced_image_processing_image_types() + ): + logger.warning("Advanced image processing is not supported yet") + image_vectors = self.azure_computer_vision_client.vectorize_image( + source_url + ) + logger.info("Image vectors: " + str(image_vectors)) + # Coming soon, storing the image embeddings in Azure Search + else: documents: List[SourceDocument] = self.document_loading.load( source_url, embedding_config.loading ) @@ -59,9 +79,6 @@ def __embed(self, source_url: str, embedding_config: EmbeddingConfig): if not all([r.succeeded for r in response]): raise Exception(response) - else: - logger.warning("Advanced image processing is not supported yet") - def _convert_to_search_document(self, document: SourceDocument): embedded_content = self.llm_helper.generate_embeddings(document.content) metadata = { diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py index 6f4634869..138ecd890 100644 --- a/code/backend/batch/utilities/helpers/env_helper.py +++ b/code/backend/batch/utilities/helpers/env_helper.py @@ -111,6 +111,18 @@ 
def __load_config(self, **kwargs) -> None: self.USE_ADVANCED_IMAGE_PROCESSING = self.get_env_var_bool( "USE_ADVANCED_IMAGE_PROCESSING", "False" ) + self.AZURE_COMPUTER_VISION_ENDPOINT = os.getenv( + "AZURE_COMPUTER_VISION_ENDPOINT" + ) + self.AZURE_COMPUTER_VISION_TIMEOUT = self.get_env_var_float( + "AZURE_COMPUTER_VISION_TIMEOUT", 30 + ) + self.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION = os.getenv( + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION", "2024-02-01" + ) + self.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION = os.getenv( + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION", "2023-04-15" + ) # Initialize Azure keys based on authentication type and environment settings. # When AZURE_AUTH_TYPE is "rbac", azure keys are None or an empty string. @@ -118,6 +130,7 @@ def __load_config(self, **kwargs) -> None: self.AZURE_SEARCH_KEY = None self.AZURE_OPENAI_API_KEY = "" self.AZURE_SPEECH_KEY = None + self.AZURE_COMPUTER_VISION_KEY = None else: self.AZURE_SEARCH_KEY = self.secretHelper.get_secret("AZURE_SEARCH_KEY") self.AZURE_OPENAI_API_KEY = self.secretHelper.get_secret( @@ -126,6 +139,9 @@ def __load_config(self, **kwargs) -> None: self.AZURE_SPEECH_KEY = self.secretHelper.get_secret( "AZURE_SPEECH_SERVICE_KEY" ) + self.AZURE_COMPUTER_VISION_KEY = self.secretHelper.get_secret( + "AZURE_COMPUTER_VISION_KEY" + ) # Set env for Azure OpenAI self.AZURE_OPENAI_ENDPOINT = os.environ.get( @@ -221,6 +237,9 @@ def get_env_var_bool(self, var_name: str, default: str = "True") -> bool: def get_env_var_array(self, var_name: str, default: str = ""): return os.getenv(var_name, default).split(",") + def get_env_var_float(self, var_name: str, default: int): + return float(os.getenv(var_name, default)) + def is_auth_type_keys(self): return self.AZURE_AUTH_TYPE == "keys" diff --git a/code/tests/conftest.py b/code/tests/conftest.py index e69de29bb..dff73fb36 100644 --- a/code/tests/conftest.py +++ b/code/tests/conftest.py @@ -0,0 +1,35 @@ +import ssl + +import 
pytest +import trustme + + +@pytest.fixture(scope="session") +def ca(): + """ + This fixture is required to run the http mock server with SSL. + https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server + """ + return trustme.CA() + + +@pytest.fixture(scope="session") +def httpserver_ssl_context(ca): + """ + This fixture is required to run the http mock server with SSL. + https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server + """ + context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + localhost_cert = ca.issue_cert("localhost") + localhost_cert.configure_cert(context) + return context + + +@pytest.fixture(scope="session") +def httpclient_ssl_context(ca): + """ + This fixture is required to run the http mock server with SSL. + https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server + """ + with ca.cert_pem.tempfile() as ca_temp_path: + return ssl.create_default_context(cafile=ca_temp_path) diff --git a/code/tests/constants.py b/code/tests/constants.py new file mode 100644 index 000000000..d29977139 --- /dev/null +++ b/code/tests/constants.py @@ -0,0 +1,5 @@ +AZURE_STORAGE_CONFIG_CONTAINER_NAME = "config" +AZURE_STORAGE_CONFIG_FILE_NAME = "active.json" + +COMPUTER_VISION_VECTORIZE_IMAGE_PATH = "/computervision/retrieval:vectorizeImage" +COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD = "POST" diff --git a/code/tests/functional/app_config.py b/code/tests/functional/app_config.py index aaef55d84..ae1569027 100644 --- a/code/tests/functional/app_config.py +++ b/code/tests/functional/app_config.py @@ -15,6 +15,7 @@ class AppConfig: ), "AZURE_BLOB_ACCOUNT_NAME": "some-blob-account-name", "AZURE_BLOB_CONTAINER_NAME": "some-blob-container-name", + "AZURE_COMPUTER_VISION_KEY": "some-computer-vision-key", "AZURE_CONTENT_SAFETY_ENDPOINT": "some-content-safety-endpoint", "AZURE_CONTENT_SAFETY_KEY": "some-content-safety-key", "AZURE_FORM_RECOGNIZER_ENDPOINT": "some-form-recognizer-endpoint", 
diff --git a/code/tests/functional/conftest.py b/code/tests/functional/conftest.py index 173d16dc4..970a7372d 100644 --- a/code/tests/functional/conftest.py +++ b/code/tests/functional/conftest.py @@ -1,54 +1,23 @@ -import ssl import pytest from pytest_httpserver import HTTPServer from tests.functional.app_config import AppConfig -from backend.batch.utilities.helpers.config.config_helper import ( - CONFIG_CONTAINER_NAME, - CONFIG_FILE_NAME, +from tests.constants import ( + AZURE_STORAGE_CONFIG_CONTAINER_NAME, + AZURE_STORAGE_CONFIG_FILE_NAME, + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, ) -import trustme - - -@pytest.fixture(scope="session") -def ca(): - """ - This fixture is required to run the http mock server with SSL. - https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server - """ - return trustme.CA() - - -@pytest.fixture(scope="session") -def httpserver_ssl_context(ca): - """ - This fixture is required to run the http mock server with SSL. - https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server - """ - context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) - localhost_cert = ca.issue_cert("localhost") - localhost_cert.configure_cert(context) - return context - - -@pytest.fixture(scope="session") -def httpclient_ssl_context(ca): - """ - This fixture is required to run the http mock server with SSL. 
- https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server - """ - with ca.cert_pem.tempfile() as ca_temp_path: - return ssl.create_default_context(cafile=ca_temp_path) @pytest.fixture(scope="function", autouse=True) def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): httpserver.expect_request( - f"/{CONFIG_CONTAINER_NAME}/{CONFIG_FILE_NAME}", + f"/{AZURE_STORAGE_CONFIG_CONTAINER_NAME}/{AZURE_STORAGE_CONFIG_FILE_NAME}", method="HEAD", ).respond_with_data() httpserver.expect_request( - f"/{CONFIG_CONTAINER_NAME}/{CONFIG_FILE_NAME}", + f"/{AZURE_STORAGE_CONFIG_CONTAINER_NAME}/{AZURE_STORAGE_CONFIG_FILE_NAME}", method="GET", ).respond_with_json( { @@ -233,6 +202,11 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): method="POST", ).respond_with_data("speech-token") + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json({"modelVersion": "2022-04-11", "vector": [1.0, 2.0, 3.0]}) + yield httpserver.check() diff --git a/code/tests/functional/tests/backend_api/default/test_azure_byod.py b/code/tests/functional/tests/backend_api/default/test_azure_byod.py index 4e9738b21..8d1e22226 100644 --- a/code/tests/functional/tests/backend_api/default/test_azure_byod.py +++ b/code/tests/functional/tests/backend_api/default/test_azure_byod.py @@ -4,7 +4,7 @@ import requests from string import Template -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/default/test_conversation_custom.py b/code/tests/functional/tests/backend_api/default/test_conversation_custom.py index e9283d0ed..ad0bfd80f 100644 --- a/code/tests/functional/tests/backend_api/default/test_conversation_custom.py +++ b/code/tests/functional/tests/backend_api/default/test_conversation_custom.py @@ -4,7 +4,7 @@ from pytest_httpserver 
import HTTPServer import requests -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/default/test_speech_token.py b/code/tests/functional/tests/backend_api/default/test_speech_token.py index 1e61e0c75..1388b0cd5 100644 --- a/code/tests/functional/tests/backend_api/default/test_speech_token.py +++ b/code/tests/functional/tests/backend_api/default/test_speech_token.py @@ -2,7 +2,7 @@ import requests from pytest_httpserver import HTTPServer from tests.functional.app_config import AppConfig -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py index 783c8006a..ec99d5203 100644 --- a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py +++ b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py @@ -2,7 +2,7 @@ from pytest_httpserver import HTTPServer import requests -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_text_processing_tool.py b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_text_processing_tool.py index 010b41758..d98cbcde4 100644 --- a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_text_processing_tool.py +++ b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_text_processing_tool.py @@ -2,7 +2,7 @@ from pytest_httpserver import HTTPServer import requests -from tests.functional.request_matching import ( +from 
tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py index 5b19ab704..ce9002a06 100644 --- a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py +++ b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py @@ -4,7 +4,7 @@ from pytest_httpserver import HTTPServer import requests -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py b/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py index d8787d69a..24dd49033 100644 --- a/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py +++ b/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py @@ -4,7 +4,7 @@ import requests from string import Template -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/functions/conftest.py b/code/tests/functional/tests/functions/conftest.py index e03864398..d4102207f 100644 --- a/code/tests/functional/tests/functions/conftest.py +++ b/code/tests/functional/tests/functions/conftest.py @@ -18,6 +18,7 @@ def app_config(make_httpserver, ca): "AZURE_CONTENT_SAFETY_ENDPOINT": f"https://localhost:{make_httpserver.port}/", "AZURE_SPEECH_REGION_ENDPOINT": f"https://localhost:{make_httpserver.port}/", "AZURE_STORAGE_ACCOUNT_ENDPOINT": f"https://localhost:{make_httpserver.port}/", + "AZURE_COMPUTER_VISION_ENDPOINT": f"https://localhost:{make_httpserver.port}/", "SSL_CERT_FILE": ca_temp_path, "CURL_CA_BUNDLE": ca_temp_path, } diff --git 
a/code/tests/functional/tests/functions/test_advanced_image_processing.py b/code/tests/functional/tests/functions/test_advanced_image_processing.py index 42550f2a4..89867ce40 100644 --- a/code/tests/functional/tests/functions/test_advanced_image_processing.py +++ b/code/tests/functional/tests/functions/test_advanced_image_processing.py @@ -5,13 +5,15 @@ from azure.functions import QueueMessage import pytest -from backend.batch.utilities.helpers.config.config_helper import ( - CONFIG_CONTAINER_NAME, - CONFIG_FILE_NAME, -) from pytest_httpserver import HTTPServer from tests.functional.app_config import AppConfig -from tests.functional.request_matching import RequestMatcher, verify_request_made +from tests.request_matching import RequestMatcher, verify_request_made +from tests.constants import ( + AZURE_STORAGE_CONFIG_FILE_NAME, + AZURE_STORAGE_CONFIG_CONTAINER_NAME, + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, +) sys.path.append( os.path.join(os.path.dirname(sys.path[0]), "..", "..", "backend", "batch") @@ -78,7 +80,7 @@ def test_config_file_is_retrieved_from_storage( verify_request_made( mock_httpserver=httpserver, request_matcher=RequestMatcher( - path=f"/{CONFIG_CONTAINER_NAME}/{CONFIG_FILE_NAME}", + path=f"/{AZURE_STORAGE_CONFIG_CONTAINER_NAME}/{AZURE_STORAGE_CONFIG_FILE_NAME}", method="GET", headers={ "Authorization": ANY, @@ -88,6 +90,34 @@ def test_config_file_is_retrieved_from_storage( ) +def test_image_passed_to_computer_vision_to_generate_image_embeddings( + message: QueueMessage, httpserver: HTTPServer, app_config: AppConfig +): + # when + batch_push_results.build().get_user_function()(message) + + # then + request = verify_request_made( + httpserver, + RequestMatcher( + path=COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + method=COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + query_string="api-version=2024-02-01&model-version=2023-04-15", + headers={ + "Content-Type": "application/json", + 
"Ocp-Apim-Subscription-Key": app_config.get( + "AZURE_COMPUTER_VISION_KEY" + ), + }, + times=1, + ), + )[0] + + assert request.get_json()["url"].startswith( + f"{app_config.get('AZURE_COMPUTER_VISION_ENDPOINT')}{app_config.get('AZURE_BLOB_CONTAINER_NAME')}/{FILE_NAME}" + ) + + def test_metadata_is_updated_after_processing( message: QueueMessage, httpserver: HTTPServer, app_config: AppConfig ): diff --git a/code/tests/functional/request_matching.py b/code/tests/request_matching.py similarity index 94% rename from code/tests/functional/request_matching.py rename to code/tests/request_matching.py index e637e2092..2cb91fc98 100644 --- a/code/tests/functional/request_matching.py +++ b/code/tests/request_matching.py @@ -30,7 +30,9 @@ def __str__(self): return f"Path: {self.path}, Method: {self.method}, JSON: {self.json}, Headers: {self.headers}, Query String: {self.query_string}, Times: {self.times}" -def verify_request_made(mock_httpserver: HTTPServer, request_matcher: RequestMatcher): +def verify_request_made( + mock_httpserver: HTTPServer, request_matcher: RequestMatcher +) -> list[Request]: requests_log = mock_httpserver.log similar_requests = [] @@ -71,6 +73,7 @@ def verify_request_made(mock_httpserver: HTTPServer, request_matcher: RequestMat error_message += "\n--- Similar Request End" assert len(matching_requests) == request_matcher.times, error_message + return matching_requests def contains_all_headers(request_matcher: RequestMatcher, request: Request): diff --git a/code/tests/utilities/helpers/test_AzureComputerVisionClient.py b/code/tests/utilities/helpers/test_AzureComputerVisionClient.py new file mode 100644 index 000000000..40f9d530d --- /dev/null +++ b/code/tests/utilities/helpers/test_AzureComputerVisionClient.py @@ -0,0 +1,244 @@ +import json +from json import JSONDecodeError +from unittest import mock +from unittest.mock import MagicMock +import pytest +from pytest_httpserver import HTTPServer +from trustme import CA +import werkzeug +import time +from 
requests import ReadTimeout + +from backend.batch.utilities.helpers.azure_computer_vision_client import ( + AzureComputerVisionClient, +) +from tests.request_matching import RequestMatcher, verify_request_made +from tests.constants import ( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, +) + + +# These tests utilize `pytest_httpserver` to mock the Azure Computer Vision API. This is instead of mocking the requests +# library directly, like other client classes. The reasons for doing this are: +# 1. This gives us complete confidence that the requests library works as we expect it to, for example parsing of bad +# json. +# 2. It allows us to test the actual HTTP request that is being made to the Azure Computer Vision API. +# 3. If we need to change which http library we are using, there should be minimal changes required to the tests. +# +# If and when the Azure Computer Vision Python SDK starts to support the `vectorizeImage` and `vectorizeText` endpoints, +# and we switch to it, we should consider switching back to conventional test mocking. 
+ +IMAGE_URL = "some-image-url.jpg" +AZURE_COMPUTER_VISION_KEY = "some-api-key" + + +@pytest.fixture(autouse=True) +def pytest_ssl(monkeypatch: pytest.MonkeyPatch, ca: CA): + with ca.cert_pem.tempfile() as ca_temp_path: + monkeypatch.setenv("SSL_CERT_FILE", ca_temp_path) + monkeypatch.setenv("CURL_CA_BUNDLE", ca_temp_path) + yield + + +@pytest.fixture +def env_helper_mock(httpserver: HTTPServer): + env_helper_mock = MagicMock() + env_helper_mock.AZURE_COMPUTER_VISION_ENDPOINT = httpserver.url_for("") + env_helper_mock.AZURE_COMPUTER_VISION_KEY = AZURE_COMPUTER_VISION_KEY + env_helper_mock.AZURE_COMPUTER_VISION_TIMEOUT = 0.25 + env_helper_mock.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION = "2024-02-01" + env_helper_mock.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION = "2023-04-15" + env_helper_mock.is_auth_type_keys.return_value = True + return env_helper_mock + + +@pytest.fixture +def azure_computer_vision_client(env_helper_mock: MagicMock): + return AzureComputerVisionClient(env_helper_mock) + + +@pytest.fixture +def azure_computer_vision_client_rbac(env_helper_mock: MagicMock): + env_helper_mock.is_auth_type_keys.return_value = False + return AzureComputerVisionClient(env_helper_mock) + + +def test_vectorize_image_calls_computer_vision_with_key_based_authentication( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json({"modelVersion": "2022-04-11", "vector": [1.0, 2.0, 3.0]}) + + # when + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + verify_request_made( + httpserver, + RequestMatcher( + path=COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + method=COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + query_string="api-version=2024-02-01&model-version=2023-04-15", + headers={ + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": 
AZURE_COMPUTER_VISION_KEY, + }, + json={"url": IMAGE_URL}, + ), + ) + + +@mock.patch( + "backend.batch.utilities.helpers.azure_computer_vision_client.DefaultAzureCredential" +) +@mock.patch( + "backend.batch.utilities.helpers.azure_computer_vision_client.get_bearer_token_provider" +) +def test_vectorize_image_calls_computer_vision_with_rbac_based_authentication( + mock_get_bearer_token_provider: MagicMock, + mock_default_azure_credential: MagicMock, + httpserver: HTTPServer, + azure_computer_vision_client_rbac: AzureComputerVisionClient, +): + # given + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json({"modelVersion": "2022-04-11", "vector": [1.0, 2.0, 3.0]}) + + # when + mock_get_bearer_token_provider.return_value.return_value = "dummy token" + + azure_computer_vision_client_rbac.vectorize_image(IMAGE_URL) + + # then + mock_default_azure_credential.assert_called_once() + mock_get_bearer_token_provider.assert_called_once_with( + mock_default_azure_credential.return_value, + "https://cognitiveservices.azure.com/.default", + ) + + verify_request_made( + httpserver, + RequestMatcher( + path=COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + method=COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + query_string="api-version=2024-02-01&model-version=2023-04-15", + headers={ + "Content-Type": "application/json", + "Authorization": "Bearer dummy token", + }, + json={"url": IMAGE_URL}, + ), + ) + + +def test_returns_image_vectors( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + expected_vectors = [1.0, 2.0, 3.0] + + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json({"modelVersion": "2022-04-11", "vector": expected_vectors}) + + # when + actual_vectors = azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + assert actual_vectors == 
expected_vectors + + +def test_vectorize_image_calls_computer_vision_timeout( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + def handler(_) -> werkzeug.Response: + time.sleep(0.3) + return werkzeug.Response( + json.dumps({"modelVersion": "2022-04-11", "vector": [1.0, 2.0, 3.0]}), + status=200, + ) + + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_handler(handler) + + # when + with pytest.raises(Exception) as exec_info: + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + assert exec_info.value.args[0] == "Call to vectorize image failed: " + IMAGE_URL + assert isinstance(exec_info.value.__cause__, ReadTimeout) + + +def test_raises_exception_if_bad_response_code( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + response_body = {"error": "computer says no"} + response_status = 500 + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json(response_body, status=response_status) + + # when + with pytest.raises(Exception) as exec_info: + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + assert ( + exec_info.value.args[0] + == f"Call to vectorize image failed with status: {response_status} body: {json.dumps(response_body, indent=4)}" + ) + + +def test_raises_exception_if_non_json_response( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + response_body = "not json" + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_data(response_body, status=200) + + # when + with pytest.raises(Exception) as exec_info: + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + assert ( + exec_info.value.args[0] + == f"Call to vectorize image returned 
malformed response body: {response_body}" + ) + assert isinstance(exec_info.value.__cause__, JSONDecodeError) + + +def test_raises_exception_if_vector_not_in_response( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + response_body = {"modelVersion": "2022-04-11"} + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json(response_body, status=200) + + # when + with pytest.raises(Exception) as exec_info: + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + assert ( + exec_info.value.args[0] + == f"Call to vectorize image returned no vector: {response_body}" + ) diff --git a/code/tests/utilities/helpers/test_config_helper.py b/code/tests/utilities/helpers/test_config_helper.py index 868697af1..7be1a05d6 100644 --- a/code/tests/utilities/helpers/test_config_helper.py +++ b/code/tests/utilities/helpers/test_config_helper.py @@ -125,7 +125,9 @@ def blob_client_mock(config_dict: dict, AzureBlobStorageClientMock: MagicMock): @pytest.fixture(autouse=True) def env_helper_mock(): - with patch("backend.batch.utilities.helpers.config.config_helper.EnvHelper") as mock: + with patch( + "backend.batch.utilities.helpers.config.config_helper.EnvHelper" + ) as mock: env_helper = mock.return_value env_helper.ORCHESTRATION_STRATEGY = "openai_function" env_helper.LOAD_CONFIG_FROM_BLOB_STORAGE = True @@ -263,6 +265,37 @@ def test_save_config_as_active( ) +def test_save_config_as_active_validates_advanced_image_file_types_are_valid( + AzureBlobStorageClientMock: MagicMock, + config_dict: dict, +): + # given + config_dict["document_processors"] = [ + { + "document_type": "txt", + "chunking": { + "strategy": "layout", + "size": 500, + "overlap": 100, + }, + "loading": { + "strategy": "web", + }, + "use_advanced_image_processing": True, + } + ] + + # when + with pytest.raises(Exception) as e: + ConfigHelper.save_config_as_active(config_dict) + 
+ # then + assert str(e.value) == ( + "Advanced image processing has been enabled for document type txt, but only ['jpeg', 'jpg', 'png', 'tiff', 'bmp'] file types are supported." + ) + AzureBlobStorageClientMock.assert_not_called() + + def test_delete_config(AzureBlobStorageClientMock: MagicMock): # when ConfigHelper.delete_config() @@ -362,6 +395,14 @@ def test_get_available_document_types_when_advanced_image_processing_enabled( ) +def test_get_advanced_image_processing_image_types(config: Config): + # when + image_types = config.get_advanced_image_processing_image_types() + + # then + assert sorted(image_types) == sorted(["jpeg", "jpg", "png", "tiff", "bmp"]) + + def test_get_available_chunking_strategies(config: Config): # when chunking_strategies = config.get_available_chunking_strategies() diff --git a/code/tests/utilities/helpers/test_env_helper.py b/code/tests/utilities/helpers/test_env_helper.py index 373d46108..aff7090bc 100644 --- a/code/tests/utilities/helpers/test_env_helper.py +++ b/code/tests/utilities/helpers/test_env_helper.py @@ -83,6 +83,7 @@ def test_keys_are_unset_when_auth_type_rbac(monkeypatch: MonkeyPatch): assert env_helper.AZURE_SEARCH_KEY is None assert env_helper.AZURE_OPENAI_API_KEY == "" assert env_helper.AZURE_SPEECH_KEY is None + assert env_helper.AZURE_COMPUTER_VISION_KEY is None def test_sets_default_log_level_when_unset(): diff --git a/code/tests/utilities/helpers/test_push_embedder.py b/code/tests/utilities/helpers/test_push_embedder.py index 015382902..df2fc034c 100644 --- a/code/tests/utilities/helpers/test_push_embedder.py +++ b/code/tests/utilities/helpers/test_push_embedder.py @@ -53,6 +53,11 @@ def mock_config_helper(): use_advanced_image_processing=False, ), ] + config_helper.get_advanced_image_processing_image_types.return_value = { + "jpeg", + "jpg", + "png", + } yield config_helper @@ -99,11 +104,19 @@ def document_chunking_mock(): yield mock -def test_embed_file_use_advanced_image_processing_skips_processing( 
+@pytest.fixture(autouse=True) +def azure_computer_vision_mock(): + with patch( + "backend.batch.utilities.helpers.embedders.push_embedder.AzureComputerVisionClient" + ) as mock: + yield mock + + +def test_embed_file_advanced_image_processing_skips_document_processing( azure_search_helper_mock, ): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) # when push_embedder.embed_file("some-url", "some-file-name.jpg") @@ -112,9 +125,49 @@ def test_embed_file_use_advanced_image_processing_skips_processing( azure_search_helper_mock.return_value.get_search_client.assert_not_called() +def test_embed_file_advanced_image_processing_vectorizes_image( + azure_computer_vision_mock, +): + # given + push_embedder = PushEmbedder(MagicMock(), MagicMock()) + source_url = "http://localhost:8080/some-file-name.jpg" + + # when + push_embedder.embed_file(source_url, "some-file-name.jpg") + + # then + azure_computer_vision_mock.return_value.vectorize_image.assert_called_once_with( + source_url + ) + + +def test_embed_file_use_advanced_image_processing_does_not_vectorize_image_if_unsupported( + azure_computer_vision_mock, mock_config_helper, azure_search_helper_mock +): + # given + mock_config_helper.document_processors = [ + EmbeddingConfig( + "txt", + CHUNKING_SETTINGS, + LOADING_SETTINGS, + use_advanced_image_processing=True, + ), + ] + + push_embedder = PushEmbedder(MagicMock(), MagicMock()) + source_url = "http://localhost:8080/some-file-name.txt" + + # when + push_embedder.embed_file(source_url, "some-file-name.txt") + + # then + azure_computer_vision_mock.return_value.vectorize_image.assert_not_called() + azure_search_helper_mock.return_value.get_search_client.assert_called_once() + + def test_embed_file_loads_documents(document_loading_mock): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) source_url = "some-url" # when @@ -131,7 +184,7 @@ def 
test_embed_file_loads_documents(document_loading_mock): def test_embed_file_chunks_documents(document_loading_mock, document_chunking_mock): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) # when push_embedder.embed_file( @@ -147,7 +200,7 @@ def test_embed_file_chunks_documents(document_loading_mock, document_chunking_mo def test_embed_file_generates_embeddings_for_documents(llm_helper_mock): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) # when push_embedder.embed_file( @@ -167,7 +220,7 @@ def test_embed_file_stores_documents_in_search_index( azure_search_helper_mock, ): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) # when push_embedder.embed_file( @@ -227,7 +280,7 @@ def test_embed_file_raises_exception_on_failure( azure_search_helper_mock, ): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) successful_indexing_result = MagicMock() successful_indexing_result.succeeded = True diff --git a/infra/app/function.bicep b/infra/app/function.bicep index 1460fb8b9..399e54a79 100644 --- a/infra/app/function.bicep +++ b/infra/app/function.bicep @@ -16,11 +16,13 @@ param azureAISearchName string = '' param formRecognizerName string = '' param contentSafetyName string = '' param speechServiceName string = '' +param computerVisionName string = '' param useKeyVault bool param openAIKeyName string = '' param storageAccountKeyName string = '' param formRecognizerKeyName string = '' param searchKeyName string = '' +param computerVisionKeyName string = '' param contentSafetyKeyName string = '' param speechKeyName string = '' param authType string @@ -49,6 +51,7 @@ module function '../core/host/functions.bicep' = { AZURE_FORM_RECOGNIZER_KEY: useKeyVault ? 
formRecognizerKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', formRecognizerName), '2023-05-01').key1 AZURE_CONTENT_SAFETY_KEY: useKeyVault ? contentSafetyKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', contentSafetyName), '2023-05-01').key1 AZURE_SPEECH_SERVICE_KEY: useKeyVault ? speechKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', speechServiceName), '2023-05-01').key1 + AZURE_COMPUTER_VISION_KEY: useKeyVault ? computerVisionKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 }) } } diff --git a/infra/app/storekeys.bicep b/infra/app/storekeys.bicep index 5075b9d4b..d4498025b 100644 --- a/infra/app/storekeys.bicep +++ b/infra/app/storekeys.bicep @@ -6,12 +6,14 @@ param rgName string = '' param formRecognizerName string = '' param contentSafetyName string = '' param speechServiceName string = '' +param computerVisionName string = '' param storageAccountKeyName string = 'AZURE-STORAGE-ACCOUNT-KEY' param openAIKeyName string = 'AZURE-OPENAI-API-KEY' param searchKeyName string = 'AZURE-SEARCH-KEY' param formRecognizerKeyName string = 'AZURE-FORM-RECOGNIZER-KEY' param contentSafetyKeyName string = 'AZURE-CONTENT-SAFETY-KEY' param speechKeyName string = 'AZURE-SPEECH-KEY' +param computerVisionKeyName string = 'AZURE-COMPUTER-VISION-KEY' resource storageAccountKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { parent: keyVault @@ -61,6 +63,14 @@ resource speechKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { } } +resource computerVisionKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { + parent: keyVault + name: computerVisionKeyName + properties: { + value: listKeys(resourceId(subscription().subscriptionId, 
rgName, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 + } +} + resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = { name: keyVaultName } @@ -71,3 +81,4 @@ output SEARCH_KEY_NAME string = searchKeySecret.name output OPENAI_KEY_NAME string = openAIKeySecret.name output STORAGE_ACCOUNT_KEY_NAME string = storageAccountKeySecret.name output SPEECH_KEY_NAME string = speechKeySecret.name +output COMPUTER_VISION_KEY_NAME string = computerVisionKeySecret.name diff --git a/infra/main.bicep b/infra/main.bicep index f19c80f28..fca9bb6b7 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -180,6 +180,12 @@ param computerVisionSkuName string = 'S1' ]) param computerVisionLocation string = useAdvancedImageProcessing ? location : '' +@description('Azure Computer Vision Vectorize Image API Version') +param computerVisionVectorizeImageApiVersion string = '2024-02-01' + +@description('Azure Computer Vision Vectorize Image Model Version') +param computerVisionVectorizeImageModelVersion string ='2023-04-15' + @description('Azure AI Search Resource') param azureAISearchName string = 'search-${resourceToken}' @@ -424,6 +430,7 @@ module storekeys './app/storekeys.bicep' = if (useKeyVault) { formRecognizerName: formrecognizer.outputs.name contentSafetyName: contentsafety.outputs.name speechServiceName: speechServiceName + computerVisionName: computerVision.outputs.name rgName: rgName } } @@ -651,6 +658,7 @@ module adminweb './app/adminweb.bicep' = if (hostingModel == 'code') { AZURE_SEARCH_DATASOURCE_NAME: azureSearchDatasource AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization + USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing BACKEND_URL: 'https://${functionName}.azurewebsites.net' DOCUMENT_PROCESSING_QUEUE_NAME: queueName FUNCTION_KEY: clientKey @@ -716,6 +724,7 @@ module adminweb_docker './app/adminweb.bicep' = if (hostingModel == 'container') 
AZURE_SEARCH_DATASOURCE_NAME: azureSearchDatasource AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization + USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing BACKEND_URL: 'https://${functionName}-docker.azurewebsites.net' DOCUMENT_PROCESSING_QUEUE_NAME: queueName FUNCTION_KEY: clientKey @@ -774,6 +783,7 @@ module function './app/function.bicep' = if (hostingModel == 'code') { formRecognizerName: formrecognizer.outputs.name contentSafetyName: contentsafety.outputs.name speechServiceName: speechService.outputs.name + computerVisionName: computerVision.outputs.name clientKey: clientKey openAIKeyName: useKeyVault ? storekeys.outputs.OPENAI_KEY_NAME : '' storageAccountKeyName: useKeyVault ? storekeys.outputs.STORAGE_ACCOUNT_KEY_NAME : '' @@ -781,12 +791,15 @@ module function './app/function.bicep' = if (hostingModel == 'code') { searchKeyName: useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : '' contentSafetyKeyName: useKeyVault ? storekeys.outputs.CONTENT_SAFETY_KEY_NAME : '' speechKeyName: useKeyVault ? storekeys.outputs.SPEECH_KEY_NAME : '' + computerVisionKeyName: useKeyVault ? storekeys.outputs.COMPUTER_VISION_KEY_NAME : '' useKeyVault: useKeyVault keyVaultName: useKeyVault || authType == 'rbac' ? 
keyvault.outputs.name : '' authType: authType appSettings: { AZURE_BLOB_ACCOUNT_NAME: storageAccountName AZURE_BLOB_CONTAINER_NAME: blobContainerName + AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION: computerVisionVectorizeImageApiVersion + AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION: computerVisionVectorizeImageModelVersion AZURE_CONTENT_SAFETY_ENDPOINT: contentsafety.outputs.endpoint AZURE_FORM_RECOGNIZER_ENDPOINT: formrecognizer.outputs.endpoint AZURE_OPENAI_MODEL: azureOpenAIModel @@ -798,6 +811,8 @@ module function './app/function.bicep' = if (hostingModel == 'code') { AZURE_SEARCH_DATASOURCE_NAME: azureSearchDatasource AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization + USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing + AZURE_COMPUTER_VISION_ENDPOINT: computerVision.outputs.endpoint DOCUMENT_PROCESSING_QUEUE_NAME: queueName ORCHESTRATION_STRATEGY: orchestrationStrategy LOGLEVEL: logLevel @@ -828,12 +843,15 @@ module function_docker './app/function.bicep' = if (hostingModel == 'container') searchKeyName: useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : '' contentSafetyKeyName: useKeyVault ? storekeys.outputs.CONTENT_SAFETY_KEY_NAME : '' speechKeyName: useKeyVault ? storekeys.outputs.SPEECH_KEY_NAME : '' + computerVisionKeyName: useKeyVault ? storekeys.outputs.COMPUTER_VISION_KEY_NAME : '' useKeyVault: useKeyVault keyVaultName: useKeyVault || authType == 'rbac' ? 
keyvault.outputs.name : '' authType: authType appSettings: { AZURE_BLOB_ACCOUNT_NAME: storageAccountName AZURE_BLOB_CONTAINER_NAME: blobContainerName + AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION: computerVisionVectorizeImageApiVersion + AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION: computerVisionVectorizeImageModelVersion AZURE_CONTENT_SAFETY_ENDPOINT: contentsafety.outputs.endpoint AZURE_FORM_RECOGNIZER_ENDPOINT: formrecognizer.outputs.endpoint AZURE_OPENAI_MODEL: azureOpenAIModel @@ -845,6 +863,8 @@ module function_docker './app/function.bicep' = if (hostingModel == 'container') AZURE_SEARCH_DATASOURCE_NAME: azureSearchDatasource AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization + USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing + AZURE_COMPUTER_VISION_ENDPOINT: computerVision.outputs.endpoint DOCUMENT_PROCESSING_QUEUE_NAME: queueName ORCHESTRATION_STRATEGY: orchestrationStrategy LOGLEVEL: logLevel @@ -970,6 +990,8 @@ output AZURE_APP_SERVICE_HOSTING_MODEL string = hostingModel output AZURE_BLOB_CONTAINER_NAME string = blobContainerName output AZURE_BLOB_ACCOUNT_NAME string = storageAccountName output AZURE_BLOB_ACCOUNT_KEY string = useKeyVault ? storekeys.outputs.STORAGE_ACCOUNT_KEY_NAME : '' +output AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION string = computerVisionVectorizeImageApiVersion +output AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION string = computerVisionVectorizeImageModelVersion output AZURE_CONTENT_SAFETY_ENDPOINT string = contentsafety.outputs.endpoint output AZURE_CONTENT_SAFETY_KEY string = useKeyVault ? 
storekeys.outputs.CONTENT_SAFETY_KEY_NAME : '' output AZURE_FORM_RECOGNIZER_ENDPOINT string = formrecognizer.outputs.endpoint @@ -989,6 +1011,7 @@ output AZURE_OPENAI_RESOURCE string = azureOpenAIResourceName output AZURE_OPENAI_EMBEDDING_MODEL string = azureOpenAIEmbeddingModel output AZURE_OPENAI_MODEL string = azureOpenAIModel output AZURE_OPENAI_API_KEY string = useKeyVault ? storekeys.outputs.OPENAI_KEY_NAME : '' +output AZURE_COMPUTER_VISION_ENDPOINT string = computerVision.outputs.endpoint output AZURE_RESOURCE_GROUP string = rgName output AZURE_SEARCH_KEY string = useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : '' output AZURE_SEARCH_SERVICE string = search.outputs.endpoint diff --git a/infra/main.bicepparam b/infra/main.bicepparam index 948df0d5b..2aaec96f4 100644 --- a/infra/main.bicepparam +++ b/infra/main.bicepparam @@ -35,7 +35,11 @@ param azureOpenAIMaxTokens = readEnvironmentVariable('AZURE_OPENAI_MAX_TOKENS', param azureOpenAITemperature = readEnvironmentVariable('AZURE_OPENAI_TEMPERATURE', '0') param azureOpenAITopP = readEnvironmentVariable('AZURE_OPENAI_TOP_P', '1') param azureOpenAIStopSequence = readEnvironmentVariable('AZURE_OPENAI_STOP_SEQUENCE', '\n') + +// Computer Vision parameters param computerVisionLocation = readEnvironmentVariable('AZURE_COMPUTER_VISION_LOCATION', '') +param computerVisionVectorizeImageApiVersion = readEnvironmentVariable('AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION', '2024-02-01') +param computerVisionVectorizeImageModelVersion = readEnvironmentVariable('AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION', '2023-04-15') // The following are being renamed to align with the new naming convention // we manipulate existing resources here to maintain backwards compatibility diff --git a/infra/main.json b/infra/main.json index cdb55d8da..915088a7c 100644 --- a/infra/main.json +++ b/infra/main.json @@ -5,7 +5,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "17426906878691848997" + 
"templateHash": "95501043309266990" } }, "parameters": { @@ -366,6 +366,20 @@ "description": "Location of Computer Vision Resource (if useAdvancedImageProcessing=true)" } }, + "computerVisionVectorizeImageApiVersion": { + "type": "string", + "defaultValue": "2024-02-01", + "metadata": { + "description": "Azure Computer Vision Vectorize Image API Version" + } + }, + "computerVisionVectorizeImageModelVersion": { + "type": "string", + "defaultValue": "2023-04-15", + "metadata": { + "description": "Azure Computer Vision Vectorize Image Model Version" + } + }, "azureAISearchName": { "type": "string", "defaultValue": "[format('search-{0}', parameters('resourceToken'))]", @@ -1438,6 +1452,9 @@ "speechServiceName": { "value": "[parameters('speechServiceName')]" }, + "computerVisionName": { + "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.name.value]" + }, "rgName": { "value": "[variables('rgName')]" } @@ -1449,7 +1466,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "6700778522040462677" + "templateHash": "3769272141523051550" } }, "parameters": { @@ -1485,6 +1502,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionName": { + "type": "string", + "defaultValue": "" + }, "storageAccountKeyName": { "type": "string", "defaultValue": "AZURE-STORAGE-ACCOUNT-KEY" @@ -1508,6 +1529,10 @@ "speechKeyName": { "type": "string", "defaultValue": "AZURE-SPEECH-KEY" + }, + "computerVisionKeyName": { + "type": "string", + "defaultValue": "AZURE-COMPUTER-VISION-KEY" } }, "resources": [ @@ -1558,6 +1583,14 @@ "properties": { "value": "[listKeys(resourceId(subscription().subscriptionId, parameters('rgName'), 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1]" } + }, + { + "type": "Microsoft.KeyVault/vaults/secrets", + "apiVersion": 
"2022-07-01", + "name": "[format('{0}/{1}', parameters('keyVaultName'), parameters('computerVisionKeyName'))]", + "properties": { + "value": "[listKeys(resourceId(subscription().subscriptionId, parameters('rgName'), 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1]" + } } ], "outputs": { @@ -1584,11 +1617,16 @@ "SPEECH_KEY_NAME": { "type": "string", "value": "[parameters('speechKeyName')]" + }, + "COMPUTER_VISION_KEY_NAME": { + "type": "string", + "value": "[parameters('computerVisionKeyName')]" } } } }, "dependsOn": [ + "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision')]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('azureOpenAIResourceName'))]", @@ -3837,6 +3875,7 @@ "AZURE_SEARCH_DATASOURCE_NAME": "[parameters('azureSearchDatasource')]", "AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", + "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", "BACKEND_URL": "[format('https://{0}.azurewebsites.net', parameters('functionName'))]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "FUNCTION_KEY": "[variables('clientKey')]", @@ -4764,6 +4803,7 @@ "AZURE_SEARCH_DATASOURCE_NAME": "[parameters('azureSearchDatasource')]", 
"AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", + "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", "BACKEND_URL": "[format('https://{0}-docker.azurewebsites.net', parameters('functionName'))]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "FUNCTION_KEY": "[variables('clientKey')]", @@ -7382,6 +7422,9 @@ "speechServiceName": { "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('speechServiceName')), '2022-09-01').outputs.name.value]" }, + "computerVisionName": { + "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.name.value]" + }, "clientKey": { "value": "[variables('clientKey')]" }, @@ -7391,6 +7434,7 @@ "searchKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.SEARCH_KEY_NAME.value), createObject('value', ''))]", "contentSafetyKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.CONTENT_SAFETY_KEY_NAME.value), createObject('value', ''))]", "speechKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 
'storekeys'), '2022-09-01').outputs.SPEECH_KEY_NAME.value), createObject('value', ''))]", + "computerVisionKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.COMPUTER_VISION_KEY_NAME.value), createObject('value', ''))]", "useKeyVault": { "value": "[parameters('useKeyVault')]" }, @@ -7402,6 +7446,8 @@ "value": { "AZURE_BLOB_ACCOUNT_NAME": "[parameters('storageAccountName')]", "AZURE_BLOB_CONTAINER_NAME": "[variables('blobContainerName')]", + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION": "[parameters('computerVisionVectorizeImageApiVersion')]", + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION": "[parameters('computerVisionVectorizeImageModelVersion')]", "AZURE_CONTENT_SAFETY_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName')), '2022-09-01').outputs.endpoint.value]", "AZURE_FORM_RECOGNIZER_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName')), '2022-09-01').outputs.endpoint.value]", "AZURE_OPENAI_MODEL": "[parameters('azureOpenAIModel')]", @@ -7413,6 +7459,8 @@ "AZURE_SEARCH_DATASOURCE_NAME": "[parameters('azureSearchDatasource')]", "AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", + "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", + "AZURE_COMPUTER_VISION_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, 
variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]", "LOGLEVEL": "[parameters('logLevel')]" @@ -7426,7 +7474,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "7212582717200024100" + "templateHash": "3188820983633786286" } }, "parameters": { @@ -7491,6 +7539,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionName": { + "type": "string", + "defaultValue": "" + }, "useKeyVault": { "type": "bool" }, @@ -7510,6 +7562,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionKeyName": { + "type": "string", + "defaultValue": "" + }, "contentSafetyKeyName": { "type": "string", "defaultValue": "" @@ -7597,7 +7653,7 @@ "value": "[parameters('dockerFullImageName')]" }, "appSettings": { - "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), '2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), 
parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1)))]" + "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), '2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 
'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1), 'AZURE_COMPUTER_VISION_KEY', if(parameters('useKeyVault'), parameters('computerVisionKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1)))]" } }, "template": { @@ -8541,6 +8597,7 @@ } }, "dependsOn": [ + "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision')]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('hostingPlanName'))]", @@ -8611,6 +8668,7 @@ "searchKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.SEARCH_KEY_NAME.value), createObject('value', ''))]", "contentSafetyKeyName": 
"[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.CONTENT_SAFETY_KEY_NAME.value), createObject('value', ''))]", "speechKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.SPEECH_KEY_NAME.value), createObject('value', ''))]", + "computerVisionName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.COMPUTER_VISION_KEY_NAME.value), createObject('value', ''))]", "useKeyVault": { "value": "[parameters('useKeyVault')]" }, @@ -8622,6 +8680,8 @@ "value": { "AZURE_BLOB_ACCOUNT_NAME": "[parameters('storageAccountName')]", "AZURE_BLOB_CONTAINER_NAME": "[variables('blobContainerName')]", + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION": "[parameters('computerVisionVectorizeImageApiVersion')]", + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION": "[parameters('computerVisionVectorizeImageModelVersion')]", "AZURE_CONTENT_SAFETY_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName')), '2022-09-01').outputs.endpoint.value]", "AZURE_FORM_RECOGNIZER_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName')), '2022-09-01').outputs.endpoint.value]", 
"AZURE_OPENAI_MODEL": "[parameters('azureOpenAIModel')]", @@ -8633,6 +8693,8 @@ "AZURE_SEARCH_DATASOURCE_NAME": "[parameters('azureSearchDatasource')]", "AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", + "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", + "AZURE_COMPUTER_VISION_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]", "LOGLEVEL": "[parameters('logLevel')]" @@ -8646,7 +8708,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "7212582717200024100" + "templateHash": "3188820983633786286" } }, "parameters": { @@ -8711,6 +8773,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionName": { + "type": "string", + "defaultValue": "" + }, "useKeyVault": { "type": "bool" }, @@ -8730,6 +8796,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionKeyName": { + "type": "string", + "defaultValue": "" + }, "contentSafetyKeyName": { "type": "string", "defaultValue": "" @@ -8817,7 +8887,7 @@ "value": "[parameters('dockerFullImageName')]" }, "appSettings": { - "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', 
if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), '2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1)))]" + "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), 
'2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1), 'AZURE_COMPUTER_VISION_KEY', if(parameters('useKeyVault'), parameters('computerVisionKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1)))]" } }, "template": { @@ -9761,6 +9831,7 @@ } }, "dependsOn": [ + "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision')]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName'))]", 
"[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('hostingPlanName'))]", @@ -10738,6 +10809,14 @@ "type": "string", "value": "[if(parameters('useKeyVault'), reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.STORAGE_ACCOUNT_KEY_NAME.value, '')]" }, + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION": { + "type": "string", + "value": "[parameters('computerVisionVectorizeImageApiVersion')]" + }, + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION": { + "type": "string", + "value": "[parameters('computerVisionVectorizeImageModelVersion')]" + }, "AZURE_CONTENT_SAFETY_ENDPOINT": { "type": "string", "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName')), '2022-09-01').outputs.endpoint.value]" @@ -10814,6 +10893,10 @@ "type": "string", "value": "[if(parameters('useKeyVault'), reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.OPENAI_KEY_NAME.value, '')]" }, + "AZURE_COMPUTER_VISION_ENDPOINT": { + "type": "string", + "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value]" + }, "AZURE_RESOURCE_GROUP": { "type": "string", "value": "[variables('rgName')]"