diff --git a/.env.sample b/.env.sample index 63f053d18..50494eedf 100644 --- a/.env.sample +++ b/.env.sample @@ -56,4 +56,3 @@ AZURE_SPEECH_SERVICE_REGION= AZURE_AUTH_TYPE=keys USE_KEY_VAULT=true AZURE_KEY_VAULT_ENDPOINT= -LOGLEVEL=INFO diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/AddURLEmbeddings.py index 4d6abc588..96b86e4cd 100644 --- a/code/backend/batch/AddURLEmbeddings.py +++ b/code/backend/batch/AddURLEmbeddings.py @@ -1,24 +1,21 @@ +import os import logging import traceback import azure.functions as func -import sys -from utilities.helpers.EnvHelper import EnvHelper from utilities.helpers.DocumentProcessorHelper import DocumentProcessor from utilities.helpers.ConfigHelper import ConfigHelper -sys.path.append("..") bp_add_url_embeddings = func.Blueprint() -env_helper: EnvHelper = EnvHelper() - logger = logging.getLogger(__name__) -logger.setLevel(env_helper.LOGLEVEL) +logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) @bp_add_url_embeddings.route(route="AddURLEmbeddings") def add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: logger.info("Python HTTP trigger function processed a request.") + # Get Url from request url = req.params.get("url") if not url: diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index eec7a26e5..e7728434d 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -1,21 +1,17 @@ +import os import logging import json import azure.functions as func from urllib.parse import urlparse -import sys -from utilities.helpers.EnvHelper import EnvHelper from utilities.helpers.AzureBlobStorageHelper import AzureBlobStorageClient from utilities.helpers.DocumentProcessorHelper import DocumentProcessor from utilities.helpers.ConfigHelper import ConfigHelper -sys.path.append("..") bp_batch_push_results = func.Blueprint() -env_helper: EnvHelper = EnvHelper() - logger = logging.getLogger(__name__) -logger.setLevel(env_helper.LOGLEVEL) +logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) def _get_file_name_from_message(msg: func.QueueMessage) -> str: @@ -32,10 +28,15 @@ def _get_file_name_from_message(msg: func.QueueMessage) -> str: arg_name="msg", queue_name="doc-processing", connection="AzureWebJobsStorage" ) def batch_push_results(msg: func.QueueMessage) -> None: + do_batch_push_results(msg) + + +def do_batch_push_results(msg: func.QueueMessage) -> None: logger.info( "Python queue trigger function processed a queue item: %s", msg.get_body().decode("utf-8"), ) + document_processor = DocumentProcessor() blob_client = AzureBlobStorageClient() # Get the file name from the message @@ -44,6 +45,7 @@ def batch_push_results(msg: func.QueueMessage) -> None: file_sas = blob_client.get_blob_sas(file_name) # Get file extension's processors file_extension = file_name.split(".")[-1] + processors = list( filter( lambda x: x.document_type.lower() == file_extension.lower(), diff --git a/code/backend/batch/BatchStartProcessing.py b/code/backend/batch/BatchStartProcessing.py index 0ad62f406..9ae5f3016 100644 --- a/code/backend/batch/BatchStartProcessing.py +++ b/code/backend/batch/BatchStartProcessing.py @@ -1,19 +1,16 @@ +import os import logging import json import azure.functions as func -import sys -from utilities.helpers.EnvHelper import EnvHelper + from utilities.helpers.AzureBlobStorageHelper import ( AzureBlobStorageClient, create_queue_client, ) -sys.path.append("..") bp_batch_start_processing = func.Blueprint() -env_helper: EnvHelper = EnvHelper() - logger = logging.getLogger(__name__) -logger.setLevel(env_helper.LOGLEVEL) +logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) @bp_batch_start_processing.route(route="BatchStartProcessing") diff --git a/code/backend/batch/GetConversationResponse.py b/code/backend/batch/GetConversationResponse.py index 3bce11865..4ec132366 100644 --- a/code/backend/batch/GetConversationResponse.py +++ b/code/backend/batch/GetConversationResponse.py @@ -1,24 +1,28 @@ +import os import azure.functions as func import logging import json -import sys + from utilities.helpers.EnvHelper import EnvHelper from utilities.helpers.OrchestratorHelper import Orchestrator +from utilities.helpers.ConfigHelper import ConfigHelper -sys.path.append("..") bp_get_conversation_response = func.Blueprint() -env_helper: EnvHelper = EnvHelper() - logger = logging.getLogger(__name__) -logger.setLevel(env_helper.LOGLEVEL) +logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) @bp_get_conversation_response.route(route="GetConversationResponse") def get_conversation_response(req: func.HttpRequest) -> func.HttpResponse: + return do_get_conversation_response(req) + + +def do_get_conversation_response(req: func.HttpRequest) -> func.HttpResponse: logger.info("Python HTTP trigger function processed a request.") message_orchestrator = Orchestrator() + env_helper: EnvHelper = EnvHelper() try: req_body = req.get_json() @@ -38,7 +42,6 @@ def get_conversation_response(req: func.HttpRequest) -> func.HttpResponse: user_assistant_messages[i + 1]["content"], ) ) - from utilities.helpers.ConfigHelper import ConfigHelper messages = message_orchestrator.handle_message( user_message=user_message, diff --git a/code/backend/batch/__init__.py b/code/backend/batch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py new file mode 100644 index 000000000..9157b4521 --- /dev/null +++ b/code/tests/test_AddURLEmbeddings.py @@ -0,0 +1,55 @@ +import sys +import os +from unittest.mock import patch +import azure.functions as func + + +sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) + +from backend.batch.AddURLEmbeddings import add_url_embeddings # noqa: E402 + + +@patch("backend.batch.AddURLEmbeddings.ConfigHelper") +@patch("backend.batch.AddURLEmbeddings.DocumentProcessor") +def test_add_url_embeddings_when_url_set_in_body(_, __): + fake_request = func.HttpRequest( + method="POST", + url="", + body=b'{"url": "https://example.com"}', + headers={"Content-Type": "application/json"}, + ) + + response = add_url_embeddings.build().get_user_function()(fake_request) + + assert response.status_code == 200 + + +@patch("backend.batch.AddURLEmbeddings.ConfigHelper") +@patch("backend.batch.AddURLEmbeddings.DocumentProcessor") +def test_add_url_embeddings_when_url_set_in_param(_, __): + fake_request = func.HttpRequest( + method="POST", + url="", + body=b"", + headers={"Content-Type": "application/json"}, + params={"url": "https://example.com"}, + ) + + response = add_url_embeddings.build().get_user_function()(fake_request) + + assert response.status_code == 200 + + +@patch("backend.batch.AddURLEmbeddings.ConfigHelper") +@patch("backend.batch.AddURLEmbeddings.DocumentProcessor") +def test_add_url_embeddings_returns_400_when_url_not_set(_, __): + fake_request = func.HttpRequest( + method="POST", + url="", + body=b"", + params={}, + ) + + response = add_url_embeddings.build().get_user_function()(fake_request) + + assert response.status_code == 400 diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py new file mode 100644 index 000000000..b62063043 --- /dev/null +++ b/code/tests/test_BatchPushResults.py @@ -0,0 +1,68 @@ +import sys +import os +from unittest.mock import patch, Mock +from azure.functions import QueueMessage + + +sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) + +from backend.batch.BatchPushResults import ( # noqa: E402 + batch_push_results, + _get_file_name_from_message, +) + + +def test_get_file_name_from_message(): + mock_queue_message = QueueMessage( + body='{"message": "test message", "filename": "test_filename.md"}' + ) + + file_name = _get_file_name_from_message(mock_queue_message) + + assert file_name == "test_filename.md" + + +def test_get_file_name_from_message_no_filename(): + mock_queue_message = QueueMessage( + body='{"data": { "url": "test/test/test_filename.md"} }' + ) + + file_name = _get_file_name_from_message(mock_queue_message) + + assert file_name == "test_filename.md" + + +@patch("backend.batch.BatchPushResults.ConfigHelper") +@patch("backend.batch.BatchPushResults.AzureBlobStorageClient") +@patch("backend.batch.BatchPushResults.DocumentProcessor") +def test_batch_push_results( + mock_document_processor, + mock_azure_blob_storage_client, + mock_config_helper, +): + mock_queue_message = QueueMessage( + body='{"message": "test message", "filename": "test/test/test_filename.md"}' + ) + + mock_blob_client_instance = mock_azure_blob_storage_client.return_value + mock_blob_client_instance.get_blob_sas.return_value = "test_blob_sas" + + mock_document_processor_instance = mock_document_processor.return_value + + md_processor = Mock() + md_processor.document_type.lower.return_value = "md" + txt_processor = Mock() + txt_processor.document_type.lower.return_value = "txt" + mock_processors = [md_processor, txt_processor] + mock_config_helper.get_active_config_or_default.return_value.document_processors = ( + mock_processors + ) + + batch_push_results.build().get_user_function()(mock_queue_message) + + mock_document_processor_instance.process.assert_called_once_with( + source_url="test_blob_sas", processors=[md_processor] + ) + mock_blob_client_instance.upsert_blob_metadata.assert_called_once_with( + "test/test/test_filename.md", {"embeddings_added": "true"} + ) diff --git a/code/tests/test_BatchStartProcessing.py b/code/tests/test_BatchStartProcessing.py new file mode 100644 index 000000000..413c13e23 --- /dev/null +++ b/code/tests/test_BatchStartProcessing.py @@ -0,0 +1,60 @@ +import sys +import os +from unittest.mock import patch, Mock + +sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) + +from backend.batch.BatchStartProcessing import batch_start_processing # noqa: E402 + + +@patch("backend.batch.BatchStartProcessing.create_queue_client") +@patch("backend.batch.BatchStartProcessing.AzureBlobStorageClient") +def test_batch_start_processing_processes_all( + mock_blob_storage_client, mock_create_queue_client +): + mock_http_request = Mock() + mock_http_request.params = dict() + mock_http_request.params["process_all"] = "true" + + mock_queue_client = Mock() + mock_create_queue_client.return_value = mock_queue_client + + mock_blob_storage_client.return_value.get_all_files.return_value = [ + {"filename": "file_name_one", "embeddings_added": False} + ] + + response = batch_start_processing.build().get_user_function()(mock_http_request) + + assert response.status_code == 200 + + mock_queue_client.send_message.assert_called_once_with( + b'{"filename": "file_name_one"}', + ) + + +@patch("backend.batch.BatchStartProcessing.create_queue_client") +@patch("backend.batch.BatchStartProcessing.AzureBlobStorageClient") +def test_batch_start_processing_filters_filter_no_embeddings( + mock_blob_storage_client, mock_create_queue_client +): + mock_http_request = Mock() + mock_http_request.params = dict() + mock_http_request.params["process_all"] = "false" + + mock_queue_client = Mock() + mock_create_queue_client.return_value = mock_queue_client + + mock_blob_storage_client.return_value.get_all_files.return_value = [ + { + "filename": "file_name_one", + "embeddings_added": True, # will get filtered out + }, + {"filename": "file_name_two", "embeddings_added": False}, + ] + response = batch_start_processing.build().get_user_function()(mock_http_request) + + assert response.status_code == 200 + + mock_queue_client.send_message.assert_called_once_with( + b'{"filename": "file_name_two"}', + ) diff --git a/code/tests/test_GetConversationResponse.py b/code/tests/test_GetConversationResponse.py new file mode 100644 index 000000000..a91ba9c38 --- /dev/null +++ b/code/tests/test_GetConversationResponse.py @@ -0,0 +1,49 @@ +import sys +import os +from unittest.mock import patch, Mock, ANY +import json + +sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) + +from backend.batch.GetConversationResponse import ( # noqa: E402 + get_conversation_response, +) + + +@patch("backend.batch.GetConversationResponse.ConfigHelper") +@patch("backend.batch.GetConversationResponse.Orchestrator") +def test_get_conversation_response(mock_create_message_orchestrator, _): + mock_http_request = Mock() + request_json = { + "messages": [ + {"content": "Do I have meetings today?", "role": "user"}, + {"content": "It is sunny today", "role": "assistant"}, + {"content": "What is the weather like today?", "role": "user"}, + ], + "conversation_id": "13245", + } + mock_http_request.get_json.return_value = request_json + + mock_message_orchestrator = Mock() + mock_message_orchestrator.handle_message.return_value = [ + "You don't have any meetings today" + ] + + mock_create_message_orchestrator.return_value = mock_message_orchestrator + + response = get_conversation_response.build().get_user_function()(mock_http_request) + + assert response.status_code == 200 + + mock_message_orchestrator.handle_message.assert_called_once_with( + user_message="What is the weather like today?", + chat_history=[("Do I have meetings today?", "It is sunny today")], + conversation_id="13245", + orchestrator=ANY, + ) + + response_json = json.loads(response.get_body()) + assert response_json["id"] == "response.id" + assert response_json["choices"] == [ + {"messages": ["You don't have any meetings today"]} + ]