From af6cf8f708b1af759d91f1e76e40e3effebc356c Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Tue, 9 Apr 2024 16:08:58 +0000 Subject: [PATCH 01/15] Add test for batch processing function --- code/backend/batch/AddURLEmbeddings.py | 8 ++- code/backend/batch/BatchPushResults.py | 16 +++-- code/backend/batch/BatchStartProcessing.py | 8 ++- code/backend/batch/GetConversationResponse.py | 11 ++- code/backend/batch/__init__.py | 0 code/tests/test_AddURLEmbeddings.py | 32 +++++++++ .../test_BatchGetConversationResponse.py | 71 +++++++++++++++++++ code/tests/test_BatchPushResults.py | 58 +++++++++++++++ code/tests/test_BatchStartProcessing.py | 56 +++++++++++++++ 9 files changed, 248 insertions(+), 12 deletions(-) create mode 100644 code/backend/batch/__init__.py create mode 100644 code/tests/test_AddURLEmbeddings.py create mode 100644 code/tests/test_BatchGetConversationResponse.py create mode 100644 code/tests/test_BatchPushResults.py create mode 100644 code/tests/test_BatchStartProcessing.py diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/AddURLEmbeddings.py index 69fa1c7a2..fc6bb7e9a 100644 --- a/code/backend/batch/AddURLEmbeddings.py +++ b/code/backend/batch/AddURLEmbeddings.py @@ -3,8 +3,8 @@ import azure.functions as func import sys -from utilities.helpers.DocumentProcessorHelper import DocumentProcessor -from utilities.helpers.ConfigHelper import ConfigHelper +from backend.batch.utilities.helpers.DocumentProcessorHelper import DocumentProcessor +from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper sys.path.append("..") @@ -13,6 +13,10 @@ @bp_add_url_embeddings.route(route="AddURLEmbeddings") def add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: + do_add_url_embeddings(req) + + +def do_add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: logging.info("Python HTTP trigger function processed a request.") # Get Url from request url = req.params.get("url") diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index 33c62bf4d..fe5cc8d23 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -2,13 +2,14 @@ import json import azure.functions as func from urllib.parse import urlparse -import sys -from utilities.helpers.AzureBlobStorageHelper import AzureBlobStorageClient -from utilities.helpers.DocumentProcessorHelper import DocumentProcessor -from utilities.helpers.ConfigHelper import ConfigHelper +from backend.batch.utilities.helpers.AzureBlobStorageHelper import ( + AzureBlobStorageClient, +) +from backend.batch.utilities.helpers.DocumentProcessorHelper import DocumentProcessor +from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper -sys.path.append("..") +# sys.path.append("..") bp_batch_push_results = func.Blueprint() @@ -27,6 +28,10 @@ def _get_file_name_from_message(msg: func.QueueMessage) -> str: arg_name="msg", queue_name="doc-processing", connection="AzureWebJobsStorage" ) def batch_push_results(msg: func.QueueMessage) -> None: + do_batch_push_results(msg) + + +def do_batch_push_results(msg: func.QueueMessage) -> None: logging.info( "Python queue trigger function processed a queue item: %s", msg.get_body().decode("utf-8"), @@ -39,6 +44,7 @@ def batch_push_results(msg: func.QueueMessage) -> None: file_sas = blob_client.get_blob_sas(file_name) # Get file extension's processors file_extension = file_name.split(".")[-1] + processors = list( filter( lambda x: x.document_type.lower() == file_extension.lower(), diff --git a/code/backend/batch/BatchStartProcessing.py b/code/backend/batch/BatchStartProcessing.py index 9ada5abce..d3cd309a7 100644 --- a/code/backend/batch/BatchStartProcessing.py +++ b/code/backend/batch/BatchStartProcessing.py @@ -2,8 +2,8 @@ import json import azure.functions as func import sys -from utilities.helpers.EnvHelper import EnvHelper -from utilities.helpers.AzureBlobStorageHelper import ( +from backend.batch.utilities.helpers.EnvHelper import EnvHelper +from backend.batch.utilities.helpers.AzureBlobStorageHelper import ( AzureBlobStorageClient, create_queue_client, ) @@ -15,6 +15,10 @@ @bp_batch_start_processing.route(route="BatchStartProcessing") def batch_start_processing(req: func.HttpRequest) -> func.HttpResponse: + return do_batch_start_processing(req) + + +def do_batch_start_processing(req: func.HttpRequest) -> func.HttpResponse: logging.info("Requested to start processing all documents received") # Set up Blob Storage Client azure_blob_storage_client = AzureBlobStorageClient() diff --git a/code/backend/batch/GetConversationResponse.py b/code/backend/batch/GetConversationResponse.py index 5b8ec9fed..9e0b2168b 100644 --- a/code/backend/batch/GetConversationResponse.py +++ b/code/backend/batch/GetConversationResponse.py @@ -2,8 +2,10 @@ import logging import json import sys -from utilities.helpers.EnvHelper import EnvHelper -from utilities.helpers.OrchestratorHelper import Orchestrator +from backend.batch.utilities.helpers.EnvHelper import EnvHelper +from backend.batch.utilities.helpers.OrchestratorHelper import Orchestrator +from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper + sys.path.append("..") @@ -13,6 +15,10 @@ @bp_get_conversation_response.route(route="GetConversationResponse") def get_conversation_response(req: func.HttpRequest) -> func.HttpResponse: + do_get_conversation_response(req) + + +def do_get_conversation_response(req: func.HttpRequest) -> func.HttpResponse: logging.info("Python HTTP trigger function processed a request.") message_orchestrator = Orchestrator() @@ -35,7 +41,6 @@ def get_conversation_response(req: func.HttpRequest) -> func.HttpResponse: user_assistant_messages[i + 1]["content"], ) ) - from utilities.helpers.ConfigHelper import ConfigHelper messages = message_orchestrator.handle_message( user_message=user_message, diff --git a/code/backend/batch/__init__.py b/code/backend/batch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py new file mode 100644 index 000000000..617f28e58 --- /dev/null +++ b/code/tests/test_AddURLEmbeddings.py @@ -0,0 +1,32 @@ +from unittest import mock +import azure.functions as func +from backend.batch.AddURLEmbeddings import do_add_url_embeddings + + +@mock.patch("backend.batch.AddURLEmbeddings.DocumentProcessor") +def test_add_url_embeddings_when_url_set_in_body(mock_doc_processor): + fake_request = func.HttpRequest( + method="POST", + url="", + body=b'{"url": "https://example.com"}', + headers={"Content-Type": "application/json"}, + ) + + response = do_add_url_embeddings(fake_request) + + assert response.status_code == 200 + + +@mock.patch("backend.batch.AddURLEmbeddings.DocumentProcessor") +def test_add_url_embeddings_when_url_set_in_param(mock_doc_processor): + fake_request = func.HttpRequest( + method="POST", + url="", + body=b"", + headers={"Content-Type": "application/json"}, + params={"url": "https://example.com"}, + ) + + response = do_add_url_embeddings(fake_request) + + assert response.status_code == 200 diff --git a/code/tests/test_BatchGetConversationResponse.py b/code/tests/test_BatchGetConversationResponse.py new file mode 100644 index 000000000..72bc9df67 --- /dev/null +++ b/code/tests/test_BatchGetConversationResponse.py @@ -0,0 +1,71 @@ +from unittest.mock import patch, Mock, ANY +import json + +from backend.batch.GetConversationResponse import do_get_conversation_response + + +@patch("backend.batch.GetConversationResponse.Orchestrator") +def test_get_conversation_response(mock_create_message_orchestrator): + mock_http_request = Mock() + request_json = { + "messages": [ + {"content": "Do I have meetings today?", "role": "user"}, + {"content": "It is sunny today", "role": "assistant"}, + {"content": "What is the weather like today?", "role": "user"}, + ], + "conversation_id": "13245", + } + mock_http_request.get_json.return_value = request_json + + mock_message_orchestrator = Mock() + mock_message_orchestrator.handle_message.return_value = [ + "You don't have any meetings today" + ] + + mock_create_message_orchestrator.return_value = mock_message_orchestrator + + response = do_get_conversation_response(mock_http_request) + + assert response.status_code == 200 + + mock_message_orchestrator.handle_message.assert_called_once_with( + user_message="What is the weather like today?", + chat_history=[("Do I have meetings today?", "It is sunny today")], + conversation_id="13245", + orchestrator=ANY, + ) + + response_json = json.loads(response.get_body()) + assert response_json["id"] == "response.id" + assert response_json["choices"] == [ + {"messages": ["You don't have any meetings today"]} + ] + + +# @patch("backend.batch.BatchStartProcessing.create_queue_client") +# @patch("backend.batch.BatchStartProcessing.AzureBlobStorageClient") +# def test_batch_start_processing_filters_filter_no_embeddings(mock_blob_storage_client, mock_create_queue_client): +# mock_http_request = Mock() +# mock_http_request.params = dict() +# mock_http_request.params["process_all"] = "false" + +# mock_queue_client = Mock() +# mock_create_queue_client.return_value = mock_queue_client + +# mock_blob_storage_client.return_value.get_all_files.return_value = [ +# { +# "filename": "file_name_one", +# "embeddings_added": True # will get filtered out +# }, +# { +# "filename": "file_name_two", +# "embeddings_added": False +# } +# ] +# response = do_batch_start_processing(mock_http_request) + +# assert response.status_code == 200 + +# mock_queue_client.send_message.assert_called_once_with( +# b'{"filename": "file_name_two"}', +# ) diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py new file mode 100644 index 000000000..3e0c51c01 --- /dev/null +++ b/code/tests/test_BatchPushResults.py @@ -0,0 +1,58 @@ +from unittest.mock import patch, Mock +from azure.functions import QueueMessage +from backend.batch.BatchPushResults import do_batch_push_results +from backend.batch.BatchPushResults import _get_file_name_from_message + + +def test_get_file_name_from_message(): + mock_queue_message = QueueMessage( + body='{"message": "test message", "filename": "test_filename.md"}' + ) + + file_name = _get_file_name_from_message(mock_queue_message) + + assert file_name == "test_filename.md" + + +def test_get_file_name_from_message_no_filename(): + mock_queue_message = QueueMessage( + body='{"data": { "url": "test/test/test_filename.md"} }' + ) + + file_name = _get_file_name_from_message(mock_queue_message) + + assert file_name == "test_filename.md" + + +@patch("backend.batch.BatchPushResults.ConfigHelper") +@patch("backend.batch.BatchPushResults.AzureBlobStorageClient") +@patch("backend.batch.BatchPushResults.DocumentProcessor") +def test_do_batch_push_results( + mock_document_processor, mock_azure_blob_storage_client, mock_config_helper +): + mock_queue_message = QueueMessage( + body='{"message": "test message", "filename": "test/test/test_filename.md"}' + ) + + mock_blob_client_instance = mock_azure_blob_storage_client.return_value + mock_blob_client_instance.get_blob_sas.return_value = "test_blob_sas" + + mock_document_processor_instance = mock_document_processor.return_value + + md_processor = Mock() + md_processor.document_type.lower.return_value = "md" + txt_processor = Mock() + txt_processor.document_type.lower.return_value = "txt" + mock_processors = [md_processor, txt_processor] + mock_config_helper.get_active_config_or_default.return_value.document_processors = ( + mock_processors + ) + + do_batch_push_results(mock_queue_message) + + mock_document_processor_instance.process.assert_called_once_with( + source_url="test_blob_sas", processors=[md_processor] + ) + mock_blob_client_instance.upsert_blob_metadata.assert_called_once_with( + "test/test/test_filename.md", {"embeddings_added": "true"} + ) diff --git a/code/tests/test_BatchStartProcessing.py b/code/tests/test_BatchStartProcessing.py new file mode 100644 index 000000000..3c24936ae --- /dev/null +++ b/code/tests/test_BatchStartProcessing.py @@ -0,0 +1,56 @@ +from unittest.mock import patch, Mock + +from backend.batch.BatchStartProcessing import do_batch_start_processing + + +@patch("backend.batch.BatchStartProcessing.create_queue_client") +@patch("backend.batch.BatchStartProcessing.AzureBlobStorageClient") +def test_batch_start_processing_processes_all( + mock_blob_storage_client, mock_create_queue_client +): + mock_http_request = Mock() + mock_http_request.params = dict() + mock_http_request.params["process_all"] = "true" + + mock_queue_client = Mock() + mock_create_queue_client.return_value = mock_queue_client + + mock_blob_storage_client.return_value.get_all_files.return_value = [ + {"filename": "file_name_one", "embeddings_added": False} + ] + + response = do_batch_start_processing(mock_http_request) + + assert response.status_code == 200 + + mock_queue_client.send_message.assert_called_once_with( + b'{"filename": "file_name_one"}', + ) + + +@patch("backend.batch.BatchStartProcessing.create_queue_client") +@patch("backend.batch.BatchStartProcessing.AzureBlobStorageClient") +def test_batch_start_processing_filters_filter_no_embeddings( + mock_blob_storage_client, mock_create_queue_client +): + mock_http_request = Mock() + mock_http_request.params = dict() + mock_http_request.params["process_all"] = "false" + + mock_queue_client = Mock() + mock_create_queue_client.return_value = mock_queue_client + + mock_blob_storage_client.return_value.get_all_files.return_value = [ + { + "filename": "file_name_one", + "embeddings_added": True, # will get filtered out + }, + {"filename": "file_name_two", "embeddings_added": False}, + ] + response = do_batch_start_processing(mock_http_request) + + assert response.status_code == 200 + + mock_queue_client.send_message.assert_called_once_with( + b'{"filename": "file_name_two"}', + ) From 2ddc45e328bb6706aa0a4f5a83675529c32db370 Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Tue, 9 Apr 2024 16:22:09 +0000 Subject: [PATCH 02/15] Remove comment --- .../test_BatchGetConversationResponse.py | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/code/tests/test_BatchGetConversationResponse.py b/code/tests/test_BatchGetConversationResponse.py index 72bc9df67..a15dfc73b 100644 --- a/code/tests/test_BatchGetConversationResponse.py +++ b/code/tests/test_BatchGetConversationResponse.py @@ -40,32 +40,3 @@ def test_get_conversation_response(mock_create_message_orchestrator): assert response_json["choices"] == [ {"messages": ["You don't have any meetings today"]} ] - - -# @patch("backend.batch.BatchStartProcessing.create_queue_client") -# @patch("backend.batch.BatchStartProcessing.AzureBlobStorageClient") -# def test_batch_start_processing_filters_filter_no_embeddings(mock_blob_storage_client, mock_create_queue_client): -# mock_http_request = Mock() -# mock_http_request.params = dict() -# mock_http_request.params["process_all"] = "false" - -# mock_queue_client = Mock() -# mock_create_queue_client.return_value = mock_queue_client - -# mock_blob_storage_client.return_value.get_all_files.return_value = [ -# { -# "filename": "file_name_one", -# "embeddings_added": True # will get filtered out -# }, -# { -# "filename": "file_name_two", -# "embeddings_added": False -# } -# ] -# response = do_batch_start_processing(mock_http_request) - -# assert response.status_code == 200 - -# mock_queue_client.send_message.assert_called_once_with( -# b'{"filename": "file_name_two"}', -# ) From f37d719350a99b28a93c5a4d749e54c7804ef678 Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Tue, 9 Apr 2024 16:57:09 +0000 Subject: [PATCH 03/15] Try to remove sys path modifications --- code/backend/batch/AddURLEmbeddings.py | 2 -- code/backend/batch/BatchPushResults.py | 1 - code/backend/batch/BatchStartProcessing.py | 2 -- code/backend/batch/GetConversationResponse.py | 3 --- 4 files changed, 8 deletions(-) diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/AddURLEmbeddings.py index 7f950b6df..de7e19329 100644 --- a/code/backend/batch/AddURLEmbeddings.py +++ b/code/backend/batch/AddURLEmbeddings.py @@ -1,12 +1,10 @@ import logging import traceback import azure.functions as func -import sys from backend.batch.utilities.helpers.DocumentProcessorHelper import DocumentProcessor from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper -sys.path.append("..") bp_add_url_embeddings = func.Blueprint() diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index 4d43d5620..056f2835f 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -9,7 +9,6 @@ from backend.batch.utilities.helpers.DocumentProcessorHelper import DocumentProcessor from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper -# sys.path.append("..") bp_batch_push_results = func.Blueprint() diff --git a/code/backend/batch/BatchStartProcessing.py b/code/backend/batch/BatchStartProcessing.py index 6001162e1..ec5816f7e 100644 --- a/code/backend/batch/BatchStartProcessing.py +++ b/code/backend/batch/BatchStartProcessing.py @@ -1,14 +1,12 @@ import logging import json import azure.functions as func -import sys from backend.batch.utilities.helpers.EnvHelper import EnvHelper from backend.batch.utilities.helpers.AzureBlobStorageHelper import ( AzureBlobStorageClient, create_queue_client, ) -sys.path.append("..") bp_batch_start_processing = func.Blueprint() env_helper: EnvHelper = EnvHelper() diff --git a/code/backend/batch/GetConversationResponse.py b/code/backend/batch/GetConversationResponse.py index 6893bf37b..6ae6d37ee 100644 --- a/code/backend/batch/GetConversationResponse.py +++ b/code/backend/batch/GetConversationResponse.py @@ -1,14 +1,11 @@ import azure.functions as func import logging import json -import sys from backend.batch.utilities.helpers.EnvHelper import EnvHelper from backend.batch.utilities.helpers.OrchestratorHelper import Orchestrator from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper -sys.path.append("..") - bp_get_conversation_response = func.Blueprint() env_helper: EnvHelper = EnvHelper() From 146125044b44def0ebecdd123dc2d9edb8a8b754 Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Tue, 9 Apr 2024 22:07:20 +0000 Subject: [PATCH 04/15] Fix imports --- code/backend/batch/AddURLEmbeddings.py | 7 ++++--- code/backend/batch/BatchPushResults.py | 9 +++++---- code/backend/batch/BatchStartProcessing.py | 8 +++++--- code/backend/batch/GetConversationResponse.py | 10 ++++++---- code/tests/test_AddURLEmbeddings.py | 6 +++++- code/tests/test_BatchGetConversationResponse.py | 7 ++++++- code/tests/test_BatchPushResults.py | 7 +++++-- code/tests/test_BatchStartProcessing.py | 4 +++- 8 files changed, 39 insertions(+), 19 deletions(-) diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/AddURLEmbeddings.py index de7e19329..f1b717a05 100644 --- a/code/backend/batch/AddURLEmbeddings.py +++ b/code/backend/batch/AddURLEmbeddings.py @@ -1,13 +1,14 @@ import logging import traceback import azure.functions as func +import sys -from backend.batch.utilities.helpers.DocumentProcessorHelper import DocumentProcessor -from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper +from utilities.helpers.DocumentProcessorHelper import DocumentProcessor +from utilities.helpers.ConfigHelper import ConfigHelper +sys.path.append("..") bp_add_url_embeddings = func.Blueprint() - logger = logging.getLogger(__name__) diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index 056f2835f..db1af768d 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -2,16 +2,17 @@ import json import azure.functions as func from urllib.parse import urlparse +import sys -from backend.batch.utilities.helpers.AzureBlobStorageHelper import ( +from utilities.helpers.AzureBlobStorageHelper import ( AzureBlobStorageClient, ) -from backend.batch.utilities.helpers.DocumentProcessorHelper import DocumentProcessor -from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper +from utilities.helpers.DocumentProcessorHelper import DocumentProcessor +from utilities.helpers.ConfigHelper import ConfigHelper +sys.path.append("..") bp_batch_push_results = func.Blueprint() - logger = logging.getLogger(__name__) diff --git a/code/backend/batch/BatchStartProcessing.py b/code/backend/batch/BatchStartProcessing.py index ec5816f7e..a389f69ca 100644 --- a/code/backend/batch/BatchStartProcessing.py +++ b/code/backend/batch/BatchStartProcessing.py @@ -1,16 +1,18 @@ import logging import json import azure.functions as func -from backend.batch.utilities.helpers.EnvHelper import EnvHelper -from backend.batch.utilities.helpers.AzureBlobStorageHelper import ( +import sys + +from utilities.helpers.EnvHelper import EnvHelper +from utilities.helpers.AzureBlobStorageHelper import ( AzureBlobStorageClient, create_queue_client, ) bp_batch_start_processing = func.Blueprint() env_helper: EnvHelper = EnvHelper() - logger = logging.getLogger(__name__) +sys.path.append("..") @bp_batch_start_processing.route(route="BatchStartProcessing") diff --git a/code/backend/batch/GetConversationResponse.py b/code/backend/batch/GetConversationResponse.py index 6ae6d37ee..326f61b12 100644 --- a/code/backend/batch/GetConversationResponse.py +++ b/code/backend/batch/GetConversationResponse.py @@ -1,14 +1,16 @@ import azure.functions as func import logging import json -from backend.batch.utilities.helpers.EnvHelper import EnvHelper -from backend.batch.utilities.helpers.OrchestratorHelper import Orchestrator -from backend.batch.utilities.helpers.ConfigHelper import ConfigHelper +import sys +from utilities.helpers.EnvHelper import EnvHelper +from utilities.helpers.OrchestratorHelper import Orchestrator +from utilities.helpers.ConfigHelper import ConfigHelper + +sys.path.append("..") bp_get_conversation_response = func.Blueprint() env_helper: EnvHelper = EnvHelper() - logger = logging.getLogger(__name__) diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py index 617f28e58..63398a519 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_AddURLEmbeddings.py @@ -1,6 +1,10 @@ +import sys from unittest import mock import azure.functions as func -from backend.batch.AddURLEmbeddings import do_add_url_embeddings + +sys.path.append("backend/batch") + +from backend.batch.AddURLEmbeddings import do_add_url_embeddings # noqa: E402 @mock.patch("backend.batch.AddURLEmbeddings.DocumentProcessor") diff --git a/code/tests/test_BatchGetConversationResponse.py b/code/tests/test_BatchGetConversationResponse.py index a15dfc73b..f8a755310 100644 --- a/code/tests/test_BatchGetConversationResponse.py +++ b/code/tests/test_BatchGetConversationResponse.py @@ -1,7 +1,12 @@ +import sys from unittest.mock import patch, Mock, ANY import json -from backend.batch.GetConversationResponse import do_get_conversation_response +sys.path.append("backend/batch/") + +from backend.batch.GetConversationResponse import ( # noqa: E402 + do_get_conversation_response, +) @patch("backend.batch.GetConversationResponse.Orchestrator") diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py index 3e0c51c01..e38e82bbb 100644 --- a/code/tests/test_BatchPushResults.py +++ b/code/tests/test_BatchPushResults.py @@ -1,7 +1,10 @@ +import sys from unittest.mock import patch, Mock from azure.functions import QueueMessage -from backend.batch.BatchPushResults import do_batch_push_results -from backend.batch.BatchPushResults import _get_file_name_from_message + +sys.path.append("backend/batch/") +from backend.batch.BatchPushResults import do_batch_push_results # noqa: E402 +from backend.batch.BatchPushResults import _get_file_name_from_message # noqa: E402 def test_get_file_name_from_message(): diff --git a/code/tests/test_BatchStartProcessing.py b/code/tests/test_BatchStartProcessing.py index 3c24936ae..4fb953831 100644 --- a/code/tests/test_BatchStartProcessing.py +++ b/code/tests/test_BatchStartProcessing.py @@ -1,6 +1,8 @@ +import sys from unittest.mock import patch, Mock -from backend.batch.BatchStartProcessing import do_batch_start_processing +sys.path.append("backend/batch") +from backend.batch.BatchStartProcessing import do_batch_start_processing # noqa: E402 @patch("backend.batch.BatchStartProcessing.create_queue_client") From 1ecc1f5288b63242b8b6857bfb95c49263390947 Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Wed, 10 Apr 2024 15:36:33 +0000 Subject: [PATCH 05/15] Fix sys path append statements by making paths absolute --- code/tests/test_AddURLEmbeddings.py | 7 ++++++- code/tests/test_BatchGetConversationResponse.py | 6 +++++- code/tests/test_BatchPushResults.py | 8 +++++++- code/tests/test_BatchStartProcessing.py | 7 ++++++- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py index 63398a519..a0b5b11c6 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_AddURLEmbeddings.py @@ -1,8 +1,13 @@ import sys +import os from unittest import mock import azure.functions as func -sys.path.append("backend/batch") + +function_app_path = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../backend/batch") +) +sys.path.append(function_app_path) from backend.batch.AddURLEmbeddings import do_add_url_embeddings # noqa: E402 diff --git a/code/tests/test_BatchGetConversationResponse.py b/code/tests/test_BatchGetConversationResponse.py index f8a755310..4fd13137a 100644 --- a/code/tests/test_BatchGetConversationResponse.py +++ b/code/tests/test_BatchGetConversationResponse.py @@ -1,8 +1,12 @@ import sys +import os from unittest.mock import patch, Mock, ANY import json -sys.path.append("backend/batch/") +function_app_path = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../backend/batch") +) +sys.path.append(function_app_path) from backend.batch.GetConversationResponse import ( # noqa: E402 do_get_conversation_response, diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py index e38e82bbb..dd9949017 100644 --- a/code/tests/test_BatchPushResults.py +++ b/code/tests/test_BatchPushResults.py @@ -1,8 +1,14 @@ import sys +import os from unittest.mock import patch, Mock from azure.functions import QueueMessage -sys.path.append("backend/batch/") + +function_app_path = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../backend/batch") +) +sys.path.append(function_app_path) + from backend.batch.BatchPushResults import do_batch_push_results # noqa: E402 from backend.batch.BatchPushResults import _get_file_name_from_message # noqa: E402 diff --git a/code/tests/test_BatchStartProcessing.py b/code/tests/test_BatchStartProcessing.py index 4fb953831..4e633053f 100644 --- a/code/tests/test_BatchStartProcessing.py +++ b/code/tests/test_BatchStartProcessing.py @@ -1,7 +1,12 @@ import sys +import os from unittest.mock import patch, Mock -sys.path.append("backend/batch") +function_app_path = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../backend/batch") +) +sys.path.append(function_app_path) + from backend.batch.BatchStartProcessing import do_batch_start_processing # noqa: E402 From cc55b1ae88923aaeb22a23d89dfe43ada10f6317 Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Wed, 10 Apr 2024 17:10:18 +0000 Subject: [PATCH 06/15] Remove extra sys.path.append that isnt necessary --- .env.sample | 57 ------------------- code/backend/batch/AddURLEmbeddings.py | 2 - code/backend/batch/BatchPushResults.py | 2 - code/backend/batch/BatchStartProcessing.py | 2 - code/backend/batch/GetConversationResponse.py | 2 - 5 files changed, 65 deletions(-) delete mode 100644 .env.sample diff --git a/.env.sample b/.env.sample deleted file mode 100644 index f4daa051c..000000000 --- a/.env.sample +++ /dev/null @@ -1,57 +0,0 @@ -# Azure Search for storing the processed documents -AZURE_SEARCH_SERVICE= -AZURE_SEARCH_INDEX= -AZURE_SEARCH_KEY= -AZURE_SEARCH_USE_SEMANTIC_SEARCH=False -AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG=default -AZURE_SEARCH_TOP_K=5 -AZURE_SEARCH_ENABLE_IN_DOMAIN=False -AZURE_SEARCH_FIELDS_ID=id -AZURE_SEARCH_CONTENT_COLUMNS=content -AZURE_SEARCH_CONTENT_VECTOR_COLUMNS=content_vector -AZURE_SEARCH_DIMENSIONS=1536 -AZURE_SEARCH_FIELDS_TAG=tag -AZURE_SEARCH_FIELDS_METADATA=metadata -AZURE_SEARCH_FILENAME_COLUMN=filepath -AZURE_SEARCH_TITLE_COLUMN=title -AZURE_SEARCH_URL_COLUMN=url -AZURE_SEARCH_CONVERSATIONS_LOG_INDEX=conversations-log -# Azure OpenAI for generating the answer and computing the embedding of the documents -AZURE_OPENAI_RESOURCE= -AZURE_OPENAI_API_KEY= -AZURE_OPENAI_MODEL=gpt-35-turbo -AZURE_OPENAI_MODEL_NAME=gpt-35-turbo -AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-ada-002 -AZURE_OPENAI_TEMPERATURE=0 -AZURE_OPENAI_TOP_P=1.0 -AZURE_OPENAI_MAX_TOKENS=1000 -AZURE_OPENAI_STOP_SEQUENCE= -AZURE_OPENAI_SYSTEM_MESSAGE=You are an AI assistant that helps people find information. -AZURE_OPENAI_API_VERSION=2024-02-01 -AZURE_OPENAI_STREAM=True -# Backend for processing the documents and application logging in the app -AzureWebJobsStorage= -BACKEND_URL=http://localhost:7071 -DOCUMENT_PROCESSING_QUEUE_NAME= -APPINSIGHTS_CONNECTION_STRING= -# Azure Blob Storage for storing the original documents to be processed -AZURE_BLOB_ACCOUNT_NAME= -AZURE_BLOB_ACCOUNT_KEY= -AZURE_BLOB_CONTAINER_NAME= -# Azure Form Recognizer for extracting the text from the documents -AZURE_FORM_RECOGNIZER_ENDPOINT= -AZURE_FORM_RECOGNIZER_KEY= -# Azure AI Content Safety for filtering out the inappropriate questions or answers -AZURE_CONTENT_SAFETY_ENDPOINT= -AZURE_CONTENT_SAFETY_KEY= -# Orchestration strategy. Use Azure OpenAI Functions (openai_function) or LangChain (langchain) for messages orchestration. If you are using a new model version 0613 select "openai_function" (or "langchain"), if you are using a 0314 model version select "langchain" -ORCHESTRATION_STRATEGY=openai_function -#Speech-to-text feature -AZURE_SPEECH_SERVICE_KEY= -AZURE_SPEECH_SERVICE_REGION= -# Auth type environment variables. -# When AZURE_AUTH_TYPE=rbac, please make sure variable USE_KEY_VAULT=false -# When USE_KEY_VAULT=true, please make sure to set AZURE_KEY_VAULT_ENDPOINT -AZURE_AUTH_TYPE=keys -USE_KEY_VAULT=true -AZURE_KEY_VAULT_ENDPOINT= diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/AddURLEmbeddings.py index f1b717a05..a41ad8653 100644 --- a/code/backend/batch/AddURLEmbeddings.py +++ b/code/backend/batch/AddURLEmbeddings.py @@ -1,13 +1,11 @@ import logging import traceback import azure.functions as func -import sys from utilities.helpers.DocumentProcessorHelper import DocumentProcessor from utilities.helpers.ConfigHelper import ConfigHelper -sys.path.append("..") bp_add_url_embeddings = func.Blueprint() logger = logging.getLogger(__name__) diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index db1af768d..cdb31a00c 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -2,7 +2,6 @@ import json import azure.functions as func from urllib.parse import urlparse -import sys from utilities.helpers.AzureBlobStorageHelper import ( AzureBlobStorageClient, @@ -11,7 +10,6 @@ from utilities.helpers.ConfigHelper import ConfigHelper -sys.path.append("..") bp_batch_push_results = func.Blueprint() logger = logging.getLogger(__name__) diff --git a/code/backend/batch/BatchStartProcessing.py b/code/backend/batch/BatchStartProcessing.py index a389f69ca..e917a8172 100644 --- a/code/backend/batch/BatchStartProcessing.py +++ b/code/backend/batch/BatchStartProcessing.py @@ -1,7 +1,6 @@ import logging import json import azure.functions as func -import sys from utilities.helpers.EnvHelper import EnvHelper from utilities.helpers.AzureBlobStorageHelper import ( @@ -12,7 +11,6 @@ bp_batch_start_processing = func.Blueprint() env_helper: EnvHelper = EnvHelper() logger = logging.getLogger(__name__) -sys.path.append("..") @bp_batch_start_processing.route(route="BatchStartProcessing") diff --git a/code/backend/batch/GetConversationResponse.py b/code/backend/batch/GetConversationResponse.py index 326f61b12..62db2d8a0 100644 --- a/code/backend/batch/GetConversationResponse.py +++ b/code/backend/batch/GetConversationResponse.py @@ -1,14 +1,12 @@ import azure.functions as func import logging import json -import sys from utilities.helpers.EnvHelper import EnvHelper from utilities.helpers.OrchestratorHelper import Orchestrator from utilities.helpers.ConfigHelper import ConfigHelper -sys.path.append("..") bp_get_conversation_response = func.Blueprint() env_helper: EnvHelper = EnvHelper() logger = logging.getLogger(__name__) From 4b0cf90b3bd5b2f658d3b57229e3870e0cddb596 Mon Sep 17 00:00:00 2001 From: tanya-borisova Date: Thu, 11 Apr 2024 10:06:10 +0100 Subject: [PATCH 07/15] Apply suggestions from code review Co-authored-by: Chinedum Echeta <60179183+cecheta@users.noreply.github.com> --- code/backend/batch/AddURLEmbeddings.py | 2 +- code/backend/batch/GetConversationResponse.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/AddURLEmbeddings.py index a41ad8653..a01e8482b 100644 --- a/code/backend/batch/AddURLEmbeddings.py +++ b/code/backend/batch/AddURLEmbeddings.py @@ -12,7 +12,7 @@ @bp_add_url_embeddings.route(route="AddURLEmbeddings") def add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: - do_add_url_embeddings(req) + return do_add_url_embeddings(req) def do_add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: diff --git a/code/backend/batch/GetConversationResponse.py b/code/backend/batch/GetConversationResponse.py index 62db2d8a0..a94ac6d13 100644 --- a/code/backend/batch/GetConversationResponse.py +++ b/code/backend/batch/GetConversationResponse.py @@ -14,7 +14,7 @@ @bp_get_conversation_response.route(route="GetConversationResponse") def get_conversation_response(req: func.HttpRequest) -> func.HttpResponse: - do_get_conversation_response(req) + return do_get_conversation_response(req) def do_get_conversation_response(req: func.HttpRequest) -> func.HttpResponse: From 3ecdb851dbbb4c9f4aa9dcb205e697fdfed4af87 Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Thu, 11 Apr 2024 09:25:09 +0000 Subject: [PATCH 08/15] Rename test file, add a test for no url in request --- code/tests/test_AddURLEmbeddings.py | 18 ++++++++++++++++-- ...onse.py => test_GetConversationResponse.py} | 0 2 files changed, 16 insertions(+), 2 deletions(-) rename code/tests/{test_BatchGetConversationResponse.py => test_GetConversationResponse.py} (100%) diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py index a0b5b11c6..eb421b8e0 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_AddURLEmbeddings.py @@ -13,7 +13,7 @@ @mock.patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -def test_add_url_embeddings_when_url_set_in_body(mock_doc_processor): +def test_add_url_embeddings_when_url_set_in_body(_): fake_request = func.HttpRequest( method="POST", url="", @@ -27,7 +27,7 @@ def test_add_url_embeddings_when_url_set_in_body(mock_doc_processor): @mock.patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -def test_add_url_embeddings_when_url_set_in_param(mock_doc_processor): +def test_add_url_embeddings_when_url_set_in_param(_): fake_request = func.HttpRequest( method="POST", url="", @@ -39,3 +39,17 @@ def test_add_url_embeddings_when_url_set_in_param(mock_doc_processor): response = do_add_url_embeddings(fake_request) assert response.status_code == 200 + + +@mock.patch("backend.batch.AddURLEmbeddings.DocumentProcessor") +def test_add_url_embeddings_returns_400_when_url_not_set(_): + fake_request = func.HttpRequest( + method="POST", + url="", + body=b"", + params={}, + ) + + response = do_add_url_embeddings(fake_request) + + assert response.status_code == 400 diff --git a/code/tests/test_BatchGetConversationResponse.py b/code/tests/test_GetConversationResponse.py similarity index 100% rename from code/tests/test_BatchGetConversationResponse.py rename to code/tests/test_GetConversationResponse.py From 642e619e8c545719d7a96f4b0f9579379e10682e Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Thu, 11 Apr 2024 09:47:09 +0000 Subject: [PATCH 09/15] Make sure ConfigHelper etc are mocked out --- code/tests/test_AddURLEmbeddings.py | 17 ++++++++++------- code/tests/test_GetConversationResponse.py | 3 ++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py index eb421b8e0..fcb0dddf3 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_AddURLEmbeddings.py @@ -1,6 +1,6 @@ import sys import os -from unittest import mock +from unittest.mock import patch import azure.functions as func @@ -12,8 +12,9 @@ from backend.batch.AddURLEmbeddings import do_add_url_embeddings # noqa: E402 -@mock.patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -def test_add_url_embeddings_when_url_set_in_body(_): +@patch("backend.batch.BatchPushResults.ConfigHelper") +@patch("backend.batch.AddURLEmbeddings.DocumentProcessor") +def test_add_url_embeddings_when_url_set_in_body(_, __): fake_request = func.HttpRequest( method="POST", url="", @@ -26,8 +27,9 @@ def test_add_url_embeddings_when_url_set_in_body(_): assert response.status_code == 200 -@mock.patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -def test_add_url_embeddings_when_url_set_in_param(_): +@patch("backend.batch.BatchPushResults.ConfigHelper") +@patch("backend.batch.AddURLEmbeddings.DocumentProcessor") +def test_add_url_embeddings_when_url_set_in_param(_, __): fake_request = func.HttpRequest( method="POST", url="", @@ -41,8 +43,9 @@ def test_add_url_embeddings_when_url_set_in_param(_): assert response.status_code == 200 -@mock.patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -def test_add_url_embeddings_returns_400_when_url_not_set(_): +@patch("backend.batch.BatchPushResults.ConfigHelper") +@patch("backend.batch.AddURLEmbeddings.DocumentProcessor") +def test_add_url_embeddings_returns_400_when_url_not_set(_, __): fake_request = func.HttpRequest( method="POST", url="", diff --git a/code/tests/test_GetConversationResponse.py b/code/tests/test_GetConversationResponse.py index 4fd13137a..50c619b29 100644 --- a/code/tests/test_GetConversationResponse.py +++ b/code/tests/test_GetConversationResponse.py @@ -13,8 +13,9 @@ ) +@patch("backend.batch.BatchPushResults.ConfigHelper") @patch("backend.batch.GetConversationResponse.Orchestrator") -def test_get_conversation_response(mock_create_message_orchestrator): +def test_get_conversation_response(mock_create_message_orchestrator, _): mock_http_request = Mock() request_json = { "messages": [ From 4278eae126ce960a2a9991c004f91c2a0aeeef4d Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Thu, 11 Apr 2024 09:52:40 +0000 Subject: [PATCH 10/15] Mock out config helper --- code/tests/test_AddURLEmbeddings.py | 6 +++--- code/tests/test_GetConversationResponse.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py index fcb0dddf3..9e9259dab 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_AddURLEmbeddings.py @@ -12,7 +12,7 @@ from backend.batch.AddURLEmbeddings import do_add_url_embeddings # noqa: E402 -@patch("backend.batch.BatchPushResults.ConfigHelper") +@patch("backend.batch.AddURLEmbeddings.ConfigHelper") @patch("backend.batch.AddURLEmbeddings.DocumentProcessor") def test_add_url_embeddings_when_url_set_in_body(_, __): fake_request = func.HttpRequest( @@ -27,7 +27,7 @@ def test_add_url_embeddings_when_url_set_in_body(_, __): assert response.status_code == 200 -@patch("backend.batch.BatchPushResults.ConfigHelper") +@patch("backend.batch.AddURLEmbeddings.ConfigHelper") @patch("backend.batch.AddURLEmbeddings.DocumentProcessor") def test_add_url_embeddings_when_url_set_in_param(_, __): fake_request = func.HttpRequest( @@ -43,7 +43,7 @@ def test_add_url_embeddings_when_url_set_in_param(_, __): assert response.status_code == 200 -@patch("backend.batch.BatchPushResults.ConfigHelper") +@patch("backend.batch.AddURLEmbeddings.ConfigHelper") @patch("backend.batch.AddURLEmbeddings.DocumentProcessor") def test_add_url_embeddings_returns_400_when_url_not_set(_, __): fake_request = func.HttpRequest( diff --git a/code/tests/test_GetConversationResponse.py b/code/tests/test_GetConversationResponse.py index 50c619b29..7123f3067 100644 --- a/code/tests/test_GetConversationResponse.py +++ b/code/tests/test_GetConversationResponse.py @@ -13,7 +13,7 @@ ) -@patch("backend.batch.BatchPushResults.ConfigHelper") +@patch("backend.batch.GetConversationResponse.ConfigHelper") @patch("backend.batch.GetConversationResponse.Orchestrator") def test_get_conversation_response(mock_create_message_orchestrator, _): mock_http_request = Mock() From 4bed86c20979c6bcfe3cd7109f0ce4325bbba1ac Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Fri, 12 Apr 2024 11:38:06 +0000 Subject: [PATCH 11/15] Improve sys.path.append --- code/tests/test_AddURLEmbeddings.py | 5 +---- code/tests/test_BatchPushResults.py | 5 +---- code/tests/test_BatchStartProcessing.py | 5 +---- code/tests/test_GetConversationResponse.py | 5 +---- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py index 9e9259dab..c2b72a73a 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_AddURLEmbeddings.py @@ -4,10 +4,7 @@ import azure.functions as func -function_app_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), "../backend/batch") -) -sys.path.append(function_app_path) +sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) from backend.batch.AddURLEmbeddings import do_add_url_embeddings # noqa: E402 diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py index dd9949017..eb59ed05d 100644 --- a/code/tests/test_BatchPushResults.py +++ b/code/tests/test_BatchPushResults.py @@ -4,10 +4,7 @@ from azure.functions import QueueMessage -function_app_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), "../backend/batch") -) -sys.path.append(function_app_path) +sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) from backend.batch.BatchPushResults import do_batch_push_results # noqa: E402 from backend.batch.BatchPushResults import _get_file_name_from_message # noqa: E402 diff --git a/code/tests/test_BatchStartProcessing.py b/code/tests/test_BatchStartProcessing.py index 4e633053f..8b85542ab 100644 --- a/code/tests/test_BatchStartProcessing.py +++ b/code/tests/test_BatchStartProcessing.py @@ -2,10 +2,7 @@ import os from unittest.mock import patch, Mock -function_app_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), "../backend/batch") -) -sys.path.append(function_app_path) +sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) from backend.batch.BatchStartProcessing import do_batch_start_processing # noqa: E402 diff --git a/code/tests/test_GetConversationResponse.py b/code/tests/test_GetConversationResponse.py index 7123f3067..808830006 100644 --- a/code/tests/test_GetConversationResponse.py +++ b/code/tests/test_GetConversationResponse.py @@ -3,10 +3,7 @@ from unittest.mock import patch, Mock, ANY import json -function_app_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), "../backend/batch") -) -sys.path.append(function_app_path) +sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) from backend.batch.GetConversationResponse import ( # noqa: E402 do_get_conversation_response, From 59bf1508e6436fc66d0b8d2bc793d5bb65fbe39a Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Fri, 12 Apr 2024 12:08:38 +0000 Subject: [PATCH 12/15] Mock EnvHelper as well to speed up test execution time --- code/backend/batch/AddURLEmbeddings.py | 8 ++++---- code/backend/batch/BatchPushResults.py | 8 ++++---- code/tests/test_AddURLEmbeddings.py | 15 ++++++++++++--- code/tests/test_BatchPushResults.py | 9 +++++++-- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/AddURLEmbeddings.py index cea43279c..73a7e1272 100644 --- a/code/backend/batch/AddURLEmbeddings.py +++ b/code/backend/batch/AddURLEmbeddings.py @@ -8,10 +8,6 @@ bp_add_url_embeddings = func.Blueprint() -env_helper: EnvHelper = EnvHelper() - -logger = logging.getLogger(__name__) -logger.setLevel(env_helper.LOGLEVEL) @bp_add_url_embeddings.route(route="AddURLEmbeddings") @@ -20,7 +16,11 @@ def add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: def do_add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: + logger = logging.getLogger(__name__) + env_helper: EnvHelper = EnvHelper() + logger.setLevel(env_helper.LOGLEVEL) logger.info("Python HTTP trigger function processed a request.") + # Get Url from request url = req.params.get("url") if not url: diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index e35cee9d7..6e141918c 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -10,10 +10,6 @@ bp_batch_push_results = func.Blueprint() -env_helper: EnvHelper = EnvHelper() - -logger = logging.getLogger(__name__) -logger.setLevel(env_helper.LOGLEVEL) def _get_file_name_from_message(msg: func.QueueMessage) -> str: @@ -34,10 +30,14 @@ def batch_push_results(msg: func.QueueMessage) -> None: def do_batch_push_results(msg: func.QueueMessage) -> None: + env_helper: EnvHelper = EnvHelper() + logger = logging.getLogger(__name__) + logger.setLevel(env_helper.LOGLEVEL) logger.info( "Python queue trigger function processed a queue item: %s", msg.get_body().decode("utf-8"), ) + document_processor = DocumentProcessor() blob_client = AzureBlobStorageClient() # Get the file name from the message diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py index c2b72a73a..e4bc9106c 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_AddURLEmbeddings.py @@ -11,7 +11,10 @@ @patch("backend.batch.AddURLEmbeddings.ConfigHelper") @patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -def test_add_url_embeddings_when_url_set_in_body(_, __): +@patch("backend.batch.AddURLEmbeddings.EnvHelper") +def test_add_url_embeddings_when_url_set_in_body(mock_env_helper, _, __): + mock_env_helper.return_value.LOGLEVEL = "INFO" + fake_request = func.HttpRequest( method="POST", url="", @@ -26,7 +29,10 @@ def test_add_url_embeddings_when_url_set_in_body(_, __): @patch("backend.batch.AddURLEmbeddings.ConfigHelper") @patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -def test_add_url_embeddings_when_url_set_in_param(_, __): +@patch("backend.batch.AddURLEmbeddings.EnvHelper") +def test_add_url_embeddings_when_url_set_in_param(mock_env_helper, _, __): + mock_env_helper.return_value.LOGLEVEL = "INFO" + fake_request = func.HttpRequest( method="POST", url="", @@ -42,7 +48,10 @@ def test_add_url_embeddings_when_url_set_in_param(_, __): @patch("backend.batch.AddURLEmbeddings.ConfigHelper") @patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -def test_add_url_embeddings_returns_400_when_url_not_set(_, __): +@patch("backend.batch.AddURLEmbeddings.EnvHelper") +def test_add_url_embeddings_returns_400_when_url_not_set(mock_env_helper, _, __): + mock_env_helper.return_value.LOGLEVEL = "INFO" + fake_request = func.HttpRequest( method="POST", url="", diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py index eb59ed05d..3e263124d 100644 --- a/code/tests/test_BatchPushResults.py +++ b/code/tests/test_BatchPushResults.py @@ -30,12 +30,17 @@ def test_get_file_name_from_message_no_filename(): assert file_name == "test_filename.md" +@patch("backend.batch.BatchPushResults.EnvHelper") @patch("backend.batch.BatchPushResults.ConfigHelper") @patch("backend.batch.BatchPushResults.AzureBlobStorageClient") @patch("backend.batch.BatchPushResults.DocumentProcessor") -def test_do_batch_push_results( - mock_document_processor, mock_azure_blob_storage_client, mock_config_helper +def test_batch_push_results( + mock_document_processor, + mock_azure_blob_storage_client, + mock_config_helper, + mock_env_helper, ): + mock_env_helper.return_value.LOGLEVEL = "INFO" mock_queue_message = QueueMessage( body='{"message": "test message", "filename": "test/test/test_filename.md"}' ) From 691912d62d8709b653fc8aa78690e883813e32e5 Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Fri, 12 Apr 2024 14:33:35 +0000 Subject: [PATCH 13/15] Utilise get_user_function() method to enable tests --- code/backend/batch/AddURLEmbeddings.py | 4 ---- code/backend/batch/BatchStartProcessing.py | 4 ---- code/tests/test_AddURLEmbeddings.py | 8 ++++---- code/tests/test_BatchPushResults.py | 8 +++++--- code/tests/test_BatchStartProcessing.py | 6 +++--- code/tests/test_GetConversationResponse.py | 4 ++-- 6 files changed, 14 insertions(+), 20 deletions(-) diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/AddURLEmbeddings.py index 73a7e1272..8883e9ebd 100644 --- a/code/backend/batch/AddURLEmbeddings.py +++ b/code/backend/batch/AddURLEmbeddings.py @@ -12,10 +12,6 @@ @bp_add_url_embeddings.route(route="AddURLEmbeddings") def add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: - return do_add_url_embeddings(req) - - -def do_add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: logger = logging.getLogger(__name__) env_helper: EnvHelper = EnvHelper() logger.setLevel(env_helper.LOGLEVEL) diff --git a/code/backend/batch/BatchStartProcessing.py b/code/backend/batch/BatchStartProcessing.py index 0126ea82e..afe7ef6d0 100644 --- a/code/backend/batch/BatchStartProcessing.py +++ b/code/backend/batch/BatchStartProcessing.py @@ -16,10 +16,6 @@ @bp_batch_start_processing.route(route="BatchStartProcessing") def batch_start_processing(req: func.HttpRequest) -> func.HttpResponse: - return do_batch_start_processing(req) - - -def do_batch_start_processing(req: func.HttpRequest) -> func.HttpResponse: logger.info("Requested to start processing all documents received") # Set up Blob Storage Client azure_blob_storage_client = AzureBlobStorageClient() diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py index e4bc9106c..55fc99220 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_AddURLEmbeddings.py @@ -6,7 +6,7 @@ sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) -from backend.batch.AddURLEmbeddings import do_add_url_embeddings # noqa: E402 +from backend.batch.AddURLEmbeddings import add_url_embeddings # noqa: E402 @patch("backend.batch.AddURLEmbeddings.ConfigHelper") @@ -22,7 +22,7 @@ def test_add_url_embeddings_when_url_set_in_body(mock_env_helper, _, __): headers={"Content-Type": "application/json"}, ) - response = do_add_url_embeddings(fake_request) + response = add_url_embeddings.build().get_user_function()(fake_request) assert response.status_code == 200 @@ -41,7 +41,7 @@ def test_add_url_embeddings_when_url_set_in_param(mock_env_helper, _, __): params={"url": "https://example.com"}, ) - response = do_add_url_embeddings(fake_request) + response = add_url_embeddings.build().get_user_function()(fake_request) assert response.status_code == 200 @@ -59,6 +59,6 @@ def test_add_url_embeddings_returns_400_when_url_not_set(mock_env_helper, _, __) params={}, ) - response = do_add_url_embeddings(fake_request) + response = add_url_embeddings.build().get_user_function()(fake_request) assert response.status_code == 400 diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py index 3e263124d..569ca0835 100644 --- a/code/tests/test_BatchPushResults.py +++ b/code/tests/test_BatchPushResults.py @@ -6,8 +6,10 @@ sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) -from backend.batch.BatchPushResults import do_batch_push_results # noqa: E402 -from backend.batch.BatchPushResults import _get_file_name_from_message # noqa: E402 +from backend.batch.BatchPushResults import ( # noqa: E402 + batch_push_results, + _get_file_name_from_message, +) def test_get_file_name_from_message(): @@ -59,7 +61,7 @@ def test_batch_push_results( mock_processors ) - do_batch_push_results(mock_queue_message) + batch_push_results.build().get_user_function()(mock_queue_message) mock_document_processor_instance.process.assert_called_once_with( source_url="test_blob_sas", processors=[md_processor] diff --git a/code/tests/test_BatchStartProcessing.py b/code/tests/test_BatchStartProcessing.py index 8b85542ab..413c13e23 100644 --- a/code/tests/test_BatchStartProcessing.py +++ b/code/tests/test_BatchStartProcessing.py @@ -4,7 +4,7 @@ sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) -from backend.batch.BatchStartProcessing import do_batch_start_processing # noqa: E402 +from backend.batch.BatchStartProcessing import batch_start_processing # noqa: E402 @patch("backend.batch.BatchStartProcessing.create_queue_client") @@ -23,7 +23,7 @@ def test_batch_start_processing_processes_all( {"filename": "file_name_one", "embeddings_added": False} ] - response = do_batch_start_processing(mock_http_request) + response = batch_start_processing.build().get_user_function()(mock_http_request) assert response.status_code == 200 @@ -51,7 +51,7 @@ def test_batch_start_processing_filters_filter_no_embeddings( }, {"filename": "file_name_two", "embeddings_added": False}, ] - response = do_batch_start_processing(mock_http_request) + response = batch_start_processing.build().get_user_function()(mock_http_request) assert response.status_code == 200 diff --git a/code/tests/test_GetConversationResponse.py b/code/tests/test_GetConversationResponse.py index 808830006..a91ba9c38 100644 --- a/code/tests/test_GetConversationResponse.py +++ b/code/tests/test_GetConversationResponse.py @@ -6,7 +6,7 @@ sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) from backend.batch.GetConversationResponse import ( # noqa: E402 - do_get_conversation_response, + get_conversation_response, ) @@ -31,7 +31,7 @@ def test_get_conversation_response(mock_create_message_orchestrator, _): mock_create_message_orchestrator.return_value = mock_message_orchestrator - response = do_get_conversation_response(mock_http_request) + response = get_conversation_response.build().get_user_function()(mock_http_request) assert response.status_code == 200 From 9a805f193f19dc8d982c7877cabe329a24b5a672 Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Fri, 12 Apr 2024 14:36:17 +0000 Subject: [PATCH 14/15] re-add env sample --- .env.sample | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .env.sample diff --git a/.env.sample b/.env.sample new file mode 100644 index 000000000..f4daa051c --- /dev/null +++ b/.env.sample @@ -0,0 +1,57 @@ +# Azure Search for storing the processed documents +AZURE_SEARCH_SERVICE= +AZURE_SEARCH_INDEX= +AZURE_SEARCH_KEY= +AZURE_SEARCH_USE_SEMANTIC_SEARCH=False +AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG=default +AZURE_SEARCH_TOP_K=5 +AZURE_SEARCH_ENABLE_IN_DOMAIN=False +AZURE_SEARCH_FIELDS_ID=id +AZURE_SEARCH_CONTENT_COLUMNS=content +AZURE_SEARCH_CONTENT_VECTOR_COLUMNS=content_vector +AZURE_SEARCH_DIMENSIONS=1536 +AZURE_SEARCH_FIELDS_TAG=tag +AZURE_SEARCH_FIELDS_METADATA=metadata +AZURE_SEARCH_FILENAME_COLUMN=filepath +AZURE_SEARCH_TITLE_COLUMN=title +AZURE_SEARCH_URL_COLUMN=url +AZURE_SEARCH_CONVERSATIONS_LOG_INDEX=conversations-log +# Azure OpenAI for generating the answer and computing the embedding of the documents +AZURE_OPENAI_RESOURCE= +AZURE_OPENAI_API_KEY= +AZURE_OPENAI_MODEL=gpt-35-turbo +AZURE_OPENAI_MODEL_NAME=gpt-35-turbo +AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-ada-002 +AZURE_OPENAI_TEMPERATURE=0 +AZURE_OPENAI_TOP_P=1.0 +AZURE_OPENAI_MAX_TOKENS=1000 +AZURE_OPENAI_STOP_SEQUENCE= +AZURE_OPENAI_SYSTEM_MESSAGE=You are an AI assistant that helps people find information. +AZURE_OPENAI_API_VERSION=2024-02-01 +AZURE_OPENAI_STREAM=True +# Backend for processing the documents and application logging in the app +AzureWebJobsStorage= +BACKEND_URL=http://localhost:7071 +DOCUMENT_PROCESSING_QUEUE_NAME= +APPINSIGHTS_CONNECTION_STRING= +# Azure Blob Storage for storing the original documents to be processed +AZURE_BLOB_ACCOUNT_NAME= +AZURE_BLOB_ACCOUNT_KEY= +AZURE_BLOB_CONTAINER_NAME= +# Azure Form Recognizer for extracting the text from the documents +AZURE_FORM_RECOGNIZER_ENDPOINT= +AZURE_FORM_RECOGNIZER_KEY= +# Azure AI Content Safety for filtering out the inappropriate questions or answers +AZURE_CONTENT_SAFETY_ENDPOINT= +AZURE_CONTENT_SAFETY_KEY= +# Orchestration strategy. Use Azure OpenAI Functions (openai_function) or LangChain (langchain) for messages orchestration. If you are using a new model version 0613 select "openai_function" (or "langchain"), if you are using a 0314 model version select "langchain" +ORCHESTRATION_STRATEGY=openai_function +#Speech-to-text feature +AZURE_SPEECH_SERVICE_KEY= +AZURE_SPEECH_SERVICE_REGION= +# Auth type environment variables. +# When AZURE_AUTH_TYPE=rbac, please make sure variable USE_KEY_VAULT=false +# When USE_KEY_VAULT=true, please make sure to set AZURE_KEY_VAULT_ENDPOINT +AZURE_AUTH_TYPE=keys +USE_KEY_VAULT=true +AZURE_KEY_VAULT_ENDPOINT= From de3bf930693ca768964434cb099e191646af15db Mon Sep 17 00:00:00 2001 From: Tanya Borisova Date: Fri, 12 Apr 2024 16:56:36 +0000 Subject: [PATCH 15/15] Init log level from os environ --- code/backend/batch/AddURLEmbeddings.py | 7 +++---- code/backend/batch/BatchPushResults.py | 7 +++---- code/backend/batch/BatchStartProcessing.py | 5 ++--- code/backend/batch/GetConversationResponse.py | 5 +++-- code/tests/test_AddURLEmbeddings.py | 15 +++------------ code/tests/test_BatchPushResults.py | 3 --- 6 files changed, 14 insertions(+), 28 deletions(-) diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/AddURLEmbeddings.py index 8883e9ebd..96b86e4cd 100644 --- a/code/backend/batch/AddURLEmbeddings.py +++ b/code/backend/batch/AddURLEmbeddings.py @@ -1,20 +1,19 @@ +import os import logging import traceback import azure.functions as func -from utilities.helpers.EnvHelper import EnvHelper from utilities.helpers.DocumentProcessorHelper import DocumentProcessor from utilities.helpers.ConfigHelper import ConfigHelper bp_add_url_embeddings = func.Blueprint() +logger = logging.getLogger(__name__) +logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) @bp_add_url_embeddings.route(route="AddURLEmbeddings") def add_url_embeddings(req: func.HttpRequest) -> func.HttpResponse: - logger = logging.getLogger(__name__) - env_helper: EnvHelper = EnvHelper() - logger.setLevel(env_helper.LOGLEVEL) logger.info("Python HTTP trigger function processed a request.") # Get Url from request diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index 6e141918c..e7728434d 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -1,15 +1,17 @@ +import os import logging import json import azure.functions as func from urllib.parse import urlparse -from utilities.helpers.EnvHelper import EnvHelper from utilities.helpers.AzureBlobStorageHelper import AzureBlobStorageClient from utilities.helpers.DocumentProcessorHelper import DocumentProcessor from utilities.helpers.ConfigHelper import ConfigHelper bp_batch_push_results = func.Blueprint() +logger = logging.getLogger(__name__) +logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) def _get_file_name_from_message(msg: func.QueueMessage) -> str: @@ -30,9 +32,6 @@ def batch_push_results(msg: func.QueueMessage) -> None: def do_batch_push_results(msg: func.QueueMessage) -> None: - env_helper: EnvHelper = EnvHelper() - logger = logging.getLogger(__name__) - logger.setLevel(env_helper.LOGLEVEL) logger.info( "Python queue trigger function processed a queue item: %s", msg.get_body().decode("utf-8"), diff --git a/code/backend/batch/BatchStartProcessing.py b/code/backend/batch/BatchStartProcessing.py index afe7ef6d0..9ae5f3016 100644 --- a/code/backend/batch/BatchStartProcessing.py +++ b/code/backend/batch/BatchStartProcessing.py @@ -1,17 +1,16 @@ +import os import logging import json import azure.functions as func -from utilities.helpers.EnvHelper import EnvHelper from utilities.helpers.AzureBlobStorageHelper import ( AzureBlobStorageClient, create_queue_client, ) bp_batch_start_processing = func.Blueprint() -env_helper: EnvHelper = EnvHelper() logger = logging.getLogger(__name__) -logger.setLevel(env_helper.LOGLEVEL) +logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) @bp_batch_start_processing.route(route="BatchStartProcessing") diff --git a/code/backend/batch/GetConversationResponse.py b/code/backend/batch/GetConversationResponse.py index cbfd430b3..4ec132366 100644 --- a/code/backend/batch/GetConversationResponse.py +++ b/code/backend/batch/GetConversationResponse.py @@ -1,3 +1,4 @@ +import os import azure.functions as func import logging import json @@ -8,9 +9,8 @@ bp_get_conversation_response = func.Blueprint() -env_helper: EnvHelper = EnvHelper() logger = logging.getLogger(__name__) -logger.setLevel(env_helper.LOGLEVEL) +logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) @bp_get_conversation_response.route(route="GetConversationResponse") @@ -22,6 +22,7 @@ def do_get_conversation_response(req: func.HttpRequest) -> func.HttpResponse: logger.info("Python HTTP trigger function processed a request.") message_orchestrator = Orchestrator() + env_helper: EnvHelper = EnvHelper() try: req_body = req.get_json() diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_AddURLEmbeddings.py index 55fc99220..9157b4521 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_AddURLEmbeddings.py @@ -11,10 +11,7 @@ @patch("backend.batch.AddURLEmbeddings.ConfigHelper") @patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -@patch("backend.batch.AddURLEmbeddings.EnvHelper") -def test_add_url_embeddings_when_url_set_in_body(mock_env_helper, _, __): - mock_env_helper.return_value.LOGLEVEL = "INFO" - +def test_add_url_embeddings_when_url_set_in_body(_, __): fake_request = func.HttpRequest( method="POST", url="", @@ -29,10 +26,7 @@ def test_add_url_embeddings_when_url_set_in_body(mock_env_helper, _, __): @patch("backend.batch.AddURLEmbeddings.ConfigHelper") @patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -@patch("backend.batch.AddURLEmbeddings.EnvHelper") -def test_add_url_embeddings_when_url_set_in_param(mock_env_helper, _, __): - mock_env_helper.return_value.LOGLEVEL = "INFO" - +def test_add_url_embeddings_when_url_set_in_param(_, __): fake_request = func.HttpRequest( method="POST", url="", @@ -48,10 +42,7 @@ def test_add_url_embeddings_when_url_set_in_param(mock_env_helper, _, __): @patch("backend.batch.AddURLEmbeddings.ConfigHelper") @patch("backend.batch.AddURLEmbeddings.DocumentProcessor") -@patch("backend.batch.AddURLEmbeddings.EnvHelper") -def test_add_url_embeddings_returns_400_when_url_not_set(mock_env_helper, _, __): - mock_env_helper.return_value.LOGLEVEL = "INFO" - +def test_add_url_embeddings_returns_400_when_url_not_set(_, __): fake_request = func.HttpRequest( method="POST", url="", diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py index 569ca0835..b62063043 100644 --- a/code/tests/test_BatchPushResults.py +++ b/code/tests/test_BatchPushResults.py @@ -32,7 +32,6 @@ def test_get_file_name_from_message_no_filename(): assert file_name == "test_filename.md" -@patch("backend.batch.BatchPushResults.EnvHelper") @patch("backend.batch.BatchPushResults.ConfigHelper") @patch("backend.batch.BatchPushResults.AzureBlobStorageClient") @patch("backend.batch.BatchPushResults.DocumentProcessor") @@ -40,9 +39,7 @@ def test_batch_push_results( mock_document_processor, mock_azure_blob_storage_client, mock_config_helper, - mock_env_helper, ): - mock_env_helper.return_value.LOGLEVEL = "INFO" mock_queue_message = QueueMessage( body='{"message": "test message", "filename": "test/test/test_filename.md"}' )