From 60568fd7a80ea81f9cab3b50c1c91ac61d455e4c Mon Sep 17 00:00:00 2001 From: Sebastian <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 18 Oct 2023 09:37:43 +0200 Subject: [PATCH] :recycle: update HTTP error handling (#176) --- examples/display_cropping.py | 5 +- mindee/client.py | 22 +++-- mindee/mindee_http/error.py | 102 +++++++++++++++++++++- mindee/parsing/common/api_response.py | 2 + tests/mindee_http/test_error.py | 116 ++++++++++++++++++++++++++ tests/test_cli.py | 18 ++-- tests/test_client.py | 12 +-- 7 files changed, 251 insertions(+), 26 deletions(-) create mode 100644 tests/mindee_http/test_error.py diff --git a/examples/display_cropping.py b/examples/display_cropping.py index 4baf5b0c..1e931859 100644 --- a/examples/display_cropping.py +++ b/examples/display_cropping.py @@ -16,6 +16,7 @@ import numpy as np from mindee import Client, product +from mindee.parsing.common.predict_response import PredictResponse def relative_to_pixel_pos(polygon, image_h: int, image_w: int) -> List[Tuple[int, int]]: @@ -66,7 +67,7 @@ def show_image_crops(file_path: str, cropping: list): input_doc = mindee_client.source_from_path(image_path) # Parse the document by passing the appropriate type - # api_response = mindee_client.parse(input_doc, product.CropperV1) + api_response: PredictResponse = mindee_client.parse(product.CropperV1, input_doc) # Display - # show_image_crops(image_path, api_response.pages[0].cropping) + show_image_crops(image_path, api_response.document.inference.pages[0].cropping) diff --git a/mindee/client.py b/mindee/client.py index e03982fa..01053ae0 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -1,4 +1,3 @@ -import json from pathlib import Path from time import sleep from typing import BinaryIO, Dict, Optional, Type, Union @@ -14,7 +13,7 @@ ) from mindee.logger import logger from mindee.mindee_http.endpoint import CustomEndpoint, Endpoint -from mindee.mindee_http.error import HTTPException +from mindee.mindee_http.error import handle_error from mindee.mindee_http.mindee_api import MindeeApi from mindee.parsing.common.async_predict_response import AsyncPredictResponse from mindee.parsing.common.inference import Inference, TypeInference @@ -292,8 +291,10 @@ def _make_request( dict_response = response.json() if not response.ok: - raise HTTPException( - f"API {response.status_code} HTTP error: {json.dumps(dict_response)}" + raise handle_error( + str(product_class.endpoint_name), + dict_response, + response.status_code, ) return PredictResponse(product_class, dict_response) @@ -323,8 +324,10 @@ def _predict_async( dict_response = response.json() if not response.ok: - raise HTTPException( - f"API {response.status_code} HTTP error: {json.dumps(dict_response)}" + raise handle_error( + str(product_class.endpoint_name), + dict_response, + response.status_code, ) return AsyncPredictResponse(product_class, dict_response) @@ -348,8 +351,11 @@ def _get_queued_document( or queue_response.status_code < 200 or queue_response.status_code > 302 ): - raise HTTPException( - f"API {queue_response.status_code} HTTP error: {json.dumps(queue_response.json())}" + dict_response = queue_response.json() + raise handle_error( + str(product_class.endpoint_name), + dict_response, + queue_response.status_code, ) return AsyncPredictResponse(product_class, queue_response.json()) diff --git a/mindee/mindee_http/error.py b/mindee/mindee_http/error.py index e008fb43..74a620dd 100644 --- a/mindee/mindee_http/error.py +++ b/mindee/mindee_http/error.py @@ -1,2 +1,102 @@ -class HTTPException(RuntimeError): +from typing import Union + +from mindee.parsing.common.string_dict import StringDict + + +class MindeeHTTPException(RuntimeError): """An exception relating to HTTP calls.""" + + status_code: int + api_code: str + api_details: str + api_message: str + + def __init__(self, http_error: StringDict, url: str, code: int) -> None: + """ + Base exception for HTTP calls. + + :param http_error: formatted & parsed error + :param url: url/endpoint the exception was raised on + :param code: HTTP code for the error + """ + self.status_code = code + self.api_code = http_error["code"] if "code" in http_error else None + self.api_details = http_error["details"] if "details" in http_error else None + self.api_message = http_error["message"] if "message" in http_error else None + super().__init__( + f"{url} {self.status_code} HTTP error: {self.api_details} - {self.api_message}" + ) + + +class MindeeHTTPClientException(MindeeHTTPException): + """API Client HTTP exception.""" + + +class MindeeHTTPServerException(MindeeHTTPException): + """API Server HTTP exception.""" + + +def create_error_obj(response: Union[StringDict, str]) -> StringDict: + """ + Creates an error object based on a requests' payload. + + :param response: response as sent by the server, as a dict. + In _very_ rare instances, this can be an html string. + """ + if not isinstance(response, str): + if "api_request" in response and "error" in response["api_request"]: + return response["api_request"]["error"] + raise RuntimeError(f"Could not build specific HTTP exception from '{response}'") + error_dict = {} + if "Maximum pdf pages" in response: + error_dict = { + "code": "TooManyPages", + "message": "Maximum amound of pdf pages reached.", + "details": response, + } + elif "Max file size is" in response: + error_dict = { + "code": "FileTooLarge", + "message": "Maximum file size reached.", + "details": response, + } + elif "Invalid file type" in response: + error_dict = { + "code": "InvalidFiletype", + "message": "Invalid file type.", + "details": response, + } + elif "Gateway timeout" in response: + error_dict = { + "code": "RequestTimeout", + "message": "Request timed out.", + "details": response, + } + elif "Too Many Requests" in response: + error_dict = { + "code": "TooManyRequests", + "message": "Too Many Requests.", + "details": response, + } + else: + error_dict = { + "code": "UnknownError", + "message": "Server sent back an unexpected reply.", + "details": response, + } + return error_dict + + +def handle_error(url: str, response: StringDict, code: int) -> MindeeHTTPException: + """ + Creates an appropriate HTTP error exception, based on retrieved HTTP error code. + + :param url: url of the product + :param response: StringDict + """ + error_obj = create_error_obj(response) + if 400 <= code <= 499: + return MindeeHTTPClientException(error_obj, url, code) + if 500 <= code <= 599: + return MindeeHTTPServerException(error_obj, url, code) + return MindeeHTTPException(error_obj, url, code) diff --git a/mindee/parsing/common/api_response.py b/mindee/parsing/common/api_response.py index 86fb9338..d4dd93e5 100644 --- a/mindee/parsing/common/api_response.py +++ b/mindee/parsing/common/api_response.py @@ -12,6 +12,8 @@ class ApiResponse(ABC): Serves as a base class for responses to both synchronous and asynchronous calls. """ + api_request: ApiRequest + """Results of the request sent to the API.""" raw_http: StringDict """Raw request sent by the server, as string.""" diff --git a/tests/mindee_http/test_error.py b/tests/mindee_http/test_error.py new file mode 100644 index 00000000..791d13b2 --- /dev/null +++ b/tests/mindee_http/test_error.py @@ -0,0 +1,116 @@ +import json +from pathlib import Path + +import pytest + +from mindee import Client, product +from mindee.input.sources import PathInput +from mindee.mindee_http.error import ( + MindeeHTTPClientException, + MindeeHTTPServerException, + handle_error, +) +from tests.test_inputs import FILE_TYPES_DIR +from tests.utils import clear_envvars, dummy_envvars + +ERROR_DATA_DIR = Path("./tests/data/errors") + + +@pytest.fixture +def empty_client(monkeypatch) -> Client: + clear_envvars(monkeypatch) + return Client() + + +@pytest.fixture +def dummy_client(monkeypatch) -> Client: + dummy_envvars(monkeypatch) + return Client("dummy") + + +@pytest.fixture +def dummy_file(monkeypatch) -> PathInput: + clear_envvars(monkeypatch) + c = Client(api_key="dummy-client") + return c.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") + + +def test_http_client_error(dummy_client: Client, dummy_file: PathInput): + with pytest.raises(MindeeHTTPClientException): + dummy_client.parse(product.InvoiceV4, dummy_file) + + +def test_http_enqueue_client_error(dummy_client: Client, dummy_file: PathInput): + with pytest.raises(MindeeHTTPClientException): + dummy_client.enqueue(product.InvoiceV4, dummy_file) + + +def test_http_parse_client_error(dummy_client: Client, dummy_file: PathInput): + with pytest.raises(MindeeHTTPClientException): + dummy_client.parse_queued(product.InvoiceV4, "dummy-queue-id") + + +def test_http_enqueue_and_parse_client_error( + dummy_client: Client, dummy_file: PathInput +): + with pytest.raises(MindeeHTTPClientException): + dummy_client.enqueue_and_parse(product.InvoiceV4, dummy_file) + + +def test_http_400_error(): + error_ref = open(ERROR_DATA_DIR / "error_400_no_details.json") + error_obj = json.load(error_ref) + error_400 = handle_error("dummy-url", error_obj, 400) + with pytest.raises(MindeeHTTPClientException): + raise error_400 + assert error_400.status_code == 400 + assert error_400.api_code == "SomeCode" + assert error_400.api_message == "Some scary message here" + assert error_400.api_details is None + + +def test_http_401_error(): + error_ref = open(ERROR_DATA_DIR / "error_401_invalid_token.json") + error_obj = json.load(error_ref) + error_401 = handle_error("dummy-url", error_obj, 401) + with pytest.raises(MindeeHTTPClientException): + raise error_401 + assert error_401.status_code == 401 + assert error_401.api_code == "Unauthorized" + assert error_401.api_message == "Authorization required" + assert error_401.api_details == "Invalid token provided" + + +def test_http_429_error(): + error_ref = open(ERROR_DATA_DIR / "error_429_too_many_requests.json") + error_obj = json.load(error_ref) + error_429 = handle_error("dummy-url", error_obj, 429) + with pytest.raises(MindeeHTTPClientException): + raise error_429 + assert error_429.status_code == 429 + assert error_429.api_code == "TooManyRequests" + assert error_429.api_message == "Too many requests" + assert error_429.api_details == "Too Many Requests." + + +def test_http_500_error(): + error_ref = open(ERROR_DATA_DIR / "error_500_inference_fail.json") + error_obj = json.load(error_ref) + error_500 = handle_error("dummy-url", error_obj, 500) + with pytest.raises(MindeeHTTPServerException): + raise error_500 + assert error_500.status_code == 500 + assert error_500.api_code == "failure" + assert error_500.api_message == "Inference failed" + assert error_500.api_details == "Can not run prediction: " + + +def test_http_500_html_error(): + error_ref_contents = open(ERROR_DATA_DIR / "error_50x.html").read() + error_500 = handle_error("dummy-url", error_ref_contents, 500) + with pytest.raises(MindeeHTTPServerException): + raise error_500 + assert error_500.status_code == 500 + assert error_500.api_code == "UnknownError" + assert error_500.api_message == "Server sent back an unexpected reply." + assert error_500.api_details == error_ref_contents diff --git a/tests/test_cli.py b/tests/test_cli.py index 40d04ad3..31e11343 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,7 +3,7 @@ import pytest from mindee.cli import MindeeParser -from mindee.mindee_http.error import HTTPException +from mindee.mindee_http.error import MindeeHTTPException from tests.utils import clear_envvars @@ -71,7 +71,7 @@ def ots_doc_fetch(monkeypatch): def test_cli_custom_doc(custom_doc): - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): parser = MindeeParser(parsed_args=custom_doc) parser.call_endpoint() @@ -83,7 +83,7 @@ def test_cli_invoice(ots_doc): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() ots_doc.api_key = "dummy" - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() @@ -95,7 +95,7 @@ def test_cli_receipt(ots_doc): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() ots_doc.api_key = "dummy" - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() @@ -107,7 +107,7 @@ def test_cli_financial_doc(ots_doc): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() ots_doc.api_key = "dummy" - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() @@ -119,7 +119,7 @@ def test_cli_passport(ots_doc): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() ots_doc.api_key = "dummy" - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() @@ -131,7 +131,7 @@ def test_cli_us_bank_check(ots_doc): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() ots_doc.api_key = "dummy" - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): parser = MindeeParser(parsed_args=ots_doc) parser.call_endpoint() @@ -143,7 +143,7 @@ def test_cli_invoice_splitter_enqueue(ots_doc_enqueue_and_parse): parser = MindeeParser(parsed_args=ots_doc_enqueue_and_parse) parser.call_endpoint() ots_doc_enqueue_and_parse.api_key = "dummy" - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): parser = MindeeParser(parsed_args=ots_doc_enqueue_and_parse) parser.call_endpoint() @@ -155,6 +155,6 @@ def test_cli_invoice_splitter_enqueue(ots_doc_enqueue_and_parse): # parser = MindeeParser(parsed_args=ots_doc_fetch) # parser.call_endpoint() # ots_doc_fetch.api_key = "dummy" -# with pytest.raises(HTTPException): +# with pytest.raises(MindeeHTTPException): # parser = MindeeParser(parsed_args=ots_doc_fetch) # parser.call_endpoint() diff --git a/tests/test_client.py b/tests/test_client.py index 410b7d1f..1d128e7e 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -4,7 +4,7 @@ from mindee import Client, PageOptions, product from mindee.input.sources import LocalInputSource -from mindee.mindee_http.error import HTTPException +from mindee.mindee_http.error import MindeeHTTPException from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 from mindee.product.receipt.receipt_v4 import ReceiptV4 from tests.test_inputs import FILE_TYPES_DIR @@ -35,7 +35,7 @@ def test_parse_path_without_token(empty_client: Client): def test_parse_path_with_env_token(env_client: Client): - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): input_doc = env_client.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") env_client.parse(product.ReceiptV4, input_doc) @@ -46,7 +46,7 @@ def test_parse_path_with_wrong_filetype(dummy_client: Client): def test_parse_path_with_wrong_token(dummy_client: Client): - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): input_doc = dummy_client.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") dummy_client.parse(product.ReceiptV4, input_doc) @@ -64,7 +64,7 @@ def test_interface_version(dummy_client: Client): account_name="dummy", version="1.1", ) - with pytest.raises(HTTPException): + with pytest.raises(MindeeHTTPException): input_doc = dummy_client.source_from_path(FILE_TYPES_DIR / "receipt.jpg") dummy_client.parse(product.CustomV1, input_doc, endpoint=dummy_endpoint) @@ -75,7 +75,7 @@ def test_keep_file_open(dummy_client: Client): ) try: dummy_client.parse(product.ReceiptV4, input_doc, close_file=False) - except HTTPException: + except MindeeHTTPException: pass assert not input_doc.file_object.closed input_doc.close() @@ -94,7 +94,7 @@ def test_cut_options(dummy_client: Client): close_file=False, page_options=PageOptions(page_indexes=range(5)), ) - except HTTPException: + except MindeeHTTPException: pass assert input_doc.count_doc_pages() == 5 input_doc.close()