diff --git a/.github/workflows/test-code-samples.yml b/.github/workflows/test-code-samples.yml index d4c1a8ad..bf4d4a12 100644 --- a/.github/workflows/test-code-samples.yml +++ b/.github/workflows/test-code-samples.yml @@ -52,6 +52,6 @@ jobs: with: status: ${{ job.status }} notify_when: "failure" - notification_title: "{workflow} is failing" + notification_title: "Code sample test '{workflow}' is failing" env: SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }} diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 4b0bfd39..1e172547 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -49,5 +49,17 @@ jobs: - name: Run Integration Testing env: MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} run: | pytest -m integration + + + - name: Notify Slack Action on Failure + uses: ravsamhq/notify-slack-action@2.3.0 + if: ${{ always() && github.ref_name == 'main' }} + with: + status: ${{ job.status }} + notify_when: "failure" + notification_title: "Integration test '{workflow}' is failing" + env: + SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }} \ No newline at end of file diff --git a/.github/workflows/test-regression.yml b/.github/workflows/test-regression.yml index d742cc55..5ef5691f 100644 --- a/.github/workflows/test-regression.yml +++ b/.github/workflows/test-regression.yml @@ -59,6 +59,6 @@ jobs: with: status: ${{ job.status }} notify_when: "failure" - notification_title: "Regression test workflow {workflow} is failing" + notification_title: "Regression test workflow '{workflow}' is failing" env: SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }} diff --git a/docs/extras/code_samples/workflow_execution.txt b/docs/extras/code_samples/workflow_execution.txt new file mode 100644 index 00000000..567352cc --- /dev/null +++ b/docs/extras/code_samples/workflow_execution.txt @@ 
#
# Workflow execution sample: send a local file to a Mindee workflow and
# print the ID of the execution that was created.
#
from mindee import Client, WorkflowResponse
from mindee.input import WorkflowOptions
from mindee.parsing.common import ExecutionPriority

# Init a new client
mindee_client = Client(api_key="my-api-key")

workflow_id = "workflow-id"

# Load a file from disk
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")

# Send the file to the workflow.
result: WorkflowResponse = mindee_client.execute_workflow(
    input_doc,
    workflow_id,
    # Optionally, add an alias and a priority to the workflow.
    # Both are carried by a WorkflowOptions object:
    # options=WorkflowOptions(alias="my-alias", priority=ExecutionPriority.LOW),
)

# Print the ID of the execution to make sure it worked.
print(result.execution.id)
def execute_workflow(
    self,
    input_source: Union[LocalInputSource, UrlInputSource],
    workflow_id: str,
    options: Optional[WorkflowOptions] = None,
    page_options: Optional[PageOptions] = None,
) -> WorkflowResponse:
    """
    Submit a document to a workflow and return the execution that was created.

    :param input_source: The document/source file to send.
        Has to be created beforehand.
    :param workflow_id: ID of the workflow to execute.
    :param options: Options for the workflow. When omitted, options with no alias,
        no priority, no public URL and full-text OCR disabled are used.
    :param page_options: If set, remove pages from the document as specified.
        This is done before sending the file to the server and only applies to
        local PDF sources.
    :return: The workflow response, with inferences deserialized as ``GeneratedV1``.
    """
    # Page trimming only makes sense for a local PDF; everything else is sent as-is.
    needs_page_trim = (
        isinstance(input_source, LocalInputSource)
        and page_options
        and input_source.is_pdf()
    )
    if needs_page_trim:
        input_source.process_pdf(
            page_options.operation,
            page_options.on_min_pages,
            page_options.page_indexes,
        )

    logger.debug("Sending document to workflow: %s", workflow_id)

    effective_options = options or WorkflowOptions(
        alias=None, priority=None, full_text=False, public_url=None
    )
    return self._send_to_workflow(
        GeneratedV1, input_source, workflow_id, effective_options
    )
class WorkflowOptions:
    """Bundle of optional settings sent along with a workflow execution."""

    alias: Optional[str]
    """Alias given to the document."""
    priority: Optional["ExecutionPriority"]
    """Priority given to the document."""
    full_text: bool
    """Whether the full OCR text should be requested from compatible APIs."""
    public_url: Optional[str]
    """A unique, encrypted URL for accessing the document validation interface without requiring authentication."""

    def __init__(
        self,
        alias: Optional[str] = None,
        priority: Optional["ExecutionPriority"] = None,
        full_text: Optional[bool] = False,
        public_url: Optional[str] = None,
    ):
        # Any falsy value (including None) collapses to False.
        self.full_text = full_text or False
        self.public_url = public_url
        self.priority = priority
        self.alias = alias
@dataclass
class BaseSettings:
    """Connection settings shared by every kind of API request."""

    api_key: Optional[str]
    """API Key for the client."""
    base_url: str
    request_timeout: int

    def __init__(self, api_key: Optional[str]):
        # Defaults are applied first; environment overrides must run last.
        self.set_base_url(BASE_URL_DEFAULT)
        self.request_timeout = TIMEOUT_DEFAULT
        self._set_api_key(api_key)
        self.set_from_env()

    @property
    def base_headers(self) -> Dict[str, str]:
        """Headers attached to every API request."""
        headers = {
            "Authorization": f"Token {self.api_key}",
            "User-Agent": USER_AGENT,
        }
        return headers

    def _set_api_key(self, api_key: Optional[str]) -> None:
        """
        Store the API key.

        An explicitly provided key is used as given; the environment variable
        is only used when no key was provided.
        """
        key_from_env = os.getenv(API_KEY_ENV_NAME, "")
        if api_key or not key_from_env:
            self.api_key = api_key
            return
        logger.debug("API key set from environment")
        self.api_key = key_from_env

    def set_from_env(self) -> None:
        """Override the base URL and timeout from environment variables, if present."""
        overrides = (
            (BASE_URL_ENV_NAME, self.set_base_url),
            (REQUEST_TIMEOUT_ENV_NAME, self.set_timeout),
        )
        for env_name, apply_value in overrides:
            raw_value = os.getenv(env_name, "")
            if not raw_value:
                continue
            apply_value(raw_value)
            logger.debug("Value was set from env: %s", env_name)

    def set_timeout(self, value: Union[str, int]) -> None:
        """Set the timeout used for all requests; the value is converted with ``int``."""
        self.request_timeout = int(value)

    def set_base_url(self, value: str) -> None:
        """Set the base URL used for all requests."""
        self.base_url = value
-48,44 +29,4 @@ def __init__( self.endpoint_name = endpoint_name self.account_name = account_name self.version = version - self.request_timeout = TIMEOUT_DEFAULT - self.set_base_url(BASE_URL_DEFAULT) - self.set_from_env() self.url_root = f"{self.base_url}/products/{self.account_name}/{self.endpoint_name}/v{self.version}" - - @property - def base_headers(self) -> Dict[str, str]: - """Base headers to send with all API requests.""" - return { - "Authorization": f"Token {self.api_key}", - "User-Agent": USER_AGENT, - } - - def _set_api_key(self, api_key: Optional[str]) -> None: - """Set the endpoint's API key from an environment variable, if present.""" - env_val = os.getenv(API_KEY_ENV_NAME, "") - if env_val and (not api_key or len(api_key) == 0): - logger.debug("API key set from environment") - self.api_key = env_val - return - self.api_key = api_key - - def set_from_env(self) -> None: - """Set various parameters from environment variables, if present.""" - env_vars = { - BASE_URL_ENV_NAME: self.set_base_url, - REQUEST_TIMEOUT_ENV_NAME: self.set_timeout, - } - for name, func in env_vars.items(): - env_val = os.getenv(name, "") - if env_val: - func(env_val) - logger.debug("Value was set from env: %s", name) - - def set_timeout(self, value: Union[str, int]) -> None: - """Set the timeout for all requests.""" - self.request_timeout = int(value) - - def set_base_url(self, value: str) -> None: - """Set the base URL for all requests.""" - self.base_url = value diff --git a/mindee/mindee_http/response_validation.py b/mindee/mindee_http/response_validation.py index 5976a803..e261df91 100644 --- a/mindee/mindee_http/response_validation.py +++ b/mindee/mindee_http/response_validation.py @@ -39,7 +39,7 @@ def is_valid_async_response(response: requests.Response) -> bool: ): return False # Async errors. 
class WorkflowEndpoint(BaseEndpoint):
    """Endpoint used to start workflow executions."""

    settings: WorkflowSettings

    def __init__(self, settings: WorkflowSettings) -> None:
        """
        Workflow Endpoint.

        :param settings: Settings object.
        """
        super().__init__(settings)

    def workflow_execution_post(
        self,
        input_source: Union[LocalInputSource, UrlInputSource],
        options: WorkflowOptions,
    ):
        """
        POST the document to the workflow's execution URL.

        :param input_source: The document/source file to use.
            Has to be created beforehand.
        :param options: Options for the workflow.
        :return: The response returned by ``requests.post``.
        """
        # Only options that are actually set are sent as form fields.
        form_fields = {}
        if options.alias:
            form_fields["alias"] = options.alias
        if options.priority:
            form_fields["priority"] = options.priority.value
        if options.public_url:
            form_fields["public_url"] = options.public_url

        query = {"full_text_ocr": "true"} if options.full_text else {}

        if isinstance(input_source, UrlInputSource):
            # Remote documents are referenced by URL in the form data.
            form_fields["document"] = input_source.url
            attachments = None
        else:
            # Local documents are uploaded as a multipart file.
            attachments = {"document": input_source.read_contents(True)}

        return requests.post(
            self.settings.url_root,
            files=attachments,
            headers=self.settings.base_headers,
            data=form_fields,
            params=query,
            timeout=self.settings.request_timeout,
        )
class Execution(Generic[TypePrediction]):
    """Workflow execution, deserialized from the ``execution`` object of an API response."""

    batch_name: Optional[str]
    """Identifier for the batch to which the execution belongs."""

    created_at: Optional[datetime]
    """The time at which the execution started."""

    file: ExecutionFile
    """File representation within a workflow execution."""

    id: str
    """Identifier for the execution."""

    inference: Optional[Inference[TypePrediction, Page[TypePrediction]]]
    """Deserialized inference object."""

    priority: Optional["ExecutionPriority"]
    """Priority of the execution."""

    reviewed_at: Optional[datetime]
    """The time at which the file was tagged as reviewed."""

    available_at: Optional[datetime]
    """Timestamp parsed from the ``available_at`` field of the response, if present."""

    reviewed_prediction: Optional["GeneratedV1Document"]
    """Reviewed fields and values."""

    status: str
    """Execution Status."""

    type: Optional[str]
    """Execution type."""

    uploaded_at: Optional[datetime]
    """The time at which the file was uploaded to a workflow."""

    workflow_id: str
    """Identifier for the workflow."""

    def __init__(self, inference_type: Type[Inference], json_response: StringDict):
        """
        Build the execution from its JSON representation.

        :param inference_type: Class used to deserialize the ``inference`` field.
        :param json_response: The ``execution`` object of the API response.
        :raises KeyError: If ``batch_name``, ``file``, ``id``, ``status`` or
            ``workflow_id`` is missing from the response.
        """
        self.batch_name = json_response["batch_name"]
        self.created_at = self.parse_date(json_response.get("created_at"))
        self.file = ExecutionFile(json_response["file"])
        self.id = json_response["id"]

        # Both payloads are optional: a missing key is treated like a null/empty
        # value instead of raising KeyError.
        raw_inference = json_response.get("inference")
        self.inference = inference_type(raw_inference) if raw_inference else None

        # NOTE: the priority is stored exactly as received from the API (e.g. "low");
        # it is not converted to an ExecutionPriority member.
        self.priority = json_response.get("priority")
        self.reviewed_at = self.parse_date(json_response.get("reviewed_at"))
        self.available_at = self.parse_date(json_response.get("available_at"))

        raw_reviewed = json_response.get("reviewed_prediction")
        self.reviewed_prediction = (
            GeneratedV1Document(raw_reviewed) if raw_reviewed else None
        )

        self.status = json_response["status"]
        self.type = json_response.get("type")
        self.uploaded_at = self.parse_date(json_response.get("uploaded_at"))
        self.workflow_id = json_response["workflow_id"]

    @staticmethod
    def parse_date(date_string: Optional[str]) -> Optional[datetime]:
        """
        Parse an ISO 8601 date string, if present.

        :param date_string: The date as text, or a falsy value when absent.
        :return: The parsed datetime, or ``None`` when no date was given.
        :raises ValueError: If the string is not a valid ISO 8601 date.
        """
        if not date_string:
            return None
        # ``datetime.fromisoformat`` only accepts a trailing "Z" from Python 3.11 on;
        # spelling it as an explicit UTC offset works on every supported version.
        if date_string.endswith(("Z", "z")):
            date_string = f"{date_string[:-1]}+00:00"
        return datetime.fromisoformat(date_string)
class ExecutionFile:
    """File attached to a workflow execution."""

    name: Optional[str]
    """File name."""

    alias: Optional[str]
    """File alias."""

    def __init__(self, json_response: "StringDict"):
        """
        Build the file description from its JSON representation.

        :param json_response: The ``file`` object of an execution.
        """
        # Both fields are optional: a missing key yields None rather than KeyError.
        self.name = json_response.get("name")
        self.alias = json_response.get("alias")


class ExecutionPriority(Enum):
    """Available priorities for workflow executions."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
+ """ + + def __init__(self, inference_type: Type[Inference], raw_response: StringDict): + super().__init__(raw_response) + self.execution = Execution(inference_type, raw_response["execution"]) diff --git a/tests/test_code_samples.sh b/tests/test_code_samples.sh index c902a084..32878785 100755 --- a/tests/test_code_samples.sh +++ b/tests/test_code_samples.sh @@ -6,7 +6,7 @@ ACCOUNT=$1 ENDPOINT=$2 API_KEY=$3 -for f in $(find ./docs/extras/code_samples -maxdepth 1 -name "*.txt" | sort -h) +for f in $(find ./docs/extras/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_execution.txt" | sort -h) do echo echo "###############################################" diff --git a/tests/utils.py b/tests/utils.py index 3245adfa..6e1af706 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,7 +1,7 @@ from difflib import SequenceMatcher from pathlib import Path -from mindee.mindee_http.mindee_api import ( +from mindee.mindee_http.base_settings import ( API_KEY_ENV_NAME, BASE_URL_ENV_NAME, REQUEST_TIMEOUT_ENV_NAME, diff --git a/tests/workflows/__init__.py b/tests/workflows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/workflows/test_workflow.py b/tests/workflows/test_workflow.py new file mode 100644 index 00000000..526eeee9 --- /dev/null +++ b/tests/workflows/test_workflow.py @@ -0,0 +1,81 @@ +import json +from pathlib import Path + +import pytest + +from mindee.parsing.common.workflow_response import WorkflowResponse +from mindee.product.generated.generated_v1 import GeneratedV1 + +WORKFLOW_DIR = Path("./tests/data") / "workflows" + + +@pytest.fixture +def success_workflow() -> WorkflowResponse: + file_path = WORKFLOW_DIR / "success.json" + with open(file_path, "r", encoding="utf-8") as open_file: + json_data = json.load(open_file) + return WorkflowResponse(GeneratedV1, json_data) + + +@pytest.fixture +def success_low_priority_workflow() -> WorkflowResponse: + file_path = WORKFLOW_DIR / "success_low_priority.json" + with open(file_path, 
"r", encoding="utf-8") as open_file: + json_data = json.load(open_file) + return WorkflowResponse(GeneratedV1, json_data) + + +def test_deserialize_workflow(success_workflow: WorkflowResponse): + assert success_workflow is not None + assert success_workflow.api_request is not None + assert success_workflow.execution.batch_name is None + assert success_workflow.execution.created_at is None + assert success_workflow.execution.file.alias is None + assert success_workflow.execution.file.name == "default_sample.jpg" + assert success_workflow.execution.id == "8c75c035-e083-4e77-ba3b-7c3598bd1d8a" + assert success_workflow.execution.inference is None + assert success_workflow.execution.priority == "medium" + assert success_workflow.execution.reviewed_at is None + assert success_workflow.execution.reviewed_prediction is None + assert success_workflow.execution.status == "processing" + assert success_workflow.execution.type == "manual" + assert ( + success_workflow.execution.uploaded_at.strftime("%Y-%m-%dT%H:%M:%S.%f") + == "2024-11-13T13:02:31.699190" + ) + assert ( + success_workflow.execution.workflow_id == "07ebf237-ff27-4eee-b6a2-425df4a5cca6" + ) + + +def test_deserialize_workflow_with_priority_and_alias( + success_low_priority_workflow: WorkflowResponse, +): + assert success_low_priority_workflow is not None + assert success_low_priority_workflow.api_request is not None + assert success_low_priority_workflow.execution.batch_name is None + assert success_low_priority_workflow.execution.created_at is None + assert ( + success_low_priority_workflow.execution.file.alias == "low-priority-sample-test" + ) + assert success_low_priority_workflow.execution.file.name == "default_sample.jpg" + assert ( + success_low_priority_workflow.execution.id + == "b743e123-e18c-4b62-8a07-811a4f72afd3" + ) + assert success_low_priority_workflow.execution.inference is None + assert success_low_priority_workflow.execution.priority == "low" + assert 
@pytest.fixture
def mindee_client():
    """Client created without an explicit API key."""
    return Client()


@pytest.fixture
def workflow_id():
    """Workflow ID read from the ``WORKFLOW_ID`` environment variable (empty if unset)."""
    return os.getenv("WORKFLOW_ID", "")


@pytest.fixture
def input_path():
    """Path of the sample image sent to the workflow."""
    return PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg"


@pytest.mark.integration
def test_workflow(mindee_client: Client, workflow_id: str, input_path: str):
    """Send a sample file with an alias and low priority, then check what comes back."""
    timestamp = datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
    expected_alias = f"python-{timestamp}"
    options = WorkflowOptions(alias=expected_alias, priority=ExecutionPriority.LOW)
    source = mindee_client.source_from_path(str(input_path))

    response = mindee_client.execute_workflow(source, workflow_id, options)

    assert response.api_request.status_code == 202
    assert response.execution.file.alias == expected_alias
    assert response.execution.priority == "low"