Skip to content

Commit

Permalink
✨ add support for workflows (#274)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastianMindee authored Nov 26, 2024
1 parent 56a85fc commit 928dbc1
Show file tree
Hide file tree
Showing 26 changed files with 579 additions and 70 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-code-samples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,6 @@ jobs:
with:
status: ${{ job.status }}
notify_when: "failure"
notification_title: "{workflow} is failing"
notification_title: "Code sample test '{workflow}' is failing"
env:
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}
12 changes: 12 additions & 0 deletions .github/workflows/test-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,17 @@ jobs:
- name: Run Integration Testing
env:
MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }}
run: |
pytest -m integration
- name: Notify Slack Action on Failure
uses: ravsamhq/notify-slack-action@2.3.0
if: ${{ always() && github.ref_name == 'main' }}
with:
status: ${{ job.status }}
notify_when: "failure"
notification_title: "Integration test '{workflow}' is failing"
env:
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}
2 changes: 1 addition & 1 deletion .github/workflows/test-regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,6 @@ jobs:
with:
status: ${{ job.status }}
notify_when: "failure"
notification_title: "Regression test workflow {workflow} is failing"
notification_title: "Regression test workflow '{workflow}' is failing"
env:
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}
22 changes: 22 additions & 0 deletions docs/extras/code_samples/workflow_execution.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from mindee import Client, WorkflowResponse
from mindee.input import WorkflowOptions
from mindee.parsing.common import ExecutionPriority

# Init a new client
mindee_client = Client(api_key="my-api-key")

workflow_id = "workflow-id"

# Load a file from disk
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")

# Send the file to the workflow.
result: WorkflowResponse = mindee_client.execute_workflow(
    input_doc,
    workflow_id,
    # Optionally, add an alias and a priority to the workflow.
    # Both are passed through a single WorkflowOptions object:
    # options=WorkflowOptions(alias="my-alias", priority=ExecutionPriority.LOW),
)

# Print the ID of the execution to make sure it worked.
print(result.execution.id)
1 change: 1 addition & 0 deletions mindee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from mindee.parsing.common.async_predict_response import AsyncPredictResponse, Job
from mindee.parsing.common.feedback_response import FeedbackResponse
from mindee.parsing.common.predict_response import PredictResponse
from mindee.parsing.common.workflow_response import WorkflowResponse
78 changes: 78 additions & 0 deletions mindee/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from mindee.error.mindee_error import MindeeClientError, MindeeError
from mindee.error.mindee_http_error import handle_error
from mindee.input import WorkflowOptions
from mindee.input.local_response import LocalResponse
from mindee.input.page_options import PageOptions
from mindee.input.sources import (
Expand All @@ -22,11 +23,15 @@
is_valid_async_response,
is_valid_sync_response,
)
from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint
from mindee.mindee_http.workflow_settings import WorkflowSettings
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
from mindee.parsing.common.feedback_response import FeedbackResponse
from mindee.parsing.common.inference import Inference
from mindee.parsing.common.predict_response import PredictResponse
from mindee.parsing.common.string_dict import StringDict
from mindee.parsing.common.workflow_response import WorkflowResponse
from mindee.product import GeneratedV1

OTS_OWNER = "mindee"

Expand Down Expand Up @@ -230,6 +235,41 @@ def parse_queued(

return self._get_queued_document(product_class, endpoint, queue_id)

def execute_workflow(
    self,
    input_source: Union[LocalInputSource, UrlInputSource],
    workflow_id: str,
    options: Optional[WorkflowOptions] = None,
    page_options: Optional[PageOptions] = None,
) -> WorkflowResponse:
    """
    Run a workflow on a document.

    :param input_source: The document/source file to use.
        Has to be created beforehand.
    :param workflow_id: ID of the workflow.
    :param options: Options for the workflow. When not provided, an options object
        with no alias, no priority, no full text and no public URL is used.
    :param page_options: If set, remove pages from the document as specified.
        This is done before sending the file to the server, and only when the
        source is a local PDF. It is useful to avoid page limitations.
    :return: The response built by ``_send_to_workflow`` for the ``GeneratedV1`` product.
    """
    # Page operations only make sense for local sources that are PDFs.
    if (
        isinstance(input_source, LocalInputSource)
        and page_options
        and input_source.is_pdf()
    ):
        input_source.process_pdf(
            page_options.operation,
            page_options.on_min_pages,
            page_options.page_indexes,
        )

    logger.debug("Sending document to workflow: %s", workflow_id)

    workflow_options = options or WorkflowOptions(
        alias=None, priority=None, full_text=False, public_url=None
    )

    return self._send_to_workflow(
        GeneratedV1, input_source, workflow_id, workflow_options
    )

def _validate_async_params(
self, initial_delay_sec: float, delay_sec: float, max_retries: int
) -> None:
Expand Down Expand Up @@ -438,6 +478,44 @@ def _get_queued_document(

return AsyncPredictResponse(product_class, queue_response.json())

def _send_to_workflow(
    self,
    product_class: Type[Inference],
    input_source: Union[LocalInputSource, UrlInputSource],
    workflow_id: str,
    options: WorkflowOptions,
) -> WorkflowResponse:
    """
    Send a document to a workflow and wrap the server's answer.

    :param product_class: The document class to use.
        The response object will be instantiated based on this parameter.
    :param input_source: The document/source file to use.
        Has to be created beforehand.
    :param workflow_id: ID of the workflow.
    :param options: Options for the workflow.
    :return: A ``WorkflowResponse`` built from the decoded body of the HTTP response.
    :raises MindeeClientError: If ``input_source`` is ``None``.
    :raises: The error returned by ``handle_error`` when the response does not pass
        ``is_valid_async_response``.
    """
    if input_source is None:
        raise MindeeClientError("No input document provided")

    workflow_endpoint = WorkflowEndpoint(
        WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id)
    )

    response = workflow_endpoint.workflow_execution_post(input_source, options)

    # Validate before decoding the body: an invalid response may not carry JSON
    # at all, and decoding it first would raise a decode error instead of the
    # error produced by handle_error().
    # NOTE(review): assumes `response` is a requests-style object whose .json()
    # raises on a non-JSON body - confirm against WorkflowEndpoint.
    if not is_valid_async_response(response):
        clean_response = clean_request_json(response)
        raise handle_error(
            str(product_class.endpoint_name),
            clean_response,
        )
    return WorkflowResponse(product_class, response.json())

def _initialize_ots_endpoint(self, product_class: Type[Inference]) -> Endpoint:
if product_class.__name__ == "CustomV1":
raise MindeeClientError("Missing endpoint specifications for custom build.")
Expand Down
1 change: 1 addition & 0 deletions mindee/input/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
PathInput,
UrlInputSource,
)
from mindee.input.workflow_options import WorkflowOptions
28 changes: 28 additions & 0 deletions mindee/input/workflow_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Optional

from mindee.parsing.common import ExecutionPriority


class WorkflowOptions:
    """Container for the optional settings of a workflow execution."""

    alias: Optional[str]
    """Alias given to the document."""
    priority: Optional[ExecutionPriority]
    """Priority given to the document."""
    full_text: bool
    """Whether the full OCR text response should be included, in compatible APIs."""
    public_url: Optional[str]
    """
    Unique, encrypted URL giving access to the document validation interface
    without requiring authentication.
    """

    def __init__(
        self,
        alias: Optional[str] = None,
        priority: Optional[ExecutionPriority] = None,
        full_text: Optional[bool] = False,
        public_url: Optional[str] = None,
    ):
        """
        Store the given options.

        :param alias: Alias for the document.
        :param priority: Priority of the document.
        :param full_text: Whether to include the full OCR text response.
            Any falsy value, ``None`` included, is stored as ``False``.
        :param public_url: Public URL of the document validation interface.
        """
        self.alias, self.priority = alias, priority
        # Falsy inputs (None included) collapse to False; truthy ones are kept as-is.
        self.full_text = full_text or False
        self.public_url = public_url
2 changes: 2 additions & 0 deletions mindee/mindee_http/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@
is_valid_async_response,
is_valid_sync_response,
)
from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint
from mindee.mindee_http.workflow_settings import WorkflowSettings
4 changes: 2 additions & 2 deletions mindee/mindee_http/base_endpoint.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from abc import ABC

from mindee.mindee_http.mindee_api import MindeeApi
from mindee.mindee_http.base_settings import BaseSettings


class BaseEndpoint(ABC):
"""Base endpoint class for the Mindee API."""

def __init__(self, settings: MindeeApi) -> None:
def __init__(self, settings: BaseSettings) -> None:
"""
Base API endpoint class for all endpoints.
Expand Down
71 changes: 71 additions & 0 deletions mindee/mindee_http/base_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
from dataclasses import dataclass
from typing import Dict, Optional, Union

from mindee.logger import logger
from mindee.versions import __version__, get_platform, python_version

# Name of the environment variable BaseSettings reads the API key from.
API_KEY_ENV_NAME = "MINDEE_API_KEY"
# NOTE(review): not referenced in the visible part of this file - confirm usage elsewhere.
API_KEY_DEFAULT = ""

# Environment variable that can override the base URL, and the value used otherwise.
BASE_URL_ENV_NAME = "MINDEE_BASE_URL"
BASE_URL_DEFAULT = "https://api.mindee.net/v1"

# Environment variable that can override the request timeout, and the value used otherwise.
# NOTE(review): the timeout unit is presumably seconds - confirm where it is consumed.
REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT"
TIMEOUT_DEFAULT = 120

# Computed once at import time; USER_AGENT is sent as the "User-Agent" base header.
PLATFORM = get_platform()
USER_AGENT = f"mindee-api-python@v{__version__} python-v{python_version} {PLATFORM}"


@dataclass
class BaseSettings:
    """Settings shared by API requests: API key, base URL and request timeout."""

    api_key: Optional[str]
    """API Key for the client."""
    # Root URL requests are built on; defaults to BASE_URL_DEFAULT.
    base_url: str
    # Timeout applied to requests; defaults to TIMEOUT_DEFAULT.
    request_timeout: int

    def __init__(self, api_key: Optional[str]):
        """
        Build the settings from defaults, then apply environment overrides.

        :param api_key: API key to use. When empty or ``None``, the key found in
            the environment (if any) is used instead.
        """
        self._set_api_key(api_key)
        self.request_timeout = TIMEOUT_DEFAULT
        self.set_base_url(BASE_URL_DEFAULT)
        # Environment values, when present, take precedence over the defaults above.
        self.set_from_env()

    @property
    def base_headers(self) -> Dict[str, str]:
        """Headers common to all API requests: authorization token and user agent."""
        headers = {"Authorization": f"Token {self.api_key}"}
        headers["User-Agent"] = USER_AGENT
        return headers

    def _set_api_key(self, api_key: Optional[str]) -> None:
        """Store the given API key, falling back to the environment when it is empty."""
        env_key = os.getenv(API_KEY_ENV_NAME, "")
        # An explicit key always wins; so does "nothing" when the env has no key either.
        if api_key or not env_key:
            self.api_key = api_key
            return
        logger.debug("API key set from environment")
        self.api_key = env_key

    def set_from_env(self) -> None:
        """Apply base URL and timeout overrides from environment variables, if present."""
        overrides = (
            (BASE_URL_ENV_NAME, self.set_base_url),
            (REQUEST_TIMEOUT_ENV_NAME, self.set_timeout),
        )
        for env_name, setter in overrides:
            raw_value = os.getenv(env_name, "")
            if not raw_value:
                continue
            setter(raw_value)
            logger.debug("Value was set from env: %s", env_name)

    def set_timeout(self, value: Union[str, int]) -> None:
        """Set the timeout for all requests; the value is converted with ``int()``."""
        timeout = int(value)
        self.request_timeout = timeout

    def set_base_url(self, value: str) -> None:
        """Set the base URL for all requests."""
        self.base_url = value
2 changes: 2 additions & 0 deletions mindee/mindee_http/endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
class Endpoint(BaseEndpoint):
"""Generic API endpoint for a product."""

settings: MindeeApi

def __init__(
self, url_name: str, owner: str, version: str, settings: MindeeApi
) -> None:
Expand Down
Loading

0 comments on commit 928dbc1

Please sign in to comment.