Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ add support for workflows #274

Merged
merged 8 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-code-samples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,6 @@ jobs:
with:
status: ${{ job.status }}
notify_when: "failure"
notification_title: "{workflow} is failing"
notification_title: "Code sample test '{workflow}' is failing"
env:
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}
12 changes: 12 additions & 0 deletions .github/workflows/test-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,17 @@ jobs:
- name: Run Integration Testing
env:
MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }}
run: |
pytest -m integration


- name: Notify Slack Action on Failure
uses: ravsamhq/notify-slack-action@2.3.0
if: ${{ always() && github.ref_name == 'main' }}
with:
status: ${{ job.status }}
notify_when: "failure"
notification_title: "Integration test '{workflow}' is failing"
env:
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}
2 changes: 1 addition & 1 deletion .github/workflows/test-regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,6 @@ jobs:
with:
status: ${{ job.status }}
notify_when: "failure"
notification_title: "Regression test workflow {workflow} is failing"
notification_title: "Regression test workflow '{workflow}' is failing"
env:
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}
22 changes: 22 additions & 0 deletions docs/extras/code_samples/workflow_execution.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from mindee import Client, WorkflowResponse
from mindee.parsing.common import ExecutionPriority

# Init a new client
mindee_client = Client(api_key="my-api-key")

workflow_id = "workflow-id"

# Load a file from disk
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")

# Send the file to the workflow.
result: WorkflowResponse = mindee_client.execute_workflow(
    input_doc,
    workflow_id,
    # Optionally, add an alias and a priority to the workflow by passing
    # a WorkflowOptions object (from mindee.input import WorkflowOptions):
    # options=WorkflowOptions(alias="my-alias", priority=ExecutionPriority.LOW),
)

# Print the ID of the execution to make sure it worked.
print(result.execution.id)
1 change: 1 addition & 0 deletions mindee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from mindee.parsing.common.async_predict_response import AsyncPredictResponse, Job
from mindee.parsing.common.feedback_response import FeedbackResponse
from mindee.parsing.common.predict_response import PredictResponse
from mindee.parsing.common.workflow_response import WorkflowResponse
78 changes: 78 additions & 0 deletions mindee/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from mindee.error.mindee_error import MindeeClientError, MindeeError
from mindee.error.mindee_http_error import handle_error
from mindee.input import WorkflowOptions
from mindee.input.local_response import LocalResponse
from mindee.input.page_options import PageOptions
from mindee.input.sources import (
Expand All @@ -22,11 +23,15 @@
is_valid_async_response,
is_valid_sync_response,
)
from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint
from mindee.mindee_http.workflow_settings import WorkflowSettings
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
from mindee.parsing.common.feedback_response import FeedbackResponse
from mindee.parsing.common.inference import Inference
from mindee.parsing.common.predict_response import PredictResponse
from mindee.parsing.common.string_dict import StringDict
from mindee.parsing.common.workflow_response import WorkflowResponse
from mindee.product import GeneratedV1

OTS_OWNER = "mindee"

Expand Down Expand Up @@ -230,6 +235,41 @@ def parse_queued(

return self._get_queued_document(product_class, endpoint, queue_id)

def execute_workflow(
    self,
    input_source: Union[LocalInputSource, UrlInputSource],
    workflow_id: str,
    options: Optional[WorkflowOptions] = None,
    page_options: Optional[PageOptions] = None,
) -> WorkflowResponse:
    """
    Run a workflow on a document.

    :param input_source: The document/source file to use.
        Has to be created beforehand.
    :param workflow_id: ID of the workflow.
    :param options: Options for the workflow. When omitted, a default
        ``WorkflowOptions`` with no alias, no priority, no full text and
        no public URL is used.
    :param page_options: If set, remove pages from the document as specified.
        This is done before sending the file to the server and only applies
        to local PDF sources. It is useful to avoid page limitations.
    :return: The response produced by ``_send_to_workflow``.
    """
    # Page operations only make sense for a local PDF; the checks are
    # evaluated left to right so ``is_pdf()`` is never called on a URL source.
    needs_page_processing = (
        isinstance(input_source, LocalInputSource)
        and page_options
        and input_source.is_pdf()
    )
    if needs_page_processing:
        input_source.process_pdf(
            page_options.operation,
            page_options.on_min_pages,
            page_options.page_indexes,
        )

    logger.debug("Sending document to workflow: %s", workflow_id)

    effective_options = (
        options
        if options
        else WorkflowOptions(
            alias=None, priority=None, full_text=False, public_url=None
        )
    )

    return self._send_to_workflow(
        GeneratedV1, input_source, workflow_id, effective_options
    )

def _validate_async_params(
self, initial_delay_sec: float, delay_sec: float, max_retries: int
) -> None:
Expand Down Expand Up @@ -438,6 +478,44 @@ def _get_queued_document(

return AsyncPredictResponse(product_class, queue_response.json())

def _send_to_workflow(
    self,
    product_class: Type[Inference],
    input_source: Union[LocalInputSource, UrlInputSource],
    workflow_id: str,
    options: WorkflowOptions,
) -> WorkflowResponse:
    """
    Send a document to a workflow and wrap the server's answer.

    :param product_class: The document class to use.
        The response object will be instantiated based on this parameter.
    :param input_source: The document/source file to use.
        Has to be created beforehand.
    :param workflow_id: ID of the workflow.
    :param options: Options for the workflow.
    :return: A ``WorkflowResponse`` built from the decoded JSON body.
    :raises MindeeClientError: If ``input_source`` is ``None``.
    :raises Exception: The error returned by ``handle_error`` when the
        response does not pass ``is_valid_async_response``.
    """
    if input_source is None:
        raise MindeeClientError("No input document provided")

    workflow_endpoint = WorkflowEndpoint(
        WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id)
    )

    response = workflow_endpoint.workflow_execution_post(input_source, options)

    # Validate before decoding: an invalid response may carry a body that is
    # not JSON, and decoding it first would surface a JSON decoding failure
    # instead of the error built by handle_error.
    # NOTE(review): assumes clean_request_json copes with non-JSON bodies
    # -- TODO confirm.
    if not is_valid_async_response(response):
        clean_response = clean_request_json(response)
        raise handle_error(
            str(product_class.endpoint_name),
            clean_response,
        )

    return WorkflowResponse(product_class, response.json())

def _initialize_ots_endpoint(self, product_class: Type[Inference]) -> Endpoint:
if product_class.__name__ == "CustomV1":
raise MindeeClientError("Missing endpoint specifications for custom build.")
Expand Down
1 change: 1 addition & 0 deletions mindee/input/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
PathInput,
UrlInputSource,
)
from mindee.input.workflow_options import WorkflowOptions
28 changes: 28 additions & 0 deletions mindee/input/workflow_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Optional

from mindee.parsing.common import ExecutionPriority


class WorkflowOptions:
    """Optional settings sent along with a workflow execution."""

    alias: Optional[str]
    """Alias for the document."""
    priority: Optional[ExecutionPriority]
    """Priority of the document."""
    full_text: bool
    """Whether to include the full OCR text response in compatible APIs."""
    public_url: Optional[str]
    """A unique, encrypted URL for accessing the document validation interface without requiring authentication."""

    def __init__(
        self,
        alias: Optional[str] = None,
        priority: Optional[ExecutionPriority] = None,
        full_text: Optional[bool] = False,
        public_url: Optional[str] = None,
    ):
        """
        Store the options as given.

        :param alias: Alias for the document.
        :param priority: Priority of the document.
        :param full_text: Whether to request the full OCR text; ``None`` or any
            other falsy value is stored as ``False``.
        :param public_url: Public URL associated with the document.
        """
        # Falsy inputs (including ``None``) collapse to ``False``.
        self.full_text = full_text or False
        self.public_url = public_url
        self.priority = priority
        self.alias = alias
2 changes: 2 additions & 0 deletions mindee/mindee_http/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@
is_valid_async_response,
is_valid_sync_response,
)
from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint
from mindee.mindee_http.workflow_settings import WorkflowSettings
4 changes: 2 additions & 2 deletions mindee/mindee_http/base_endpoint.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from abc import ABC

from mindee.mindee_http.mindee_api import MindeeApi
from mindee.mindee_http.base_settings import BaseSettings


class BaseEndpoint(ABC):
"""Base endpoint class for the Mindee API."""

def __init__(self, settings: MindeeApi) -> None:
def __init__(self, settings: BaseSettings) -> None:
"""
Base API endpoint class for all endpoints.

Expand Down
71 changes: 71 additions & 0 deletions mindee/mindee_http/base_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
from dataclasses import dataclass
from typing import Dict, Optional, Union

from mindee.logger import logger
from mindee.versions import __version__, get_platform, python_version

# Name of the environment variable read by BaseSettings._set_api_key.
API_KEY_ENV_NAME = "MINDEE_API_KEY"
# Empty-string default for the API key (not referenced in the code visible here).
API_KEY_DEFAULT = ""

# Name of the environment variable that can override the base URL.
BASE_URL_ENV_NAME = "MINDEE_BASE_URL"
# Base URL applied by BaseSettings.__init__ before environment overrides.
BASE_URL_DEFAULT = "https://api.mindee.net/v1"

# Name of the environment variable that can override the request timeout.
REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT"
# Timeout applied by BaseSettings.__init__ -- presumably in seconds; TODO confirm.
TIMEOUT_DEFAULT = 120

# Platform identifier, computed once at import time.
PLATFORM = get_platform()
# Value sent as the "User-Agent" header by BaseSettings.base_headers.
USER_AGENT = f"mindee-api-python@v{__version__} python-v{python_version} {PLATFORM}"


@dataclass
class BaseSettings:
    """Connection settings (API key, base URL, timeout) used for API requests."""

    api_key: Optional[str]
    """API Key for the client."""
    base_url: str
    request_timeout: int

    def __init__(self, api_key: Optional[str]):
        """
        Build the settings from defaults, then apply environment overrides.

        :param api_key: API key to use; when empty or ``None`` the environment
            variable named by ``API_KEY_ENV_NAME`` is used if it is set.
        """
        self._set_api_key(api_key)
        self.request_timeout = TIMEOUT_DEFAULT
        self.set_base_url(BASE_URL_DEFAULT)
        self.set_from_env()

    @property
    def base_headers(self) -> Dict[str, str]:
        """Headers attached to every API request."""
        headers = {}
        headers["Authorization"] = f"Token {self.api_key}"
        headers["User-Agent"] = USER_AGENT
        return headers

    def _set_api_key(self, api_key: Optional[str]) -> None:
        """
        Store the API key, falling back to the environment when none is given.

        An explicitly provided, non-empty key always takes precedence.
        """
        from_env = os.getenv(API_KEY_ENV_NAME, "")
        if api_key or not from_env:
            self.api_key = api_key
            return
        logger.debug("API key set from environment")
        self.api_key = from_env

    def set_from_env(self) -> None:
        """Apply base URL and timeout overrides found in the environment."""
        overrides = (
            (BASE_URL_ENV_NAME, self.set_base_url),
            (REQUEST_TIMEOUT_ENV_NAME, self.set_timeout),
        )
        for env_name, setter in overrides:
            raw_value = os.getenv(env_name, "")
            if not raw_value:
                # Unset or empty variables leave the current value untouched.
                continue
            setter(raw_value)
            logger.debug("Value was set from env: %s", env_name)

    def set_timeout(self, value: Union[str, int]) -> None:
        """Set the timeout for all requests; the value is converted with ``int``."""
        timeout = int(value)
        self.request_timeout = timeout

    def set_base_url(self, value: str) -> None:
        """Set the base URL for all requests."""
        self.base_url = value
2 changes: 2 additions & 0 deletions mindee/mindee_http/endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
class Endpoint(BaseEndpoint):
"""Generic API endpoint for a product."""

settings: MindeeApi

def __init__(
self, url_name: str, owner: str, version: str, settings: MindeeApi
) -> None:
Expand Down
Loading