Skip to content

Commit

Permalink
♻️ minor improvements to main client
Browse files Browse the repository at this point in the history
  • Loading branch information
ianardee committed Dec 14, 2023
1 parent 8a2339d commit 1184112
Showing 1 changed file with 32 additions and 23 deletions.
55 changes: 32 additions & 23 deletions mindee/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ def get_bound_classname(type_var) -> str:

def _clean_account_name(account_name: str) -> str:
"""
Checks that an account name is provided for custom builds, and sets the default one otherwise.
Checks that an account name is provided for custom products, and sets the default one otherwise.
:param product_class: product class to use for API calls.
:param account_name: name of the account's holder. Only needed for custom products.
"""
if not account_name or len(account_name) < 1:
Expand Down Expand Up @@ -80,7 +79,7 @@ def parse(
The response object will be instantiated based on this parameter.
:param input_source: The document/source file to use.
Has to be be created beforehand.
Has to be created beforehand.
:param include_words: Whether to include the full text for each page.
This performs a full OCR operation on the server and will increase response time.
Expand Down Expand Up @@ -133,7 +132,7 @@ def enqueue(
The response object will be instantiated based on this parameter.
:param input_source: The document/source file to use.
Has to be be created beforehand.
Has to be created beforehand.
:param include_words: Whether to include the full text for each page.
This performs a full OCR operation on the server and will increase response time.
Expand Down Expand Up @@ -166,7 +165,12 @@ def enqueue(
page_options.page_indexes,
)
return self._predict_async(
product_class, input_source, include_words, close_file, cropper, endpoint
product_class,
input_source,
endpoint,
include_words,
close_file,
cropper,
)

def parse_queued(
Expand All @@ -191,16 +195,21 @@ def parse_queued(
return self._get_queued_document(product_class, endpoint, queue_id)

def _validate_async_params(
self, initial_delay_sec: float, delay_sec: float
self, initial_delay_sec: float, delay_sec: float, max_retries: int
) -> None:
if delay_sec < 2:
min_delay = 1
min_initial_delay = 2
min_retries = 2
if delay_sec < min_delay:
raise MindeeClientError(
"Cannot set auto-parsing delay to less than 2 seconds."
f"Cannot set auto-parsing delay to less than {min_delay} seconds."
)
if initial_delay_sec < 4:
if initial_delay_sec < min_initial_delay:
raise MindeeClientError(
"Cannot set initial parsing delay to less than 4 seconds."
f"Cannot set initial parsing delay to less than {min_initial_delay} seconds."
)
if max_retries < min_retries:
raise MindeeClientError(f"Cannot set retries to less than {min_retries}.")

def enqueue_and_parse(
self,
Expand All @@ -222,7 +231,7 @@ def enqueue_and_parse(
The response object will be instantiated based on this parameter.
:param input_source: The document/source file to use.
Has to be be created beforehand.
Has to be created beforehand.
:param include_words: Whether to include the full text for each page.
This performs a full OCR operation on the server and will increase response time.
Expand All @@ -246,9 +255,8 @@ def enqueue_and_parse(
This should not be shorter than 1 second.
:param max_retries: Total amount of polling attempts.
"""
self._validate_async_params(initial_delay_sec, delay_sec)
self._validate_async_params(initial_delay_sec, delay_sec, max_retries)
if not endpoint:
endpoint = self._initialize_ots_endpoint(product_class)
queue_result = self.enqueue(
Expand Down Expand Up @@ -345,16 +353,12 @@ def _predict_async(
self,
product_class: Type[Inference],
input_source: Union[LocalInputSource, UrlInputSource],
endpoint: Optional[Endpoint] = None,
include_words: bool = False,
close_file: bool = True,
cropper: bool = False,
endpoint: Optional[Endpoint] = None,
) -> AsyncPredictResponse:
"""
Sends a document to the queue, and sends back an asynchronous predict response.
:param doc_config: Configuration of the document.
"""
"""Sends a document to the queue, and sends back an asynchronous predict response."""
if input_source is None:
raise MindeeClientError("No input document provided")
if not endpoint:
Expand Down Expand Up @@ -384,7 +388,6 @@ def _get_queued_document(
Fetches a document or a Job from a given queue.
:param queue_id: Queue_id received from the API
:param doc_config: Pre-checked document configuration.
"""
queue_response = endpoint.document_queue_req_get(queue_id=queue_id)

Expand Down Expand Up @@ -436,8 +439,6 @@ def create_endpoint(
:param account_name: Your organization's username on the API Builder
:param version: If set, locks the version of the model to use.
If not set, use the latest version of the model.
:param product_class: A document class in which the response will be extracted.
Must inherit from ``mindee.product.base.Document``.
"""
if len(endpoint_name) == 0:
raise MindeeClientError("Custom endpoint require a valid 'endpoint_name'.")
Expand All @@ -456,6 +457,8 @@ def source_from_path(
Load a document from an absolute path, as a string.
:param input_path: Path of file to open
:param fix_pdf: Whether to attempt fixing PDF files before sending.
Setting this to `True` can modify the data sent to Mindee.
"""
input_doc = PathInput(input_path)
if fix_pdf:
Expand All @@ -469,6 +472,8 @@ def source_from_file(
Load a document from a normal Python file object/handle.
:param input_file: Input file handle
:param fix_pdf: Whether to attempt fixing PDF files before sending.
Setting this to `True` can modify the data sent to Mindee.
"""
input_doc = FileInput(input_file)
if fix_pdf:
Expand All @@ -483,6 +488,8 @@ def source_from_b64string(
:param input_string: Input to parse as base64 string
:param filename: The name of the file (without the path)
:param fix_pdf: Whether to attempt fixing PDF files before sending.
Setting this to `True` can modify the data sent to Mindee.
"""
input_doc = Base64Input(input_string, filename)
if fix_pdf:
Expand All @@ -497,6 +504,8 @@ def source_from_bytes(
:param input_bytes: Raw byte input
:param filename: The name of the file (without the path)
:param fix_pdf: Whether to attempt fixing PDF files before sending.
Setting this to `True` can modify the data sent to Mindee.
"""
input_doc = BytesInput(input_bytes, filename)
if fix_pdf:
Expand All @@ -508,7 +517,7 @@ def source_from_url(
url: str,
) -> UrlInputSource:
"""
Load a document from an URL.
Load a document from a URL.
:param url: URL of the document to load.
"""
Expand Down

0 comments on commit 1184112

Please sign in to comment.