From 1184112ad24d297dafbb3a68b2fd6971354cda53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Thu, 14 Dec 2023 18:47:36 +0100 Subject: [PATCH] :recycle: minor improvements to main client --- mindee/client.py | 55 ++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/mindee/client.py b/mindee/client.py index 9d8caad3..c85d90bc 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -32,9 +32,8 @@ def get_bound_classname(type_var) -> str: def _clean_account_name(account_name: str) -> str: """ - Checks that an account name is provided for custom builds, and sets the default one otherwise. + Checks that an account name is provided for custom products, and sets the default one otherwise. - :param product_class: product class to use for API calls. :param account_name: name of the account's holder. Only needed for custom products. """ if not account_name or len(account_name) < 1: @@ -80,7 +79,7 @@ def parse( The response object will be instantiated based on this parameter. :param input_source: The document/source file to use. - Has to be be created beforehand. + Has to be created beforehand. :param include_words: Whether to include the full text for each page. This performs a full OCR operation on the server and will increase response time. @@ -133,7 +132,7 @@ def enqueue( The response object will be instantiated based on this parameter. :param input_source: The document/source file to use. - Has to be be created beforehand. + Has to be created beforehand. :param include_words: Whether to include the full text for each page. This performs a full OCR operation on the server and will increase response time. 
@@ -166,7 +165,12 @@ def enqueue( page_options.page_indexes, ) return self._predict_async( - product_class, input_source, include_words, close_file, cropper, endpoint + product_class, + input_source, + endpoint, + include_words, + close_file, + cropper, ) def parse_queued( @@ -191,16 +195,21 @@ def parse_queued( return self._get_queued_document(product_class, endpoint, queue_id) def _validate_async_params( - self, initial_delay_sec: float, delay_sec: float + self, initial_delay_sec: float, delay_sec: float, max_retries: int ) -> None: - if delay_sec < 2: + min_delay = 1 + min_initial_delay = 2 + min_retries = 2 + if delay_sec < min_delay: raise MindeeClientError( - "Cannot set auto-parsing delay to less than 2 seconds." + f"Cannot set auto-parsing delay to less than {min_delay} seconds." ) - if initial_delay_sec < 4: + if initial_delay_sec < min_initial_delay: raise MindeeClientError( - "Cannot set initial parsing delay to less than 4 seconds." + f"Cannot set initial parsing delay to less than {min_initial_delay} seconds." ) + if max_retries < min_retries: + raise MindeeClientError(f"Cannot set retries to less than {min_retries}.") def enqueue_and_parse( self, @@ -222,7 +231,7 @@ def enqueue_and_parse( The response object will be instantiated based on this parameter. :param input_source: The document/source file to use. - Has to be be created beforehand. + Has to be created beforehand. :param include_words: Whether to include the full text for each page. This performs a full OCR operation on the server and will increase response time. @@ -246,9 +255,8 @@ def enqueue_and_parse( This should not be shorter than 2 seconds. :param max_retries: Total amount of polling attempts. 
- """ - self._validate_async_params(initial_delay_sec, delay_sec) + self._validate_async_params(initial_delay_sec, delay_sec, max_retries) if not endpoint: endpoint = self._initialize_ots_endpoint(product_class) queue_result = self.enqueue( @@ -345,16 +353,12 @@ def _predict_async( self, product_class: Type[Inference], input_source: Union[LocalInputSource, UrlInputSource], + endpoint: Optional[Endpoint] = None, include_words: bool = False, close_file: bool = True, cropper: bool = False, - endpoint: Optional[Endpoint] = None, ) -> AsyncPredictResponse: - """ - Sends a document to the queue, and sends back an asynchronous predict response. - - :param doc_config: Configuration of the document. - """ + """Sends a document to the queue, and sends back an asynchronous predict response.""" if input_source is None: raise MindeeClientError("No input document provided") if not endpoint: @@ -384,7 +388,6 @@ def _get_queued_document( Fetches a document or a Job from a given queue. :param queue_id: Queue_id received from the API - :param doc_config: Pre-checked document configuration. """ queue_response = endpoint.document_queue_req_get(queue_id=queue_id) @@ -436,8 +439,6 @@ def create_endpoint( :param account_name: Your organization's username on the API Builder :param version: If set, locks the version of the model to use. If not set, use the latest version of the model. - :param product_class: A document class in which the response will be extracted. - Must inherit from ``mindee.product.base.Document``. """ if len(endpoint_name) == 0: raise MindeeClientError("Custom endpoint require a valid 'endpoint_name'.") @@ -456,6 +457,8 @@ def source_from_path( Load a document from an absolute path, as a string. :param input_path: Path of file to open + :param fix_pdf: Whether to attempt fixing PDF files before sending. + Setting this to `True` can modify the data sent to Mindee. 
""" input_doc = PathInput(input_path) if fix_pdf: @@ -469,6 +472,8 @@ def source_from_file( Load a document from a normal Python file object/handle. :param input_file: Input file handle + :param fix_pdf: Whether to attempt fixing PDF files before sending. + Setting this to `True` can modify the data sent to Mindee. """ input_doc = FileInput(input_file) if fix_pdf: @@ -483,6 +488,8 @@ def source_from_b64string( :param input_string: Input to parse as base64 string :param filename: The name of the file (without the path) + :param fix_pdf: Whether to attempt fixing PDF files before sending. + Setting this to `True` can modify the data sent to Mindee. """ input_doc = Base64Input(input_string, filename) if fix_pdf: @@ -497,6 +504,8 @@ def source_from_bytes( :param input_bytes: Raw byte input :param filename: The name of the file (without the path) + :param fix_pdf: Whether to attempt fixing PDF files before sending. + Setting this to `True` can modify the data sent to Mindee. """ input_doc = BytesInput(input_bytes, filename) if fix_pdf: @@ -508,7 +517,7 @@ def source_from_url( url: str, ) -> UrlInputSource: """ - Load a document from an URL. + Load a document from a URL. :param url: Raw byte input """