Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ add support for fetch and feedback #177

Merged
merged 14 commits into from
Oct 19, 2023
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Mindee Python API Library Changelog

## v3.13.2 - 2023-10-18
### Changes
* :arrow_up: update `urllib` & `pillow` dependencies


## v3.13.1 - 2023-10-03
### Changes
* :arrow_up: update `urllib`, `charset-normalizer` & `package` dependencies
Expand Down
9 changes: 9 additions & 0 deletions docs/parsing/common.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,15 @@ Job
:members:


Miscellaneous Parsing
=====================

FeedbackResponse
----------------
.. autoclass:: mindee.parsing.common.feedback_response.FeedbackResponse
:members:


OCR Extraction
==============

Expand Down
1 change: 1 addition & 0 deletions mindee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
from mindee.client import Client, PageOptions
from mindee.parsing.common.api_response import ApiResponse
from mindee.parsing.common.async_predict_response import AsyncPredictResponse, Job
from mindee.parsing.common.feedback_response import FeedbackResponse
from mindee.parsing.common.predict_response import PredictResponse
186 changes: 119 additions & 67 deletions mindee/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
from mindee.input.sources import LocalInputSource, UrlInputSource
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
from mindee.parsing.common.document import Document, serialize_for_json
from mindee.parsing.common.feedback_response import FeedbackResponse
from mindee.parsing.common.inference import Inference, TypeInference
from mindee.parsing.common.predict_response import PredictResponse
from mindee.parsing.common.string_dict import StringDict


@dataclass
Expand Down Expand Up @@ -142,43 +144,59 @@ class MindeeParser:
input_doc: Union[LocalInputSource, UrlInputSource]
"""Document to be parsed."""
product_class: Type[Inference]
"""Product to parse"""
"""Product to parse."""
feedback: Optional[StringDict]
"""Dict representation of a feedback."""

def __init__(
self,
parser: Optional[ArgumentParser] = None,
parsed_args: Optional[Namespace] = None,
client: Optional[Client] = None,
input_doc: Optional[Union[LocalInputSource, UrlInputSource]] = None,
document_info: Optional[CommandConfig] = None,
) -> None:
self.parser = parser if parser else ArgumentParser(description="Mindee_API")
self.parsed_args = parsed_args if parsed_args else self._set_args()
self.client = client if client else Client(api_key=self.parsed_args.api_key)
# if self.parsed_args.parse_type == "parse":
self.input_doc = input_doc if input_doc else self._get_input_doc()
self.client = (
client
if client
else Client(
api_key=self.parsed_args.api_key
if "api_key" in self.parsed_args
else None
)
)
self._set_input()
self.document_info = (
document_info if document_info else DOCUMENTS[self.parsed_args.product_name]
)

def call_endpoint(self) -> None:
"""Calls the proper type of endpoint according to given command."""
# if self.parsed_args.parse_type == "parse":
self.call_parse()

# else:
# self.call_fetch()

# def call_fetch(self) -> None:
# """Fetches an API's for a previously enqueued document."""
# response: AsyncPredictResponse = self._parse_queued()
# if self.parsed_args.output_type == "raw":
# print(response.raw_http)
# else:
# if not hasattr(response, "document") or response.document is None:
# print(response.job)
# else:
# print(response.document)
if self.parsed_args.parse_type == "parse":
self.call_parse()
else:
self.call_feedback()

def call_feedback(self) -> None:
    """
    Send the loaded feedback for a document and print the API's answer.

    When the product is ``custom``, an endpoint is first created from the
    endpoint name, account name and API version found in the parsed
    arguments; otherwise no endpoint is handed to the client.

    :raises RuntimeError: if no feedback has been loaded.
    """
    args = self.parsed_args
    endpoint: Optional[Endpoint] = (
        self.client.create_endpoint(
            args.endpoint_name,
            args.account_name,
            args.api_version,
        )
        if args.product_name == "custom"
        else None
    )
    if self.feedback is None:
        raise RuntimeError("Invalid feedback provided.")

    # The loaded feedback is nested under a "feedback" key before sending.
    api_answer: FeedbackResponse = self.client.send_feedback(
        self.document_info.doc_class,
        args.document_id,
        {"feedback": self.feedback},
        endpoint,
    )
    print(json.dumps(api_answer.feedback, indent=2))

def call_parse(self) -> None:
"""Calls an endpoint with the appropriate method, and displays the results."""
Expand Down Expand Up @@ -251,51 +269,27 @@ def _parse_async(self) -> AsyncPredictResponse:
endpoint=custom_endpoint,
)

# def _parse_queued(self) -> AsyncPredictResponse:
# """Fetches a queue's result from a document's id."""
# custom_endpoint: Optional[Endpoint] = None
# if self.parsed_args.product_name == "custom":
# self.client.create_endpoint(
# self.parsed_args.endpoint_name,
# self.parsed_args.account_name,
# self.parsed_args.api_version,
# )
# return self.client.parse_queued(
# self.document_info.doc_class,
# self.parsed_args.queue_id,
# custom_endpoint,
# )

def _doc_str(self, output_type: str, doc_response: Document) -> str:
if output_type == "parsed":
return json.dumps(doc_response, indent=2, default=serialize_for_json)
return str(doc_response)

def _set_args(self) -> Namespace:
"""Parse command line arguments."""
# call_parser = self.parser.add_subparsers(
# dest="parse_type",
# required=True,
# )
# parse_subparser = call_parser.add_parser("parse")
# fetch_subparser = call_parser.add_parser("fetch")

# parse_product_subparsers = parse_subparser.add_subparsers(
# dest="product_name",
# required=True,
# )
parse_product_subparsers = self.parser.add_subparsers(
dest="product_name",
required=True,
)

# fetch_product_subparsers = fetch_subparser.add_subparsers(
# dest="product_name",
# required=True,
# )

for name, info in DOCUMENTS.items():
parse_subp = parse_product_subparsers.add_parser(name, help=info.help)
parse_subparser = parse_product_subparsers.add_parser(name, help=info.help)

call_parser = parse_subparser.add_subparsers(
dest="parse_type", required=True
)
parse_subp = call_parser.add_parser("parse")
feedback_subp = call_parser.add_parser("feedback")

self._add_main_options(parse_subp)
self._add_sending_options(parse_subp)
self._add_display_options(parse_subp)
Expand All @@ -321,16 +315,18 @@ def _set_args(self) -> Namespace:
default=False,
)

# if info.is_async:
# fetch_subp = fetch_product_subparsers.add_parser(name, help=info.help)
# self._add_main_options(fetch_subp)
# self._add_display_options(fetch_subp)
# self._add_fetch_options(fetch_subp)
self._add_main_options(feedback_subp)
self._add_feedback_options(feedback_subp)

parsed_args = self.parser.parse_args()
return parsed_args

def _add_main_options(self, parser: ArgumentParser) -> None:
"""
Adds main options for most parsings.

:param parser: current parser.
"""
parser.add_argument(
"-k",
"--key",
Expand All @@ -341,7 +337,11 @@ def _add_main_options(self, parser: ArgumentParser) -> None:
)

def _add_display_options(self, parser: ArgumentParser) -> None:
"""Adds options related to output/display of a document (parse, parse-queued)."""
"""
Adds options related to output/display of a document (parse, parse-queued).

:param parser: current parser.
"""
parser.add_argument(
"-o",
"--output-type",
Expand All @@ -355,7 +355,11 @@ def _add_display_options(self, parser: ArgumentParser) -> None:
)

def _add_sending_options(self, parser: ArgumentParser) -> None:
"""Adds options for sending requests (parse, enqueue)."""
"""
Adds options for sending requests (parse, enqueue).

:param parser: current parser.
"""
parser.add_argument(
"-i",
"--input-type",
Expand Down Expand Up @@ -386,15 +390,29 @@ def _add_sending_options(self, parser: ArgumentParser) -> None:
)
parser.add_argument(dest="path", help="Full path to the file")

# def _add_fetch_options(self, parser: ArgumentParser):
# """Adds an option to provide the queue ID for an async document."""
# parser.add_argument(
# dest="queue_id",
# help="Async queue ID for a document (required)",
# )
def _add_feedback_options(self, parser: ArgumentParser):
"""
Adds the option to give feedback manually.

:param parser: current parser.
"""
parser.add_argument(
dest="document_id",
help="Mindee UUID of the document.",
type=str,
)
parser.add_argument(
dest="feedback",
type=json.loads,
help='Feedback JSON string to send, ex \'{"key": "value"}\'.',
)

def _add_custom_options(self, parser: ArgumentParser):
"""Adds options to custom-type documents."""
"""
Adds options to custom-type documents.

:param parser: current parser.
"""
parser.add_argument(
"-a",
"--account",
Expand All @@ -418,6 +436,7 @@ def _add_custom_options(self, parser: ArgumentParser):
)

def _get_input_doc(self) -> Union[LocalInputSource, UrlInputSource]:
"""Loads an input document."""
if self.parsed_args.input_type == "file":
with open(self.parsed_args.path, "rb", buffering=30) as file_handle:
return self.client.source_from_file(file_handle)
Expand All @@ -435,6 +454,39 @@ def _get_input_doc(self) -> Union[LocalInputSource, UrlInputSource]:
return self.client.source_from_url(self.parsed_args.path)
return self.client.source_from_path(self.parsed_args.path)

def _get_feedback_doc(self) -> StringDict:
"""Loads a feedback."""
json_doc: StringDict = {}
if self.parsed_args.input_type == "file":
with open(self.parsed_args.path, "rb", buffering=30) as f_f:
json_doc = json.loads(f_f.read())
elif self.parsed_args.input_type == "base64":
with open(self.parsed_args.path, "rt", encoding="ascii") as f_b64:
json_doc = json.loads(f_b64.read())
elif self.parsed_args.input_type == "bytes":
with open(self.parsed_args.path, "rb") as f_b:
json_doc = json.loads(f_b.read())
else:
if (
not self.parsed_args.feedback
or not "feedback" in self.parsed_args.feedback
):
raise RuntimeError("Invalid feedback.")
if not json_doc or "feedback" not in json_doc:
raise RuntimeError("Invalid feedback.")
return json_doc

def _set_input(self) -> None:
"""Loads an input document, or a feedback document."""
self.feedback = None
if self.parsed_args.parse_type == "feedback":
if not self.parsed_args.feedback:
self.feedback = self._get_feedback_doc()
else:
self.feedback = self.parsed_args.feedback
else:
self.input_doc = self._get_input_doc()


def main() -> None:
"""Run the Command Line Interface."""
Expand Down
38 changes: 36 additions & 2 deletions mindee/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
from mindee.mindee_http.error import handle_error
from mindee.mindee_http.mindee_api import MindeeApi
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
from mindee.parsing.common.feedback_response import FeedbackResponse
from mindee.parsing.common.inference import Inference, TypeInference
from mindee.parsing.common.predict_response import PredictResponse
from mindee.parsing.common.string_dict import StringDict

OTS_OWNER = "mindee"

Expand Down Expand Up @@ -175,7 +177,7 @@ def parse_queued(

:param product_class: The document class to use.
The response object will be instantiated based on this parameter.
:param queue_id: queue_id received from the API
:param queue_id: queue_id received from the API.
:param endpoint: For custom endpoints, an endpoint has to be given.
"""
if not endpoint:
Expand Down Expand Up @@ -275,6 +277,38 @@ def enqueue_and_parse(

return poll_results

def send_feedback(
    self,
    product_class: Type[Inference],
    document_id: str,
    feedback: StringDict,
    endpoint: Optional[Endpoint] = None,
) -> FeedbackResponse:
    """
    Send a feedback for a document.

    :param product_class: The document class to use.
        It is used to build the default endpoint when none is given, and to
        name the endpoint when an error is reported.
    :param document_id: The id of the document to send feedback to.
    :param feedback: Feedback to send.
    :param endpoint: For custom endpoints, an endpoint has to be given.
    :raises RuntimeError: if ``document_id`` is empty or missing.
    """
    if not document_id or len(document_id) == 0:
        raise RuntimeError("Invalid document_id.")
    target = endpoint if endpoint else self._initialize_ots_endpoint(product_class)

    http_response = target.document_feedback_req_put(document_id, feedback)
    if http_response.ok:
        return FeedbackResponse(http_response.json())

    # The server answered with an error: raise whatever `handle_error` builds.
    raise handle_error(
        str(product_class.endpoint_name),
        http_response.json(),
        http_response.status_code,
    )

def _make_request(
self,
product_class: Type[Inference],
Expand Down Expand Up @@ -360,7 +394,7 @@ def _get_queued_document(

return AsyncPredictResponse(product_class, queue_response.json())

def _initialize_ots_endpoint(self, product_class) -> Endpoint:
def _initialize_ots_endpoint(self, product_class: Type[Inference]) -> Endpoint:
if product_class.__name__ == "CustomV1":
raise TypeError("Missing endpoint specifications for custom build.")
endpoint_info: Dict[str, str] = product_class.get_endpoint_info(product_class)
Expand Down
Loading