Skip to content

Commit

Permalink
feat: implement document analysis tool
Browse files Browse the repository at this point in the history
  • Loading branch information
hugohonda committed Dec 11, 2024
1 parent 63eab86 commit dc23047
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 0 deletions.
1 change: 1 addition & 0 deletions tests/integ/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
countgd_example_based_counting,
depth_anything_v2,
detr_segmentation,
document_analysis,
florence2_ocr,
florence2_phrase_grounding,
florence2_phrase_grounding_video,
Expand Down
1 change: 1 addition & 0 deletions vision_agent/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
countgd_example_based_counting,
depth_anything_v2,
detr_segmentation,
document_analysis,
extract_frames_and_timestamps,
florence2_ocr,
florence2_phrase_grounding,
Expand Down
37 changes: 37 additions & 0 deletions vision_agent/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1878,6 +1878,43 @@ def closest_box_distance(
return cast(float, np.sqrt(horizontal_distance**2 + vertical_distance**2))


def document_analysis(image: np.ndarray) -> Dict[str, Any]:
    """'document_analysis' is a tool that analyzes an image of a document and
    extracts its text, tables, and other information. Everything that was
    extracted is returned together in a single dictionary.

    Parameters:
        image (np.ndarray): The document image to analyze

    Returns:
        Dict[str, Any]: A dictionary holding the extracted text, tables, and
            other information, exactly as returned by the inference request.

    Example
    -------
        >>> document_analysis(image)
        {}
    """

    # Convert the array up front so it can be attached to the request as a file.
    encoded_image = numpy_to_bytes(image)

    # The image is sent as a file under the "image" field; the payload only
    # names the model. The response is handed back to the caller unmodified.
    result: Dict[str, Any] = send_inference_request(
        payload={"model": "document-analysis"},
        endpoint_name="document-analysis",
        files=[("image", encoded_image)],
        v2=True,
        metadata_payload={"function_name": "document_analysis"},
    )
    return result


# Utility and visualization functions


Expand Down

0 comments on commit dc23047

Please sign in to comment.