Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Improve export helpers method, enrich exception info #94

Merged
merged 2 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions rossum_api/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@


class APIClientError(Exception):
    """Error raised when an API call ends with an HTTP error status.

    Attributes:
        method: HTTP method of the failed request (e.g. ``"GET"``).
        url: URL of the failed request; only its string form is used in the message.
        status_code: HTTP status code of the response.
        error: Decoded content of the error response.
    """

    def __init__(self, method, url, status_code, error):
        # Hand the values to Exception as well so that ``args`` is populated;
        # with empty ``args`` the exception cannot be copied or pickled because
        # it would be re-created by calling the class without arguments.
        super().__init__(method, url, status_code, error)
        self.method = method
        self.url = url
        self.status_code = status_code
        self.error = error

    def __str__(self):
        """Return ``[METHOD] URL - HTTP STATUS - ERROR``."""
        return f"[{self.method}] {self.url} - HTTP {self.status_code} - {self.error}"


def authenticate_if_needed(method):
Expand Down Expand Up @@ -375,7 +377,7 @@ async def _authenticate(self) -> None:
build_full_login_url(self.base_url),
data={"username": self.username, "password": self.password},
)
await self._raise_for_status(response)
await self._raise_for_status(response, "POST")
self.token = response.json()["key"]

def _retrying(self):
Expand Down Expand Up @@ -415,7 +417,7 @@ async def _request(self, method: str, url: str, *args, **kwargs) -> httpx.Respon
async for attempt in self._retrying():
with attempt:
response = await self.client.request(method, url, headers=headers, *args, **kwargs)
await self._raise_for_status(response)
await self._raise_for_status(response, method)
return response

@authenticate_generator_if_needed
Expand All @@ -427,11 +429,11 @@ async def _stream(self, method: str, url: str, *args, **kwargs) -> AsyncIterator
headers = kwargs.pop("headers", {})
headers["Authorization"] = f"token {self.token}"
async with self.client.stream(method, url, headers=headers, *args, **kwargs) as response:
await self._raise_for_status(response)
await self._raise_for_status(response, method)
async for chunk in response.aiter_bytes():
yield chunk

async def _raise_for_status(self, response: httpx.Response):
async def _raise_for_status(self, response: httpx.Response, method: str) -> None:
"""Raise an exception in case of HTTP error.

Re-pack to our own exception class to shield users from the fact that we're using
Expand All @@ -441,4 +443,6 @@ async def _raise_for_status(self, response: httpx.Response):
response.raise_for_status()
except httpx.HTTPStatusError as e:
content = response.content if response.stream is None else await response.aread()
raise APIClientError(response.status_code, content.decode("utf-8")) from e
raise APIClientError(
method, response.url, response.status_code, content.decode("utf-8")
) from e
12 changes: 8 additions & 4 deletions rossum_api/elis_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,23 +204,27 @@ async def retrieve_upload(self, upload_id: int) -> Upload:
upload = await self._http_client.fetch_one(Resource.Upload, upload_id)
return self._deserializer(Resource.Upload, upload)

async def export_annotations_to_json(
    self, queue_id: int, **filters: Any
) -> AsyncIterator[Annotation]:
    """https://elis.rossum.ai/api/docs/#export-annotations.

    JSON export is paginated and returns the result in a way similar to other list_all methods.

    Keyword arguments passed as ``filters`` are forwarded unchanged to the
    HTTP client's ``export`` call.
    """
    async for chunk in self._http_client.export(Resource.Queue, queue_id, "json", **filters):
        # JSON export can be translated directly to Annotation object
        yield self._deserializer(Resource.Annotation, typing.cast(typing.Dict, chunk))

async def export_annotations_to_file(
    self, queue_id: int, export_format: ExportFileFormats, **filters: Any
) -> AsyncIterator[bytes]:
    """https://elis.rossum.ai/api/docs/#export-annotations.

    XLSX/CSV/XML exports can be huge, therefore byte streaming is used to keep memory consumption low.

    Keyword arguments passed as ``filters`` are forwarded unchanged to the
    HTTP client's ``export`` call; ``export_format`` is passed as its string form.
    """
    async for chunk in self._http_client.export(
        Resource.Queue, queue_id, str(export_format), **filters
    ):
        yield typing.cast(bytes, chunk)

# ##### ORGANIZATIONS #####
Expand Down
10 changes: 6 additions & 4 deletions rossum_api/elis_api_client_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,22 +207,24 @@ def retrieve_upload(self, upload_id: int) -> Upload:

return self._run_coroutine(self.elis_api_client.retrieve_upload(upload_id))

def export_annotations_to_json(self, queue_id: int, **filters: Any) -> Iterator[Annotation]:
    """https://elis.rossum.ai/api/docs/#export-annotations.

    JSON export is paginated and returns the result in a way similar to other list_all methods.

    Keyword arguments passed as ``filters`` are forwarded unchanged to the
    asynchronous client's ``export_annotations_to_json``.
    """
    return self._iter_over_async(
        self.elis_api_client.export_annotations_to_json(queue_id, **filters)
    )

def export_annotations_to_file(
    self, queue_id: int, export_format: ExportFileFormats, **filters: Any
) -> Iterator[bytes]:
    """https://elis.rossum.ai/api/docs/#export-annotations.

    XLSX/CSV/XML exports can be huge, therefore byte streaming is used to keep memory consumption low.

    Keyword arguments passed as ``filters`` are forwarded unchanged to the
    asynchronous client's ``export_annotations_to_file``.
    """
    return self._iter_over_async(
        self.elis_api_client.export_annotations_to_file(queue_id, export_format, **filters)
    )

# ##### ORGANIZATIONS #####
Expand Down
10 changes: 8 additions & 2 deletions tests/test_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,10 @@ async def test_request_repacks_exception(client, httpx_mock):
)
with pytest.raises(APIClientError) as err:
await client._request("GET", "workspaces/123")
assert str(err.value) == 'HTTP 404, content: {"detail":"Not found."}'
assert str(err.value) == (
"[GET] https://elis.rossum.ai/api/v1/workspaces/123 - "
'HTTP 404 - {"detail":"Not found."}'
)


@pytest.mark.asyncio
Expand All @@ -713,7 +716,10 @@ async def test_stream_repacks_exception(client, httpx_mock):
with pytest.raises(APIClientError) as err:
async for _w in client._stream("GET", "queues/123/export?format=csv&exported_at=invalid"):
pass
assert str(err.value) == "HTTP 404, content: exported_at: Enter a valid date/time"
assert str(err.value) == (
"[GET] https://elis.rossum.ai/api/v1/queues/123/export?format=csv&exported_at=invalid "
"- HTTP 404 - exported_at: Enter a valid date/time"
)


@pytest.mark.asyncio
Expand Down
Loading