From f7d746f2b0314780ffe826e30bf5353260cafa5b Mon Sep 17 00:00:00 2001 From: Oskar Hollmann Date: Wed, 11 Dec 2024 12:07:21 +0100 Subject: [PATCH] chore: Extract URL domain logic into a reusable module --- rossum_api/api_client.py | 24 ++++++++------ rossum_api/domain_logic/urls.py | 36 +++++++++++++++++++++ rossum_api/elis_api_client.py | 3 +- rossum_api/elis_api_client_sync.py | 3 +- tests/domain_logic/test_urls.py | 52 ++++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 11 deletions(-) create mode 100644 rossum_api/domain_logic/urls.py create mode 100644 tests/domain_logic/test_urls.py diff --git a/rossum_api/api_client.py b/rossum_api/api_client.py index 2640c97..31dbc0a 100644 --- a/rossum_api/api_client.py +++ b/rossum_api/api_client.py @@ -10,6 +10,15 @@ import httpx import tenacity +from rossum_api.domain_logic.urls import ( + DEFAULT_BASE_URL, + build_export_url, + build_full_login_url, + build_upload_url, + parse_annotation_id_from_datapoint_url, + parse_resource_id_from_url, +) + if typing.TYPE_CHECKING: from typing import Any, AsyncIterator, Dict, List, Optional, Sequence, Tuple, Union @@ -100,7 +109,7 @@ def __init__( username: Optional[str] = None, password: Optional[str] = None, token: Optional[str] = None, - base_url: Optional[str] = "https://elis.rossum.ai/api/v1", + base_url: str = DEFAULT_BASE_URL, timeout: Optional[float] = None, n_retries: int = 3, retry_backoff_factor: float = 1.0, @@ -272,9 +281,7 @@ def _embed_sideloads( # Datapoints from all annotations are present in content, we have to construct # content (list of datapoints) for each annotation def annotation_id(datapoint): - return int( - datapoint["url"].replace(f"/content/{datapoint['id']}", "").split("/")[-1] - ) + return parse_annotation_id_from_datapoint_url(datapoint["url"]) sideloads_by_id[sideload_group] = { k: list(v) @@ -292,7 +299,7 @@ def annotation_id(datapoint): url = result[sideload_name] if url is None: continue - sideload_id = int(url.replace("/content", 
"").split("/")[-1]) + sideload_id = parse_resource_id_from_url(url) result[sideload_name] = sideloads_by_id[sideload_group].get( sideload_id, [] @@ -338,7 +345,6 @@ async def upload( may be used to initialize values of the object created from the uploaded file, semantics is different for each resource """ - url = f"{resource.value}/{id_}/upload" files = {"content": (filename, await fp.read(), "application/octet-stream")} # Filename of values and metadata must be "", otherwise Elis API returns HTTP 400 with body @@ -347,7 +353,7 @@ async def upload( files["values"] = ("", json.dumps(values).encode("utf-8"), "application/json") if metadata is not None: files["metadata"] = ("", json.dumps(metadata).encode("utf-8"), "application/json") - return await self.request_json("POST", url, files=files) + return await self.request_json("POST", build_upload_url(resource, id_), files=files) async def export( self, @@ -363,7 +369,7 @@ async def export( query_params = {**query_params, **filters} if columns: query_params["columns"] = ",".join(columns) - url = f"{resource.value}/{id_}/export" + url = build_export_url(resource, id_) # to_status parameter is valid only in POST requests, we can use GET in all other cases method = "POST" if "to_status" in filters else "GET" if export_format == "json": @@ -403,7 +409,7 @@ async def _authenticate(self) -> None: async for attempt in self._retrying(): with attempt: response = await self.client.post( - f"{self.base_url}/auth/login", + build_full_login_url(self.base_url), data={"username": self.username, "password": self.password}, ) await self._raise_for_status(response) diff --git a/rossum_api/domain_logic/urls.py b/rossum_api/domain_logic/urls.py new file mode 100644 index 0000000..7605cd3 --- /dev/null +++ b/rossum_api/domain_logic/urls.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from rossum_api.models import Resource + +DEFAULT_BASE_URL = 
"https://elis.rossum.ai/api/v1" + + +def parse_resource_id_from_url(url: str) -> int: + # Annotation content resource is special, we need to strip /content suffix + return int(url.replace("/content", "").split("/")[-1]) + + +def parse_annotation_id_from_datapoint_url(url: str) -> int: + # URL format: .../annotations/<annotation_id>/content/<datapoint_id> + # Remove the /content/<datapoint_id> from the URL and then pass it to the generic function. + return parse_resource_id_from_url(re.sub(r"/content/.*", "", url)) + + +def build_url(resource: Resource, id_: int) -> str: + return f"{resource.value}/{id_}" + + +def build_export_url(resource: Resource, id_: int) -> str: + return f"{build_url(resource, id_)}/export" + + +def build_upload_url(resource: Resource, id_: int) -> str: + return f"{build_url(resource, id_)}/upload" + + +def build_full_login_url(base_url: str) -> str: + return f"{base_url}/auth/login" diff --git a/rossum_api/elis_api_client.py b/rossum_api/elis_api_client.py index 89b011d..852f469 100644 --- a/rossum_api/elis_api_client.py +++ b/rossum_api/elis_api_client.py @@ -9,6 +9,7 @@ from rossum_api.api_client import APIClient from rossum_api.domain_logic.resources import Resource +from rossum_api.domain_logic.urls import DEFAULT_BASE_URL from rossum_api.models import deserialize_default from rossum_api.models.task import TaskStatus @@ -52,7 +53,7 @@ def __init__( username: Optional[str] = None, password: Optional[str] = None, token: Optional[str] = None, - base_url: Optional[str] = None, + base_url: str = DEFAULT_BASE_URL, http_client: Optional[APIClient] = None, deserializer: Optional[Deserializer] = None, ): diff --git a/rossum_api/elis_api_client_sync.py b/rossum_api/elis_api_client_sync.py index 888a1ec..0bdcda4 100644 --- a/rossum_api/elis_api_client_sync.py +++ b/rossum_api/elis_api_client_sync.py @@ -6,6 +6,7 @@ from queue import Queue as ThreadSafeQueue from rossum_api import ElisAPIClient +from rossum_api.domain_logic.urls import DEFAULT_BASE_URL if typing.TYPE_CHECKING: import pathlib 
@@ -61,7 +62,7 @@ def __init__( username: Optional[str] = None, password: Optional[str] = None, token: Optional[str] = None, - base_url: Optional[str] = None, + base_url: str = DEFAULT_BASE_URL, http_client: Optional[APIClient] = None, deserializer: Optional[Deserializer] = None, ): diff --git a/tests/domain_logic/test_urls.py b/tests/domain_logic/test_urls.py new file mode 100644 index 0000000..f0e2198 --- /dev/null +++ b/tests/domain_logic/test_urls.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import pytest + +from rossum_api.domain_logic.resources import Resource +from rossum_api.domain_logic.urls import ( + build_export_url, + build_full_login_url, + build_upload_url, + build_url, + parse_annotation_id_from_datapoint_url, + parse_resource_id_from_url, +) + + +@pytest.mark.parametrize( + "url, expected_id", + [ + ("https://elis.rossum.ai/api/v1/queues/8199", 8199), + ("https://elis.rossum.ai/api/v1/annotations/314521/content", 314521), + ], +) +def test_parse_resource_id_from_url(url, expected_id): + assert parse_resource_id_from_url(url) == expected_id + + +def test_parse_annotation_id_from_datapoint_url(): + assert ( + parse_annotation_id_from_datapoint_url( + "https://elis.rossum.ai/api/v1/annotations/314521/content/1123123" + ) + == 314521 + ) + + +def test_build_url(): + assert build_url(Resource.Queue, 123) == "queues/123" + + +def test_build_full_login_url(): + assert ( + build_full_login_url("https://elis.rossum.ai/api/v1") + == "https://elis.rossum.ai/api/v1/auth/login" + ) + + +def test_build_upload_url(): + assert build_upload_url(Resource.Queue, 123) == "queues/123/upload" + + +def test_build_export_url(): + assert build_export_url(Resource.Queue, 123) == "queues/123/export"