Merge pull request #334 from lsst-sqre/tickets/DM-47769a

DM-47769: Add LinkData model for parsing HTTP Link headers
lsst-sqre · Nov 25, 2024 · 50b38d5 · 50b38d5
2 parents 10c4f5a + 78af1ad
commit 50b38d5
Show file tree

Hide file tree

Showing 6 changed files with 116 additions and 1 deletion.
diff --git a/changelog.d/20241122_150037_rra_DM_47769a.md b/changelog.d/20241122_150037_rra_DM_47769a.md
@@ -0,0 +1,3 @@
+### New features
+
+- Add new `safir.database.PaginatedLinkData` model that parses the contents of an HTTP `Link` header and extracts pagination information.
diff --git a/docs/user-guide/database/pagination.rst b/docs/user-guide/database/pagination.rst
@@ -254,3 +254,23 @@ This follows the `HATEOS <https://en.wikipedia.org/wiki/HATEOAS>`__ design princ
 
 In this case, the application should call the `~safir.database.PaginatedList.first_url`, `~safir.database.PaginatedList.next_url`, and `~safir.database.PaginatedList.prev_url` methods with the current URL (generally ``request.url``) as an argument to retrieve the links to the first, next, and previous blocks of results.
 Those links can then be embedded in the response model wherever is appropriate for the API of that application.
+
+Parsing paginated query responses
+=================================
+
+Safir provides `~safir.database.PaginatedLinkData` to parse the contents of an :rfc:`8288` ``Link`` header and extract pagination links from it.
+This may be useful in clients of paginated query results, including tests of services that use the above approach to paginated queries.
+
+.. code-block:: python
+
+   from safir.database import PaginatedLinkData
+
+
+   r = client.get("/some/url", params={"limit": 100})
+   links = PaginatedLinkData.from_header(r.headers["Link"])
+   next_url = links.next_url
+   prev_url = links.prev_url
+   first_url = links.first_url
+
+Currently, only the first, next, and previous URLs are extracted from the ``Link`` header.
+If any of these URLs are not present, the corresponding attribute of `~safir.database.PaginatedLinkData` will be `None`.
diff --git a/safir/src/safir/database/__init__.py b/safir/src/safir/database/__init__.py
@@ -18,6 +18,7 @@
 )
 from ._pagination import (
     DatetimeIdCursor,
+    PaginatedLinkData,
     PaginatedList,
     PaginatedQueryRunner,
     PaginationCursor,
@@ -29,6 +30,7 @@
     "DatabaseInitializationError",
     "DatetimeIdCursor",
     "PaginationCursor",
+    "PaginatedLinkData",
     "PaginatedList",
     "PaginatedQueryRunner",
     "create_async_session",

diff --git a/safir/src/safir/database/_pagination.py b/safir/src/safir/database/_pagination.py
@@ -6,6 +6,7 @@
 
 from __future__ import annotations
 
+import re
 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
 from datetime import UTC, datetime
@@ -20,6 +21,9 @@
 
 from ._datetime import datetime_to_db
 
+# Matches one link in a ``Link`` header: optional leading whitespace, the
+# target URL in angle brackets (named group ``target``), a semicolon,
+# optional whitespace, and a double-quoted ``rel`` parameter (named group
+# ``type``). The ``rel`` parameter is only recognized when it immediately
+# follows the target; other parameters before it prevent a match.
+_LINK_REGEX = re.compile(r'\s*<(?P<target>[^>]+)>;\s*rel="(?P<type>[^"]+)"')
+"""Matches a component of a valid ``Link`` header."""
+
 C = TypeVar("C", bound="PaginationCursor")
 """Type of a cursor for a paginated list."""
 
@@ -29,11 +33,55 @@
 __all__ = [
     "DatetimeIdCursor",
     "PaginationCursor",
+    "PaginatedLinkData",
     "PaginatedList",
     "PaginatedQueryRunner",
 ]
 
 
+@dataclass
+class PaginatedLinkData:
+    """Holds the data returned in an :rfc:`8288` ``Link`` header."""
+
+    prev_url: str | None
+    """URL of the previous page, or `None` for the first page."""
+
+    next_url: str | None
+    """URL of the next page, or `None` for the last page."""
+
+    first_url: str | None
+    """URL of the first page."""
+
+    @classmethod
+    def from_header(cls, header: str | None) -> Self:
+        """Parse an :rfc:`8288` ``Link`` with pagination URLs.
+
+        Parameters
+        ----------
+        header
+            Contents of an RFC 8288 ``Link`` header.
+
+        Returns
+        -------
+        PaginatedLinkData
+            Parsed form of that header.
+        """
+        links = {}
+        if header:
+            for element in header.split(","):
+                if m := re.match(_LINK_REGEX, element):
+                    if m.group("type") in ("prev", "next", "first"):
+                        links[m.group("type")] = m.group("target")
+                    elif m.group("type") == "previous":
+                        links["prev"] = m.group("target")
+
+        return cls(
+            prev_url=links.get("prev"),
+            next_url=links.get("next"),
+            first_url=links.get("first"),
+        )
+
+
 @dataclass
 class PaginationCursor(Generic[E], metaclass=ABCMeta):
     """Generic pagnination cursor for keyset pagination.

diff --git a/safir/tests/database_test.py b/safir/tests/database_test.py
@@ -27,6 +27,7 @@
 
 from safir.database import (
     DatetimeIdCursor,
+    PaginatedLinkData,
     PaginatedQueryRunner,
     create_async_session,
     create_database_engine,
@@ -508,3 +509,44 @@ async def test_pagination(database_url: str, database_password: str) -> None:
         assert not result.prev_cursor
         base_url = URL("https://example.com/query?foo=b")
         assert result.link_header(base_url) == (f'<{base_url!s}>; rel="first"')
+
+
+def test_link_data() -> None:
+    header = (
+        '<https://example.com/query>; rel="first", '
+        '<https://example.com/query?cursor=1600000000.5_1>; rel="next"'
+    )
+    link = PaginatedLinkData.from_header(header)
+    assert not link.prev_url
+    assert link.next_url == "https://example.com/query?cursor=1600000000.5_1"
+    assert link.first_url == "https://example.com/query"
+
+    header = (
+        '<https://example.com/query?limit=10>; rel="first", '
+        '<https://example.com/query?limit=10&cursor=15_2>; rel="next", '
+        '<https://example.com/query?limit=10&cursor=p5_1>; rel="prev"'
+    )
+    link = PaginatedLinkData.from_header(header)
+    assert link.prev_url == "https://example.com/query?limit=10&cursor=p5_1"
+    assert link.next_url == "https://example.com/query?limit=10&cursor=15_2"
+    assert link.first_url == "https://example.com/query?limit=10"
+
+    header = (
+        '<https://example.com/query>; rel="first", '
+        '<https://example.com/query?cursor=p1510000000_2>; rel="previous"'
+    )
+    link = PaginatedLinkData.from_header(header)
+    assert link.prev_url == "https://example.com/query?cursor=p1510000000_2"
+    assert not link.next_url
+    assert link.first_url == "https://example.com/query"
+
+    header = '<https://example.com/query?foo=b>; rel="first"'
+    link = PaginatedLinkData.from_header(header)
+    assert not link.prev_url
+    assert not link.next_url
+    assert link.first_url == "https://example.com/query?foo=b"
+
+    link = PaginatedLinkData.from_header("")
+    assert not link.prev_url
+    assert not link.next_url
+    assert not link.first_url
diff --git a/safir/tests/models_test.py b/safir/tests/models_test.py
@@ -1,4 +1,4 @@
-"""Tests for safir.models."""
+"""Tests for `safir.models`."""
 
 from __future__ import annotations
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		### New features

		- Add new `safir.database.PaginatedLinkData` model that parses the contents of an HTTP `Link` header and extracts pagination information.