lsst-sqre · rra · Nov 25, 2024 · Nov 22, 2024 · Nov 25, 2024
diff --git a/changelog.d/20241122_150037_rra_DM_47769a.md b/changelog.d/20241122_150037_rra_DM_47769a.md
@@ -0,0 +1,3 @@
+### New features
+
+- Add new `safir.models.LinkData` model that parses the contents of an HTTP `Link` header and extracts pagination information.
diff --git a/docs/user-guide/database/pagination.rst b/docs/user-guide/database/pagination.rst
@@ -244,3 +244,23 @@ This follows the `HATEOS <https://en.wikipedia.org/wiki/HATEOAS>`__ design princ
 
 In this case, the application should call the `~safir.database.PaginatedList.first_url`, `~safir.database.PaginatedList.next_url`, and `~safir.database.PaginatedList.prev_url` methods with the current URL (generally ``request.url``) as an argument to retrieve the links to the first, next, and previous blocks of results.
 Those links can then be embedded in the response model wherever is appropriate for the API of that application.
+
+Parsing paginated query responses
+=================================
+
+Safir provides `~safir.models.LinkData` to parse the contents of an :rfc:`8288` ``Link`` header and extract pagination links from it.
+This may be useful in clients of paginated query results, including tests of services that use the above approach to paginated queries.
+
+.. code-block:: python
+
+   from safir.models import LinkData
+
+
+   r = client.get("/some/url", params={"limit": 100})
+   links = LinkData.from_header(r.headers["Link"])
+   next_url = links.next_url
+   prev_url = links.prev_url
+   first_url = links.first_url
+
+Currently, only the first, next, and previous URLs are extracted from the ``Link`` header.
+If any of these URLs are not present, the corresponding attribute of `~safir.models.LinkData` will be `None`.
diff --git a/safir/src/safir/models/__init__.py b/safir/src/safir/models/__init__.py
@@ -8,9 +8,11 @@
 """
 
 from ._errors import ErrorDetail, ErrorLocation, ErrorModel
+from ._link import LinkData
 
 __all__ = [
     "ErrorDetail",
     "ErrorLocation",
     "ErrorModel",
+    "LinkData",
 ]
diff --git a/safir/src/safir/models/_link.py b/safir/src/safir/models/_link.py
@@ -0,0 +1,55 @@
+"""Representation for a ``Link`` HTTP header."""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+from typing import Self
+
+__all__ = ["LinkData"]
+
+_LINK_REGEX = re.compile(r'\s*<(?P<target>[^>]+)>;\s*rel="(?P<type>[^"]+)"')
+"""Matches one link of a ``Link`` header.
+
+A link matches only if its ``<target>`` is followed directly by a
+double-quoted ``rel`` parameter. The ``target`` group captures the text
+between the angle brackets and the ``type`` group captures the text between
+the quotes. Links whose first parameter is not ``rel``, or whose ``rel``
+value is not quoted, do not match.
+"""
+
+
+@dataclass
+class LinkData:
+    """Holds the data returned in an :rfc:`8288` ``Link`` header."""
+
+    prev_url: str | None
+    """URL of the previous page, or `None` for the first page."""
+
+    next_url: str | None
+    """URL of the next page, or `None` for the last page."""
+
+    first_url: str | None
+    """URL of the first page."""
+
+    @classmethod
+    def from_header(cls, header: str | None) -> Self:
+        """Parse an :rfc:`8288` ``Link`` with pagination URLs.
+
+        Parameters
+        ----------
+        header
+            Contents of an RFC 8288 ``Link`` header.
+
+        Returns
+        -------
+        LinkData
+            Parsed form of that header.
+        """
+        links = {}
+        if header:
+            for element in header.split(","):
+                if m := re.match(_LINK_REGEX, element):
+                    if m.group("type") in ("prev", "next", "first"):
+                        links[m.group("type")] = m.group("target")
+                    elif m.group("type") == "previous":
+                        links["prev"] = m.group("target")
+
+        return cls(
+            prev_url=links.get("prev"),
+            next_url=links.get("next"),
+            first_url=links.get("first"),
+        )
diff --git a/safir/tests/models_test.py b/safir/tests/models_test.py
@@ -1,10 +1,10 @@
-"""Tests for safir.models."""
+"""Tests for `safir.models`."""
 
 from __future__ import annotations
 
 import json
 
-from safir.models import ErrorModel
+from safir.models import ErrorModel, LinkData
 
 
 def test_error_model() -> None:
@@ -20,3 +20,44 @@ def test_error_model() -> None:
     }
     model = ErrorModel.model_validate_json(json.dumps(error))
     assert model.model_dump() == error
+
+
+def test_link_data() -> None:
+    header = (
+        '<https://example.com/query>; rel="first", '
+        '<https://example.com/query?cursor=1600000000.5_1>; rel="next"'
+    )
+    link = LinkData.from_header(header)
+    assert not link.prev_url
+    assert link.next_url == "https://example.com/query?cursor=1600000000.5_1"
+    assert link.first_url == "https://example.com/query"
+
+    header = (
+        '<https://example.com/query?limit=10>; rel="first", '
+        '<https://example.com/query?limit=10&cursor=15_2>; rel="next", '
+        '<https://example.com/query?limit=10&cursor=p5_1>; rel="prev"'
+    )
+    link = LinkData.from_header(header)
+    assert link.prev_url == "https://example.com/query?limit=10&cursor=p5_1"
+    assert link.next_url == "https://example.com/query?limit=10&cursor=15_2"
+    assert link.first_url == "https://example.com/query?limit=10"
+
+    header = (
+        '<https://example.com/query>; rel="first", '
+        '<https://example.com/query?cursor=p1510000000_2>; rel="previous"'
+    )
+    link = LinkData.from_header(header)
+    assert link.prev_url == "https://example.com/query?cursor=p1510000000_2"
+    assert not link.next_url
+    assert link.first_url == "https://example.com/query"
+
+    header = '<https://example.com/query?foo=b>; rel="first"'
+    link = LinkData.from_header(header)
+    assert not link.prev_url
+    assert not link.next_url
+    assert link.first_url == "https://example.com/query?foo=b"
+
+    link = LinkData.from_header("")
+    assert not link.prev_url
+    assert not link.next_url
+    assert not link.first_url
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		### New features

		- Add new `safir.models.LinkData` model that parses the contents of an HTTP `Link` header and extracts pagination information.