From c7a777c23de4f77eeaa625ae44a8573f7da07750 Mon Sep 17 00:00:00 2001
From: Lawson Lewis <lawson@kurrawong.ai>
Date: Mon, 29 Jul 2024 14:17:44 +1000
Subject: [PATCH 1/2] compute better filenames for returned rdf

As per issue https://github.com/idn-au/catalogue-data/issues/35, the
returned RDF files did not have appropriate filenames. The filename is
now computed up front and added to the Content-Disposition header.
---
 prez/renderers/renderer.py | 52 +++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/prez/renderers/renderer.py b/prez/renderers/renderer.py
index 5a4faf00..99562d11 100644
--- a/prez/renderers/renderer.py
+++ b/prez/renderers/renderer.py
@@ -7,20 +7,18 @@
 from fastapi import status
 from fastapi.exceptions import HTTPException
 from fastapi.responses import StreamingResponse
-from rdflib import Graph, URIRef, Namespace, RDF
+from rdflib import RDF, Graph, Namespace, URIRef
 from starlette.requests import Request
 from starlette.responses import Response
 
 from prez.models.profiles_and_mediatypes import ProfilesMediatypesInfo
 from prez.models.profiles_item import ProfileItem
 from prez.renderers.csv_renderer import render_csv_dropdown
-from prez.renderers.json_renderer import render_json_dropdown, NotFoundError
+from prez.renderers.json_renderer import NotFoundError, render_json_dropdown
 from prez.services.curie_functions import get_curie_id_for_uri
 from prez.sparql.methods import Repo
-from prez.sparql.objects_listings import (
-    generate_item_construct,
-    get_annotation_properties,
-)
+from prez.sparql.objects_listings import (generate_item_construct,
+                                          get_annotation_properties)
 
 log = logging.getLogger(__name__)
 
@@ -33,44 +31,42 @@ async def return_from_graph(
     selected_class: URIRef,
     repo: Repo,
 ):
-    profile_headers["Content-Disposition"] = "inline"
+    # set content-disposition
+    profile_headers["Content-Disposition"] = (
+        "attachment;" if str(mediatype) == "text/csv" else "inline;"
+    )
+    iri = graph.value(None, RDF.type, selected_class)
+    if iri:
+        profile_headers[
+            "Content-Disposition"
+        ] += f" filename={get_curie_id_for_uri(URIRef(str(iri)))}"
+    elif selected_class:
+        profile_headers[
+            "Content-Disposition"
+        ] += f" filename={selected_class.split('#')[-1].split('/')[-1]}"
 
     if str(mediatype) in RDF_MEDIATYPES:
         return await return_rdf(graph, mediatype, profile_headers)
-
     elif profile == URIRef("https://w3id.org/profile/dd"):
         graph = await return_annotated_rdf(graph, profile, repo)
-
         try:
             # TODO: Currently, data is generated in memory, instead of in a streaming manner.
             #       Not possible to do a streaming response yet since we are reading the RDF
             #       data into an in-memory graph.
             jsonld_data = await render_json_dropdown(graph, profile, selected_class)
-
             if str(mediatype) == "text/csv":
-                iri = graph.value(None, RDF.type, selected_class)
-                if iri:
-                    filename = get_curie_id_for_uri(URIRef(str(iri)))
-                else:
-                    filename = selected_class.split("#")[-1].split("/")[-1]
                 stream = render_csv_dropdown(jsonld_data["@graph"])
-                response = StreamingResponse(stream, media_type=mediatype)
-                response.headers[
-                    "Content-Disposition"
-                ] = f"attachment;filename={filename}.csv"
-                return response
-
-            # application/json
-            stream = io.StringIO(json.dumps(jsonld_data))
-            return StreamingResponse(stream, media_type=mediatype)
-
+            else:
+                stream = io.StringIO(json.dumps(jsonld_data))
+            return StreamingResponse(
+                stream, media_type=mediatype, headers=profile_headers
+            )
         except NotFoundError as err:
             raise HTTPException(status.HTTP_404_NOT_FOUND, str(err))
-
     else:
         if "anot+" in mediatype:
             non_anot_mediatype = mediatype.replace("anot+", "")
-            profile_headers['Content-Type'] = non_anot_mediatype
+            profile_headers["Content-Type"] = non_anot_mediatype
             graph = await return_annotated_rdf(graph, profile, repo)
             content = io.BytesIO(
                 graph.serialize(format=non_anot_mediatype, encoding="utf-8")
@@ -78,7 +74,6 @@ async def return_from_graph(
             return StreamingResponse(
                 content=content, media_type=non_anot_mediatype, headers=profile_headers
             )
-
         raise HTTPException(
             status.HTTP_400_BAD_REQUEST, f"Unsupported mediatype: {mediatype}."
         )
@@ -91,7 +86,6 @@ async def return_rdf(graph, mediatype, profile_headers):
             format=RDF_SERIALIZER_TYPES_MAP[str(mediatype)], encoding="utf-8"
         )
     )
-    profile_headers["Content-Disposition"] = "inline"
     return StreamingResponse(content=obj, media_type=mediatype, headers=profile_headers)
 
 

From ab6670ef5747c7c18b956dec25f17bc384d60279 Mon Sep 17 00:00:00 2001
From: Lawson Lewis <lawson@kurrawong.ai>
Date: Tue, 30 Jul 2024 13:02:51 +1000
Subject: [PATCH 2/2] add tests for content-disposition and mediatype

Tests that the returned headers are as expected for object and listing
endpoints.
---
 tests/test_pmt_headers.py | 133 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 tests/test_pmt_headers.py

diff --git a/tests/test_pmt_headers.py b/tests/test_pmt_headers.py
new file mode 100644
index 00000000..6e9c57e7
--- /dev/null
+++ b/tests/test_pmt_headers.py
@@ -0,0 +1,133 @@
+"""test_pmt_headers
+
+A set of tests to confirm that the Profile and Media Type information in the response headers is
+as expected for object and listing endpoints.
+
+Also checks the Content-Disposition header.
+"""
+
+from pathlib import Path
+
+import pytest
+from fastapi.testclient import TestClient
+from pyoxigraph.pyoxigraph import Store
+
+from prez.app import assemble_app
+from prez.dependencies import get_repo
+from prez.services.curie_functions import get_curie_id_for_uri
+from prez.sparql.methods import PyoxigraphRepo, Repo
+
+
+@pytest.fixture(scope="session")
+def test_store() -> Store:
+    # Create a new pyoxigraph Store
+    store = Store()
+
+    for file in Path(__file__).parent.glob("../tests/data/*/input/*.ttl"):
+        store.load(file.read_bytes(), "text/turtle")
+
+    return store
+
+
+@pytest.fixture(scope="session")
+def test_repo(test_store: Store) -> Repo:
+    # Create a PyoxigraphRepo backed by the test_store
+    return PyoxigraphRepo(test_store)
+
+
+@pytest.fixture(scope="session")
+def test_client(test_repo: Repo) -> TestClient:
+    # Override the dependency to use the test_repo
+    def override_get_repo():
+        return test_repo
+
+    app = assemble_app()
+
+    app.dependency_overrides[get_repo] = override_get_repo
+
+    with TestClient(app) as c:
+        yield c
+
+    # Remove the override to ensure subsequent tests are unaffected
+    app.dependency_overrides.clear()
+
+
+@pytest.mark.parametrize(
+    "endpoint, mediatype, filename",
+    [
+        ("/v/vocab", "text/turtle", "SchemesList"),
+        ("/s/datasets", "text/turtle", "DatasetList"),
+        ("/c/catalogs", "text/turtle", "CatalogList"),
+        ("/v/vocab", "application/ld+json", "SchemesList"),
+        ("/s/datasets", "application/ld+json", "DatasetList"),
+        ("/c/catalogs", "application/ld+json", "CatalogList"),
+    ],
+)
+def test_listing_endpoint(
+    endpoint: str, mediatype: str, filename: str, test_client: TestClient
+):
+    """Assert that response headers are returned correctly for a listing endpoint.
+
+    i.e. that the response specifies the
+
+      - Content-Type, and
+      - Content-Disposition
+
+    headers, and that each header has the expected value.
+    """
+    headers = {"accept": mediatype}
+    expected_headers = {
+        "content-type": mediatype,
+        "content-disposition": f"inline; filename={filename}",
+    }
+    response = test_client.get(endpoint, headers=headers)
+    assert all(
+        header in response.headers.keys() for header in expected_headers.keys()
+    ), f"Response must specify the {expected_headers.keys()} headers."
+    assert all(
+        response.headers[header] == expected_headers[header]
+        for header in expected_headers.keys()
+    ), "Required headers do not have the expected values."
+
+
+@pytest.mark.parametrize(
+    "endpoint, mediatype, object_uri",
+    [
+        ("/v/vocab", "text/turtle", "https://linked.data.gov.au/def/vocdermods"),
+        ("/s/datasets", "text/turtle", "http://example.com/datasets/sandgate"),
+        ("/c/catalogs", "text/turtle", "https://data.idnau.org/pid/democat"),
+        (
+            "/v/vocab",
+            "application/ld+json",
+            "https://linked.data.gov.au/def/vocdermods",
+        ),
+        ("/s/datasets", "application/ld+json", "http://example.com/datasets/sandgate"),
+        ("/c/catalogs", "application/ld+json", "https://data.idnau.org/pid/democat"),
+    ],
+)
+def test_object_endpoint(
+    endpoint: str, mediatype: str, object_uri: str, test_client: TestClient
+):
+    """Assert that response headers are returned correctly for an object endpoint.
+
+    i.e. that the response specifies the
+
+      - Content-Type, and
+      - Content-Disposition
+
+    headers, and that each header has the expected value.
+    """
+    curie = get_curie_id_for_uri(object_uri)
+    headers = {"accept": mediatype}
+    expected_headers = {
+        "content-type": mediatype,
+        "content-disposition": f"inline; filename={curie}",
+    }
+    response = test_client.get(endpoint + "/" + curie, headers=headers)
+    assert all(
+        header in response.headers.keys() for header in expected_headers.keys()
+    ), f"Response must specify the {expected_headers.keys()} headers."
+    assert all(
+        response.headers[header] == expected_headers[header]
+        for header in expected_headers.keys()
+    ), "Required headers do not have the expected values."