From c7a777c23de4f77eeaa625ae44a8573f7da07750 Mon Sep 17 00:00:00 2001 From: Lawson Lewis Date: Mon, 29 Jul 2024 14:17:44 +1000 Subject: [PATCH 1/2] compute better filenames for returned rdf As per issue https://github.com/idn-au/catalogue-data/issues/35 The returned RDF files did not have appropriate filenames. --- prez/renderers/renderer.py | 52 +++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/prez/renderers/renderer.py b/prez/renderers/renderer.py index 5a4faf00..99562d11 100644 --- a/prez/renderers/renderer.py +++ b/prez/renderers/renderer.py @@ -7,20 +7,18 @@ from fastapi import status from fastapi.exceptions import HTTPException from fastapi.responses import StreamingResponse -from rdflib import Graph, URIRef, Namespace, RDF +from rdflib import RDF, Graph, Namespace, URIRef from starlette.requests import Request from starlette.responses import Response from prez.models.profiles_and_mediatypes import ProfilesMediatypesInfo from prez.models.profiles_item import ProfileItem from prez.renderers.csv_renderer import render_csv_dropdown -from prez.renderers.json_renderer import render_json_dropdown, NotFoundError +from prez.renderers.json_renderer import NotFoundError, render_json_dropdown from prez.services.curie_functions import get_curie_id_for_uri from prez.sparql.methods import Repo -from prez.sparql.objects_listings import ( - generate_item_construct, - get_annotation_properties, -) +from prez.sparql.objects_listings import (generate_item_construct, + get_annotation_properties) log = logging.getLogger(__name__) @@ -33,44 +31,42 @@ async def return_from_graph( selected_class: URIRef, repo: Repo, ): - profile_headers["Content-Disposition"] = "inline" + # set content-disposition + profile_headers["Content-Disposition"] = ( + "attachment;" if str(mediatype) == "text/csv" else "inline;" + ) + iri = graph.value(None, RDF.type, selected_class) + if iri: + profile_headers[ + "Content-Disposition" + ] += f" 
filename={get_curie_id_for_uri(URIRef(str(iri)))}" + elif selected_class: + profile_headers[ + "Content-Disposition" + ] += f" filename={selected_class.split('#')[-1].split('/')[-1]}" if str(mediatype) in RDF_MEDIATYPES: return await return_rdf(graph, mediatype, profile_headers) - elif profile == URIRef("https://w3id.org/profile/dd"): graph = await return_annotated_rdf(graph, profile, repo) - try: # TODO: Currently, data is generated in memory, instead of in a streaming manner. # Not possible to do a streaming response yet since we are reading the RDF # data into an in-memory graph. jsonld_data = await render_json_dropdown(graph, profile, selected_class) - if str(mediatype) == "text/csv": - iri = graph.value(None, RDF.type, selected_class) - if iri: - filename = get_curie_id_for_uri(URIRef(str(iri))) - else: - filename = selected_class.split("#")[-1].split("/")[-1] stream = render_csv_dropdown(jsonld_data["@graph"]) - response = StreamingResponse(stream, media_type=mediatype) - response.headers[ - "Content-Disposition" - ] = f"attachment;filename={filename}.csv" - return response - - # application/json - stream = io.StringIO(json.dumps(jsonld_data)) - return StreamingResponse(stream, media_type=mediatype) - + else: + stream = io.StringIO(json.dumps(jsonld_data)) + return StreamingResponse( + stream, media_type=mediatype, headers=profile_headers + ) except NotFoundError as err: raise HTTPException(status.HTTP_404_NOT_FOUND, str(err)) - else: if "anot+" in mediatype: non_anot_mediatype = mediatype.replace("anot+", "") - profile_headers['Content-Type'] = non_anot_mediatype + profile_headers["Content-Type"] = non_anot_mediatype graph = await return_annotated_rdf(graph, profile, repo) content = io.BytesIO( graph.serialize(format=non_anot_mediatype, encoding="utf-8") @@ -78,7 +74,6 @@ async def return_from_graph( return StreamingResponse( content=content, media_type=non_anot_mediatype, headers=profile_headers ) - raise HTTPException( status.HTTP_400_BAD_REQUEST, 
f"Unsupported mediatype: {mediatype}." ) @@ -91,7 +86,6 @@ async def return_rdf(graph, mediatype, profile_headers): format=RDF_SERIALIZER_TYPES_MAP[str(mediatype)], encoding="utf-8" ) ) - profile_headers["Content-Disposition"] = "inline" return StreamingResponse(content=obj, media_type=mediatype, headers=profile_headers) From ab6670ef5747c7c18b956dec25f17bc384d60279 Mon Sep 17 00:00:00 2001 From: Lawson Lewis Date: Tue, 30 Jul 2024 13:02:51 +1000 Subject: [PATCH 2/2] add tests for content-disposition and mediatype tests that the returned headers are as expected for object and listing endpoints --- tests/test_pmt_headers.py | 133 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 tests/test_pmt_headers.py diff --git a/tests/test_pmt_headers.py b/tests/test_pmt_headers.py new file mode 100644 index 00000000..6e9c57e7 --- /dev/null +++ b/tests/test_pmt_headers.py @@ -0,0 +1,133 @@ +"""test_pmt_headers + +A set of tests to confirm that the Profile and Media Type information in the response headers are +as expected for object and listing endpoints. 
+ +Also checks the content-disposition header +""" + +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient +from pyoxigraph.pyoxigraph import Store + +from prez.app import assemble_app +from prez.dependencies import get_repo +from prez.services.curie_functions import get_curie_id_for_uri +from prez.sparql.methods import PyoxigraphRepo, Repo + + +@pytest.fixture(scope="session") +def test_store() -> Store: + # Create a new pyoxigraph Store + store = Store() + + for file in Path(__file__).parent.glob("../tests/data/*/input/*.ttl"): + store.load(file.read_bytes(), "text/turtle") + + return store + + +@pytest.fixture(scope="session") +def test_repo(test_store: Store) -> Repo: + # Create a PyoxigraphRepo using the test_store + return PyoxigraphRepo(test_store) + + +@pytest.fixture(scope="session") +def test_client(test_repo: Repo) -> TestClient: + # Override the dependency to use the test_repo + def override_get_repo(): + return test_repo + + app = assemble_app() + + app.dependency_overrides[get_repo] = override_get_repo + + with TestClient(app) as c: + yield c + + # Remove the override to ensure subsequent tests are unaffected + app.dependency_overrides.clear() + + +@pytest.mark.parametrize( + "endpoint, mediatype, filename", + [ + ("/v/vocab", "text/turtle", "SchemesList"), + ("/s/datasets", "text/turtle", "DatasetList"), + ("/c/catalogs", "text/turtle", "CatalogList"), + ("/v/vocab", "application/ld+json", "SchemesList"), + ("/s/datasets", "application/ld+json", "DatasetList"), + ("/c/catalogs", "application/ld+json", "CatalogList"), + ], +) +def test_listing_endpoint( + endpoint: str, mediatype: str, filename: str, test_client: TestClient +): + """Assert that response headers are returned correctly for a listing endpoint. + + i.e. that they specify the + + - Content-Type, and + - Content-Disposition. + + headers. And that the headers have an appropriate value.
+ """ + headers = {"accept": mediatype} + expected_headers = { + "content-type": mediatype, + "content-disposition": f"inline; filename={filename}", + } + response = test_client.get(endpoint, headers=headers) + assert all( + header in response.headers.keys() for header in expected_headers.keys() + ), f"Response must specify the {expected_headers.keys()} headers." + assert all( + response.headers[header] == expected_headers[header] + for header in expected_headers.keys() + ), "Required headers do not have the expected values." + + +@pytest.mark.parametrize( + "endpoint, mediatype, object_uri", + [ + ("/v/vocab", "text/turtle", "https://linked.data.gov.au/def/vocdermods"), + ("/s/datasets", "text/turtle", "http://example.com/datasets/sandgate"), + ("/c/catalogs", "text/turtle", "https://data.idnau.org/pid/democat"), + ( + "/v/vocab", + "application/ld+json", + "https://linked.data.gov.au/def/vocdermods", + ), + ("/s/datasets", "application/ld+json", "http://example.com/datasets/sandgate"), + ("/c/catalogs", "application/ld+json", "https://data.idnau.org/pid/democat"), + ], +) +def test_object_endpoint( + endpoint: str, mediatype: str, object_uri: str, test_client: TestClient +): + """Assert that response headers are returned correctly for an object endpoint. + + i.e. that they specify the + + - Content-Type, and + - Content-Disposition. + + headers. And that the headers have an appropriate value. + """ + curie = get_curie_id_for_uri(object_uri) + headers = {"accept": mediatype} + expected_headers = { + "content-type": mediatype, + "content-disposition": f"inline; filename={curie}", + } + response = test_client.get(endpoint + "/" + curie, headers=headers) + assert all( + header in response.headers.keys() for header in expected_headers.keys() + ), f"Response must specify the {expected_headers.keys()} headers."
+ assert all( + response.headers[header] == expected_headers[header] + for header in expected_headers.keys() + ), "Required headers do not have the expected values."