Skip to content

Commit

Permalink
David/search profiles bugfixes (#103)
Browse files Browse the repository at this point in the history
* Fixes #93, solution and general framework for #5

* Fixes #93, solution and general framework for #5

* Update VocPub profile to support vocabulary and collection listings to show dcterms:publisher, reg:status and derivation info

* Support multiple sh:sequencePath in listing construct query.

Support include predicate statement in listing construct query.

Fix pagination in listing construct query.

* Integrate search changes to use subquery

* only include properties of objects when calling "generate_listing_construct" for a listing, and not when calling for an object. Update tests to new profiles

* Default to Jena FT search; include geo data in search results

* Add documentation on how to identify namespaces for prefixing

* Search bugfixes

* Update readmes

---------

Co-authored-by: Edmond Chuc <edmond@kurrawong.ai>
  • Loading branch information
recalcitrantsupplant and edmondchuc authored May 8, 2023
1 parent fc82562 commit 8a50fbf
Show file tree
Hide file tree
Showing 28 changed files with 453 additions and 273 deletions.
116 changes: 23 additions & 93 deletions README-Dev.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,28 @@ When Prez encounters a URI which is required for an internal link but is not in
To get "sensible" or "nice" prefixes, it is recommended to add all prefixes which will be required to turtle files in prez/reference_data/prefixes.
A future change could allow the prefixes to be specified alongside data in the backend, as profiles currently can be.

### Checking if namespace prefixes are defined

The following SPARQL query can be used as a starting point to check if a namespace prefix is defined for instances of
the main classes prez delivers. NB this query should NOT be run against SPARQL endpoints for large datasets; offline
options should instead be used.
NB. for "short" URIs, i.e. a hostname with no fragment and no path, this query will (correctly, but uselessly)
return "http://" or "https://". You will need to identify these URIs by other means and provide prefixes for them
should you wish.
```sparql
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX vann: <http://purl.org/vocab/vann/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
SELECT DISTINCT ?namespace
{?uri a ?type
BIND (REPLACE(STR(?uri), "(.*[/#])[^#/]*$", "$1") AS ?namespace)
VALUES ?type { skos:Collection skos:ConceptScheme skos:Concept dcat:Dataset geo:FeatureCollection geo:Feature dcat:Resource dcat:Catalog }
MINUS {?namespace vann:preferredPrefix ?prefix .}
} LIMIT 100
```

## High Level Sequence

Prez follows the following logic to determine what information to return, based on a profile, and in what mediatype to return it.
Expand Down Expand Up @@ -205,99 +227,7 @@ WHERE {
BIND(URI(CONCAT(STR(?instance_of_main_class),"/support-graph")) AS ?support_graph_uri)
}
```
### C.2 - CatPrez Insert Support Graphs
```sparql
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX prez: <https://prez.dev/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
INSERT {
GRAPH prez:catprez-system-graph {
?support_graph_uri prez:hasContextFor ?instance_of_main_class .
?collectionList rdfs:member ?instance_of_top_class .
?instance_of_main_class dcterms:identifier ?prez_id .
}
GRAPH ?support_graph_uri { ?member dcterms:identifier ?prez_mem_id . }
}
WHERE {
{
?instance_of_main_class a ?collection_class .
VALUES ?collection_class { <http://www.w3.org/ns/dcat#Catalog>
<http://www.w3.org/ns/dcat#Resource> }
OPTIONAL {?instance_of_top_class a ?topmost_class
VALUES ?topmost_class { <http://www.w3.org/ns/dcat#Catalog> }
}
MINUS { GRAPH prez:catprez-system-graph {?a_context_graph prez:hasContextFor ?instance_of_main_class}
}
OPTIONAL {?instance_of_main_class dcterms:identifier ?id
BIND(DATATYPE(?id) AS ?dtype_id)
FILTER(?dtype_id = xsd:token)
}
OPTIONAL { ?instance_of_main_class dcterms:hasPart ?member
OPTIONAL {?member dcterms:identifier ?mem_id
BIND(DATATYPE(?mem_id) AS ?dtype_mem_id)
FILTER(?dtype_mem_id = xsd:token) } }
}
BIND(
IF(?topmost_class=dcat:Dataset, prez:DatasetList,
IF(?topmost_class=dcat:Catalog,prez:CatalogList,
IF(?topmost_class=skos:ConceptScheme,prez:SchemesList,
IF(?topmost_class=skos:Collection,prez:VocPrezCollectionList,"")))) AS ?collectionList)
BIND(STRDT(COALESCE(STR(?id),MD5(STR(?instance_of_main_class))), prez:slug) AS ?prez_id)
BIND(STRDT(COALESCE(STR(?mem_id),MD5(STR(?member))), prez:slug) AS ?prez_mem_id)
BIND(URI(CONCAT(STR(?instance_of_main_class),"/support-graph")) AS ?support_graph_uri)
}
```
### C.3 - VocPrez Insert Support Graphs
```sparql
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX prez: <https://prez.dev/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
INSERT {
GRAPH prez:vocprez-system-graph {
?support_graph_uri prez:hasContextFor ?instance_of_main_class .
?collectionList rdfs:member ?instance_of_top_class .
?instance_of_main_class dcterms:identifier ?prez_id .
}
GRAPH ?support_graph_uri { ?member dcterms:identifier ?prez_mem_id . }
}
WHERE {
{
?instance_of_main_class a ?collection_class .
VALUES ?collection_class { <http://www.w3.org/2004/02/skos/core#ConceptScheme>
<http://www.w3.org/2004/02/skos/core#Collection> }
OPTIONAL {?instance_of_top_class a ?topmost_class
VALUES ?topmost_class { <http://www.w3.org/2004/02/skos/core#ConceptScheme>
<http://www.w3.org/2004/02/skos/core#Collection> }
}
MINUS { GRAPH prez:vocprez-system-graph {?a_context_graph prez:hasContextFor ?instance_of_main_class}
}
OPTIONAL {?instance_of_main_class dcterms:identifier ?id
BIND(DATATYPE(?id) AS ?dtype_id)
FILTER(?dtype_id = xsd:token)
}
OPTIONAL { {?instance_of_main_class ^skos:inScheme ?member }
UNION
{ ?instance_of_main_class skos:member ?member }
OPTIONAL {?member dcterms:identifier ?mem_id
BIND(DATATYPE(?mem_id) AS ?dtype_mem_id)
FILTER(?dtype_mem_id = xsd:token) } }
}
BIND(
IF(?topmost_class=dcat:Dataset, prez:DatasetList,
IF(?topmost_class=dcat:Catalog,prez:CatalogList,
IF(?topmost_class=skos:ConceptScheme,prez:SchemesList,
IF(?topmost_class=skos:Collection,prez:VocPrezCollectionList,"")))) AS ?collectionList)
BIND(STRDT(COALESCE(STR(?id),MD5(STR(?instance_of_main_class))), prez:slug) AS ?prez_id)
BIND(STRDT(COALESCE(STR(?mem_id),MD5(STR(?member))), prez:slug) AS ?prez_mem_id)
BIND(URI(CONCAT(STR(?instance_of_main_class),"/support-graph")) AS ?support_graph_uri)
}
```
## Appendix C - Removed - heading retained to keep appendix numbering consistent
## Appendix D - Example Profile and Mediatype Selection SPARQL query
This SPARQL query determines the profile and mediatype to return based on user requests,
defaults, and the availability of these in profiles.
Expand Down
44 changes: 22 additions & 22 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 19 additions & 16 deletions prez/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
import uvicorn
from fastapi import FastAPI, Request
from fastapi.openapi.utils import get_openapi
from fastapi.responses import JSONResponse
from rdflib import Graph, Literal, URIRef
from starlette.middleware.cors import CORSMiddleware

from prez.cache import tbox_cache
from prez.config import settings
from prez.models.model_exceptions import (
ClassNotFoundException,
URINotFoundException,
NoProfilesException,
)
from prez.reference_data.prez_ns import PREZ
from prez.renderers.renderer import return_rdf
from prez.routers.catprez import router as catprez_router
Expand All @@ -27,31 +31,30 @@
populate_api_info,
add_prefixes_to_prefix_graph,
)
from prez.services.exception_catchers import (
catch_400,
catch_404,
catch_500,
catch_class_not_found_exception,
catch_uri_not_found_exception,
catch_no_profiles_exception,
)
from prez.services.generate_profiles import create_profiles_graph
from prez.services.prez_logging import setup_logger
from prez.services.search_methods import generate_search_methods


async def catch_400(request: Request, exc):
    """
    Render a 400 error as a JSON response.

    The exception object itself is not JSON-serialisable, so it is converted
    to text first. When the exception carries a ``detail`` attribute that is
    used (mirroring ``catch_404``); otherwise the exception's own string form
    is used.

    :param request: the incoming request (unused, required by the handler signature)
    :param exc: the exception that triggered the handler
    :return: a JSONResponse with status code 400 and a ``detail`` string
    """
    detail = str(getattr(exc, "detail", exc))
    return JSONResponse(content={"detail": detail}, status_code=400)


async def catch_404(request: Request, exc):
    """
    Render a 404 error as a JSON response.

    :param request: the incoming request (unused, required by the handler signature)
    :param exc: the exception that triggered the handler; its ``detail`` is stringified
    :return: a JSONResponse with status code 404
    """
    payload = {"detail": str(exc.detail)}
    return JSONResponse(status_code=404, content=payload)


async def catch_500(request: Request, exc):
    """
    Render a 500 error as a JSON response with a fixed, generic message.

    :param request: the incoming request (unused, required by the handler signature)
    :param exc: the exception that triggered the handler (not included in the response)
    :return: a JSONResponse with status code 500
    """
    payload = {"detail": "Internal Server Error"}
    return JSONResponse(status_code=500, content=payload)

from prez.services.search_methods import get_all_search_methods

# Create the FastAPI application and register its exception handlers: the
# integer keys (400, 404, 500) map HTTP status codes to handlers, and the
# exception-class keys map Prez's own exceptions to their handlers.
app = FastAPI(
exception_handlers={
400: catch_400,
404: catch_404,
500: catch_500,
ClassNotFoundException: catch_class_not_found_exception,
URINotFoundException: catch_uri_not_found_exception,
NoProfilesException: catch_no_profiles_exception,
}
)


app.include_router(cql_router)
app.include_router(management_router)
app.include_router(object_router)
Expand Down Expand Up @@ -109,7 +112,7 @@ async def app_startup():
log = logging.getLogger("prez")
log.info("Starting up")
await healthcheck_sparql_endpoints()
await generate_search_methods()
await get_all_search_methods()
await create_profiles_graph()
await count_objects()
await populate_api_info()
Expand Down
6 changes: 4 additions & 2 deletions prez/models/catprez_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@

from pydantic import BaseModel, root_validator
from rdflib import URIRef
from rdflib.namespace import DCTERMS, XSD, DCAT, Namespace
from rdflib.namespace import DCAT, Namespace

from prez.services.curie_functions import get_uri_for_curie_id, get_curie_id_for_uri
from prez.services.model_methods import get_classes
from prez.sparql.methods import sparql_query_non_async

PREZ = Namespace("https://prez.dev/")

Expand All @@ -25,6 +24,9 @@ class CatalogItem(BaseModel):
top_level_listing: Optional[bool] = False

def __hash__(self):
"""
Required to make object hashable and in turn cacheable
"""
return hash(self.uri)

@root_validator
Expand Down
35 changes: 35 additions & 0 deletions prez/models/model_exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from rdflib import URIRef


class ClassNotFoundException(Exception):
    """
    Signals that no classes could be found for a given URI.

    When the URI itself cannot be found either, a URINotFoundException is
    raised instead of this one.
    """

    def __init__(self, uri: URIRef):
        text = f"No classes found for {uri}. Prez can only display information for instances of classes"
        super().__init__(text)
        # Kept as an attribute so callers can read the message directly.
        self.message = text


class URINotFoundException(Exception):
    """
    Signals that a URI could not be found in a given prez backend.
    """

    def __init__(self, uri: URIRef, prez):
        text = f"URI {uri} not found in {prez}."
        super().__init__(text)
        # Kept as an attribute so callers can read the message directly.
        self.message = text


class NoProfilesException(Exception):
    """
    Raised when no profiles can be found for a resource.

    The classes that were searched are listed in the message. Each entry is
    converted with ``str()`` so that a non-string class identifier cannot make
    building the message fail and mask this exception with a ``TypeError``.

    :param classes: the resource class(es) for which a profile was searched
    """

    def __init__(self, classes: list):
        searched = ", ".join(str(klass) for klass in classes)
        self.message = (
            f"No profiles and/or mediatypes could be found to render the resource. The resource class(es) "
            f"for which a profile was searched was/were: {searched}"
        )
        super().__init__(self.message)
1 change: 0 additions & 1 deletion prez/models/profiles_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def populate(cls, values):
values["uri"] = get_uri_for_curie_id(id)
elif uri:
values["id"] = get_curie_id_for_uri(uri)
values["classes"] = get_classes(values["uri"], "VocPrez")
q = f"""SELECT ?class {{ <{values["uri"]}> a ?class }}"""
r = profiles_graph_cache.query(q)
if len(r.bindings) > 0:
Expand Down
Loading

0 comments on commit 8a50fbf

Please sign in to comment.