From 17b12dc5fe75d36aeef29f7b228366f0030d87c3 Mon Sep 17 00:00:00 2001 From: david Date: Wed, 14 Feb 2024 00:56:28 +1000 Subject: [PATCH] Reorganise repo --- prez/app.py | 2 +- prez/dependencies.py | 2 +- prez/models/profiles_and_mediatypes.py | 2 +- .../endpoints/cql_endpoints.ttl | 23 - .../endpoints/endpoint_metadata.ttl | 5 + .../endpoint_node_selection_shapes.ttl | 26 + .../endpoints/ogc_catprez_endpoints.ttl.old | 81 --- .../endpoints/ogc_spaceprez_endpoints.ttl.old | 78 --- .../endpoints/ogc_vocprez_endpoints.ttl.old | 124 ----- .../endpoints/vocprez_endpoints.ttl.unused | 100 ---- .../profiles/ogc_records_profile.ttl | 4 +- prez/renderers/json_renderer.py | 29 +- prez/renderers/renderer.py | 4 +- prez/repositories/__init__.py | 6 + prez/repositories/base.py | 61 ++ prez/repositories/oxrdflib.py | 44 ++ prez/repositories/pyoxigraph.py | 100 ++++ prez/repositories/remote_sparql.py | 70 +++ prez/routers/cql.py | 53 +- prez/routers/ogc_router.py | 2 +- prez/routers/search.py | 4 +- prez/routers/sparql.py | 2 +- prez/services/annotations.py | 148 +++++ prez/services/app_service.py | 9 +- prez/services/cql_search.py | 178 ------ prez/services/generate_profiles.py | 7 +- prez/services/link_generation.py | 54 +- prez/services/listings.py | 12 +- prez/services/objects.py | 6 +- .../classes.py} | 3 +- prez/services/query_generation/connegp.py | 123 ++++ .../query_generation/count.py} | 31 ++ .../services/query_generation}/cql2sparql.py | 33 +- .../query_generation}/cql_sparql_reference.py | 0 .../default_cql_context.json | 0 .../query_generation/search.py} | 0 .../query_generation}/shacl_node_selection.py | 2 +- .../services/query_generation/umbrella.py | 7 +- prez/sparql/methods.py | 237 -------- prez/sparql/objects_listings.py | 523 ------------------ temp/grammar/grammar.py | 23 +- temp/test_search.py | 13 - test_data/spaceprez.ttl | 9 +- tests/_test_cql.py | 2 +- tests/test_count.py | 2 +- tests/test_dd_profiles.py | 2 +- tests/test_endpoints_cache.py | 2 +- tests/test_endpoints_catprez.py | 14 +- tests/test_endpoints_management.py | 2 +- tests/test_endpoints_object.py | 2 +- tests/test_endpoints_ok.py | 52 +- tests/test_endpoints_profiles.py | 6 +- tests/test_endpoints_spaceprez.py | 34 +- tests/test_endpoints_vocprez.py | 2 +- ...arsing.py => test_node_selection_shacl.py} | 4 +- tests/test_redirect_endpoint.py | 2 +- tests/test_search.py | 2 +- tests/test_search_grammar.py | 238 ++++++++ tests/test_sparql.py | 2 +- 59 files changed, 1017 insertions(+), 1591 deletions(-) delete mode 100755 prez/reference_data/endpoints/ogc_catprez_endpoints.ttl.old delete mode 100755 prez/reference_data/endpoints/ogc_spaceprez_endpoints.ttl.old delete mode 100755 prez/reference_data/endpoints/ogc_vocprez_endpoints.ttl.old delete mode 100755 prez/reference_data/endpoints/vocprez_endpoints.ttl.unused create mode 100644 prez/repositories/__init__.py create mode 100755 prez/repositories/base.py create mode 100644 prez/repositories/oxrdflib.py create mode 100644 prez/repositories/pyoxigraph.py create mode 100644 prez/repositories/remote_sparql.py create mode 100755 prez/services/annotations.py delete mode 100755 prez/services/cql_search.py rename prez/services/{model_methods.py => query_generation/classes.py} (93%) create mode 100644 prez/services/query_generation/connegp.py rename prez/{sparql/count_query.py => services/query_generation/count.py} (66%) rename {temp => prez/services/query_generation}/cql2sparql.py (95%) rename {temp => prez/services/query_generation}/cql_sparql_reference.py (100%) rename {temp => prez/services/query_generation}/default_cql_context.json (100%) rename prez/{sparql/search_query.py => services/query_generation/search.py} (100%) rename {temp => prez/services/query_generation}/shacl_node_selection.py (99%) rename temp/shacl2sparql.py => prez/services/query_generation/umbrella.py (99%) delete mode 100755 prez/sparql/methods.py delete mode 100755 prez/sparql/objects_listings.py delete mode 100755 temp/test_search.py rename tests/{test_shacl_parsing.py => test_node_selection_shacl.py} (89%) create mode 100644 tests/test_search_grammar.py diff --git a/prez/app.py b/prez/app.py index 75332fe9..c3f26f11 100755 --- a/prez/app.py +++ b/prez/app.py @@ -46,7 +46,7 @@ ) from prez.services.generate_profiles import create_profiles_graph from prez.services.prez_logging import setup_logger -from prez.sparql.methods import RemoteSparqlRepo, PyoxigraphRepo, OxrdflibRepo +from prez.repositories import RemoteSparqlRepo, PyoxigraphRepo, OxrdflibRepo app = FastAPI( exception_handlers={ diff --git a/prez/dependencies.py b/prez/dependencies.py index edf6ac6c..eb4e1cc5 100755 --- a/prez/dependencies.py +++ b/prez/dependencies.py @@ -15,7 +15,7 @@ endpoints_graph_cache, ) from prez.config import settings -from prez.sparql.methods import PyoxigraphRepo, RemoteSparqlRepo, OxrdflibRepo +from prez.repositories import PyoxigraphRepo, RemoteSparqlRepo, OxrdflibRepo from rdframe import CQLParser diff --git a/prez/models/profiles_and_mediatypes.py b/prez/models/profiles_and_mediatypes.py index 6572b5ed..92f89148 100755 --- a/prez/models/profiles_and_mediatypes.py +++ b/prez/models/profiles_and_mediatypes.py @@ -6,7 +6,7 @@ from prez.services.generate_profiles import get_profiles_and_mediatypes from prez.services.connegp_service import get_requested_profile_and_mediatype -from prez.sparql.methods import Repo +from prez.repositories import Repo PREZ = Namespace("https://prez.dev/") diff --git a/prez/reference_data/endpoints/cql_endpoints.ttl b/prez/reference_data/endpoints/cql_endpoints.ttl index 86fb76e1..42e423f7 100755 --- a/prez/reference_data/endpoints/cql_endpoints.ttl +++ b/prez/reference_data/endpoints/cql_endpoints.ttl @@ -15,26 +15,3 @@ endpoint:get a ont:ListingEndpoint ; shext:offset 0 ; . -endpoint:queryables a ont:ListingEndpoint ; - sh:rule [ sh:subject "?focus_node" ; - sh:predicate ; - sh:object ] ; - ont:deliversClasses prez:QueryablesList ; - sh:target [ sh:select """SELECT DISTINCT ?focus_node - WHERE { - ?s a ?class ; - ?focus_node ?o . - VALUES ?class { - dcat:Catalog - dcat:Dataset - dcat:Resource - skos:ConceptScheme - skos:Collection - skos:Concept - geo:FeatureCollection - geo:Feature - } - }""" ] ; - shext:limit 100 ; - shext:offset 0 ; -. \ No newline at end of file diff --git a/prez/reference_data/endpoints/endpoint_metadata.ttl b/prez/reference_data/endpoints/endpoint_metadata.ttl index 98d97948..d08ad3b2 100644 --- a/prez/reference_data/endpoints/endpoint_metadata.ttl +++ b/prez/reference_data/endpoints/endpoint_metadata.ttl @@ -43,3 +43,8 @@ ogce:item-object a ont:ObjectEndpoint ; ont:relevantShapes ex:Feature , ex:ConceptSchemeConcept , ex:CollectionConcept , ex:Resource ; . + +ogce:cql-queryables + a ont:ListingEndpoint ; + ont:relevantShapes ex:queryables ; +. \ No newline at end of file diff --git a/prez/reference_data/endpoints/endpoint_node_selection_shapes.ttl b/prez/reference_data/endpoints/endpoint_node_selection_shapes.ttl index 58b17045..32e569fa 100644 --- a/prez/reference_data/endpoints/endpoint_node_selection_shapes.ttl +++ b/prez/reference_data/endpoints/endpoint_node_selection_shapes.ttl @@ -7,6 +7,7 @@ @prefix prof: . @prefix rdfs: . @prefix sh: . +@prefix shext: . @prefix skos: . ex:TopLevelCatalogs @@ -84,4 +85,29 @@ ex:Profiles a sh:NodeShape ; ont:hierarchyLevel 1 ; sh:targetClass prof:Profile ; +. + +ex:queryables a sh:NodeShape ; + ont:hierarchyLevel 1 ; + sh:rule [ sh:subject "?focus_node" ; + sh:predicate ; + sh:object ] ; + ont:deliversClasses prez:QueryablesList ; + sh:target [ sh:select """SELECT DISTINCT ?focus_node + WHERE { + ?s a ?class ; + ?focus_node ?o . + VALUES ?class { + dcat:Catalog + dcat:Dataset + dcat:Resource + skos:ConceptScheme + skos:Collection + skos:Concept + geo:FeatureCollection + geo:Feature + } + }""" ] ; + shext:limit 100 ; + shext:offset 0 ; . \ No newline at end of file diff --git a/prez/reference_data/endpoints/ogc_catprez_endpoints.ttl.old b/prez/reference_data/endpoints/ogc_catprez_endpoints.ttl.old deleted file mode 100755 index 7f7a0557..00000000 --- a/prez/reference_data/endpoints/ogc_catprez_endpoints.ttl.old +++ /dev/null @@ -1,81 +0,0 @@ -PREFIX dcat: -PREFIX dcterms: -PREFIX endpoint: -PREFIX prez: -PREFIX ont: -PREFIX sh: -prefix skos: -PREFIX shext: -PREFIX xsd: - - -endpoint:top-level-catalog-listing a ont:ListingEndpoint ; - ont:endpointTemplate "/c/catalogs" ; - ont:deliversClasses prez:CatalogList ; # required to determine the correct profile for ConnegP - sh:targetClass dcat:Catalog ; # required for query construction - shext:limit 20 ; - shext:offset 0 ; - sh:target [ - sh:select """SELECT ?focus_node - WHERE { - ?focus_node dcterms:hasPart ?child_catalog . - ?child_catalog a dcat:Catalog . - } - """ ] ; -. - -endpoint:top-level-catalog-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:deliversClasses dcat:Catalog ; # required for link generation for objects - ont:endpointTemplate "/c/catalogs/$object" ; - ont:parentEndpoint endpoint:top-level-catalog-listing ; -. - -endpoint:lower-level-catalog-listing a ont:ListingEndpoint ; - ont:endpointTemplate "/c/catalogs/$parent_1/collections" ; - ont:deliversClasses prez:CatalogList ; # required to determine the correct profile for ConnegP - ont:parentToFocusRelation dcterms:hasPart ; - sh:targetClass dcat:Catalog ; # required for query construction - ont:parentEndpoint endpoint:top-level-catalog-object ; - shext:limit 20 ; - shext:offset 0 ; - sh:target [ - sh:select """SELECT ?focus_node - WHERE { - $parent_1 dcterms:hasPart ?focus_node . - } - """ ] ; -. - -endpoint:lower-level-catalog-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:deliversClasses dcat:Catalog ; # required for link generation for objects - ont:endpointTemplate "/c/catalogs/$parent_1/collections/$object" ; - ont:parentToFocusRelation dcterms:hasPart ; - ont:parentEndpoint endpoint:lower-level-catalog-listing ; -. - -endpoint:resource-listing a ont:ListingEndpoint ; - ont:endpointTemplate "/c/catalogs/$parent_2/collections/$parent_1/items" ; - ont:deliversClasses prez:ResourceList ; # required to determine the correct profile for ConnegP - ont:parentToFocusRelation dcterms:hasPart ; - sh:targetClass dcat:Resource ; # required for query construction - ont:parentEndpoint endpoint:lower-level-catalog-object ; - shext:limit 20 ; - shext:offset 0 ; - sh:target [ - sh:select """SELECT ?focus_node - WHERE { - $parent_1 dcterms:hasPart ?focus_node . - } - """ ] ; -. - -endpoint:resource-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:deliversClasses dcat:Resource ; # required for link generation for objects - ont:endpointTemplate "/c/catalogs/$parent_2/collections/$parent_1/items/$object" ; - ont:parentToFocusRelation dcterms:hasPart ; - ont:parentEndpoint endpoint:resource-listing ; -. - diff --git a/prez/reference_data/endpoints/ogc_spaceprez_endpoints.ttl.old b/prez/reference_data/endpoints/ogc_spaceprez_endpoints.ttl.old deleted file mode 100755 index 9d53c519..00000000 --- a/prez/reference_data/endpoints/ogc_spaceprez_endpoints.ttl.old +++ /dev/null @@ -1,78 +0,0 @@ -PREFIX dcat: -PREFIX endpoint: -PREFIX geo: -PREFIX ont: -PREFIX prez: -PREFIX rdfs: -PREFIX sh: -PREFIX xsd: -PREFIX shext: - -endpoint:spaceprez-home a ont:Endpoint ; - ont:endpointTemplate "/s" ; -. - -endpoint:dataset-listing a ont:ListingEndpoint ; - ont:deliversClasses prez:DatasetList ; - sh:targetClass dcat:Dataset ; - ont:endpointTemplate "/s/catalogs" ; - shext:limit 20 ; - shext:offset 0 ; -. - -endpoint:dataset-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:parentEndpoint endpoint:dataset-listing ; - ont:deliversClasses dcat:Dataset ; - ont:endpointTemplate "/s/catalogs/$object" ; -. - -endpoint:feature-collection-listing a ont:ListingEndpoint ; - ont:parentEndpoint endpoint:dataset-object ; - sh:targetClass geo:FeatureCollection ; - ont:deliversClasses prez:FeatureCollectionList ; - ont:endpointTemplate "/s/catalogs/$parent_1/collections" ; - ont:parentToFocusRelation rdfs:member ; - shext:limit 20 ; - shext:offset 0 ; - shext:orderBy [ sh:path rdfs:label ] ; - sh:target [ - sh:select """SELECT ?focus_node - WHERE { - $parent_1 rdfs:member ?focus_node . - } - """ ] ; -. - -endpoint:feature-collection-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:parentEndpoint endpoint:feature-collection-listing ; - ont:deliversClasses geo:FeatureCollection ; - ont:endpointTemplate "/s/catalogs/$parent_1/collections/$object" ; - ont:parentToFocusRelation rdfs:member ; -. - -endpoint:feature-listing a ont:ListingEndpoint ; - ont:parentEndpoint endpoint:feature-collection-object ; - sh:targetClass geo:Feature ; - ont:deliversClasses prez:FeatureList ; - ont:endpointTemplate "/s/catalogs/$parent_2/collections/$parent_1/items" ; - ont:parentToFocusRelation rdfs:member ; - shext:limit 20 ; - shext:offset 0 ; - shext:orderBy [ sh:path rdfs:label ] ; - sh:target [ - sh:select """SELECT ?focus_node - WHERE { - $parent_1 rdfs:member ?focus_node . - } - """ ] ; -. - -endpoint:feature-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:parentEndpoint endpoint:feature-listing ; - ont:deliversClasses geo:Feature ; - ont:endpointTemplate "/s/catalogs/$parent_2/collections/$parent_1/items/$object" ; - ont:parentToFocusRelation rdfs:member ; -. diff --git a/prez/reference_data/endpoints/ogc_vocprez_endpoints.ttl.old b/prez/reference_data/endpoints/ogc_vocprez_endpoints.ttl.old deleted file mode 100755 index e1053b87..00000000 --- a/prez/reference_data/endpoints/ogc_vocprez_endpoints.ttl.old +++ /dev/null @@ -1,124 +0,0 @@ -PREFIX dcat: -PREFIX dcterms: -PREFIX endpoint: -PREFIX prez: -PREFIX ont: -PREFIX sh: -prefix skos: -PREFIX shext: -PREFIX xsd: -PREFIX rdfs: - - -endpoint:catalog-listing a ont:ListingEndpoint ; - ont:endpointTemplate "/v/catalogs" ; - ont:deliversClasses prez:CatalogList ; # required to determine the correct profile for ConnegP - sh:targetClass dcat:Catalog ; # required for query construction - shext:limit 20 ; - shext:offset 0 ; - sh:target [ - sh:select """SELECT ?focus_node - WHERE { - ?focus_node dcterms:hasPart ?child_vocab . - ?child_vocab a skos:ConceptScheme . - } - """ ] ; -. - -endpoint:catalog-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:deliversClasses dcat:Catalog ; # required for link generation for objects - ont:endpointTemplate "/v/catalogs/$object" ; - ont:parentEndpoint endpoint:catalog-listing ; -. - -endpoint:vocab-listing a ont:ListingEndpoint ; - ont:endpointTemplate "/v/catalogs/$parent_1/collections" ; - ont:deliversClasses prez:SchemesList ; # required to determine the correct profile for ConnegP - ont:parentToFocusRelation dcterms:hasPart ; - sh:targetClass skos:ConceptScheme ; # required for query construction - ont:parentEndpoint endpoint:catalog-object ; - shext:limit 20 ; - shext:offset 0 ; - sh:target [ - sh:select """SELECT ?focus_node - WHERE { - $parent_1 dcterms:hasPart ?focus_node . - } - """ ] ; -. - -endpoint:vocab-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:deliversClasses skos:ConceptScheme ; # required for link generation for objects - ont:endpointTemplate "/v/catalogs/$parent_1/collections/$object" ; - ont:parentToFocusRelation dcterms:hasPart ; - ont:parentEndpoint endpoint:vocab-listing ; -. - -endpoint:concept-listing a ont:ListingEndpoint ; - ont:endpointTemplate "/v/catalogs/$parent_2/collections/$parent_1/items" ; - ont:deliversClasses prez:ConceptList ; # required to determine the correct profile for ConnegP - ont:focusToParentRelation skos:inScheme ; - sh:targetClass skos:Concept ; # required for query construction - ont:parentEndpoint endpoint:vocab-object ; - shext:limit 20 ; - shext:offset 0 ; - sh:target [ - sh:select """SELECT ?focus_node - WHERE { - ?focus_node skos:inScheme $parent_1 . - } - """ ] ; -. - -endpoint:concept-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:deliversClasses skos:Concept ; # required for link generation for objects - ont:endpointTemplate "/v/catalogs/$parent_2/collections/$parent_1/items/$object" ; - ont:focusToParentRelation skos:inScheme ; - ont:parentEndpoint endpoint:concept-listing ; -. - -endpoint:narrowers - a ont:ListingEndpoint ; - ont:parentEndpoint endpoint:concept-object ; - ont:deliversClasses prez:ConceptList ; - sh:rule - [ - sh:object "?hasChildren" ; - sh:predicate prez:hasChildren ; - sh:subject sh:this ; - ] ; - sh:target - [ - sh:select """SELECT DISTINCT ?focus_node ?hasChildren - WHERE { - $parent_1 skos:narrower|^skos:broader ?focus_node . - BIND(EXISTS{?focus_node skos:narrower|^skos:broader ?grandChildren} AS ?hasChildren) - } - """ ; - ] ; - shext:limit 20 ; - shext:offset 0 ; - shext:orderBy [ sh:path skos:prefLabel ] ; - sh:targetClass skos:Concept ; -. - -endpoint:top-concepts a ont:ListingEndpoint ; - ont:deliversClasses prez:ConceptList ; - ont:parentEndpoint endpoint:vocab-object ; - sh:rule [ sh:subject sh:this ; - sh:predicate prez:hasChildren ; - sh:object "?hasChildren" ] ; - sh:target [ sh:select """SELECT DISTINCT ?focus_node ?hasChildren - WHERE { - $parent_1 skos:hasTopConcept|^skos:isTopConceptOf ?focus_node . - BIND(EXISTS{?focus_node skos:narrower|^skos:broader ?grandChildren} AS ?hasChildren) - } - """ ] ; - shext:limit 20 ; - shext:offset 0 ; - shext:orderBy [ sh:path skos:prefLabel ] ; - sh:targetClass skos:Concept ; -. \ No newline at end of file diff --git a/prez/reference_data/endpoints/vocprez_endpoints.ttl.unused b/prez/reference_data/endpoints/vocprez_endpoints.ttl.unused deleted file mode 100755 index ccdaf4a3..00000000 --- a/prez/reference_data/endpoints/vocprez_endpoints.ttl.unused +++ /dev/null @@ -1,100 +0,0 @@ -PREFIX endpoint: -PREFIX ont: -PREFIX prez: -PREFIX rdfs: -PREFIX sh: -PREFIX skos: -PREFIX xsd: -PREFIX shext: - -endpoint:vocprez-home a ont:Endpoint ; - ont:endpointTemplate "/v" ; -. - -endpoint:collection-listing a ont:ListingEndpoint ; - ont:deliversClasses prez:VocPrezCollectionList ; - sh:targetClass skos:Collection ; - ont:endpointTemplate "/v/collection" ; -. - -endpoint:collection-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:parentEndpoint endpoint:collection-listing ; - ont:deliversClasses skos:Collection ; - ont:endpointTemplate "/v/collection/$object" ; -. - -endpoint:collection-concept a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:parentEndpoint endpoint:collection-object ; - ont:deliversClasses skos:Concept ; - ont:endpointTemplate "/v/collection/$parent_1/$object" ; - ont:parentToFocusRelation skos:member ; -. - - endpoint:vocabs-listing a ont:ListingEndpoint ; - ont:deliversClasses prez:SchemesList ; - sh:targetClass skos:ConceptScheme ; - ont:endpointTemplate "/v/vocab" ; -. - -endpoint:vocab-object a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:parentEndpoint endpoint:vocabs-listing ; - ont:deliversClasses skos:ConceptScheme ; - ont:endpointTemplate "/v/vocab/$object" ; -. - -endpoint:vocab-concept a ont:ObjectEndpoint ; - sh:targetNode "$object" ; - ont:parentEndpoint endpoint:vocab-object ; - ont:deliversClasses skos:Concept ; - ont:endpointTemplate "/v/vocab/$parent_1/$object" ; - ont:focusToParentRelation skos:inScheme ; -. - -endpoint:cs-top-concepts a ont:ListingEndpoint ; - ont:deliversClasses skos:Concept ; - sh:rule [ sh:subject sh:this ; - sh:predicate prez:hasChildren ; - sh:object "?hasChildren" ] ; - sh:target [ sh:select """SELECT DISTINCT ?focus_node ?hasChildren - WHERE { - $parent_1 skos:hasTopConcept|^skos:isTopConceptOf ?focus_node . - ?focus_node skos:prefLabel ?label . - BIND(EXISTS{?focus_node skos:narrower|^skos:broader ?grandChildren} AS ?hasChildren) - } - ORDER BY ?label - LIMIT $limit - OFFSET $offset - """ ] ; - sh:targetClass skos:Concept ; - ont:endpointTemplate "/v/vocab/$parent_1/top-concepts" ; -. - -endpoint:cs-children - a ont:ListingEndpoint ; - ont:deliversClasses skos:Concept ; - rdfs:comment """The concepts one level under top concepts. This query demonstrates how pagination of 2 items at a - time could work, the LIMIT is set to 3 such that we can determine if there are further (i.e. >2) objects available.""" ; - sh:rule - [ - sh:object "?hasChildren" ; - sh:predicate prez:hasChildren ; - sh:subject sh:this ; - ] ; - sh:target - [ - sh:select """SELECT DISTINCT ?focus_node ?hasChildren - WHERE { - $parent_1 skos:narrower|^skos:broader ?focus_node . - BIND(EXISTS{?focus_node skos:narrower|^skos:broader ?grandChildren} AS ?hasChildren) - } - """ ; - ] ; - shext:limit 20 ; - shext:offset 0 ; - shext:orderBy [ sh:path skos:prefLabel ] ; - sh:targetClass skos:Concept ; - ont:endpointTemplate "/v/vocabs/$parent_2/$parent_1/narrowers" ; -. diff --git a/prez/reference_data/profiles/ogc_records_profile.ttl b/prez/reference_data/profiles/ogc_records_profile.ttl index aa9e26b8..0b14c12e 100755 --- a/prez/reference_data/profiles/ogc_records_profile.ttl +++ b/prez/reference_data/profiles/ogc_records_profile.ttl @@ -29,8 +29,8 @@ prez:OGCRecordsProfile altr-ext:hasDefaultProfile prez:OGCListingProfile ] , [ a sh:NodeShape ; - sh:targetClass prez:SchemesList ; - altr-ext:hasDefaultProfile skos:ConceptScheme + sh:targetClass skos:ConceptScheme ; + altr-ext:hasDefaultProfile prez:OGCSchemesListProfile ] , [ a sh:NodeShape ; sh:targetClass dcat:Catalog , skos:ConceptScheme , skos:Concept , geo:Feature , geo:FeatureCollection , skos:Collection ; diff --git a/prez/renderers/json_renderer.py b/prez/renderers/json_renderer.py index 602c15f9..a0492dfc 100755 --- a/prez/renderers/json_renderer.py +++ b/prez/renderers/json_renderer.py @@ -1,11 +1,8 @@ -from itertools import chain - -from rdflib import Graph, URIRef, RDF, SH, Literal +from rdflib import Graph, URIRef, RDF, SH from rdflib.term import Node from prez.cache import profiles_graph_cache from prez.reference_data.prez_ns import ALTREXT -from prez.sparql.objects_listings import get_listing_predicates class NotFoundError(Exception): @@ -29,11 +26,11 @@ def _get_label_predicates(profile_graph: Graph, profile: URIRef) -> list[Node]: def _get_child_iris( - graph: Graph, - iri: Node, - child_to_focus_predicates: list[Node], - parent_to_focus_predicates: list[Node], - focus_to_child_predicates: list[Node], + graph: Graph, + iri: Node, + child_to_focus_predicates: list[Node], + parent_to_focus_predicates: list[Node], + focus_to_child_predicates: list[Node], ) -> list[Node]: children = [] for predicate in child_to_focus_predicates: @@ -55,7 +52,7 @@ def _get_child_iris( def create_graph_item( - iri: str, predicates: list[Node], graph: Graph, context: dict + iri: str, predicates: list[Node], graph: Graph, context: dict ) -> tuple[dict, dict]: item = {"iri": iri} for predicate in predicates: @@ -68,9 +65,9 @@ def create_graph_item( async def render_json_dropdown( - graph: Graph, - profile: URIRef, - selected_class: URIRef, + graph: Graph, + profile: URIRef, + selected_class: URIRef, ) -> dict: profile_graph = profiles_graph_cache.cbd(profile) @@ -90,9 +87,9 @@ async def render_json_dropdown( ) = get_listing_predicates(profile, selected_class) if ( - not child_to_focus_predicates - and not focus_to_parent_predicates - and not focus_to_child_predicates + not child_to_focus_predicates + and not focus_to_parent_predicates + and not focus_to_child_predicates ): # This is a listing view, e.g. /v/vocab. node_shape = profile_graph.value( diff --git a/prez/renderers/renderer.py b/prez/renderers/renderer.py index f03ecd74..8f87c850 100755 --- a/prez/renderers/renderer.py +++ b/prez/renderers/renderer.py @@ -11,8 +11,8 @@ from prez.renderers.csv_renderer import render_csv_dropdown from prez.renderers.json_renderer import render_json_dropdown, NotFoundError from prez.services.curie_functions import get_curie_id_for_uri -from prez.sparql.methods import Repo -from prez.sparql.objects_listings import ( +from prez.repositories import Repo +from prez.services.annotations import ( get_annotation_properties, ) diff --git a/prez/repositories/__init__.py b/prez/repositories/__init__.py new file mode 100644 index 00000000..b2acc6b2 --- /dev/null +++ b/prez/repositories/__init__.py @@ -0,0 +1,6 @@ +from .base import Repo +from .oxrdflib import OxrdflibRepo +from .pyoxigraph import PyoxigraphRepo +from .remote_sparql import RemoteSparqlRepo + +__all__ = ["Repo", "OxrdflibRepo", "PyoxigraphRepo", "RemoteSparqlRepo"] \ No newline at end of file diff --git a/prez/repositories/base.py b/prez/repositories/base.py new file mode 100755 index 00000000..a421d903 --- /dev/null +++ b/prez/repositories/base.py @@ -0,0 +1,61 @@ +import asyncio +import logging +import time +from abc import ABC, abstractmethod +from typing import List +from typing import Tuple +from urllib.parse import quote_plus + +import httpx +import pyoxigraph +from fastapi.concurrency import run_in_threadpool +from rdflib import Namespace, Graph, URIRef, Literal, BNode + +from prez.config import settings + +PREZ = Namespace("https://prez.dev/") + +log = logging.getLogger(__name__) + + +class Repo(ABC): + @abstractmethod + async def rdf_query_to_graph(self, query: str): + pass + + @abstractmethod + async def tabular_query_to_table(self, query: str, context: URIRef = None): + pass + + async def send_queries( + self, rdf_queries: List[str], tabular_queries: List[Tuple[URIRef, str]] = None + ) -> Tuple[Graph, List]: + # Common logic to send both query types in parallel + results = await asyncio.gather( + *[self.rdf_query_to_graph(query) for query in rdf_queries if query], + *[ + self.tabular_query_to_table(query, context) + for context, query in tabular_queries + if query + ], + ) + g = Graph() + tabular_results = [] + for result in results: + if isinstance(result, Graph): + g += result + else: + tabular_results.append(result) + return g, tabular_results + + @abstractmethod + def sparql( + self, query: str, raw_headers: list[tuple[bytes, bytes]], method: str = "GET" + ): + pass + + + + + + diff --git a/prez/repositories/oxrdflib.py b/prez/repositories/oxrdflib.py new file mode 100644 index 00000000..65d8cfd5 --- /dev/null +++ b/prez/repositories/oxrdflib.py @@ -0,0 +1,44 @@ +import logging + +from fastapi.concurrency import run_in_threadpool +from rdflib import Namespace, Graph, URIRef, Literal, BNode + +from prez.repositories.base import Repo + +PREZ = Namespace("https://prez.dev/") + +log = logging.getLogger(__name__) + + +class OxrdflibRepo(Repo): + def __init__(self, oxrdflib_graph: Graph): + self.oxrdflib_graph = oxrdflib_graph + + def _sync_rdf_query_to_graph(self, query: str) -> Graph: + results = self.oxrdflib_graph.query(query) + return results.graph + + def _sync_tabular_query_to_table(self, query: str, context: URIRef = None): + results = self.oxrdflib_graph.query(query) + reformatted_results = [] + for result in results: + reformatted_result = {} + for var in results.vars: + binding = result[var] + if binding: + str_type = self._str_type_for_rdflib_type(binding) + reformatted_result[str(var)] = {"type": str_type, "value": binding} + reformatted_results.append(reformatted_result) + return context, reformatted_results + + async def rdf_query_to_graph(self, query: str) -> Graph: + return await run_in_threadpool(self._sync_rdf_query_to_graph, query) + + async def tabular_query_to_table(self, query: str, context: URIRef = None): + return await run_in_threadpool( + self._sync_tabular_query_to_table, query, context + ) + + def _str_type_for_rdflib_type(self, instance): + map = {URIRef: "uri", BNode: "bnode", Literal: "literal"} + return map[type(instance)] diff --git a/prez/repositories/pyoxigraph.py b/prez/repositories/pyoxigraph.py new file mode 100644 index 00000000..514f2baf --- /dev/null +++ b/prez/repositories/pyoxigraph.py @@ -0,0 +1,100 @@ +import logging + +from fastapi.concurrency import run_in_threadpool +from rdflib import Namespace, Graph, URIRef, Literal + +import pyoxigraph +from prez.repositories.base import Repo + +PREZ = Namespace("https://prez.dev/") + +log = logging.getLogger(__name__) + + +class PyoxigraphRepo(Repo): + def __init__(self, pyoxi_store: pyoxigraph.Store): + self.pyoxi_store = pyoxi_store + + def _handle_query_solution_results( + self, results: pyoxigraph.QuerySolutions + ) -> dict: + """Organise the query results into format serializable by FastAPIs JSONResponse.""" + variables = results.variables + results_dict = {"head": {"vars": [v.value for v in results.variables]}} + results_list = [] + for result in results: + result_dict = {} + for var in variables: + binding = result[var] + if binding: + binding_type = self._pyoxi_result_type(binding) + result_dict[str(var)[1:]] = { + "type": binding_type, + "value": binding.value, + } + results_list.append(result_dict) + results_dict["results"] = {"bindings": results_list} + return results_dict + + @staticmethod + def _handle_query_triples_results(results: pyoxigraph.QueryTriples) -> Graph: + """Parse the query results into a Graph object.""" + ntriples = " .\n".join([str(r) for r in list(results)]) + " ." + g = Graph() + g.bind("prez", URIRef("https://prez.dev/")) + if ntriples == " .": + return g + return g.parse(data=ntriples, format="ntriples") + + def _sync_rdf_query_to_graph(self, query: str) -> Graph: + try: + results = self.pyoxi_store.query(query) + except Exception as e: + print(e) + result_graph = self._handle_query_triples_results(results) + return result_graph + + def _sync_tabular_query_to_table(self, query: str, context: URIRef = None) -> tuple: + results = self.pyoxi_store.query(query) + results_dict = self._handle_query_solution_results(results) + # only return the bindings from the results. + return context, results_dict["results"]["bindings"] + + def _sparql(self, query: str) -> dict | Graph | bool: + """Submit a sparql query to the pyoxigraph store and return the formatted results.""" + results = self.pyoxi_store.query(query) + if isinstance(results, pyoxigraph.QuerySolutions): # a SELECT query result + results_dict = self._handle_query_solution_results(results) + return results_dict + elif isinstance(results, pyoxigraph.QueryTriples): # a CONSTRUCT query result + result_graph = self._handle_query_triples_results(results) + return result_graph + elif isinstance(results, bool): + results_dict = {"head": {}, "boolean": results} + return results_dict + else: + raise TypeError(f"Unexpected result class {type(results)}") + + async def rdf_query_to_graph(self, query: str) -> Graph: + return await run_in_threadpool(self._sync_rdf_query_to_graph, query) + + async def tabular_query_to_table(self, query: str, context: URIRef = None) -> list: + return await run_in_threadpool( + self._sync_tabular_query_to_table, query, context + ) + + async def sparql( + self, query: str, raw_headers: list[tuple[bytes, bytes]], method: str = "" + ) -> list | Graph | bool: + return self._sparql(query) + + @staticmethod + def _pyoxi_result_type(term) -> str: + if isinstance(term, pyoxigraph.Literal): + return "literal" + elif isinstance(term, pyoxigraph.NamedNode): + return "uri" + elif isinstance(term, pyoxigraph.BlankNode): + return "bnode" + else: + raise ValueError(f"Unknown type: {type(term)}") diff --git a/prez/repositories/remote_sparql.py b/prez/repositories/remote_sparql.py new file mode 100644 index 00000000..4dd48732 --- /dev/null +++ b/prez/repositories/remote_sparql.py @@ -0,0 +1,70 @@ +import logging + +import httpx +from rdflib import Namespace, Graph, URIRef + +from prez.config import settings +from prez.repositories.base import Repo + +PREZ = Namespace("https://prez.dev/") + +log = logging.getLogger(__name__) + + +class RemoteSparqlRepo(Repo): + def __init__(self, async_client: httpx.AsyncClient): + self.async_client = async_client + + async def _send_query(self, query: str, mediatype="text/turtle"): + """Sends a SPARQL query asynchronously. + Args: query: str: A SPARQL query to be sent asynchronously. + Returns: httpx.Response: A httpx.Response object + """ + query_rq = self.async_client.build_request( + "POST", + url=settings.sparql_endpoint, + headers={"Accept": mediatype}, + data={"query": query}, + ) + response = await self.async_client.send(query_rq, stream=True) + return response + + async def rdf_query_to_graph(self, query: str) -> Graph: + """ + Sends a SPARQL query asynchronously and parses the response into an RDFLib Graph. + Args: query: str: A SPARQL query to be sent asynchronously. + Returns: rdflib.Graph: An RDFLib Graph object + """ + response = await self._send_query(query) + g = Graph() + await response.aread() + return g.parse(data=response.text, format="turtle") + + async def tabular_query_to_table(self, query: str, context: URIRef = None): + """ + Sends a SPARQL query asynchronously and parses the response into a table format. + The optional context parameter allows an identifier to be supplied with the query, such that multiple results can be + distinguished from each other. + """ + response = await self._send_query(query, "application/sparql-results+json") + await response.aread() + return context, response.json()["results"]["bindings"] + + async def sparql( + self, query: str, raw_headers: list[tuple[bytes, bytes]], method: str = "GET" + ): + """Sends a starlette Request object (containing a SPARQL query in the URL parameters) to a proxied SPARQL + endpoint.""" + # TODO: This only supports SPARQL GET requests because the query is sent as a query parameter. + + query_escaped_as_bytes = f"query={quote_plus(query)}".encode("utf-8") + + # TODO: Global app settings should be passed in as a function argument. + url = httpx.URL(url=settings.sparql_endpoint, query=query_escaped_as_bytes) + headers = [] + for header in raw_headers: + if header[0] != b"host": + headers.append(header) + headers.append((b"host", str(url.host).encode("utf-8"))) + rp_req = self.async_client.build_request(method, url, headers=headers) + return await self.async_client.send(rp_req, stream=True) diff --git a/prez/routers/cql.py b/prez/routers/cql.py index 00505ea8..7dcb9a2b 100755 --- a/prez/routers/cql.py +++ b/prez/routers/cql.py @@ -1,8 +1,10 @@ from typing import Optional from fastapi import APIRouter, Request, Depends -from rdflib import URIRef +from rdflib import Namespace +from rdflib.namespace import URIRef +from prez.reference_data.prez_ns import PREZ from prez.dependencies import ( get_repo, cql_post_parser_dependency, @@ -10,22 +12,24 @@ cql_get_parser_dependency, ) from prez.services.listings import listing_function -from prez.sparql.methods import Repo +from prez.repositories import Repo router = APIRouter(tags=["ogcrecords"]) +OGCE = Namespace(PREZ["endpoint/extended-ogc-records/"]) + @router.post( path="/cql", - name="https://prez.dev/endpoint/cql/post", + name=OGCE["cql-post"], ) async def cql_post_endpoint( - request: Request, - cql_parser: Optional[dict] = Depends(cql_post_parser_dependency), - page: int = 1, - per_page: int = 20, - repo: Repo = Depends(get_repo), - system_repo: Repo = Depends(get_system_repo), + request: Request, + cql_parser: Optional[dict] = Depends(cql_post_parser_dependency), + page: int = 1, + per_page: int = 20, + repo: Repo = Depends(get_repo), + system_repo: Repo = Depends(get_system_repo), ): endpoint_uri = URIRef("https://prez.dev/endpoint/cql/post") return await listing_function( @@ -41,15 +45,15 @@ async def cql_post_endpoint( @router.get( path="/cql", - name="https://prez.dev/endpoint/cql/get", + name=OGCE["cql-get"], ) async def cql_get_endpoint( - request: Request, - cql_parser: Optional[dict] = Depends(cql_get_parser_dependency), - page: int = 1, - per_page: int = 20, - repo: Repo = Depends(get_repo), - system_repo: Repo = Depends(get_system_repo), + request: Request, + cql_parser: Optional[dict] = Depends(cql_get_parser_dependency), + page: int = 1, + per_page: int = 20, + repo: Repo = Depends(get_repo), + system_repo: Repo = Depends(get_system_repo), ): endpoint_uri = URIRef("https://prez.dev/endpoint/cql/get") return await listing_function( @@ -65,22 +69,23 @@ async def cql_get_endpoint( @router.get( path="/queryables", - name="https://prez.dev/endpoint/cql/queryables", + name=OGCE["cql-queryables"], ) async def queryables_endpoint( - request: Request, - cql_parser: Optional[dict] = Depends(cql_get_parser_dependency), - page: int = 1, - per_page: int = 20, - repo: Repo = Depends(get_repo), - system_repo: Repo = Depends(get_system_repo), + request: Request, + cql_parser: Optional[dict] = Depends(cql_get_parser_dependency), + page: int = 1, + per_page: int = 20, + repo: Repo = Depends(get_repo), + system_repo: Repo = Depends(get_system_repo), ): - endpoint_uri = URIRef("https://prez.dev/endpoint/cql/queryables") + endpoint_uri = URIRef(request.scope.get("route").name) return await listing_function( request=request, repo=repo, system_repo=system_repo, endpoint_uri=endpoint_uri, + hierarchy_level=1, page=page, per_page=per_page, cql_parser=cql_parser, diff --git a/prez/routers/ogc_router.py b/prez/routers/ogc_router.py index 5c52a366..52638976 100755 --- a/prez/routers/ogc_router.py +++ b/prez/routers/ogc_router.py @@ -8,7 +8,7 @@ from prez.services.curie_functions import get_uri_for_curie_id from prez.services.listings import listing_function from prez.services.objects import object_function -from prez.sparql.methods import Repo +from prez.repositories import Repo from prez.reference_data.prez_ns import PREZ from temp.grammar import IRI diff --git a/prez/routers/search.py b/prez/routers/search.py index a00b20ab..ff6930ab 100755 --- a/prez/routers/search.py +++ b/prez/routers/search.py @@ -9,8 +9,8 @@ from prez.reference_data.prez_ns import PREZ from prez.renderers.renderer import return_from_graph from prez.services.link_generation import add_prez_links -from prez.sparql.methods import Repo -from prez.sparql.search_query import SearchQuery +from prez.repositories import Repo +from prez.services.query_generation.search import SearchQuery router = APIRouter(tags=["Search"]) diff --git a/prez/routers/sparql.py b/prez/routers/sparql.py index eb788584..643cb67f 100755 --- a/prez/routers/sparql.py +++ b/prez/routers/sparql.py @@ -11,7 +11,7 @@ from prez.dependencies import get_repo, get_system_repo from prez.models.profiles_and_mediatypes import ProfilesMediatypesInfo, populate_profile_and_mediatype from prez.renderers.renderer import return_annotated_rdf -from prez.sparql.methods import Repo +from prez.repositories import Repo PREZ = Namespace("https://prez.dev/") diff --git a/prez/services/annotations.py b/prez/services/annotations.py new file mode 100755 index 00000000..271c0adf --- /dev/null +++ b/prez/services/annotations.py @@ -0,0 +1,148 @@ +import logging +from itertools import chain +from textwrap import dedent +from typing import List, Tuple + +from rdflib import Graph, URIRef, Namespace, Literal + +from prez.cache import tbox_cache +from prez.config import settings +from prez.services.curie_functions import get_uri_for_curie_id + +log = logging.getLogger(__name__) + +ALTREXT = Namespace("http://www.w3.org/ns/dx/conneg/altr-ext#") +PREZ = Namespace("https://prez.dev/") + + +async def get_annotation_properties( + item_graph: Graph, +): + """ + Gets annotation data used for HTML display. + This includes the label, description, and provenance, if available. + Note the following three default predicates are always included. This allows context, i.e. background ontologies, + which are often diverse in the predicates they use, to be aligned with the default predicates used by Prez. The full + range of predicates used can be manually included via profiles. + """ + label_predicates = settings.label_predicates + description_predicates = settings.description_predicates + explanation_predicates = settings.provenance_predicates + other_predicates = settings.other_predicates + terms = ( + set(i for i in item_graph.predicates() if isinstance(i, URIRef)) + | set(i for i in item_graph.objects() if isinstance(i, URIRef)) + | set(i for i in item_graph.subjects() if isinstance(i, URIRef)) + ) + # TODO confirm caching of SUBJECT labels does not cause issues! this could be a lot of labels. Perhaps these are + # better separated and put in an LRU cache. Or it may not be worth the effort. + if not terms: + return None, Graph() + # read labels from the tbox cache, this should be the majority of labels + uncached_terms, labels_g = get_annotations_from_tbox_cache( + terms, + label_predicates, + description_predicates, + explanation_predicates, + other_predicates, + ) + + def other_predicates_statement(other_predicates, uncached_terms_other): + return f"""UNION + {{ + ?unannotated_term ?other_prop ?other . + VALUES ?other_prop {{ {" ".join('<' + str(pred) + '>' for pred in other_predicates)} }} + VALUES ?unannotated_term {{ {" ".join('<' + str(term) + '>' for term in uncached_terms_other)} + }} + }}""" + + queries_for_uncached = f"""CONSTRUCT {{ + ?unlabeled_term ?label_prop ?label . + ?undescribed_term ?desc_prop ?description . + ?unexplained_term ?expl_prop ?explanation . + ?unannotated_term ?other_prop ?other . + }} + WHERE {{ + {{ + ?unlabeled_term ?label_prop ?label . + VALUES ?label_prop {{ {" ".join('<' + str(pred) + '>' for pred in label_predicates)} }} + VALUES ?unlabeled_term {{ {" ".join('<' + str(term) + '>' for term in uncached_terms["labels"])} }} + FILTER(lang(?label) = "" || lang(?label) = "en" || lang(?label) = "en-AU") + }} + UNION + {{ + ?undescribed_term ?desc_prop ?description . + VALUES ?desc_prop {{ {" ".join('<' + str(pred) + '>' for pred in description_predicates)} }} + VALUES ?undescribed_term {{ {" ".join('<' + str(term) + '>' for term in uncached_terms["descriptions"])} + }} + }} + UNION + {{ + ?unexplained_term ?expl_prop ?explanation . + VALUES ?expl_prop {{ {" ".join('<' + str(pred) + '>' for pred in explanation_predicates)} }} + VALUES ?unexplained_term {{ {" ".join('<' + str(term) + '>' for term in uncached_terms["provenance"])} + }} + }} + {other_predicates_statement(other_predicates, uncached_terms["other"]) if other_predicates else ""} + }}""" + return queries_for_uncached, labels_g + + +def get_annotations_from_tbox_cache( + terms: List[URIRef], label_props, description_props, explanation_props, other_props +): + """ + Gets labels from the TBox cache, returns a list of terms that were not found in the cache, and a graph of labels, + descriptions, and explanations + """ + labels_from_cache = Graph(bind_namespaces="rdflib") + terms_list = list(terms) + props_from_cache = { + "labels": list( + chain( + *( + tbox_cache.triples_choices((terms_list, prop, None)) + for prop in label_props + ) + ) + ), + "descriptions": list( + chain( + *( + tbox_cache.triples_choices((terms_list, prop, None)) + for prop in description_props + ) + ) + ), + "provenance": list( + chain( + *( + tbox_cache.triples_choices((terms_list, prop, None)) + for prop in explanation_props + ) + ) + ), + "other": list( + chain( + *( + tbox_cache.triples_choices((terms_list, prop, None)) + for prop in other_props + ) + ) + ), + } + # get all the annotations we can from the cache + all = list(chain(*props_from_cache.values())) + default_language = settings.default_language + for triple in all: + if isinstance(triple[2], Literal): + if triple[2].language == default_language: + labels_from_cache.add(triple) + elif triple[2].language is None: + labels_from_cache.add(triple) + # the remaining terms are not in the cache; we need to query the SPARQL endpoint to attempt to get them + uncached_props = { + k: list(set(terms) - set(triple[0] for triple in v)) + for k, v in props_from_cache.items() + } + return uncached_props, labels_from_cache diff --git a/prez/services/app_service.py b/prez/services/app_service.py index 249cd5dc..04a155a1 100755 --- a/prez/services/app_service.py +++ b/prez/services/app_service.py @@ -3,21 +3,20 @@ from pathlib import Path import httpx -from rdflib import URIRef, Literal, BNode, RDF, Graph, RDFS, DCTERMS, SDO, SKOS, Dataset +from rdflib import URIRef, Literal, Graph, RDFS, DCTERMS, SDO, SKOS, Dataset from prez.cache import ( prez_system_graph, - profiles_graph_cache, counts_graph, prefix_graph, endpoints_graph_cache, tbox_cache, ) from prez.config import settings -from prez.reference_data.prez_ns import PREZ, ALTREXT +from prez.reference_data.prez_ns import PREZ from prez.services.curie_functions import get_curie_id_for_uri -from prez.sparql.methods import Repo -from prez.sparql.objects_listings import startup_count_objects +from prez.repositories import Repo +from prez.services.query_generation.count import startup_count_objects log = logging.getLogger(__name__) diff --git a/prez/services/cql_search.py b/prez/services/cql_search.py deleted file mode 100755 index 17be1512..00000000 --- a/prez/services/cql_search.py +++ /dev/null @@ -1,178 +0,0 @@ -import re -from typing import Tuple - -from fastapi import HTTPException - - -class CQLSearch(object): - from prez.config import settings - - def __init__(self, cql_query: str, sparql_query: str) -> None: - self.cql_query = cql_query - self.sparql_query = sparql_query - - def _check_prop_exists(self, prop: str) -> bool: - return prop in settings.cql_props.keys() - - def _check_type(self, prop: str, val: str) -> bool: - prop_type = settings.cql_props[prop].get("type") - if prop_type is not None: - correct_type = False - match prop_type: - case "integer": - if re.match(r"(-|\+)?\d+", val): - correct_type = True - case "float": - if re.match(r"(-|\+)?\d+\.\d+", val): - correct_type = True - case "string": - if re.match(r'".+"', val): - correct_type = True - case _: # invalid prop type? - pass - return correct_type - else: - return True - - def _parse_eq_ops(self, f: str) -> str: - # validate - exps = re.findall( - r'(\w+)\s?(<>|<=|>=|=|<|>)\s?(".+"|\d+(?:\.\d+)?)', f, flags=re.IGNORECASE - ) - for prop, op, val in exps: - if not self._check_prop_exists(prop): - raise HTTPException( - status_code=400, - detail=f"{prop} is not a valid property. Please consult /queryables for the list of available properties.", - ) - if not self._check_type(prop, val): - raise HTTPException( - status_code=400, - detail=f"Invalid type for the property {prop}, which is of type {settings.cql_props[prop].get('type')}", - ) - - # string replace - return re.sub( - r'(\w+)\s?(<>|<=|>=|=|<|>)\s?(".+"|\d+(?:\.\d+)?)', - lambda x: f'?{x.group(1)} {"!=" if x.group(2) == "<>" else x.group(2)} {x.group(3)}', - f, - flags=re.IGNORECASE, - ) - - def _parse_between(self, f: str) -> str: - # validate - exps = re.findall( - r'(\w+) between (".+"|\d+(?:\.\d+)?) and (".+"|\d+(?:\.\d+)?)', - f, - flags=re.IGNORECASE, - ) - for prop, val1, val2 in exps: - if not self._check_prop_exists(prop): - raise HTTPException( - status_code=400, - detail=f"{prop} is not a valid property. Please consult /queryables for the list of available properties.", - ) - if not self._check_type(prop, val1) or not self._check_type(prop, val2): - raise HTTPException( - status_code=400, - detail=f"Invalid type for the property {prop}, which is of type {settings.cql_props[prop].get('type')}", - ) - - # string replace - return re.sub( - r'(\w+) between (".+"|\d+(?:\.\d+)?) and (".+"|\d+(?:\.\d+)?)', - r"(?\1 >= \2 && ?\1 <= \3)", - f, - flags=re.IGNORECASE, - ) - - def _parse_or(self, f: str) -> str: - return re.sub(r" or ", r" || ", f, flags=re.IGNORECASE) - - def _parse_and(self, f: str) -> str: - return re.sub(r" and ", r" && ", f, flags=re.IGNORECASE) - - def _parse_like(self, f: str) -> str: - # validate - exps = re.findall(r'(\w+) like (".+")', f, flags=re.IGNORECASE) - for prop, val in exps: - if not self._check_prop_exists(prop): - raise HTTPException( - status_code=400, - detail=f"{prop} is not a valid property. Please consult /queryables for the list of available properties.", - ) - if not self._check_type(prop, val): - raise HTTPException( - status_code=400, - detail=f"Invalid type for the property {prop}, which is of type {settings.cql_props[prop].get('type')}", - ) - - # string replace - return re.sub( - r'(\w+) like (".+")', r'regex(?\1, \2, "i" )', f, flags=re.IGNORECASE - ) - - def _parse_is(self, f: str) -> str: - return re.sub( - r"(\w+) is (not )?null", - # no longer using FILTER(EXISTS {?f qname ?prop}), which is in the spec - https://opengeospatial.github.io/ogc-geosparql/geosparql11/spec.html#_f_2_4_comparison_predicates - lambda x: f'{"!" if x.group(2) is None else ""}BOUND(?{x.group(1)})', - f, - flags=re.IGNORECASE, - ) - - def _parse_in(self, f: str) -> str: - # validate - exps = re.findall( - r'(\w+) (in) (\((?:(?:".+"|\d+),\s?)*(?:".+"|\d+)\))', - f, - flags=re.IGNORECASE, - ) - for prop, op, val in exps: - if not self._check_prop_exists(prop): - raise HTTPException( - status_code=400, - detail=f"{prop} is not a valid property. Please consult /queryables for the list of available properties.", - ) - for element in val.strip("()").split(","): - if not self._check_type(prop, element.strip()): - raise HTTPException( - status_code=400, - detail=f"Invalid type for the property {prop}, which is of type {settings.cql_props[prop].get('type')}", - ) - - # string replace - return re.sub( - r'(\w+) (in) (\((?:(?:".+"|\d+),\s?)*(?:".+"|\d+)\))', - r"?\1 \2 \3", - f, - flags=re.IGNORECASE, - ) - - def generate_query(self) -> Tuple[str, str, str]: - self.dataset_query = "" - - if self.datasets != "": - self.dataset_query = f""" - VALUES ?d_id {{{" ".join([f'"{d.strip()}"^^prez:slug' for d in self.datasets.split(',')])}}} - """ - - self.collection_query = "" - - if self.collections != "": - self.collection_query = f""" - VALUES ?coll_id {{{" ".join([f'"{coll.strip()}"^^prez:slug' for coll in self.collections.split(',')])}}} - """ - - # TODO run regex at once, then separately parse components - if self.filter != "": - self.filter = self._parse_eq_ops(self.filter) - self.filter = self._parse_between(self.filter) - self.filter = self._parse_or(self.filter) - self.filter = self._parse_and(self.filter) - self.filter = self._parse_like(self.filter) - self.filter = self._parse_is(self.filter) - self.filter = self._parse_in(self.filter) - - self.filter = f"FILTER({self.filter})" - return self.filter diff --git a/prez/services/generate_profiles.py b/prez/services/generate_profiles.py index 067b0678..8cf93ef4 100755 --- a/prez/services/generate_profiles.py +++ b/prez/services/generate_profiles.py @@ -5,12 +5,11 @@ from rdflib import Graph, URIRef, RDF, PROF, Literal from prez.cache import profiles_graph_cache, prefix_graph -from prez.config import settings from prez.models.model_exceptions import NoProfilesException from prez.reference_data.prez_ns import PREZ from prez.services.curie_functions import get_curie_id_for_uri -from prez.sparql.methods import Repo -from prez.sparql.objects_listings import select_profile_mediatype +from prez.repositories import Repo +from prez.services.query_generation.connegp import select_profile_mediatype log = logging.getLogger(__name__) @@ -66,7 +65,6 @@ async def create_profiles_graph(repo) -> Graph: _add_prez_profile_links() -# @lru_cache(maxsize=128) async def get_profiles_and_mediatypes( classes: FrozenSet[URIRef], system_repo: Repo, @@ -189,4 +187,3 @@ def _add_prez_profile_links(): Literal(f"/profiles/{get_curie_id_for_uri(profile)}"), ) ) - # profiles_graph_cache.__iadd__(g) diff --git a/prez/services/link_generation.py b/prez/services/link_generation.py index c365e0b4..1d89b4dc 100755 --- a/prez/services/link_generation.py +++ b/prez/services/link_generation.py @@ -6,13 +6,12 @@ from prez.cache import endpoints_graph_cache, links_ids_graph_cache from prez.config import settings -from prez.reference_data.prez_ns import ONT from prez.reference_data.prez_ns import PREZ from prez.services.curie_functions import get_curie_id_for_uri -from prez.services.model_methods import get_classes -from prez.sparql.methods import Repo +from prez.services.query_generation.classes import get_classes +from prez.repositories import Repo from temp.grammar import * -from temp.shacl_node_selection import NodeShape +from prez.services.query_generation.shacl_node_selection import NodeShape log = logging.getLogger(__name__) @@ -29,7 +28,8 @@ async def add_prez_links(graph: Graph, repo: Repo, endpoint_structure): await _link_generation(uri, repo, klasses, graph, endpoint_structure) -async def _link_generation(uri: URIRef, repo: Repo, klasses, graph: Graph, endpoint_structure: str = settings.endpoint_structure): +async def _link_generation(uri: URIRef, repo: Repo, klasses, graph: Graph, + endpoint_structure: str = settings.endpoint_structure): # check the cache quads = list( links_ids_graph_cache.quads((None, None, None, uri)) @@ -45,18 +45,21 @@ async def _link_generation(uri: URIRef, repo: Repo, klasses, graph: Graph, endpo # run queries for available nodeshapes to get link components for ns in available_nodeshapes: if int(ns.hierarchy_level) > 1: - results = await get_link_components(available_nodeshapes, repo) + results = await get_link_components(ns, repo) for result in results: # if the list at tuple[1] > 0 then there's some result and a link should be generated. # NB for top level links, there will be a result (the graph pattern matched) BUT the result will not form # part of the link. e.g. ?path_node_1 will have result(s) but is not part of the link. for solution in result[1]: # create link strings - curie_for_uri, members_link, object_link = await create_link_strings(ns.hierarchy_level, solution, uri, endpoint_structure) + curie_for_uri, members_link, object_link = await create_link_strings(ns.hierarchy_level, + solution, uri, + endpoint_structure) # add links and identifiers to graph and cache await add_links_to_graph_and_cache(curie_for_uri, graph, members_link, object_link, uri) else: - curie_for_uri, members_link, object_link = await create_link_strings(ns.hierarchy_level, {}, uri, endpoint_structure) + curie_for_uri, members_link, object_link = await create_link_strings(ns.hierarchy_level, {}, uri, + endpoint_structure) await add_links_to_graph_and_cache(curie_for_uri, graph, members_link, object_link, uri) @@ -116,27 +119,26 @@ async def create_link_strings(hierarchy_level, solution, uri, endpoint_structure return curie_for_uri, members_link, object_link -async def get_link_components(available_nodeshapes, repo): +async def get_link_components(ns, repo): link_queries = [] - for ns in available_nodeshapes: - link_queries.append( - ( - ns.uri, - "".join(SubSelect( - select_clause=SelectClause( - variables_or_all=ns.path_nodes.values()), - where_clause=WhereClause( - group_graph_pattern=GroupGraphPattern( - content=GroupGraphPatternSub( - triples_block=TriplesBlock( - triples=ns.triples_list - ), - graph_patterns_or_triples_blocks=ns.gpnt_list - ) + link_queries.append( + ( + ns.uri, + "".join(SubSelect( + select_clause=SelectClause( + variables_or_all=ns.path_nodes.values()), + where_clause=WhereClause( + group_graph_pattern=GroupGraphPattern( + content=GroupGraphPatternSub( + triples_block=TriplesBlock( + triples=ns.triples_list + ), + graph_patterns_or_triples_blocks=ns.gpnt_list ) ) - ).render()) - ) + ) + ).render()) ) + ) _, results = await repo.send_queries([], link_queries) return results diff --git a/prez/services/listings.py b/prez/services/listings.py index 6f3b5f7c..dbc0022e 100755 --- a/prez/services/listings.py +++ b/prez/services/listings.py @@ -14,15 +14,15 @@ from prez.reference_data.prez_ns import PREZ from prez.renderers.renderer import return_from_graph from prez.services.link_generation import add_prez_links -from prez.services.model_methods import get_classes -from prez.sparql.count_query import CountQuery -from prez.sparql.methods import Repo -from prez.sparql.search_query import SearchQuery +from prez.services.query_generation.classes import get_classes +from prez.services.query_generation.count import CountQuery +from prez.repositories import Repo +from prez.services.query_generation.search import SearchQuery from temp.grammar import * # from rdframe.grammar import SubSelect # from rdframe import PrezQueryConstructor -from temp.shacl2sparql import PrezQueryConstructor -from temp.shacl_node_selection import NodeShape +from prez.services.query_generation.umbrella import PrezQueryConstructor +from prez.services.query_generation.shacl_node_selection import NodeShape log = logging.getLogger(__name__) diff --git a/prez/services/objects.py b/prez/services/objects.py index b6807c47..714a8c6e 100755 --- a/prez/services/objects.py +++ b/prez/services/objects.py @@ -11,10 +11,10 @@ from prez.reference_data.prez_ns import EP from prez.renderers.renderer import return_from_graph from prez.services.link_generation import add_prez_links -from prez.services.model_methods import get_classes -from prez.sparql.methods import Repo +from prez.services.query_generation.classes import get_classes +from prez.repositories import Repo from temp.grammar import IRI -from temp.shacl2sparql import PrezQueryConstructor +from prez.services.query_generation.umbrella import PrezQueryConstructor log = logging.getLogger(__name__) diff --git a/prez/services/model_methods.py b/prez/services/query_generation/classes.py similarity index 93% rename from prez/services/model_methods.py rename to prez/services/query_generation/classes.py index e32d2e90..08fc3e60 100755 --- a/prez/services/model_methods.py +++ b/prez/services/query_generation/classes.py @@ -1,8 +1,7 @@ import logging from rdflib import URIRef -from prez.cache import endpoints_graph_cache -from prez.sparql.methods import Repo +from prez.repositories import Repo log = logging.getLogger(__name__) diff --git a/prez/services/query_generation/connegp.py b/prez/services/query_generation/connegp.py new file mode 100644 index 00000000..911aee4d --- /dev/null +++ b/prez/services/query_generation/connegp.py @@ -0,0 +1,123 @@ +import logging +from textwrap import dedent +from typing import List, Tuple + +from rdflib import URIRef, Namespace + +from prez.services.curie_functions import get_uri_for_curie_id + +log = logging.getLogger(__name__) + +ALTREXT = Namespace("http://www.w3.org/ns/dx/conneg/altr-ext#") +PREZ = Namespace("https://prez.dev/") + + +def select_profile_mediatype( + classes: List[URIRef], + requested_profile_uri: URIRef = None, + requested_profile_token: str = None, + requested_mediatypes: List[Tuple] = None, + listing: bool = False, +): + """ + Returns a SPARQL SELECT query which will determine the profile and mediatype to return based on user requests, + defaults, and the availability of these in profiles. + + NB: Most specific class refers to the rdfs:Class of an object which has the most specific rdfs:subClassOf links to + the base class delivered by that API endpoint. The base classes delivered by each API endpoint are: + + SpacePrez: + /s/catalogs -> prez:DatasetList + /s/catalogs/{ds_id} -> dcat:Dataset + /s/catalogs/{ds_id}/collections/{fc_id} -> geo:FeatureCollection + /s/catalogs/{ds_id}/collections -> prez:FeatureCollectionList + /s/catalogs/{ds_id}/collections/{fc_id}/features -> geo:Feature + + VocPrez: + /v/schemes -> skos:ConceptScheme + /v/collections -> skos:Collection + /v/schemes/{cs_id}/concepts -> skos:Concept + + CatPrez: + /c/catalogs -> dcat:Catalog + /c/catalogs/{cat_id}/datasets -> dcat:Dataset + + The following logic is used to determine the profile and mediatype to be returned: + + 1. If a profile and mediatype are requested, they are returned if a matching profile which has the requested + mediatype is found, otherwise the default profile for the most specific class is returned, with its default + mediatype. + 2. If a profile only is requested, if it can be found it is returned, otherwise the default profile for the most + specific class is returned. In both cases the default mediatype is returned. + 3. If a mediatype only is requested, the default profile for the most specific class is returned, and if the + requested mediatype is available for that profile, it is returned, otherwise the default mediatype for that profile + is returned. + 4. If neither a profile nor mediatype is requested, the default profile for the most specific class is returned, + with the default mediatype for that profile. + """ + if listing: + profile_class = PREZ.ListingProfile + else: + profile_class = PREZ.ObjectProfile + if requested_profile_token: + requested_profile_uri = get_uri_for_curie_id(requested_profile_token) + query = dedent( + f""" PREFIX altr-ext: + PREFIX dcat: + PREFIX dcterms: + PREFIX geo: + PREFIX prez: + PREFIX prof: + PREFIX rdfs: + PREFIX skos: + PREFIX sh: + + SELECT ?profile ?title ?class (count(?mid) as ?distance) ?req_profile ?def_profile ?format ?req_format ?def_format + + WHERE {{ + VALUES ?class {{{" ".join('<' + str(klass) + '>' for klass in classes)}}} + ?class rdfs:subClassOf* ?mid . + ?mid rdfs:subClassOf* ?base_class . + VALUES ?base_class {{ dcat:Dataset geo:FeatureCollection geo:Feature + skos:ConceptScheme skos:Concept skos:Collection + dcat:Catalog dcat:Resource prof:Profile prez:SPARQLQuery + prez:SearchResult prez:CQLObjectList prez:QueryablesList prez:Object }} + ?profile altr-ext:constrainsClass ?class ; + altr-ext:hasResourceFormat ?format ; + dcterms:title ?title .\ + {f'?profile a {profile_class.n3()} .'} + {f'BIND(?profile=<{requested_profile_uri}> as ?req_profile)' if requested_profile_uri else ''} + BIND(EXISTS {{ ?shape sh:targetClass ?class ; + altr-ext:hasDefaultProfile ?profile }} AS ?def_profile) + {generate_mediatype_if_statements(requested_mediatypes) if requested_mediatypes else ''} + BIND(EXISTS {{ ?profile altr-ext:hasDefaultResourceFormat ?format }} AS ?def_format) + }} + GROUP BY ?class ?profile ?req_profile ?def_profile ?format ?req_format ?def_format ?title + ORDER BY DESC(?req_profile) DESC(?distance) DESC(?def_profile) DESC(?req_format) DESC(?def_format)""" + ) + return query + + +def generate_mediatype_if_statements(requested_mediatypes: list): + """ + Generates a list of if statements which will be used to determine the mediatype to return based on user requests, + and the availability of these in profiles. + These are of the form: + BIND( + IF(?format="application/ld+json", "0.9", + IF(?format="text/html", "0.8", + IF(?format="image/apng", "0.7", ""))) AS ?req_format) + """ + # TODO ConnegP appears to return a tuple of q values and profiles for headers, and only profiles (no q values) if they + # are not specified in QSAs. + if not isinstance(next(iter(requested_mediatypes)), tuple): + requested_mediatypes = [(1, mt) for mt in requested_mediatypes] + + line_join = "," + "\n" + ifs = ( + f"BIND(\n" + f"""{line_join.join({chr(9) + 'IF(?format="' + tup[1] + '", "' + str(tup[0]) + '"' for tup in requested_mediatypes})}""" + f""", ""{')' * len(requested_mediatypes)}\n""" + f"\tAS ?req_format)" + ) + return ifs diff --git a/prez/sparql/count_query.py b/prez/services/query_generation/count.py similarity index 66% rename from prez/sparql/count_query.py rename to prez/services/query_generation/count.py index a45fb358..97406deb 100755 --- a/prez/sparql/count_query.py +++ b/prez/services/query_generation/count.py @@ -16,6 +16,18 @@ def render(self): return "".join(part for part in cq.render()) def create_construct_query(self): + """Calls lower level functions and builds the overall query. + Query is of the form: + CONSTRUCT { + _:N9008750f9acb47c08dfc2c3ae72ede37 ?count . + } + WHERE { + SELECT (COUNT(DISTINCT ?focus_node) AS ?count) + WHERE { + <<>> + } + } + """ self.remove_limit_and_offset() self.rebuild_select_clause() cq = ConstructQuery( @@ -30,9 +42,15 @@ def create_construct_query(self): return cq def remove_limit_and_offset(self): + """Removes the LIMIT and OFFSET clauses from the original subselect query, + such that the count of all member objects can be obtained.""" self.subselect.solution_modifier = None def rebuild_select_clause(self): + """ + Rebuilds the SELECT clause to retrieve the count of the focus node. + SELECT (COUNT(DISTINCT ?focus_node) AS ?count) + """ sc = SelectClause( variables_or_all=[ ( @@ -60,6 +78,10 @@ def rebuild_select_clause(self): def create_construct_template(self): """ + Generates a triple for the CONSTRUCT query of the form: + { + _:N38355498469c47c5bb1dfa5b34a73df0 ?count . + } """ bn = BlankNode(value=BlankNodeLabel(part_1=BNode())) search_result_triples = [ @@ -73,3 +95,12 @@ def create_construct_template(self): construct_triples=ConstructTriples(triples=search_result_triples) ) return ct + + +def startup_count_objects(): + """ + Retrieves hardcoded counts for collections in the repository (Feature Collections, Catalogs etc.) + """ + return f"""PREFIX prez: + CONSTRUCT {{ ?collection prez:count ?count }} + WHERE {{ ?collection prez:count ?count }}""" diff --git a/temp/cql2sparql.py b/prez/services/query_generation/cql2sparql.py similarity index 95% rename from temp/cql2sparql.py rename to prez/services/query_generation/cql2sparql.py index c0f616b2..18864ea3 100755 --- a/temp/cql2sparql.py +++ b/prez/services/query_generation/cql2sparql.py @@ -4,36 +4,9 @@ from rdflib import URIRef, Namespace from rdflib.namespace import GEO, SH -from temp.grammar.grammar import ( - GroupOrUnionGraphPattern, - GroupGraphPatternSub, - TriplesBlock, - SimplifiedTriple, - GroupGraphPattern, - GraphPatternNotTriples, - Filter, - InlineDataOneVar, - InlineData, - DataBlock, - WhereClause, - ConstructTemplate, - SolutionModifier, - ConstructQuery, - ConstructTriples, - Var, - IRI, - RDFLiteral, - PrimaryExpression, - RegexExpression, - Expression, - BuiltInCall, - Constraint, - FunctionCall, - NumericLiteral, - DataBlockValue, - ArgList, -) -from temp.cql_sparql_reference import ( +from temp.grammar import * + +from prez.services.query_generation.cql_sparql_reference import ( cql_sparql_spatial_mapping, cql_to_shapely_mapping, ) diff --git a/temp/cql_sparql_reference.py b/prez/services/query_generation/cql_sparql_reference.py similarity index 100% rename from temp/cql_sparql_reference.py rename to prez/services/query_generation/cql_sparql_reference.py diff --git a/temp/default_cql_context.json b/prez/services/query_generation/default_cql_context.json similarity index 100% rename from temp/default_cql_context.json rename to prez/services/query_generation/default_cql_context.json diff --git a/prez/sparql/search_query.py b/prez/services/query_generation/search.py similarity index 100% rename from prez/sparql/search_query.py rename to prez/services/query_generation/search.py diff --git a/temp/shacl_node_selection.py b/prez/services/query_generation/shacl_node_selection.py similarity index 99% rename from temp/shacl_node_selection.py rename to prez/services/query_generation/shacl_node_selection.py index 049b3305..0527a0df 100644 --- a/temp/shacl_node_selection.py +++ b/prez/services/query_generation/shacl_node_selection.py @@ -55,7 +55,7 @@ def from_graph(self): # TODO this can be a SPARQL select against the system gra ) for ps_uri in self.propertyShapesURIs] self.hierarchy_level = next(self.graph.objects(self.uri, ONT.hierarchyLevel), None) if not self.hierarchy_level: - print('') + raise ValueError("No hierarchy level found") def to_grammar(self): if self.targetNode: diff --git a/temp/shacl2sparql.py b/prez/services/query_generation/umbrella.py similarity index 99% rename from temp/shacl2sparql.py rename to prez/services/query_generation/umbrella.py index af0c128f..cd052c74 100755 --- a/temp/shacl2sparql.py +++ b/prez/services/query_generation/umbrella.py @@ -108,12 +108,11 @@ def _generate_query(self): construct_triples=ConstructTriples(triples=self.construct_triples) ) solution_modifier = SolutionModifier() - query = ConstructQuery( + query_str = ConstructQuery( construct_template=construct_template, where_clause=where, solution_modifier=solution_modifier, - ) - query_str = "".join(part for part in query.render()) + ).to_string() self.sparql = query_str def build_inner_select(self): @@ -217,7 +216,7 @@ def create_select_subquery_from_template(self, target_bn): ).rstrip() sol_mod, order_by_triple = self._create_focus_node_solution_modifier() if order_by_triple: # insert it before the end of the string, - order_by_triple_text = "".join(order_by_triple.render()) + order_by_triple_text = order_by_triple.to_string() substituted_query = ( substituted_query[:-1] + f"{{{order_by_triple_text}}} }}" ) diff --git a/prez/sparql/methods.py b/prez/sparql/methods.py deleted file mode 100755 index e604e979..00000000 --- a/prez/sparql/methods.py +++ /dev/null @@ -1,237 +0,0 @@ -import asyncio -import logging -import time -from abc import ABC, abstractmethod -from typing import List -from typing import Tuple -from urllib.parse import quote_plus - -import httpx -import pyoxigraph -from fastapi.concurrency import run_in_threadpool -from rdflib import Namespace, Graph, URIRef, Literal, BNode - -from prez.config import settings - -PREZ = Namespace("https://prez.dev/") - -log = logging.getLogger(__name__) - - -class Repo(ABC): - @abstractmethod - async def rdf_query_to_graph(self, query: str): - pass - - @abstractmethod - async def tabular_query_to_table(self, query: str, context: URIRef = None): - pass - - async def send_queries( - self, rdf_queries: List[str], tabular_queries: List[Tuple[URIRef, str]] = None - ) -> Tuple[Graph, List]: - # Common logic to send both query types in parallel - results = await asyncio.gather( - *[self.rdf_query_to_graph(query) for query in rdf_queries if query], - *[ - self.tabular_query_to_table(query, context) - for context, query in tabular_queries - if query - ], - ) - g = Graph() - tabular_results = [] - for result in results: - if isinstance(result, Graph): - g += result - else: - tabular_results.append(result) - return g, tabular_results - - @abstractmethod - def sparql( - self, query: str, raw_headers: list[tuple[bytes, bytes]], method: str = "GET" - ): - pass - - -class RemoteSparqlRepo(Repo): - def __init__(self, async_client: httpx.AsyncClient): - self.async_client = async_client - - async def _send_query(self, query: str, mediatype="text/turtle"): - """Sends a SPARQL query asynchronously. - Args: query: str: A SPARQL query to be sent asynchronously. - Returns: httpx.Response: A httpx.Response object - """ - query_rq = self.async_client.build_request( - "POST", - url=settings.sparql_endpoint, - headers={"Accept": mediatype}, - data={"query": query}, - ) - response = await self.async_client.send(query_rq, stream=True) - return response - - async def rdf_query_to_graph(self, query: str) -> Graph: - """ - Sends a SPARQL query asynchronously and parses the response into an RDFLib Graph. - Args: query: str: A SPARQL query to be sent asynchronously. - Returns: rdflib.Graph: An RDFLib Graph object - """ - response = await self._send_query(query) - g = Graph() - await response.aread() - return g.parse(data=response.text, format="turtle") - - async def tabular_query_to_table(self, query: str, context: URIRef = None): - """ - Sends a SPARQL query asynchronously and parses the response into a table format. - The optional context parameter allows an identifier to be supplied with the query, such that multiple results can be - distinguished from each other. - """ - response = await self._send_query(query, "application/sparql-results+json") - await response.aread() - return context, response.json()["results"]["bindings"] - - async def sparql( - self, query: str, raw_headers: list[tuple[bytes, bytes]], method: str = "GET" - ): - """Sends a starlette Request object (containing a SPARQL query in the URL parameters) to a proxied SPARQL - endpoint.""" - # TODO: This only supports SPARQL GET requests because the query is sent as a query parameter. - - query_escaped_as_bytes = f"query={quote_plus(query)}".encode("utf-8") - - # TODO: Global app settings should be passed in as a function argument. - url = httpx.URL(url=settings.sparql_endpoint, query=query_escaped_as_bytes) - headers = [] - for header in raw_headers: - if header[0] != b"host": - headers.append(header) - headers.append((b"host", str(url.host).encode("utf-8"))) - rp_req = self.async_client.build_request(method, url, headers=headers) - return await self.async_client.send(rp_req, stream=True) - - -class PyoxigraphRepo(Repo): - def __init__(self, pyoxi_store: pyoxigraph.Store): - self.pyoxi_store = pyoxi_store - - def _handle_query_solution_results( - self, results: pyoxigraph.QuerySolutions - ) -> dict: - """Organise the query results into format serializable by FastAPIs JSONResponse.""" - variables = results.variables - results_dict = {"head": {"vars": [v.value for v in results.variables]}} - results_list = [] - for result in results: - result_dict = {} - for var in variables: - binding = result[var] - if binding: - binding_type = self._pyoxi_result_type(binding) - result_dict[str(var)[1:]] = { - "type": binding_type, - "value": binding.value, - } - results_list.append(result_dict) - results_dict["results"] = {"bindings": results_list} - return results_dict - - @staticmethod - def _handle_query_triples_results(results: pyoxigraph.QueryTriples) -> Graph: - """Parse the query results into a Graph object.""" - ntriples = " .\n".join([str(r) for r in list(results)]) + " ." - g = Graph() - g.bind("prez", URIRef("https://prez.dev/")) - if ntriples == " .": - return g - return g.parse(data=ntriples, format="ntriples") - - def _sync_rdf_query_to_graph(self, query: str) -> Graph: - try: - results = self.pyoxi_store.query(query) - except Exception as e: - print(e) - result_graph = self._handle_query_triples_results(results) - return result_graph - - def _sync_tabular_query_to_table(self, query: str, context: URIRef = None) -> tuple: - results = self.pyoxi_store.query(query) - results_dict = self._handle_query_solution_results(results) - # only return the bindings from the results. - return context, results_dict["results"]["bindings"] - - def _sparql(self, query: str) -> dict | Graph | bool: - """Submit a sparql query to the pyoxigraph store and return the formatted results.""" - results = self.pyoxi_store.query(query) - if isinstance(results, pyoxigraph.QuerySolutions): # a SELECT query result - results_dict = self._handle_query_solution_results(results) - return results_dict - elif isinstance(results, pyoxigraph.QueryTriples): # a CONSTRUCT query result - result_graph = self._handle_query_triples_results(results) - return result_graph - elif isinstance(results, bool): - results_dict = {"head": {}, "boolean": results} - return results_dict - else: - raise TypeError(f"Unexpected result class {type(results)}") - - async def rdf_query_to_graph(self, query: str) -> Graph: - return await run_in_threadpool(self._sync_rdf_query_to_graph, query) - - async def tabular_query_to_table(self, query: str, context: URIRef = None) -> list: - return await run_in_threadpool( - self._sync_tabular_query_to_table, query, context - ) - - async def sparql( - self, query: str, raw_headers: list[tuple[bytes, bytes]], method: str = "" - ) -> list | Graph | bool: - return self._sparql(query) - - @staticmethod - def _pyoxi_result_type(term) -> str: - if isinstance(term, pyoxigraph.Literal): - return "literal" - elif isinstance(term, pyoxigraph.NamedNode): - return "uri" - elif isinstance(term, pyoxigraph.BlankNode): - return "bnode" - else: - raise ValueError(f"Unknown type: {type(term)}") - - -class OxrdflibRepo(Repo): - def __init__(self, oxrdflib_graph: Graph): - self.oxrdflib_graph = oxrdflib_graph - - def _sync_rdf_query_to_graph(self, query: str) -> Graph: - results = self.oxrdflib_graph.query(query) - return results.graph - - def _sync_tabular_query_to_table(self, query: str, context: URIRef = None): - results = self.oxrdflib_graph.query(query) - reformatted_results = [] - for result in results: - reformatted_result = {} - for var in results.vars: - binding = result[var] - if binding: - str_type = self._str_type_for_rdflib_type(binding) - reformatted_result[str(var)] = {"type": str_type, "value": binding} - reformatted_results.append(reformatted_result) - return context, reformatted_results - - async def rdf_query_to_graph(self, query: str) -> Graph: - return await run_in_threadpool(self._sync_rdf_query_to_graph, query) - - async def tabular_query_to_table(self, query: str, context: URIRef = None): - return await run_in_threadpool( - self._sync_tabular_query_to_table, query, context - ) - - def _str_type_for_rdflib_type(self, instance): - map = {URIRef: "uri", BNode: "bnode", Literal: "literal"} - return map[type(instance)] diff --git a/prez/sparql/objects_listings.py b/prez/sparql/objects_listings.py deleted file mode 100755 index b1419cd2..00000000 --- a/prez/sparql/objects_listings.py +++ /dev/null @@ -1,523 +0,0 @@ -import logging -from itertools import chain -from textwrap import dedent -from typing import List, Tuple, Dict, FrozenSet - -from rdflib import Graph, URIRef, Namespace, Literal - -from prez.cache import tbox_cache, profiles_graph_cache -from prez.config import settings -from prez.services.curie_functions import get_uri_for_curie_id -from temp.grammar.grammar import SubSelect - -log = logging.getLogger(__name__) - -ALTREXT = Namespace("http://www.w3.org/ns/dx/conneg/altr-ext#") -PREZ = Namespace("https://prez.dev/") - - -async def get_annotation_properties( - item_graph: Graph, -): - """ - Gets annotation data used for HTML display. - This includes the label, description, and provenance, if available. - Note the following three default predicates are always included. This allows context, i.e. background ontologies, - which are often diverse in the predicates they use, to be aligned with the default predicates used by Prez. The full - range of predicates used can be manually included via profiles. - """ - label_predicates = settings.label_predicates - description_predicates = settings.description_predicates - explanation_predicates = settings.provenance_predicates - other_predicates = settings.other_predicates - terms = ( - set(i for i in item_graph.predicates() if isinstance(i, URIRef)) - | set(i for i in item_graph.objects() if isinstance(i, URIRef)) - | set(i for i in item_graph.subjects() if isinstance(i, URIRef)) - ) - # TODO confirm caching of SUBJECT labels does not cause issues! this could be a lot of labels. Perhaps these are - # better separated and put in an LRU cache. Or it may not be worth the effort. - if not terms: - return None, Graph() - # read labels from the tbox cache, this should be the majority of labels - uncached_terms, labels_g = get_annotations_from_tbox_cache( - terms, - label_predicates, - description_predicates, - explanation_predicates, - other_predicates, - ) - - def other_predicates_statement(other_predicates, uncached_terms_other): - return f"""UNION - {{ - ?unannotated_term ?other_prop ?other . - VALUES ?other_prop {{ {" ".join('<' + str(pred) + '>' for pred in other_predicates)} }} - VALUES ?unannotated_term {{ {" ".join('<' + str(term) + '>' for term in uncached_terms_other)} - }} - }}""" - - queries_for_uncached = f"""CONSTRUCT {{ - ?unlabeled_term ?label_prop ?label . - ?undescribed_term ?desc_prop ?description . - ?unexplained_term ?expl_prop ?explanation . - ?unannotated_term ?other_prop ?other . - }} - WHERE {{ - {{ - ?unlabeled_term ?label_prop ?label . - VALUES ?label_prop {{ {" ".join('<' + str(pred) + '>' for pred in label_predicates)} }} - VALUES ?unlabeled_term {{ {" ".join('<' + str(term) + '>' for term in uncached_terms["labels"])} }} - FILTER(lang(?label) = "" || lang(?label) = "en" || lang(?label) = "en-AU") - }} - UNION - {{ - ?undescribed_term ?desc_prop ?description . - VALUES ?desc_prop {{ {" ".join('<' + str(pred) + '>' for pred in description_predicates)} }} - VALUES ?undescribed_term {{ {" ".join('<' + str(term) + '>' for term in uncached_terms["descriptions"])} - }} - }} - UNION - {{ - ?unexplained_term ?expl_prop ?explanation . - VALUES ?expl_prop {{ {" ".join('<' + str(pred) + '>' for pred in explanation_predicates)} }} - VALUES ?unexplained_term {{ {" ".join('<' + str(term) + '>' for term in uncached_terms["provenance"])} - }} - }} - {other_predicates_statement(other_predicates, uncached_terms["other"]) if other_predicates else ""} - }}""" - return queries_for_uncached, labels_g - - -def get_annotations_from_tbox_cache( - terms: List[URIRef], label_props, description_props, explanation_props, other_props -): - """ - Gets labels from the TBox cache, returns a list of terms that were not found in the cache, and a graph of labels, - descriptions, and explanations - """ - labels_from_cache = Graph(bind_namespaces="rdflib") - terms_list = list(terms) - props_from_cache = { - "labels": list( - chain( - *( - tbox_cache.triples_choices((terms_list, prop, None)) - for prop in label_props - ) - ) - ), - "descriptions": list( - chain( - *( - tbox_cache.triples_choices((terms_list, prop, None)) - for prop in description_props - ) - ) - ), - "provenance": list( - chain( - *( - tbox_cache.triples_choices((terms_list, prop, None)) - for prop in explanation_props - ) - ) - ), - "other": list( - chain( - *( - tbox_cache.triples_choices((terms_list, prop, None)) - for prop in other_props - ) - ) - ), - } - # get all the annotations we can from the cache - all = list(chain(*props_from_cache.values())) - default_language = settings.default_language - for triple in all: - if isinstance(triple[2], Literal): - if triple[2].language == default_language: - labels_from_cache.add(triple) - elif triple[2].language is None: - labels_from_cache.add(triple) - # the remaining terms are not in the cache; we need to query the SPARQL endpoint to attempt to get them - uncached_props = { - k: list(set(terms) - set(triple[0] for triple in v)) - for k, v in props_from_cache.items() - } - return uncached_props, labels_from_cache - - -def temp_listing_count(subquery: SubSelect, klass): - """ - TODO: Implement COUNT and other expressions in SPARQL grammar. - """ - return f""" - PREFIX prez: <{PREZ}> - CONSTRUCT {{ - {klass.n3()} prez:count ?count - }} - WHERE {{ - SELECT (COUNT(DISTINCT ?focus_node) as ?count) {{ {subquery} }} - }}""" - - -def get_relevant_shape_bns_for_profile(selected_class, profile): - """ - Gets the shape blank nodes URIs from the profiles graph for a given profile. - """ - if not profile: - return None - shape_bns = list( - profiles_graph_cache.objects( - subject=profile, - predicate=ALTREXT.hasNodeShape, - ) - ) - if not shape_bns: - return None - relevant_shape_bns = [ - triple[0] - for triple in profiles_graph_cache.triples_choices( - ( - list(shape_bns), - URIRef("http://www.w3.org/ns/shacl#targetClass"), - selected_class, - ) - ) - ] - return relevant_shape_bns - - -def get_listing_predicates(profile, selected_class): - """ - Gets predicates relevant to listings of objects as specified in the profile. - This is used in two scenarios: - 1. "Collection" endpoints, for top level listing of objects of a particular type - 2. For a specific object, where it has members - The predicates retrieved from profiles are: - - child to focus, for example where the object of interest is a Concept Scheme, and is linked to Concept(s) via - the predicate skos:inScheme - - focus to child, for example where the object of interest is a Feature Collection, and is linked to Feature(s) - via the predicate rdfs:member - - parent to focus, for example where the object of interest is a Feature Collection, and is linked to Dataset(s) via - the predicate dcterms:hasPart - - focus to parents, for example where the object of interest is a Concept, and is linked to Concept Scheme(s) via - the predicate skos:inScheme - - relative properties, properties of the parent/child objects that should also be returned. For example, if the - focus object is a Concept Scheme, and the predicate skos:inScheme is used to link from Concept(s) (using - altr-ext:childToFocus) then specifying skos:broader as a relative property will cause the broader concepts to - be returned for each concept - """ - shape_bns = get_relevant_shape_bns_for_profile(selected_class, profile) - if not shape_bns: - return [], [], [], [], [] - child_to_focus = [ - i[2] - for i in profiles_graph_cache.triples_choices( - ( - shape_bns, - ALTREXT.childToFocus, - None, - ) - ) - ] - parent_to_focus = [ - i[2] - for i in profiles_graph_cache.triples_choices( - ( - shape_bns, - ALTREXT.parentToFocus, - None, - ) - ) - ] - focus_to_child = [ - i[2] - for i in profiles_graph_cache.triples_choices( - ( - shape_bns, - ALTREXT.focusToChild, - None, - ) - ) - ] - focus_to_parent = [ - i[2] - for i in profiles_graph_cache.triples_choices( - ( - shape_bns, - ALTREXT.focusToParent, - None, - ) - ) - ] - relative_properties = [ - i[2] - for i in profiles_graph_cache.triples_choices( - ( - shape_bns, - ALTREXT.relativeProperties, - None, - ) - ) - ] - return ( - child_to_focus, - parent_to_focus, - focus_to_child, - focus_to_parent, - relative_properties, - ) - - -def get_item_predicates(profile, selected_class): - """ - Gets any predicates specified in the profile, this includes: - - predicates to include. Uses sh:path - - predicates to exclude. Uses sh:path in conjunction with dash:hidden. - - inverse path predicates to include (inbound links to the object). Uses sh:inversePath. - - sequence path predicates to include, expressed as a list. Uses sh:sequencePath. - """ - shape_bns = get_relevant_shape_bns_for_profile(selected_class, profile) - if not shape_bns: - log.info( - f"No special predicates (include/exclude/inverse/sequence) found for class {selected_class} in profile " - f"{profile}. Default behaviour is to include all predicates, and blank nodes to a depth of two." - ) - return None, None, None, None - includes = [ - i[2] - for i in profiles_graph_cache.triples_choices( - (shape_bns, URIRef("http://www.w3.org/ns/shacl#path"), None) - ) - ] - excludes = [ - i[2] - for i in profiles_graph_cache.triples_choices( - (shape_bns, ALTREXT.exclude, None) - ) - ] - inverses = [ - i[2] - for i in profiles_graph_cache.triples_choices( - (shape_bns, URIRef("http://www.w3.org/ns/shacl#inversePath"), None) - ) - ] - _sequence_nodes = [ - i[2] - for i in profiles_graph_cache.triples_choices( - ( - shape_bns, - URIRef("http://www.w3.org/ns/shacl#sequencePath"), - None, - ) - ) - ] - sequence_paths = [ - [path_item for path_item in profiles_graph_cache.items(i)] - for i in _sequence_nodes - ] - return includes, excludes, inverses, sequence_paths - - -def select_profile_mediatype( - classes: List[URIRef], - requested_profile_uri: URIRef = None, - requested_profile_token: str = None, - requested_mediatypes: List[Tuple] = None, - listing: bool = False, -): - """ - Returns a SPARQL SELECT query which will determine the profile and mediatype to return based on user requests, - defaults, and the availability of these in profiles. - - NB: Most specific class refers to the rdfs:Class of an object which has the most specific rdfs:subClassOf links to - the base class delivered by that API endpoint. The base classes delivered by each API endpoint are: - - SpacePrez: - /s/catalogs -> prez:DatasetList - /s/catalogs/{ds_id} -> dcat:Dataset - /s/catalogs/{ds_id}/collections/{fc_id} -> geo:FeatureCollection - /s/catalogs/{ds_id}/collections -> prez:FeatureCollectionList - /s/catalogs/{ds_id}/collections/{fc_id}/features -> geo:Feature - - VocPrez: - /v/schemes -> skos:ConceptScheme - /v/collections -> skos:Collection - /v/schemes/{cs_id}/concepts -> skos:Concept - - CatPrez: - /c/catalogs -> dcat:Catalog - /c/catalogs/{cat_id}/datasets -> dcat:Dataset - - The following logic is used to determine the profile and mediatype to be returned: - - 1. If a profile and mediatype are requested, they are returned if a matching profile which has the requested - mediatype is found, otherwise the default profile for the most specific class is returned, with its default - mediatype. - 2. If a profile only is requested, if it can be found it is returned, otherwise the default profile for the most - specific class is returned. In both cases the default mediatype is returned. - 3. If a mediatype only is requested, the default profile for the most specific class is returned, and if the - requested mediatype is available for that profile, it is returned, otherwise the default mediatype for that profile - is returned. - 4. If neither a profile nor mediatype is requested, the default profile for the most specific class is returned, - with the default mediatype for that profile. - """ - if listing: - profile_class = PREZ.ListingProfile - else: - profile_class = PREZ.ObjectProfile - if requested_profile_token: - requested_profile_uri = get_uri_for_curie_id(requested_profile_token) - query = dedent( - f""" PREFIX altr-ext: - PREFIX dcat: - PREFIX dcterms: - PREFIX geo: - PREFIX prez: - PREFIX prof: - PREFIX rdfs: - PREFIX skos: - PREFIX sh: - - SELECT ?profile ?title ?class (count(?mid) as ?distance) ?req_profile ?def_profile ?format ?req_format ?def_format - - WHERE {{ - VALUES ?class {{{" ".join('<' + str(klass) + '>' for klass in classes)}}} - ?class rdfs:subClassOf* ?mid . - ?mid rdfs:subClassOf* ?base_class . - VALUES ?base_class {{ dcat:Dataset geo:FeatureCollection geo:Feature - skos:ConceptScheme skos:Concept skos:Collection - prez:ProfilesList dcat:Catalog dcat:Resource prof:Profile prez:SPARQLQuery - prez:SearchResult prez:CQLObjectList prez:QueryablesList prez:Object }} - ?profile altr-ext:constrainsClass ?class ; - altr-ext:hasResourceFormat ?format ; - dcterms:title ?title .\ - {f'?profile a {profile_class.n3()} .'} - {f'BIND(?profile=<{requested_profile_uri}> as ?req_profile)' if requested_profile_uri else ''} - BIND(EXISTS {{ ?shape sh:targetClass ?class ; - altr-ext:hasDefaultProfile ?profile }} AS ?def_profile) - {generate_mediatype_if_statements(requested_mediatypes) if requested_mediatypes else ''} - BIND(EXISTS {{ ?profile altr-ext:hasDefaultResourceFormat ?format }} AS ?def_format) - }} - GROUP BY ?class ?profile ?req_profile ?def_profile ?format ?req_format ?def_format ?title - ORDER BY DESC(?req_profile) DESC(?distance) DESC(?def_profile) DESC(?req_format) DESC(?def_format)""" - ) - return query - - -def generate_mediatype_if_statements(requested_mediatypes: list): - """ - Generates a list of if statements which will be used to determine the mediatype to return based on user requests, - and the availability of these in profiles. - These are of the form: - BIND( - IF(?format="application/ld+json", "0.9", - IF(?format="text/html", "0.8", - IF(?format="image/apng", "0.7", ""))) AS ?req_format) - """ - # TODO ConnegP appears to return a tuple of q values and profiles for headers, and only profiles (no q values) if they - # are not specified in QSAs. - if not isinstance(next(iter(requested_mediatypes)), tuple): - requested_mediatypes = [(1, mt) for mt in requested_mediatypes] - - line_join = "," + "\n" - ifs = ( - f"BIND(\n" - f"""{line_join.join({chr(9) + 'IF(?format="' + tup[1] + '", "' + str(tup[0]) + '"' for tup in requested_mediatypes})}""" - f""", ""{')' * len(requested_mediatypes)}\n""" - f"\tAS ?req_format)" - ) - return ifs - - -def get_endpoint_template_queries(classes: FrozenSet[URIRef]): - """ - NB the FILTER clause here should NOT be required but RDFLib has a bug (perhaps related to the +/* operators - - requires further investigation). Removing the FILTER clause will return too many results in instances where there - should be NO results - as if the VALUES ?classes clause is not used. - """ - query = f""" - PREFIX ont: - PREFIX xsd: - - SELECT ?endpoint ?parent_endpoint ?relation_direction ?relation_predicate ?endpoint_template ?distance - {{ - VALUES ?classes {{ {" ".join('<' + str(klass) + '>' for klass in classes)} }} - {{ - ?endpoint a ont:ObjectEndpoint ; - ont:endpointTemplate ?endpoint_template ; - ont:deliversClasses ?classes . - BIND("0"^^xsd:integer AS ?distance) - }} - UNION - {{ - ?parent_endpoint ?relation_direction ?relation_predicate . - ?endpoint ?ep_relation_direction ?ep_relation_predicate ; - ont:endpointTemplate ?endpoint_template ; - ont:deliversClasses ?classes . - FILTER(?classes IN ({", ".join('<' + str(klass) + '>' for klass in classes)})) - VALUES ?relation_direction {{ont:focusToParentRelation ont:parentToFocusRelation}} - VALUES ?ep_relation_direction {{ont:focusToParentRelation ont:parentToFocusRelation}} - {{ SELECT ?parent_endpoint ?endpoint (count(?intermediate) as ?distance) - {{ - ?endpoint ont:parentEndpoint* ?intermediate ; - ont:deliversClasses ?classes . - ?intermediate ont:parentEndpoint* ?parent_endpoint . - ?intermediate a ?intermediateEPClass . - ?parent_endpoint a ?parentEPClass . - VALUES ?intermediateEPClass {{ont:ObjectEndpoint}} - VALUES ?parentEPClass {{ont:ObjectEndpoint}} - }} - GROUP BY ?parent_endpoint ?endpoint - - }} - }} - }} ORDER BY ASC(?distance) - """ - return query - - -def generate_relationship_query( - uri: URIRef, endpoint_to_relations: Dict[URIRef, List[Tuple[URIRef, Literal]]] -): - """ - Generates a SPARQL query of the form: - SELECT * {{ SELECT ?endpoint ?parent_1 ?parent_2 - WHERE { - BIND("/s/catalogs/$parent_1/collections/$object" as ?endpoint) - ?parent_1 . - }}} - """ - if not endpoint_to_relations: - return None - subqueries = [] - for endpoint, relations in endpoint_to_relations.items(): - subquery = f"""{{ SELECT ?endpoint {" ".join(["?parent_" + str(i + 1) for i, pred in enumerate(relations)])} - WHERE {{\n BIND("{endpoint}" as ?endpoint)\n""" - uri_str = f"<{uri}>" - for i, relation in enumerate(relations): - predicate, direction = relation - if predicate: - parent = "?parent_" + str(i) - if direction == URIRef("https://prez.dev/ont/parentToFocusRelation"): - subquery += f"{parent} <{predicate}> {uri_str} .\n" - else: # assuming the direction is "focus_to_parent" - subquery += f"{uri_str} <{predicate}> {parent} .\n" - uri_str = parent - subquery += "}}" - subqueries.append(subquery) - - union_query = "SELECT * {" + " UNION ".join(subqueries) + "}" - return union_query - - -def startup_count_objects(): - """ - Retrieves hardcoded counts for collections in the dataset (feature collections, datasets etc.) - """ - return f"""PREFIX prez: -CONSTRUCT {{ ?collection prez:count ?count }} -WHERE {{ ?collection prez:count ?count }}""" diff --git a/temp/grammar/grammar.py b/temp/grammar/grammar.py index 92e53a97..5c1b4d20 100755 --- a/temp/grammar/grammar.py +++ b/temp/grammar/grammar.py @@ -5,12 +5,10 @@ from typing import List, Union, Optional, Generator, Tuple from pydantic import BaseModel, field_validator -from rdflib import URIRef, Variable, BNode, Literal +from rdflib import URIRef, Variable from rdflib.plugins.sparql import prepareQuery from rdflib.plugins.sparql.algebra import translateAlgebra -from prez.reference_data.prez_ns import PREZ - log = logging.getLogger(__name__) @@ -24,11 +22,14 @@ def __str__(self): return "".join(part for part in self.render()) def __repr__(self): - return f"{self.__class__.__name__}({self})" + return f"{self.__class__.__name__} ({self})" def render(self): raise NotImplementedError("Subclasses must implement this method.") + def to_string(self): + return self.__str__() + def collect_triples(self) -> List[SimplifiedTriple]: """ Recursively collect SimplifiedTriple instances from this object. @@ -345,7 +346,7 @@ class ConditionalOrExpression(SPARQLGrammarBase): def render(self) -> Generator[str, None, None]: for i, conditional_and_expression in enumerate( - self.conditional_and_expressions + self.conditional_and_expressions ): yield from conditional_and_expression.render() if i < len(self.conditional_and_expressions) - 1: @@ -686,10 +687,10 @@ def render(self) -> Generator[str, None, None]: @classmethod def filter_relational( - cls, - focus: PrimaryExpression, - comparators: Union[PrimaryExpression, List[PrimaryExpression]], - operator: str, + cls, + focus: PrimaryExpression, + comparators: Union[PrimaryExpression, List[PrimaryExpression]], + operator: str, ) -> Filter: """ Convenience method to create a FILTER clause to compare the focus node to comparators. @@ -1053,7 +1054,7 @@ def render(self) -> Generator[str, None, None]: @classmethod def create_with_one_expr( - cls, function_name: str, expression: PrimaryExpression + cls, function_name: str, expression: PrimaryExpression ) -> "BuiltInCall": """ Convenience method for functions that take a single PrimaryExpression as an argument. @@ -1063,7 +1064,7 @@ def create_with_one_expr( @classmethod def create_with_n_expr( - cls, function_name: str, expressions: List[PrimaryExpression] + cls, function_name: str, expressions: List[PrimaryExpression] ) -> "BuiltInCall": """ Convenience method for functions that take a list of PrimaryExpressions as arguments. diff --git a/temp/test_search.py b/temp/test_search.py deleted file mode 100755 index 0255d2d9..00000000 --- a/temp/test_search.py +++ /dev/null @@ -1,13 +0,0 @@ -from rdflib import RDFS - -from prez.sparql.search_query import SearchQuery - -# from temp.grammar import SearchQuery - -test = SearchQuery( - search_term="test", - pred_vals=[RDFS.label], - limit=10, - offset=0, -).render() -print("") diff --git a/test_data/spaceprez.ttl b/test_data/spaceprez.ttl index 446ac86e..380bb9f0 100755 --- a/test_data/spaceprez.ttl +++ b/test_data/spaceprez.ttl @@ -4,10 +4,11 @@ PREFIX ex: PREFIX geo: PREFIX rdfs: -ex:Dataset a dcat:Dataset ; - rdfs:label "Dataset" ; - rdfs:member ex:FeatureCollection ; - ex:property "top level dataset property" ; + +ex:SpacePrezCatalog a dcat:Catalog ; + dcterms:title "SpacePrez Catalog" ; + dcterms:description "A catalog of SpacePrez data" ; + dcterms:hasPart ex:FeatureCollection ; . ex:FeatureCollection a geo:FeatureCollection ; diff --git a/tests/_test_cql.py b/tests/_test_cql.py index c2e2f5e0..ee97f148 100755 --- a/tests/_test_cql.py +++ b/tests/_test_cql.py @@ -7,7 +7,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo from urllib.parse import quote_plus diff --git a/tests/test_count.py b/tests/test_count.py index c4dc4cc9..e7c0433d 100755 --- a/tests/test_count.py +++ b/tests/test_count.py @@ -6,7 +6,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") diff --git a/tests/test_dd_profiles.py b/tests/test_dd_profiles.py index 194a63bf..3707d3f4 100755 --- a/tests/test_dd_profiles.py +++ b/tests/test_dd_profiles.py @@ -8,7 +8,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") diff --git a/tests/test_endpoints_cache.py b/tests/test_endpoints_cache.py index b0f33b4b..56462677 100755 --- a/tests/test_endpoints_cache.py +++ b/tests/test_endpoints_cache.py @@ -7,7 +7,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") diff --git a/tests/test_endpoints_catprez.py b/tests/test_endpoints_catprez.py index 8f0016cd..c7f68a83 100755 --- a/tests/test_endpoints_catprez.py +++ b/tests/test_endpoints_catprez.py @@ -1,3 +1,4 @@ +import asyncio import time from pathlib import Path @@ -6,11 +7,10 @@ from pyoxigraph.pyoxigraph import Store from rdflib import Graph, URIRef from rdflib.namespace import RDF, DCAT -from rdflib.compare import isomorphic from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") @@ -18,8 +18,8 @@ def test_store() -> Store: # Create a new pyoxigraph Store store = Store() - for file in Path(__file__).parent.glob("../test_data/catprez.ttl"): - store.load(file.read_bytes(), "text/turtle") + file = Path("../test_data/catprez.ttl") + store.load(file.read_bytes(), "text/turtle") return store @@ -51,7 +51,7 @@ def override_get_repo(): app.dependency_overrides[get_repo] = override_get_repo - with TestClient(app) as c: + with TestClient(app, backend_options={'loop_factory': asyncio.new_event_loop}) as c: wait_for_app_to_be_ready(c) yield c @@ -62,7 +62,7 @@ def override_get_repo(): @pytest.fixture(scope="session") def a_catalog_link(client): # get link for first catalog - r = client.get("/c/catalogs") + r = client.get("/catalogs") g = Graph().parse(data=r.text) member_uri = g.value(None, RDF.type, DCAT.Catalog) link = g.value(member_uri, URIRef(f"https://prez.dev/link", None)) @@ -81,7 +81,7 @@ def a_resource_link(client, a_catalog_link): def test_catalog_listing_anot(client): r = client.get( - f"/c/catalogs?_mediatype=text/turtle&_profile=prez:OGCListingProfile" + f"/catalogs?_mediatype=text/turtle&_profile=prez:OGCListingProfile" ) response_graph = Graph().parse(data=r.text) expected_response_1 = ( diff --git a/tests/test_endpoints_management.py b/tests/test_endpoints_management.py index e9b83d75..6afeae72 100755 --- a/tests/test_endpoints_management.py +++ b/tests/test_endpoints_management.py @@ -8,7 +8,7 @@ from prez.app import app from prez.dependencies import get_repo from prez.reference_data.prez_ns import PREZ -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") diff --git a/tests/test_endpoints_object.py b/tests/test_endpoints_object.py index c32b4b31..4d6c9678 100755 --- a/tests/test_endpoints_object.py +++ b/tests/test_endpoints_object.py @@ -8,7 +8,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") diff --git a/tests/test_endpoints_ok.py b/tests/test_endpoints_ok.py index 88a49e83..799c913d 100755 --- a/tests/test_endpoints_ok.py +++ b/tests/test_endpoints_ok.py @@ -1,7 +1,7 @@ import logging import time from pathlib import Path -from typing import Optional, Set, Dict +from typing import Optional, Set import pytest from fastapi.testclient import TestClient @@ -11,7 +11,7 @@ from prez.app import app from prez.dependencies import get_repo from prez.reference_data.prez_ns import PREZ -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo log = logging.getLogger(__name__) @@ -62,8 +62,8 @@ def override_get_repo(): app.dependency_overrides.clear() -def test_catprez_links( - client: TestClient, visited: Optional[Set] = None, link="/c/catalogs" +def test_ogcprez_links( + client: TestClient, visited: Optional[Set] = None, link="/catalogs" ): if not visited: visited = set() @@ -80,46 +80,4 @@ def test_catprez_links( print(link) if link not in visited: visited.add(link) - test_catprez_links(client, visited, str(link)) - - -def test_vocprez_links( - client: TestClient, visited: Optional[Set] = None, link="/v/catalogs" -): - if not visited: - visited = set() - response = client.get(link) - g = Graph().parse(data=response.text, format="turtle") - links = list(g.objects(None, PREZ.link)) - member_bnode_list = list(g.objects(None, PREZ.members)) - if member_bnode_list: - member_bnode = member_bnode_list[0] - member_links = list(g.objects(member_bnode, PREZ.link)) - links.extend(member_links) - assert response.status_code == 200 - for link in links: - print(link) - if link not in visited: - visited.add(link) - test_vocprez_links(client, visited, str(link)) - - -def test_spaceprez_links( - client: TestClient, visited: Optional[Set] = None, link="/s/catalogs" -): - if not visited: - visited = set() - response = client.get(link) - g = Graph().parse(data=response.text, format="turtle") - links = list(g.objects(None, PREZ.link)) - member_bnode_list = list(g.objects(None, PREZ.members)) - if member_bnode_list: - member_bnode = member_bnode_list[0] - member_links = list(g.objects(member_bnode, PREZ.link)) - links.extend(member_links) - assert response.status_code == 200 - for link in links: - print(link) - if link not in visited: - visited.add(link) - test_spaceprez_links(client, visited, str(link)) + test_ogcprez_links(client, visited, str(link)) diff --git a/tests/test_endpoints_profiles.py b/tests/test_endpoints_profiles.py index d232b595..35fd78c8 100755 --- a/tests/test_endpoints_profiles.py +++ b/tests/test_endpoints_profiles.py @@ -8,7 +8,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") @@ -52,9 +52,9 @@ def test_profile(client): def test_ogcprez_profile(client): # check the example remote profile is loaded - r = client.get("/profiles/prez:OGCProfile") + r = client.get("/profiles/prez:OGCRecordsProfile") g = Graph().parse(data=r.text) - assert (URIRef("https://prez.dev/OGCProfile"), RDF.type, PROF.Profile) in g + assert (URIRef("https://prez.dev/OGCRecordsProfile"), RDF.type, PROF.Profile) in g def test_sp_profile(client): diff --git a/tests/test_endpoints_spaceprez.py b/tests/test_endpoints_spaceprez.py index 3e1e6a07..e3264bd6 100755 --- a/tests/test_endpoints_spaceprez.py +++ b/tests/test_endpoints_spaceprez.py @@ -1,24 +1,24 @@ +import asyncio from pathlib import Path import pytest from fastapi.testclient import TestClient from pyoxigraph.pyoxigraph import Store from rdflib import Graph, URIRef -from rdflib.compare import isomorphic -from rdflib.namespace import RDF, DCAT, RDFS, GEO +from rdflib.namespace import RDF, DCAT, GEO from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") def test_store() -> Store: # Create a new pyoxigraph Store store = Store() - - for file in Path(__file__).parent.glob("../test_data/spaceprez.ttl"): - store.load(file.read_bytes(), "text/turtle") + + file = Path("../test_data/spaceprez.ttl") + store.load(file.read_bytes(), "text/turtle") return store @@ -37,7 +37,7 @@ def override_get_repo(): app.dependency_overrides[get_repo] = override_get_repo - with TestClient(app) as c: + with TestClient(app, backend_options={'loop_factory': asyncio.new_event_loop}) as c: yield c # Remove the override to ensure subsequent tests are unaffected @@ -45,21 +45,21 @@ def override_get_repo(): @pytest.fixture(scope="session") -def a_dataset_link(client): - r = client.get("/s/catalogs") +def a_catalog_link(client): + r = client.get("/catalogs") g = Graph().parse(data=r.text) - member_uri = g.value(None, RDF.type, DCAT.Dataset) + member_uri = g.value(None, RDF.type, DCAT.Catalog) link = g.value(member_uri, URIRef(f"https://prez.dev/link", None)) return link @pytest.fixture(scope="session") -def an_fc_link(client, a_dataset_link): - r = client.get(f"{a_dataset_link}/collections") +def an_fc_link(client, a_catalog_link): + r = client.get(f"{a_catalog_link}/collections") g = Graph().parse(data=r.text) links = g.objects(subject=None, predicate=URIRef(f"https://prez.dev/link")) for link in links: - if link != a_dataset_link: + if link != a_catalog_link: return link @@ -73,13 +73,13 @@ def a_feature_link(client, an_fc_link): return link -def test_dataset_anot(client, a_dataset_link): - r = client.get(f"{a_dataset_link}?_mediatype=text/turtle") +def test_dataset_anot(client, a_catalog_link): + r = client.get(f"{a_catalog_link}?_mediatype=text/turtle") response_graph = Graph().parse(data=r.text) expected_response_1 = ( - URIRef("https://example.com/Dataset"), + URIRef("https://example.com/SpacePrezCatalog"), RDF.type, - DCAT.Dataset, + DCAT.Catalog, ) assert next(response_graph.triples(expected_response_1)) diff --git a/tests/test_endpoints_vocprez.py b/tests/test_endpoints_vocprez.py index a39a32fd..69222a35 100755 --- a/tests/test_endpoints_vocprez.py +++ b/tests/test_endpoints_vocprez.py @@ -9,7 +9,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") diff --git a/tests/test_shacl_parsing.py b/tests/test_node_selection_shacl.py similarity index 89% rename from tests/test_shacl_parsing.py rename to tests/test_node_selection_shacl.py index 78687162..29c533dc 100755 --- a/tests/test_shacl_parsing.py +++ b/tests/test_node_selection_shacl.py @@ -1,8 +1,8 @@ -from temp.shacl_nodeshapes2sparql import NodeShape, PropertyShape +from prez.services.query_generation.shacl_node_selection import NodeShape, PropertyShape from rdflib import Graph, URIRef import pytest -endpoints_graph = Graph().parse("tests/data/nodeshapes/endpoints.ttl", format="turtle") +endpoints_graph = Graph().parse("prez/reference_data/endpoints/endpoint_node_selection_shapes.ttl", format="turtle") # @pytest.fixture diff --git a/tests/test_redirect_endpoint.py b/tests/test_redirect_endpoint.py index 92653193..1e66b14f 100755 --- a/tests/test_redirect_endpoint.py +++ b/tests/test_redirect_endpoint.py @@ -6,7 +6,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") diff --git a/tests/test_search.py b/tests/test_search.py index 990fc012..3d429002 100755 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -9,7 +9,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session") diff --git a/tests/test_search_grammar.py b/tests/test_search_grammar.py new file mode 100644 index 00000000..9c104a29 --- /dev/null +++ b/tests/test_search_grammar.py @@ -0,0 +1,238 @@ +from rdflib import DCAT + +from prez.reference_data.prez_ns import PREZ +from temp.grammar.grammar import * + +""" +SELECT ?search_result_uri ?predicate ?match ?weight (URI(CONCAT("urn:hash:", SHA256(CONCAT(STR(?search_result_uri), STR(?predicate), STR(?match), STR(?weight))))) AS ?hashID) + WHERE { + SELECT ?search_result_uri ?predicate ?match (SUM(?w) AS ?weight) + WHERE + { + ?search_result_uri ?predicate ?match . + VALUES ?predicate { $predicates } + { + ?search_result_uri ?predicate ?match . + BIND (100 AS ?w) + FILTER (LCASE(?match) = "$term") + } + UNION + { + ?search_result_uri ?predicate ?match . + BIND (20 AS ?w) + FILTER (REGEX(?match, "^$term", "i")) + } + UNION + { + ?search_result_uri ?predicate ?match . + BIND (10 AS ?w) + FILTER (REGEX(?match, "$term", "i")) + } + } + GROUP BY ?search_result_uri ?predicate ?match + } + ORDER BY DESC(?weight) +""" + +all_vars = { + "sr_uri": Var(value="search_result_uri"), + "pred": Var(value="predicate"), + "match": Var(value="match"), + "weight": Var(value="weight"), + "w": Var(value="w"), + "search_term": Var(value="search_term"), +} + + +def test_main(): + # Assuming that the classes are defined as per your previous message + + # Create the necessary variables + # Create the necessary variables as PrimaryExpressions wrapped in STR function calls + sr_uri = Var(value="search_result_uri") + pred = Var(value="predicate") + match = Var(value="match") + weight = Var(value="weight") + + str_sr_uri = PrimaryExpression( + content=BuiltInCall.create_with_one_expr( + "STR", PrimaryExpression(content=sr_uri) + ) + ) + str_pred = PrimaryExpression( + content=BuiltInCall.create_with_one_expr("STR", PrimaryExpression(content=pred)) + ) + str_match = PrimaryExpression( + content=BuiltInCall.create_with_one_expr( + "STR", PrimaryExpression(content=match) + ) + ) + str_weight = PrimaryExpression( + content=BuiltInCall.create_with_one_expr( + "STR", PrimaryExpression(content=weight) + ) + ) + + # Create the inner CONCAT function call with the STR-wrapped variables + inner_concat = BuiltInCall.create_with_n_expr( + "CONCAT", [str_sr_uri, str_pred, str_match, str_weight] + ) + + # Wrap the inner CONCAT in a PrimaryExpression for the SHA256 function call + sha256_expr = PrimaryExpression( + content=BuiltInCall.create_with_one_expr( + "SHA256", PrimaryExpression(content=inner_concat) + ) + ) + + # Create the outer CONCAT function call, including the "urn:hash:" literal + urn_literal = PrimaryExpression(content=RDFLiteral(value="urn:hash:")) + outer_concat = BuiltInCall.create_with_n_expr("CONCAT", [urn_literal, sha256_expr]) + + # Finally, create the URI function call + uri_expr = BuiltInCall.create_with_one_expr( + "URI", PrimaryExpression(content=outer_concat) + ) + + # Render the expression + print("".join(part for part in uri_expr.render())) + + +def test_primary_expression(): + # Create a PrimaryExpression + primary_expr = PrimaryExpression(content=Var(value="myVar")) + + # Use the convenience method to create a BuiltInCall with the PrimaryExpression + str_function_call = BuiltInCall.create_with_one_expr("STR", primary_expr) + + # Render the BuiltInCall + str_function_call.to_string() + + +def test_multiple_primary_expression(): + # Create a list of PrimaryExpressions + primary_expressions = [ + PrimaryExpression(content=Var(value="var1")), + PrimaryExpression(content=Var(value="var2")), + ] + + # Use the convenience method to create a BuiltInCall with the list of PrimaryExpressions + concat_function_call = BuiltInCall.create_with_n_expr("CONCAT", primary_expressions) + + # Render the BuiltInCall + concat_function_call.to_string() + + +def test_aggregate(): + # function_name: str # One of 'COUNT', 'SUM', 'MIN', 'MAX', 'AVG', 'SAMPLE', 'GROUP_CONCAT' + # distinct: bool = False + # expression: Optional[ + # Union[str, Expression] + # ] = None # '*' for COUNT, else Expression + # separator: Optional[str] = None # Only used for GROUP_CONCAT + """ + SUM(?w) + """ + pr_exp = PrimaryExpression(content=(all_vars["w"])) + exp = Expression.from_primary_expr(pr_exp) + count_expression = Aggregate(function_name="SUM", expression=exp) + print("".join(part for part in count_expression.render())) + + +def test_regex(): + # Example usage of RegexExpression + pe1 = PrimaryExpression(content=Var(value="textVar")) + pe2 = PrimaryExpression(content=RDFLiteral(value="^regexPattern")) + pe3 = PrimaryExpression(content=RDFLiteral(value="i")) + regex_expression = RegexExpression( + text_expression=Expression.from_primary_expr(pe1), # Expression for the text + pattern_expression=Expression.from_primary_expr( + pe2 + ), # Expression for the regex pattern + flags_expression=Expression.from_primary_expr( + pe3 + ), # Optional: Expression for regex flags + ) + + # Render the RegexExpression + print("".join(part for part in regex_expression.render())) + + +def test_first_part_search(): + # Variables for outer SELECT + + expressions = [PrimaryExpression(content=v) for v in all_vars.values()] + str_builtins = [BuiltInCall.create_with_one_expr("STR", e) for e in expressions] + str_expressions = [PrimaryExpression(content=b) for b in str_builtins] + urn_literal = PrimaryExpression(content=RDFLiteral(value="urn:hash:")) + all_expressions = [urn_literal] + str_expressions + uri_expr = BuiltInCall.create_with_n_expr("CONCAT", all_expressions) + print("".join(part for part in uri_expr.render())) + + +def test_inner_ggp_search(): + # inner where + # { + # ?search_result_uri ?predicate ?match. + # BIND(100 AS ?w) + # FILTER(LCASE(?match) = "$term") + # } + ggp = GroupGraphPattern(content=GroupGraphPatternSub()) + + # select + ggp.content.add_triple( + SimplifiedTriple( + subject=all_vars["sr_uri"], + predicate=all_vars["pred"], + object=all_vars["match"], + ) + ) + + # bind + bind_for_w = Bind( + expression=Expression.from_primary_expr( + PrimaryExpression(content=NumericLiteral(value="100")) + ), + var=Var(value="w"), + ) + bind_gpnt = GraphPatternNotTriples(content=bind_for_w) + ggp.content.add_pattern(bind_gpnt) + + # filter + bifc = BuiltInCall(function_name="LCASE", arguments=[all_vars["match"]]) + pe_focus = PrimaryExpression(content=bifc) + pe_st = PrimaryExpression(content=all_vars["search_term"]) + filter_expr = Filter.filter_relational( + focus=pe_focus, comparators=pe_st, operator="=" + ) + filter_gpnt = GraphPatternNotTriples(content=filter_expr) + ggp.content.add_pattern(filter_gpnt) + + print("".join(part for part in ggp.render())) + + +def test_count_query(): + subquery = """SELECT ?focus_node { ?focus_node a dcat:Dataset }""" + + klass = IRI(value=DCAT.Dataset) + # Assuming `klass` is an instance of IRI class and `PREZ` is a predefined IRI + count_iri = IRI(value=PREZ["count"]) # Replace with actual IRI + count_var = Var(value="count") + + construct_triples = ConstructTriples( + triples=[SimplifiedTriple(subject=klass, predicate=count_iri, object=count_var)] + ) + construct_template = ConstructTemplate(construct_triples=construct_triples) + # Assuming `subquery` is a string containing the subquery + subquery_str = SubSelectString(select_string=subquery) + ggp = GroupGraphPattern(content=subquery_str) + where_clause = WhereClause(group_graph_pattern=ggp) + construct_query = ConstructQuery( + construct_template=construct_template, + where_clause=where_clause, + solution_modifier=SolutionModifier(), # Assuming no specific modifiers + ) + + +if __name__ == "__main__": + test_regex() diff --git a/tests/test_sparql.py b/tests/test_sparql.py index dddd7682..4ad1f60c 100755 --- a/tests/test_sparql.py +++ b/tests/test_sparql.py @@ -6,7 +6,7 @@ from prez.app import app from prez.dependencies import get_repo -from prez.sparql.methods import Repo, PyoxigraphRepo +from prez.repositories import Repo, PyoxigraphRepo @pytest.fixture(scope="session")