Pyoxigraph MVP
recalcitrantsupplant committed Sep 26, 2023
1 parent 759cd2c commit 66593ca
Showing 10 changed files with 257 additions and 45 deletions.
32 changes: 16 additions & 16 deletions poetry.lock

Generated file; diff not rendered.

5 changes: 4 additions & 1 deletion prez/app.py
@@ -32,7 +32,7 @@
create_endpoints_graph,
populate_api_info,
add_prefixes_to_prefix_graph,
add_common_context_ontologies_to_tbox_cache,
add_common_context_ontologies_to_tbox_cache, load_test_data,
)
from prez.services.exception_catchers import (
catch_400,
@@ -123,6 +123,9 @@ async def app_startup():
await count_objects()
await populate_api_info()
await add_common_context_ontologies_to_tbox_cache()
if settings.test_mode:
log.info("Test mode enabled - loading test data into local Pyoxigraph instance")
load_test_data()


@app.on_event("shutdown")
3 changes: 3 additions & 0 deletions prez/cache.py
@@ -1,4 +1,5 @@
from rdflib import Graph, ConjunctiveGraph
from pyoxigraph import Store

tbox_cache = Graph()

@@ -17,3 +18,5 @@
counts_graph = Graph()

search_methods = {}

pyoxi_store = Store()
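
Note: a minimal sketch of how this module-level pyoxigraph Store is used elsewhere in the commit: load serialised RDF as bytes, then query it with SPARQL. The triple and query below are illustrative only, not part of this change.

from pyoxigraph import Store

store = Store()  # in-memory store, as in prez/cache.py
# Store.load() accepts raw bytes plus a MIME type; load_test_data() relies on this.
store.load(b'<http://example.com/s> <http://example.com/p> "o" .', "text/turtle")
for solution in store.query("SELECT ?s WHERE { ?s ?p ?o }"):
    print(solution["s"])  # a pyoxigraph.NamedNode
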
20 changes: 11 additions & 9 deletions prez/config.py
@@ -29,7 +29,7 @@ class Settings(BaseSettings):
prez_version:
"""

sparql_endpoint: str
sparql_endpoint: Optional[str]
sparql_username: Optional[str]
sparql_password: Optional[str]
sparql_auth: Optional[tuple]
@@ -48,9 +48,10 @@ class Settings(BaseSettings):
provenance_predicates = [DCTERMS.provenance]
other_predicates = [SDO.color, REG.status]
sparql_timeout = 30.0

use_pyoxi = False
log_level = "INFO"
log_output = "stdout"
test_mode = False
prez_title: Optional[str] = "Prez"
prez_desc: Optional[str] = (
"A web framework API for delivering Linked Data. It provides read-only access to "
@@ -59,13 +60,14 @@
prez_version: Optional[str]
disable_prefix_generation: bool = False

@root_validator()
def check_endpoint_enabled(cls, values):
if not values.get("sparql_endpoint"):
raise ValueError(
'A SPARQL endpoint must be specified using the "SPARQL_ENDPOINT" environment variable'
)
return values

# @root_validator()
# def check_endpoint_enabled(cls, values):
# if not values.get("sparql_endpoint"):
# raise ValueError(
# 'A SPARQL endpoint must be specified using the "SPARQL_ENDPOINT" environment variable'
# )
# return values

@root_validator()
def get_version(cls, values):
5 changes: 5 additions & 0 deletions prez/dependencies.py
@@ -0,0 +1,5 @@
from prez.cache import pyoxi_store


def get_pyoxi_store():
return pyoxi_store
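
Note: get_pyoxi_store has the shape of a FastAPI dependency. Below is a hedged sketch of how a route could receive the store through Depends; the router and path are hypothetical and not part of this commit.

from fastapi import APIRouter, Depends
from pyoxigraph import Store

from prez.dependencies import get_pyoxi_store

router = APIRouter()  # hypothetical router, for illustration only

@router.get("/store-size")  # hypothetical endpoint
def store_size(store: Store = Depends(get_pyoxi_store)):
    # len() on a pyoxigraph Store returns the number of quads it holds
    return {"quads": len(store)}
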
10 changes: 10 additions & 0 deletions prez/services/app_service.py
@@ -14,6 +14,7 @@
tbox_cache,
)
from prez.config import settings
from prez.dependencies import get_pyoxi_store
from prez.reference_data.prez_ns import PREZ, ALTREXT
from prez.sparql.methods import rdf_query_to_graph, sparql_query_non_async
from prez.sparql.objects_listings import startup_count_objects
@@ -23,6 +24,9 @@


async def healthcheck_sparql_endpoints():
if settings.use_pyoxi:
log.info("Skipping SPARQL endpoint healthcheck as using PyOxigraph")
return
connected_to_triplestore = False
log.info(f"Checking SPARQL endpoint {settings.sparql_endpoint} is online")
username = settings.sparql_username
@@ -194,3 +198,9 @@ async def add_common_context_ontologies_to_tbox_cache():
for triple in triples:
tbox_cache.add(triple)
log.info(f"Added {len(tbox_cache):,} triples from context ontologies to TBox cache")


def load_test_data():
pyoxi_store = get_pyoxi_store()
for file in (Path(__file__).parent.parent.parent/'tests/data').glob('*/input/*.ttl'):
pyoxi_store.load(file.read_bytes(), "text/turtle")
90 changes: 72 additions & 18 deletions prez/sparql/methods.py
@@ -4,12 +4,15 @@
from typing import List

import httpx
from httpx import Client, AsyncClient, HTTPError
import pyoxigraph
from fastapi import Depends
from httpx import Client, AsyncClient
from httpx import Response as httpxResponse
from rdflib import Namespace, Graph, URIRef
from starlette.requests import Request
from async_lru import alru_cache

from prez.config import settings
from prez.dependencies import get_pyoxi_store

PREZ = Namespace("https://prez.dev/")

@@ -32,6 +35,9 @@

def sparql_query_non_async(query: str) -> Tuple[bool, Union[List, Dict]]:
"""Executes a SPARQL SELECT query for a single SPARQL endpoint"""
if settings.use_pyoxi:
result = tabular_query_pyoxi(query)
return True, result
response: httpxResponse = client.post(
settings.sparql_endpoint,
data=query,
@@ -51,6 +57,9 @@ def sparql_query_non_async(query: str) -> Tuple[bool, Union[List, Dict]]:

def sparql_ask_non_async(query: str):
"""Returns True if the provided triple pattern exists in the graph, otherwise False"""
if settings.use_pyoxi:
result = rdf_query_pyoxi(query)
return True, result
response: httpxResponse = client.post(
settings.sparql_endpoint,
data={"query": query},
@@ -87,7 +96,7 @@ async def sparql(request: Request):


# @alru_cache(maxsize=1000)
async def send_query(query: str, mediatype="text/turtle"):
async def _send_query(query: str, mediatype="text/turtle"):
"""Sends a SPARQL query asynchronously.
Args: query: str: A SPARQL query to be sent asynchronously.
Returns: httpx.Response: A httpx.Response object
@@ -102,18 +111,6 @@ async def send_query(query: str, mediatype="text/turtle"):
return response


async def rdf_query_to_graph(query: str) -> Graph:
"""
Sends a SPARQL query asynchronously and parses the response into an RDFLib Graph.
Args: query: str: A SPARQL query to be sent asynchronously.
Returns: rdflib.Graph: An RDFLib Graph object
"""
response = await send_query(query)
g = Graph()
await response.aread()
return g.parse(data=response.text, format="turtle")


async def send_queries(
rdf_queries: List[str], tabular_queries: List[Tuple[URIRef, str]] = None
) -> Tuple[Graph, List[Any]]:
@@ -148,12 +145,69 @@ async def send_queries(
return g, tabular_results


async def rdf_query_to_graph(query: str) -> Graph:
"""
Sends a SPARQL query asynchronously and parses the response into an RDFLib Graph.
Args: query: str: A SPARQL query to be sent asynchronously.
Returns: rdflib.Graph: An RDFLib Graph object
"""
if settings.use_pyoxi:
g = rdf_query_pyoxi(query)
else:
response = await _send_query(query)
g = Graph()
await response.aread()
g.parse(data=response.text, format="turtle")
return g


async def tabular_query_to_table(query: str, context: URIRef = None):
"""
Sends a SPARQL query asynchronously and parses the response into a table format.
The optional context parameter allows an identifier to be supplied with the query, such that multiple results can be
distinguished from each other.
"""
response = await send_query(query, "application/sparql-results+json")
await response.aread()
return context, response.json()["results"]["bindings"]
if settings.use_pyoxi:
response = tabular_query_pyoxi(query)
else:
response = await _send_query(query, "application/sparql-results+json")
await response.aread()
response = response.json()["results"]["bindings"]
return context, response


def rdf_query_pyoxi(query: str):
pyoxi_store = get_pyoxi_store()
results = pyoxi_store.query(query)
ntriples = " .\n".join([str(r) for r in list(results)]) + " ."
if ntriples == " .":
return Graph()
graph = Graph().parse(data=ntriples, format="ntriples")
return graph


def tabular_query_pyoxi(query: str):
pyoxi_store = get_pyoxi_store()
results = pyoxi_store.query(query)
variables = results.variables
results_list = []
for result in results:
results_dict = {}
for var in variables:
binding = result[var]
if binding:
binding_type = pyoxi_result_type(binding)
results_dict[str(var)[1:]] = {'type': binding_type, 'value': binding.value}
results_list.append(results_dict)
return results_list


def pyoxi_result_type(term) -> str:
if isinstance(term, pyoxigraph.Literal):
return 'literal'
elif isinstance(term, pyoxigraph.NamedNode):
return 'uri'
elif isinstance(term, pyoxigraph.BlankNode):
return 'bnode'
else:
raise ValueError(f"Unknown type: {type(term)}")
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -16,7 +16,7 @@ toml = "^0.10.2"
fastapi = "^0.95.0"
python-multipart = "^0.0.6"
jinja2 = "^3.1.2"
pyoxigraph = { version = "^0.3.19", optional = true }
pyoxigraph = "^0.3.19"

[tool.poetry.group.dev.dependencies]
pytest = "^7.1.2"
4 changes: 4 additions & 0 deletions tests_pyoxy/conftest.py
@@ -0,0 +1,4 @@
import os

os.environ["USE_PYOXI"] = "true"
os.environ["TEST_MODE"] = "true"