Skip to content

Commit

Permalink
remove external connegp dependencies
Browse files Browse the repository at this point in the history
remove external connegp dependencies
non-default profile resolution not working.
test passing
need to add more tests
clean up refactored code
seems to be all working.
need to implement tests
debugging case of no requested profile
connegp almost implemented.
object function not tested
  • Loading branch information
lalewis1 committed Feb 28, 2024
1 parent 6d45c57 commit b921985
Show file tree
Hide file tree
Showing 12 changed files with 613 additions and 599 deletions.
Binary file removed connegp-0.1.6-py3-none-any.whl
Binary file not shown.
343 changes: 163 additions & 180 deletions poetry.lock

Large diffs are not rendered by default.

58 changes: 0 additions & 58 deletions prez/models/profiles_and_mediatypes.py

This file was deleted.

55 changes: 22 additions & 33 deletions prez/services/connegp_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,11 @@
from textwrap import dedent

from pydantic import BaseModel
from pyoxigraph import Store
from rdflib import Graph, Namespace, URIRef

from prez.cache import prefix_graph, system_store
from prez.dependencies import get_system_repo
from prez.models.model_exceptions import NoProfilesException
from prez.repositories.base import Repo
from prez.services.curie_functions import get_curie_id_for_uri
from prez.services.curie_functions import get_curie_id_for_uri, get_uri_for_curie_id

logger = logging.getLogger("prez")

Expand All @@ -37,73 +34,66 @@ class NegotiatedPMTs(BaseModel):
headers: dict
params: dict
classes: list[URIRef]
system_repo: Repo
listing: bool = False
default_weighting: float = 1.0
requested_profiles: list[tuple[str, float]] | None = None
requested_mediatypes: list[tuple[str, float]] | None = None
available: list[dict] | None = None
selected: dict | None = None
_system_store: Store | None = None
_prefix_graph: Graph | None = None
_system_repo: Repo | None = None

class Config:
arbitrary_types_allowed = True

async def setup(self) -> bool:
if self._system_store is None:
self._system_store = system_store
if self._prefix_graph is None:
self._prefix_graph = prefix_graph
if self._system_repo is None:
self._system_repo = await get_system_repo(self._system_store)
self.requested_profiles = await self._get_requested_profiles()
self.requested_mediatypes = await self._get_requested_mediatypes()
self.available = await self._get_available()
self.selected = await self._get_selected()
return True if self.selected else False

def _resolve_token(self, token: str) -> str:
async def _resolve_token(self, token: str) -> str:
query_str: str = dedent("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX prof: <http://www.w3.org/ns/dx/prof/>
SELECT ?s
SELECT ?profile
WHERE {
?s a prof:Profile .
?s dcterms:identifier ?o .
?profile a prof:Profile .
?profile dcterms:identifier ?o .
FILTER(?o="<token>"^^xsd:token)
}
""".replace("<token>", token))
try:
result = {result[0].value for result in self._system_store.query(query_str)}.pop()
except KeyError:
_, results = await self.system_repo.send_queries([], [(None, query_str)])
result: str = results[0][1][0]["profile"]["value"]
except (KeyError, IndexError, ValueError):
raise TokenError(f"Token: '{token}' could not be resolved to URI")
uri = "<" + result + ">"
return uri

def _tupilize(self, string: str, is_profile: bool = False) -> tuple[str, float]:
async def _tupilize(self, string: str, is_profile: bool = False) -> tuple[str, float]:
parts: list[str | float] = string.split("q=") # split out the weighting
parts[0] = parts[0].strip(" ;") # remove the seperator character, and any whitespace characters
if is_profile and not re.search(r"^<.*>$", parts[0]): # If it doesn't look like a URI ...
try:
parts[0] = self._resolve_token(parts[0]) # then try to resolve the token to a URI
parts[0] = await self._resolve_token(parts[0]) # then try to resolve the token to a URI
except TokenError as e:
logger.error(e.args[0])
try: # if token resolution fails, try to resolve as a curie
result = str(self._prefix_graph.namespace_manager.expand_curie(parts[0]))
result = str(get_uri_for_curie_id(parts[0]))
parts[0] = "<" + result + ">"
except ValueError as e:
parts[0] = "" # if curie resolution failed, then the profile is invalid
logger.error(e.args[0])
if len(parts) == 1:
parts.append(self.default_weighting) # If no weight given, set the default
else:
try:
parts[1] = float(parts[1]) # Type-check the seperated weighting
except ValueError as e:
log = logging.getLogger("prez")
log.debug(
logger.debug(
f"Could not cast q={parts[1]} as float. Defaulting to {self.default_weighting}. {e.args[0]}")
return parts[0], parts[1]

Expand All @@ -114,18 +104,18 @@ def _prioritize(types: list[tuple[str, float]]) -> list[tuple[str, float]]:
async def _get_requested_profiles(self) -> list[tuple[str, float]] | None:
raw_profiles: str = self.params.get("_profile", "") # Prefer profiles declared in the QSA, as per the spec.
if not raw_profiles:
raw_profiles: str = self.headers.get("Accept-Profile", "")
raw_profiles: str = self.headers.get("accept-profile", "")
if raw_profiles:
profiles: list = [self._tupilize(profile, is_profile=True) for profile in raw_profiles.split(",")]
profiles: list = [await self._tupilize(profile, is_profile=True) for profile in raw_profiles.split(",")]
return self._prioritize(profiles)
return None

async def _get_requested_mediatypes(self) -> list[tuple[str, float]] | None:
raw_mediatypes: str = self.params.get("_media", "") # Prefer mediatypes declared in the QSA, as per the spec.
if not raw_mediatypes:
raw_mediatypes: str = self.headers.get("Accept", "")
raw_mediatypes: str = self.headers.get("accept", "")
if raw_mediatypes:
mediatypes: list = [self._tupilize(mediatype) for mediatype in raw_mediatypes.split(",")]
mediatypes: list = [await self._tupilize(mediatype) for mediatype in raw_mediatypes.split(",")]
return self._prioritize(mediatypes)
return None

Expand Down Expand Up @@ -162,7 +152,6 @@ def generate_response_headers(self) -> dict:
]
)
headers = {
"Access-Control-Allow-Origin": "*", # HACK: why is this specified here?
"Content-Type": self.selected["mediatype"],
"link": profile_header_links + mediatype_header_links
}
Expand All @@ -172,10 +161,10 @@ def _compose_select_query(self) -> str:
prez = Namespace("https://prez.dev/")
profile_class = prez.ListingProfile if self.listing else prez.ObjectProfile
try:
requested_profile = self.requested_profiles[0] # TODO: handle multiple requested profiles
requested_profile = self.requested_profiles[0][0] # TODO: handle multiple requested profiles
except TypeError as e:
requested_profile = None
logger.debug(e)
logger.debug(f"{e}. normally this just means no profiles were requested")

query = dedent(
f"""
Expand Down Expand Up @@ -203,7 +192,7 @@ def _compose_select_query(self) -> str:
altr-ext:hasResourceFormat ?format ;
dcterms:title ?title .\
{f'?profile a {profile_class.n3()} .'}
{f'BIND(?profile=<{requested_profile}> as ?req_profile)' if requested_profile else ''}
{f'BIND(?profile={requested_profile} as ?req_profile)' if requested_profile else ''}
BIND(EXISTS {{ ?shape sh:targetClass ?class ;
altr-ext:hasDefaultProfile ?profile }} AS ?def_profile)
{self._generate_mediatype_if_statements()}
Expand Down Expand Up @@ -241,7 +230,7 @@ def _generate_mediatype_if_statements(self) -> str:
return ifs

async def _do_query(self, query: str) -> tuple[Graph, list]:
response = await self._system_repo.send_queries([], [(None, query)])
response = await self.system_repo.send_queries([], [(None, query)])
if not response[1][0][1]:
raise NoProfilesException(self.classes)
return response
128 changes: 2 additions & 126 deletions prez/services/generate_profiles.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
import logging
from pathlib import Path
from typing import FrozenSet

from rdflib import Graph, URIRef, RDF, PROF, Literal
from rdflib import Graph

from prez.cache import profiles_graph_cache, prefix_graph
from prez.models.model_exceptions import NoProfilesException
from prez.reference_data.prez_ns import PREZ
from prez.services.curie_functions import get_curie_id_for_uri
from prez.repositories import Repo
from prez.services.query_generation.connegp import select_profile_mediatype
from prez.cache import profiles_graph_cache

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -61,121 +55,3 @@ async def create_profiles_graph(repo) -> Graph:
log.info(f"Remote profile(s) found and added")
else:
log.info("No remote profiles found")


async def get_profiles_and_mediatypes(
    classes: FrozenSet[URIRef],
    system_repo: Repo,
    requested_profile: URIRef = None,
    requested_profile_token: str = None,
    requested_mediatype: URIRef = None,
    listing: bool = False,
):
    """Pick the profile and mediatype to use for the given classes.

    Builds a ConnegP selection query with ``select_profile_mediatype``, sends
    it to ``system_repo`` and takes the first result row as the selection.

    Args:
        classes: Classes of the resource being negotiated for.
        system_repo: Repository the selection query is sent to.
        requested_profile: Profile URI requested by the client, if any.
        requested_profile_token: Profile token requested by the client, if any.
        requested_mediatype: Mediatype requested by the client, if any.
        listing: Passed through to the query builder unchanged.

    Returns:
        A 5-tuple ``(profile, mediatype, selected_class, profile_headers,
        avail_profile_uris)`` where the last two come from
        ``generate_profiles_headers``.

    Raises:
        NoProfilesException: If the query produced no result rows.
    """
    query = select_profile_mediatype(
        classes,
        requested_profile,
        requested_profile_token,
        requested_mediatype,
        listing,
    )
    log.debug(f"ConnegP query: {query}")
    response = await system_repo.send_queries([], [(None, query)])
    # log.debug(f"ConnegP response:{results_pretty_printer(response)}")

    # response[1][0][1] is read as the list of result rows for the single
    # query sent above (NOTE(review): shape inferred from the indexing here;
    # confirm against Repo.send_queries).
    rows = response[1][0][1]
    # Falsy check rather than ``== []`` so None or any other empty sequence is
    # treated as "no profiles" instead of failing later on ``rows[0]``.
    if not rows:
        raise NoProfilesException(classes)

    top_result = rows[0]
    profile = URIRef(top_result["profile"]["value"])
    mediatype = Literal(top_result["format"]["value"])
    selected_class = URIRef(top_result["class"]["value"])

    profile_headers, avail_profile_uris = generate_profiles_headers(
        selected_class, response, profile, mediatype
    )
    return profile, mediatype, selected_class, profile_headers, avail_profile_uris


def results_pretty_printer(response, namespace_manager=None):
    """Render a query result as a plain-text table with a leading "#" column.

    Args:
        response: Object exposing ``vars`` (the column headers) and
            ``bindings`` (a sequence of rows indexable by header).
        namespace_manager: Passed to each term's ``n3()`` when rendering.
            Defaults to ``prefix_graph.namespace_manager``.

    Returns:
        The table as a string: a blank first line, the header row, a dashed
        divider, then one numbered line per binding. With no bindings only the
        header and divider are produced.
    """
    if namespace_manager is None:
        namespace_manager = prefix_graph.namespace_manager

    headers = list(response.vars)
    bindings = list(response.bindings)

    # Render every cell once; falsy values are shown as an empty cell.
    cells = [
        [
            row[header].n3(namespace_manager) if row[header] else ""
            for header in headers
        ]
        for row in bindings
    ]

    # First width belongs to the "#" column: wide enough for the last row number.
    max_widths = [len(str(len(bindings)))]
    for col, header in enumerate(headers):
        widest_cell = max(
            (len(line[col]) for line in cells),
            default=0,  # no bindings: max() of an empty iterable would raise
        )
        max_widths.append(max(len(header.n3(namespace_manager)), widest_cell))

    number_width, column_widths = max_widths[0], max_widths[1:]

    # Header row (the leading newline is part of the row, and therefore of the
    # divider length, as in the original output).
    header_row = "\n" + " | ".join(
        ["#".ljust(number_width)]
        + [str(header).ljust(width) for header, width in zip(headers, column_widths)]
    )
    lines = [header_row, "-" * len(header_row)]

    # Data rows, numbered from 1.
    for row_number, line in enumerate(cells, start=1):
        lines.append(
            " | ".join(
                [str(row_number).ljust(number_width)]
                + [cell.ljust(width) for cell, width in zip(line, column_widths)]
            )
        )

    return "\n".join(lines) + "\n"


def generate_profiles_headers(selected_class, response, profile, mediatype):
    """Build the response headers advertising the available profiles.

    Args:
        selected_class: Class URI used as the target of the mediatype links.
        response: Query response; result rows are read from
            ``response[1][0][1]`` and each row is expected to carry
            ``profile``, ``title`` and ``format`` entries with a ``value`` key.
        profile: The selected profile; its matching row is marked ``rel="self"``.
        mediatype: The selected mediatype, also used as ``Content-Type``.

    Returns:
        ``(headers, avail_profile_uris)``: the header dict (including a
        ``Link`` header) and the distinct profile URIs in first-seen order.
    """
    rows = response[1][0][1]
    headers = {
        "Access-Control-Allow-Origin": "*",
        "Content-Type": mediatype,
    }

    # Resolve each profile URI to its CURIE once, however many rows mention it.
    curie_by_uri = {}

    def curie_for(uri):
        if uri not in curie_by_uri:
            curie_by_uri[uri] = get_curie_id_for_uri(uri)
        return curie_by_uri[uri]

    # dict.fromkeys de-duplicates like a set but keeps first-seen order, so the
    # Link header and the returned URI list are stable between runs.
    avail_profiles = list(
        dict.fromkeys(
            (
                curie_for(row["profile"]["value"]),
                row["profile"]["value"],
                row["title"]["value"],
            )
            for row in rows
        )
    )

    avail_profiles_headers = ", ".join(
        f'<http://www.w3.org/ns/dx/prof/Profile>; rel="type"; title="{title}"; token="{token}"; anchor=<{uri}>'
        for token, uri, title in avail_profiles
    )

    mediatype_links = []
    for row in rows:
        uri = row["profile"]["value"]
        fmt = row["format"]["value"]
        rel = "self" if uri == profile and fmt == mediatype else "alternate"
        mediatype_links.append(
            f'<{selected_class}?_profile={curie_for(uri)}&_mediatype={fmt}>; '
            f'rel="{rel}"; '
            f'type="{fmt}"; profile="{uri}"'
        )
    avail_mediatypes_headers = ", ".join(mediatype_links)

    # Skip empty segments so a response without rows does not yield ", , ".
    headers["Link"] = ", ".join(
        part
        for part in (
            f'<{profile}>; rel="profile"',
            avail_profiles_headers,
            avail_mediatypes_headers,
        )
        if part
    )
    avail_profile_uris = [uri for _, uri, _ in avail_profiles]
    return headers, avail_profile_uris
Loading

0 comments on commit b921985

Please sign in to comment.