diff --git a/TODO.md b/TODO.md index 4b9d41b16..6a3834d4b 100644 --- a/TODO.md +++ b/TODO.md @@ -4,13 +4,11 @@ ways to better this mess ## better shtrove api experience - better web-browsing experience - - when `Accept` header accepts html, use html regardless of query-params - - when query param `acceptMediatype` requests another mediatype, display on page in copy/pastable way - - exception: when given `withFileName`, download without html wrapping - - exception: `/trove/browse` should still give hypertext with clickable links - include more explanatory docs (and better fill out those explanations) - - more helpful (less erratic) visual design + - even more helpful (less erratic) visual design - in each html rendering of an api response, include a `
` for adding/editing/viewing query params + - in browsable html, replace json literals with rdf rendered like the rest of the page + - (perf) add bare-minimal IndexcardDeriver (iris, types, namelikes); use for search-result display - better tsv/csv experience - set default columns for `index-value-search` (and/or broadly improve `fields` handling) - better turtle experience diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index 3d5f51e58..c7146a762 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -117,10 +117,10 @@ def test_cardsearch_pagination(self): })) self._index_indexcards(_cards) # gather all pages results: - _querystring: str = f'page[size]={_page_size}' + _querystring: str | None = f'page[size]={_page_size}' _result_iris: set[str] = set() _page_count = 0 - while True: + while _querystring is not None: _cardsearch_handle = self.index_strategy.pls_handle_cardsearch( CardsearchParams.from_querystring(_querystring), ) @@ -133,9 +133,11 @@ def test_cardsearch_pagination(self): _result_iris.update(_page_iris) _page_count += 1 _next_cursor = _cardsearch_handle.cursor.next_cursor() - if _next_cursor is None: - break - _querystring = urlencode({'page[cursor]': _next_cursor.as_queryparam_value()}) + _querystring = ( + urlencode({'page[cursor]': _next_cursor.as_queryparam_value()}) + if _next_cursor is not None + else None # done + ) self.assertEqual(_page_count, math.ceil(_total_count / _page_size)) self.assertEqual(_result_iris, _expected_iris) diff --git a/tests/share/test_oaipmh_trove.py b/tests/share/test_oaipmh_trove.py index 0bdd7df1b..330f1631b 100644 --- a/tests/share/test_oaipmh_trove.py +++ b/tests/share/test_oaipmh_trove.py @@ -232,11 +232,9 @@ def _assert_full_list(self, verb, params, request_method, expected_count, page_s pages = 0 count = 0 token = None - while True: - if token: - parsed = oai_request({'verb': verb, 'resumptionToken': token}, request_method) - else: - parsed = oai_request({'verb': verb, 'metadataPrefix': 'oai_dc', **params}, request_method) + next_params: dict[str, str] | None = {'verb': verb, 'metadataPrefix': 'oai_dc', **params} + while next_params is not None: + parsed = oai_request(next_params, request_method) page = parsed.xpath('//oai:header/oai:identifier', namespaces=NAMESPACES) pages += 1 count += len(page) @@ -245,9 +243,10 @@ def _assert_full_list(self, verb, params, request_method, expected_count, page_s token = token[0].text if token: assert len(page) == page_size + next_params = {'verb': verb, 'resumptionToken': token} else: assert len(page) <= page_size - break + next_params = None # done assert count == expected_count assert pages == math.ceil(expected_count / page_size) diff --git a/tests/trove/render/_base.py b/tests/trove/render/_base.py index 94b8f94a8..c550041cc 100644 --- a/tests/trove/render/_base.py +++ b/tests/trove/render/_base.py @@ -7,7 +7,7 @@ from trove.trovesearch.trovesearch_gathering import trovesearch_by_indexstrategy from trove.render._base import BaseRenderer -from trove.render._rendering import ProtoRendering +from trove.render.rendering import ProtoRendering from trove.vocab.namespaces import RDF from tests.trove._input_output_tests import BasicInputOutputTestCase from ._inputs import UNRENDERED_RDF, UNRENDERED_SEARCH_RDF, RdfCase @@ -66,9 +66,9 @@ def assert_outputs_equal(self, expected_output, actual_output) -> None: self._get_rendered_output(actual_output), ) - def _get_rendered_output(self, rendering: ProtoRendering): + def _get_rendered_output(self, rendering: ProtoRendering) -> str: # for now, they always iter strings (update if/when bytes are in play) - return ''.join(rendering.iter_content()) # type: ignore[arg-type] + return ''.join(map(str, rendering.iter_content())) class TrovesearchRendererTests(TroveRendererTests): diff --git a/tests/trove/render/test_jsonapi_renderer.py b/tests/trove/render/test_jsonapi_renderer.py index 9357c5ff6..992ade522 100644 --- a/tests/trove/render/test_jsonapi_renderer.py +++ b/tests/trove/render/test_jsonapi_renderer.py @@ -2,7 +2,7 @@ from unittest import mock from trove.render.jsonapi import RdfJsonapiRenderer -from trove.render._rendering import SimpleRendering +from trove.render.rendering import SimpleRendering from trove.vocab.namespaces import BLARG from . import _base diff --git a/tests/trove/render/test_jsonld_renderer.py b/tests/trove/render/test_jsonld_renderer.py index eef657f1d..b74d7389c 100644 --- a/tests/trove/render/test_jsonld_renderer.py +++ b/tests/trove/render/test_jsonld_renderer.py @@ -1,7 +1,7 @@ import json from trove.render.jsonld import RdfJsonldRenderer -from trove.render._rendering import SimpleRendering +from trove.render.rendering import SimpleRendering from ._inputs import BLARG from . import _base diff --git a/tests/trove/render/test_simple_csv_renderer.py b/tests/trove/render/test_simple_csv_renderer.py index ca06aa273..d4da76e5b 100644 --- a/tests/trove/render/test_simple_csv_renderer.py +++ b/tests/trove/render/test_simple_csv_renderer.py @@ -1,5 +1,5 @@ from trove.render.simple_csv import TrovesearchSimpleCsvRenderer -from trove.render._rendering import SimpleRendering +from trove.render.rendering import SimpleRendering from . import _base diff --git a/tests/trove/render/test_simple_json_renderer.py b/tests/trove/render/test_simple_json_renderer.py index 7f59c8a59..cd1d9bcf6 100644 --- a/tests/trove/render/test_simple_json_renderer.py +++ b/tests/trove/render/test_simple_json_renderer.py @@ -1,7 +1,7 @@ import json from trove.render.simple_json import TrovesearchSimpleJsonRenderer -from trove.render._rendering import SimpleRendering +from trove.render.rendering import SimpleRendering from trove.vocab.namespaces import BLARG from . import _base diff --git a/tests/trove/render/test_simple_tsv_renderer.py b/tests/trove/render/test_simple_tsv_renderer.py index 752493362..baa3ed5ec 100644 --- a/tests/trove/render/test_simple_tsv_renderer.py +++ b/tests/trove/render/test_simple_tsv_renderer.py @@ -1,5 +1,5 @@ from trove.render.simple_tsv import TrovesearchSimpleTsvRenderer -from trove.render._rendering import SimpleRendering +from trove.render.rendering import SimpleRendering from . import _base diff --git a/tests/trove/render/test_turtle_renderer.py b/tests/trove/render/test_turtle_renderer.py index 32f949278..306174a44 100644 --- a/tests/trove/render/test_turtle_renderer.py +++ b/tests/trove/render/test_turtle_renderer.py @@ -1,7 +1,7 @@ from primitive_metadata import primitive_rdf as rdf from trove.render.turtle import RdfTurtleRenderer -from trove.render._rendering import SimpleRendering +from trove.render.rendering import SimpleRendering from . import _base diff --git a/tests/trove/test_doctest.py b/tests/trove/test_doctest.py index 18c77a18b..06baf8993 100644 --- a/tests/trove/test_doctest.py +++ b/tests/trove/test_doctest.py @@ -3,7 +3,9 @@ import trove.util.chainmap import trove.util.frozen import trove.util.iris +import trove.util.iter import trove.util.propertypath +import trove.vocab.mediatypes _DOCTEST_OPTIONFLAGS = ( doctest.ELLIPSIS @@ -14,7 +16,9 @@ trove.util.chainmap, trove.util.frozen, trove.util.iris, + trove.util.iter, trove.util.propertypath, + trove.vocab.mediatypes, ) diff --git a/trove/render/__init__.py b/trove/render/__init__.py index c5bf699a1..27abfdf79 100644 --- a/trove/render/__init__.py +++ b/trove/render/__init__.py @@ -1,8 +1,7 @@ -from typing import Type - from django import http from trove import exceptions as trove_exceptions +from trove.vocab.mediatypes import strip_mediatype_parameters from ._base import BaseRenderer from .jsonapi import RdfJsonapiRenderer from .html_browse import RdfHtmlBrowseRenderer @@ -25,10 +24,6 @@ TrovesearchSimpleTsvRenderer, ) -RendersType = Type[ - BaseRenderer | RdfHtmlBrowseRenderer | RdfJsonapiRenderer | RdfTurtleRenderer | RdfJsonldRenderer | TrovesearchSimpleCsvRenderer | TrovesearchSimpleJsonRenderer | TrovesearchSimpleTsvRenderer -] - RENDERER_BY_MEDIATYPE = { _renderer_type.MEDIATYPE: _renderer_type for _renderer_type in RENDERERS @@ -42,7 +37,9 @@ def get_renderer_type(request: http.HttpRequest) -> type[BaseRenderer]: _requested_mediatype = request.GET.get('acceptMediatype') if _requested_mediatype: try: - _chosen_renderer_type = RENDERER_BY_MEDIATYPE[_requested_mediatype] + _chosen_renderer_type = RENDERER_BY_MEDIATYPE[ + strip_mediatype_parameters(_requested_mediatype) + ] except KeyError: raise trove_exceptions.CannotRenderMediatype(_requested_mediatype) else: diff --git a/trove/render/_base.py b/trove/render/_base.py index 49a3a52ec..9c6ddb5b0 100644 --- a/trove/render/_base.py +++ b/trove/render/_base.py @@ -13,7 +13,7 @@ from trove.vocab import mediatypes from trove.vocab.trove import TROVE_API_THESAURUS from trove.vocab.namespaces import namespaces_shorthand -from ._rendering import ProtoRendering, SimpleRendering +from .rendering import ProtoRendering, SimpleRendering @dataclasses.dataclass @@ -61,7 +61,7 @@ def render_document(self) -> ProtoRendering: except NotImplementedError: raise NotImplementedError(f'class "{type(self)}" must implement either `render_document` or `simple_render_document`') else: - return SimpleRendering( # type: ignore[return-value] # until ProtoRendering(typing.Protocol) with py3.12 + return SimpleRendering( mediatype=self.MEDIATYPE, rendered_content=_content, ) @@ -69,7 +69,7 @@ def render_document(self) -> ProtoRendering: @classmethod def render_error_document(cls, error: trove_exceptions.TroveError) -> ProtoRendering: # may override, but default to jsonapi - return SimpleRendering( # type: ignore[return-value] # until ProtoRendering(typing.Protocol) with py3.12 + return SimpleRendering( mediatype=mediatypes.JSONAPI, rendered_content=json.dumps( {'errors': [{ # https://jsonapi.org/format/#error-objects diff --git a/trove/render/_html.py b/trove/render/_html.py index 6daa1e037..3bded5288 100644 --- a/trove/render/_html.py +++ b/trove/render/_html.py @@ -5,18 +5,20 @@ from xml.etree.ElementTree import ( Element, SubElement, + tostring as etree_tostring, ) -from typing import Any from primitive_metadata import primitive_rdf as rdf __all__ = ('HtmlBuilder',) +HTML_DOCTYPE = '' + @dataclasses.dataclass class HtmlBuilder: - given_root: Element + given_root: Element = dataclasses.field(default_factory=lambda: Element('html')) _: dataclasses.KW_ONLY _nested_elements: list[Element] = dataclasses.field(default_factory=list) _heading_depth: int = 0 @@ -36,18 +38,16 @@ def _current_element(self) -> Element: # html-building helper methods @contextlib.contextmanager - def nest_h_tag(self, **kwargs: Any) -> Generator[Element]: + def deeper_heading(self) -> Generator[str]: _outer_heading_depth = self._heading_depth if not _outer_heading_depth: self._heading_depth = 1 elif _outer_heading_depth < 6: # h6 deepest self._heading_depth += 1 - _h_tag = f'h{self._heading_depth}' - with self.nest(_h_tag, **kwargs) as _nested: - try: - yield _nested - finally: - self._heading_depth = _outer_heading_depth + try: + yield f'h{self._heading_depth}' + finally: + self._heading_depth = _outer_heading_depth @contextlib.contextmanager def nest(self, tag_name: str, attrs: dict | None = None) -> Generator[Element]: @@ -67,3 +67,9 @@ def leaf(self, tag_name: str, *, text: str | None = None, attrs: dict | None = N _leaf_element.text = text.unicode_value elif text is not None: _leaf_element.text = text + + def as_html_doc(self) -> str: + return '\n'.join(( + HTML_DOCTYPE, + etree_tostring(self.root_element, encoding='unicode', method='html'), + )) diff --git a/trove/render/_rendering.py b/trove/render/_rendering.py deleted file mode 100644 index 0de9b015a..000000000 --- a/trove/render/_rendering.py +++ /dev/null @@ -1,47 +0,0 @@ -import abc -import dataclasses -from typing import Iterator, Generator - -from trove import exceptions as trove_exceptions - - -class ProtoRendering(abc.ABC): - '''base class for all renderings - - (TODO: typing.Protocol (when py3.12+)) - ''' - - @property - @abc.abstractmethod - def mediatype(self) -> str: - '''`mediatype`: required readable attribute - ''' - raise NotImplementedError - - @abc.abstractmethod - def iter_content(self) -> Iterator[str | bytes | memoryview]: - '''`iter_content`: (only) required method - ''' - yield from () - - -@dataclasses.dataclass -class SimpleRendering: # implements ProtoRendering - mediatype: str - rendered_content: str = '' - - def iter_content(self) -> Generator[str]: - yield self.rendered_content - - -@dataclasses.dataclass -class StreamableRendering: # implements ProtoRendering - mediatype: str - content_stream: Iterator[str | bytes | memoryview] - _started_already: bool = False - - def iter_content(self) -> Iterator[str | bytes | memoryview]: - if self._started_already: - raise trove_exceptions.CannotRenderStreamTwice - self._started_already = True - yield from self.content_stream diff --git a/trove/render/_simple_trovesearch.py b/trove/render/_simple_trovesearch.py index 36bc36c4b..657e5b169 100644 --- a/trove/render/_simple_trovesearch.py +++ b/trove/render/_simple_trovesearch.py @@ -1,6 +1,8 @@ from __future__ import annotations -from collections.abc import Generator, Iterator +from collections.abc import Generator, Iterator, Sequence +import itertools import json +import logging from typing import Any, TYPE_CHECKING from primitive_metadata import primitive_rdf as rdf @@ -9,10 +11,12 @@ from trove.vocab.jsonapi import JSONAPI_LINK_OBJECT from trove.vocab.namespaces import TROVE, RDF from ._base import BaseRenderer -from ._rendering import ProtoRendering, SimpleRendering +from .rendering import ProtoRendering, SimpleRendering if TYPE_CHECKING: from trove.util.json import JsonObject +_logger = logging.getLogger(__name__) + class SimpleTrovesearchRenderer(BaseRenderer): '''for "simple" search api responses (including only result metadata) @@ -30,18 +34,14 @@ def simple_multicard_rendering(self, cards: Iterator[tuple[str, JsonObject]]) -> raise NotImplementedError def unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> ProtoRendering: - return SimpleRendering( # type: ignore[return-value] + return SimpleRendering( mediatype=self.MEDIATYPE, rendered_content=self.simple_unicard_rendering(card_iri, osfmap_json), ) - def multicard_rendering(self, card_pages: Iterator[dict[str, JsonObject]]) -> ProtoRendering: - _cards = ( - (_card_iri, _card_contents) - for _page in card_pages - for _card_iri, _card_contents in _page.items() - ) - return SimpleRendering( # type: ignore[return-value] + def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering: + _cards = itertools.chain.from_iterable(card_pages) + return SimpleRendering( mediatype=self.MEDIATYPE, rendered_content=self.simple_multicard_rendering(_cards), ) @@ -57,7 +57,7 @@ def render_document(self) -> ProtoRendering: ) raise trove_exceptions.UnsupportedRdfType(_focustypes) - def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]: + def _iter_card_pages(self) -> Generator[list[tuple[str, JsonObject]]]: assert not self.__already_iterated_cards self.__already_iterated_cards = True self._page_links = set() @@ -67,22 +67,22 @@ def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]: if (RDF.type, JSONAPI_LINK_OBJECT) in _page: self._page_links.add(_page) elif rdf.is_container(_page): - _cardpage = [] - for _search_result in rdf.container_objects(_page): + _cardpage: list[tuple[str, JsonObject]] = [] + for _search_result_blanknode in rdf.container_objects(_page): try: _card = next( _obj - for _pred, _obj in _search_result + for _pred, _obj in _search_result_blanknode if _pred == TROVE.indexCard ) except StopIteration: pass # skip malformed else: - _cardpage.append(_card) - yield { - self._get_card_iri(_card): self._get_card_content(_card, _page_graph) - for _card in _cardpage - } + _cardpage.append(( + self._get_card_iri(_card), + self._get_card_content(_card, _page_graph), + )) + yield _cardpage def _get_card_iri(self, card: str | rdf.RdfBlanknode) -> str: return card if isinstance(card, str) else '' diff --git a/trove/render/html_browse.py b/trove/render/html_browse.py index 1f5bffd6f..dd8f947af 100644 --- a/trove/render/html_browse.py +++ b/trove/render/html_browse.py @@ -1,7 +1,4 @@ -from collections.abc import ( - Iterator, - Generator, -) +from collections.abc import Generator import contextlib import dataclasses import datetime @@ -12,7 +9,6 @@ from urllib.parse import quote, urlsplit, urlunsplit from xml.etree.ElementTree import ( Element, - tostring as etree_tostring, fromstring as etree_fromstring, ) @@ -27,7 +23,8 @@ from trove.util.iris import get_sufficiently_unique_iri from trove.util.randomness import shuffled from trove.vocab import mediatypes -from trove.vocab.namespaces import RDF, RDFS, SKOS, DCTERMS, FOAF, DC +from trove.vocab import jsonapi +from trove.vocab.namespaces import RDF, RDFS, SKOS, DCTERMS, FOAF, DC, OSFMAP from trove.vocab.static_vocab import combined_thesaurus__suffuniq from trove.vocab.trove import trove_browse_link from ._base import BaseRenderer @@ -50,30 +47,33 @@ DCTERMS.title, DC.title, FOAF.name, + OSFMAP.fileName, ) _IMPLICIT_DATATYPES = frozenset(( RDF.string, RDF.langString, )) +_PREDICATES_RENDERED_SPECIAL = frozenset(( + RDF.type, +)) +_PRIMITIVE_LITERAL_TYPES = (float, int, datetime.date) _QUERYPARAM_SPLIT_RE = re.compile(r'(?=[?&])') _PHI = (math.sqrt(5) + 1) / 2 -_HTML_DOCTYPE = '' - @dataclasses.dataclass class RdfHtmlBrowseRenderer(BaseRenderer): - MEDIATYPE: ClassVar[str] = 'text/html; charset=utf-8' - __current_data: rdf.RdfTripleDictionary = dataclasses.field(init=False) + MEDIATYPE: ClassVar[str] = mediatypes.HTML + __current_data: rdf.RdfGraph = dataclasses.field(init=False) __visiting_iris: set[str] = dataclasses.field(init=False) __hb: HtmlBuilder = dataclasses.field(init=False) __last_hue_turn: float = dataclasses.field(default_factory=random.random) def __post_init__(self) -> None: # TODO: lang (according to request -- also translate) - self.__current_data = self.response_tripledict + self.__current_data = self.response_data self.__visiting_iris = set() @property @@ -82,20 +82,19 @@ def is_data_blended(self) -> bool | None: # override BaseRenderer def simple_render_document(self) -> str: - self.__hb = HtmlBuilder(Element('html')) + self.__hb = HtmlBuilder() self.render_html_head() - _body_attrs = { - 'class': 'BrowseWrapper', - 'style': self._hue_turn_css(), - } - with self.__hb.nest('body', attrs=_body_attrs): + with ( + self._hue_turn_css() as _hue_turn_style, + self.__hb.nest('body', attrs={ + 'class': 'BrowseWrapper', + 'style': _hue_turn_style, + }), + ): self.render_nav() self.render_main() self.render_footer() - return '\n'.join(( - _HTML_DOCTYPE, - etree_tostring(self.__hb.root_element, encoding='unicode', method='html'), - )) + return self.__hb.as_html_doc() def render_html_head(self) -> None: with self.__hb.nest('head'): @@ -153,67 +152,69 @@ def __mediatype_link(self, mediatype: str) -> None: with self.__hb.nest('a', attrs={'href': reverse('trove:docs')}) as _link: _link.text = _('(stable for documented use)') - def __render_subj(self, subj_iri: str, *, start_collapsed: bool | None = None) -> None: - _twopledict = self.__current_data.get(subj_iri, {}) - with self.__visiting(subj_iri): + def __render_subj(self, subj_iri: str, *, include_details: bool = True) -> None: + with self.__visiting(subj_iri) as _h_tag: with self.__nest_card('article'): with self.__hb.nest('header'): - _compact = self.iri_shorthand.compact_iri(subj_iri) - _is_compactable = (_compact != subj_iri) - _should_link = (subj_iri not in self.response_focus.iris) - with self.__hb.nest_h_tag(attrs={'id': quote(subj_iri)}) as _h: - if _should_link: - with self.__nest_link(subj_iri) as _link: - if _is_compactable: - _link.text = _compact - else: - self.__split_iri_pre(subj_iri) + with self.__hb.nest(_h_tag, attrs={'id': quote(subj_iri)}): + if self.__is_focus(subj_iri): + self.__split_iri_pre(subj_iri) else: - if _is_compactable: - _h.text = _compact - else: + with self.__nest_link(subj_iri): self.__split_iri_pre(subj_iri) self.__iri_subheaders(subj_iri) - if _twopledict: - with self.__hb.nest('details') as _details: - _detail_depth = sum((_el.tag == 'details') for _el in self.__hb._nested_elements) - _should_open = ( - _detail_depth < 3 - if start_collapsed is None - else not start_collapsed - ) - if _should_open: - _details.set('open', '') + if self.__is_focus(subj_iri): + self.__hb.leaf('pre', text=subj_iri) + if include_details and (_twopledict := self.__current_data.tripledict.get(subj_iri, {})): + _details_attrs = ( + {'open': ''} + if (self.__is_focus(subj_iri) or _is_local_url(subj_iri)) + else {} + ) + with self.__hb.nest('details', _details_attrs): self.__hb.leaf('summary', text=_('more details...')) self.__twoples(_twopledict) def __twoples(self, twopledict: rdf.RdfTwopleDictionary) -> None: with self.__hb.nest('dl', {'class': 'Browse__twopleset'}): - for _pred, _obj_set in shuffled(twopledict.items()): + for _pred, _obj_set in self.__order_twopledict(twopledict): with self.__hb.nest('dt', attrs={'class': 'Browse__predicate'}): self.__compact_link(_pred) for _text in self.__iri_thesaurus_labels(_pred): self.__literal(_text) with self.__hb.nest('dd'): - for _obj in shuffled(_obj_set): + for _obj in _obj_set: self.__obj(_obj) + def __order_twopledict(self, twopledict: rdf.RdfTwopleDictionary) -> Generator[tuple[str, list[rdf.RdfObject]]]: + _items_with_sorted_objs = ( + (_pred, sorted(_obj_set, key=_obj_ordering_key)) + for _pred, _obj_set in twopledict.items() + if _pred not in _PREDICATES_RENDERED_SPECIAL + ) + yield from sorted( + _items_with_sorted_objs, + key=lambda _item: _obj_ordering_key(_item[1][0]), + ) + def __obj(self, obj: rdf.RdfObject) -> None: if isinstance(obj, str): # iri # TODO: detect whether indexcard? - if (obj in self.__current_data) and (obj not in self.__visiting_iris): + if (obj in self.__current_data.tripledict) and (obj not in self.__visiting_iris): self.__render_subj(obj) else: with self.__hb.nest('article', attrs={'class': 'Browse__object'}): self.__iri_link_and_labels(obj) elif isinstance(obj, frozenset): # blanknode - if (RDF.type, RDF.Seq) in obj: + if _is_jsonapi_link_obj(obj): + self.__jsonapi_link_obj(obj) + elif _is_sequence_obj(obj): self.__sequence(obj) else: self.__blanknode(obj) elif isinstance(obj, rdf.Literal): self.__literal(obj, is_rdf_object=True) - elif isinstance(obj, (float, int, datetime.date)): + elif isinstance(obj, _PRIMITIVE_LITERAL_TYPES): self.__literal(rdf.literal(obj), is_rdf_object=True) elif isinstance(obj, rdf.QuotedGraph): self.__quoted_graph(obj) @@ -255,8 +256,16 @@ def __sequence(self, sequence_twoples: frozenset[rdf.RdfTwople]) -> None: self.__obj(_seq_obj) def __quoted_graph(self, quoted_graph: rdf.QuotedGraph) -> None: - with self.__quoted_data(quoted_graph.tripledict): - self.__render_subj(quoted_graph.focus_iri) # , start_collapsed=True) + _should_include_details = ( + self.__is_focus(quoted_graph.focus_iri) + or (( # primary topic of response focus + self.response_focus.single_iri(), + FOAF.primaryTopic, + quoted_graph.focus_iri, + ) in self.response_data) + ) + with self.__quoted_data(quoted_graph): + self.__render_subj(quoted_graph.focus_iri, include_details=_should_include_details) def __blanknode(self, blanknode: rdf.RdfTwopleDictionary | frozenset) -> None: _twopledict = ( @@ -264,28 +273,46 @@ def __blanknode(self, blanknode: rdf.RdfTwopleDictionary | frozenset) -> None: if isinstance(blanknode, dict) else rdf.twopledict_from_twopleset(blanknode) ) - with self.__hb.nest('details', attrs={ - 'open': '', - 'class': 'Browse__blanknode Browse__object', - 'style': self._hue_turn_css(), - }): - self.__hb.leaf('summary', text='(blank node)') + with ( + self._hue_turn_css() as _hue_turn_style, + self.__hb.nest('details', attrs={ + 'open': '', + 'class': 'Browse__blanknode Browse__object', + 'style': _hue_turn_style, + }), + ): + with self.__hb.nest('summary'): + for _type_iri in _twopledict.get(RDF.type, ()): + self.__compact_link(_type_iri) self.__twoples(_twopledict) + def __jsonapi_link_obj(self, twopleset: frozenset[rdf.RdfTwople]) -> None: + _iri = next( + (str(_obj) for (_pred, _obj) in twopleset if _pred == RDF.value), + '', + ) + _text = next( + (_obj.unicode_value for (_pred, _obj) in twopleset if _pred == jsonapi.JSONAPI_MEMBERNAME), + '', + ) + with self.__nest_link(_iri, attrs={'class': 'Browse__blanknode Browse__object'}) as _a: + _a.text = _('link: %(linktext)s') % {'linktext': _text} + def __split_iri_pre(self, iri: str) -> None: - self.__hb.leaf('pre', text='\n'.join(self.__iri_lines(iri))) + self.__hb.leaf('pre', text='\n'.join(self.__iri_display_lines(iri))) @contextlib.contextmanager - def __visiting(self, iri: str) -> Iterator[None]: + def __visiting(self, iri: str) -> Generator[str]: assert iri not in self.__visiting_iris self.__visiting_iris.add(iri) try: - yield + with self.__hb.deeper_heading() as _h_tag: + yield _h_tag finally: self.__visiting_iris.remove(iri) @contextlib.contextmanager - def __quoted_data(self, quoted_data: dict) -> Generator[None]: + def __quoted_data(self, quoted_data: rdf.RdfGraph) -> Generator[None]: _outer_data = self.__current_data _outer_visiting_iris = self.__visiting_iris self.__current_data = quoted_data @@ -301,27 +328,32 @@ def __iri_link_and_labels(self, iri: str) -> None: for _text in self.__iri_thesaurus_labels(iri): self.__literal(_text) - def __nest_link(self, iri: str) -> contextlib.AbstractContextManager[Element]: + def __nest_link(self, iri: str, attrs: dict[str, str] | None = None) -> contextlib.AbstractContextManager[Element]: _href = ( iri if _is_local_url(iri) else trove_browse_link(iri) ) - return self.__hb.nest('a', attrs={'href': _href}) + return self.__hb.nest('a', attrs={**(attrs or {}), 'href': _href}) def __compact_link(self, iri: str) -> Element: with self.__nest_link(iri) as _a: - _a.text = self.iri_shorthand.compact_iri(iri) + _a.text = ''.join(self.__iri_display_lines(iri)) return _a - def __nest_card(self, tag: str) -> contextlib.AbstractContextManager[Element]: - return self.__hb.nest( - tag, - attrs={ - 'class': 'Browse__card', - 'style': self._hue_turn_css(), - }, - ) + @contextlib.contextmanager + def __nest_card(self, tag: str) -> Generator[Element]: + with ( + self._hue_turn_css() as _hue_turn_style, + self.__hb.nest( + tag, + attrs={ + 'class': 'Browse__card', + 'style': _hue_turn_style, + }, + ) as _element, + ): + yield _element def __iri_thesaurus_labels(self, iri: str) -> list[str]: # TODO: consider requested language @@ -331,16 +363,21 @@ def __iri_thesaurus_labels(self, iri: str) -> list[str]: if _thesaurus_entry: for _pred in _LINK_TEXT_PREDICATES: _labels.update(_thesaurus_entry.get(_pred, ())) - _twoples = self.__current_data.get(iri) + _twoples = self.__current_data.tripledict.get(iri) if _twoples: for _pred in _LINK_TEXT_PREDICATES: _labels.update(_twoples.get(_pred, ())) return shuffled(_labels) - def _hue_turn_css(self) -> str: - _hue_turn = (self.__last_hue_turn + _PHI) % 1.0 + @contextlib.contextmanager + def _hue_turn_css(self) -> Generator[str]: + _prior_turn = self.__last_hue_turn + _hue_turn = (_prior_turn + _PHI) % 1.0 self.__last_hue_turn = _hue_turn - return f'--hue-turn: {_hue_turn}turn;' + try: + yield f'--hue-turn: {_hue_turn}turn;' + finally: + self.__last_hue_turn = _prior_turn def _queryparam_href(self, param_name: str, param_value: str | None) -> str: _base_url = self.response_focus.single_iri() @@ -364,26 +401,34 @@ def _queryparam_href(self, param_name: str, param_value: str | None) -> str: )) def __iri_subheaders(self, iri: str) -> None: - _type_iris = self.__current_data.get(iri, {}).get(RDF.type, ()) - if _type_iris: - for _type_iri in _type_iris: - self.__compact_link(_type_iri) + for _type_iri in self.__current_data.q(iri, RDF.type): + self.__compact_link(_type_iri) _labels = self.__iri_thesaurus_labels(iri) if _labels: for _label in _labels: self.__literal(_label) - def __iri_lines(self, iri: str) -> Iterator[str]: - (_scheme, _netloc, _path, _query, _fragment) = urlsplit(iri) - yield ( - f'://{_netloc}{_path}' - if _netloc - else f'{_scheme}:{_path}' - ) - if _query: - yield from filter(bool, _QUERYPARAM_SPLIT_RE.split(f'?{_query}')) - if _fragment: - yield f'#{_fragment}' + def __iri_display_lines(self, iri: str) -> Generator[str]: + _compact = self.iri_shorthand.compact_iri(iri) + if _compact != iri: + yield _compact + else: + (_scheme, _netloc, _path, _query, _fragment) = urlsplit(iri) + # first line with path + if _is_local_url(iri): + yield f'/{_path.lstrip('/')}' + elif _netloc: + yield f'://{_netloc}{_path}' + else: + yield f'{_scheme}:{_path}' + # query and fragment separate + if _query: + yield from filter(bool, _QUERYPARAM_SPLIT_RE.split(f'?{_query}')) + if _fragment: + yield f'#{_fragment}' + + def __is_focus(self, iri: str) -> bool: + return (iri in self.response_focus.iris) def _append_class(el: Element, element_class: str) -> None: @@ -395,3 +440,25 @@ def _append_class(el: Element, element_class: str) -> None: def _is_local_url(iri: str) -> bool: return iri.startswith(settings.SHARE_WEB_URL) + + +def _is_sequence_obj(obj: rdf.RdfObject) -> bool: + return ( + isinstance(obj, frozenset) + and (RDF.type, RDF.Seq) in obj + ) + + +def _is_jsonapi_link_obj(obj: rdf.RdfObject) -> bool: + return ( + isinstance(obj, frozenset) + and (RDF.type, jsonapi.JSONAPI_LINK_OBJECT) in obj + ) + + +def _obj_ordering_key(obj: rdf.RdfObject) -> tuple[bool, ...]: + return ( + not isinstance(obj, (rdf.Literal, *_PRIMITIVE_LITERAL_TYPES)), # literal values first + not isinstance(obj, str), # iris next + _is_jsonapi_link_obj(obj), # jsonapi link objects last + ) diff --git a/trove/render/jsonapi.py b/trove/render/jsonapi.py index e60fc2338..536e562bc 100644 --- a/trove/render/jsonapi.py +++ b/trove/render/jsonapi.py @@ -38,15 +38,11 @@ def _resource_ids_defaultdict() -> defaultdict[Any, str]: _prefix = str(time.time_ns()) - _ints = itertools.count() - - def _iter_ids() -> Iterator[str]: - while True: - _id = next(_ints) - yield f'{_prefix}-{_id}' - - _ids = _iter_ids() - return defaultdict(lambda: next(_ids)) + _infinite_ids = ( + f'{_prefix}-{_id}' + for _id in itertools.count() + ) + return defaultdict(_infinite_ids.__next__) @dataclasses.dataclass diff --git a/trove/render/rendering/__init__.py b/trove/render/rendering/__init__.py new file mode 100644 index 000000000..029ca9f4c --- /dev/null +++ b/trove/render/rendering/__init__.py @@ -0,0 +1,4 @@ +from .proto import ProtoRendering +from .simple import SimpleRendering + +__all__ = ('ProtoRendering', 'SimpleRendering') diff --git a/trove/render/rendering/html_wrapped.py b/trove/render/rendering/html_wrapped.py new file mode 100644 index 000000000..360e09446 --- /dev/null +++ b/trove/render/rendering/html_wrapped.py @@ -0,0 +1,20 @@ +import dataclasses +import html +from typing import Iterator + +from trove.vocab import mediatypes +from trove.render._html import HTML_DOCTYPE +from .proto import ProtoRendering + + +@dataclasses.dataclass +class HtmlWrappedRendering(ProtoRendering): + inner_rendering: ProtoRendering + mediatype: str = mediatypes.HTML + + def iter_content(self) -> Iterator[str]: + yield HTML_DOCTYPE + yield '
'
+        for _content in self.inner_rendering.iter_content():
+            yield html.escape(_content)
+        yield '
' diff --git a/trove/render/rendering/proto.py b/trove/render/rendering/proto.py new file mode 100644 index 000000000..ac0269f94 --- /dev/null +++ b/trove/render/rendering/proto.py @@ -0,0 +1,16 @@ +from typing import ( + Iterator, + Protocol, +) + +__all__ = ('ProtoRendering',) + + +class ProtoRendering(Protocol): + '''protocol for all renderings + ''' + mediatype: str # required attribute + + def iter_content(self) -> Iterator[str]: + '''`iter_content`: (only) required method + ''' diff --git a/trove/render/rendering/simple.py b/trove/render/rendering/simple.py new file mode 100644 index 000000000..2300ababf --- /dev/null +++ b/trove/render/rendering/simple.py @@ -0,0 +1,17 @@ +from collections.abc import Generator +import dataclasses + +from .proto import ProtoRendering + +__all__ = ('SimpleRendering',) + + +@dataclasses.dataclass +class SimpleRendering(ProtoRendering): + '''for simple pre-rendered string content + ''' + mediatype: str + rendered_content: str = '' + + def iter_content(self) -> Generator[str]: + yield self.rendered_content diff --git a/trove/render/rendering/streamable.py b/trove/render/rendering/streamable.py new file mode 100644 index 000000000..4570a66be --- /dev/null +++ b/trove/render/rendering/streamable.py @@ -0,0 +1,18 @@ +from collections.abc import Iterator +import dataclasses + +from trove import exceptions as trove_exceptions +from .proto import ProtoRendering + + +@dataclasses.dataclass +class StreamableRendering(ProtoRendering): + mediatype: str + content_stream: Iterator[str] = iter(()) + _started_already: bool = False + + def iter_content(self) -> Iterator[str]: + if self._started_already: + raise trove_exceptions.CannotRenderStreamTwice + self._started_already = True + yield from self.content_stream diff --git a/trove/render/simple_csv.py b/trove/render/simple_csv.py index 52c9d700b..a67935335 100644 --- a/trove/render/simple_csv.py +++ b/trove/render/simple_csv.py @@ -2,29 +2,32 @@ from collections.abc import ( Generator, Iterator, - Iterable, Sequence, ) import csv +import dataclasses import functools import itertools -import dataclasses +import logging from typing import TYPE_CHECKING, ClassVar from trove.trovesearch.search_params import ( CardsearchParams, ValuesearchParams, ) +from trove.util.iter import iter_unique from trove.util.propertypath import Propertypath, GLOB_PATHSTEP from trove.vocab import mediatypes from trove.vocab import osfmap from trove.vocab.namespaces import TROVE from ._simple_trovesearch import SimpleTrovesearchRenderer -from ._rendering import StreamableRendering, ProtoRendering +from .rendering import ProtoRendering +from .rendering.streamable import StreamableRendering if TYPE_CHECKING: from trove.util.trove_params import BasicTroveParams from trove.util.json import JsonValue, JsonObject +_logger = logging.getLogger(__name__) type Jsonpath = Sequence[str] # path of json keys type CsvValue = str | int | float | None @@ -40,14 +43,15 @@ class TrovesearchSimpleCsvRenderer(SimpleTrovesearchRenderer): CSV_DIALECT: ClassVar[type[csv.Dialect]] = csv.excel def unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> ProtoRendering: - return self.multicard_rendering(card_pages=iter([{card_iri: osfmap_json}])) + _page = [(card_iri, osfmap_json)] + return self.multicard_rendering(card_pages=iter([_page])) - def multicard_rendering(self, card_pages: Iterator[dict[str, JsonObject]]) -> ProtoRendering: + def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering: _doc = TabularDoc( card_pages, trove_params=getattr(self.response_focus, 'search_params', None), ) - return StreamableRendering( # type: ignore[return-value] + return StreamableRendering( mediatype=self.MEDIATYPE, content_stream=csv_stream(self.CSV_DIALECT, _doc.header(), _doc.rows()), ) @@ -66,7 +70,7 @@ def csv_stream( @dataclasses.dataclass class TabularDoc: - card_pages: Iterator[dict[str, JsonObject]] + card_pages: Iterator[Sequence[tuple[str, JsonObject]]] trove_params: BasicTroveParams | None = None _started: bool = False @@ -78,10 +82,6 @@ def column_jsonpaths(self) -> tuple[Jsonpath, ...]: ) return (_ID_JSONPATH, *_column_jsonpaths) - @functools.cached_property - def first_page(self) -> dict[str, JsonObject]: - return next(self.card_pages, {}) - def _column_paths(self) -> Iterator[Propertypath]: _pathlists: list[Sequence[Propertypath]] = [] if self.trove_params is not None: # hacks @@ -102,29 +102,16 @@ def _column_paths(self) -> Iterator[Propertypath]: _pathlists.append(_pathlist) if not _pathlists: _pathlists.append(osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS) - return self.iter_unique(itertools.chain.from_iterable(_pathlists)) - - @staticmethod - def iter_unique[T](iterable: Iterable[T]) -> Generator[T]: - _seen = set() - for _item in iterable: - if _item not in _seen: - _seen.add(_item) - yield _item - - def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]: - assert not self._started - self._started = True - if self.first_page: - yield self.first_page - yield from self.card_pages + return iter_unique(itertools.chain.from_iterable(_pathlists)) def header(self) -> list[CsvValue]: return ['.'.join(_path) for _path in self.column_jsonpaths] def rows(self) -> Generator[list[CsvValue]]: - for _page in self._iter_card_pages(): - for _card_iri, _osfmap_json in _page.items(): + assert not self._started + self._started = True + for _page in self.card_pages: + for _card_iri, _osfmap_json in _page: yield self._row_values(_osfmap_json) def _row_values(self, osfmap_json: JsonObject) -> list[CsvValue]: diff --git a/trove/render/simple_json.py b/trove/render/simple_json.py index 753d6ee6e..a29025d37 100644 --- a/trove/render/simple_json.py +++ b/trove/render/simple_json.py @@ -11,9 +11,15 @@ ) from trove.vocab import mediatypes from trove.vocab.namespaces import TROVE, RDF -from ._rendering import StreamableRendering, ProtoRendering +from .rendering import ProtoRendering +from .rendering.streamable import StreamableRendering from ._simple_trovesearch import SimpleTrovesearchRenderer if typing.TYPE_CHECKING: + from collections.abc import ( + Generator, + Iterator, + Sequence, + ) from trove.util.json import JsonObject @@ -23,25 +29,25 @@ class TrovesearchSimpleJsonRenderer(SimpleTrovesearchRenderer): MEDIATYPE = mediatypes.JSON INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] - def simple_unicard_rendering(self, card_iri: str, osfmap_json: dict[str, typing.Any]) -> str: + def simple_unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> str: return json.dumps({ 'data': self._render_card_content(card_iri, osfmap_json), 'links': self._render_links(), 'meta': self._render_meta(), }, indent=2) - def multicard_rendering(self, card_pages: typing.Iterator[dict[str, dict[str, typing.Any]]]) -> ProtoRendering: - return StreamableRendering( # type: ignore[return-value] + def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering: + return StreamableRendering( mediatype=self.MEDIATYPE, content_stream=self._stream_json(card_pages), ) - def _stream_json(self, card_pages: typing.Iterator[dict[str, typing.Any]]) -> typing.Generator[str]: + def _stream_json(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> Generator[str]: _prefix = '{"data": [' yield _prefix _datum_prefix = None for _page in card_pages: - for _card_iri, _osfmap_json in _page.items(): + for _card_iri, _osfmap_json in _page: if _datum_prefix is not None: yield _datum_prefix yield json.dumps(self._render_card_content(_card_iri, _osfmap_json), indent=2) @@ -78,7 +84,7 @@ def _render_meta(self) -> dict[str, int | str]: pass return _meta - def _render_links(self) -> dict[str, typing.Any]: + def _render_links(self) -> JsonObject: _links = {} for _pagelink in self._page_links: _twopledict = rdf.twopledict_from_twopleset(_pagelink) @@ -88,8 +94,8 @@ def _render_links(self) -> dict[str, typing.Any]: _links[_membername.unicode_value] = _link_url return _links - def _add_twople(self, json_dict: dict[str, typing.Any], predicate_iri: str, object_iri: str) -> None: - _obj_ref = {'@id': object_iri} + def _add_twople(self, json_dict: JsonObject, predicate_iri: str, object_iri: str) -> None: + _obj_ref: JsonObject = {'@id': object_iri} _obj_list = json_dict.setdefault(predicate_iri, []) if isinstance(_obj_list, list): _obj_list.append(_obj_ref) diff --git a/trove/static/css/browse.css b/trove/static/css/browse.css index 643bcfcf2..75adadddc 100644 --- a/trove/static/css/browse.css +++ b/trove/static/css/browse.css @@ -20,7 +20,7 @@ flex-wrap: wrap; gap: var(--gutter-1); margin: 0; - padding: 1rem; + padding: var(--gutter-2); min-height: 100vh; background-color: lch(var(--bg-luminance) var(--bg-chroma) var(--hue-turn)); } @@ -36,7 +36,7 @@ .Browse__card { display: flex; flex-direction: column; - padding: var(--gutter-2) var(--gutter-3); + padding: var(--gutter-3) var(--gutter-4); background-color: lch(var(--bg-luminance) var(--bg-chroma) var(--hue-turn)); border-color: lch(59% var(--bg-chroma) var(--hue-turn)); border-style: solid; @@ -44,10 +44,10 @@ border-block-start-width: var(--gutter-4); border-inline-end-width: 0; border-block-end-width: 0; - /* - border-start-end-radius: 1rem; - border-end-start-radius: 1rem; - */ +} + +.BrowseWrapper details > summary { + padding-left: var(--gutter-4); } .BrowseWrapper details > summary::before { @@ -65,16 +65,22 @@ .Browse__card > header { display: flex; flex-direction: row; - gap: var(--gutter-2); + flex-wrap: wrap; + gap: var(--gutter-3); align-items: baseline; - border-bottom: solid 1px rgba(0,0,0,0.382); - margin-bottom: var(--gutter-3); + padding-left: var(--gutter-3); } .Browse__card > header > :first-child { margin: 0; } +.Browse__card > header:not(:last-child) { + border-bottom: solid 1px rgba(0,0,0,0.382); + padding-bottom: var(--gutter-3); + margin-bottom: var(--gutter-3); +} + .Browse__card > footer { padding: var(--gutter-2); } @@ -86,7 +92,7 @@ dl.Browse__twopleset { [twople-obj] 1fr ; grid-auto-flow: row; - row-gap: var(--gutter-2); + row-gap: var(--gutter-3); margin: 0; padding: 0; } @@ -126,8 +132,7 @@ dl.Browse__twopleset > dd { .Browse__literal { display: flex; flex-direction: row; - gap: var(--gutter-3); - padding: var(--gutter-4); + gap: var(--gutter-5); } .Browse__literal > q { @@ -140,10 +145,18 @@ dl.Browse__twopleset > dd { .Browse__predicate { background-color: lch(from var(--bg-color-initial) 89% c var(--hue-turn)); - padding: var(--gutter-4); + padding: 0 var(--gutter-4); +} + +.Browse__predicate .Browse__literal { + padding: 0 var(--gutter-3); } .Browse__object { background-color: lch(from var(--bg-color-initial) 93% c var(--hue-turn)); - padding: var(--gutter-4); + padding: 0 var(--gutter-4); +} + +.Browse__object.Browse__blanknode { + background-color: lch(var(--bg-luminance) var(--bg-chroma) var(--hue-turn)); } diff --git a/trove/trovebrowse_gathering.py b/trove/trovebrowse_gathering.py index f8efb9a60..8145ed9ef 100644 --- a/trove/trovebrowse_gathering.py +++ b/trove/trovebrowse_gathering.py @@ -39,14 +39,21 @@ def gather_cards_focused_on(focus: gather.Focus, *, blend_cards: bool) -> GathererGenerator: _identifier_qs = trove_db.ResourceIdentifier.objects.queryset_for_iris(focus.iris) _indexcard_qs = trove_db.Indexcard.objects.filter(focus_identifier_set__in=_identifier_qs) + _lrd_qs = ( + trove_db.LatestResourceDescription.objects + .filter(indexcard__in=_indexcard_qs) + .select_related('indexcard') + ) if blend_cards: - for _latest_resource_description in trove_db.LatestResourceDescription.objects.filter(indexcard__in=_indexcard_qs): - yield from rdf.iter_tripleset(_latest_resource_description.as_rdf_tripledict()) + for _resource_description in _lrd_qs: + yield from rdf.iter_tripleset(_resource_description.as_rdfdoc_with_supplements().tripledict) + yield (ns.FOAF.isPrimaryTopicOf, _resource_description.indexcard.get_iri()) else: - for _indexcard in _indexcard_qs: - _card_iri = _indexcard.get_iri() + for _resource_description in _lrd_qs: + _card_iri = _resource_description.indexcard.get_iri() yield (ns.FOAF.isPrimaryTopicOf, _card_iri) yield (_card_iri, ns.RDF.type, ns.TROVE.Indexcard) + yield (_card_iri, ns.TROVE.resourceMetadata, _resource_description.as_quoted_graph()) @trovebrowse.gatherer(ns.TROVE.thesaurusEntry) diff --git a/trove/trovesearch/trovesearch_gathering.py b/trove/trovesearch/trovesearch_gathering.py index 14138cbf0..f10006920 100644 --- a/trove/trovesearch/trovesearch_gathering.py +++ b/trove/trovesearch/trovesearch_gathering.py @@ -40,7 +40,7 @@ ) -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) type GathererGenerator = Generator[rdf.RdfTriple | rdf.RdfTwople] diff --git a/trove/util/django.py b/trove/util/django.py index 77cf184bd..9b79165ee 100644 --- a/trove/util/django.py +++ b/trove/util/django.py @@ -16,18 +16,16 @@ def pk_chunked(queryset: QuerySet, chunksize: int) -> Generator[list]: ''' _ordered_qs = queryset.order_by('pk') _prior_end_pk = None - while True: # for each chunk: - _qs = ( - _ordered_qs - if _prior_end_pk is None - else _ordered_qs.filter(pk__gt=_prior_end_pk) - ) + _chunk_qs: QuerySet | None = _ordered_qs + while _chunk_qs is not None: # for each chunk: # load primary key values only - _pks = list(_qs.values_list('pk', flat=True)[:chunksize]) - if not _pks: - break # done - _end_pk = _pks[-1] - if (_prior_end_pk is not None) and (_end_pk <= _prior_end_pk): - raise RuntimeError(f'sentinel pks not ascending?? got {_end_pk} after {_prior_end_pk}') - _prior_end_pk = _end_pk - yield _pks + _pks = list(_chunk_qs.values_list('pk', flat=True)[:chunksize]) + if _pks: + _end_pk = _pks[-1] + if (_prior_end_pk is not None) and (_end_pk <= _prior_end_pk): + raise RuntimeError(f'sentinel pks not ascending?? got {_end_pk} after {_prior_end_pk}') + yield _pks + _prior_end_pk = _end_pk + _chunk_qs = _ordered_qs.filter(pk__gt=_prior_end_pk) + else: + _chunk_qs = None # done diff --git a/trove/util/iter.py b/trove/util/iter.py new file mode 100644 index 000000000..414febee5 --- /dev/null +++ b/trove/util/iter.py @@ -0,0 +1,19 @@ +from collections.abc import ( + Generator, + Hashable, + Iterable, +) + + +def iter_unique[T: Hashable](iterable: Iterable[T]) -> Generator[T]: + ''' + >>> list(iter_unique([1,1,1])) + [1] + >>> list(iter_unique([1,2,3,2,4,2,1,5])) + [1, 2, 3, 4, 5] + ''' + _seen = set() + for _item in iterable: + if _item not in _seen: + _seen.add(_item) + yield _item diff --git a/trove/views/_base.py b/trove/views/_base.py index 802aa56e2..cd2a0fcbd 100644 --- a/trove/views/_base.py +++ b/trove/views/_base.py @@ -26,7 +26,7 @@ if TYPE_CHECKING: from django.http import HttpResponse, StreamingHttpResponse, HttpRequest from trove.render import BaseRenderer - from trove.render._rendering import ProtoRendering + from trove.render.rendering import ProtoRendering __all__ = ( diff --git a/trove/views/_responder.py b/trove/views/_responder.py index 1d3365742..a0599e0f8 100644 --- a/trove/views/_responder.py +++ b/trove/views/_responder.py @@ -5,14 +5,21 @@ from django import http as djhttp from trove.render._base import BaseRenderer -from trove.render._rendering import ( - ProtoRendering, - StreamableRendering, -) +from trove.render.rendering import ProtoRendering +from trove.render.rendering.streamable import StreamableRendering +from trove.render.rendering.html_wrapped import HtmlWrappedRendering from trove.exceptions import TroveError from trove.vocab import mediatypes +_BROWSER_FRIENDLY_MEDIATYPES = { + mediatypes.HTML, + mediatypes.JSON, + mediatypes.JSONLD, + mediatypes.JSONAPI, +} + + def make_http_response( *, content_rendering: ProtoRendering, @@ -24,15 +31,26 @@ def make_http_response( if isinstance(content_rendering, StreamableRendering) else djhttp.HttpResponse ) + _download_filename = ( + http_request.GET.get('withFileName') + if http_request is not None + else None + ) + if ( + _download_filename is None + and content_rendering.mediatype not in _BROWSER_FRIENDLY_MEDIATYPES + and http_request is not None + and 'Accept' in http_request.headers + and http_request.accepts(mediatypes.HTML) + ): # when browsing in browser, return html (unless given filename) + content_rendering = HtmlWrappedRendering(content_rendering) _response = _response_type( content_rendering.iter_content(), - content_type=content_rendering.mediatype, + content_type=_make_content_type(content_rendering.mediatype), ) - if http_request is not None: - _requested_filename = http_request.GET.get('withFileName') - if _requested_filename is not None: - _file_name = _get_file_name(_requested_filename, content_rendering.mediatype) - _response.headers['Content-Disposition'] = _disposition(_file_name) + if _download_filename is not None: + _file_name = _get_file_name(_download_filename, content_rendering.mediatype) + _response.headers['Content-Disposition'] = _disposition(_file_name) return _response @@ -46,7 +64,7 @@ def make_http_error_response( return djhttp.HttpResponse( _content_rendering.iter_content(), status=error.http_status, - content_type=_content_rendering.mediatype, + content_type=_make_content_type(_content_rendering.mediatype), ) @@ -70,3 +88,13 @@ def _disposition(filename: str) -> bytes: b'filename=' + filename.encode('latin-1', errors='replace'), b"filename*=utf-8''" + filename.encode(), )) + + +def _make_content_type(mediatype: str) -> str: + """make a content-type header value from a mediatype + + currently just adds "charset=utf-8" to text mediatypes that don't already have one + """ + if mediatype.startswith('text/') and ('charset' not in mediatype): + return f'{mediatype};charset=utf-8' + return mediatype diff --git a/trove/views/browse.py b/trove/views/browse.py index 6739b53d7..e50b41721 100644 --- a/trove/views/browse.py +++ b/trove/views/browse.py @@ -47,6 +47,11 @@ def _default_include(cls): _ns.TROVE.usedAtPath, )) + def to_querydict(self): + _querydict = super().to_querydict() + _querydict['iri'] = self.iri + return _querydict + class BrowseIriView(GatheredTroveView): gathering_organizer = trovebrowse diff --git a/trove/vocab/mediatypes.py b/trove/vocab/mediatypes.py index 66495683a..71a1990f4 100644 --- a/trove/vocab/mediatypes.py +++ b/trove/vocab/mediatypes.py @@ -17,9 +17,27 @@ CSV: '.csv', } +_PARAMETER_DELIMITER = ';' + + +def strip_mediatype_parameters(mediatype: str) -> str: + """from a full mediatype that may have parameters, get only the base mediatype + + >>> strip_mediatype_parameters('text/plain;charset=utf-8') + 'text/plain' + >>> strip_mediatype_parameters('text/plain') + 'text/plain' + + note: does not validate that the mediatype exists or makes sense + >>> strip_mediatype_parameters('application/whatever ; blarg=foo') + 'application/whatever' + """ + (_base, _, __) = mediatype.partition(_PARAMETER_DELIMITER) + return _base.strip() + def dot_extension(mediatype: str) -> str: try: - return _file_extensions[mediatype] + return _file_extensions[strip_mediatype_parameters(mediatype)] except KeyError: raise ValueError(f'unrecognized mediatype: {mediatype}') diff --git a/trove/vocab/namespaces.py b/trove/vocab/namespaces.py index c0ebf1cb6..db86e679c 100644 --- a/trove/vocab/namespaces.py +++ b/trove/vocab/namespaces.py @@ -47,6 +47,8 @@ SHAREv2 = rdf.IriNamespace('https://share.osf.io/vocab/2017/sharev2/') # for the OSF metadata application profile (TODO: update to resolvable URL, when there is one) OSFMAP = rdf.IriNamespace('https://osf.io/vocab/2022/') +# non-standard namespace used by OSF for datacite terms (resolves to datacite docs) +DATACITE = rdf.IriNamespace('https://schema.datacite.org/meta/kernel-4/#') # for identifying jsonapi concepts with linked anchors on the jsonapi spec (probably fine) JSONAPI = rdf.IriNamespace('https://jsonapi.org/format/1.1/#') @@ -58,6 +60,7 @@ 'jsonapi': JSONAPI, 'oai': OAI, 'oai_dc': OAI_DC, + 'datacite': DATACITE, } if __debug__: # blarg: a nothing namespace for examples and testing diff --git a/trove/vocab/trove.py b/trove/vocab/trove.py index 7dd6d1a9e..ac7ac7a51 100644 --- a/trove/vocab/trove.py +++ b/trove/vocab/trove.py @@ -48,7 +48,7 @@ def trove_browse_link(iri: str) -> str: _compact = namespaces_shorthand().compact_iri(iri) return urllib.parse.urljoin( reverse('trove:browse-iri'), - f'?iri={urllib.parse.quote(_compact)}', + f'?blendCards&iri={urllib.parse.quote(_compact)}', ) @@ -494,7 +494,7 @@ def trove_browse_link(iri: str) -> str: unstable mediatypes (may change or sometimes respond 500): -* `text/html;charset=utf-8`: rdf as browsable html +* `text/html`: rdf as browsable html * `text/turtle`: rdf as [turtle](https://www.w3.org/TR/turtle/) * `application/ld+json`: rdf as [json-ld](https://www.w3.org/TR/json-ld11/)