diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 21f75c2..c3b7f93 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,7 +4,7 @@ jobs: test: strategy: matrix: - ckan-version: ["2.11", "2.10", 2.9] + ckan-version: ["2.11", "2.10"] fail-fast: false runs-on: ubuntu-latest @@ -12,7 +12,7 @@ jobs: # The CKAN version tag of the Solr and Postgres containers should match # the one of the container the tests run on. # You can switch this base image with a custom image tailored to your project - image: openknowledge/ckan-dev:${{ matrix.ckan-version }} + image: ckan/ckan-dev:${{ matrix.ckan-version }} services: solr: image: ckan/ckan-solr:${{ matrix.ckan-version }}-solr9 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4ed8bd9..446aac5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,25 +14,14 @@ repos: stages: [pre-commit] - id: debug-statements stages: [pre-push] - -## Isort -- repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort - name: isort - stages: [pre-commit] - -## Black -- repo: https://github.com/psf/black - rev: 24.4.0 - hooks: - - id: black stages: [pre-commit] ## Ruff - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.4.1 + rev: v0.5.0 hooks: - id: ruff + args: [--fix] + stages: [pre-commit] + - id: ruff-format stages: [pre-commit] diff --git a/README.md b/README.md index a91a291..2a517a4 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Tools for building interfaces for data collections using declarative style. This extension simplifies describing series of items, such as datasets from search page, users registered on portal, rows of CSV file, tables in DB, -etc. Once you defined the way items are obtained from data source, you'll get +etc. 
Once you defined the way of fetching items from data source, you'll get generic interface for pagination, search and displaying data in any format: HTML page, CSV document, JSON list, or any other custom format that you can describe. diff --git a/ckanext/collection/config.py b/ckanext/collection/config.py index e6909e3..2b5e5ac 100644 --- a/ckanext/collection/config.py +++ b/ckanext/collection/config.py @@ -13,39 +13,27 @@ def anonymous_collections() -> list[str]: - """Names of registered collections that are viewable by any visitor, - including anonymous. - """ - + """Names of registered public collections.""" return tk.config[CONFIG_ANNONYMOUS] def authenticated_collections() -> list[str]: - """Names of registered collections that are viewable by any authenticated - user. - """ + """Names of registered collections available to registered users.""" return tk.config[CONFIG_AUTHENTICATED] def include_htmx_asset() -> bool: - """Add HTMX asset to pages. Enable this option if you are using CKAN - v2.10. - """ - + """Add HTMX asset to pages.""" return tk.config[CONFIG_INCLUDE_ASSET] def htmx_init_modules() -> bool: - """Initialize CKAN JS modules every time HTMX fetches HTML from the - server. 
- """ - + """Initialize CKAN JS modules when HTMX fetches HTML from the server.""" return tk.config[CONFIG_INIT_MODULES] def serializer(format: str) -> type[types.BaseSerializer] | None: """Import path for serializer used by `format` export endpoint.""" - value = tk.config.get(f"ckanext.collection.export.{format}.serializer") if value: return import_string(value, silent=True) diff --git a/ckanext/collection/interfaces.py b/ckanext/collection/interfaces.py index 44e5700..e30100e 100644 --- a/ckanext/collection/interfaces.py +++ b/ckanext/collection/interfaces.py @@ -1,3 +1,5 @@ +"""Interfaces of the extension.""" + from __future__ import annotations from ckan.plugins import Interface @@ -6,7 +8,7 @@ class ICollection(Interface): - """Extend functionality of ckanext-collections + """Extend functionality of ckanext-collection. Example: ```python diff --git a/ckanext/collection/internal.py b/ckanext/collection/internal.py index d0f5114..d75a99c 100644 --- a/ckanext/collection/internal.py +++ b/ckanext/collection/internal.py @@ -1,6 +1,4 @@ -"""Logic used across collection utilities. - -""" +"""Logic used across collection utilities.""" from __future__ import annotations @@ -52,7 +50,6 @@ class AttachTrait(abc.ABC, Generic[types.TDataCollection]): logic. Example: - >>> class Impl(AttachTrait): >>> def __init__(self, collection): >>> self._attach(collection) @@ -187,7 +184,6 @@ def configurable_attribute( """Declare configurable attribute. Example: - >>> class DataFactory(Data): >>> private = configurable_attribute(False) >>> @@ -198,9 +194,9 @@ def configurable_attribute( class UserTrait(AttrSettingsTrait): - """Add configurable `user` attribute, with default set to - `current_user.name`. + """Add configurable `user` attribute. + Default value is set to `tk.current_user.name`.
""" user = configurable_attribute( diff --git a/ckanext/collection/plugin.py b/ckanext/collection/plugin.py index 73e6382..cdcd751 100644 --- a/ckanext/collection/plugin.py +++ b/ckanext/collection/plugin.py @@ -1,3 +1,5 @@ +"""Plugin of the extension.""" + from __future__ import annotations import operator diff --git a/ckanext/collection/tests/conftest.py b/ckanext/collection/tests/conftest.py index 3a9b6e8..ad2f905 100644 --- a/ckanext/collection/tests/conftest.py +++ b/ckanext/collection/tests/conftest.py @@ -6,6 +6,5 @@ @pytest.fixture() def collection_registry(): """Collection registry cleaned after each test.""" - yield internal.collection_registry internal.collection_registry.reset() diff --git a/ckanext/collection/tests/test_dive.py b/ckanext/collection/tests/test_dive.py index b40d303..d8b6a2f 100644 --- a/ckanext/collection/tests/test_dive.py +++ b/ckanext/collection/tests/test_dive.py @@ -34,12 +34,6 @@ def test_api_search(self): for pkg in col: assert isinstance(pkg, dict) - @pytest.mark.usefixtures("clean_db", "clean_index", "package") - def test_api_list(self): - col = ApiListCollection("", {}, data_settings={"action": "package_list"}) - for pkg in col: - assert isinstance(pkg, str) - @pytest.mark.usefixtures("clean_db", "user") def test_api(self): col = ApiCollection("", {}, data_settings={"action": "user_list"}) diff --git a/ckanext/collection/tests/utils/test_data.py b/ckanext/collection/tests/utils/test_data.py index 81be09a..ab254e9 100644 --- a/ckanext/collection/tests/utils/test_data.py +++ b/ckanext/collection/tests/utils/test_data.py @@ -234,30 +234,3 @@ def test_base(self, package_factory: Any): assert obj.total == 1 assert next(iter(obj))["id"] == ids[1] - - -@pytest.mark.usefixtures("clean_db") -class TestApiListData: - def test_base(self, organization_factory: Any): - ids = sorted([o["name"] for o in organization_factory.create_batch(3)]) - - collection = Collection("", {}) - obj = data.ApiListData(collection, action="organization_list") - - assert
obj.total == 3 - assert sorted(obj) == ids - - def test_payload(self, organization_factory: Any): - organization_factory(type="custom") - - collection = Collection("", {}) - - obj = data.ApiListData(collection, action="organization_list") - assert obj.total == 0 - - obj = data.ApiListData( - collection, - action="organization_list", - payload={"type": "custom"}, - ) - assert obj.total == 1 diff --git a/ckanext/collection/types.py b/ckanext/collection/types.py index 1631245..1e6dc0a 100644 --- a/ckanext/collection/types.py +++ b/ckanext/collection/types.py @@ -42,7 +42,7 @@ def service_name(self) -> str: class BaseColumns(abc.ABC, Service): - """Declaration of columns properties""" + """Declaration of columns properties.""" names: list[str] visible: set[str] @@ -77,7 +77,7 @@ def service_name(self): class BasePager(abc.ABC, Service): - """Declaration of pager properties""" + """Declaration of pager properties.""" params: dict[str, Any] diff --git a/ckanext/collection/utils/__init__.py b/ckanext/collection/utils/__init__.py index c4281b2..8b0bf4a 100644 --- a/ckanext/collection/utils/__init__.py +++ b/ckanext/collection/utils/__init__.py @@ -1,6 +1,5 @@ from .collection import ( ApiCollection, - ApiListCollection, ApiSearchCollection, Collection, CollectionExplorer, @@ -12,7 +11,6 @@ from .columns import Columns, DbColumns, TableColumns from .data import ( ApiData, - ApiListData, ApiSearchData, BaseModelData, BaseSaData, @@ -57,8 +55,6 @@ "UrlDbConnection", "ApiCollection", "ApiData", - "ApiListCollection", - "ApiListData", "ApiSearchCollection", "ApiSearchData", "BaseModelData", diff --git a/ckanext/collection/utils/collection/__init__.py b/ckanext/collection/utils/collection/__init__.py index 73208f2..63bf038 100644 --- a/ckanext/collection/utils/collection/__init__.py +++ b/ckanext/collection/utils/collection/__init__.py @@ -2,7 +2,7 @@ from ckanext.collection.utils.data import StaticData -from .api import ApiCollection, ApiListCollection, ApiSearchCollection 
+from .api import ApiCollection, ApiSearchCollection from .base import Collection from .db import DbCollection from .explorer import CollectionExplorer, DbExplorer @@ -13,7 +13,6 @@ "DbCollection", "ApiCollection", "ApiSearchCollection", - "ApiListCollection", "ModelCollection", "CollectionExplorer", "DbExplorer", diff --git a/ckanext/collection/utils/collection/api.py b/ckanext/collection/utils/collection/api.py index 10be12b..fcceb15 100644 --- a/ckanext/collection/utils/collection/api.py +++ b/ckanext/collection/utils/collection/api.py @@ -1,6 +1,6 @@ from __future__ import annotations -from ckanext.collection.utils.data import ApiData, ApiListData, ApiSearchData +from ckanext.collection.utils.data import ApiData, ApiSearchData from .base import Collection @@ -11,7 +11,3 @@ class ApiCollection(Collection): class ApiSearchCollection(ApiCollection): DataFactory = ApiSearchData - - -class ApiListCollection(ApiCollection): - DataFactory = ApiListData diff --git a/ckanext/collection/utils/collection/base.py b/ckanext/collection/utils/collection/base.py index f277b78..b83fe6f 100644 --- a/ckanext/collection/utils/collection/base.py +++ b/ckanext/collection/utils/collection/base.py @@ -167,7 +167,7 @@ def replace_service( ) -> types.BaseDbConnection | None: ... 
def replace_service(self, service: types.Service) -> types.Service | None: - """Attach service to collection""" + """Attach service to collection.""" old_service = getattr(self, service.service_name, None) setattr(self, service.service_name, service) return old_service diff --git a/ckanext/collection/utils/collection/explorer.py b/ckanext/collection/utils/collection/explorer.py index b3b6b75..95bd24d 100644 --- a/ckanext/collection/utils/collection/explorer.py +++ b/ckanext/collection/utils/collection/explorer.py @@ -19,8 +19,9 @@ class ExplorerSerializer(HtmlSerializer[types.TDataCollection]): extend_page_template: bool = internal.configurable_attribute( - default_factory=lambda self: tk.request - and not tk.request.headers.get("hx-request"), + default_factory=lambda self: bool( + tk.request and not tk.request.headers.get("hx-request"), + ), ) main_template: str = internal.configurable_attribute( "collection/serialize/explorer/main.html", diff --git a/ckanext/collection/utils/columns.py b/ckanext/collection/utils/columns.py index 0826406..16c7ef7 100644 --- a/ckanext/collection/utils/columns.py +++ b/ckanext/collection/utils/columns.py @@ -68,7 +68,7 @@ def _compute_set(self, value: Default | set[str]): return cast("set[str]", set()) if value is self.Default.ALL: - return {c for c in self.names} + return set(self.names) if value is self.Default.NOT_HIDDEN: return {c for c in self.names if c not in self.hidden} diff --git a/ckanext/collection/utils/data/__init__.py b/ckanext/collection/utils/data/__init__.py index 5ee8905..3a262dc 100644 --- a/ckanext/collection/utils/data/__init__.py +++ b/ckanext/collection/utils/data/__init__.py @@ -5,18 +5,17 @@ from ckanext.collection import internal, types -from .api import ApiData, ApiListData, ApiSearchData +from .api import ApiData, ApiSearchData from .base import Data from .db import DbData, TableData +from .misc import CsvFileData from .model import BaseSaData, ModelData, StatementSaData, UnionSaData -from .stream import 
CsvFileData __all__ = [ "Data", "CsvFileData", "TableData", "ApiData", - "ApiListData", "ApiSearchData", "UnionSaData", "UnionModelData", @@ -38,7 +37,25 @@ class StaticData(Data[types.TData, types.TDataCollection]): """Static data source. - This class turns existing iterable into a data source. + This class produces items from its `data` attribute. Use any sequence as a + value for `data` during initialization. + + Attributes: + data: sequence of items produced by the service + + Example: + ```python + NumericData = data.StaticData.with_attributes(data=range(1, 20)) + + UppercaseData = data.StaticData.with_attributes( + data="ABCDEFGHIJKLMNOPQRSTUVWXYZ", + ) + ``` + ```pycon + >>> col = collection.Collection(data_factory=NumericData) + >>> list(col) + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + ``` """ data: Iterable[types.TData] = internal.configurable_attribute( diff --git a/ckanext/collection/utils/data/api.py b/ckanext/collection/utils/data/api.py index 6cb2853..f3d84d4 100644 --- a/ckanext/collection/utils/data/api.py +++ b/ckanext/collection/utils/data/api.py @@ -17,10 +17,26 @@ class ApiData(Data[types.TData, types.TDataCollection], internal.UserTrait): """API data source. - This base class is suitable for building API calls. + This base class is suitable for building API calls. Its `compute_data` + makes a single request to the specified API action and yields items from + the response. Attributes: - action: API action that returns the data + action: API action that returns the data + payload: parameters passed to the action + ignore_auth: skip authorization checks + user (str): name of the user for the action.
Default: `tk.current_user.name` + + Example: + ```pycon + >>> col = collection.Collection( + >>> data_factory=data.ApiData, + >>> data_settings={"action": "group_list_authz", "user": "default"}, + >>> ) + >>> list(col) + [{...}, {...}] + ``` + """ action: str = internal.configurable_attribute() @@ -41,7 +57,28 @@ def compute_data(self): class ApiSearchData(ApiData[types.TData, types.TDataCollection]): - """API data source optimized for package_search-like actions.""" + """API data source optimized for package_search-like actions. + + This class expects that API action accepts `start` and `rows` parameters + that controls offset and limit. And result of the action must contain + `count` and `results` keys. + + This data service can iterate over huge number of items, reading just few + of them into the memory at once. + + Example: + ```pycon + >>> col = collection.Collection( + >>> data_factory=data.ApiSearchData, + >>> data_settings={ + >>> "action": "package_search", + >>> "payload": {"q": "res_format:CSV"}, + >>> }, + >>> ) + >>> list(col) + [{...}, {...}] + ``` + """ def prepare_payload(self) -> dict[str, Any]: payload = super().prepare_payload() @@ -79,56 +116,28 @@ def compute_total(self, data: dict[str, Any]) -> int: return data["count"] def range(self, start: int, end: int) -> Iterable[types.TData]: - """@inherit""" action = self.get_action() return action( self.make_context(), dict(self.prepare_payload(), rows=end - start, start=start), )["results"] - def __iter__(self) -> Iterator[types.TData]: - action = self.get_action() - context = self.make_context() - start = 0 - while True: - result = action(context, dict(self.prepare_payload(), start=start)) - - yield from result["results"] - start += len(result["results"]) - - if start >= result["count"] or not result["results"]: - break - - -class ApiListData(ApiSearchData[types.TData, types.TDataCollection]): - """API data source optimized for organization_list-like actions.""" - - def range(self, start: int, end: 
int) -> Iterable[types.TData]: - """@inherit""" + def at(self, index: int) -> types.TData: action = self.get_action() return action( self.make_context(), - dict(self.prepare_payload(), limit=end - start, offset=start), - ) - - def compute_data(self): - action = self.get_action() - return action(self.make_context(), self.prepare_payload()) - - def compute_total(self, data: dict[str, Any]) -> int: - return len(data) + dict(self.prepare_payload(), rows=1, start=index), + )["results"][0] def __iter__(self) -> Iterator[types.TData]: action = self.get_action() context = self.make_context() start = 0 while True: - result = action(context, dict(self.prepare_payload(), offset=start)) - if not result: - break + result = action(context, dict(self.prepare_payload(), start=start)) - yield from result - start += len(result) + yield from result["results"] + start += len(result["results"]) - if start >= self.total: + if start >= result["count"] or not result["results"]: break diff --git a/ckanext/collection/utils/data/base.py b/ckanext/collection/utils/data/base.py index 619dc5d..11394d0 100644 --- a/ckanext/collection/utils/data/base.py +++ b/ckanext/collection/utils/data/base.py @@ -11,14 +11,28 @@ class Data( internal.Domain[types.TDataCollection], Generic[types.TData, types.TDataCollection], ): - """Data source for collection. + """Base data source for collection. - This class produces data for collection. + This class defines an outline of the data service. In the basic case, a subclass + should override `compute_data` method and return a Sequence from it to keep + all methods functional.
+ + Example: + ```python + class MyData(data.Data): + def compute_data(self): + return range(1, 20) + ``` """ - def __init__(self, obj: types.TDataCollection, /, **kwargs: Any): - super().__init__(obj, **kwargs) + @cached_property + def _data(self): + return self.compute_data() + + @cached_property + def _total(self) -> int: + return self.compute_total(self._data) def __iter__(self) -> Iterator[types.TData]: yield from self._data @@ -61,11 +75,3 @@ def at(self, index: Any) -> types.TData: @property def total(self): return self._total - - @cached_property - def _data(self): - return self.compute_data() - - @cached_property - def _total(self) -> int: - return self.compute_total(self._data) diff --git a/ckanext/collection/utils/data/misc.py b/ckanext/collection/utils/data/misc.py new file mode 100644 index 0000000..5b8e6ac --- /dev/null +++ b/ckanext/collection/utils/data/misc.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import csv +import logging + +from ckanext.collection import internal, types + +from .base import Data + +log = logging.getLogger(__name__) + + +class CsvFileData(Data[types.TData, types.TDataCollection]): + """Data source for CSV file. + + CSV file available at path specified by `source` attribute of the service + is read into memory and its every row transformed into dictionary. + + Attributes: + source: path to CSV source + + Example: + ```pycon + >>> col = collection.Collection( + >>> data_factory=data.CsvFileData, + >>> data_settings={"source": "/path/to/file.csv"}, + >>> ) + >>> list(col) + [ + {"column_1": "value_1", "column_2": "value_2"}, + ... 
+ ] + ``` + + + + """ + + source: str = internal.configurable_attribute() + + def compute_data(self): + with open(self.source) as src: + reader = csv.DictReader(src) + return list(reader) diff --git a/ckanext/collection/utils/data/model.py b/ckanext/collection/utils/data/model.py index 198fde7..157f783 100644 --- a/ckanext/collection/utils/data/model.py +++ b/ckanext/collection/utils/data/model.py @@ -26,7 +26,33 @@ class BaseSaData( Data[types.TData, types.TDataCollection], Generic[TStatement, types.TData, types.TDataCollection], ): - """Data source for custom SQL statement.""" + """Abstract data source for SQL statements. + + This class can be extended to build data source over SQL statement. + + Attributes: + use_naive_filters: search by filterable columns from `params`. Default: true + use_naive_search: if `params` contains `q`, ILIKE it against searchable + columns. Default: true + session: SQLAlchemy session + + Example: + ```python + import sqlalchemy as sa + from ckan import model + + class UserData(data.BaseSaData): + def get_base_statement(self): + return sa.select(model.User.name) + ``` + + ```pycon + >>> col = collection.Collection(data_factory=data.UserData) + >>> list(col) + [("default",), (...,)] + ``` + + """ _data: cached_property[TStatement] use_naive_filters: bool = internal.configurable_attribute(True) @@ -68,7 +94,7 @@ def alter_statement(self, stmt: TStatement): return stmt def count_statement(self, stmt: TStatement) -> int: - """Count number of items in query""" + """Count number of items in query.""" count_stmt: Select = sa.select(sa.func.count()).select_from(stmt) return cast(int, self._execute(count_stmt).scalar()) @@ -80,7 +106,6 @@ def _into_clause(self, column: ColumnElement[Any], value: Any): def statement_with_filters(self, stmt: TStatement) -> TStatement: """Add normal filter to statement.""" - if not isinstance(stmt, Select): return stmt @@ -152,7 +177,21 @@ def statement_with_sorting(self, stmt: TStatement): class 
StatementSaData(BaseSaData[Select, types.TData, types.TDataCollection]): - """Data source for custom SQL statement.""" + """Data source for arbitrary SQL statement. + + Attributes: + statement (sqlalchemy.sql.Select): select statement + + Example: + ```pycon + >>> col = collection.Collection( + >>> data_factory=data.StatementSaData, + >>> data_settings={"statement": sa.select(model.User.name)}, + >>> ) + >>> list(col) + [("default",), (...,)] + ``` + """ statement: Any = internal.configurable_attribute(None) @@ -162,7 +201,27 @@ def get_base_statement(self): class UnionSaData(BaseSaData[Select, types.TData, types.TDataCollection]): - """Data source for custom SQL statement.""" + """Data source for multiple SQL statement merged with UNION ALL. + + Attributes: + statements (sqlalchemy.sql.Select): select statements + + Example: + ```pycon + >>> col = collection.Collection( + >>> data_factory=data.UnionSaData, + >>> data_settings={"statements": [ + >>> sa.select(model.User.name, sa.literal("user")), + >>> sa.select(model.Package.name, sa.literal("package")), + >>> sa.select(model.Group.name, sa.literal("group")), + >>> ]}, + >>> ) + >>> list(col) + [("default", "user"), + ("warandpeace", "package"), + ("my-cool-group", "group")] + ``` + """ statements: Iterable[GenerativeSelect] = internal.configurable_attribute( default_factory=lambda self: [], @@ -174,13 +233,22 @@ def get_base_statement(self): class ModelData(BaseSaData[Select, types.TData, types.TDataCollection]): - """DB data source. - - This base class is suitable for building SQL query. + """Data source for SQLAlchemy model. Attributes: model: main model used by data source - is_scalar: return model instance instead of columns set. + is_scalar: return model instance instead of collection of columns. + + Example: + ```pycon + >>> col = collection.Collection( + >>> data_factory=data.ModelData, + >>> data_settings={"model": model.User, "is_scalar": True}, + >>> ) + >>> list(col) + [, ...] 
+ ``` + """ model: Any = internal.configurable_attribute(None) @@ -221,8 +289,7 @@ def select_columns(self) -> Iterable[Any]: return [self.model] if self.is_scalar else [mapper.columns] def get_extra_sources(self) -> dict[str, Any]: - """Return mapping of additional models/subqueries used to build the - statement. + """Return mapping of additional models/subqueries for statement. Note: Don't call this method direclty. Instead, use `extra_sources` property, that caches return value of the current method. Extra sources @@ -267,7 +334,6 @@ def get_joins(self) -> Iterable[tuple[str, Any, bool]]: def apply_joins(self, stmt: Select) -> Select: """Return list of columns for select statement.""" - sources = self.extra_sources for name, condition, isouter in self.get_joins(): diff --git a/ckanext/collection/utils/data/stream.py b/ckanext/collection/utils/data/stream.py deleted file mode 100644 index 08adf96..0000000 --- a/ckanext/collection/utils/data/stream.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -import csv -import logging - -from ckanext.collection import internal, types - -from .base import Data - -log = logging.getLogger(__name__) - - -class CsvFileData(Data[types.TData, types.TDataCollection]): - source = internal.configurable_attribute() - - def compute_data(self): - with open(self.source) as src: - reader = csv.DictReader(src) - return list(reader) diff --git a/ckanext/collection/utils/serialize/__init__.py b/ckanext/collection/utils/serialize/__init__.py index ac7ff13..ad54b1f 100644 --- a/ckanext/collection/utils/serialize/__init__.py +++ b/ckanext/collection/utils/serialize/__init__.py @@ -58,7 +58,7 @@ class Serializer( internal.Domain[types.TDataCollection], Generic[types.TSerialized, types.TDataCollection], ): - """Base collection serializer. + r"""Base collection serializer. For any derived implementation, `serialize` must transfrom data of the collection into expected format. 
Example: @@ -68,7 +68,7 @@ class Serializer( >>> yield yaml.dump(record) >>> >>> def serialize(self): - >>> return "---\n".join(self.stream()) + >>> return "---\n".join(self.stream()) """ @@ -119,7 +119,6 @@ def dictize_row(self, row: Any) -> dict[str, Any]: class StreamingSerializer( Serializer[types.TSerialized, types.TDataCollection], ): - @abc.abstractmethod def stream(self) -> Iterable[types.TSerialized]: """Iterate over fragments of the content. @@ -137,7 +136,6 @@ def serialize(self) -> types.TSerialized: class DictListSerializer( StreamingSerializer["list[dict[str, Any]]", types.TDataCollection], ): - def stream(self): """Iterate over fragments of the content.""" for item in self.attached.data: @@ -145,7 +143,6 @@ def stream(self): class RenderableSerializer(StreamingSerializer[str, types.TDataCollection]): - def stream(self) -> Iterable[str]: """Iterate over fragments of the content.""" yield "" @@ -222,10 +219,7 @@ def stream(self): class ChartJsSerializer(StreamingSerializer[str, types.TDataCollection]): - """Serialize collection into data source for ChartJS module of - ckanext-charts.
- - """ + """Serialize collection into data source for ChartJS.""" label_column: str = internal.configurable_attribute("") dataset_columns: list[str] = internal.configurable_attribute( diff --git a/ckanext/collection/views.py b/ckanext/collection/views.py index 37e8fc3..acbc31c 100644 --- a/ckanext/collection/views.py +++ b/ckanext/collection/views.py @@ -1,3 +1,5 @@ +"""Views of the extension.""" + from __future__ import annotations from flask import Blueprint @@ -18,7 +20,7 @@ from ckanext.ap_main.views.generics import ApConfigurationPageView class ApConfiguration(ApConfigurationPageView): - pass + """Config page for admin panel.""" bp.add_url_rule( "/admin-panel/config/collection", @@ -31,6 +33,7 @@ class ApConfiguration(ApConfigurationPageView): @bp.route("/api/util/collection//render") def render(name: str) -> str | bytes: + """Render public collection.""" try: tk.check_access("collection_view_render", {}, {"name": name}) except tk.NotAuthorized: @@ -49,6 +52,7 @@ def render(name: str) -> str | bytes: @bp.route("/api/util/collection//export") @bp.route("/api/util/collection//export/") def export(name: str, format: str | None = None) -> types.Response: + """Serialize and download public collection.""" try: tk.check_access("collection_view_export", {}, {"name": name}) except tk.NotAuthorized: diff --git a/docs/interfaces.md b/docs/interfaces.md index 283d1cb..c22aaf5 100644 --- a/docs/interfaces.md +++ b/docs/interfaces.md @@ -2,6 +2,6 @@ ## ICollection -::: ckanext.collection.interfaces.ICollection +::: collection.interfaces.ICollection options: show_bases: false diff --git a/docs/structure/collection.md b/docs/structure/collection.md index feeef5e..ef5cb4f 100644 --- a/docs/structure/collection.md +++ b/docs/structure/collection.md @@ -51,12 +51,100 @@ name and colon is removed. Then, the prefix is removed. /// +/// details | Why `params` are transformed? 
+ type: tip + +As long as collection is initialized manually and doesn't have a name, you don't +need to think about `params` transformation. + +```pycon +>>> col = collection.Collection("", {"a": 1, "xxx:b": 2, "yyy:c": 3}) +>>> col.params +{"a": 1, "xxx:b": 2, "yyy:c": 3} +``` + +Transformation becomes important when you initialize registered *named* +collection via `get_collection` + +```pycon +>>> col = get_collection( +>>> "my-collection", +>>> {"a": 1, "my-collection:b": 2}, +>>> ) +>>> col.params +{"b": 2} +``` + +This design decision was made to simplify rendering collections on webpages. + +Imagine the page that renders `users` and `packages` collections. These +collections are rendered as tables with pagination and view code looks like this: + +```python +import ckan.plugins.toolkit as tk +from ckan.logic import parse_params + +@route(...) +def users_and_packages(): + params = parse_params(tk.request.args) + + users = get_collection("users", params) + packages = get_collection("packages", params) + + return tk.render(template, { + "users": users, + "packages": packages, + }) + +``` + +Because `params` uses collection name as prefix, it's possible to paginate +collections separately. Query string `?users:page=2&packages:page=8` is parsed +into `params` dictionary on the first line of view. This dictionary contains +both page values with prefixes. When `users` and `packages` collections are +initialized, they pick only relevant values from `params`, so `users` takes +`page=2` and `packages` takes `page=8`. + +In this way, `params` flow naturally from user into collection.
When you are +initializing collections in code, most likely you'll interact with collection +classes instead of `get_collection`, so you can leave collection name empty and +keep all `params`: + +```python +col = MyCollection("", {...}) +``` + +And when you must use `get_collection` with named collection, but want to pass +all `params` into collection, you can easily add prefixes yourself: + +```pycon +>>> data = {"a": 1, "b": 2} +>>> name = "my-collection" +>>> col = get_collection(name, {f"{name}:{k}": v for k, v in data.items()}) +>>> col.params +{"a": 1, "b": 2} +``` + +And to make it even simpler, `get_collection` accepts `prefix_params` as 3rd +positional argument. When this flag is enabled, prefixes are added +automatically, so you can achieve the same effect as in snippet above using +short version: + +```pycon +>>> col = get_collection("my-collection", {"a": 1, "b": 2}, True) +>>> col.params +{"a": 1, "b": 2} +``` + + +/// + ## Initialization When a collection is created, it initializes services using service factories and service settings. `data` service is initialized using -`Collection.DataFactory` class, `serializer` is initialized using -`Collection.SerializerService`, etc. +`Collection.DataFactory` class and `data_settings`, `serializer` is initialized +using `Collection.SerializerService` and `serializer_settings`, etc. This logic creates a workflow for defining new collections. Create a subclass of base Collection and override `*Factory` of this new class. @@ -193,8 +281,8 @@ Or even: This form is convenient when you experimenting with collections or creating them dynamically. But more often you'll create a separate class for collection -and services. This flow is more readable and flexible, as you keep all the -derived classes and can combine/reuse them in future. +and services. Using separate classes is more readable and flexible, as you keep +all the derived classes and can combine/reuse them in future.
/// diff --git a/docs/structure/data.md b/docs/structure/data.md index 6336292..ffaf58d 100644 --- a/docs/structure/data.md +++ b/docs/structure/data.md @@ -1,56 +1,228 @@ # Data +## Overview This service produces the data for collection. Every data service must: -* be Iterable and iterate over all available records by default +* be `Iterable` +* yield all existing records during iteration. I.e., if data service produces + datasets from `package_search` API, `list(data)` must contain **all** + datasets from the search index, not only the first 10 or 20. * define `total` property, that reflects number of available records so that `len(list(data)) == data.total` * define `range(start: Any, end: Any)` method that returns slice of the data Base class for data services - `Data` - already contains a simple version of -this logic. You need to define only one method to make you custom -implementations: `compute_data()`. When data if accessed for the first time, -`compute_data` is called. Its result cached and used for iteration in -for-loops, slicing via `range` method and size measurement via `total` -property. +this logic. Just override `compute_data()` and return a sequence with records +from it, to satisfy minimal requirements of the data service. 
+ + +/// admonition + type: example ```python -class CustomData(Data): - def compute_data(self) -> Any: +class MyData(data.Data): + def compute_data(self): return "abcdefghijklmnopqrstuvwxyz" +``` -col = Collection("name", {}, data_factory=CustomData) -assert list(col) == ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] -assert col.data.total == 26 -assert col.data.range(-3, None) == "xyz" +```pycon +>>> col = collection.Collection(data_factory=MyData) +>>> list(col) +["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] +>>> col.data.total +26 +>>> col.data.range(-3, None) +"xyz" ``` +/// -If you need more complex data source, make sure you defined `__iter__`, -`total`, and `range`: +Using `compute_data` simplifies defining data services, but it's not +required. You can explicitly implement all methods: -```python -class CustomData(Data): - names = configurable_attribute(default_factory=["Anna", "Henry", "Mary"]) +/// admonition + type: example + +```python +class MyData(data.Data): + letters = "abcdefghijklmnopqrstuvwxyz" @property def total(self): - return len(self.names) + return len(self.letters) def __iter__(self): - yield from sorted(self.names) + yield from self.letters + + def range(self, start, end): + return self.letters[start:end] + +``` +/// + + +## Base `Data` class + +This class defines a couple of standard helpers in addition to minimal +requirements of data service. + +Most importantly, it caches the result of `compute_data` when data or data length +is accessed. Because of it, items and length of the data service are not +updated in runtime. - def range(self, start: Any, end: Any): - if not isinstance(start, str) or not isinstance(end, str): - return [] +/// admonition + type: example - for name in self: - if name < start: - continue - if name > end: - break - yield name +In the following example, items from data service and its length are not +changed after assigning to `items`, because `compute_data` is called only +during first access to data. 
After this point, data service uses cached result +of the first `compute_data` call. +```python +class MyData(data.Data): + items = [1, 2, 3] + + def compute_data(self): + return self.items +``` +```pycon +>>> col = collection.Collection(data_factory=MyData) +>>> list(col.data) +[1, 2, 3] +>>> col.data.total +3 +>>> col.data.items = [] # (1)! +>>> list(col.data) +[1, 2, 3] +>>> col.data.total +3 ``` + +1. This has no effect, because data is already cached and `items` property will + not be used anymore. + +To reset the cache and use `compute_data` again, call `refresh_data()` method +of the data service. + +```pycon +>>> col.data.items = "hello" +>>> col.data.refresh_data() +>>> list(col.data) +["h", "e", "l", "l", "o"] +>>> col.data.total +5 +``` + +/// + +Base `Data` class expects that `compute_data` returns a +[`collections.abc.Sequence`](https://docs.python.org/3/library/collections.abc.html#collections.abc.Sequence). +With this expectation it implements `range(start, end)` that returns slice of +the data, and `at(index)` that returns element with specified index. + +/// admonition + type: example + +```python +class MyData(data.Data): + def compute_data(self): + return "hello world" +``` + +```pycon +>>> col = collection.Collection(data_factory=MyData) +>>> col.data.at(4) +"o" +>>> col.data.range(6, None) +"world" +``` + +These methods are also accessible via index operator. + +```pycon +>>> col.data[4] +"o" +>>> col.data[6:] +"world" +``` + +/// + + +If you are not going to rely on `compute_data` when extending `Data` class, +implement your own caching logic and index-access, if you need them. + +## Available data factories + +These factories are available at `ckanext.collection.shared.data`. 
+ +::: collection.shared.data.Data + options: + show_root_heading: true + show_root_toc_entry: true + show_bases: false + heading_level: 3 + members: [] + +::: collection.shared.data.StaticData + options: + show_root_heading: true + show_root_toc_entry: true + show_bases: false + heading_level: 3 + members: [] + +::: collection.shared.data.CsvFileData + options: + show_root_heading: true + show_root_toc_entry: true + show_bases: false + heading_level: 3 + members: [] + +::: collection.shared.data.ApiData + options: + show_root_heading: true + show_root_toc_entry: true + show_bases: false + heading_level: 3 + members: [] + +::: collection.shared.data.ApiSearchData + options: + show_root_heading: true + show_root_toc_entry: true + heading_level: 3 + members: [] + +::: collection.shared.data.BaseSaData + options: + show_root_heading: true + show_root_toc_entry: true + show_bases: false + heading_level: 3 + members: [] + +::: collection.shared.data.StatementSaData + options: + show_root_heading: true + show_root_toc_entry: true + heading_level: 3 + members: [] + + +::: collection.shared.data.UnionSaData + options: + show_root_heading: true + show_root_toc_entry: true + heading_level: 3 + members: [] + +::: collection.shared.data.ModelData + options: + show_root_heading: true + show_root_toc_entry: true + heading_level: 3 + members: [] diff --git a/docs/structure/domain.md b/docs/structure/domain.md index 3c61990..2489554 100644 --- a/docs/structure/domain.md +++ b/docs/structure/domain.md @@ -12,7 +12,7 @@ chapter can be applied to every service you see in a real application. The abstract service actually consists of two classes. The first one is `types.Service`. This is an abrstract class, which contains abstract property -`service_name`. This class identifies the name of the service inside the +`service_name`. The property identifies the name of the service inside the collection. 
The base `data.Data` class implements `types.Service` and its implementation of diff --git a/docs/usage.md b/docs/usage.md index ee6acbd..42effee 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -93,7 +93,7 @@ def my_factory(name: str, params: dict[str, Any], **kwargs: Any): ## Initialize collection Collection class defines the data source of collection and different aspects of -it behavior. But collection class itself does not contain any data and +its behavior. But collection class itself does not contain any data and collection instance must be created to work with data. Any collection can be initialized directly, using collection class. And every diff --git a/mkdocs.yml b/mkdocs.yml index 3364676..f8e0aaf 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,6 +25,7 @@ plugins: - mkdocstrings: handlers: python: + paths: ["ckanext"] options: show_root_full_path: false show_root_toc_entry: false @@ -85,11 +86,11 @@ nav: - structure/index.md - structure/collection.md - structure/domain.md - - structure/columns.md - structure/data.md - - structure/filters.md - - structure/serializer.md - structure/pager.md + - structure/columns.md + - structure/serializer.md + - structure/filters.md - API: - api/index.md - configuration.md diff --git a/pyproject.toml b/pyproject.toml index 04d9403..5d26585 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,20 +5,27 @@ [tool.ruff] target-version = "py38" lint.select = [ + "ANN0", # type annotations for function arguments "B", # likely bugs and design problems "BLE", # do not catch blind exception - "C40", # better list/set/dict comprehensions + "C4", # better list/set/dict comprehensions "C90", # check McCabe complexity - "COM", # trailing commas + "DTZ", # enforce timezone in date objects "E", # pycodestyle error "W", # pycodestyle warning "F", # pyflakes + "FA", # verify annotations from future "G", # format strings for logging statements "N", # naming conventions + "I", # isort + "ICN", # import conventions + # "D1", # require doc 
+ "D2", # doc formatting + "D4", # doc convention # "PL", # pylint + "PERF", # performance anti-patterns "PT", # pytest style "PIE", # misc lints - "Q", # preferred quoting style "RET", # improvements for return statements "RSE", # improvements for rise statements # "S", # security testing @@ -31,20 +38,37 @@ lint.select = [ ] lint.ignore = [ + "RET503", # don't enforce return-None "E712", # comparison to bool: violated by SQLAlchemy filters "PT004", # fixture does not return anything, add leading underscore: violated by clean_db "PLC1901", # simplify comparison to empty string: violated by SQLAlchemy filters ] [tool.ruff.lint.per-file-ignores] -"ckanext/collection/tests*" = ["S", "PL"] +"ckanext/collection/tests*" = ["S", "PL", "ANN", "D"] "ckanext/collection/tests/test_dive.py" = ["F403", "F405"] -[tool.isort] -known_ckan = "ckan" -known_ckanext = "ckanext" -known_self = "ckanext.collection" -sections = "FUTURE,STDLIB,FIRSTPARTY,THIRDPARTY,CKAN,CKANEXT,SELF,LOCALFOLDER" -profile = "black" + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.isort] +section-order = [ + "future", + "standard-library", + "first-party", + "third-party", + "ckan", + "ckanext", + "self", + "local-folder", +] + +[tool.ruff.lint.isort.sections] +# Group CKAN core, other ckanext and this extension's imports into separate sections. 
+ckan = ["ckan"] +ckanext = ["ckanext"] +self = ["ckanext.collection"] + [tool.pytest.ini_options] addopts = "--ckan-ini test.ini" diff --git a/setup.cfg b/setup.cfg index 688139d..444ba71 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,7 +23,7 @@ python_requires = >= 3.8 packages = find: namespace_packages = ckanext install_requires = - typing-extensions>=4.4.0 + typing-extensions include_package_data = True [options.entry_points] diff --git a/setup.py b/setup.py index 7ffbc01..de8315f 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,6 @@ from setuptools import setup setup( - # If you are changing from the default layout of your extension, you may - # have to change the message extractors, you can read more about babel - # message extraction at - # http://babel.pocoo.org/docs/messages/#extraction-method-mapping-and-configuration message_extractors={ "ckanext": [ ("**.py", "python", None),