diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml new file mode 100644 index 0000000..8f191a2 --- /dev/null +++ b/.github/workflows/pr.yml @@ -0,0 +1,51 @@ +name: Test PR + +on: + pull_request: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + services: + postgres: + image: postgres:14.3-alpine + env: + POSTGRES_DB: postgres + POSTGRES_PORT: 5432 + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + ports: + - '127.0.0.1:5432:5432' + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - uses: actions/cache@v3 + id: cache-venv + with: + path: | + ~/.local + .venv + key: ${{ hashFiles('**/poetry.lock') }}-${{ steps.setup-python.outputs.python-version }}-1 + + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-create: false + version: 1.5.0 + - run: | + python -m venv .venv --upgrade-deps + source .venv/bin/activate + poetry install --no-interaction --all-extras + if: steps.cache-venv.outputs.cache-hit != 'true' + + - name: Run tests + env: + PSQL_DATABASE_TEST: postgresql://postgres:postgres@localhost:5432/postgres + run: | + source .venv/bin/activate + pytest diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c0c679e..8997d0c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -6,17 +6,41 @@ on: - main jobs: - # build-and-publish-test: - # runs-on: ubuntu-latest - # steps: - # - uses: snok/.github/workflows/publish@main - # with: - # overwrite-repository: true - # repository-url: https://test.pypi.org/legacy/ - # token: ${{ secrets.TEST_PYPI_TOKEN }} - # python-version: '3.10.0' + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - uses: actions/cache@v3 + id: cache-venv + with: + path: | + ~/.local + .venv + key: ${{ hashFiles('**/poetry.lock') }}-${{ steps.setup-python.outputs.python-version }}-1 + + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-create: false + version: 1.5.0 + - run: | + python -m venv .venv --upgrade-deps + source .venv/bin/activate + poetry install --no-interaction --all-extras + if: steps.cache-venv.outputs.cache-hit != 'true' + + + - name: Run tests + run: | + source .venv/bin/activate + pytest + build-and-publish: - # needs: build-and-publish-test + needs: [test] runs-on: ubuntu-latest steps: - uses: MatsMoll/.github/workflows/publish@patch-1 diff --git a/.gitignore b/.gitignore index db8e2e6..f181542 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ .pg/* .DS_STORE +test_data/feature-store.json + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5a71cdd..0d2d19b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,4 @@ +default_stages: [commit] repos: - repo: https://github.com/psf/black rev: 22.3.0 @@ -5,13 +6,8 @@ repos: - id: black args: [ "--quiet" ] - - repo: https://github.com/pycqa/isort - rev: 5.10.1 - hooks: - - id: isort - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 + rev: v4.4.0 hooks: - id: check-ast - id: check-merge-conflict @@ -25,43 +21,35 @@ repos: - id: trailing-whitespace - id: double-quote-string-fixer - - repo: https://github.com/asottile/yesqa - rev: v1.3.0 + - repo: https://github.com/asottile/pyupgrade + rev: v3.7.0 hooks: - - id: yesqa - additional_dependencies: &flake8_deps - - flake8-bugbear + - id: pyupgrade + 
types: [ python ] + args: [ "--py36-plus", "--py37-plus", "--py38-plus", "--py39-plus", "--py310-plus"] + files: 'cloud\/.*\.py' + + # Static type and code checkers below + + - repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + additional_dependencies: - flake8-comprehensions - flake8-print - flake8-mutable - - flake8-simplify - flake8-pytest-style - flake8-printf-formatting - - 'flake8-simplify==0.18' - - 'flake8-type-checking==1.3.2' + - 'flake8-simplify==0.19.2' + - 'flake8-type-checking==2.1.3' + args: [ '--enable-extensions=G' ] - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 + - repo: https://github.com/python-poetry/poetry + rev: 1.5.0 hooks: - - id: flake8 - additional_dependencies: *flake8_deps - - - repo: https://github.com/sirosen/check-jsonschema - rev: 0.14.2 - hooks: - - id: check-github-actions - - id: check-github-workflows - - - repo: https://github.com/asottile/pyupgrade - rev: v2.31.1 - hooks: - - id: pyupgrade - args: [ "--py36-plus", "--py37-plus", '--py38-plus', '--py39-plus'] + - id: poetry-check - - repo: https://github.com/hadialqattan/pycln - rev: v1.2.5 - hooks: - - id: pycln - repo: https://github.com/pre-commit/mirrors-mypy rev: v0.942 diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..c84ccce --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.10.5 diff --git a/aligned/__init__.py b/aligned/__init__.py index ca86707..dff49b1 100644 --- a/aligned/__init__.py +++ b/aligned/__init__.py @@ -10,7 +10,7 @@ String, Timestamp, ) -from aligned.compiler.model import Model +from aligned.compiler.model import ModelContract from aligned.data_source.stream_data_source import HttpStreamSource from aligned.feature_store import FeatureStore from aligned.feature_view import ( @@ -56,6 +56,6 @@ 'EventTimestamp', 'Timestamp', 'Json', - 'Model', + 'ModelContract', 'TextVectoriserModel', ] diff --git a/aligned/cli.py b/aligned/cli.py index fac4d4b..b05a647 100644 --- a/aligned/cli.py +++ b/aligned/cli.py @@ -246,14 +246,7 @@ def serve_command( default='.env', help='The path to env variables', ) -@click.option( - '--prune-unused-features', - default=False, - help='Will only process features that are used in a model if set to True', -) -async def serve_worker_command( - repo_path: str, worker_path: str, env_file: str, prune_unused_features: bool -) -> None: +async def serve_worker_command(repo_path: str, worker_path: str, env_file: str) -> None: """ Starts a API serving the feature store """ @@ -269,7 +262,7 @@ async def serve_worker_command( worker = StreamWorker.from_object(dir, reference_file_path, obj) - await worker.start(prune_unused_features) + await worker.start() @cli.command('materialize') diff --git a/aligned/compiler/feature_factory.py b/aligned/compiler/feature_factory.py index 2e7e4a5..a9438d8 100644 --- a/aligned/compiler/feature_factory.py +++ b/aligned/compiler/feature_factory.py @@ -85,7 +85,7 @@ class EventTrigger: @dataclass class TargetProbability: of_value: Any - target: ClassificationTarget + target: ClassificationLabel _name: str | None = None def __hash__(self) -> int: @@ -107,7 +107,7 @@ def feature_referance(self) -> FeatureReferance: @dataclass -class RegressionTarget(FeatureReferencable): +class RegressionLabel(FeatureReferencable): feature: FeatureFactory event_trigger: EventTrigger | None = field(default=None) ground_truth_event: StreamDataSource | None = field(default=None) @@ -124,17 +124,17 @@ def feature_referance(self) -> FeatureReferance: raise ValueError('Missing location, can not 
create reference') return FeatureReferance(self._name, self._location, self.feature.dtype) - def listen_to_ground_truth_event(self, stream: StreamDataSource) -> RegressionTarget: - return RegressionTarget( + def listen_to_ground_truth_event(self, stream: StreamDataSource) -> RegressionLabel: + return RegressionLabel( feature=self.feature, event_trigger=self.event_trigger, ground_truth_event=stream, ) - def send_ground_truth_event(self, when: Bool, sink_to: StreamDataSource) -> RegressionTarget: + def send_ground_truth_event(self, when: Bool, sink_to: StreamDataSource) -> RegressionLabel: assert when.dtype == FeatureType('').bool, 'A trigger needs a boolean condition' - return RegressionTarget( + return RegressionLabel( self.feature, EventTrigger(when, sink_to), ground_truth_event=self.ground_truth_event ) @@ -162,7 +162,7 @@ def compile(self) -> RegressionTargetSchemas: @dataclass -class ClassificationTarget(FeatureReferencable): +class ClassificationLabel(FeatureReferencable): feature: FeatureFactory event_trigger: EventTrigger | None = field(default=None) ground_truth_event: StreamDataSource | None = field(default=None) @@ -179,17 +179,17 @@ def feature_referance(self) -> FeatureReferance: raise ValueError('Missing location, can not create reference') return FeatureReferance(self._name, self._location, self.feature.dtype) - def listen_to_ground_truth_event(self, stream: StreamDataSource) -> ClassificationTarget: - return ClassificationTarget( + def listen_to_ground_truth_event(self, stream: StreamDataSource) -> ClassificationLabel: + return ClassificationLabel( feature=self.feature, event_trigger=self.event_trigger, ground_truth_event=stream, ) - def send_ground_truth_event(self, when: Bool, sink_to: StreamDataSource) -> ClassificationTarget: + def send_ground_truth_event(self, when: Bool, sink_to: StreamDataSource) -> ClassificationLabel: assert when.dtype == FeatureType('').bool, 'A trigger needs a boolean condition' - return ClassificationTarget(self.feature, EventTrigger(when, sink_to)) + return ClassificationLabel(self.feature, EventTrigger(when, sink_to)) def probability_of(self, value: Any) -> TargetProbability: """Define a value that will be the probability of a certain target class. 
@@ -299,11 +299,11 @@ def feature(self) -> Feature: constraints=self.constraints, ) - def as_classification_target(self) -> ClassificationTarget: - return ClassificationTarget(self) + def as_classification_label(self) -> ClassificationLabel: + return ClassificationLabel(self) - def as_regression_target(self) -> RegressionTarget: - return RegressionTarget(self) + def as_regression_label(self) -> RegressionLabel: + return RegressionLabel(self) def compile(self) -> DerivedFeature: @@ -435,6 +435,14 @@ def is_not_null(self) -> Bool: return instance +class CouldBeModelVersion: + def as_model_Version(self) -> ModelVersion: + if isinstance(self, FeatureFactory): + return ModelVersion(self) + + raise ValueError(f'{self} is not a feature factory, and can therefore not be a model version') + + class CouldBeEntityFeature: def as_entity(self) -> Entity: if isinstance(self, FeatureFactory): @@ -733,7 +741,7 @@ def aggregate(self) -> ArithmeticAggregation: return ArithmeticAggregation(self) -class Int32(ArithmeticFeature, CouldBeEntityFeature): +class Int32(ArithmeticFeature, CouldBeEntityFeature, CouldBeModelVersion): def copy_type(self) -> Int32: return Int32() @@ -745,7 +753,7 @@ def aggregate(self) -> ArithmeticAggregation: return ArithmeticAggregation(self) -class Int64(ArithmeticFeature, CouldBeEntityFeature): +class Int64(ArithmeticFeature, CouldBeEntityFeature, CouldBeModelVersion): def copy_type(self) -> Int64: return Int64() @@ -796,6 +804,7 @@ def validate_startswith(self: T, prefix: str) -> T: class String( CategoricalEncodableFeature, NumberConvertableFeature, + CouldBeModelVersion, CouldBeEntityFeature, LengthValidatable, StringValidatable, @@ -888,6 +897,21 @@ def json_path_value_at(self, path: str, as_type: T) -> T: return feature +class ModelVersion(FeatureFactory): + + _dtype: FeatureFactory + + @property + def dtype(self) -> FeatureType: + return self._dtype.dtype + + def __init__(self, dtype: FeatureFactory): + self._dtype = dtype + + def aggregate(self) -> CategoricalAggregation: + return CategoricalAggregation(self) + + class Entity(FeatureFactory): _dtype: FeatureFactory @@ -961,6 +985,26 @@ def indexed( return self +@dataclass +class List(FeatureFactory): + + sub_type: FeatureFactory + + def copy_type(self) -> List: + return List(self.sub_type.copy_type()) + + @property + def dtype(self) -> FeatureType: + return FeatureType('').array + + def contains(self, value: Any) -> Bool: + from aligned.compiler.transformation_factory import ArrayContainsFactory + + feature = Bool() + feature.transformation = ArrayContainsFactory(LiteralValue.from_value(value), self) + return feature + + class ImageUrl(StringValidatable): @property def dtype(self) -> FeatureType: diff --git a/aligned/compiler/model.py b/aligned/compiler/model.py index 333173a..fbca499 100644 --- a/aligned/compiler/model.py +++ b/aligned/compiler/model.py @@ -7,12 +7,13 @@ import polars as pl from aligned.compiler.feature_factory import ( - ClassificationTarget, + ClassificationLabel, EventTimestamp, FeatureFactory, FeatureReferencable, - RegressionTarget, + RegressionLabel, TargetProbability, + ModelVersion, ) from aligned.data_source.batch_data_source import BatchDataSource from aligned.data_source.stream_data_source import StreamDataSource @@ -68,12 +69,12 @@ class ModelMetedata: dataset_folder: Folder | None = field(default=None) -class Model(ABC): +class ModelContract(ABC): @staticmethod def metadata_with( name: str, - description: str, features: list[FeatureReferencable], + description: str | None = None, contacts: 
list[str] | None = None, tags: dict[str, str] | None = None, predictions_source: BatchDataSource | None = None, @@ -93,17 +94,36 @@ def metadata_with( @abstractproperty def metadata(self) -> ModelMetedata: - pass + raise NotImplementedError() @classmethod def compile(cls) -> ModelSchema: - var_names = [name for name in cls().__dir__() if not name.startswith('_')] - metadata = cls().metadata + return cls().compile_instance() + + def compile_instance(self) -> ModelSchema: + """ + Compiles the ModelContract in to ModelSchema structure that can further be encoded. + + ```python + class MyModel(ModelContract): + ... + + metadata = ModelContract.metadata_with(...) + + model_schema = MyModel().compile_instance() + + ``` + + Returns: The compiled Model schema + """ + var_names = [name for name in self.__dir__() if not name.startswith('_')] + metadata = self.metadata inference_view: PredictionsView = PredictionsView( entities=set(), features=set(), derived_features=set(), + model_version_column=None, source=metadata.predictions_source, stream_source=metadata.predictions_stream, classification_targets=set(), @@ -115,25 +135,27 @@ def compile(cls) -> ModelSchema: regression_targets: dict[str, RegressionTargetSchema] = {} for var_name in var_names: - feature = getattr(cls, var_name) + feature = getattr(self, var_name) if isinstance(feature, FeatureFactory): feature._location = FeatureLocation.model(metadata.name) + if isinstance(feature, ModelVersion): + inference_view.model_version_column = feature.feature() if isinstance(feature, FeatureView): compiled = feature.compile() inference_view.entities.update(compiled.entities) - elif isinstance(feature, Model): + elif isinstance(feature, ModelContract): compiled = feature.compile() inference_view.entities.update(compiled.predictions_view.entities) - elif isinstance(feature, ClassificationTarget): + elif isinstance(feature, ClassificationLabel): assert feature._name feature._location = FeatureLocation.model(metadata.name) target_feature = feature.compile() classification_targets[var_name] = target_feature inference_view.classification_targets.add(target_feature) - elif isinstance(feature, RegressionTarget): + elif isinstance(feature, RegressionLabel): assert feature._name feature._location = FeatureLocation.model(metadata.name) target_feature = feature.compile() @@ -189,7 +211,7 @@ def compile(cls) -> ModelSchema: ) inference_view.derived_features.add(arg_max_feature) - if not probability_features: + if not probability_features and inference_view.classification_targets: inference_view.features.update( {target.feature for target in inference_view.classification_targets} ) diff --git a/aligned/compiler/repo_reader.py b/aligned/compiler/repo_reader.py index 4b576e9..ff4c7c9 100644 --- a/aligned/compiler/repo_reader.py +++ b/aligned/compiler/repo_reader.py @@ -82,7 +82,7 @@ class RepoReader: async def definition_from_path(repo_path: Path, excludes: list[str] | None = None) -> RepoDefinition: excluded_files: list[Path] = [] - for exclude in excludes: + for exclude in excludes or []: excluded_files.extend(repo_path.resolve().glob(exclude)) metadata = RepoMetadata(created_at=datetime.now(), name=repo_path.name, github_url=None) @@ -121,7 +121,7 @@ async def definition_from_path(repo_path: Path, excludes: list[str] | None = Non ) else: classes = super_classes_in(obj) - if 'Model' in classes: + if 'ModelContract' in classes: repo.models.add(obj.compile()) elif 'FeatureView' in classes: fv = obj.compile() diff --git a/aligned/compiler/transformation_factory.py 
b/aligned/compiler/transformation_factory.py index a7dbfd0..2fb41d9 100644 --- a/aligned/compiler/transformation_factory.py +++ b/aligned/compiler/transformation_factory.py @@ -84,6 +84,22 @@ def compile(self) -> Transformation: return Ordinal(self.feature.name, self.orders) +@dataclass +class ArrayContainsFactory(TransformationFactory): + + value: LiteralValue + in_feature: FeatureFactory + + @property + def using_features(self) -> list[FeatureFactory]: + return [self.in_feature] + + def compile(self) -> Transformation: + from aligned.schemas.transformation import ArrayContains + + return ArrayContains(self.in_feature.name, self.value) + + @dataclass class ContainsFactory(TransformationFactory): diff --git a/aligned/data_source/batch_data_source.py b/aligned/data_source/batch_data_source.py index 2ccf9d3..d31c280 100644 --- a/aligned/data_source/batch_data_source.py +++ b/aligned/data_source/batch_data_source.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from datetime import datetime -from typing import TYPE_CHECKING, TypeVar +from typing import TYPE_CHECKING, TypeVar, Any from mashumaro.types import SerializableType @@ -10,8 +10,9 @@ from aligned.schemas.feature import Feature if TYPE_CHECKING: + from aligned.compiler.feature_factory import FeatureFactory from aligned.request.retrival_request import RetrivalRequest - from aligned.retrival_job import DateRangeJob, FullExtractJob, RetrivalJob + from aligned.retrival_job import RetrivalJob class BatchDataSourceFactory: @@ -69,6 +70,34 @@ def _serialize(self) -> dict: def __hash__(self) -> int: return hash(self.job_group_key()) + def contains_config(self, config: Any) -> bool: + """ + Checks if a data source contains a source config. + This can be used to select different sources based on the data sources to connect to. 
+ + ``` + config = PostgreSQLConfig(env_var='MY_APP_DB_URL') + source = config.table('my_table') + + print(source.contains_config(config)) + >> True + + store = await FileSource.json_at("features.json").feature_store() + views = store.views_with_config(config) + print(len(views)) + >> 3 + ``` + + Args: + config: The config to check for + + Returns: + bool: If the config is contained in the source + """ + if isinstance(config, BatchDataSource): + return config.to_dict() == self.to_dict() + return False + @classmethod def _deserialize(cls, value: dict) -> BatchDataSource: name_type = value['type_name'] @@ -82,7 +111,7 @@ def _deserialize(cls, value: dict) -> BatchDataSource: data_class = BatchDataSourceFactory.shared().supported_data_sources[name_type] return data_class.from_dict(value) - def all_data(self, request: RetrivalRequest, limit: int | None) -> FullExtractJob: + def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob: raise NotImplementedError() def all_between_dates( @@ -90,7 +119,7 @@ def all_between_dates( request: RetrivalRequest, start_date: datetime, end_date: datetime, - ) -> DateRangeJob: + ) -> RetrivalJob: raise NotImplementedError() @classmethod @@ -102,6 +131,62 @@ def multi_source_features_for( def features_for(self, facts: RetrivalJob, request: RetrivalRequest) -> RetrivalJob: return type(self).multi_source_features_for(facts, [(self, request)]) + async def schema(self) -> dict[str, FeatureFactory]: + """Returns the schema for the data source + + ```python + source = FileSource.parquet_at('test_data/titanic.parquet') + schema = await source.schema() + >>> {'passenger_id': FeatureType(name='int64'), ...} + ``` + + Raises: + NotImplementedError: By default will this error be raised if not implemented + + Returns: + dict[str, FeatureType]: A dictionary containing the column name and the feature type + """ + raise NotImplementedError(f'`schema()` is not implemented for {type(self)}.') + + async def feature_view_code(self, view_name: str) -> str: + """Setup the code needed to represent the data source as a feature view + + ```python + FileSource.parquet("my_path.parquet").feature_view_code(view_name="my_view") + + >>> \"\"\"from aligned import FeatureView, String, Int64, Float + + class MyView(FeatureView): + + metadata = FeatureView.metadata_with( + name="Embarked", + description="some description", + batch_source=FileSource.parquest("my_path.parquet") + stream_source=None, + ) + + Passenger_id = Int64() + Survived = Int64() + Pclass = Int64() + Name = String() + Sex = String() + Age = Float() + Sibsp = Int64() + Parch = Int64() + Ticket = String() + Fare = Float() + Cabin = String() + Embarked = String()\"\"\" + ``` + + Returns: + str: The code needed to setup a basic feature view + """ + from aligned import FeatureView + + schema = await self.schema() + return FeatureView.feature_view_code_template(schema, f'{self}', view_name) + class ColumnFeatureMappable: mapping_keys: dict[str, str] diff --git a/aligned/feature_source.py b/aligned/feature_source.py index 11807c4..d4e498e 100644 --- a/aligned/feature_source.py +++ b/aligned/feature_source.py @@ -68,23 +68,28 @@ def features_for(self, facts: RetrivalJob, request: FeatureRequest) -> RetrivalJ combined_requests = [ request for request in request.needed_requests if request.location.identifier not in self.sources ] - jobs = [ - self.source_types[source_group].multi_source_features_for( - facts=facts, - requests=[ - (source, req) for source, req in core_requests if source.job_group_key() == 
source_group - ], + jobs = [] + for source_group in source_groupes: + requests = [ + (source, req) for source, req in core_requests if source.job_group_key() == source_group + ] + has_derived_features = any(req.derived_features for _, req in requests) + job = ( + self.source_types[source_group] + .multi_source_features_for(facts=facts, requests=requests) + .ensure_types([req for _, req in requests]) ) - for source_group in source_groupes - ] - return ( - CombineFactualJob( + if has_derived_features: + job = job.derive_features() + jobs.append(job) + + if len(combined_requests) > 0 or len(jobs) > 1: + return CombineFactualJob( jobs=jobs, combined_requests=combined_requests, - ) - .ensure_types(request.needed_requests) - .derive_features(request.needed_requests) - ) + ).derive_features() + else: + return jobs[0] def all_for(self, request: FeatureRequest, limit: int | None = None) -> RetrivalJob: if len(request.needed_requests) != 1: diff --git a/aligned/feature_store.py b/aligned/feature_store.py index 82f9833..34c261a 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging from collections import defaultdict from dataclasses import dataclass, field @@ -5,10 +7,12 @@ from importlib import import_module from typing import Any -from aligned.compiler.model import Model +from prometheus_client import Histogram + +from aligned.compiler.model import ModelContract from aligned.data_file import DataFileReference +from aligned.data_source.batch_data_source import BatchDataSource from aligned.enricher import Enricher -from aligned.exceptions import CombinedFeatureViewQuerying from aligned.feature_source import ( BatchFeatureSource, FeatureSource, @@ -20,7 +24,7 @@ from aligned.feature_view.feature_view import FeatureView from aligned.request.retrival_request import FeatureRequest, RetrivalRequest from aligned.retrival_job import FilterJob, RetrivalJob, StreamAggregationJob, SupervisedJob -from aligned.schemas.feature import FeatureLocation +from aligned.schemas.feature import FeatureLocation, Feature from aligned.schemas.feature_view import CompiledFeatureView from aligned.schemas.model import EventTrigger from aligned.schemas.model import Model as ModelSchema @@ -28,6 +32,24 @@ logger = logging.getLogger(__name__) +feature_view_write_time = Histogram( + 'feature_view_write_time', + 'The time used to write data related to a feature view', + labelnames=['feature_view'], +) + + +@dataclass +class SourceRequest: + """ + Represent a request to a source. + This can be used validate the sources. 
+ """ + + location: FeatureLocation + source: BatchDataSource + request: RetrivalRequest + @dataclass class RawStringFeatureRequest: @@ -86,7 +108,7 @@ def __init__( self.models = models @staticmethod - def experimental() -> 'FeatureStore': + def experimental() -> FeatureStore: return FeatureStore.from_definition( RepoDefinition( metadata=RepoMetadata(created_at=datetime.utcnow(), name='experimental'), @@ -130,7 +152,7 @@ def set_module(path: str, module_class: DynamicEnricher) -> None: ) @staticmethod - def from_definition(repo: RepoDefinition, feature_source: FeatureSource | None = None) -> 'FeatureStore': + def from_definition(repo: RepoDefinition, feature_source: FeatureSource | None = None) -> FeatureStore: """Creates a feature store based on a repo definition A feature source can also be defined if wanted, otherwise will the batch source be used for reads @@ -151,17 +173,20 @@ def from_definition(repo: RepoDefinition, feature_source: FeatureSource | None = combined_feature_views = {fv.name: fv for fv in repo.combined_feature_views} FeatureStore.register_enrichers(repo.enrichers) + sources = { + FeatureLocation.feature_view(view.name).identifier: view.batch_data_source + for view in repo.feature_views + } | { + FeatureLocation.model(model.name).identifier: model.predictions_view.source + for model in repo.models + if model.predictions_view.source is not None + } return FeatureStore( feature_views=feature_views, combined_feature_views=combined_feature_views, models={model.name: model for model in repo.models}, - feature_source=BatchFeatureSource( - { - FeatureLocation.feature_view(fv.name).identifier: fv.batch_data_source - for fv in repo.feature_views - } - ), + feature_source=BatchFeatureSource(sources), ) def repo_definition(self) -> RepoDefinition: @@ -176,7 +201,7 @@ def repo_definition(self) -> RepoDefinition: @staticmethod async def from_reference_at_path( path: str = '.', reference_file: str = 'feature_store_location.py' - ) -> 'FeatureStore': + ) -> FeatureStore: """Looks for a file reference struct, and loads the associated repo. This can be used for changing which feature store definitions @@ -195,7 +220,7 @@ async def from_reference_at_path( return FeatureStore.from_definition(repo_def) @staticmethod - async def from_dir(path: str = '.') -> 'FeatureStore': + async def from_dir(path: str = '.') -> FeatureStore: """Reads and generates a feature store based on the given directory's content. This will read the feature views, services etc in a given repo and generate a feature store. @@ -227,9 +252,25 @@ def features_for_request( else: entity_request = entities - return self.feature_source.features_for(entity_request, requests) + return self.feature_source.features_for(entity_request, requests).filter(feature_names) def features_for(self, entities: dict[str, list] | RetrivalJob, features: list[str]) -> RetrivalJob: + """ + Returns a set of features given a set of entities. + + ```python + entities = { "user_id": [1, 2, 3, ...] } + job = store.features_for(entities, features=["user:time_since_last_login", ...]) + data = await job.to_pandas() + ``` + + Args: + entities (dict[str, list] | RetrivalJob): The entities to load data for + features (list[str]): A list of features to load. 
Use the format (:) + + Returns: + RetrivalJob: A job that knows how to fetch the features + """ feature_request = RawStringFeatureRequest(features=set(features)) requests = self.requests_for(feature_request) @@ -238,6 +279,9 @@ def features_for(self, entities: dict[str, list] | RetrivalJob, features: list[s if requests.needs_event_timestamp: feature_names.add(self.event_timestamp_column) + if isinstance(entities, dict) and self.event_timestamp_column not in entities: + length = len(list(entities.values())[0]) + entities[self.event_timestamp_column] = [datetime.utcnow()] * length for view, feature_set in feature_request.grouped_features.items(): if feature_set != {'*'}: @@ -259,7 +303,13 @@ def features_for(self, entities: dict[str, list] | RetrivalJob, features: list[s return self.features_for_request(requests, entities, feature_names) - def model(self, name: str) -> 'ModelFeatureStore': + def model(self, name: str) -> ModelFeatureStore: + """ + Selects a model for easy of use. + + Returns: + ModelFeatureStore: A new store that containes the selected model + """ model = self.models[name] return ModelFeatureStore(model, self) @@ -276,6 +326,7 @@ def _requests_for( feature_request: RawStringFeatureRequest, feature_views: dict[str, CompiledFeatureView], combined_feature_views: dict[str, CompiledCombinedFeatureView], + models: dict[str, ModelSchema], ) -> FeatureRequest: features = feature_request.grouped_features requests: list[RetrivalRequest] = [] @@ -283,9 +334,21 @@ def _requests_for( needs_event_timestamp = False for location in feature_request.locations: - feature_view_name = location.name - if feature_view_name in combined_feature_views: - cfv = combined_feature_views[feature_view_name] + location_name = location.name + if location.location == 'model': + model = models[location_name] + view = model.predictions_view + if len(features[location]) == 1 and list(features[location])[0] == '*': + request = view.request(location_name) + else: + request = view.request_for(features[location], location_name) + requests.append(request) + entity_names.update(request.entity_names) + if request.event_timestamp: + needs_event_timestamp = True + + elif location_name in combined_feature_views: + cfv = combined_feature_views[location_name] if len(features[location]) == 1 and list(features[location])[0] == '*': sub_requests = cfv.request_all else: @@ -296,8 +359,8 @@ def _requests_for( if request.event_timestamp: needs_event_timestamp = True - elif feature_view_name in feature_views: - feature_view = feature_views[feature_view_name] + elif location_name in feature_views: + feature_view = feature_views[location_name] if len(features[location]) == 1 and list(features[location])[0] == '*': sub_requests = feature_view.request_all else: @@ -309,7 +372,7 @@ def _requests_for( needs_event_timestamp = True else: raise ValueError( - f'Unable to find: {feature_view_name}, ' + f'Unable to find: {location_name}, ' f'availible views are: {combined_feature_views.keys()}, and: {feature_views.keys()}' ) @@ -323,9 +386,11 @@ def _requests_for( ) def requests_for(self, feature_request: RawStringFeatureRequest) -> FeatureRequest: - return FeatureStore._requests_for(feature_request, self.feature_views, self.combined_feature_views) + return FeatureStore._requests_for( + feature_request, self.feature_views, self.combined_feature_views, self.models + ) - def feature_view(self, view: str) -> 'FeatureViewStore': + def feature_view(self, view: str) -> FeatureViewStore: """ Selects a feature view based on a name. 
@@ -345,10 +410,7 @@ def feature_view(self, view: str) -> 'FeatureViewStore': FeatureViewStore: The selected feature view ready for querying """ if view in self.combined_feature_views: - raise CombinedFeatureViewQuerying( - 'You are trying to get a combined feature view. ', - 'This is only possible through store.features_for(...), as of now.\n', - ) + return FeatureViewStore(self, self.combined_feature_views[view], set()) feature_view = self.feature_views[view] return FeatureViewStore(self, feature_view, self.event_triggers_for(view)) @@ -370,7 +432,7 @@ class MyFeatureView(FeatureView): Args: feature_view (FeatureView): The feature view to add """ - compiled_view = type(feature_view).compile() + compiled_view = feature_view.compile_instance() self.feature_views[compiled_view.name] = compiled_view if isinstance(self.feature_source, BatchFeatureSource): self.feature_source.sources[ @@ -381,7 +443,7 @@ def add_combined_feature_view(self, feature_view: CombinedFeatureView) -> None: compiled_view = type(feature_view).compile() self.combined_feature_views[compiled_view.name] = compiled_view - def add_model(self, model: Model) -> None: + def add_model(self, model: ModelContract) -> None: """ Compiles and adds the model to the store @@ -391,7 +453,7 @@ def add_model(self, model: Model) -> None: compiled_model = type(model).compile() self.models[compiled_model.name] = compiled_model - def with_source(self, source: FeatureSource | FeatureSourceFactory | None = None) -> 'FeatureStore': + def with_source(self, source: FeatureSource | FeatureSourceFactory | None = None) -> FeatureStore: """ Creates a new instance of a feature store, but changes where to fetch the features from @@ -410,12 +472,15 @@ def with_source(self, source: FeatureSource | FeatureSourceFactory | None = None if isinstance(source, FeatureSourceFactory): feature_source = source.feature_source() else: - feature_source = source or BatchFeatureSource( - { - FeatureLocation.feature_view(view.name).identifier: view.batch_data_source - for view in set(self.feature_views.values()) - } - ) + sources = { + FeatureLocation.feature_view(view.name).identifier: view.batch_data_source + for view in set(self.feature_views.values()) + } | { + FeatureLocation.model(model.name).identifier: model.predictions_view.source + for model in set(self.models.values()) + if model.predictions_view.source is not None + } + feature_source = source or BatchFeatureSource(sources=sources) return FeatureStore( feature_views=self.feature_views, @@ -424,7 +489,7 @@ def with_source(self, source: FeatureSource | FeatureSourceFactory | None = None feature_source=feature_source, ) - def offline_store(self) -> 'FeatureStore': + def offline_store(self) -> FeatureStore: """ Will set the source to the defined batch sources. @@ -433,6 +498,30 @@ def offline_store(self) -> 'FeatureStore': """ return self.with_source() + def use_application_sources(self) -> FeatureStore: + """ + Selects features from the application source if added. + Otherwise, the we will default back to the batch source. 
+ + Returns: + FeatureStore: A new feature store that loads features from the application source + """ + sources = { + FeatureLocation.feature_view(view.name).identifier: view.application_source + or view.batch_data_source + for view in set(self.feature_views.values()) + } | { + FeatureLocation.model(model.name).identifier: model.predictions_view.source + for model in set(self.models.values()) + if model.predictions_view.source is not None + } + return FeatureStore( + feature_views=self.feature_views, + combined_feature_views=self.combined_feature_views, + models=self.models, + feature_source=BatchFeatureSource(sources=sources), + ) + def model_features_for(self, view_name: str) -> set[str]: all_model_features: set[str] = set() for model in self.models.values(): @@ -441,6 +530,35 @@ def model_features_for(self, view_name: str) -> set[str]: ) return all_model_features + def views_with_config(self, config: Any) -> list[SourceRequest]: + """ + Returns the feature views where the config match. + + ```python + source = PostgreSQLConfig(env_var='SOURCE_URL') + store.views_with_conifg(source) + ``` + + Args: + config (Any): The config to find views for + + Returns: + list[SourceRequest]: A list of data sources, the request and it's location + """ + views: list[SourceRequest] = [] + for view in self.feature_views.values(): + request = view.request_all.needed_requests[0] + if view.batch_data_source.contains_config(config): + views.append( + SourceRequest(FeatureLocation.feature_view(view.name), view.batch_data_source, request) + ) + + if view.application_source and view.application_source.contains_config(config): + views.append( + SourceRequest(FeatureLocation.feature_view(view.name), view.application_source, request) + ) + return views + @dataclass class ModelFeatureStore: @@ -460,7 +578,37 @@ def request(self, except_features: set[str] | None = None) -> FeatureRequest: RawStringFeatureRequest(self.raw_string_features(except_features or set())) ) + def needed_entities(self) -> set[Feature]: + return self.request().request_result.entities + def features_for(self, entities: dict[str, list] | RetrivalJob) -> RetrivalJob: + """Returns the features for the given entities + + ```python + store = await FileSource.json_at("features-latest.json").feature_store() + + df = store.model("titanic")\\ + .features_for({"passenger_id": [1, 2, 3]})\\ + .to_polars() + + print(df.collect()) + >>> ┌──────────────┬───────┬─────────┬─────────────────────┬──────────────┐ + >>> │ passenger_id ┆ is_mr ┆ is_male ┆ constant_filled_age ┆ has_siblings │ + >>> │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + >>> │ i32 ┆ bool ┆ bool ┆ f64 ┆ bool │ + >>> ╞══════════════╪═══════╪═════════╪═════════════════════╪══════════════╡ + >>> │ 1 ┆ true ┆ true ┆ 22.0 ┆ true │ + >>> │ 2 ┆ false ┆ false ┆ 38.0 ┆ true │ + >>> │ 3 ┆ false ┆ false ┆ 26.0 ┆ false │ + >>> └──────────────┴───────┴─────────┴─────────────────────┴──────────────┘ + ``` + + Args: + entities (dict[str, list] | RetrivalJob): The entities to fetch features for + + Returns: + RetrivalJob: A retrival job that can be used to fetch the features + """ request = self.request() if isinstance(entities, dict): features = self.raw_string_features(set(entities.keys())) @@ -477,10 +625,66 @@ def features_for(self, entities: dict[str, list] | RetrivalJob) -> RetrivalJob: return job.filter(request.features_to_include) - def with_target(self) -> 'SupervisedModelFeatureStore': + def with_labels(self) -> SupervisedModelFeatureStore: + """Will also load the labels for the model + + ```python + store = 
await FileSource.json_at("features-latest.json").feature_store() + + data = store.model("titanic")\\ + .with_labels()\\ + .features_for({"passenger_id": [1, 2, 3]})\\ + .to_polars() + + print(data.labels.collect(), data.input.collect()) + >>> ┌──────────┐ ┌───────┬─────────┬─────────────────────┬──────────────┐ + >>> │ survived │ │ is_mr ┆ is_male ┆ constant_filled_age ┆ has_siblings │ + >>> │ --- │ │ --- ┆ --- ┆ --- ┆ --- │ + >>> │ bool │ │ bool ┆ bool ┆ f64 ┆ bool │ + >>> ╞══════════╡ ╞═══════╪═════════╪═════════════════════╪══════════════╡ + >>> │ false │ │ true ┆ true ┆ 22.0 ┆ true │ + >>> │ true │ │ false ┆ false ┆ 38.0 ┆ true │ + >>> │ true │ │ false ┆ false ┆ 26.0 ┆ false │ + >>> └──────────┘ └───────┴─────────┴─────────────────────┴──────────────┘ + ``` + + Returns: + SupervisedModelFeatureStore: A new queryable feature store + """ return SupervisedModelFeatureStore(self.model, self.store) def cached_at(self, location: DataFileReference) -> RetrivalJob: + """Loads the model features from a pre computed location + + ```python + from aligned import FileSource + + store = await FileSource.json_at("features-latest.json").feature_store() + + cached_features = FileSource.parquet_at("titanic_features.parquet") + + df = store.model("titanic")\\ + .cached_at(cached_features)\\ + .to_polars() + + print(df.collect()) + >>> ┌──────────────┬───────┬─────────┬─────────────────────┬──────────────┐ + >>> │ passenger_id ┆ is_mr ┆ is_male ┆ constant_filled_age ┆ has_siblings │ + >>> │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + >>> │ i32 ┆ bool ┆ bool ┆ f64 ┆ bool │ + >>> ╞══════════════╪═══════╪═════════╪═════════════════════╪══════════════╡ + >>> │ 1 ┆ true ┆ true ┆ 22.0 ┆ true │ + >>> │ 2 ┆ false ┆ false ┆ 38.0 ┆ true │ + >>> │ 3 ┆ false ┆ false ┆ 26.0 ┆ false │ + >>> └──────────────┴───────┴─────────┴─────────────────────┴──────────────┘ + ``` + + Args: + location (DataFileReference): _description_ + + Returns: + RetrivalJob: _description_ + """ from aligned.local.job import FileFullJob features = {f'{feature.location.identifier}:{feature.name}' for feature in self.model.features} @@ -506,6 +710,26 @@ def process_features(self, input: RetrivalJob | dict[str, list]) -> RetrivalJob: .filter(request.features_to_include) ) + def predictions_for(self, entities: dict[str, list] | RetrivalJob) -> RetrivalJob: + + if self.model.predictions_view.source is None: + raise ValueError( + 'Model does not have a prediction source. ' + 'This can be set in the metadata for a model contract.' 
+ ) + + source = self.model.predictions_view.source + request = self.model.predictions_view.request(self.model.name) + + if isinstance(entities, RetrivalJob): + job = entities + elif isinstance(entities, dict): + job = RetrivalJob.from_dict(entities, request=[request]) + else: + raise ValueError(f'features must be a dict or a RetrivalJob, was {type(input)}') + + return source.features_for(job, request).ensure_types([request]).derive_features() + @dataclass class SupervisedModelFeatureStore: @@ -514,22 +738,45 @@ class SupervisedModelFeatureStore: store: FeatureStore def features_for(self, entities: dict[str, list] | RetrivalJob) -> SupervisedJob: + """Loads the features and labels for a model + + ```python + store = await FileSource.json_at("features-latest.json").feature_store() + + data = store.model("titanic")\\ + .with_labels()\\ + .features_for({"passenger_id": [1, 2, 3]})\\ + .to_polars() + + print(data.labels.collect(), data.input.collect()) + >>> ┌──────────┐ ┌───────┬─────────┬─────────────────────┬──────────────┐ + >>> │ survived │ │ is_mr ┆ is_male ┆ constant_filled_age ┆ has_siblings │ + >>> │ --- │ │ --- ┆ --- ┆ --- ┆ --- │ + >>> │ bool │ │ bool ┆ bool ┆ f64 ┆ bool │ + >>> ╞══════════╡ ╞═══════╪═════════╪═════════════════════╪══════════════╡ + >>> │ false │ │ true ┆ true ┆ 22.0 ┆ true │ + >>> │ true │ │ false ┆ false ┆ 38.0 ┆ true │ + >>> │ true │ │ false ┆ false ┆ 26.0 ┆ false │ + >>> └──────────┘ └───────┴─────────┴─────────────────────┴──────────────┘ + ``` + + Args: + entities (dict[str, list] | RetrivalJob): A dictionary of entity names to lists of entity values + + Returns: + SupervisedJob: A object that will load the features and lables in your desired format + """ feature_refs = self.model.features features = {f'{feature.location.identifier}:{feature.name}' for feature in feature_refs} pred_view = self.model.predictions_view - target_features = set() + + target_feature_refs = pred_view.labels_estimates_refs() + target_features = {feature.identifier for feature in target_feature_refs} + targets = set() if pred_view.classification_targets: - target_features = { - f'{feature.estimating.location.identifier}:{feature.estimating.name}' - for feature in pred_view.classification_targets - } targets = {feature.estimating.name for feature in pred_view.classification_targets} elif pred_view.regression_targets: - target_features = { - f'{feature.estimating.location.identifier}:{feature.estimating.name}' - for feature in pred_view.regression_targets - } targets = {feature.estimating.name for feature in pred_view.regression_targets} else: raise ValueError('Found no targets in the model') @@ -550,6 +797,63 @@ def features_for(self, entities: dict[str, list] | RetrivalJob) -> SupervisedJob target_columns=targets, ) + def predictions_for(self, entities: dict[str, list] | RetrivalJob) -> RetrivalJob: + """Loads the predictions and labels / ground truths for a model + + ```python + entities = { + "trip_id": ["ea6b8d5d-62fd-4664-a112-4889ebfcdf2b", ...], + "vendor_id": [2, ...], + } + preds = await store.model("taxi")\\ + .with_labels()\\ + .predictions_for(entities)\\ + .to_polars() + + print(preds.collect()) + >>> ┌──────────┬───────────┬────────────────────┬───────────────────────────────────┐ + >>> │ duration ┆ vendor_id ┆ predicted_duration ┆ trip_id │ + >>> │ --- ┆ --- ┆ --- ┆ --- │ + >>> │ i64 ┆ i32 ┆ i64 ┆ str │ + >>> ╞══════════╪═══════════╪════════════════════╪═══════════════════════════════════╡ + >>> │ 408 ┆ 2 ┆ 500 ┆ ea6b8d5d-62fd-4664-a112-4889ebfc… │ + >>> │ 280 ┆ 1 ┆ 292 ┆ 
64c4c94f-2a85-406f-86e6-082f1f7a… │ + >>> │ 712 ┆ 4 ┆ 689 ┆ 3258461f-6113-4c5e-864b-75a0dee8… │ + >>> └──────────┴───────────┴────────────────────┴───────────────────────────────────┘ + ``` + + Args: + entities (dict[str, list] | RetrivalJob): A dictionary of entity names to lists of entity values + + Returns: + RetrivalJob: A object that will load the features and lables in your desired format + """ + + pred_view = self.model.predictions_view + if pred_view.source is None: + raise ValueError( + 'Model does not have a prediction source. ' + 'This can be set in the metadata for a model contract.' + ) + + request = pred_view.request(self.model.name) + + target_features = pred_view.labels_estimates_refs() + labels = pred_view.labels() + target_features = {feature.identifier for feature in target_features} + pred_features = {f'model:{self.model.name}:{feature.name}' for feature in labels} + request = self.store.requests_for(RawStringFeatureRequest(pred_features)) + target_request = self.store.requests_for( + RawStringFeatureRequest(target_features) + ).without_event_timestamp(name_sufix='target') + + total_request = FeatureRequest( + FeatureLocation.model(self.model.name), + request.features_to_include.union(target_request.features_to_include), + request.needed_requests + target_request.needed_requests, + ) + return self.store.features_for_request(total_request, entities, total_request.features_to_include) + @dataclass class FeatureViewStore: @@ -558,12 +862,16 @@ class FeatureViewStore: view: CompiledFeatureView event_triggers: set[EventTrigger] = field(default_factory=set) feature_filter: set[str] | None = field(default=None) - only_write_model_features: bool = field(default=False) + + @property + def name(self) -> str: + return self.view.name @property def request(self) -> RetrivalRequest: - if self.only_write_model_features: + if self.feature_filter is not None: features_in_models = self.store.model_features_for(self.view.name) + logger.info(f'Only processing model features: {features_in_models}') return self.view.request_for(features_in_models).needed_requests[0] else: return self.view.request_all.needed_requests[0] @@ -572,7 +880,7 @@ def request(self) -> RetrivalRequest: def source(self) -> FeatureSource: return self.store.feature_source - def with_optimised_write(self) -> 'FeatureViewStore': + def with_optimised_write(self) -> FeatureViewStore: features_in_models = self.store.model_features_for(self.view.name) return self.select(features_in_models) @@ -631,7 +939,8 @@ def features_for(self, entities: dict[str, list] | RetrivalJob) -> RetrivalJob: else: return job - def select(self, features: set[str]) -> 'FeatureViewStore': + def select(self, features: set[str]) -> FeatureViewStore: + logger.info(f'Selecting features {features}') return FeatureViewStore(self.store, self.view, self.event_triggers, features) @property @@ -646,10 +955,13 @@ def write_input(self) -> set[str]: async def write(self, values: dict[str, list[Any]]) -> None: from aligned import FileSource - from aligned.data_file import DataFileReference from aligned.schemas.derivied_feature import AggregateOver request = self.view.request_all.needed_requests[0] + if self.feature_filter is not None: + logger.info(f'Filtering features to {self.feature_filter}') + request = self.view.request_for(self.feature_filter) + job = ( RetrivalJob.from_dict(values, request) .validate_entites() @@ -674,6 +986,11 @@ async def write(self, values: dict[str, list[Any]]) -> None: job = StreamAggregationJob(job, checkpoints) + job = 
job.derive_features([request]) + + if self.feature_filter: + job = job.filter(self.feature_filter) + await self.batch_write(job) def process_input(self, values: dict[str, list[Any]]) -> RetrivalJob: @@ -710,14 +1027,16 @@ async def batch_write(self, values: dict[str, list[Any]] | RetrivalJob) -> None: else: raise ValueError(f'values must be a dict or a RetrivalJob, was {type(values)}') - job = ( - core_job.derive_features([request]) - .listen_to_events(self.event_triggers) - .update_vector_index(self.view.indexes) - ) + # job = ( + # core_job.derive_features([request]) + # .listen_to_events(self.event_triggers) + # .update_vector_index(self.view.indexes) + # ) + job = core_job - if self.feature_filter: - logger.info(f'Only writing features used by models: {self.feature_filter}') - job = job.filter(self.feature_filter) + # if self.feature_filter: + # logger.info(f'Only writing features used by models: {self.feature_filter}') + # job = job.filter(self.feature_filter) - await self.source.write(job, job.retrival_requests) + with feature_view_write_time.labels(self.view.name).time(): + await self.source.write(job, job.retrival_requests) diff --git a/aligned/feature_view/feature_view.py b/aligned/feature_view/feature_view.py index d0f71cd..617b2be 100644 --- a/aligned/feature_view/feature_view.py +++ b/aligned/feature_view/feature_view.py @@ -17,6 +17,7 @@ from aligned.schemas.feature_view import CompiledFeatureView if TYPE_CHECKING: + from aligned.compiler.feature_factory import FeatureFactory from aligned.feature_store import FeatureViewStore # Enables code compleation in the select method @@ -29,6 +30,8 @@ class FeatureViewMetadata: description: str batch_source: BatchDataSource stream_source: StreamDataSource | None = field(default=None) + application_source: BatchDataSource | None = field(default=None) + staging_source: BatchDataSource | None = field(default=None) contacts: list[str] | None = field(default=None) tags: dict[str, str] = field(default_factory=dict) @@ -40,6 +43,8 @@ def from_compiled(view: CompiledFeatureView) -> FeatureViewMetadata: tags=view.tags, batch_source=view.batch_data_source, stream_source=view.stream_data_source, + application_source=view.application_source, + staging_source=view.staging_source, ) @@ -52,7 +57,7 @@ class FeatureView(ABC): @abstractproperty def metadata(self) -> FeatureViewMetadata: - pass + raise NotImplementedError() @staticmethod def metadata_with( @@ -60,6 +65,8 @@ def metadata_with( description: str, batch_source: BatchDataSource, stream_source: StreamDataSource | None = None, + application_source: BatchDataSource | None = None, + staging_source: BatchDataSource | None = None, contacts: list[str] | None = None, tags: dict[str, str] | None = None, ) -> FeatureViewMetadata: @@ -70,19 +77,24 @@ def metadata_with( description, batch_source, stream_source or HttpStreamSource(name), + application_source=application_source, + staging_source=staging_source, contacts=contacts, tags=tags or {}, ) @classmethod def compile(cls) -> CompiledFeatureView: + return cls().compile_instance() + + def compile_instance(self) -> CompiledFeatureView: from aligned.compiler.feature_factory import FeatureFactory # Used to deterministicly init names for hidden features hidden_features = 0 - metadata = cls().metadata - var_names = [name for name in cls().__dir__() if not name.startswith('_')] + metadata = self.metadata + var_names = [name for name in self.__dir__() if not name.startswith('_')] view = CompiledFeatureView( name=metadata.name, @@ -95,12 +107,13 @@ def 
compile(cls) -> CompiledFeatureView: aggregated_features=set(), event_timestamp=None, stream_data_source=metadata.stream_source, + application_source=metadata.application_source, indexes=[], ) aggregations: list[FeatureFactory] = [] for var_name in var_names: - feature = getattr(cls, var_name) + feature = getattr(self, var_name) if not isinstance(feature, FeatureFactory): continue @@ -175,7 +188,7 @@ def sort_key(x: tuple[int, FeatureFactory]) -> int: raise Exception( 'Can only have one EventTimestamp for each' ' FeatureViewDefinition. Check that this is the case for' - f' {cls.__name__}' + f' {type(self).__name__}' ) view.features.add(compiled_feature) view.event_timestamp = feature.event_timestamp() @@ -221,6 +234,26 @@ def sort_key(x: tuple[int, FeatureFactory]) -> int: @classmethod def query(cls) -> FeatureViewStore: + """Makes it possible to query the feature view for features + + ```python + class SomeView(FeatureView): + + metadata = ... + + id = Int32().as_entity() + + a = Int32() + b = Int32() + + data = await SomeView.query().features_for({ + "id": [1, 2, 3], + }).to_pandas() + ``` + + Returns: + FeatureViewStore: Returns a queryable `FeatureViewStore` containing the feature view + """ from aligned import FeatureStore self = cls() @@ -229,6 +262,71 @@ def query(cls) -> FeatureViewStore: return store.feature_view(self.metadata.name) @classmethod - async def process(cls, data: list[dict] | dict[str, Any]) -> list[dict]: + async def process(cls, data: dict[str, list[Any]]) -> list[dict]: df = await cls.query().process_input(data).to_polars() return df.collect().to_dicts() + + @staticmethod + def feature_view_code_template( + schema: dict[str, FeatureFactory], batch_source_code: str, view_name: str, imports: str | None = None + ) -> str: + """Setup the code needed to represent the data source as a feature view + + ```python + + source = FileSource.parquet_at("file.parquet") + schema = await source.schema() + FeatureView.feature_view_code_template(schema, batch_source_code=f"{source}", view_name="my_view") + + >>> \"\"\"from aligned import FeatureView, String, Int64, Float + + class MyView(FeatureView): + + metadata = FeatureView.metadata_with( + name="titanic", + description="some description", + batch_source=FileSource.parquest("my_path.parquet") + stream_source=None, + ) + + Passenger_id = Int64() + Survived = Int64() + Pclass = Int64() + Name = String() + Sex = String() + Age = Float() + Sibsp = Int64() + Parch = Int64() + Ticket = String() + Fare = Float() + Cabin = String() + Embarked = String()\"\"\" + ``` + + Returns: + str: The code needed to setup a basic feature view + """ + data_types: set[str] = set() + feature_code = '' + for name, dtype in schema.items(): + type_name = dtype.__class__.__name__ + data_types.add(type_name) + feature_code += f'{name} = {type_name}()\n ' + + all_types = ', '.join(data_types) + + return f""" +from aligned import FeatureView, {all_types} +{imports or ""} + +class MyView(FeatureView): + + metadata = FeatureView.metadata_with( + name="{view_name}", + description="some description", + batch_source={batch_source_code} + stream_source=None, + ) + + {feature_code} + """ diff --git a/aligned/feature_view/tests/test_combined_view.py b/aligned/feature_view/tests/test_combined_view.py index 44135b4..2049e6b 100644 --- a/aligned/feature_view/tests/test_combined_view.py +++ b/aligned/feature_view/tests/test_combined_view.py @@ -1,28 +1,25 @@ import pytest from aligned import FeatureStore -from aligned.exceptions import CombinedFeatureViewQuerying - - 
-def test_combined_view_error(combined_feature_store: FeatureStore) -> None: - - with pytest.raises(CombinedFeatureViewQuerying): - combined_feature_store.feature_view('combined') @pytest.mark.asyncio async def test_combined_view(combined_feature_store: FeatureStore) -> None: entities = {'passenger_id': [1, 2, 3, 4, None], 'scan_id': [842302, 84300903, 843786, None, 842301]} - result = await combined_feature_store.features_for( + result_job = combined_feature_store.features_for( entities, features=[ 'combined:some_feature', 'combined:other_feature', ], - ).to_pandas() + ) + result = await result_job.log_each_job().to_pandas() - assert result.shape == (len(entities['passenger_id']), 5) + assert 'some_feature' in result.columns + assert 'other_feature' in result.columns + + assert result.shape == (len(entities['passenger_id']), 4) assert result.isna().sum().sum() == 4 + 2 @@ -32,5 +29,8 @@ async def test_combined_view_get_all_features(combined_feature_store: FeatureSto entities = {'passenger_id': [1, 2, 3, 4, None], 'scan_id': [842302, 84300903, 843786, None, 842301]} result = await combined_feature_store.features_for(entities, features=['combined:*']).to_pandas() - assert result.shape == (len(entities['passenger_id']), 5) + assert 'some_feature' in result.columns + assert 'other_feature' in result.columns + + assert result.shape == (len(entities['passenger_id']), 4) assert result.isna().sum().sum() == 4 + 2 diff --git a/aligned/local/job.py b/aligned/local/job.py index c289ef5..ef02e78 100644 --- a/aligned/local/job.py +++ b/aligned/local/job.py @@ -4,7 +4,7 @@ import pandas as pd import polars as pl -from aligned.request.retrival_request import RetrivalRequest +from aligned.request.retrival_request import AggregatedFeature, AggregateOver, RetrivalRequest from aligned.retrival_job import DateRangeJob, FactualRetrivalJob, FullExtractJob, RequestResult, RetrivalJob from aligned.schemas.feature import Feature from aligned.sources.local import DataFileReference @@ -167,6 +167,39 @@ class FileFactualJob(FactualRetrivalJob): def request_result(self) -> RequestResult: return RequestResult.from_request_list(self.requests) + def describe(self) -> str: + return f'Reading file at {self.source}' + + async def aggregate_over( + self, + group: AggregateOver, + features: set[AggregatedFeature], + df: pl.LazyFrame, + event_timestamp_col: str, + ) -> pl.LazyFrame: + + subset = df + if group.condition: + raise NotImplementedError('Condition aggregation not implemented for file data source') + + if group.window: + event_timestamp = group.window.time_column.name + end = pl.col(event_timestamp_col) + start = end - group.window.time_window + subset = subset.filter(pl.col(event_timestamp).is_between(start, end)) + + transformations = [] + for feature in features: + expr = await feature.derived_feature.transformation.transform_polars( + subset, feature.derived_feature.name + ) + if isinstance(expr, pl.Expr): + transformations.append(expr.alias(feature.name)) + else: + raise NotImplementedError('Only expressions are supported for file data source') + + return subset.groupby('row_id').agg(transformations) + async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: """Selects only the wanted subset from the loaded source @@ -187,7 +220,11 @@ async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: all_features.update(request.all_required_features) result = await self.facts.to_polars() - event_timestamp_col = 'event_timestamp' + event_timestamp_col = 'aligned_event_timestamp' + 
using_event_timestamp = False + if 'event_timestamp' in result.columns: + using_event_timestamp = True + result = result.rename({'event_timestamp': event_timestamp_col}) row_id_name = 'row_id' result = result.with_row_count(row_id_name) @@ -218,7 +255,7 @@ async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: column_selects = list(entity_names.union({'row_id'})) if request.event_timestamp: - column_selects.append('event_timestamp') + column_selects.append(event_timestamp_col) # Need to only select the relevent entities and row_id # Otherwise will we get a duplicate column error @@ -228,6 +265,10 @@ async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: ) new_result = new_result.select(pl.exclude(list(entity_names))) + for group, features in request.aggregate_over().items(): + aggregated_df = await self.aggregate_over(group, features, new_result, event_timestamp_col) + new_result = new_result.join(aggregated_df, on='row_id', how='left') + if request.event_timestamp: field = request.event_timestamp.name ttl = request.event_timestamp.ttl @@ -245,12 +286,15 @@ async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: new_result = new_result.filter( pl.col(field).is_null() | (pl.col(field) <= pl.col(event_timestamp_col)) ) - new_result = new_result.select(pl.exclude(field)) + new_result = new_result.sort(field, descending=True).select(pl.exclude(field)) unique = new_result.unique(subset=row_id_name, keep='first') result = result.join(unique, on=row_id_name, how='left') result = result.select(pl.exclude('.*_right$')) + if using_event_timestamp: + result = result.rename({event_timestamp_col: 'event_timestamp'}) + return result.select([pl.exclude('row_id')]) async def to_pandas(self) -> pd.DataFrame: diff --git a/aligned/psql/jobs.py b/aligned/psql/jobs.py index 1fd4e10..5d23801 100644 --- a/aligned/psql/jobs.py +++ b/aligned/psql/jobs.py @@ -8,7 +8,7 @@ import polars as pl from aligned.request.retrival_request import RequestResult, RetrivalRequest -from aligned.retrival_job import DateRangeJob, FactualRetrivalJob, FullExtractJob, RetrivalJob +from aligned.retrival_job import FactualRetrivalJob, RetrivalJob from aligned.schemas.derivied_feature import AggregatedFeature, AggregateOver from aligned.schemas.feature import FeatureLocation, FeatureType from aligned.schemas.transformation import PsqlTransformation @@ -29,19 +29,23 @@ class SqlColumn: @property def sql_select(self) -> str: - selection = self.selection - # if not special operation e.g function. Then wrap in quotes - if not ('(' in selection or '-' in selection or '.' in selection or selection == '*'): - selection = f'"{self.selection}"' - - if self.selection == self.alias: - return f'{selection}' - return f'{selection} AS "{self.alias}"' + return psql_select_column(self) def __hash__(self) -> int: return hash(self.sql_select) +def psql_select_column(column: SqlColumn) -> str: + selection = column.selection + # if not special operation e.g function. Then wrap in quotes + if not ('(' in selection or '-' in selection or '.' 
in selection or ' ' in selection or selection == '*'): + selection = f'"{column.selection}"' + + if column.selection == column.alias: + return f'{selection}' + return f'{selection} AS "{column.alias}"' + + @dataclass class SqlJoin: table: str @@ -60,33 +64,37 @@ class TableFetch: order_by: str | None = field(default=None) def sql_query(self, distinct: str | None = None) -> str: - # Select the core features - wheres = '' - order_by = '' - group_by = '' - select = 'SELECT' + return psql_table_fetch(self, distinct) - if distinct: - select = f'SELECT DISTINCT ON ({distinct})' - if self.conditions: - wheres = 'WHERE ' + ' AND '.join(self.conditions) +def psql_table_fetch(fetch: TableFetch, distinct: str | None = None) -> str: + # Select the core features + wheres = '' + order_by = '' + group_by = '' + select = 'SELECT' - if self.order_by: - order_by = 'ORDER BY ' + self.order_by + if distinct: + select = f'SELECT DISTINCT ON ({distinct})' - if self.group_by: - group_by = 'GROUP BY ' + ', '.join(self.group_by) + if fetch.conditions: + wheres = 'WHERE ' + ' AND '.join(fetch.conditions) - table_columns = [col.sql_select for col in self.columns] + if fetch.order_by: + order_by = 'ORDER BY ' + fetch.order_by - if isinstance(self.table, TableFetch): - from_sql = f'FROM ({self.table.sql_query()}) as entities' - else: - from_sql = f"""FROM entities - LEFT JOIN "{ self.table }" ta ON { ' AND '.join(self.joins) }""" + if fetch.group_by: + group_by = 'GROUP BY ' + ', '.join(fetch.group_by) - return f""" + table_columns = [col.sql_select for col in fetch.columns] + + if isinstance(fetch.table, TableFetch): + from_sql = f'FROM ({fetch.table.sql_query()}) as entities' + else: + from_sql = f"""FROM entities +LEFT JOIN "{ fetch.table }" ta ON { ' AND '.join(fetch.joins) }""" + + return f""" { select } { ', '.join(table_columns) } { from_sql } { wheres } @@ -108,13 +116,20 @@ def request_result(self) -> RequestResult: def retrival_requests(self) -> list[RetrivalRequest]: return self.requests + def will_load_list_feature(self) -> bool: + for request in self.requests: + for feature in request.all_features: + if feature.dtype == FeatureType('').array: + return True + return False + async def to_pandas(self) -> pd.DataFrame: df = await self.to_polars() return df.collect().to_pandas() async def to_polars(self) -> pl.LazyFrame: try: - return pl.read_sql(self.query, self.config.url).lazy() + return pl.read_database(self.query, self.config.url).lazy() except Exception as e: logger.error(f'Error running query: {self.query}') logger.error(f'Error: {e}') @@ -124,109 +139,54 @@ def describe(self) -> str: return f'PostgreSQL Job: \n{self.query}\n' -@dataclass -class FullExtractPsqlJob(FullExtractJob): - - source: PostgreSQLDataSource - request: RetrivalRequest - limit: int | None = None - - @property - def request_result(self) -> RequestResult: - return RequestResult.from_request(self.request) - - @property - def retrival_requests(self) -> list[RetrivalRequest]: - return [self.request] - - @property - def config(self) -> PostgreSQLConfig: - return self.source.config - - def describe(self) -> str: - return self.psql_job().describe() - - async def to_pandas(self) -> pd.DataFrame: - return await self.psql_job().to_pandas() - - async def to_polars(self) -> pl.LazyFrame: - return await self.psql_job().to_polars() - - def psql_job(self) -> PostgreSqlJob: - return PostgreSqlJob(self.config, self.build_request()) - - def build_request(self) -> str: - - all_features = [ - feature.name for feature in 
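The quoting rule moved into `psql_select_column` above is worth spelling out: a selection is wrapped in double quotes only when it looks like a bare column name, i.e. it contains none of `(`, `-`, `.`, a space, and is not `*`. A few illustrative cases (column and alias names are made up):

```python
from aligned.psql.jobs import SqlColumn

SqlColumn('age', 'age').sql_select            # -> '"age"'                  plain column, quoted
SqlColumn('age', 'passenger_age').sql_select  # -> '"age" AS "passenger_age"'
SqlColumn('ta.age', 'age').sql_select         # -> 'ta.age AS "age"'        qualified name, left as-is
SqlColumn('count(*)', 'n_rows').sql_select    # -> 'count(*) AS "n_rows"'   expression, left as-is
```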
list(self.request.all_required_features.union(self.request.entities)) - ] - sql_columns = self.source.feature_identifier_for(all_features) - columns = [ - f'"{sql_col}" AS {alias}' if sql_col != alias else sql_col - for sql_col, alias in zip(sql_columns, all_features) - ] - column_select = ', '.join(columns) - schema = f'{self.config.schema}.' if self.config.schema else '' - - limit_query = '' - if self.limit: - limit_query = f'LIMIT {int(self.limit)}' - - return f'SELECT {column_select} FROM {schema}"{self.source.table}" {limit_query}' - - -@dataclass -class DateRangePsqlJob(DateRangeJob): - - source: PostgreSQLDataSource - start_date: datetime - end_date: datetime - request: RetrivalRequest - - @property - def request_result(self) -> RequestResult: - return RequestResult.from_request(self.request) - - @property - def retrival_requests(self) -> list[RetrivalRequest]: - return [self.request] - - @property - def config(self) -> PostgreSQLConfig: - return self.source.config - - async def to_pandas(self) -> pd.DataFrame: - return await self.psql_job().to_pandas() - - async def to_polars(self) -> pl.LazyFrame: - return await self.psql_job().to_polars() - - def psql_job(self) -> PostgreSqlJob: - return PostgreSqlJob(self.config, self.build_request()) - - def build_request(self) -> str: - - if not self.request.event_timestamp: - raise ValueError('Event timestamp is needed in order to run a data range job') - - event_timestamp_column = self.source.feature_identifier_for([self.request.event_timestamp.name])[0] - all_features = [ - feature.name for feature in list(self.request.all_required_features.union(self.request.entities)) - ] - sql_columns = self.source.feature_identifier_for(all_features) - columns = [ - f'"{sql_col}" AS {alias}' if sql_col != alias else sql_col - for sql_col, alias in zip(sql_columns, all_features) - ] - column_select = ', '.join(columns) - schema = f'{self.config.schema}.' if self.config.schema else '' - start_date = self.start_date.strftime('%Y-%m-%d %H:%M:%S') - end_date = self.end_date.strftime('%Y-%m-%d %H:%M:%S') - - return ( - f'SELECT {column_select} FROM {schema}"{self.source.table}" WHERE' - f' {event_timestamp_column} BETWEEN \'{start_date}\' AND \'{end_date}\'' - ) +def build_full_select_query_psql( + source: PostgreSQLDataSource, request: RetrivalRequest, limit: int | None +) -> str: + """ + Generates the SQL query needed to select all features related to a psql data source + """ + all_features = [feature.name for feature in list(request.all_required_features.union(request.entities))] + sql_columns = source.feature_identifier_for(all_features) + columns = [ + f'"{sql_col}" AS {alias}' if sql_col != alias else sql_col + for sql_col, alias in zip(sql_columns, all_features) + ] + column_select = ', '.join(columns) + + config = source.config + schema = f'{config.schema}.' 
if config.schema else '' + + limit_query = '' + if limit: + limit_query = f'LIMIT {int(limit)}' + + return f'SELECT {column_select} FROM {schema}"{source.table}" {limit_query}' + + +def build_date_range_query_psql( + source: PostgreSQLDataSource, request: RetrivalRequest, start_date: datetime, end_date: datetime +) -> str: + if not request.event_timestamp: + raise ValueError('Event timestamp is needed in order to run a data range job') + + event_timestamp_column = source.feature_identifier_for([request.event_timestamp.name])[0] + all_features = [feature.name for feature in list(request.all_required_features.union(request.entities))] + sql_columns = source.feature_identifier_for(all_features) + columns = [ + f'"{sql_col}" AS {alias}' if sql_col != alias else sql_col + for sql_col, alias in zip(sql_columns, all_features) + ] + column_select = ', '.join(columns) + + config = source.config + schema = f'{config.schema}.' if config.schema else '' + start_date_str = start_date.strftime('%Y-%m-%d %H:%M:%S') + end_date_str = end_date.strftime('%Y-%m-%d %H:%M:%S') + + return ( + f'SELECT {column_select} FROM {schema}"{source.table}" WHERE' + f' {event_timestamp_column} BETWEEN \'{start_date_str}\' AND \'{end_date_str}\'' + ) @dataclass @@ -269,9 +229,14 @@ def config(self) -> PostgreSQLConfig: return list(self.sources.values())[0].config def describe(self) -> str: + from aligned.retrival_job import LiteralDictJob + if isinstance(self.facts, PostgreSqlJob): psql_job = self.build_sql_entity_query(self.facts) return f'Loading features for {self.facts.describe()}\n\nQuery: {psql_job}' + elif isinstance(self.facts, LiteralDictJob): + psql_job = self.build_request_from_facts(pl.DataFrame(self.facts.data).lazy()) + return f'Loading features from dicts \n\nQuery: {psql_job}' else: return f'Loading features from {self.facts.describe()}, and its related features' @@ -285,9 +250,14 @@ async def to_polars(self) -> pl.LazyFrame: async def psql_job(self) -> PostgreSqlJob: if isinstance(self.facts, PostgreSqlJob): - return PostgreSqlJob(self.config, self.build_sql_entity_query(self.facts)) + return PostgreSqlJob(self.config, self.build_sql_entity_query(self.facts), self.retrival_requests) entities = await self.build_request() - return PostgreSqlJob(self.config, entities) + return PostgreSqlJob(self.config, entities, self.retrival_requests) + + def ignore_event_timestamp(self) -> RetrivalJob: + return FactPsqlJob( + self.sources, [request.without_event_timestamp() for request in self.requests], self.facts + ) def dtype_to_sql_type(self, dtype: object) -> str: if isinstance(dtype, str): @@ -307,7 +277,9 @@ def value_selection(self, request: RetrivalRequest, entities_has_event_timestamp source = self.sources[request.location] entity_selects = {f'entities.{entity}' for entity in request.entity_names} - field_selects = request.all_required_feature_names.union(entity_selects).union({'entities.row_id'}) + field_selects = list( + request.all_required_feature_names.union(entity_selects).union({'entities.row_id'}) + ) field_identifiers = source.feature_identifier_for(field_selects) selects = { SqlColumn(db_field_name, feature) @@ -319,10 +291,12 @@ def value_selection(self, request: RetrivalRequest, entities_has_event_timestamp sort_query = 'entities.row_id' event_timestamp_clause: str | None = None - if request.event_timestamp and entities_has_event_timestamp: - event_timestamp_column = source.feature_identifier_for([request.event_timestamp.name])[0] - event_timestamp_clause = f'entities.event_timestamp >= 
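The two helpers above now return plain `SELECT` statements instead of dedicated job classes. For a hypothetical `public.passengers` source whose database column names match the feature names, the output would look roughly like the following sketch (matching names are emitted unquoted, and column order follows set iteration, so it is not stable):

```python
# Illustrative output of build_full_select_query_psql(source, request, limit=100):
full_select = 'SELECT age, passenger_id FROM public."passengers" LIMIT 100'

# build_date_range_query_psql keeps the same SELECT shape but swaps the LIMIT for a
# BETWEEN filter on the event timestamp column (here assumed to be 'created_at'):
date_range = (
    'SELECT age, passenger_id FROM public."passengers"'
    " WHERE created_at BETWEEN '2023-01-01 00:00:00' AND '2023-02-01 00:00:00'"
)
```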
ta.{event_timestamp_column}' - sort_query += f', {event_timestamp_column} DESC' + if request.event_timestamp_request and entities_has_event_timestamp: + timestamp = request.event_timestamp_request.event_timestamp + entity_column = request.event_timestamp_request.entity_column + event_timestamp_column = source.feature_identifier_for([timestamp.name])[0] + event_timestamp_clause = f'entities.{entity_column} >= ta.{event_timestamp_column}' + sort_query += f', ta.{event_timestamp_column} DESC' join_conditions = [ f'ta."{entity_db_name}" = entities.{entity}' @@ -391,22 +365,24 @@ def sql_aggregated_request( for feature in features } - id_column = window.group_by[0].name + id_column = 'row_id' + # id_column = window.group_by[0].name event_timestamp_clause: str | None = None - if request.event_timestamp: - id_column = 'row_id' + if request.event_timestamp_request: + timestamp = request.event_timestamp_request.event_timestamp + entity_column = request.event_timestamp_request.entity_column group_by_names = {id_column} # Use row_id as the main join key - event_timestamp_name = source.feature_identifier_for([request.event_timestamp.name])[0] + event_timestamp_name = source.feature_identifier_for([timestamp.name])[0] if window.window: time_window_config = window.window window_in_seconds = int(time_window_config.time_window.total_seconds()) event_timestamp_clause = ( - f'ta.{event_timestamp_name} BETWEEN entities.event_timestamp' - f" - interval '{window_in_seconds} seconds' AND entities.event_timestamp" + f'ta.{event_timestamp_name} BETWEEN entities.{entity_column}' + f" - interval '{window_in_seconds} seconds' AND entities.{entity_column}" ) else: - event_timestamp_clause = f'ta.{event_timestamp_name} <= entities.event_timestamp' + event_timestamp_clause = f'ta.{event_timestamp_name} <= entities.{entity_column}' entities = list(request.entity_names) entity_db_name = source.feature_identifier_for(entities) @@ -467,13 +443,7 @@ def sql_aggregated_request( def aggregated_values_from_request(self, request: RetrivalRequest) -> list[TableFetch]: - aggregation_windows: dict[AggregateOver, set[AggregatedFeature]] = {} - - for aggregate in request.aggregated_features: - if aggregate.aggregate_over not in aggregation_windows: - aggregation_windows[aggregate.aggregate_over] = {aggregate} - else: - aggregation_windows[aggregate.aggregate_over].add(aggregate) + aggregation_windows = request.aggregate_over() fetches: list[TableFetch] = [] supported_aggregation_features = set(request.feature_names).union(request.entity_names) @@ -500,71 +470,71 @@ def aggregated_values_from_request(self, request: RetrivalRequest) -> list[Table return fetches async def build_request(self) -> str: - import numpy as np + facts = await self.facts.to_polars() + return self.build_request_from_facts(facts) + + def build_request_from_facts(self, facts: pl.LazyFrame) -> str: final_select_names: set[str] = set() - entity_types: dict[str, FeatureType] = {} + all_entities = {'row_id'} + entity_types: dict[str, FeatureType] = {'row_id': FeatureType('').int64} has_event_timestamp = False for request in self.requests: - final_select_names = final_select_names.union( - {f'{request.location.name}_cte.{feature}' for feature in request.all_required_feature_names} - ) final_select_names = final_select_names.union( {f'entities.{entity}' for entity in request.entity_names} ) for entity in request.entities: entity_types[entity.name] = entity.dtype - if request.event_timestamp: + all_entities.add(entity.name) + + if request.event_timestamp_request: + 
entity_column = request.event_timestamp_request.entity_column has_event_timestamp = True + entity_types[entity_column] = FeatureType('').datetime + all_entities.add(entity_column) + final_select_names.add(f'entities.{entity_column}') - if has_event_timestamp: - final_select_names.add('event_timestamp') - entity_types['event_timestamp'] = FeatureType('').datetime + all_entities_list = list(all_entities) # Need to replace nan as it will not be encoded - fact_df = await self.facts.to_pandas() - fact_df = fact_df.replace(np.nan, None) - - number_of_values = fact_df.shape[0] - # + 1 is needed as 0 is evaluated for null - fact_df['row_id'] = list(range(1, number_of_values + 1)) + fact_df = facts.with_row_count(name='row_id', offset=1).collect() - entity_type_list = [ - self.dtype_to_sql_type(entity_types.get(entity, FeatureType('').int32)) - for entity in fact_df.columns - ] + entity_type_list = { + entity: self.dtype_to_sql_type(entity_types.get(entity, FeatureType('').int32)) + for entity in all_entities + } query_values: list[list[SqlValue]] = [] - all_entities = [] - for values in fact_df.values: + for values in fact_df[all_entities_list].to_dicts(): row_placeholders = [] - for column_index, value in enumerate(values): - row_placeholders.append(SqlValue(value, entity_type_list[column_index])) - if fact_df.columns[column_index] not in all_entities: - all_entities.append(fact_df.columns[column_index]) - query_values.append(row_placeholders) + for key, value in values.items(): + row_placeholders.append(SqlValue(value, entity_type_list[key])) - feature_view_names: list[str] = [location.name for location in self.sources.keys()] + query_values.append(row_placeholders) # Add the joins to the fact tables: list[TableFetch] = [] aggregates: list[TableFetch] = [] for request in self.requests: - fetch = self.value_selection(request, has_event_timestamp) - tables.append(fetch) - aggregate_fetches = self.aggregated_values_from_request(request) - aggregates.extend(aggregate_fetches) - for aggregate in aggregate_fetches: + all_entities.update(request.entity_names) + + if request.aggregated_features: + aggregate_fetches = self.aggregated_values_from_request(request) + aggregates.extend(aggregate_fetches) + for aggregate in aggregate_fetches: + final_select_names = final_select_names.union( + {column.alias for column in aggregate.columns if column.alias != 'entities.row_id'} + ) + else: + fetch = self.value_selection(request, has_event_timestamp) + tables.append(fetch) final_select_names = final_select_names.union( - {column.alias for column in aggregate.columns if column.alias != aggregate.id_column} + {f'{fetch.name}.{feature}' for feature in request.all_required_feature_names} ) joins = '\n '.join( - [ - f'LEFT JOIN {feature_view}_cte ON {feature_view}_cte.row_id = entities.row_id' - for feature_view in feature_view_names - ] + [f'LEFT JOIN {table.name} ON {table.name}.row_id = entities.row_id' for table in tables] ) if aggregates: joins += '\n ' @@ -578,13 +548,91 @@ async def build_request(self) -> str: entity_values = self.build_entities_from_values(query_values) return self.generate_query( - entity_columns=list(all_entities), + entity_columns=all_entities_list, entity_query=entity_values, tables=tables, aggregates=aggregates, final_select=list(final_select_names), final_joins=joins, ) + # import numpy as np + + # final_select_names: set[str] = set() + # entity_types: dict[str, FeatureType] = {} + # has_event_timestamp = False + + # for request in self.requests: + # final_select_names = 
final_select_names.union( + # {f'{request.name}_cte.{feature}' for feature in request.all_required_feature_names} + # ) + # final_select_names = final_select_names.union( + # {f'entities.{entity}' for entity in request.entity_names} + # ) + # for entity in request.entities: + # entity_types[entity.name] = entity.dtype + # if request.event_timestamp: + # has_event_timestamp = True + + # if has_event_timestamp: + # final_select_names.add('event_timestamp') + # entity_types['event_timestamp'] = FeatureType('').datetime + + # entities = list(entity_types.keys()) + ['row_id'] + + # # Need to replace nan as it will not be encoded + # fact_df = (await self.facts.to_polars()).with_row_count(name='row_id').collect() + + # entity_type_list = { + # entity: self.dtype_to_sql_type(entity_types.get(entity, FeatureType('').int32)) + # for entity in entities + # } + + # query_values: list[list[SqlValue]] = [] + # all_entities = entities + # for values in fact_df[entities].to_dicts(): + # row_placeholders = [] + # for key, value in values.items(): + # row_placeholders.append(SqlValue(value, entity_type_list[key])) + + # query_values.append(row_placeholders) + + # feature_view_names: list[str] = [location.name for location in self.sources.keys()] + # # Add the joins to the fact + + # tables: list[TableFetch] = [] + # aggregates: list[TableFetch] = [] + # for request in self.requests: + # fetch = self.value_selection(request, has_event_timestamp) + # tables.append(fetch) + # aggregate_fetches = self.aggregated_values_from_request(request) + # aggregates.extend(aggregate_fetches) + # for aggregate in aggregate_fetches: + # final_select_names = final_select_names.union( + # {column.alias for column in aggregate.columns if column.alias != aggregate.id_column} + # ) + + # joins = '\n '.join( + # [f'LEFT JOIN {table.name} ON {table.name}.row_id = entities.row_id' for table in tables] + # ) + # if aggregates: + # joins += '\n ' + # joins += '\n '.join( + # [ + # f'LEFT JOIN {table.name} ON {table.name}.{table.id_column} = entities.{table.id_column}' + # for table in aggregates + # ] + # ) + + # entity_values = self.build_entities_from_values(query_values) + + # return self.generate_query( + # entity_columns=list(all_entities), + # entity_query=entity_values, + # tables=tables, + # aggregates=aggregates, + # final_select=list(final_select_names), + # final_joins=joins, + # ) def build_entities_from_values(self, values: list[list[SqlValue]]) -> str: query = 'VALUES ' @@ -603,17 +651,17 @@ def build_sql_entity_query(self, sql_facts: PostgreSqlJob) -> str: has_event_timestamp = False all_entities = set() - if 'event_timestamp' in sql_facts.query: - has_event_timestamp = True - all_entities.add('event_timestamp') - for request in self.requests: final_select_names = final_select_names.union( {f'entities.{entity}' for entity in request.entity_names} ) + if request.event_timestamp_request: + entity_column = request.event_timestamp_request.entity_column - if has_event_timestamp: - final_select_names.add('event_timestamp') + if entity_column in sql_facts.query: + has_event_timestamp = True + all_entities.add(entity_column) + final_select_names.add(f'entities.{entity_column}') # Add the joins to the fact diff --git a/aligned/redshift/jobs.py b/aligned/redshift/jobs.py index 8a3f4a2..94dfc97 100644 --- a/aligned/redshift/jobs.py +++ b/aligned/redshift/jobs.py @@ -2,212 +2,22 @@ import logging from dataclasses import dataclass, field -from datetime import datetime import pandas as pd import polars as pl from aligned.psql.jobs 
import PostgreSqlJob +from aligned.redshift.sql_job import SqlColumn, TableFetch from aligned.request.retrival_request import RequestResult, RetrivalRequest -from aligned.retrival_job import DateRangeJob, FactualRetrivalJob, FullExtractJob, RetrivalJob +from aligned.retrival_job import FactualRetrivalJob, RetrivalJob from aligned.schemas.derivied_feature import AggregatedFeature, AggregateOver, DerivedFeature from aligned.schemas.feature import FeatureLocation, FeatureType from aligned.schemas.transformation import RedshiftTransformation -from aligned.sources.psql import PostgreSQLConfig, PostgreSQLDataSource +from aligned.sources.redshift import RedshiftSQLConfig, RedshiftSQLDataSource logger = logging.getLogger(__name__) -@dataclass -class SQLQuery: - sql: str - - -@dataclass -class SqlColumn: - selection: str - alias: str - - @property - def sql_select(self) -> str: - if self.selection == self.alias: - return f'{self.selection}' - return f'{self.selection} AS "{self.alias}"' - - def __hash__(self) -> int: - return hash(self.sql_select) - - -@dataclass -class SqlJoin: - table: str - conditions: list[str] - - -@dataclass -class TableFetch: - name: str - id_column: str - table: str | TableFetch - columns: set[SqlColumn] - schema: str | None = field(default=None) - joins: list[str] = field(default_factory=list) - conditions: list[str] = field(default_factory=list) - group_by: list[str] = field(default_factory=list) - order_by: str | None = field(default=None) - - def sql_query(self, distinct: str | None = None) -> str: - # Select the core features - wheres = '' - order_by = '' - group_by = '' - select = 'SELECT' - - if self.conditions: - wheres = 'WHERE ' + ' AND '.join(self.conditions) - - if self.order_by: - order_by = 'ORDER BY ' + self.order_by - - if self.group_by: - group_by = 'GROUP BY ' + ', '.join(self.group_by) - - table_columns = [col.sql_select for col in self.columns] - - if isinstance(self.table, TableFetch): - from_sql = f'FROM ({self.table.sql_query()}) as entities' - else: - schema = f'{self.schema}.' 
if self.schema else '' - from_sql = f"""FROM entities - LEFT JOIN {schema}"{ self.table }" ta ON { ' AND '.join(self.joins) }""" - - if distinct: - aliases = [col.alias for col in self.columns] - return f""" - SELECT { ', '.join(aliases) } - FROM ( - { select } { ', '.join(table_columns) }, - ROW_NUMBER() OVER( - PARTITION BY entities.row_id - { order_by } - ) AS row_number - { from_sql } - { wheres } - { order_by } - { group_by } - ) AS entities - WHERE row_number = 1""" - else: - return f""" - { select } { ', '.join(table_columns) } - { from_sql } - { wheres } - { order_by } - { group_by }""" - - -@dataclass -class FullExtractPsqlJob(FullExtractJob): - - source: PostgreSQLDataSource - request: RetrivalRequest - limit: int | None = None - - @property - def request_result(self) -> RequestResult: - return RequestResult.from_request(self.request) - - @property - def retrival_requests(self) -> list[RetrivalRequest]: - return [self.request] - - @property - def config(self) -> PostgreSQLConfig: - return self.source.config - - async def to_pandas(self) -> pd.DataFrame: - return await self.psql_job().to_pandas() - - async def to_polars(self) -> pl.LazyFrame: - return await self.psql_job().to_polars() - - def psql_job(self) -> PostgreSqlJob: - return PostgreSqlJob(self.config, self.build_request()) - - def build_request(self) -> str: - - all_features = [ - feature.name for feature in list(self.request.all_required_features.union(self.request.entities)) - ] - sql_columns = self.source.feature_identifier_for(all_features) - columns = [ - f'"{sql_col}" AS {alias}' if sql_col != alias else sql_col - for sql_col, alias in zip(sql_columns, all_features) - ] - column_select = ', '.join(columns) - schema = f'{self.config.schema}.' if self.config.schema else '' - - limit_query = '' - if self.limit: - limit_query = f'LIMIT {int(self.limit)}' - - f'SELECT {column_select} FROM {schema}"{self.source.table}" {limit_query}', - - -@dataclass -class DateRangePsqlJob(DateRangeJob): - - source: PostgreSQLDataSource - start_date: datetime - end_date: datetime - request: RetrivalRequest - - @property - def request_result(self) -> RequestResult: - return RequestResult.from_request(self.request) - - @property - def retrival_requests(self) -> list[RetrivalRequest]: - return [self.request] - - @property - def config(self) -> PostgreSQLConfig: - return self.source.config - - async def to_pandas(self) -> pd.DataFrame: - return await self.psql_job().to_pandas() - - async def to_polars(self) -> pl.LazyFrame: - return await self.psql_job().to_polars() - - def psql_job(self) -> PostgreSqlJob: - return PostgreSqlJob(self.config, self.build_request()) - - def build_request(self) -> str: - - if not self.request.event_timestamp: - raise ValueError('Event timestamp is needed in order to run a data range job') - - event_timestamp_column = self.source.feature_identifier_for([self.request.event_timestamp.name])[0] - all_features = [ - feature.name for feature in list(self.request.all_required_features.union(self.request.entities)) - ] - sql_columns = self.source.feature_identifier_for(all_features) - columns = [ - f'"{sql_col}" AS {alias}' if sql_col != alias else sql_col - for sql_col, alias in zip(sql_columns, all_features) - ] - column_select = ', '.join(columns) - schema = f'{self.config.schema}.' 
if self.config.schema else '' - start_date = self.start_date.strftime('%Y-%m-%d %H:%M:%S') - end_date = self.end_date.strftime('%Y-%m-%d %H:%M:%S') - - return ( - f'SELECT {column_select} FROM {schema}"{self.source.table}" WHERE' - f' {event_timestamp_column} BETWEEN \'{start_date}\' AND \'{end_date}\'' - ) - - @dataclass class SqlValue: value: str | None @@ -231,7 +41,7 @@ class FactRedshiftJob(FactualRetrivalJob): NB: It is expected that the data sources are for the same psql instance """ - sources: dict[FeatureLocation, PostgreSQLDataSource] + sources: dict[FeatureLocation, RedshiftSQLDataSource] requests: list[RetrivalRequest] facts: RetrivalJob @@ -246,7 +56,7 @@ def retrival_requests(self) -> list[RetrivalRequest]: return self.requests @property - def config(self) -> PostgreSQLConfig: + def config(self) -> RedshiftSQLConfig: return list(self.sources.values())[0].config async def to_pandas(self) -> pd.DataFrame: @@ -259,12 +69,12 @@ async def to_polars(self) -> pl.LazyFrame: async def psql_job(self) -> PostgreSqlJob: if isinstance(self.facts, PostgreSqlJob): - return PostgreSqlJob(self.config, self.build_sql_entity_query(self.facts)) + return PostgreSqlJob(self.config.psql_config, self.build_sql_entity_query(self.facts)) raise ValueError(f'Redshift only support SQL entity queries. Got: {self.facts}') def describe(self) -> str: if isinstance(self.facts, PostgreSqlJob): - return PostgreSqlJob(self.config, self.build_sql_entity_query(self.facts)).describe() + return PostgreSqlJob(self.config.psql_config, self.build_sql_entity_query(self.facts)).describe() raise ValueError(f'Redshift only support SQL entity queries. Got: {self.facts}') def dtype_to_sql_type(self, dtype: object) -> str: @@ -285,13 +95,16 @@ def value_selection(self, request: RetrivalRequest, entities_has_event_timestamp source = self.sources[request.location] entity_selects = {f'{self.entity_table_name}.{entity}' for entity in request.entity_names} - field_selects = request.all_required_feature_names.union(entity_selects).union( - {f'{self.entity_table_name}.row_id'} + field_selects = list( + request.all_required_feature_names.union(entity_selects).union( + {f'{self.entity_table_name}.row_id'} + ) ) field_identifiers = source.feature_identifier_for(field_selects) selects = { SqlColumn(db_field_name, feature) for feature, db_field_name in zip(field_selects, field_identifiers) + if feature not in source.list_references } entities = list(request.entity_names) @@ -313,14 +126,38 @@ def value_selection(self, request: RetrivalRequest, entities_has_event_timestamp if event_timestamp_clause: join_conditions.append(event_timestamp_clause) + join_tables: list[tuple[TableFetch, str]] = [] + if source.list_references: + for feature_name, reference in source.list_references.items(): + if feature_name not in request.all_feature_names: + continue + selects.add(SqlColumn(feature_name, feature_name)) + table_id = f'{feature_name}_list' + source_id_column = reference.join_column or list(request.entity_names)[0] + column = f"'[\"' || listagg({reference.value_column}, '\",\"') || '\"]'" + join_table = TableFetch( + name=table_id, + id_column=source_id_column, + table=reference.table_name, + schema=reference.table_schema, + columns={ + SqlColumn(reference.id_column, reference.id_column), + SqlColumn(column, feature_name), + }, + group_by=[reference.id_column], + ) + table_join_condition = f'{table_id}.{reference.id_column} = ta.{source_id_column}' + join_tables.append((join_table, table_join_condition)) + rename_fetch = TableFetch( 
name=f'{request.name}_cte', id_column='row_id', table=source.table, columns=selects, joins=join_conditions, + join_tables=join_tables, order_by=sort_query, - schema=self.config.schema, + schema=source.config.schema, ) derived_map = request.derived_feature_map() @@ -564,8 +401,8 @@ def build_sql_entity_query(self, sql_facts: PostgreSqlJob) -> str: all_entities_list.append('row_id') entity_query = ( - f'SELECT {all_entities_str}, ROW_NUMBER() OVER (ORDER BY ' - f'{list(request.entity_names)[0]}) AS row_id FROM ({sql_facts.query}) AS {self.entity_table_name}' + f'SELECT {all_entities_str}, ROW_NUMBER() OVER ()' + f'AS row_id FROM ({sql_facts.query}) AS {self.entity_table_name}' ) joins = '\n '.join( [ diff --git a/aligned/redshift/sql_job.py b/aligned/redshift/sql_job.py new file mode 100644 index 0000000..bc7c89a --- /dev/null +++ b/aligned/redshift/sql_job.py @@ -0,0 +1,184 @@ +from __future__ import annotations +from typing import TYPE_CHECKING + +from dataclasses import dataclass, field +from logging import getLogger + +import polars as pl + +from aligned.request.retrival_request import RequestResult, RetrivalRequest +from aligned.retrival_job import RetrivalJob +from aligned.sources.redshift import RedshiftSQLConfig + +if TYPE_CHECKING: + import pandas as pd + +logger = getLogger(__name__) + + +@dataclass +class SQLQuery: + sql: str + + +@dataclass +class SqlColumn: + selection: str + alias: str + + @property + def sql_select(self) -> str: + if self.selection == self.alias: + return f'{self.selection}' + return f'{self.selection} AS "{self.alias}"' + + def __hash__(self) -> int: + return hash(self.sql_select) + + +@dataclass +class SqlJoin: + table: str + conditions: list[str] + + +@dataclass +class TableFetch: + """ + A configuration of an SQL query on a table + """ + + name: str + id_column: str + table: str | TableFetch + columns: set[SqlColumn] + schema: str | None = field(default=None) + joins: list[str] = field(default_factory=list) + join_tables: list[tuple[TableFetch, str]] = field(default_factory=list) + conditions: list[str] = field(default_factory=list) + group_by: list[str] = field(default_factory=list) + order_by: str | None = field(default=None) + + def sql_query(self, distinct: str | None = None) -> str: + return redshift_table_fetch(self, distinct) + +def select_table(table: TableFetch) -> str: + if isinstance(table.table, TableFetch): + raise ValueError("Do not support TableFetch in this select") + wheres = '' + order_by = '' + group_by = '' + from_table = 'FROM ' + + columns = [ + col.sql_select for col in table.columns + ] + select = f'SELECT {",".join(columns)}' + + if table.conditions: + wheres = 'WHERE ' + ' AND '.join(table.conditions) + + if table.order_by: + order_by = 'ORDER BY ' + table.order_by + + if table.group_by: + group_by = 'GROUP BY ' + ', '.join(table.group_by) + + if table.schema: + from_table += f'{table.schema}.' 
+ + from_table += f'"{table.table}"' + + return f""" + {select} + {from_table} + {wheres} + {order_by} + {group_by} + """ + +def redshift_table_fetch(fetch: TableFetch, distinct: str | None = None) -> str: + wheres = '' + order_by = '' + group_by = '' + select = 'SELECT' + + if fetch.conditions: + wheres = 'WHERE ' + ' AND '.join(fetch.conditions) + + if fetch.order_by: + order_by = 'ORDER BY ' + fetch.order_by + + if fetch.group_by: + group_by = 'GROUP BY ' + ', '.join(fetch.group_by) + + table_columns = [col.sql_select for col in fetch.columns] + + if isinstance(fetch.table, TableFetch): + sub_query = redshift_table_fetch(fetch.table) + from_sql = f'FROM ({sub_query}) as entities' + else: + schema = f'{fetch.schema}.' if fetch.schema else '' + from_sql = f"""FROM entities + LEFT JOIN {schema}"{ fetch.table }" ta ON { ' AND '.join(fetch.joins) }""" + if fetch.join_tables: + for join_table, join_condition in fetch.join_tables: + from_sql += f""" + LEFT JOIN ( + {select_table(join_table)} + ) AS {join_table.name} ON {join_condition} + """ + + if distinct: + aliases = [col.alias for col in fetch.columns] + return f""" + SELECT { ', '.join(aliases) } + FROM ( + { select } { ', '.join(table_columns) }, + ROW_NUMBER() OVER( + PARTITION BY entities.row_id + { order_by } + ) AS row_number + { from_sql } + { wheres } + { order_by } + { group_by } + ) AS entities + WHERE row_number = 1""" + else: + return f""" + { select } { ', '.join(table_columns) } + { from_sql } + { wheres } + { order_by } + { group_by }""" + + +@dataclass +class RedshiftSqlJob(RetrivalJob): + + config: RedshiftSQLConfig + query: str + requests: list[RetrivalRequest] + + def request_result(self) -> RequestResult: + return RequestResult.from_request_list(self.retrival_requests) + + @property + def retrival_requests(self) -> list[RetrivalRequest]: + return self.requests + + async def to_pandas(self) -> pd.DataFrame: + df = await self.to_polars() + return df.collect().to_pandas() + + async def to_polars(self) -> pl.LazyFrame: + try: + return pl.read_sql(self.query, self.config.url).lazy() + except Exception as e: + logger.error(f'Error running query: {self.query}') + logger.error(f'Error: {e}') + raise e + + def describe(self) -> str: + return f'RedshiftSql Job: \n{self.query}\n' diff --git a/aligned/request/retrival_request.py b/aligned/request/retrival_request.py index 63f6ec7..c71fef0 100644 --- a/aligned/request/retrival_request.py +++ b/aligned/request/retrival_request.py @@ -6,6 +6,13 @@ from aligned.schemas.feature import EventTimestamp, Feature, FeatureLocation +@dataclass +class EventTimestampRequest(Codable): + + event_timestamp: EventTimestamp + entity_column: str = field(default='event_timestamp') + + @dataclass class RetrivalRequest(Codable): """ @@ -21,7 +28,11 @@ class RetrivalRequest(Codable): features: set[Feature] derived_features: set[DerivedFeature] aggregated_features: set[AggregatedFeature] = field(default_factory=set) - event_timestamp: EventTimestamp | None = field(default=None) + event_timestamp_request: EventTimestampRequest | None = field(default=None) + + @property + def event_timestamp(self) -> EventTimestamp | None: + return self.event_timestamp_request.event_timestamp if self.event_timestamp_request else None features_to_include: set[str] = field(default_factory=set) @@ -34,6 +45,7 @@ def __init__( derived_features: set[DerivedFeature], aggregated_features: set[AggregatedFeature] | None = None, event_timestamp: EventTimestamp | None = None, + entity_timestamp_columns: str | None = None, 
features_to_include: set[str] | None = None, ): self.name = name @@ -42,7 +54,11 @@ def __init__( self.features = features self.derived_features = derived_features self.aggregated_features = aggregated_features or set() - self.event_timestamp = event_timestamp + if event_timestamp: + self.event_timestamp_request = EventTimestampRequest( + event_timestamp=event_timestamp, + entity_column=entity_timestamp_columns or 'event_timestamp', + ) self.features_to_include = features_to_include or self.all_feature_names def filter_features(self, feature_names: set[str]) -> 'RetrivalRequest': @@ -179,7 +195,7 @@ def combine(requests: list['RetrivalRequest']) -> list['RetrivalRequest']: return list(grouped_requests.values()) @staticmethod - def unsafe_combine(requests: list['RetrivalRequest']) -> list['RetrivalRequest']: + def unsafe_combine(requests: list['RetrivalRequest']) -> 'RetrivalRequest': result_request = RetrivalRequest( name=requests[0].name, @@ -328,6 +344,13 @@ def request_result(self) -> RequestResult: def without_event_timestamp(self, name_sufix: str | None = None) -> 'FeatureRequest': return FeatureRequest( location=self.location, - features_to_include=self.features_to_include, + features_to_include=self.features_to_include - {'event_timestamp'}, needed_requests=[request.without_event_timestamp(name_sufix) for request in self.needed_requests], ) + + def rename_entities(self, mappings: dict[str, str]) -> 'FeatureRequest': + return FeatureRequest( + location=self.location, + features_to_include=self.features_to_include, + needed_requests=[request.rename_entities(mappings) for request in self.needed_requests], + ) diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py index a683727..4069597 100644 --- a/aligned/retrival_job.py +++ b/aligned/retrival_job.py @@ -2,15 +2,17 @@ import asyncio import logging +import timeit from abc import ABC, abstractmethod from collections import defaultdict from contextlib import suppress -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import datetime from typing import TYPE_CHECKING, TypeVar import pandas as pd import polars as pl +from prometheus_client import Histogram from aligned.exceptions import UnableToFindFileException from aligned.request.retrival_request import RequestResult, RetrivalRequest @@ -44,8 +46,7 @@ def split( column = data[event_timestamp_column] if column.dtype != 'datetime64[ns]': column = pd.to_datetime(data[event_timestamp_column]) - values = column.quantile([start_ratio, end_ratio]) - return data.loc[(column >= values.iloc[0]) & (column <= values.iloc[1])].index + data = data.iloc[column.sort_values().index] group_size = data.shape[0] start_index = round(group_size * start_ratio) @@ -59,28 +60,32 @@ def split( def split_polars( - data: pl.LazyFrame, start_ratio: float, end_ratio: float, event_timestamp_column: str | None = None -) -> pl.DataFrame: + data: pl.DataFrame, start_ratio: float, end_ratio: float, event_timestamp_column: str | None = None +) -> pd.Series: + + row_name = 'row_nr' + data = data.with_row_count(row_name) + if event_timestamp_column: - values = data.select( - [ - pl.col(event_timestamp_column).quantile(start_ratio).alias('start_value'), - pl.col(event_timestamp_column).quantile(end_ratio).alias('end_value'), - ] - ) - return data.filter( - pl.col(event_timestamp_column).is_between(values[0, 'start_value'], values[0, 'end_value']) - ).collect() + data = data.sort(event_timestamp_column) + # values = data.select( + # [ + # 
pl.col(event_timestamp_column).quantile(start_ratio).alias('start_value'), + # pl.col(event_timestamp_column).quantile(end_ratio).alias('end_value'), + # ] + # ) + # return data.filter( + # pl.col(event_timestamp_column).is_between(values[0, 'start_value'], values[0, 'end_value']) + # ).collect() - collected = data.collect() - group_size = collected.shape[0] + group_size = data.shape[0] start_index = round(group_size * start_ratio) end_index = round(group_size * end_ratio) if end_index >= group_size: - return collected[start_index:] + return data[start_index:][row_name].to_pandas() else: - return collected[start_index:end_index] + return data[start_index:end_index][row_name].to_pandas() @dataclass @@ -160,7 +165,7 @@ async def to_pandas(self) -> TrainTestSet[pd.DataFrame]: return TrainTestSet( data=data, entity_columns=core_data.entity_columns, - features=core_data.features, + features=core_data.feature_columns, target_columns=core_data.target_columns, train_index=split(data, 0, test_ratio_start, core_data.event_timestamp_column), test_index=split(data, test_ratio_start, 1, core_data.event_timestamp_column), @@ -170,15 +175,18 @@ async def to_pandas(self) -> TrainTestSet[pd.DataFrame]: async def to_polars(self) -> TrainTestSet[pl.DataFrame]: # Use the pandas method, as the split is not created for polars yet # A but unsure if I should use the same index concept for polars - pandas_data = await self.to_pandas() + core_data = await self.job.to_polars() + + data = core_data.data.collect() + return TrainTestSet( - data=pl.from_pandas(pandas_data.data), - entity_columns=pandas_data.entity_columns, - features=pandas_data.features, - target_columns=pandas_data.target_columns, - train_index=pandas_data.train_index, - test_index=pandas_data.test_index, - event_timestamp_column=pandas_data.event_timestamp_column, + data=data, + entity_columns=core_data.entity_columns, + features=core_data.feature_columns, + target_columns=core_data.target_columns, + train_index=split_polars(data, 0, self.train_size, core_data.event_timestamp_column), + test_index=split_polars(data, self.train_size, 1, core_data.event_timestamp_column), + event_timestamp_column=core_data.event_timestamp_column, ) def validation_set(self, validation_size: float) -> SupervisedValidationJob: @@ -214,8 +222,8 @@ async def to_polars(self) -> TrainTestValidateSet[pl.DataFrame]: return TrainTestValidateSet( data=pl.from_pandas(data.data), entity_columns=data.entity_columns, - features=data.features, - target=data.target, + features=data.feature_columns, + target=data.labels, train_index=data.train_index, test_index=data.test_index, validate_index=data.validate_index, @@ -256,7 +264,7 @@ def remove_derived_features(self) -> RetrivalJob: def log_each_job(self) -> RetrivalJob: if isinstance(self, ModificationJob): - return self.copy_with(self.job.log_each_job()) + return LogJob(self.copy_with(self.job.log_each_job())) return LogJob(self) def chuncked(self, size: int) -> DataLoaderJob: @@ -284,8 +292,11 @@ def train_set(self, train_size: float, target_column: str) -> SupervisedTrainJob def validate(self, validator: Validator) -> RetrivalJob: return ValidationJob(self, validator) - def derive_features(self, requests: list[RetrivalRequest]) -> RetrivalJob: - return DerivedFeatureJob(job=self, requests=requests) + def monitor_time_used(self, time_metric: Histogram, labels: list[str] | None = None) -> RetrivalJob: + return TimeMetricLoggerJob(self, time_metric, labels) + + def derive_features(self, requests: list[RetrivalRequest] | None = None) -> 
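The rewritten `split_polars` above returns positional row numbers over the timestamp-sorted frame instead of quantile filtering: with ten rows and an 80/20 split, the train slice is rows 0-7 and the test slice rows 8-9. A small sketch calling the helper directly (in practice `SupervisedTrainJob.to_polars` does this); the data is made up:

```python
import polars as pl

from aligned.retrival_job import split_polars

data = pl.DataFrame({'event_timestamp': list(range(10)), 'x': list(range(10))})

train_idx = split_polars(data, 0.0, 0.8, 'event_timestamp')  # row numbers 0..7
test_idx = split_polars(data, 0.8, 1.0, 'event_timestamp')   # row numbers 8..9
```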
RetrivalJob: + return DerivedFeatureJob(job=self, requests=requests or self.retrival_requests) def combined_features(self, requests: list[RetrivalRequest] | None = None) -> RetrivalJob: return CombineFactualJob([self], requests or self.retrival_requests) @@ -311,6 +322,14 @@ def validate_entites(self) -> RetrivalJob: def fill_missing_columns(self) -> RetrivalJob: return FillMissingColumnsJob(self) + def rename(self, mappings: dict[str, str]) -> RetrivalJob: + return RenameJob(self, mappings) + + def ignore_event_timestamp(self) -> RetrivalJob: + if isinstance(self, ModificationJob): + return self.copy_with(self.job.ignore_event_timestamp()) + raise NotImplementedError('Not implemented ignore_event_timestamp') + @staticmethod def from_dict(data: dict[str, list], request: list[RetrivalRequest] | RetrivalRequest) -> RetrivalJob: if isinstance(request, RetrivalRequest): @@ -336,6 +355,21 @@ def copy_with(self: JobType, job: RetrivalJob) -> JobType: return self +@dataclass +class RenameJob(RetrivalJob, ModificationJob): + + job: RetrivalJob + mappings: dict[str, str] + + async def to_pandas(self) -> pd.DataFrame: + df = await self.job.to_pandas() + return df.rename(self.mappings) + + async def to_polars(self) -> pl.LazyFrame: + df = await self.job.to_polars() + return df.rename(self.mappings) + + @dataclass class UpdateVectorIndexJob(RetrivalJob, ModificationJob): @@ -411,14 +445,18 @@ def retrival_requests(self) -> list[RetrivalRequest]: return self.job.retrival_requests async def to_pandas(self) -> pd.DataFrame: + if logger.level == 0: + logging.basicConfig(level=logging.INFO) df = await self.job.to_pandas() - logger.info(f'Results from {type(self.job)}') + logger.info(f'Results from {type(self.job).__name__}') logger.info(df) return df async def to_polars(self) -> pl.LazyFrame: + if logger.level == 0: + logging.basicConfig(level=logging.INFO) df = await self.job.to_polars() - logger.info(f'Results from {type(self.job)}') + logger.info(f'Results from {type(self.job).__name__}') logger.info(df.head(10).collect()) return df @@ -525,13 +563,13 @@ async def compute_derived_features_pandas(self, df: pd.DataFrame) -> pd.DataFram df[feature.name] = await feature.transformation.transform_pandas( df[feature.depending_on_names] ) - if df[feature.name].dtype != feature.dtype.pandas_type: - if feature.dtype.is_numeric: - df[feature.name] = pd.to_numeric(df[feature.name], errors='coerce').astype( - feature.dtype.pandas_type - ) - else: - df[feature.name] = df[feature.name].astype(feature.dtype.pandas_type) + # if df[feature.name].dtype != feature.dtype.pandas_type: + # if feature.dtype.is_numeric: + # df[feature.name] = pd.to_numeric(df[feature.name], errors='coerce').astype( + # feature.dtype.pandas_type + # ) + # else: + # df[feature.name] = df[feature.name].astype(feature.dtype.pandas_type) return df async def to_pandas(self) -> pd.DataFrame: @@ -638,6 +676,7 @@ async def data_windows(self, window: AggregateOver, data: pl.DataFrame, now: dat if window.window: time_window = window.window filter_expr = pl.col(time_window.time_column.name) > now - time_window.time_window + if window.condition: raise ValueError('Condition is not supported for stream aggregation, yet') @@ -817,6 +856,11 @@ async def to_polars(self) -> pl.LazyFrame: df = await self.job.to_polars() logger.info('Writing result to cache') await self.location.write_polars(df) + except FileNotFoundError: + logger.info('Unable to load file, so fetching from source') + df = await self.job.to_polars() + logger.info('Writing result to cache') + await 
self.location.write_polars(df) return df def cached_at(self, location: DataFileReference | str) -> RetrivalJob: @@ -890,6 +934,38 @@ async def to_polars(self) -> pl.LazyFrame: return await self.job.to_polars() +@dataclass +class TimeMetricLoggerJob(RetrivalJob, ModificationJob): + + job: RetrivalJob + + time_metric: Histogram + labels: list[str] | None = field(default=None) + + async def to_pandas(self) -> pd.DataFrame: + start_time = timeit.default_timer() + df = await self.job.to_pandas() + elapsed = timeit.default_timer() - start_time + logger.info(f'Computed records in {elapsed} seconds') + if self.labels: + self.time_metric.labels(*self.labels).observe(elapsed) + else: + self.time_metric.observe(elapsed) + return df + + async def to_polars(self) -> pl.LazyFrame: + start_time = timeit.default_timer() + df = await self.job.to_polars() + concrete = df.collect() + elapsed = timeit.default_timer() - start_time + logger.info(f'Computed records in {elapsed} seconds') + if self.labels: + self.time_metric.labels(*self.labels).observe(elapsed) + else: + self.time_metric.observe(elapsed) + return concrete.lazy() + + @dataclass class EnsureTypesJob(RetrivalJob, ModificationJob): @@ -907,13 +983,16 @@ def retrival_requests(self) -> list[RetrivalRequest]: async def to_pandas(self) -> pd.DataFrame: df = await self.job.to_pandas() for request in self.requests: + features_to_check = request.all_required_features + if request.aggregated_features: - continue - for feature in request.all_required_features: + features_to_check = {feature.derived_feature for feature in request.aggregated_features} + + for feature in features_to_check: mask = ~df[feature.name].isnull() - with suppress(AttributeError): + with suppress(AttributeError, TypeError): df[feature.name] = df[feature.name].mask( ~mask, other=df.loc[mask, feature.name].str.strip('"') ) @@ -922,7 +1001,7 @@ async def to_pandas(self) -> pd.DataFrame: df[feature.name] = pd.to_datetime(df[feature.name], infer_datetime_format=True, utc=True) elif feature.dtype == FeatureType('').datetime or feature.dtype == FeatureType('').string: continue - else: + elif feature.dtype != FeatureType('').array: if feature.dtype.is_numeric: df[feature.name] = pd.to_numeric(df[feature.name], errors='coerce').astype( @@ -930,7 +1009,8 @@ async def to_pandas(self) -> pd.DataFrame: ) else: df[feature.name] = df[feature.name].astype(feature.dtype.pandas_type) - if request.event_timestamp: + + if request.event_timestamp and request.event_timestamp.name in df.columns: feature = request.event_timestamp df[feature.name] = pd.to_datetime(df[feature.name], infer_datetime_format=True, utc=True) return df @@ -938,10 +1018,12 @@ async def to_pandas(self) -> pd.DataFrame: async def to_polars(self) -> pl.LazyFrame: df = await self.job.to_polars() for request in self.requests: + features_to_check = request.all_required_features + if request.aggregated_features: - continue + features_to_check = {feature.derived_feature for feature in request.aggregated_features} - for feature in request.all_required_features: + for feature in features_to_check: if feature.dtype == FeatureType('').bool: df = df.with_columns(pl.col(feature.name).cast(pl.Int8).cast(pl.Boolean)) elif feature.dtype == FeatureType('').datetime: @@ -954,6 +1036,10 @@ async def to_polars(self) -> pl.LazyFrame: .cast(pl.Datetime(time_zone='UTC')) .alias(feature.name) ) + elif feature.dtype == FeatureType('').array: + dtype = df.select(feature.name).dtypes[0] + if dtype == pl.Utf8: + df = 
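`TimeMetricLoggerJob` above is what the new `RetrivalJob.monitor_time_used` hook wraps a job in: it times the materialisation and records the elapsed seconds in a Prometheus `Histogram`, with optional label values. A hedged usage sketch; the metric name, view name, and feature reference are made up:

```python
from prometheus_client import Histogram

# Hypothetical metric; label names are chosen by the caller.
FETCH_SECONDS = Histogram('feature_fetch_seconds', 'Time used to load features', ['view'])

async def load_passenger_features(store) -> None:
    job = store.features_for({'passenger_id': [1, 2]}, features=['passengers:age'])
    timed = job.monitor_time_used(FETCH_SECONDS, labels=['passengers'])
    df = await timed.to_polars()  # elapsed time is observed once the frame is collected
    print(df.collect())
```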
df.with_columns(pl.col(feature.name).str.json_extract(pl.List(pl.Utf8))) else: df = df.with_columns(pl.col(feature.name).cast(feature.dtype.polars_type, strict=False)) @@ -1020,7 +1106,13 @@ def request_result(self) -> RequestResult: @property def retrival_requests(self) -> list[RetrivalRequest]: - return [job.retrival_requests for job in self.jobs] + [self.combined_requests] + jobs = [] + for job in self.jobs: + jobs.extend(job.retrival_requests) + return jobs + self.combined_requests + + def ignore_event_timestamp(self) -> RetrivalJob: + return CombineFactualJob([job.ignore_event_timestamp() for job in self.jobs], self.combined_requests) async def combine_data(self, df: pd.DataFrame) -> pd.DataFrame: for request in self.combined_requests: @@ -1092,6 +1184,9 @@ def cached_at(self, location: DataFileReference | str) -> RetrivalJob: def remove_derived_features(self) -> RetrivalJob: return CombineFactualJob([job.remove_derived_features() for job in self.jobs], self.combined_requests) + def log_each_job(self) -> RetrivalJob: + return CombineFactualJob([job.log_each_job() for job in self.jobs], self.combined_requests) + def describe(self) -> str: description = f'Combining {len(self.jobs)} jobs:\n' for index, job in enumerate(self.jobs): @@ -1133,7 +1228,6 @@ def validate(self, validator: Validator) -> RetrivalJob: return FilterJob(self.include_features, self.job.validate(validator)) def cached_at(self, location: DataFileReference | str) -> RetrivalJob: - return FilterJob(self.include_features, self.job.cached_at(location)) def with_subfeatures(self) -> RetrivalJob: @@ -1142,6 +1236,12 @@ def with_subfeatures(self) -> RetrivalJob: def remove_derived_features(self) -> RetrivalJob: return self.job.remove_derived_features() + def ignore_event_timestamp(self) -> RetrivalJob: + return FilterJob( + include_features=self.include_features - {'event_timestamp'}, + job=self.job.ignore_event_timestamp(), + ) + @dataclass class ListenForTriggers(RetrivalJob, ModificationJob): diff --git a/aligned/schemas/feature.py b/aligned/schemas/feature.py index 98fbf3c..2de0a6e 100644 --- a/aligned/schemas/feature.py +++ b/aligned/schemas/feature.py @@ -3,9 +3,90 @@ from dataclasses import dataclass from typing import Literal +import polars as pl + +import aligned.compiler.feature_factory as ff from aligned.schemas.codable import Codable from aligned.schemas.constraints import Constraint +NAME_POLARS_MAPPING = { + 'string': pl.Utf8, + 'int32': pl.Int32, + 'int64': pl.Int64, + 'float': pl.Float64, + 'double': pl.Float64, + 'bool': pl.Boolean, + 'date': pl.Date, + 'datetime': pl.Datetime, + 'time': pl.Time, + 'timedelta': pl.Duration, + 'uuid': pl.Utf8, + 'array': pl.List(pl.Utf8), + 'embedding': pl.List, +} + + +# @dataclass +# class SupportedTypes(Codable): + +# string: String | None = field(default=None) + +# def dtype(self) -> DataTypeInterface: +# values = [self.string] +# for value in values: +# if value: +# return value +# raise ValueError("Found no data type, the config could be corrupt.") + + +# @dataclass +# class DataTypeInterface(Codable): + +# @property +# def python_type(self) -> type: +# raise NotImplementedError() + +# @property +# def pandas_type(self) -> str | type: +# raise NotImplementedError() + +# @property +# def polars_type(self) -> pl.DataType: +# raise NotImplementedError() + +# @dataclass +# class String(DataTypeInterface): + +# @property +# def python_type(self) -> type: +# return str + +# @property +# def pandas_type(self) -> str | type: +# return str + +# @property +# def polars_type(self) 
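The new `array` branch in `EnsureTypesJob.to_polars` above handles list features that arrive as JSON-encoded strings, which is exactly the shape the Redshift `listagg` join earlier in this diff produces (strings like `'["a","b"]'`). A small polars sketch of that decoding step, using the `str.json_extract` spelling from the polars version targeted here; the column name is made up:

```python
import polars as pl

df = pl.DataFrame({'tags': ['["a","b"]', '["c"]', None]})

# Decode the JSON strings into a proper List(Utf8) column, mirroring the array branch above.
decoded = df.with_columns(pl.col('tags').str.json_extract(pl.List(pl.Utf8)))
print(decoded)  # tags becomes [["a", "b"], ["c"], null]
```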
-> pl.DataType: +# return pl.Utf8() + + +# @dataclass +# class List(DataTypeInterface): + +# inner_type: DataTypeInterface + +# @property +# def python_type(self) -> type: +# return list + +# @property +# def pandas_type(self) -> str | type: +# return str + +# @property +# def polars_type(self) -> pl.DataType: +# return pl.List(self.inner_type.polars_type) + @dataclass class FeatureType(Codable): @@ -62,22 +143,24 @@ def pandas_type(self) -> str | type: @property def polars_type(self) -> type: - import polars as pl + return NAME_POLARS_MAPPING[self.name] + @property + def feature_factory(self) -> ff.FeatureFactory: return { - 'string': pl.Utf8, - 'int32': pl.Int32, - 'int64': pl.Int64, - 'float': pl.Float64, - 'double': pl.Float64, - 'bool': pl.Boolean, - 'date': pl.Date, - 'datetime': pl.Datetime, - 'time': pl.Time, - 'timedelta': pl.Duration, - 'uuid': pl.Utf8, - 'array': pl.List, - 'embedding': pl.List, + 'string': ff.String(), + 'int32': ff.Int32(), + 'int64': ff.Int64(), + 'float': ff.Float(), + 'double': ff.Float(), + 'bool': ff.Bool(), + 'date': ff.Timestamp(), + 'datetime': ff.Timestamp(), + 'time': ff.Timestamp(), + 'timedelta': ff.Timestamp(), + 'uuid': ff.UUID(), + 'array': ff.Embedding(), + 'embedding': ff.Embedding(), }[self.name] def __eq__(self, other: object) -> bool: @@ -92,6 +175,13 @@ def __pre_serialize__(self) -> FeatureType: assert isinstance(self.name, str) return self + @staticmethod + def from_polars(polars_type: pl.DataType) -> FeatureType: + for name, dtype in NAME_POLARS_MAPPING.items(): + if polars_type.is_(dtype): + return FeatureType(name=name) + raise ValueError(f'Unable to find a value that can represent {polars_type}') + @property def string(self) -> FeatureType: return FeatureType(name='string') @@ -222,3 +312,7 @@ class FeatureReferance(Codable): def __hash__(self) -> int: return hash(self.name) + + @property + def identifier(self) -> str: + return f'{self.location.identifier}:{self.name}' diff --git a/aligned/schemas/feature_view.py b/aligned/schemas/feature_view.py index e8e01ad..861218c 100644 --- a/aligned/schemas/feature_view.py +++ b/aligned/schemas/feature_view.py @@ -26,6 +26,8 @@ class CompiledFeatureView(Codable): event_timestamp: EventTimestamp | None = field(default=None) stream_data_source: StreamDataSource | None = field(default=None) + application_source: BatchDataSource | None = field(default=None) + staging_source: BatchDataSource | None = field(default=None) event_triggers: set[EventTrigger] | None = field(default=None) @@ -49,6 +51,8 @@ def __pre_serialize__(self) -> CompiledFeatureView: assert isinstance(self.event_timestamp, EventTimestamp) if self.stream_data_source is not None: assert isinstance(self.stream_data_source, StreamDataSource) + if self.application_source is not None: + assert isinstance(self.application_source, BatchDataSource) if self.event_triggers is not None: for event_trigger in self.event_triggers: assert isinstance(event_trigger, EventTrigger) @@ -271,11 +275,13 @@ def requests_for(self, feature_names: set[str]) -> FeatureRequest: entities=request.entities, features=set(), derived_features=set(), + aggregated_features=set(), event_timestamp=request.event_timestamp, ) current = dependent_views[request.location] current.derived_features = current.derived_features.union(request.derived_features) current.features = current.features.union(request.features) + current.aggregated_features = current.aggregated_features.union(request.aggregated_features) dependent_views[request.location] = current 
dependent_views[self.name] = RetrivalRequest( # Add the request we want @@ -284,6 +290,7 @@ def requests_for(self, feature_names: set[str]) -> FeatureRequest: entities=self.entity_features, features=set(), derived_features={feature for feature in self.features if feature.name in feature_names}, + aggregated_features=set(), event_timestamp=None, ) diff --git a/aligned/schemas/model.py b/aligned/schemas/model.py index 83aea73..41aae9b 100644 --- a/aligned/schemas/model.py +++ b/aligned/schemas/model.py @@ -1,15 +1,16 @@ import logging from dataclasses import dataclass, field -from aligned.data_source.batch_data_source import BatchDataSource -from aligned.data_source.stream_data_source import StreamDataSource from aligned.request.retrival_request import FeatureRequest, RetrivalRequest from aligned.schemas.codable import Codable -from aligned.schemas.derivied_feature import DerivedFeature +from aligned.schemas.feature import FeatureLocation +from aligned.schemas.feature import EventTimestamp, Feature, FeatureReferance +from aligned.data_source.stream_data_source import StreamDataSource from aligned.schemas.event_trigger import EventTrigger -from aligned.schemas.feature import EventTimestamp, Feature, FeatureLocation, FeatureReferance -from aligned.schemas.folder import Folder from aligned.schemas.target import ClassificationTarget, RegressionTarget +from aligned.schemas.derivied_feature import DerivedFeature +from aligned.data_source.batch_data_source import BatchDataSource +from aligned.schemas.folder import Folder logger = logging.getLogger(__name__) @@ -34,6 +35,7 @@ class PredictionsView(Codable): entities: set[Feature] features: set[Feature] derived_features: set[DerivedFeature] + model_version_column: Feature | None = field(default=None) event_timestamp: EventTimestamp | None = field(default=None) source: BatchDataSource | None = field(default=None) stream_source: StreamDataSource | None = field(default=None) @@ -44,14 +46,14 @@ class PredictionsView(Codable): @property def full_schema(self) -> set[Feature]: - schema = self.features.union(self.entities).union( - {target.feature for target in self.classification_targets} - ) + schema = self.features.union(self.entities) - for target in self.classification_targets: + for target in self.classification_targets or {}: + schema.add(target.feature) schema.update({prob.feature for prob in target.class_probabilities}) - for target in self.regression_targets: + for target in self.regression_targets or {}: + schema.add(target.feature) if target.confidence: schema.add(target.confidence) @@ -61,8 +63,53 @@ def full_schema(self) -> set[Feature]: if target.upper_confidence: schema.add(target.upper_confidence) + if self.model_version_column: + schema.add(self.model_version_column) + return schema + def request(self, name: str) -> RetrivalRequest: + entities = self.entities + if self.model_version_column: + entities.add(self.model_version_column) + return RetrivalRequest( + name=name, + location=FeatureLocation.model(name), + entities=entities, + features=self.features, + derived_features=self.derived_features, + event_timestamp=self.event_timestamp, + ) + + def request_for(self, features: set[str], name: str) -> RetrivalRequest: + entities = self.entities + if self.model_version_column: + entities.add(self.model_version_column) + return RetrivalRequest( + name=name, + location=FeatureLocation.model(name), + entities=entities, + features={feature for feature in self.features if feature.name in features}, + derived_features={feature for feature in 
self.derived_features if feature.name in features}, + event_timestamp=self.event_timestamp, + ) + + def labels_estimates_refs(self) -> set[FeatureReferance]: + if self.classification_targets: + return {feature.estimating for feature in self.classification_targets} + elif self.regression_targets: + return {feature.estimating for feature in self.regression_targets} + else: + raise ValueError('Found no targets in the model') + + def labels(self) -> set[Feature]: + if self.classification_targets: + return {feature.feature for feature in self.classification_targets} + elif self.regression_targets: + return {feature.feature for feature in self.regression_targets} + else: + raise ValueError('Found no targets in the model') + @dataclass class Model(Codable): diff --git a/aligned/schemas/record_coders.py b/aligned/schemas/record_coders.py index b59235e..328a778 100644 --- a/aligned/schemas/record_coders.py +++ b/aligned/schemas/record_coders.py @@ -78,7 +78,7 @@ class JsonRecordCoder(RecordCoder): def decode(self, records: list[dict]) -> list[dict]: import json - decoded = [json.loads(record[self.key]) for record in records] + decoded = [json.loads(record[self.key]) for record in records if self.key in record] return [record for record in decoded if isinstance(record, dict)] def encode(self, records: list[dict]) -> list[dict]: diff --git a/aligned/schemas/transformation.py b/aligned/schemas/transformation.py index 2e029c9..b349417 100644 --- a/aligned/schemas/transformation.py +++ b/aligned/schemas/transformation.py @@ -225,6 +225,7 @@ def __init__(self) -> None: PercentileAggregation, JsonPath, Clip, + ArrayContains, ]: self.add(tran_type) @@ -420,7 +421,7 @@ async def transform_pandas(self, df: pd.DataFrame) -> pd.Series: return df[self.key] == self.value.python_value async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr: - return df.with_columns((pl.col(self.key) == self.value.python_value).alias(alias)) + return pl.col(self.key) == self.value.python_value @staticmethod def test_definition() -> TransformationTestDefinition: @@ -549,7 +550,7 @@ async def transform_pandas(self, df: pd.DataFrame) -> pd.Series: return df[self.key] != self.value.python_value async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr: - return df.with_columns((pl.col(self.key) != self.value.python_value).alias(alias)) + return pl.col(self.key) != self.value.python_value @staticmethod def test_definition() -> TransformationTestDefinition: @@ -573,7 +574,7 @@ async def transform_pandas(self, df: pd.DataFrame) -> pd.Series: return df[self.key] > self.value async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr: - return df.with_columns((pl.col(self.key) > self.value).alias(alias)) + return pl.col(self.key) > self.value @staticmethod def test_definition() -> TransformationTestDefinition: @@ -599,7 +600,7 @@ async def transform_pandas(self, df: pd.DataFrame) -> pd.Series: return df[self.left_key] > df[self.right_key] async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr: - return df.with_columns((pl.col(self.left_key) > pl.col(self.right_key)).alias(alias)) + return pl.col(self.left_key) > pl.col(self.right_key) @staticmethod def test_definition() -> TransformationTestDefinition: @@ -1034,9 +1035,6 @@ async def transform_pandas(self, df: pd.DataFrame) -> pd.Series: ) async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr: - 
pl.col(self.key).str.strptime(pl.Datetime, strict=False)
-        pl.when(pl.col(self.key))
-
         col = pl.col(self.key).cast(pl.Datetime).dt
         match self.component:
             case 'day':
@@ -1107,6 +1105,39 @@ def test_definition() -> TransformationTestDefinition:
     )
 
 
+@dataclass
+class ArrayContains(Transformation):
+    """Checks if an array contains a value
+
+    some_array = List(String())
+    contains_a_char = some_array.contains("a")
+    """
+
+    key: str
+    value: LiteralValue
+
+    name: str = 'array_contains'
+    dtype: FeatureType = FeatureType('').bool
+
+    def __init__(self, key: str, value: str) -> None:
+        self.key = key
+        self.value = value
+
+    async def transform_pandas(self, df: pd.DataFrame) -> pd.Series:
+        return pl.Series(df[self.key]).arr.contains(self.value.python_value).to_pandas()
+
+    async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr:
+        return pl.col(self.key).arr.contains(self.value.python_value)
+
+    @staticmethod
+    def test_definition() -> TransformationTestDefinition:
+        return TransformationTestDefinition(
+            ArrayContains('x', LiteralValue.from_value('test')),
+            input={'x': [['Hello', 'test'], ['nah'], ['test', 'espania', None]]},
+            output=[True, False, True],
+        )
+
+
 @dataclass
 class Contains(Transformation):
     """Checks if a string value contains another string
@@ -1854,7 +1885,7 @@ async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame |
         return pl.col(self.key).median()
 
     def as_psql(self) -> str:
-        return f'PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY {self.key})'
+        return f'percentile_cont(0.5) WITHIN GROUP(ORDER BY {self.key})'
 
 
 @dataclass
@@ -1874,7 +1905,7 @@ async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame |
         return pl.col(self.key).quantile(self.percentile)
 
     def as_psql(self) -> str:
-        return f'PERCENTILE_CONT({self.percentile}) WITHIN GROUP(ORDER BY {self.key})'
+        return f'percentile_cont({self.percentile}) WITHIN GROUP(ORDER BY {self.key})'
 
 
 @dataclass
diff --git a/aligned/source_validation.py b/aligned/source_validation.py
new file mode 100644
index 0000000..cd43b3e
--- /dev/null
+++ b/aligned/source_validation.py
@@ -0,0 +1,33 @@
+from aligned.feature_store import SourceRequest, FeatureLocation
+
+
+async def validate_sources_in(views: list[SourceRequest]) -> dict[FeatureLocation, bool]:
+    """Validates that the sources can fulfill the needs required by the feature views.
+    In other words, whether each view can load its "core features" from its source.
+
+    ```
+    source = FileSource.parquet_at('test_data/titanic.parquet')
+
+    views = feature_store.views_with_config(source)
+    validation = await validate_sources_in(views)
+
+    >>> {'titanic_parquet': True}
+    ```
+
+    Args:
+        views (list[SourceRequest]): The sources to validate and the requests they should fulfill
+
+    Returns:
+        dict[FeatureLocation, bool]: A dict containing the feature view location and whether the source fulfills the need
+    """
+
+    results: dict[FeatureLocation, bool] = {}
+
+    for view in views:
+        try:
+            _ = (await view.source.all_data(view.request, limit=1).to_polars()).collect()
+            results[view.location] = True
+        except Exception:
+            results[view.location] = False
+
+    return results
diff --git a/aligned/sources/local.py b/aligned/sources/local.py
index a9d8c5f..21436db 100644
--- a/aligned/sources/local.py
+++ b/aligned/sources/local.py
@@ -20,10 +20,12 @@
 from aligned.retrival_job import DateRangeJob, FactualRetrivalJob, FullExtractJob, RetrivalJob
 from aligned.s3.storage import FileStorage, HttpStorage
 from aligned.schemas.codable import Codable
+from aligned.schemas.feature import FeatureType
 from aligned.schemas.folder import Folder
 from aligned.storage import Storage
 
 if TYPE_CHECKING:
+    from aligned.compiler.feature_factory import FeatureFactory
     from aligned.feature_store import FeatureStore
 
 
@@ -98,7 +100,7 @@ async def to_polars(self) -> pl.LazyFrame:
             buffer = await HttpStorage().read(self.path)
             io_buffer = BytesIO(buffer)
             io_buffer.seek(0)
-            return pl.read_csv(io_buffer, sep=self.csv_config.seperator, try_parse_dates=True).lazy()
+            return pl.read_csv(io_buffer, separator=self.csv_config.seperator, try_parse_dates=True).lazy()
 
         return pl.scan_csv(self.path, separator=self.csv_config.seperator, try_parse_dates=True)
 
@@ -160,6 +162,22 @@ def multi_source_features_for(
             facts=facts,
         )
 
+    async def schema(self) -> dict[str, FeatureFactory]:
+        df = await self.to_polars()
+        return {name: FeatureType.from_polars(pl_type).feature_factory for name, pl_type in df.schema.items()}
+
+    async def feature_view_code(self, view_name: str) -> str:
+        from aligned import FeatureView
+
+        schema = await self.schema()
+        data_source_code = f'FileSource.csv_at("{self.path}", csv_config={self.csv_config})'
+        return FeatureView.feature_view_code_template(
+            schema,
+            data_source_code,
+            view_name,
+            'from aligned import FileSource\nfrom aligned.sources.local import CsvConfig',
+        )
+
 
 @dataclass
 class ParquetConfig(Codable):
@@ -210,7 +228,7 @@ async def to_polars(self) -> pl.LazyFrame:
         return pl.scan_parquet(self.path)
 
     async def write_polars(self, df: pl.LazyFrame) -> None:
-        df.sink_parquet(self.path, compression=self.config.compression)
+        df.collect().write_parquet(self.path, compression=self.config.compression)
 
     def all_data(self, request: RetrivalRequest, limit: int | None) -> FullExtractJob:
         return FileFullJob(self, request, limit)
@@ -236,6 +254,21 @@ def multi_source_features_for(
             facts=facts,
         )
 
+    async def schema(self) -> dict[str, FeatureFactory]:
+        parquet_schema = pl.read_parquet_schema(self.path)
+        return {
+            name: FeatureType.from_polars(pl_type).feature_factory for name, pl_type in parquet_schema.items()
+        }
+
+    async def feature_view_code(self, view_name: str) -> str:
+        from aligned import FeatureView
+
+        schema = await self.schema()
+        data_source_code = f'FileSource.parquet_at("{self.path}")'
+        return FeatureView.feature_view_code_template(
+            schema, data_source_code, view_name, 'from aligned import FileSource'
+        )
+
 
 @dataclass
 class StorageFileSource(StorageFileReference):
diff --git a/aligned/sources/psql.py 
b/aligned/sources/psql.py index 4e63f6d..8d9a39a 100644 --- a/aligned/sources/psql.py +++ b/aligned/sources/psql.py @@ -2,16 +2,16 @@ from dataclasses import dataclass from datetime import datetime -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING, Callable, Any from aligned.data_source.batch_data_source import BatchDataSource, ColumnFeatureMappable -from aligned.enricher import SqlDatabaseEnricher, StatisticEricher from aligned.request.retrival_request import RetrivalRequest -from aligned.retrival_job import DateRangeJob, FactualRetrivalJob, FullExtractJob, RetrivalJob +from aligned.retrival_job import FactualRetrivalJob, RetrivalJob from aligned.schemas.codable import Codable if TYPE_CHECKING: - from aligned.enricher import Enricher, TimespanSelector + from aligned.compiler.feature_factory import FeatureFactory + from aligned.enricher import Enricher from aligned.entity_data_source import EntityDataSource @@ -60,7 +60,7 @@ def fetch(self, query: str) -> RetrivalJob: @dataclass -class PostgreSQLDataSource(BatchDataSource, ColumnFeatureMappable, StatisticEricher): +class PostgreSQLDataSource(BatchDataSource, ColumnFeatureMappable): config: PostgreSQLConfig table: str @@ -71,55 +71,32 @@ class PostgreSQLDataSource(BatchDataSource, ColumnFeatureMappable, StatisticEric def job_group_key(self) -> str: return self.config.env_var + def contains_config(self, config: Any) -> bool: + return isinstance(config, PostgreSQLConfig) and config.env_var == self.config.env_var + def __hash__(self) -> int: return hash(self.table) - def mean( - self, columns: set[str], time: TimespanSelector | None = None, limit: int | None = None - ) -> Enricher: - reverse_map = {value: key for key, value in self.mapping_keys.items()} - sql_columns = ', '.join([f'AVG({reverse_map.get(column, column)}) AS {column}' for column in columns]) - - query = f'SELECT {sql_columns} FROM {self.table}' - if time: - seconds = time.timespand.total_seconds() - query += f' WHERE {time.time_column} >= NOW() - interval \'{seconds} seconds\'' - if limit and isinstance(limit, int): - query += f' LIMIT {limit}' - - return SqlDatabaseEnricher(self.config.env_var, query) - - def std( - self, columns: set[str], time: TimespanSelector | None = None, limit: int | None = None - ) -> Enricher: - reverse_map = {value: key for key, value in self.mapping_keys.items()} - sql_columns = ', '.join( - [f'STDDEV({reverse_map.get(column, column)}) AS {column}' for column in columns] - ) - - query = f'SELECT {sql_columns} FROM {self.table}' - if time: - seconds = time.timespand.total_seconds() - query += f' WHERE {time.time_column} >= NOW() - interval \'{seconds} seconds\'' - if limit and isinstance(limit, int): - query += f' LIMIT {limit}' - - return SqlDatabaseEnricher(self.config.env_var, query) - - def all_data(self, request: RetrivalRequest, limit: int | None) -> FullExtractJob: - from aligned.psql.jobs import FullExtractPsqlJob + def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob: + from aligned.psql.jobs import build_full_select_query_psql, PostgreSqlJob - return FullExtractPsqlJob(self, request, limit) + return PostgreSqlJob( + config=self.config, query=build_full_select_query_psql(self, request, limit), requests=[request] + ) def all_between_dates( self, request: RetrivalRequest, start_date: datetime, end_date: datetime, - ) -> DateRangeJob: - from aligned.psql.jobs import DateRangePsqlJob + ) -> RetrivalJob: + from aligned.psql.jobs import build_date_range_query_psql, PostgreSqlJob - return 
DateRangePsqlJob(self, start_date, end_date, request) + return PostgreSqlJob( + config=self.config, + query=build_date_range_query_psql(self, request, start_date, end_date), + requests=[request], + ) @classmethod def multi_source_features_for( @@ -133,3 +110,45 @@ def multi_source_features_for( requests=[request for _, request in requests], facts=facts, ) + + async def schema(self) -> dict[str, FeatureFactory]: + import polars as pl + + import aligned.compiler.feature_factory as ff + + config = self.config + schema = config.schema or 'public' + table = self.table + sql_query = f""" +SELECT column_name, data_type, character_maximum_length, is_nullable, column_default, + CASE WHEN column_name IN ( + SELECT column_name + FROM information_schema.key_column_usage + WHERE constraint_name IN ( + SELECT constraint_name + FROM information_schema.table_constraints + WHERE table_schema = '{schema}' + AND table_name = '{table}' + AND constraint_type = 'PRIMARY KEY' + ) + ) THEN 'YES' ELSE 'NO' END AS is_primary_key +FROM information_schema.columns +WHERE table_schema = '{schema}' + AND table_name = '{table}'""" + df = pl.read_database(sql_query, connection_uri=self.config.url, engine='adbc') + psql_types = { + 'uuid': ff.UUID(), + 'timestamp with time zone': ff.Timestamp(), + 'timestamp without time zone': ff.Timestamp(), + 'character varying': ff.String(), + 'text': ff.String(), + 'integer': ff.Int64(), + 'float': ff.Float(), + 'date': ff.Timestamp(), + 'boolean': ff.Bool(), + 'jsonb': ff.Json(), + 'smallint': ff.Int32(), + 'numeric': ff.Float(), + } + values = df.select(['column_name', 'data_type']).to_dicts() + return {value['column_name']: psql_types[value['data_type']] for value in values} diff --git a/aligned/sources/redis.py b/aligned/sources/redis.py index be82f90..e2a0bc1 100644 --- a/aligned/sources/redis.py +++ b/aligned/sources/redis.py @@ -17,6 +17,7 @@ class Redis: # type: ignore StrictRedis = Redis ConnectionPool = Redis +from aligned.data_source.batch_data_source import ColumnFeatureMappable from aligned.data_source.stream_data_source import SinkableDataSource, StreamDataSource from aligned.feature_source import FeatureSource, FeatureSourceFactory, WritableFeatureSource from aligned.request.retrival_request import FeatureRequest, RetrivalRequest @@ -220,12 +221,12 @@ async def write(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> None @dataclass -class RedisStreamSource(StreamDataSource, SinkableDataSource): +class RedisStreamSource(StreamDataSource, SinkableDataSource, ColumnFeatureMappable): topic_name: str config: RedisConfig - mappings: dict[str, str] = field(default_factory=dict) + mapping_keys: dict[str, str] = field(default_factory=dict) record_coder: RecordCoder = field(default_factory=PassthroughRecordCoder) name: str = 'redis' @@ -245,7 +246,7 @@ def consumer(self, from_timestamp: str | None = None) -> ReadableStream: ) def map_values(self, mappings: dict[str, str]) -> RedisStreamSource: - self.mappings = self.mappings | mappings + self.mapping_keys = self.mapping_keys | mappings return self def make_redis_friendly(self, data: pl.LazyFrame, features: set[Feature]) -> pl.LazyFrame: diff --git a/aligned/sources/redshift.py b/aligned/sources/redshift.py index 31163f5..8af799b 100644 --- a/aligned/sources/redshift.py +++ b/aligned/sources/redshift.py @@ -1,19 +1,33 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import datetime, timedelta -from typing import Callable +from typing import 
Callable, Any from aligned import RedisConfig from aligned.compiler.model import EntityDataSource, SqlEntityDataSource from aligned.data_source.batch_data_source import BatchDataSource, ColumnFeatureMappable from aligned.enricher import Enricher from aligned.request.retrival_request import RetrivalRequest -from aligned.retrival_job import DateRangeJob, FullExtractJob, RetrivalJob +from aligned.retrival_job import RetrivalJob from aligned.schemas.codable import Codable from aligned.sources.psql import PostgreSQLConfig, PostgreSQLDataSource +@dataclass +class RedshiftListReference(Codable): + """ + A class representing a one to many relationship. + This can simulate how a list datatype + """ + + table_schema: str + table_name: str + value_column: str + id_column: str + join_column: str | None = None + + @dataclass class RedshiftSQLConfig(Codable): env_var: str @@ -34,30 +48,44 @@ def psql_config(self) -> PostgreSQLConfig: def from_url(url: str) -> RedshiftSQLConfig: import os - os.environ['REDSHIFT_DATABASE'] = url.replace('redshift:', 'postgresql:') + if 'REDSHIFT_DATABASE' not in os.environ: + os.environ['REDSHIFT_DATABASE'] = url.replace('redshift:', 'postgresql:') return RedshiftSQLConfig(env_var='REDSHIFT_DATABASE') - def table(self, table: str, mapping_keys: dict[str, str] | None = None) -> RedshiftSQLDataSource: - return RedshiftSQLDataSource(config=self, table=table, mapping_keys=mapping_keys or {}) + def table( + self, + table: str, + mapping_keys: dict[str, str] | None = None, + list_references: dict[str, RedshiftListReference] | None = None, + ) -> RedshiftSQLDataSource: + return RedshiftSQLDataSource( + config=self, table=table, mapping_keys=mapping_keys or {}, list_references=list_references or {} + ) def data_enricher( self, name: str, sql: str, redis: RedisConfig, values: dict | None = None, lock_timeout: int = 60 ) -> Enricher: - from pathlib import Path - from aligned.enricher import FileCacheEnricher, RedisLockEnricher, SqlDatabaseEnricher return FileCacheEnricher( timedelta(days=1), - file=Path(f'./cache/{name}.parquet'), + file_path=f'./cache/{name}.parquet', enricher=RedisLockEnricher( name, SqlDatabaseEnricher(self.url, sql, values), redis, timeout=lock_timeout ), ) + def with_schema(self, name: str) -> RedshiftSQLConfig: + return RedshiftSQLConfig(env_var=self.env_var, schema=name) + def entity_source(self, timestamp_column: str, sql: Callable[[str], str]) -> EntityDataSource: return SqlEntityDataSource(sql, self.url, timestamp_column) + def fetch(self, query: str) -> RetrivalJob: + from aligned.redshift.jobs import PostgreSqlJob + + return PostgreSqlJob(self.psql_config, query) + @dataclass class RedshiftSQLDataSource(BatchDataSource, ColumnFeatureMappable): @@ -65,6 +93,7 @@ class RedshiftSQLDataSource(BatchDataSource, ColumnFeatureMappable): config: RedshiftSQLConfig table: str mapping_keys: dict[str, str] + list_references: dict[str, RedshiftListReference] = field(default_factory=dict) type_name = 'redshift' @@ -74,22 +103,33 @@ def to_psql_source(self) -> PostgreSQLDataSource: def job_group_key(self) -> str: return self.config.env_var + def contains_config(self, config: Any) -> bool: + return isinstance(config, RedshiftSQLConfig) and config.env_var == self.config.env_var + def __hash__(self) -> int: return hash(self.table) - def all_data(self, request: RetrivalRequest, limit: int | None) -> FullExtractJob: - from aligned.psql.jobs import FullExtractPsqlJob + def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob: + from aligned.psql.jobs 
import build_full_select_query_psql + from aligned.redshift.sql_job import RedshiftSqlJob - return FullExtractPsqlJob(self, request, limit) + source = PostgreSQLDataSource(self.config.psql_config, self.table, self.mapping_keys) + return RedshiftSqlJob( + config=self.config, query=build_full_select_query_psql(source, request, limit), requests=[request] + ) def all_between_dates( self, request: RetrivalRequest, start_date: datetime, end_date: datetime - ) -> DateRangeJob: - from aligned.psql.jobs import DateRangePsqlJob, PostgreSQLDataSource + ) -> RetrivalJob: + from aligned.redshift.sql_job import RedshiftSqlJob + from aligned.psql.jobs import build_date_range_query_psql source = PostgreSQLDataSource(self.config.psql_config, self.table, self.mapping_keys) - - return DateRangePsqlJob(source, start_date, end_date, request) + return RedshiftSqlJob( + config=self.config, + query=build_date_range_query_psql(source, request, start_date, end_date), + requests=[request], + ) @classmethod def multi_source_features_for( @@ -100,7 +140,7 @@ def multi_source_features_for( from aligned.redshift.jobs import FactRedshiftJob return FactRedshiftJob( - sources={request.location: source.to_psql_source() for source, request in requests}, + sources={request.location: source for source, request in requests}, requests=[request for _, request in requests], facts=facts, ) diff --git a/aligned/sources/tests/test_parquet.py b/aligned/sources/tests/test_parquet.py new file mode 100644 index 0000000..3bb7bdd --- /dev/null +++ b/aligned/sources/tests/test_parquet.py @@ -0,0 +1,37 @@ +import pytest + +from aligned import FeatureStore, FeatureView, FileSource +from conftest import DataTest + + +@pytest.mark.asyncio +async def test_parquest(point_in_time_data_test: DataTest) -> None: + + store = FeatureStore.experimental() + + for source in point_in_time_data_test.sources: + view = source.view + view_name = view.metadata.name + + file_source = FileSource.parquet_at(f'test_data/{view_name}.parquet') + await file_source.write_polars(source.data.lazy()) + + view.metadata = FeatureView.metadata_with( # type: ignore + name=view.metadata.name, + description=view.metadata.description, + batch_source=file_source, + ) + store.add_feature_view(view) + + job = store.features_for( + point_in_time_data_test.entities.to_dict(as_series=False), point_in_time_data_test.feature_reference + ) + data = (await job.to_polars()).collect() + + expected = point_in_time_data_test.expected_output + + assert expected.shape == data.shape, f'Expected: {expected.shape}\nGot: {data.shape}' + assert set(expected.columns) == set(data.columns), f'Expected: {expected.columns}\nGot: {data.columns}' + + ordered_columns = data.select(expected.columns) + assert ordered_columns.frame_equal(expected), f'Expected: {expected}\nGot: {ordered_columns}' diff --git a/aligned/sources/tests/test_psql.py b/aligned/sources/tests/test_psql.py new file mode 100644 index 0000000..c929fda --- /dev/null +++ b/aligned/sources/tests/test_psql.py @@ -0,0 +1,42 @@ +from os import environ + +import pytest + +from aligned import FeatureStore, FeatureView, PostgreSQLConfig +from conftest import DataTest + + +@pytest.mark.asyncio +async def test_postgresql(point_in_time_data_test: DataTest) -> None: + + if 'PSQL_DATABASE_TEST' not in environ: + environ['PSQL_DATABASE_TEST'] = 'postgresql://postgres:postgres@localhost:5433/aligned-test' + + psql_database = environ['PSQL_DATABASE_TEST'] + + store = FeatureStore.experimental() + + for source in point_in_time_data_test.sources: + view = 
source.view + db_name = view.metadata.name + source.data.to_pandas().to_sql(db_name, psql_database, if_exists='replace') + + view.metadata = FeatureView.metadata_with( # type: ignore + name=view.metadata.name, + description=view.metadata.description, + batch_source=PostgreSQLConfig('PSQL_DATABASE_TEST').table(db_name), + ) + store.add_feature_view(view) + + job = store.features_for( + point_in_time_data_test.entities.to_dict(as_series=False), point_in_time_data_test.feature_reference + ) + data = (await job.to_polars()).collect() + + expected = point_in_time_data_test.expected_output + + assert expected.shape == data.shape, f'Expected: {expected.shape}\nGot: {data.shape}' + assert set(expected.columns) == set(data.columns), f'Expected: {expected.columns}\nGot: {data.columns}' + + ordered_columns = data.select(expected.columns) + assert ordered_columns.frame_equal(expected), f'Expected: {expected}\nGot: {ordered_columns}' diff --git a/aligned/split_strategy.py b/aligned/split_strategy.py index 4d1fe6a..59c3a8a 100644 --- a/aligned/split_strategy.py +++ b/aligned/split_strategy.py @@ -77,13 +77,13 @@ class SupervisedDataSet(Generic[DatasetType]): data: DatasetType entity_columns: set[str] - features: set[str] + feature_columns: set[str] target_columns: set[str] event_timestamp_column: str | None @property def sorted_features(self) -> list[str]: - return sorted(self.features) + return sorted(self.feature_columns) def __init__( self, @@ -95,7 +95,7 @@ def __init__( ): self.data = data self.entity_columns = entity_columns - self.features = features + self.feature_columns = features self.target_columns = target self.event_timestamp_column = event_timestamp_column @@ -112,7 +112,7 @@ def input(self) -> DatasetType: return self.data[self.sorted_features] @property - def target(self) -> DatasetType: + def labels(self) -> DatasetType: if isinstance(self.data, (pl.LazyFrame, pl.DataFrame)): return self.data.select(list(self.target_columns)) return self.data[list(self.target_columns)] @@ -123,7 +123,7 @@ class TrainTestValidateSet(Generic[DatasetType]): data: DatasetType entity_columns: set[str] - features: set[str] + feature_columns: set[str] target_columns: set[str] train_index: Index @@ -144,7 +144,7 @@ def __init__( ): self.data = data self.entity_columns = entity_columns - self.features = features + self.feature_columns = features self.target_columns = target self.train_index = train_index self.test_index = test_index @@ -153,7 +153,7 @@ def __init__( @property def sorted_features(self) -> list[str]: - return sorted(self.features) + return sorted(self.feature_columns) @property def input(self) -> DatasetType: @@ -162,7 +162,7 @@ def input(self) -> DatasetType: return self.data[self.sorted_features] @property - def target(self) -> DatasetType: + def labels(self) -> DatasetType: if isinstance(self.data, pl.LazyFrame): return self.data.select(sorted(self.target_columns)) return self.data[sorted(self.target_columns)] @@ -177,7 +177,7 @@ def train(self) -> SupervisedDataSet[DatasetType]: return SupervisedDataSet( data, self.entity_columns, - self.features, + self.feature_columns, self.target_columns, self.event_timestamp_column, ) @@ -188,7 +188,7 @@ def train_input(self) -> DatasetType: @property def train_target(self) -> DatasetType: - return self.train.target + return self.train.labels @property def test(self) -> SupervisedDataSet[DatasetType]: @@ -201,7 +201,7 @@ def test(self) -> SupervisedDataSet[DatasetType]: return SupervisedDataSet( data, set(self.entity_columns), - set(self.features), + 
set(self.feature_columns), self.target_columns, self.event_timestamp_column, ) @@ -212,7 +212,7 @@ def test_input(self) -> DatasetType: @property def test_target(self) -> DatasetType: - return self.test.target + return self.test.labels @property def validate(self) -> SupervisedDataSet[DatasetType]: @@ -223,7 +223,7 @@ def validate(self) -> SupervisedDataSet[DatasetType]: return SupervisedDataSet( data, self.entity_columns, - set(self.features), + set(self.feature_columns), self.target_columns, self.event_timestamp_column, ) @@ -234,7 +234,7 @@ def validate_input(self) -> DatasetType: @property def validate_target(self) -> DatasetType: - return self.validate.target + return self.validate.labels class SplitDataSet(Generic[DatasetType]): diff --git a/aligned/tests/test_model_target.py b/aligned/tests/test_model_target.py index 4dae1e9..c106b5f 100644 --- a/aligned/tests/test_model_target.py +++ b/aligned/tests/test_model_target.py @@ -14,13 +14,13 @@ async def test_titanic_model_with_targets(titanic_feature_store: FeatureStore) - dataset = ( await titanic_feature_store.model('titanic') - .with_target() + .with_labels() .features_for({'passenger_id': entity_list}) .to_pandas() ) assert dataset.input.shape == (8, 5) - assert dataset.target.shape == (8, 1) + assert dataset.labels.shape == (8, 1) assert dataset.entities.shape == (8, 1) assert np.all(dataset.entities['passenger_id'].to_numpy() == entity_list) @@ -53,13 +53,13 @@ async def test_titanic_model_with_targets_and_scd(titanic_feature_store_scd: Fea dataset = ( await titanic_feature_store_scd.model('titanic') - .with_target() + .with_labels() .features_for(entities.to_dict()) .to_polars() ) input_df = dataset.input.collect() - target_df = dataset.target.collect() + target_df = dataset.labels.collect() assert target_df['survived'].series_equal(expected_data['survived']) assert input_df['is_male'].series_equal(expected_data['is_male']) diff --git a/aligned/tests/test_models_as_feature.py b/aligned/tests/test_models_as_feature.py index a884447..2da22c8 100644 --- a/aligned/tests/test_models_as_feature.py +++ b/aligned/tests/test_models_as_feature.py @@ -1,4 +1,4 @@ -from aligned import Bool, FeatureStore, FeatureView, FileSource, Int32, Model, String +from aligned import Bool, FeatureStore, FeatureView, FileSource, Int32, ModelContract, String from aligned.schemas.feature import FeatureLocation @@ -21,21 +21,21 @@ class OtherView(FeatureView): is_true = Bool() -class First(Model): +class First(ModelContract): view = View() other = OtherView() - metadata = Model.metadata_with('test_model', '', features=[view.feature_a, other.feature_b]) + metadata = ModelContract.metadata_with('test_model', features=[view.feature_a, other.feature_b]) - target = other.is_true.as_classification_target() + target = other.is_true.as_classification_label() -class Second(Model): +class Second(ModelContract): first = First() - metadata = Model.metadata_with('second_model', '', features=[first.target]) + metadata = ModelContract.metadata_with('second_model', features=[first.target]) def test_model_referenced_as_feature() -> None: diff --git a/aligned/tests/test_source_validation.py b/aligned/tests/test_source_validation.py new file mode 100644 index 0000000..e4cb458 --- /dev/null +++ b/aligned/tests/test_source_validation.py @@ -0,0 +1,66 @@ +import pytest +from os import environ + +from aligned import FeatureStore, FileSource, FeatureView +from aligned.schemas.feature import FeatureType, FeatureLocation +from aligned.source_validation import validate_sources_in 
+from aligned.sources.psql import PostgreSQLConfig + + +@pytest.mark.asyncio +async def test_source_validation(titanic_feature_store: FeatureStore) -> None: + + source = FileSource.parquet_at('test_data/titanic.parquet') + + views = titanic_feature_store.views_with_config(source) + + assert len(views) == 1 + validation = await validate_sources_in(views) + + assert {FeatureLocation.feature_view('titanic_parquet'): True} == validation + + +@pytest.mark.asyncio +async def test_source_validation_psql(titanic_feature_view: FeatureView) -> None: + + if 'PSQL_DATABASE_TEST' not in environ: + environ['PSQL_DATABASE_TEST'] = 'postgresql://postgres:postgres@localhost:5432/aligned-test' + + psql_config = PostgreSQLConfig('PSQL_DATABASE_TEST') + titanic_feature_view.metadata.batch_source = psql_config.table('titanic') + + store = FeatureStore.experimental() + store.add_feature_view(titanic_feature_view) + views = store.views_with_config(psql_config) + + assert len(views) == 1 + validation = await validate_sources_in(views) + + assert {FeatureLocation.feature_view('titanic'): False} == validation + + +@pytest.mark.asyncio +async def test_schema_loading() -> None: + source = FileSource.parquet_at('test_data/titanic.parquet') + schema = await source.schema() + dtype_schema = {key: feature.dtype for key, feature in schema.items()} + assert dtype_schema == { + 'passenger_id': FeatureType(name='int64'), + 'survived': FeatureType(name='int64'), + 'Pclass': FeatureType(name='int64'), + 'name': FeatureType(name='string'), + 'sex': FeatureType(name='string'), + 'age': FeatureType(name='float'), + 'sibsp': FeatureType(name='int64'), + 'Parch': FeatureType(name='int64'), + 'Ticket': FeatureType(name='string'), + 'Fare': FeatureType(name='float'), + 'cabin': FeatureType(name='string'), + 'Embarked': FeatureType(name='string'), + } + + +@pytest.mark.asyncio +async def test_feature_view_generation() -> None: + fv_impl = await FileSource.csv_at('test_data/data.csv').feature_view_code('my_view') + assert '' in fv_impl diff --git a/aligned/validation/pandera.py b/aligned/validation/pandera.py index 74fac30..c98951a 100644 --- a/aligned/validation/pandera.py +++ b/aligned/validation/pandera.py @@ -28,7 +28,7 @@ class PanderaValidator(Validator): } datatype_check = { - FeatureType('').bool, + # FeatureType('').bool, FeatureType('').string, FeatureType('').uuid, FeatureType('').date, diff --git a/aligned/worker.py b/aligned/worker.py index 52b70f3..aade977 100644 --- a/aligned/worker.py +++ b/aligned/worker.py @@ -4,16 +4,16 @@ import logging import timeit from dataclasses import dataclass, field -from datetime import datetime from pathlib import Path from prometheus_client import Counter, Histogram from aligned.active_learning.selection import ActiveLearningMetric, ActiveLearningSelection from aligned.active_learning.write_policy import ActiveLearningWritePolicy +from aligned.data_source.batch_data_source import ColumnFeatureMappable from aligned.data_source.stream_data_source import StreamDataSource from aligned.feature_source import WritableFeatureSource -from aligned.feature_store import FeatureViewStore, ModelFeatureStore +from aligned.feature_store import FeatureStore, FeatureViewStore, ModelFeatureStore from aligned.retrival_job import RetrivalJob, StreamAggregationJob from aligned.sources.local import StorageFileReference from aligned.streams.interface import ReadableStream @@ -43,13 +43,13 @@ class StreamWorker: feature_store_reference: StorageFileReference sink_source: WritableFeatureSource - views_to_process: 
set[str]
+    views_to_process: set[str] | None = field(default=None)
     should_prune_unused_features: bool = field(default=False)
     active_learning_configs: list[ActiveLearningConfig] = field(default_factory=list)
     metric_logging_port: int | None = field(default=None)
 
     read_timestamps: dict[str, str] = field(default_factory=dict)
-    default_start_timestamp: datetime | None = field(default=None)
+    default_start_timestamp: str | None = field(default=None)
 
     @staticmethod
     def from_reference(
@@ -114,57 +114,75 @@ def read_from_timestamps(self, timestamps: dict[str, str]) -> StreamWorker:
         self.read_timestamps = timestamps
         return self
 
-    def set_default_start_timestamp(self, timestamp: datetime) -> StreamWorker:
+    def read_from(self, timestamp: str) -> StreamWorker:
         self.default_start_timestamp = timestamp
         return self
 
-    def metrics_port(self, port: int) -> StreamWorker:
+    def expose_metrics_at(self, port: int) -> StreamWorker:
         self.metric_logging_port = port
         return self
 
-    async def start(self, should_prune_unused_features: bool) -> None:
-        from prometheus_client import start_http_server
+    def prune_unused_features(self, should_prune_unused_features: bool | None = None) -> StreamWorker:
+        self.should_prune_unused_features = True
+        if should_prune_unused_features:
+            self.should_prune_unused_features = should_prune_unused_features
+        return self
 
+    def feature_views_by_topic(self, store: FeatureStore) -> dict[str, list[FeatureViewStore]]:
         from aligned.data_source.stream_data_source import HttpStreamSource
 
-        store = await self.feature_store_reference.feature_store()
-        store = store.with_source(self.sink_source)
-
-        views = self.views_to_process or set()
+        feature_views_to_process = self.views_to_process or set()
         if not self.views_to_process:
-            views = [
+            feature_views_to_process = {
                 view.name
                 for view in store.feature_views.values()
                 if view.stream_data_source is not None
                 and not isinstance(view.stream_data_source, HttpStreamSource)
-            ]
-            if not views:
+            }
+            if not feature_views_to_process:
                 raise ValueError('No feature views with streaming source to process')
 
-        feature_views_to_process = views
-
         feature_views: dict[str, list[FeatureViewStore]] = {}
         for view in store.feature_views.values():
             if view.name not in feature_views_to_process:
                 continue
 
-            if not view.stream_data_source:
+                logger.info(f'View: {view.name} has no stream source. Therefore, it will not be processed')
                 continue
 
+            source = view.stream_data_source
+            view_store = store.feature_view(view.name)
+            if self.should_prune_unused_features:
+                logger.info(f'Optimising the write for {view.name} based on model usage')
+                view_store = view_store.with_optimised_write()
+
+            request = view_store.request
+            if len(request.all_features) == 0:
+                logger.info(
+                    f'View: {view.name} had no features to process. Therefore, it will be ignored'
+                )
+                continue
+
             if source.topic_name in feature_views:
-                feature_views[source.topic_name] = feature_views[source.topic_name] + [
-                    store.feature_view(view.name)
-                ]
+                feature_views[source.topic_name] = feature_views[source.topic_name] + [view_store]
             else:
-                feature_views[source.topic_name] = [store.feature_view(view.name)]
+                feature_views[source.topic_name] = [view_store]
+
+        return feature_views
+
+    async def start(self) -> None:
+        from prometheus_client import start_http_server
+
+        store = await self.feature_store_reference.feature_store()
+        store = store.with_source(self.sink_source)
+
+        feature_views = self.feature_views_by_topic(store)
 
         processes = []
         for topic_name, views in feature_views.items():
             process_views = views
-            if should_prune_unused_features:
-                process_views = [view.with_optimised_write() for view in process_views]
             stream: StreamDataSource = views[0].view.stream_data_source
             stream_consumer = stream.consumer(
                 self.read_timestamps.get(topic_name, self.default_start_timestamp)
@@ -231,44 +249,25 @@ async def process_predictions(
         logger.info(f'Processing {len(records)} predictions in {timeit.default_timer() - start_time} seconds')
 
 
-async def single_processing(
-    stream_source: ReadableStream, topic_name: str, feature_view: FeatureViewStore
-) -> None:
-    logger.info(f'Started listning to {topic_name}')
-    while True:
-        logger.info(f'Reading {topic_name}')
-        records = await stream_source.read()
-        logger.info(f'Read {topic_name} values: {len(records)}')
-
-        if not records:
-            continue
-
-        start_time = timeit.default_timer()
-        job = stream_job(records, feature_view)
-
-        await feature_view.batch_write(job)  # type: ignore [arg-type]
-        elapsed = timeit.default_timer() - start_time
+def stream_job(values: list[dict], feature_view: FeatureViewStore) -> RetrivalJob:
+    from aligned import FileSource
 
-        process_time.labels(feature_view.view.name).observe(elapsed)
-        processed_rows_count.labels(feature_view.view.name).inc(len(records))
+    request = feature_view.request
+    mappings: dict[str, str] | None = None
 
-        logger.info(f'Wrote {len(records)} records in {elapsed} seconds')
+    if isinstance(feature_view.view.stream_data_source, ColumnFeatureMappable):
+        mappings = feature_view.view.stream_data_source.mapping_keys
 
+    value_job = RetrivalJob.from_dict(values, request)
 
-def stream_job(values: list[dict], feature_view: FeatureViewStore) -> RetrivalJob:
-    from aligned import FileSource
+    if mappings:
+        value_job = value_job.rename(mappings)
 
-    request = feature_view.request
-    job = (
-        RetrivalJob.from_dict(values, request)
-        .validate_entites()
-        .fill_missing_columns()
-        .ensure_types([request])
-    )
+    job = value_job.validate_entites().fill_missing_columns().ensure_types([request])
 
     aggregations = request.aggregate_over()
 
     if not aggregations:
-        return job
+        return job.derive_features()
 
     checkpoints = {}
 
@@ -279,15 +278,17 @@ def stream_job(values: list[dict], feature_view: FeatureViewStore) -> RetrivalJo
             name += f'_{time_window.time_window.total_seconds()}'
         checkpoints[aggregation] = FileSource.parquet_at(name)
 
-    return StreamAggregationJob(job, checkpoints)
+    job = StreamAggregationJob(job, checkpoints).derive_features()
+    if feature_view.feature_filter:
+        job = job.filter(feature_view.feature_filter)
+    return job
 
 
 async def monitor_process(values: list[dict], view: FeatureViewStore):
-    start_time = timeit.default_timer()
-    await view.batch_write(stream_job(values, view))
-    elapsed = timeit.default_timer() - start_time
-    
process_time.labels(view.view.name).observe(elapsed) - processed_rows_count.labels(view.view.name).inc(len(values)) + job = stream_job(values, view).monitor_time_used(process_time, labels=[view.view.name]) + await view.batch_write(job) + record_count = len(values) + processed_rows_count.labels(view.view.name).inc(record_count) async def multi_processing( @@ -295,7 +296,9 @@ async def multi_processing( ) -> None: logger.info(f'Started listning to {topic_name}') while True: + logger.info(f'Reading {topic_name}') stream_values = await stream_source.read() + logger.info(f'Read {topic_name} values: {len(stream_values)}') if not stream_values: continue @@ -303,20 +306,34 @@ async def multi_processing( await asyncio.gather(*[monitor_process(stream_values, view) for view in feature_views]) +async def single_processing( + stream_source: ReadableStream, topic_name: str, feature_view: FeatureViewStore +) -> None: + logger.info(f'Started listning to {topic_name}') + while True: + logger.info(f'Reading {topic_name}') + records = await stream_source.read() + logger.info(f'Read {topic_name} values: {len(records)}') + + if not records: + continue + await monitor_process(records, feature_view) + + async def process( stream_source: ReadableStream, topic_name: str, feature_views: list[FeatureViewStore], error_count: int = 0, ) -> None: - try: - if len(feature_views) == 1: - await single_processing(stream_source, topic_name, feature_views[0]) - else: - await multi_processing(stream_source, topic_name, feature_views) - except Exception as e: - logger.error(f'Error processing {topic_name}: {type(e)} - {e}') - if error_count > 5: - raise e - await asyncio.sleep(5) - await process(stream_source, topic_name, feature_views, error_count=error_count + 1) + # try: + if len(feature_views) == 1: + await single_processing(stream_source, topic_name, feature_views[0]) + else: + await multi_processing(stream_source, topic_name, feature_views) + # except Exception as e: + # logger.error(f'Error processing {topic_name}: {type(e)} - {e}') + # if error_count > 5: + # raise e + # await asyncio.sleep(5) + # await process(stream_source, topic_name, feature_views, error_count=error_count + 1) diff --git a/conftest.py b/conftest.py index 96a8716..50cfa03 100644 --- a/conftest.py +++ b/conftest.py @@ -1,5 +1,7 @@ +from dataclasses import dataclass from math import ceil, floor +import polars as pl import pytest import pytest_asyncio @@ -12,7 +14,8 @@ FileSource, Float, Int32, - Model, + Int64, + ModelContract, RedisConfig, String, TextVectoriserModel, @@ -208,7 +211,9 @@ class BreastDiagnoseFeatureView(FeatureView): @pytest_asyncio.fixture -async def breast_scan_without_timestamp_feature_store(breast_scan_feature_viewout_with_datetime: FeatureView): +async def breast_scan_without_timestamp_feature_store( + breast_scan_feature_viewout_with_datetime: FeatureView, +) -> FeatureStore: store = FeatureStore.experimental() store.add_feature_view(breast_scan_feature_viewout_with_datetime) return store @@ -342,7 +347,9 @@ class BreastDiagnoseFeatureView(FeatureView): @pytest_asyncio.fixture -async def breast_scan_with_timestamp_feature_store(breast_scan_feature_view_with_datetime: FeatureView): +async def breast_scan_with_timestamp_feature_store( + breast_scan_feature_view_with_datetime: FeatureView, +) -> FeatureStore: store = FeatureStore.experimental() store.add_feature_view(breast_scan_feature_view_with_datetime) return store @@ -351,7 +358,7 @@ async def breast_scan_with_timestamp_feature_store(breast_scan_feature_view_with 
@pytest_asyncio.fixture async def breast_scan_with_timestamp_and_aggregation_feature_store( breast_scan_feature_view_with_datetime_and_aggregation: FeatureView, -): +) -> FeatureStore: store = FeatureStore.experimental() store.add_feature_view(breast_scan_feature_view_with_datetime_and_aggregation) return store @@ -430,18 +437,24 @@ class TitanicPassenger(FeatureView): @pytest.fixture -def titanic_model(titanic_feature_view: FeatureView) -> Model: - class Titanic(Model): +def titanic_model(titanic_feature_view: FeatureView) -> ModelContract: + class Titanic(ModelContract): features = titanic_feature_view - metadata = Model.metadata_with( + metadata = ModelContract.metadata_with( 'titanic', - 'A model predicting if a passenger will survive', - features=[features.age, features.sibsp, features.has_siblings, features.is_male, features.is_mr], + description='A model predicting if a passenger will survive', + features=[ + features.age, # type: ignore + features.sibsp, # type: ignore + features.has_siblings, # type: ignore + features.is_male, # type: ignore + features.is_mr, # type: ignore + ], ) - will_survive = features.survived.as_classification_target() + will_survive = features.survived.as_classification_label() # type: ignore return Titanic() @@ -486,7 +499,7 @@ class TitanicPassenger(FeatureView): @pytest_asyncio.fixture async def titanic_feature_store( - titanic_feature_view: FeatureView, titanic_feature_view_parquet: FeatureView, titanic_model: Model + titanic_feature_view: FeatureView, titanic_feature_view_parquet: FeatureView, titanic_model: ModelContract ) -> FeatureStore: feature_store = FeatureStore.experimental() feature_store.add_feature_view(titanic_feature_view) @@ -544,7 +557,7 @@ async def alot_of_transforation_feature_store( @pytest_asyncio.fixture async def combined_view( - titanic_feature_view, breast_scan_feature_viewout_with_datetime + titanic_feature_view: FeatureView, breast_scan_feature_viewout_with_datetime: FeatureView ) -> CombinedFeatureView: class SomeCombinedView(CombinedFeatureView): @@ -555,8 +568,8 @@ class SomeCombinedView(CombinedFeatureView): titanic = titanic_feature_view cancer_scan = breast_scan_feature_viewout_with_datetime - some_feature = titanic.age + cancer_scan.radius_mean - other_feature = titanic.sibsp + cancer_scan.radius_mean + some_feature = titanic.age + cancer_scan.radius_mean # type: ignore + other_feature = titanic.sibsp + cancer_scan.radius_mean # type: ignore return SomeCombinedView() @@ -624,18 +637,18 @@ class TitanicPassenger(FeatureView): @pytest.fixture -def titanic_model_scd(titanic_feature_view_scd: FeatureView) -> Model: - class Titanic(Model): +def titanic_model_scd(titanic_feature_view_scd: FeatureView) -> ModelContract: + class Titanic(ModelContract): features = titanic_feature_view_scd - metadata = Model.metadata_with( + metadata = ModelContract.metadata_with( 'titanic', - 'A model predicting if a passenger will survive', - features=[features.age, features.sibsp, features.has_siblings, features.is_male], + description='A model predicting if a passenger will survive', + features=[features.age, features.sibsp, features.has_siblings, features.is_male], # type: ignore ) - will_survive = features.survived.as_classification_target() + will_survive = features.survived.as_classification_label() # type: ignore probability = will_survive.probability_of(True) return Titanic() @@ -643,10 +656,150 @@ class Titanic(Model): @pytest_asyncio.fixture async def titanic_feature_store_scd( - titanic_feature_view_scd: FeatureView, 
titanic_feature_view_parquet: FeatureView, titanic_model_scd: Model + titanic_feature_view_scd: FeatureView, + titanic_feature_view_parquet: FeatureView, + titanic_model_scd: ModelContract, ) -> FeatureStore: feature_store = FeatureStore.experimental() feature_store.add_feature_view(titanic_feature_view_scd) feature_store.add_feature_view(titanic_feature_view_parquet) feature_store.add_model(titanic_model_scd) return feature_store + + +@dataclass +class FeatureData: + data: pl.DataFrame + view: FeatureView + + +@dataclass +class DataTest: + sources: list[FeatureData] + entities: pl.DataFrame + feature_reference: list[str] + expected_output: pl.DataFrame + + +@pytest.fixture +def point_in_time_data_test() -> DataTest: + from datetime import datetime, timezone + + placeholder_ds = FileSource.parquet_at('placeholder') + + class CreditHistory(FeatureView): + + metadata = FeatureView.metadata_with('credit_history', description='', batch_source=placeholder_ds) + + dob_ssn = String().as_entity() + event_timestamp = EventTimestamp() + credit_card_due = Int64() + student_loan_due = Int64() + + due_sum = credit_card_due + student_loan_due + + bankruptcies = Int32() + + class CreditHistoryAggregate(FeatureView): + + metadata = FeatureView.metadata_with( + 'credit_history_agg', description='', batch_source=placeholder_ds + ) + + dob_ssn = String().as_entity() + event_timestamp = EventTimestamp() + credit_card_due = Int64() + + credit_sum = credit_card_due.aggregate().over(weeks=1).sum() + + class Loan(FeatureView): + + metadata = FeatureView.metadata_with('loan', description='', batch_source=placeholder_ds) + + loan_id = Int32().as_entity() + event_timestamp = EventTimestamp() + loan_status = Bool().description('If the loan was granted or not') + personal_income = Int64() + loan_amount = Int64() + + first_event_timestamp = datetime(2020, 4, 26, 18, 1, 4, 746575, tzinfo=timezone.utc) + second_event_timestamp = datetime(2020, 4, 27, 18, 1, 4, 746575, tzinfo=timezone.utc) + + credit_data = pl.DataFrame( + { + 'dob_ssn': [ + '19530219_5179', + '19520816_8737', + '19860413_2537', + '19530219_5179', + '19520816_8737', + '19860413_2537', + ], + 'event_timestamp': [ + first_event_timestamp, + first_event_timestamp, + first_event_timestamp, + second_event_timestamp, + second_event_timestamp, + second_event_timestamp, + ], + 'credit_card_due': [8419, 2944, 833, 5936, 1575, 6263], + 'student_loan_due': [22328, 2515, 33000, 48955, 9501, 35510], + 'bankruptcies': [0, 0, 0, 0, 0, 0], + } + ) + + loan_data = pl.DataFrame( + { + 'loan_id': [10000, 10001, 10002, 10000, 10001, 10002], + 'event_timestamp': [ + first_event_timestamp, + first_event_timestamp, + first_event_timestamp, + second_event_timestamp, + second_event_timestamp, + second_event_timestamp, + ], + 'loan_status': [1, 0, 1, 1, 1, 1], + 'personal_income': [59000, 9600, 9600, 65500, 54400, 9900], + 'loan_amount': [35000, 1000, 5500, 35000, 35000, 2500], + } + ) + + entities = pl.DataFrame( + { + 'dob_ssn': ['19530219_5179', '19520816_8737', '19860413_2537'], + 'loan_id': [10000, 10001, 10002], + 'event_timestamp': [first_event_timestamp, first_event_timestamp, second_event_timestamp], + } + ) + + expected_output = pl.DataFrame( + { + 'dob_ssn': ['19530219_5179', '19520816_8737', '19860413_2537'], + 'loan_id': [10000, 10001, 10002], + 'event_timestamp': [first_event_timestamp, first_event_timestamp, second_event_timestamp], + 'credit_card_due': [8419, 2944, 6263], + 'credit_sum': [8419, 2944, 833 + 6263], + 'student_loan_due': [22328, 2515, 35510], + 
'due_sum': [22328 + 8419, 2515 + 2944, 35510 + 6263], + 'personal_income': [59000, 9600, 9900], + } + ) + + return DataTest( + sources=[ + FeatureData(data=credit_data, view=CreditHistory()), + FeatureData(data=loan_data, view=Loan()), + FeatureData(data=credit_data, view=CreditHistoryAggregate()), + ], + entities=entities, + feature_reference=[ + 'credit_history:credit_card_due', + 'credit_history:student_loan_due', + 'credit_history:due_sum', + 'credit_history_agg:credit_sum', + 'loan:personal_income', + ], + expected_output=expected_output, + ) diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..2e60b90 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,17 @@ +version: '3' +services: + psql_app_db: + image: postgres:latest + environment: + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=postgres + - POSTGRES_DB=aligned-test + ports: + - 5433:5432 + + redis: + image: 'redis/redis-stack-server:latest' + environment: + - ALLOW_EMPTY_PASSWORD=yes + ports: + - 6379:6379 diff --git a/poetry.lock b/poetry.lock index 5785610..3dee404 100644 --- a/poetry.lock +++ b/poetry.lock @@ -30,23 +30,24 @@ files = [ [[package]] name = "anyio" -version = "3.6.2" +version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false -python-versions = ">=3.6.2" +python-versions = ">=3.7" files = [ - {file = "anyio-3.6.2-py3-none-any.whl", hash = "sha256:fbbe32bd270d2a2ef3ed1c5d45041250284e31fc0a4df4a5a6071842051a51e3"}, - {file = "anyio-3.6.2.tar.gz", hash = "sha256:25ea0d673ae30af41a0c442f81cf3b38c7e79fdc7b60335a4c14e05eb0947421"}, + {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"}, + {file = "anyio-3.7.1.tar.gz", hash = "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780"}, ] [package.dependencies] +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} idna = ">=2.8" sniffio = ">=1.1" [package.extras] -doc = ["packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["contextlib2", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (<0.15)", "uvloop (>=0.15)"] -trio = ["trio (>=0.16,<0.22)"] +doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-jquery"] +test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (<0.22)"] [[package]] name = "asgi-correlation-id" @@ -67,13 +68,13 @@ celery = ["celery"] [[package]] name = "asgiref" -version = "3.7.1" +version = "3.7.2" description = "ASGI specs, helper code, and adapters" optional = true python-versions = ">=3.7" files = [ - {file = "asgiref-3.7.1-py3-none-any.whl", hash = "sha256:33958cb2e4b3cd8b1b06ef295bd8605cde65b11df51d3beab39e2e149a610ab3"}, - {file = "asgiref-3.7.1.tar.gz", hash = "sha256:8de379fcc383bcfe4507e229fc31209ea23d4831c850f74063b2c11639474dd2"}, + {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, + {file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, ] [package.dependencies] @@ -93,6 +94,51 @@ files = [ {file = "async_timeout-4.0.2-py3-none-any.whl", hash = 
"sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, ] +[[package]] +name = "black" +version = "23.7.0" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.8" +files = [ + {file = "black-23.7.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:5c4bc552ab52f6c1c506ccae05681fab58c3f72d59ae6e6639e8885e94fe2587"}, + {file = "black-23.7.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:552513d5cd5694590d7ef6f46e1767a4df9af168d449ff767b13b084c020e63f"}, + {file = "black-23.7.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:86cee259349b4448adb4ef9b204bb4467aae74a386bce85d56ba4f5dc0da27be"}, + {file = "black-23.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:501387a9edcb75d7ae8a4412bb8749900386eaef258f1aefab18adddea1936bc"}, + {file = "black-23.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb074d8b213749fa1d077d630db0d5f8cc3b2ae63587ad4116e8a436e9bbe995"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:b5b0ee6d96b345a8b420100b7d71ebfdd19fab5e8301aff48ec270042cd40ac2"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:893695a76b140881531062d48476ebe4a48f5d1e9388177e175d76234ca247cd"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:c333286dc3ddca6fdff74670b911cccedacb4ef0a60b34e491b8a67c833b343a"}, + {file = "black-23.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831d8f54c3a8c8cf55f64d0422ee875eecac26f5f649fb6c1df65316b67c8926"}, + {file = "black-23.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:7f3bf2dec7d541b4619b8ce526bda74a6b0bffc480a163fed32eb8b3c9aed8ad"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:f9062af71c59c004cd519e2fb8f5d25d39e46d3af011b41ab43b9c74e27e236f"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:01ede61aac8c154b55f35301fac3e730baf0c9cf8120f65a9cd61a81cfb4a0c3"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:327a8c2550ddc573b51e2c352adb88143464bb9d92c10416feb86b0f5aee5ff6"}, + {file = "black-23.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1c6022b86f83b632d06f2b02774134def5d4d4f1dac8bef16d90cda18ba28a"}, + {file = "black-23.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:27eb7a0c71604d5de083757fbdb245b1a4fae60e9596514c6ec497eb63f95320"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:8417dbd2f57b5701492cd46edcecc4f9208dc75529bcf76c514864e48da867d9"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:47e56d83aad53ca140da0af87678fb38e44fd6bc0af71eebab2d1f59b1acf1d3"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:25cc308838fe71f7065df53aedd20327969d05671bac95b38fdf37ebe70ac087"}, + {file = "black-23.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:642496b675095d423f9b8448243336f8ec71c9d4d57ec17bf795b67f08132a91"}, + {file = "black-23.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:ad0014efc7acf0bd745792bd0d8857413652979200ab924fbf239062adc12491"}, + {file = "black-23.7.0-py3-none-any.whl", hash = "sha256:9fd59d418c60c0348505f2ddf9609c1e1de8e7493eab96198fc89d9f865e7a96"}, + {file = "black-23.7.0.tar.gz", hash = "sha256:022a582720b0d9480ed82576c920a8c1dde97cc38ff11d8d8859b3bd6ca9eedb"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" 
+platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + [[package]] name = "certifi" version = "2023.5.7" @@ -182,13 +228,13 @@ pycparser = "*" [[package]] name = "click" -version = "8.1.3" +version = "8.1.6" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" files = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, + {file = "click-8.1.6-py3-none-any.whl", hash = "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5"}, + {file = "click-8.1.6.tar.gz", hash = "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd"}, ] [package.dependencies] @@ -207,55 +253,117 @@ files = [ [[package]] name = "connectorx" -version = "0.3.1" +version = "0.3.2a7" description = "" optional = false python-versions = "*" files = [ - {file = "connectorx-0.3.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:719750045e7c3b94c199271fbfe6aef47944768e711f27bcc606b498707e0054"}, - {file = "connectorx-0.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aed31b08acebeb3ebbe53c0df846c686e7c27c4242bff3a75b72cf517d070257"}, - {file = "connectorx-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71d2c2678339fb01f89469bbe22e66e75cabcf727a52ed72d576fef5744ebc58"}, - {file = "connectorx-0.3.1-cp310-none-win_amd64.whl", hash = "sha256:92e576ef9610b59f8e5456c12d22e5b0752d0207f586df82701987657909888b"}, - {file = "connectorx-0.3.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:36c28cc59220998928e7b283eecf404e17e077dc3e525570096d0968b192cc64"}, - {file = "connectorx-0.3.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:c5173e7252f593c46787627a46561b0d949eb80ab23321e045bbf6bd5131945c"}, - {file = "connectorx-0.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c8411631750d24c12e5e296720637909b8515d5faa3b5eaf7bb86c582d02667"}, - {file = "connectorx-0.3.1-cp37-none-win_amd64.whl", hash = "sha256:0674b6389f8f2ba62155ac2f718df18f76f9de5c50d9911a5fefe7485e1c598e"}, - {file = "connectorx-0.3.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:324c5075e8aa6698db8c877cb847f0d86172784db88ac0f3e6762aa9852330f3"}, - {file = "connectorx-0.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:027a3880629a7b33ae0c7a80ab4fa53286957a253af2dfe34f19adfea6b79b91"}, - {file = "connectorx-0.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a666b967958fcf9fc0444a7b3603483ee23a2fe39f0da3d545ff199f376f7e4b"}, - {file = "connectorx-0.3.1-cp38-none-win_amd64.whl", hash = "sha256:3c5dedfd75cf44898c17cc84a1dd0ab6ed0fa54de0461f2d6aa4bcb2c2b0dc1d"}, - {file = "connectorx-0.3.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:354c4126bcd7a9efbb8879feac92e1e7b0d0712f7e98665c392af663805491f8"}, - {file = "connectorx-0.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3011e1f9a27fd2a7b12c6a45bc29f6e7577a27418a3f607adaf54b301ff09068"}, - {file = "connectorx-0.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1efb6ed547acc5837c2211e3d65d22948019d1653e7b30e522a4a4bd6d25fa8"}, - {file = "connectorx-0.3.1-cp39-none-win_amd64.whl", hash = 
"sha256:001b473e600b6d25af83b32674f98dccf49705a59bd6df724b5ba9beb236a0e0"}, + {file = "connectorx-0.3.2a7-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:4c9a770af18f81951fbcbaab0f896b95bbe90f6fac7be9f91b9085fcd29f61ac"}, + {file = "connectorx-0.3.2a7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:704b26855fa07a86c9131616904bd741fcdcde04c7f20b074f0a2e7a830bcbd6"}, + {file = "connectorx-0.3.2a7-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:5cebd9ed2163b96f116404c8f874743344720562efb1f22cd06bd7e60d06ec75"}, + {file = "connectorx-0.3.2a7-cp310-none-win_amd64.whl", hash = "sha256:d5278dae5a3347ba709e28512c0a5aa7454fa249590920ece6b310d5f27b3804"}, + {file = "connectorx-0.3.2a7-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:b1d880a214bb37d17b194530e7459c5b7409bd7005141e85b7a9f3ad37ac1f16"}, + {file = "connectorx-0.3.2a7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f58e62c480ff16591b13343ba145cddcf4daeded764ad9d29cedf55b0570d9b"}, + {file = "connectorx-0.3.2a7-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:73d2690b89c18cd10a1ce7aed2d71f749dda6178be8a6c77d339f65f063678a6"}, + {file = "connectorx-0.3.2a7-cp311-none-win_amd64.whl", hash = "sha256:05a5672cdf054aab38e8b96269ffb610ba5fb2a6db688f320218b552d0f26fe5"}, + {file = "connectorx-0.3.2a7-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:6da61612cc5dfed625076bd907652df1dc59eaaced478f3b7caf06c57afc1544"}, + {file = "connectorx-0.3.2a7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:848e5072ddafb58192edffc3a3d6d43cf57ba2651ad48748726aa8eb1d63b4b2"}, + {file = "connectorx-0.3.2a7-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:11e5dcf82d26ccc5bb8ad69f197181dc379378f1bbcabec2b6a432bf39ae30c0"}, + {file = "connectorx-0.3.2a7-cp38-none-win_amd64.whl", hash = "sha256:b71b7ddfe8521e67a33e6178f313ee5d42957bbff626b56b87119ff6bc137b86"}, + {file = "connectorx-0.3.2a7-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:35488a9efb4ec57590e57b41deb13b26d69af5a2957dfb913c8717f4fa08439f"}, + {file = "connectorx-0.3.2a7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:64cfa7debd7a9a56cf86f7485da1fcbff4ae2c4db42e46be8ce0915291c33f35"}, + {file = "connectorx-0.3.2a7-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:de21533fa65a2f3628edc3eef495c6bb6f832ed96fc5daa38cd26ad91c9d9bb8"}, + {file = "connectorx-0.3.2a7-cp39-none-win_amd64.whl", hash = "sha256:6fcf949d85fce94c70fc9415b8b9078de1f777ae39f8519e9c9a8e46bb3a3dca"}, +] + +[[package]] +name = "contourpy" +version = "1.1.0" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = false +python-versions = ">=3.8" +files = [ + {file = "contourpy-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:89f06eff3ce2f4b3eb24c1055a26981bffe4e7264acd86f15b97e40530b794bc"}, + {file = "contourpy-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dffcc2ddec1782dd2f2ce1ef16f070861af4fb78c69862ce0aab801495dda6a3"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25ae46595e22f93592d39a7eac3d638cda552c3e1160255258b695f7b58e5655"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17cfaf5ec9862bc93af1ec1f302457371c34e688fbd381f4035a06cd47324f48"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18a64814ae7bce73925131381603fff0116e2df25230dfc80d6d690aa6e20b37"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:90c81f22b4f572f8a2110b0b741bb64e5a6427e0a198b2cdc1fbaf85f352a3aa"}, + {file = "contourpy-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:53cc3a40635abedbec7f1bde60f8c189c49e84ac180c665f2cd7c162cc454baa"}, + {file = "contourpy-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:1f795597073b09d631782e7245016a4323cf1cf0b4e06eef7ea6627e06a37ff2"}, + {file = "contourpy-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0b7b04ed0961647691cfe5d82115dd072af7ce8846d31a5fac6c142dcce8b882"}, + {file = "contourpy-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27bc79200c742f9746d7dd51a734ee326a292d77e7d94c8af6e08d1e6c15d545"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:052cc634bf903c604ef1a00a5aa093c54f81a2612faedaa43295809ffdde885e"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9382a1c0bc46230fb881c36229bfa23d8c303b889b788b939365578d762b5c18"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5cec36c5090e75a9ac9dbd0ff4a8cf7cecd60f1b6dc23a374c7d980a1cd710e"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f0cbd657e9bde94cd0e33aa7df94fb73c1ab7799378d3b3f902eb8eb2e04a3a"}, + {file = "contourpy-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:181cbace49874f4358e2929aaf7ba84006acb76694102e88dd15af861996c16e"}, + {file = "contourpy-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fb3b7d9e6243bfa1efb93ccfe64ec610d85cfe5aec2c25f97fbbd2e58b531256"}, + {file = "contourpy-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bcb41692aa09aeb19c7c213411854402f29f6613845ad2453d30bf421fe68fed"}, + {file = "contourpy-1.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5d123a5bc63cd34c27ff9c7ac1cd978909e9c71da12e05be0231c608048bb2ae"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62013a2cf68abc80dadfd2307299bfa8f5aa0dcaec5b2954caeb5fa094171103"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b6616375d7de55797d7a66ee7d087efe27f03d336c27cf1f32c02b8c1a5ac70"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:317267d915490d1e84577924bd61ba71bf8681a30e0d6c545f577363157e5e94"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d551f3a442655f3dcc1285723f9acd646ca5858834efeab4598d706206b09c9f"}, + {file = "contourpy-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7a117ce7df5a938fe035cad481b0189049e8d92433b4b33aa7fc609344aafa1"}, + {file = "contourpy-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:d4f26b25b4f86087e7d75e63212756c38546e70f2a92d2be44f80114826e1cd4"}, + {file = "contourpy-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc00bb4225d57bff7ebb634646c0ee2a1298402ec10a5fe7af79df9a51c1bfd9"}, + {file = "contourpy-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:189ceb1525eb0655ab8487a9a9c41f42a73ba52d6789754788d1883fb06b2d8a"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f2931ed4741f98f74b410b16e5213f71dcccee67518970c42f64153ea9313b9"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30f511c05fab7f12e0b1b7730ebdc2ec8deedcfb505bc27eb570ff47c51a8f15"}, + {file = 
"contourpy-1.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:143dde50520a9f90e4a2703f367cf8ec96a73042b72e68fcd184e1279962eb6f"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e94bef2580e25b5fdb183bf98a2faa2adc5b638736b2c0a4da98691da641316a"}, + {file = "contourpy-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ed614aea8462735e7d70141374bd7650afd1c3f3cb0c2dbbcbe44e14331bf002"}, + {file = "contourpy-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:438ba416d02f82b692e371858143970ed2eb6337d9cdbbede0d8ad9f3d7dd17d"}, + {file = "contourpy-1.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a698c6a7a432789e587168573a864a7ea374c6be8d4f31f9d87c001d5a843493"}, + {file = "contourpy-1.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:397b0ac8a12880412da3551a8cb5a187d3298a72802b45a3bd1805e204ad8439"}, + {file = "contourpy-1.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:a67259c2b493b00e5a4d0f7bfae51fb4b3371395e47d079a4446e9b0f4d70e76"}, + {file = "contourpy-1.1.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2b836d22bd2c7bb2700348e4521b25e077255ebb6ab68e351ab5aa91ca27e027"}, + {file = "contourpy-1.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:084eaa568400cfaf7179b847ac871582199b1b44d5699198e9602ecbbb5f6104"}, + {file = "contourpy-1.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:911ff4fd53e26b019f898f32db0d4956c9d227d51338fb3b03ec72ff0084ee5f"}, + {file = "contourpy-1.1.0.tar.gz", hash = "sha256:e53046c3863828d21d531cc3b53786e6580eb1ba02477e8681009b6aa0870b21"}, ] +[package.dependencies] +numpy = ">=1.16" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.2.0)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = ["pytest", "pytest-cov", "wurlitzer"] + [[package]] name = "cryptography" -version = "40.0.2" +version = "41.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "cryptography-40.0.2-cp36-abi3-macosx_10_12_universal2.whl", hash = "sha256:8f79b5ff5ad9d3218afb1e7e20ea74da5f76943ee5edb7f76e56ec5161ec782b"}, - {file = "cryptography-40.0.2-cp36-abi3-macosx_10_12_x86_64.whl", hash = "sha256:05dc219433b14046c476f6f09d7636b92a1c3e5808b9a6536adf4932b3b2c440"}, - {file = "cryptography-40.0.2-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4df2af28d7bedc84fe45bd49bc35d710aede676e2a4cb7fc6d103a2adc8afe4d"}, - {file = "cryptography-40.0.2-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dcca15d3a19a66e63662dc8d30f8036b07be851a8680eda92d079868f106288"}, - {file = "cryptography-40.0.2-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:a04386fb7bc85fab9cd51b6308633a3c271e3d0d3eae917eebab2fac6219b6d2"}, - {file = "cryptography-40.0.2-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:adc0d980fd2760c9e5de537c28935cc32b9353baaf28e0814df417619c6c8c3b"}, - {file = "cryptography-40.0.2-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d5a1bd0e9e2031465761dfa920c16b0065ad77321d8a8c1f5ee331021fda65e9"}, - {file = "cryptography-40.0.2-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a95f4802d49faa6a674242e25bfeea6fc2acd915b5e5e29ac90a32b1139cae1c"}, - {file = "cryptography-40.0.2-cp36-abi3-win32.whl", hash = "sha256:aecbb1592b0188e030cb01f82d12556cf72e218280f621deed7d806afd2113f9"}, - {file = "cryptography-40.0.2-cp36-abi3-win_amd64.whl", hash = "sha256:b12794f01d4cacfbd3177b9042198f3af1c856eedd0a98f10f141385c809a14b"}, - {file = "cryptography-40.0.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:142bae539ef28a1c76794cca7f49729e7c54423f615cfd9b0b1fa90ebe53244b"}, - {file = "cryptography-40.0.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:956ba8701b4ffe91ba59665ed170a2ebbdc6fc0e40de5f6059195d9f2b33ca0e"}, - {file = "cryptography-40.0.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4f01c9863da784558165f5d4d916093737a75203a5c5286fde60e503e4276c7a"}, - {file = "cryptography-40.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3daf9b114213f8ba460b829a02896789751626a2a4e7a43a28ee77c04b5e4958"}, - {file = "cryptography-40.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:48f388d0d153350f378c7f7b41497a54ff1513c816bcbbcafe5b829e59b9ce5b"}, - {file = "cryptography-40.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c0764e72b36a3dc065c155e5b22f93df465da9c39af65516fe04ed3c68c92636"}, - {file = "cryptography-40.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:cbaba590180cba88cb99a5f76f90808a624f18b169b90a4abb40c1fd8c19420e"}, - {file = "cryptography-40.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7a38250f433cd41df7fcb763caa3ee9362777fdb4dc642b9a349721d2bf47404"}, - {file = "cryptography-40.0.2.tar.gz", hash = "sha256:c33c0d32b8594fa647d2e01dbccc303478e16fdd7cf98652d5b3ed11aa5e5c99"}, + {file = "cryptography-41.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:01f1d9e537f9a15b037d5d9ee442b8c22e3ae11ce65ea1f3316a41c78756b711"}, + {file = "cryptography-41.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:079347de771f9282fbfe0e0236c716686950c19dee1b76240ab09ce1624d76d7"}, + {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:439c3cc4c0d42fa999b83ded80a9a1fb54d53c58d6e59234cfe97f241e6c781d"}, + {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:f14ad275364c8b4e525d018f6716537ae7b6d369c094805cae45300847e0894f"}, + {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:84609ade00a6ec59a89729e87a503c6e36af98ddcd566d5f3be52e29ba993182"}, + {file = "cryptography-41.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:49c3222bb8f8e800aead2e376cbef687bc9e3cb9b58b29a261210456a7783d83"}, + {file = "cryptography-41.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d73f419a56d74fef257955f51b18d046f3506270a5fd2ac5febbfa259d6c0fa5"}, + {file = "cryptography-41.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:2a034bf7d9ca894720f2ec1d8b7b5832d7e363571828037f9e0c4f18c1b58a58"}, + {file = "cryptography-41.0.2-cp37-abi3-win32.whl", hash = "sha256:d124682c7a23c9764e54ca9ab5b308b14b18eba02722b8659fb238546de83a76"}, + {file = "cryptography-41.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:9c3fe6534d59d071ee82081ca3d71eed3210f76ebd0361798c74abc2bcf347d4"}, + {file = "cryptography-41.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a719399b99377b218dac6cf547b6ec54e6ef20207b6165126a280b0ce97e0d2a"}, + {file = "cryptography-41.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:182be4171f9332b6741ee818ec27daff9fb00349f706629f5cbf417bd50e66fd"}, + {file = "cryptography-41.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7a9a3bced53b7f09da251685224d6a260c3cb291768f54954e28f03ef14e3766"}, + {file = "cryptography-41.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f0dc40e6f7aa37af01aba07277d3d64d5a03dc66d682097541ec4da03cc140ee"}, + {file = "cryptography-41.0.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:674b669d5daa64206c38e507808aae49904c988fa0a71c935e7006a3e1e83831"}, + {file = "cryptography-41.0.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7af244b012711a26196450d34f483357e42aeddb04128885d95a69bd8b14b69b"}, + {file = "cryptography-41.0.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9b6d717393dbae53d4e52684ef4f022444fc1cce3c48c38cb74fca29e1f08eaa"}, + {file = "cryptography-41.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:192255f539d7a89f2102d07d7375b1e0a81f7478925b3bc2e0549ebf739dae0e"}, + {file = "cryptography-41.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f772610fe364372de33d76edcd313636a25684edb94cee53fd790195f5989d14"}, + {file = "cryptography-41.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:b332cba64d99a70c1e0836902720887fb4529ea49ea7f5462cf6640e095e11d2"}, + {file = "cryptography-41.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9a6673c1828db6270b76b22cc696f40cde9043eb90373da5c2f8f2158957f42f"}, + {file = "cryptography-41.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:342f3767e25876751e14f8459ad85e77e660537ca0a066e10e75df9c9e9099f0"}, + {file = "cryptography-41.0.2.tar.gz", hash = "sha256:7d230bf856164de164ecb615ccc14c7fc6de6906ddd5b491f3af90d3514c925c"}, ] [package.dependencies] @@ -264,27 +372,38 @@ cffi = ">=1.12" [package.extras] docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] -pep8test = ["black", "check-manifest", "mypy", "ruff"] -sdist = ["setuptools-rust (>=0.11.4)"] +nox = ["nox"] +pep8test = ["black", "check-sdist", "mypy", "ruff"] +sdist = ["build"] ssh = ["bcrypt (>=3.1.5)"] -test = ["iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-shard (>=0.1.2)", "pytest-subtests", "pytest-xdist"] 
+test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] -tox = ["tox"] + +[[package]] +name = "cycler" +version = "0.11.0" +description = "Composable style cycles" +optional = false +python-versions = ">=3.6" +files = [ + {file = "cycler-0.11.0-py3-none-any.whl", hash = "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3"}, + {file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"}, +] [[package]] name = "deltalake" -version = "0.9.0" +version = "0.10.0" description = "Native Delta Lake Python binding based on delta-rs with Pandas integration" optional = false python-versions = ">=3.7" files = [ - {file = "deltalake-0.9.0-1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0c1775e4624412e2ccda8f96cb19e952960e2fa8aa59ed4fdfd6fc5dcac8f49"}, - {file = "deltalake-0.9.0-1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d3d8b20b2e3e98edb4a8ab4d6f05f22259b0c355186e557008ece981a39fd01"}, - {file = "deltalake-0.9.0-cp37-abi3-macosx_10_7_x86_64.whl", hash = "sha256:4de70eeac00bfab1375eb805f3832a6ebc30ea00af2b9773561667e440d65e4b"}, - {file = "deltalake-0.9.0-cp37-abi3-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:52448204032459058c8606cf3c058f321522f736d355c0391e8e5a6570f10e31"}, - {file = "deltalake-0.9.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:cc52c4660dd47fc836a980897a1fb39dbb0282ca7f061e724f1e7c835dde744c"}, - {file = "deltalake-0.9.0-cp37-abi3-win_amd64.whl", hash = "sha256:45690f5b9e42da9fdc48f37656796e7cf3e26cd388a986fc6a52b38ea675f4ef"}, - {file = "deltalake-0.9.0.tar.gz", hash = "sha256:1634380220051e7bbd3f4b0b6b5901c46b62ad8326fd5da91b032e8d318216f5"}, + {file = "deltalake-0.10.0-cp37-abi3-macosx_10_7_x86_64.whl", hash = "sha256:1aef72679268324e7cc556f03969b92235076b6edcecae7a2f85ac930b5fb9b9"}, + {file = "deltalake-0.10.0-cp37-abi3-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:91e801b78e298946950b07bf0573e4cf012d6259ac294bcac662b6d746f95596"}, + {file = "deltalake-0.10.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:437f8ee9f28a9cd8454e8700c3dd88f5ad33c2ec9a7192356b55b23fee50259c"}, + {file = "deltalake-0.10.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f532b3c3dc8437fc26279a296bc576bdf79b167e1d4864847ca7c5ec7b3915b6"}, + {file = "deltalake-0.10.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42212ee879735d476772a9c1d1ca3e43ef15893a301796baffe94db3578e98ca"}, + {file = "deltalake-0.10.0-cp37-abi3-win_amd64.whl", hash = "sha256:88c7759d8a97ec882dd0309eb097506c1682637ca4a0dcbe04d6482eedd298c9"}, + {file = "deltalake-0.10.0.tar.gz", hash = "sha256:b8793a1c7a1219c8935ed9926a6870c9341597396c0f50bf6ff98c323d06ee0f"}, ] [package.dependencies] @@ -311,13 +430,13 @@ graph = ["objgraph (>=1.7.2)"] [[package]] name = "exceptiongroup" -version = "1.1.1" +version = "1.1.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, - {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, + {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = 
"sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"}, + {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"}, ] [package.extras] @@ -325,18 +444,18 @@ test = ["pytest (>=6)"] [[package]] name = "fakeredis" -version = "2.13.0" -description = "Fake implementation of redis API for testing purposes." +version = "2.17.0" +description = "Python implementation of redis API, can be used for testing purposes." optional = false python-versions = ">=3.7,<4.0" files = [ - {file = "fakeredis-2.13.0-py3-none-any.whl", hash = "sha256:df7bb44fb9e593970c626325230e1c321f954ce7b204d4c4452eae5233d554ed"}, - {file = "fakeredis-2.13.0.tar.gz", hash = "sha256:53f00f44f771d2b794f1ea036fa07a33476ab7368f1b0e908daab3eff80336f6"}, + {file = "fakeredis-2.17.0-py3-none-any.whl", hash = "sha256:a99ef6e5642c31e91d36be78809fec3743e2bf7aaa682685b0d65a849fecd148"}, + {file = "fakeredis-2.17.0.tar.gz", hash = "sha256:e304bc7addb2f862c3550cb7db58548418a0fadd4cd78a4de66464c84fbc2195"}, ] [package.dependencies] redis = ">=4" -sortedcontainers = ">=2.4,<3.0" +sortedcontainers = ">=2,<3" [package.extras] json = ["jsonpath-ng (>=1.5,<2.0)"] @@ -363,6 +482,63 @@ dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (> doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"] test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"] +[[package]] +name = "fonttools" +version = "4.41.0" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.41.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ba2a367ff478cd108d5319c0dc4fd4eb4ea3476dbfc45b00c45718e889cd9463"}, + {file = "fonttools-4.41.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:69178674505ec81adf4af2a3bbacd0cb9a37ba7831bc3fca307f80e48ab2767b"}, + {file = "fonttools-4.41.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86edb95c4d1fe4fae2111d7e0c10c6e42b7790b377bcf1952303469eee5b52bb"}, + {file = "fonttools-4.41.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50f8bdb421270f71b54695c62785e300fab4bb6127be40bf9f3084962a0c3adb"}, + {file = "fonttools-4.41.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c890061915e95b619c1d3cc3c107c6fb021406b701c0c24b03e74830d522f210"}, + {file = "fonttools-4.41.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b329ae7ce971b5c4148d6cdb8119c0ce4587265b2330d4f2f3776ef851bee020"}, + {file = "fonttools-4.41.0-cp310-cp310-win32.whl", hash = "sha256:bc9e7b1e268be7a23fc66471b615c324e99c5db39ce8c49dd6dd8e962c7bc1b8"}, + {file = "fonttools-4.41.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:f3fe90dfb297bd8265238c06787911cd81c2cb89ac5b13e1c911928bdabfce0f"}, + {file = "fonttools-4.41.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e38bd91eae257f36c2b7245c0278e9cd9d754f3a66b8d2b548c623ba66e387b6"}, + {file = "fonttools-4.41.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:415cf7c806a3f56fb280dadcf3c92c85c0415e75665ca957b4a2a2e39c17a5c9"}, + {file = "fonttools-4.41.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:381558eafffc1432d08ca58063e71c7376ecaae48e9318354a90a1049a644845"}, + {file = "fonttools-4.41.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ee75b8ca48f6c48af25e967dce995ef94e46872b35c7d454b983c62c9c7006d"}, + {file = "fonttools-4.41.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d45f28c20bb67dee0f4a4caae709f40b0693d764b7b2bf2d58890f36b1bfcef0"}, + {file = "fonttools-4.41.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5448a87f6ed57ed844b64a05d3792827af584a8584613f6289867f4e77eb603b"}, + {file = "fonttools-4.41.0-cp311-cp311-win32.whl", hash = "sha256:69dbe0154e15b68dd671441ea8f23dad87488b24a6e650d45958f4722819a443"}, + {file = "fonttools-4.41.0-cp311-cp311-win_amd64.whl", hash = "sha256:ea879afd1d6189fca02a85a7868560c9bb8415dccff6b7ae6d81e4f06b3ab30d"}, + {file = "fonttools-4.41.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8f602dd5bcde7e4241419924f23c6f0d66723dd5408a58c3a2f781745c693f45"}, + {file = "fonttools-4.41.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:06eac087ea55b3ebb2207d93b5ac56c847163899f05f5a77e1910f688fe10030"}, + {file = "fonttools-4.41.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e22d0144d735f6c7df770509b8c0c33414bf460df0d5dddc98a159e5dbb10eb"}, + {file = "fonttools-4.41.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19d461c801b8904d201c6c38a99bfcfef673bfdfe0c7f026f582ef78896434e0"}, + {file = "fonttools-4.41.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:72d40a32d6443871ea0d147813caad58394b48729dfa4fbc45dcaac54f9506f2"}, + {file = "fonttools-4.41.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0614b6348866092d00df3dfb37e037fc06412ca67087de361a2777ea5ed62c16"}, + {file = "fonttools-4.41.0-cp38-cp38-win32.whl", hash = "sha256:e43f6c7f9ba4f9d29edee530e45f9aa162872ec9549398b85971477a99f2a806"}, + {file = "fonttools-4.41.0-cp38-cp38-win_amd64.whl", hash = "sha256:eb9dfa87152bd97019adc387b2f29ef6af601de4386f36570ca537ace96d96ed"}, + {file = "fonttools-4.41.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d2dae84a3d0f76884a6102c62f2795b2d6602c2c95cfcce74c8a590b6200e533"}, + {file = "fonttools-4.41.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cc3324e4159e6d1f55c3615b4c1c211f87cc96cc0cc7c946c8447dc1319f2e9d"}, + {file = "fonttools-4.41.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c654b1facf1f3b742e4d9b2dcdf0fa867b1f007b1b4981cc58a75ef5dca2a3c"}, + {file = "fonttools-4.41.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:560ea1a604c927399f36742abf342a4c5f3fee8e8e8a484b774dfe9630bd9a91"}, + {file = "fonttools-4.41.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9387b09694fbf8ac7dcf887069068f81fb4124d05e09557ac7daabfbec1744bd"}, + {file = "fonttools-4.41.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:465d0f24bf4f75160f441793b55076b7a080a57d3a1f738390af2c20bee24fbb"}, + {file = "fonttools-4.41.0-cp39-cp39-win32.whl", hash = 
"sha256:841c491fa3e9c54e8f9cd5dae059e88f45e086aea090c28be9d42f59c8b99e01"}, + {file = "fonttools-4.41.0-cp39-cp39-win_amd64.whl", hash = "sha256:efd59e83223cb77952997fb850c7a7c2a958c9af0642060f536722c2a9e9d53b"}, + {file = "fonttools-4.41.0-py3-none-any.whl", hash = "sha256:5b1c2b21b40229166a864f2b0aec06d37f0a204066deb1734c93370e0c76339d"}, + {file = "fonttools-4.41.0.tar.gz", hash = "sha256:6faff25991dec48f8cac882055a09ae1a29fd15bc160bc3d663e789e994664c2"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.0.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "scipy"] +lxml = ["lxml (>=4.0,<5)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.0.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + [[package]] name = "freezegun" version = "1.2.2" @@ -379,13 +555,13 @@ python-dateutil = ">=2.7" [[package]] name = "fsspec" -version = "2023.5.0" +version = "2023.6.0" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2023.5.0-py3-none-any.whl", hash = "sha256:51a4ad01a5bb66fcc58036e288c0d53d3975a0df2a5dc59a93b59bade0391f2a"}, - {file = "fsspec-2023.5.0.tar.gz", hash = "sha256:b3b56e00fb93ea321bc9e5d9cf6f8522a0198b20eb24e02774d329e9c6fb84ce"}, + {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, + {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, ] [package.extras] @@ -562,6 +738,23 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "isort" +version = "5.12.0" +description = "A Python utility / library to sort Python imports." 
+optional = false +python-versions = ">=3.8.0" +files = [ + {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, + {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, +] + +[package.extras] +colors = ["colorama (>=0.4.3)"] +pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] +plugins = ["setuptools"] +requirements-deprecated-finder = ["pip-api", "pipreqs"] + [[package]] name = "jinja2" version = "3.1.2" @@ -583,7 +776,7 @@ i18n = ["Babel (>=2.7)"] name = "kafka-python" version = "2.0.2" description = "Pure Python client for Apache Kafka" -optional = false +optional = true python-versions = "*" files = [ {file = "kafka-python-2.0.2.tar.gz", hash = "sha256:04dfe7fea2b63726cd6f3e79a2d86e709d608d74406638c5da33a01d45a9d7e3"}, @@ -593,74 +786,151 @@ files = [ [package.extras] crc32c = ["crc32c"] +[[package]] +name = "kiwisolver" +version = "1.4.4" +description = "A fast implementation of the Cassowary constraint solver" +optional = false +python-versions = ">=3.7" +files = [ + {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6"}, + {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c"}, + {file = "kiwisolver-1.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32"}, + {file = "kiwisolver-1.4.4-cp310-cp310-win32.whl", hash = "sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408"}, + {file = "kiwisolver-1.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004"}, + {file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6"}, + {file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2"}, + {file = "kiwisolver-1.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514"}, 
+ {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4"}, + {file = "kiwisolver-1.4.4-cp311-cp311-win32.whl", hash = "sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e"}, + {file = "kiwisolver-1.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-win32.whl", hash = "sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-win_amd64.whl", hash = "sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166"}, + {file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454"}, + {file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0"}, + {file = "kiwisolver-1.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c"}, + {file = "kiwisolver-1.4.4-cp38-cp38-win32.whl", hash = "sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191"}, + {file = "kiwisolver-1.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766"}, + {file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8"}, + {file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897"}, + {file = "kiwisolver-1.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9"}, + {file = "kiwisolver-1.4.4-cp39-cp39-win32.whl", hash = "sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea"}, + {file = "kiwisolver-1.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b"}, + {file = "kiwisolver-1.4.4.tar.gz", hash = "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955"}, +] + [[package]] name = "markupsafe" -version = "2.1.2" +version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.7" files = [ - {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:665a36ae6f8f20a4676b53224e33d456a6f5a72657d9c83c2aa00765072f31f7"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:340bea174e9761308703ae988e982005aedf427de816d1afe98147668cc03036"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22152d00bf4a9c7c83960521fc558f55a1adbc0631fbb00a9471e097b19d72e1"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28057e985dace2f478e042eaa15606c7efccb700797660629da387eb289b9323"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca244fa73f50a800cf8c3ebf7fd93149ec37f5cb9596aa8873ae2c1d23498601"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9d971ec1e79906046aa3ca266de79eac42f1dbf3612a05dc9368125952bd1a1"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7e007132af78ea9df29495dbf7b5824cb71648d7133cf7848a2a5dd00d36f9ff"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7313ce6a199651c4ed9d7e4cfb4aa56fe923b1adf9af3b420ee14e6d9a73df65"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-win32.whl", hash = "sha256:c4a549890a45f57f1ebf99c067a4ad0cb423a05544accaf2b065246827ed9603"}, - {file = "MarkupSafe-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:835fb5e38fd89328e9c81067fd642b3593c33e1e17e2fdbf77f5676abb14a156"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2ec4f2d48ae59bbb9d1f9d7efb9236ab81429a764dedca114f5fdabbc3788013"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608e7073dfa9e38a85d38474c082d4281f4ce276ac0010224eaba11e929dd53a"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:65608c35bfb8a76763f37036547f7adfd09270fbdbf96608be2bead319728fcd"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da25303d91526aac3672ee6d49a2f3db2d9502a4a60b55519feb1a4c7714e07d"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9cad97ab29dfc3f0249b483412c85c8ef4766d96cdf9dcf5a1e3caa3f3661cf1"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:085fd3201e7b12809f9e6e9bc1e5c96a368c8523fad5afb02afe3c051ae4afcc"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1bea30e9bf331f3fef67e0a3877b2288593c98a21ccb2cf29b74c581a4eb3af0"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-win32.whl", hash = "sha256:7df70907e00c970c60b9ef2938d894a9381f38e6b9db73c5be35e59d92e06625"}, - {file = "MarkupSafe-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:e55e40ff0cc8cc5c07996915ad367fa47da6b3fc091fdadca7f5403239c5fec3"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a6e40afa7f45939ca356f348c8e23048e02cb109ced1eb8420961b2f40fb373a"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf877ab4ed6e302ec1d04952ca358b381a882fbd9d1b07cccbfd61783561f98a"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63ba06c9941e46fa389d389644e2d8225e0e3e5ebcc4ff1ea8506dce646f8c8a"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1cd098434e83e656abf198f103a8207a8187c0fc110306691a2e94a78d0abb2"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:55f44b440d491028addb3b88f72207d71eeebfb7b5dbf0643f7c023ae1fba619"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a6f2fcca746e8d5910e18782f976489939d54a91f9411c32051b4aab2bd7c513"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0b462104ba25f1ac006fdab8b6a01ebbfbce9ed37fd37fd4acd70c67c973e460"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-win32.whl", hash = "sha256:7668b52e102d0ed87cb082380a7e2e1e78737ddecdde129acadb0eccc5423859"}, - {file = "MarkupSafe-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6d6607f98fcf17e534162f0709aaad3ab7a96032723d8ac8750ffe17ae5a0666"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a806db027852538d2ad7555b203300173dd1b77ba116de92da9afbc3a3be3eed"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a4abaec6ca3ad8660690236d11bfe28dfd707778e2442b45addd2f086d6ef094"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f03a532d7dee1bed20bc4884194a16160a2de9ffc6354b3878ec9682bb623c54"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cf06cdc1dda95223e9d2d3c58d3b178aa5dacb35ee7e3bbac10e4e1faacb419"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22731d79ed2eb25059ae3df1dfc9cb1546691cc41f4e3130fe6bfbc3ecbbecfa"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8db032bf0ce9022a8e41a22598eefc802314e81b879ae093f36ce9ddf39ab1ba"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2298c859cfc5463f1b64bd55cb3e602528db6fa0f3cfd568d3605c50678f8f03"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-win32.whl", hash = "sha256:50c42830a633fa0cf9e7d27664637532791bfc31c731a87b202d2d8ac40c3ea2"}, - {file = "MarkupSafe-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:bb06feb762bade6bf3c8b844462274db0c76acc95c52abe8dbed28ae3d44a147"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99625a92da8229df6d44335e6fcc558a5037dd0a760e11d84be2260e6f37002f"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8bca7e26c1dd751236cfb0c6c72d4ad61d986e9a41bbf76cb445f69488b2a2bd"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40627dcf047dadb22cd25ea7ecfe9cbf3bbbad0482ee5920b582f3809c97654f"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40dfd3fefbef579ee058f139733ac336312663c6706d1163b82b3003fb1925c4"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:090376d812fb6ac5f171e5938e82e7f2d7adc2b629101cec0db8b267815c85e2"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2e7821bffe00aa6bd07a23913b7f4e01328c3d5cc0b40b36c0bd81d362faeb65"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c0a33bc9f02c2b17c3ea382f91b4db0e6cde90b63b296422a939886a7a80de1c"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b8526c6d437855442cdd3d87eede9c425c4445ea011ca38d937db299382e6fa3"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-win32.whl", hash = "sha256:137678c63c977754abe9086a3ec011e8fd985ab90631145dfb9294ad09c102a7"}, - {file = "MarkupSafe-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:0576fe974b40a400449768941d5d0858cc624e3249dfd1e0c33674e5c7ca7aed"}, - {file = "MarkupSafe-2.1.2.tar.gz", hash = "sha256:abcabc8c2b26036d62d4c746381a6f7cf60aafcc653198ad678306986b09450d"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, + {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] [[package]] name = "mashumaro" -version = "3.7" +version = "3.8.1" description = "Fast serialization library on top of dataclasses" optional = false python-versions = ">=3.7" files = [ - {file = "mashumaro-3.7-py3-none-any.whl", hash = "sha256:bfffa6fa137c02cf5487e7161915968c534f2e01ce50a89c57ae1046b08134ee"}, - {file = "mashumaro-3.7.tar.gz", hash = "sha256:c3add6a2f09b524f4512cddbcd3cd5bb99eb29bd1114dd29889e0f9db124b830"}, + {file = "mashumaro-3.8.1-py3-none-any.whl", hash = 
"sha256:e060469a4bec1c86f8145ea27ecd99027ea3e343075a4efcb5e8a969a45b9fb9"}, + {file = "mashumaro-3.8.1.tar.gz", hash = "sha256:8bae8b25e2287b75094655b8ba8635f34016c09ca16498188f2f3b198d88b7ef"}, ] [package.dependencies] @@ -674,59 +944,70 @@ yaml = ["pyyaml (>=3.13)"] [[package]] name = "matplotlib" -version = "3.7.1" +version = "3.7.2" description = "Python plotting package" optional = false python-versions = ">=3.8" files = [ - {file = "matplotlib-3.7.1-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:95cbc13c1fc6844ab8812a525bbc237fa1470863ff3dace7352e910519e194b1"}, - {file = "matplotlib-3.7.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:08308bae9e91aca1ec6fd6dda66237eef9f6294ddb17f0d0b3c863169bf82353"}, - {file = "matplotlib-3.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:544764ba51900da4639c0f983b323d288f94f65f4024dc40ecb1542d74dc0500"}, - {file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56d94989191de3fcc4e002f93f7f1be5da476385dde410ddafbb70686acf00ea"}, - {file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99bc9e65901bb9a7ce5e7bb24af03675cbd7c70b30ac670aa263240635999a4"}, - {file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb7d248c34a341cd4c31a06fd34d64306624c8cd8d0def7abb08792a5abfd556"}, - {file = "matplotlib-3.7.1-cp310-cp310-win32.whl", hash = "sha256:ce463ce590f3825b52e9fe5c19a3c6a69fd7675a39d589e8b5fbe772272b3a24"}, - {file = "matplotlib-3.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:3d7bc90727351fb841e4d8ae620d2d86d8ed92b50473cd2b42ce9186104ecbba"}, - {file = "matplotlib-3.7.1-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:770a205966d641627fd5cf9d3cb4b6280a716522cd36b8b284a8eb1581310f61"}, - {file = "matplotlib-3.7.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f67bfdb83a8232cb7a92b869f9355d677bce24485c460b19d01970b64b2ed476"}, - {file = "matplotlib-3.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2bf092f9210e105f414a043b92af583c98f50050559616930d884387d0772aba"}, - {file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89768d84187f31717349c6bfadc0e0d8c321e8eb34522acec8a67b1236a66332"}, - {file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83111e6388dec67822e2534e13b243cc644c7494a4bb60584edbff91585a83c6"}, - {file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a867bf73a7eb808ef2afbca03bcdb785dae09595fbe550e1bab0cd023eba3de0"}, - {file = "matplotlib-3.7.1-cp311-cp311-win32.whl", hash = "sha256:fbdeeb58c0cf0595efe89c05c224e0a502d1aa6a8696e68a73c3efc6bc354304"}, - {file = "matplotlib-3.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:c0bd19c72ae53e6ab979f0ac6a3fafceb02d2ecafa023c5cca47acd934d10be7"}, - {file = "matplotlib-3.7.1-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:6eb88d87cb2c49af00d3bbc33a003f89fd9f78d318848da029383bfc08ecfbfb"}, - {file = "matplotlib-3.7.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:cf0e4f727534b7b1457898c4f4ae838af1ef87c359b76dcd5330fa31893a3ac7"}, - {file = "matplotlib-3.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:46a561d23b91f30bccfd25429c3c706afe7d73a5cc64ef2dfaf2b2ac47c1a5dc"}, - {file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8704726d33e9aa8a6d5215044b8d00804561971163563e6e6591f9dcf64340cc"}, - {file = 
"matplotlib-3.7.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4cf327e98ecf08fcbb82685acaf1939d3338548620ab8dfa02828706402c34de"}, - {file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:617f14ae9d53292ece33f45cba8503494ee199a75b44de7717964f70637a36aa"}, - {file = "matplotlib-3.7.1-cp38-cp38-win32.whl", hash = "sha256:7c9a4b2da6fac77bcc41b1ea95fadb314e92508bf5493ceff058e727e7ecf5b0"}, - {file = "matplotlib-3.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:14645aad967684e92fc349493fa10c08a6da514b3d03a5931a1bac26e6792bd1"}, - {file = "matplotlib-3.7.1-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:81a6b377ea444336538638d31fdb39af6be1a043ca5e343fe18d0f17e098770b"}, - {file = "matplotlib-3.7.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:28506a03bd7f3fe59cd3cd4ceb2a8d8a2b1db41afede01f66c42561b9be7b4b7"}, - {file = "matplotlib-3.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8c587963b85ce41e0a8af53b9b2de8dddbf5ece4c34553f7bd9d066148dc719c"}, - {file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8bf26ade3ff0f27668989d98c8435ce9327d24cffb7f07d24ef609e33d582439"}, - {file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:def58098f96a05f90af7e92fd127d21a287068202aa43b2a93476170ebd99e87"}, - {file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f883a22a56a84dba3b588696a2b8a1ab0d2c3d41be53264115c71b0a942d8fdb"}, - {file = "matplotlib-3.7.1-cp39-cp39-win32.whl", hash = "sha256:4f99e1b234c30c1e9714610eb0c6d2f11809c9c78c984a613ae539ea2ad2eb4b"}, - {file = "matplotlib-3.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:3ba2af245e36990facf67fde840a760128ddd71210b2ab6406e640188d69d136"}, - {file = "matplotlib-3.7.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3032884084f541163f295db8a6536e0abb0db464008fadca6c98aaf84ccf4717"}, - {file = "matplotlib-3.7.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a2cb34336110e0ed8bb4f650e817eed61fa064acbefeb3591f1b33e3a84fd96"}, - {file = "matplotlib-3.7.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b867e2f952ed592237a1828f027d332d8ee219ad722345b79a001f49df0936eb"}, - {file = "matplotlib-3.7.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:57bfb8c8ea253be947ccb2bc2d1bb3862c2bccc662ad1b4626e1f5e004557042"}, - {file = "matplotlib-3.7.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:438196cdf5dc8d39b50a45cb6e3f6274edbcf2254f85fa9b895bf85851c3a613"}, - {file = "matplotlib-3.7.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:21e9cff1a58d42e74d01153360de92b326708fb205250150018a52c70f43c290"}, - {file = "matplotlib-3.7.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75d4725d70b7c03e082bbb8a34639ede17f333d7247f56caceb3801cb6ff703d"}, - {file = "matplotlib-3.7.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:97cc368a7268141afb5690760921765ed34867ffb9655dd325ed207af85c7529"}, - {file = "matplotlib-3.7.1.tar.gz", hash = "sha256:7b73305f25eab4541bd7ee0b96d87e53ae9c9f1823be5659b806cd85786fe882"}, + {file = "matplotlib-3.7.2-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:2699f7e73a76d4c110f4f25be9d2496d6ab4f17345307738557d345f099e07de"}, + {file = "matplotlib-3.7.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a8035ba590658bae7562786c9cc6ea1a84aa49d3afab157e414c9e2ea74f496d"}, + {file = 
"matplotlib-3.7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f8e4a49493add46ad4a8c92f63e19d548b2b6ebbed75c6b4c7f46f57d36cdd1"}, + {file = "matplotlib-3.7.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71667eb2ccca4c3537d9414b1bc00554cb7f91527c17ee4ec38027201f8f1603"}, + {file = "matplotlib-3.7.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:152ee0b569a37630d8628534c628456b28686e085d51394da6b71ef84c4da201"}, + {file = "matplotlib-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:070f8dddd1f5939e60aacb8fa08f19551f4b0140fab16a3669d5cd6e9cb28fc8"}, + {file = "matplotlib-3.7.2-cp310-cp310-win32.whl", hash = "sha256:fdbb46fad4fb47443b5b8ac76904b2e7a66556844f33370861b4788db0f8816a"}, + {file = "matplotlib-3.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:23fb1750934e5f0128f9423db27c474aa32534cec21f7b2153262b066a581fd1"}, + {file = "matplotlib-3.7.2-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:30e1409b857aa8a747c5d4f85f63a79e479835f8dffc52992ac1f3f25837b544"}, + {file = "matplotlib-3.7.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:50e0a55ec74bf2d7a0ebf50ac580a209582c2dd0f7ab51bc270f1b4a0027454e"}, + {file = "matplotlib-3.7.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ac60daa1dc83e8821eed155796b0f7888b6b916cf61d620a4ddd8200ac70cd64"}, + {file = "matplotlib-3.7.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:305e3da477dc8607336ba10bac96986d6308d614706cae2efe7d3ffa60465b24"}, + {file = "matplotlib-3.7.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c308b255efb9b06b23874236ec0f10f026673ad6515f602027cc8ac7805352d"}, + {file = "matplotlib-3.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60c521e21031632aa0d87ca5ba0c1c05f3daacadb34c093585a0be6780f698e4"}, + {file = "matplotlib-3.7.2-cp311-cp311-win32.whl", hash = "sha256:26bede320d77e469fdf1bde212de0ec889169b04f7f1179b8930d66f82b30cbc"}, + {file = "matplotlib-3.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:af4860132c8c05261a5f5f8467f1b269bf1c7c23902d75f2be57c4a7f2394b3e"}, + {file = "matplotlib-3.7.2-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:a1733b8e84e7e40a9853e505fe68cc54339f97273bdfe6f3ed980095f769ddc7"}, + {file = "matplotlib-3.7.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d9881356dc48e58910c53af82b57183879129fa30492be69058c5b0d9fddf391"}, + {file = "matplotlib-3.7.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f081c03f413f59390a80b3e351cc2b2ea0205839714dbc364519bcf51f4b56ca"}, + {file = "matplotlib-3.7.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1cd120fca3407a225168238b790bd5c528f0fafde6172b140a2f3ab7a4ea63e9"}, + {file = "matplotlib-3.7.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a2c1590b90aa7bd741b54c62b78de05d4186271e34e2377e0289d943b3522273"}, + {file = "matplotlib-3.7.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d2ff3c984b8a569bc1383cd468fc06b70d7b59d5c2854ca39f1436ae8394117"}, + {file = "matplotlib-3.7.2-cp38-cp38-win32.whl", hash = "sha256:5dea00b62d28654b71ca92463656d80646675628d0828e08a5f3b57e12869e13"}, + {file = "matplotlib-3.7.2-cp38-cp38-win_amd64.whl", hash = "sha256:0f506a1776ee94f9e131af1ac6efa6e5bc7cb606a3e389b0ccb6e657f60bb676"}, + {file = "matplotlib-3.7.2-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:6515e878f91894c2e4340d81f0911857998ccaf04dbc1bba781e3d89cbf70608"}, + {file = 
"matplotlib-3.7.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:71f7a8c6b124e904db550f5b9fe483d28b896d4135e45c4ea381ad3b8a0e3256"}, + {file = "matplotlib-3.7.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12f01b92ecd518e0697da4d97d163b2b3aa55eb3eb4e2c98235b3396d7dad55f"}, + {file = "matplotlib-3.7.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7e28d6396563955f7af437894a36bf2b279462239a41028323e04b85179058b"}, + {file = "matplotlib-3.7.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbcf59334ff645e6a67cd5f78b4b2cdb76384cdf587fa0d2dc85f634a72e1a3e"}, + {file = "matplotlib-3.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:318c89edde72ff95d8df67d82aca03861240512994a597a435a1011ba18dbc7f"}, + {file = "matplotlib-3.7.2-cp39-cp39-win32.whl", hash = "sha256:ce55289d5659b5b12b3db4dc9b7075b70cef5631e56530f14b2945e8836f2d20"}, + {file = "matplotlib-3.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:2ecb5be2b2815431c81dc115667e33da0f5a1bcf6143980d180d09a717c4a12e"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fdcd28360dbb6203fb5219b1a5658df226ac9bebc2542a9e8f457de959d713d0"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c3cca3e842b11b55b52c6fb8bd6a4088693829acbfcdb3e815fa9b7d5c92c1b"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebf577c7a6744e9e1bd3fee45fc74a02710b214f94e2bde344912d85e0c9af7c"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:936bba394682049919dda062d33435b3be211dc3dcaa011e09634f060ec878b2"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bc221ffbc2150458b1cd71cdd9ddd5bb37962b036e41b8be258280b5b01da1dd"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35d74ebdb3f71f112b36c2629cf32323adfbf42679e2751252acd468f5001c07"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:717157e61b3a71d3d26ad4e1770dc85156c9af435659a25ee6407dc866cb258d"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:20f844d6be031948148ba49605c8b96dfe7d3711d1b63592830d650622458c11"}, + {file = "matplotlib-3.7.2.tar.gz", hash = "sha256:a8cdb91dddb04436bd2f098b8fdf4b81352e68cf4d2c6756fcc414791076569b"}, ] +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +kiwisolver = ">=1.0.1" +numpy = ">=1.20" +packaging = ">=20.0" +pillow = ">=6.2.0" +pyparsing = ">=2.3.1,<3.1" +python-dateutil = ">=2.7" + [[package]] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
-optional = true +optional = false python-versions = ">=3.5" files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, @@ -746,39 +1027,36 @@ files = [ [[package]] name = "numpy" -version = "1.24.3" +version = "1.25.1" description = "Fundamental package for array computing in Python" optional = false -python-versions = ">=3.8" -files = [ - {file = "numpy-1.24.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3c1104d3c036fb81ab923f507536daedc718d0ad5a8707c6061cdfd6d184e570"}, - {file = "numpy-1.24.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:202de8f38fc4a45a3eea4b63e2f376e5f2dc64ef0fa692838e31a808520efaf7"}, - {file = "numpy-1.24.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8535303847b89aa6b0f00aa1dc62867b5a32923e4d1681a35b5eef2d9591a463"}, - {file = "numpy-1.24.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d926b52ba1367f9acb76b0df6ed21f0b16a1ad87c6720a1121674e5cf63e2b6"}, - {file = "numpy-1.24.3-cp310-cp310-win32.whl", hash = "sha256:f21c442fdd2805e91799fbe044a7b999b8571bb0ab0f7850d0cb9641a687092b"}, - {file = "numpy-1.24.3-cp310-cp310-win_amd64.whl", hash = "sha256:ab5f23af8c16022663a652d3b25dcdc272ac3f83c3af4c02eb8b824e6b3ab9d7"}, - {file = "numpy-1.24.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9a7721ec204d3a237225db3e194c25268faf92e19338a35f3a224469cb6039a3"}, - {file = "numpy-1.24.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d6cc757de514c00b24ae8cf5c876af2a7c3df189028d68c0cb4eaa9cd5afc2bf"}, - {file = "numpy-1.24.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76e3f4e85fc5d4fd311f6e9b794d0c00e7002ec122be271f2019d63376f1d385"}, - {file = "numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1d3c026f57ceaad42f8231305d4653d5f05dc6332a730ae5c0bea3513de0950"}, - {file = "numpy-1.24.3-cp311-cp311-win32.whl", hash = "sha256:c91c4afd8abc3908e00a44b2672718905b8611503f7ff87390cc0ac3423fb096"}, - {file = "numpy-1.24.3-cp311-cp311-win_amd64.whl", hash = "sha256:5342cf6aad47943286afa6f1609cad9b4266a05e7f2ec408e2cf7aea7ff69d80"}, - {file = "numpy-1.24.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7776ea65423ca6a15255ba1872d82d207bd1e09f6d0894ee4a64678dd2204078"}, - {file = "numpy-1.24.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ae8d0be48d1b6ed82588934aaaa179875e7dc4f3d84da18d7eae6eb3f06c242c"}, - {file = "numpy-1.24.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecde0f8adef7dfdec993fd54b0f78183051b6580f606111a6d789cd14c61ea0c"}, - {file = "numpy-1.24.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4749e053a29364d3452c034827102ee100986903263e89884922ef01a0a6fd2f"}, - {file = "numpy-1.24.3-cp38-cp38-win32.whl", hash = "sha256:d933fabd8f6a319e8530d0de4fcc2e6a61917e0b0c271fded460032db42a0fe4"}, - {file = "numpy-1.24.3-cp38-cp38-win_amd64.whl", hash = "sha256:56e48aec79ae238f6e4395886b5eaed058abb7231fb3361ddd7bfdf4eed54289"}, - {file = "numpy-1.24.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4719d5aefb5189f50887773699eaf94e7d1e02bf36c1a9d353d9f46703758ca4"}, - {file = "numpy-1.24.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ec87a7084caa559c36e0a2309e4ecb1baa03b687201d0a847c8b0ed476a7187"}, - {file = "numpy-1.24.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea8282b9bcfe2b5e7d491d0bf7f3e2da29700cec05b49e64d6246923329f2b02"}, - {file = 
"numpy-1.24.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210461d87fb02a84ef243cac5e814aad2b7f4be953b32cb53327bb49fd77fbb4"}, - {file = "numpy-1.24.3-cp39-cp39-win32.whl", hash = "sha256:784c6da1a07818491b0ffd63c6bbe5a33deaa0e25a20e1b3ea20cf0e43f8046c"}, - {file = "numpy-1.24.3-cp39-cp39-win_amd64.whl", hash = "sha256:d5036197ecae68d7f491fcdb4df90082b0d4960ca6599ba2659957aafced7c17"}, - {file = "numpy-1.24.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:352ee00c7f8387b44d19f4cada524586f07379c0d49270f87233983bc5087ca0"}, - {file = "numpy-1.24.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7d6acc2e7524c9955e5c903160aa4ea083736fde7e91276b0e5d98e6332812"}, - {file = "numpy-1.24.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:35400e6a8d102fd07c71ed7dcadd9eb62ee9a6e84ec159bd48c28235bbb0f8e4"}, - {file = "numpy-1.24.3.tar.gz", hash = "sha256:ab344f1bf21f140adab8e47fdbc7c35a477dc01408791f8ba00d018dd0bc5155"}, +python-versions = ">=3.9" +files = [ + {file = "numpy-1.25.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77d339465dff3eb33c701430bcb9c325b60354698340229e1dff97745e6b3efa"}, + {file = "numpy-1.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d736b75c3f2cb96843a5c7f8d8ccc414768d34b0a75f466c05f3a739b406f10b"}, + {file = "numpy-1.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a90725800caeaa160732d6b31f3f843ebd45d6b5f3eec9e8cc287e30f2805bf"}, + {file = "numpy-1.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c6c9261d21e617c6dc5eacba35cb68ec36bb72adcff0dee63f8fbc899362588"}, + {file = "numpy-1.25.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0def91f8af6ec4bb94c370e38c575855bf1d0be8a8fbfba42ef9c073faf2cf19"}, + {file = "numpy-1.25.1-cp310-cp310-win32.whl", hash = "sha256:fd67b306320dcadea700a8f79b9e671e607f8696e98ec255915c0c6d6b818503"}, + {file = "numpy-1.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:c1516db588987450b85595586605742879e50dcce923e8973f79529651545b57"}, + {file = "numpy-1.25.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6b82655dd8efeea69dbf85d00fca40013d7f503212bc5259056244961268b66e"}, + {file = "numpy-1.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e8f6049c4878cb16960fbbfb22105e49d13d752d4d8371b55110941fb3b17800"}, + {file = "numpy-1.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41a56b70e8139884eccb2f733c2f7378af06c82304959e174f8e7370af112e09"}, + {file = "numpy-1.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5154b1a25ec796b1aee12ac1b22f414f94752c5f94832f14d8d6c9ac40bcca6"}, + {file = "numpy-1.25.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38eb6548bb91c421261b4805dc44def9ca1a6eef6444ce35ad1669c0f1a3fc5d"}, + {file = "numpy-1.25.1-cp311-cp311-win32.whl", hash = "sha256:791f409064d0a69dd20579345d852c59822c6aa087f23b07b1b4e28ff5880fcb"}, + {file = "numpy-1.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:c40571fe966393b212689aa17e32ed905924120737194b5d5c1b20b9ed0fb171"}, + {file = "numpy-1.25.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3d7abcdd85aea3e6cdddb59af2350c7ab1ed764397f8eec97a038ad244d2d105"}, + {file = "numpy-1.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a180429394f81c7933634ae49b37b472d343cccb5bb0c4a575ac8bbc433722f"}, + {file = "numpy-1.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d412c1697c3853c6fc3cb9751b4915859c7afe6a277c2bf00acf287d56c4e625"}, + {file = "numpy-1.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20e1266411120a4f16fad8efa8e0454d21d00b8c7cee5b5ccad7565d95eb42dd"}, + {file = "numpy-1.25.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f76aebc3358ade9eacf9bc2bb8ae589863a4f911611694103af05346637df1b7"}, + {file = "numpy-1.25.1-cp39-cp39-win32.whl", hash = "sha256:247d3ffdd7775bdf191f848be8d49100495114c82c2bd134e8d5d075fb386a1c"}, + {file = "numpy-1.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:1d5d3c68e443c90b38fdf8ef40e60e2538a27548b39b12b73132456847f4b631"}, + {file = "numpy-1.25.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:35a9527c977b924042170a0887de727cd84ff179e478481404c5dc66b4170009"}, + {file = "numpy-1.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d3fe3dd0506a28493d82dc3cf254be8cd0d26f4008a417385cbf1ae95b54004"}, + {file = "numpy-1.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:012097b5b0d00a11070e8f2e261128c44157a8689f7dedcf35576e525893f4fe"}, + {file = "numpy-1.25.1.tar.gz", hash = "sha256:9a3a9f3a61480cc086117b426a8bd86869c213fc4072e606f01c4e4b66eb92bf"}, ] [[package]] @@ -872,11 +1150,22 @@ mypy = ["pandas-stubs (<=1.4.3.220807)"] pyspark = ["pyspark (>=3.2.0)"] strategies = ["hypothesis (>=5.41.1)"] +[[package]] +name = "pathspec" +version = "0.11.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"}, + {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, +] + [[package]] name = "pillow" version = "9.5.0" description = "Python Imaging Library (Fork)" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "Pillow-9.5.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:ace6ca218308447b9077c14ea4ef381ba0b67ee78d64046b3f19cf4e1139ad16"}, @@ -951,15 +1240,30 @@ files = [ docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +[[package]] +name = "platformdirs" +version = "3.9.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-3.9.1-py3-none-any.whl", hash = "sha256:ad8291ae0ae5072f66c16945166cb11c63394c7a3ad1b1bc9828ca3162da8c2f"}, + {file = "platformdirs-3.9.1.tar.gz", hash = "sha256:1b42b450ad933e981d56e59f1b97495428c9bd60698baab9f3eb3d00d5822421"}, +] + +[package.extras] +docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)"] + [[package]] name = "pluggy" -version = "1.0.0" +version = "1.2.0" description = "plugin and hook calling mechanisms for python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, - {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, + {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"}, + {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"}, ] [package.extras] @@ -1008,6 +1312,25 @@ timezone = ["backports.zoneinfo", "tzdata"] xlsx2csv = ["xlsx2csv (>=0.8.0)"] xlsxwriter = ["xlsxwriter"] +[[package]] +name = "printf-log-formatter" +version = "0.3.0" +description = "Convert logger f-strings and str.format syntax to printf-style strings" +optional = false +python-versions = ">=3.7" +files = [ + {file = "printf_log_formatter-0.3.0-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:a41bfba0d49dc16675ac9c180c3380903622a4f48d6a537c72394054ed513a35"}, + {file = "printf_log_formatter-0.3.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c6bb875f2e7ede7467ee59a8f73125cb6361d9b599b8bcf8f9e1f2ca487519ae"}, + {file = "printf_log_formatter-0.3.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:300af2cdeafde7e4f0b631d41e82ceb7322a290b8e04c4e4bb35f8334ae9c7e5"}, + {file = "printf_log_formatter-0.3.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b51bfb73707f47b0dddc215a4660d088be918a250f5040683b724d2a2a9ad4d6"}, + {file = "printf_log_formatter-0.3.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7099fa053a3eae2e9594f18712458b90d8008bc5f942f67bb18f9bc351001cd"}, + {file = "printf_log_formatter-0.3.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d17c0e555ed68249071675f8a86310b251fb84711a1928bfef5d0d14ece2f410"}, + {file = "printf_log_formatter-0.3.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af414b2e2d5b78504ace2d5b76b8aaae0afbc894314b56be170d8a052ba81388"}, + {file = "printf_log_formatter-0.3.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97d675c90d84b87ecaa48d867b937876d46c66b0d480bf2bae373738fe8a3d17"}, + {file = "printf_log_formatter-0.3.0-py3-none-win32.whl", hash = "sha256:d2fc29f8776121c1477100287c285d4798d310d0547f4573ad3eae23b66549cc"}, + {file = "printf_log_formatter-0.3.0-py3-none-win_amd64.whl", hash = "sha256:27c3f053166bfe74e490661e8fe47167b12b8ca4d3066ba13f1ab53d1c7c6788"}, +] + [[package]] name = "prometheus-client" version = "0.16.0" @@ -1037,38 +1360,60 @@ files = [ fastapi = ">=0.38.1,<1.0.0" prometheus-client = ">=0.8.0,<1.0.0" +[[package]] +name = "psycopg2" +version = "2.9.6" +description = "psycopg2 - Python-PostgreSQL 
Database Adapter" +optional = false +python-versions = ">=3.6" +files = [ + {file = "psycopg2-2.9.6-cp310-cp310-win32.whl", hash = "sha256:f7a7a5ee78ba7dc74265ba69e010ae89dae635eea0e97b055fb641a01a31d2b1"}, + {file = "psycopg2-2.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:f75001a1cbbe523e00b0ef896a5a1ada2da93ccd752b7636db5a99bc57c44494"}, + {file = "psycopg2-2.9.6-cp311-cp311-win32.whl", hash = "sha256:53f4ad0a3988f983e9b49a5d9765d663bbe84f508ed655affdb810af9d0972ad"}, + {file = "psycopg2-2.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:b81fcb9ecfc584f661b71c889edeae70bae30d3ef74fa0ca388ecda50b1222b7"}, + {file = "psycopg2-2.9.6-cp36-cp36m-win32.whl", hash = "sha256:11aca705ec888e4f4cea97289a0bf0f22a067a32614f6ef64fcf7b8bfbc53744"}, + {file = "psycopg2-2.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:36c941a767341d11549c0fbdbb2bf5be2eda4caf87f65dfcd7d146828bd27f39"}, + {file = "psycopg2-2.9.6-cp37-cp37m-win32.whl", hash = "sha256:869776630c04f335d4124f120b7fb377fe44b0a7645ab3c34b4ba42516951889"}, + {file = "psycopg2-2.9.6-cp37-cp37m-win_amd64.whl", hash = "sha256:a8ad4a47f42aa6aec8d061fdae21eaed8d864d4bb0f0cade5ad32ca16fcd6258"}, + {file = "psycopg2-2.9.6-cp38-cp38-win32.whl", hash = "sha256:2362ee4d07ac85ff0ad93e22c693d0f37ff63e28f0615a16b6635a645f4b9214"}, + {file = "psycopg2-2.9.6-cp38-cp38-win_amd64.whl", hash = "sha256:d24ead3716a7d093b90b27b3d73459fe8cd90fd7065cf43b3c40966221d8c394"}, + {file = "psycopg2-2.9.6-cp39-cp39-win32.whl", hash = "sha256:1861a53a6a0fd248e42ea37c957d36950da00266378746588eab4f4b5649e95f"}, + {file = "psycopg2-2.9.6-cp39-cp39-win_amd64.whl", hash = "sha256:ded2faa2e6dfb430af7713d87ab4abbfc764d8d7fb73eafe96a24155f906ebf5"}, + {file = "psycopg2-2.9.6.tar.gz", hash = "sha256:f15158418fd826831b28585e2ab48ed8df2d0d98f502a2b4fe619e7d5ca29011"}, +] + [[package]] name = "pyarrow" -version = "12.0.0" +version = "12.0.1" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.7" files = [ - {file = "pyarrow-12.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:3b97649c8a9a09e1d8dc76513054f1331bd9ece78ee39365e6bf6bc7503c1e94"}, - {file = "pyarrow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bc4ea634dacb03936f50fcf59574a8e727f90c17c24527e488d8ceb52ae284de"}, - {file = "pyarrow-12.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d568acfca3faa565d663e53ee34173be8e23a95f78f2abfdad198010ec8f745"}, - {file = "pyarrow-12.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b50bb9a82dca38a002d7cbd802a16b1af0f8c50ed2ec94a319f5f2afc047ee9"}, - {file = "pyarrow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:3d1733b1ea086b3c101427d0e57e2be3eb964686e83c2363862a887bb5c41fa8"}, - {file = "pyarrow-12.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:a7cd32fe77f967fe08228bc100433273020e58dd6caced12627bcc0a7675a513"}, - {file = "pyarrow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:92fb031e6777847f5c9b01eaa5aa0c9033e853ee80117dce895f116d8b0c3ca3"}, - {file = "pyarrow-12.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:280289ebfd4ac3570f6b776515baa01e4dcbf17122c401e4b7170a27c4be63fd"}, - {file = "pyarrow-12.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:272f147d4f8387bec95f17bb58dcfc7bc7278bb93e01cb7b08a0e93a8921e18e"}, - {file = "pyarrow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:0846ace49998825eda4722f8d7f83fa05601c832549c9087ea49d6d5397d8cec"}, - {file = 
"pyarrow-12.0.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:993287136369aca60005ee7d64130f9466489c4f7425f5c284315b0a5401ccd9"}, - {file = "pyarrow-12.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a7b6a765ee4f88efd7d8348d9a1f804487d60799d0428b6ddf3344eaef37282"}, - {file = "pyarrow-12.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1c4fce253d5bdc8d62f11cfa3da5b0b34b562c04ce84abb8bd7447e63c2b327"}, - {file = "pyarrow-12.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e6be4d85707fc8e7a221c8ab86a40449ce62559ce25c94321df7c8500245888f"}, - {file = "pyarrow-12.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ea830d9f66bfb82d30b5794642f83dd0e4a718846462d22328981e9eb149cba8"}, - {file = "pyarrow-12.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7b5b9f60d9ef756db59bec8d90e4576b7df57861e6a3d6a8bf99538f68ca15b3"}, - {file = "pyarrow-12.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b99e559d27db36ad3a33868a475f03e3129430fc065accc839ef4daa12c6dab6"}, - {file = "pyarrow-12.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b0810864a593b89877120972d1f7af1d1c9389876dbed92b962ed81492d3ffc"}, - {file = "pyarrow-12.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:23a77d97f4d101ddfe81b9c2ee03a177f0e590a7e68af15eafa06e8f3cf05976"}, - {file = "pyarrow-12.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:2cc63e746221cddb9001f7281dee95fd658085dd5b717b076950e1ccc607059c"}, - {file = "pyarrow-12.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8c26912607e26c2991826bbaf3cf2b9c8c3e17566598c193b492f058b40d3a4"}, - {file = "pyarrow-12.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d8b90efc290e99a81d06015f3a46601c259ecc81ffb6d8ce288c91bd1b868c9"}, - {file = "pyarrow-12.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2466be046b81863be24db370dffd30a2e7894b4f9823fb60ef0a733c31ac6256"}, - {file = "pyarrow-12.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:0e36425b1c1cbf5447718b3f1751bf86c58f2b3ad299f996cd9b1aa040967656"}, - {file = "pyarrow-12.0.0.tar.gz", hash = "sha256:19c812d303610ab5d664b7b1de4051ae23565f9f94d04cbea9e50569746ae1ee"}, + {file = "pyarrow-12.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:6d288029a94a9bb5407ceebdd7110ba398a00412c5b0155ee9813a40d246c5df"}, + {file = "pyarrow-12.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:345e1828efdbd9aa4d4de7d5676778aba384a2c3add896d995b23d368e60e5af"}, + {file = "pyarrow-12.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d6009fdf8986332b2169314da482baed47ac053311c8934ac6651e614deacd6"}, + {file = "pyarrow-12.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d3c4cbbf81e6dd23fe921bc91dc4619ea3b79bc58ef10bce0f49bdafb103daf"}, + {file = "pyarrow-12.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:cdacf515ec276709ac8042c7d9bd5be83b4f5f39c6c037a17a60d7ebfd92c890"}, + {file = "pyarrow-12.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:749be7fd2ff260683f9cc739cb862fb11be376de965a2a8ccbf2693b098db6c7"}, + {file = "pyarrow-12.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6895b5fb74289d055c43db3af0de6e16b07586c45763cb5e558d38b86a91e3a7"}, + {file = "pyarrow-12.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1887bdae17ec3b4c046fcf19951e71b6a619f39fa674f9881216173566c8f718"}, + {file = 
"pyarrow-12.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2c9cb8eeabbadf5fcfc3d1ddea616c7ce893db2ce4dcef0ac13b099ad7ca082"}, + {file = "pyarrow-12.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:ce4aebdf412bd0eeb800d8e47db854f9f9f7e2f5a0220440acf219ddfddd4f63"}, + {file = "pyarrow-12.0.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:e0d8730c7f6e893f6db5d5b86eda42c0a130842d101992b581e2138e4d5663d3"}, + {file = "pyarrow-12.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43364daec02f69fec89d2315f7fbfbeec956e0d991cbbef471681bd77875c40f"}, + {file = "pyarrow-12.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:051f9f5ccf585f12d7de836e50965b3c235542cc896959320d9776ab93f3b33d"}, + {file = "pyarrow-12.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:be2757e9275875d2a9c6e6052ac7957fbbfc7bc7370e4a036a9b893e96fedaba"}, + {file = "pyarrow-12.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:cf812306d66f40f69e684300f7af5111c11f6e0d89d6b733e05a3de44961529d"}, + {file = "pyarrow-12.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:459a1c0ed2d68671188b2118c63bac91eaef6fc150c77ddd8a583e3c795737bf"}, + {file = "pyarrow-12.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85e705e33eaf666bbe508a16fd5ba27ca061e177916b7a317ba5a51bee43384c"}, + {file = "pyarrow-12.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9120c3eb2b1f6f516a3b7a9714ed860882d9ef98c4b17edcdc91d95b7528db60"}, + {file = "pyarrow-12.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:c780f4dc40460015d80fcd6a6140de80b615349ed68ef9adb653fe351778c9b3"}, + {file = "pyarrow-12.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:a3c63124fc26bf5f95f508f5d04e1ece8cc23a8b0af2a1e6ab2b1ec3fdc91b24"}, + {file = "pyarrow-12.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b13329f79fa4472324f8d32dc1b1216616d09bd1e77cfb13104dec5463632c36"}, + {file = "pyarrow-12.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb656150d3d12ec1396f6dde542db1675a95c0cc8366d507347b0beed96e87ca"}, + {file = "pyarrow-12.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6251e38470da97a5b2e00de5c6a049149f7b2bd62f12fa5dbb9ac674119ba71a"}, + {file = "pyarrow-12.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:3de26da901216149ce086920547dfff5cd22818c9eab67ebc41e863a5883bac7"}, + {file = "pyarrow-12.0.1.tar.gz", hash = "sha256:cce317fc96e5b71107bf1f9f184d5e54e2bd14bbf3f9a3d62819961f0af86fec"}, ] [package.dependencies] @@ -1087,47 +1432,47 @@ files = [ [[package]] name = "pydantic" -version = "1.10.8" +version = "1.10.11" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1243d28e9b05003a89d72e7915fdb26ffd1d39bdd39b00b7dbe4afae4b557f9d"}, - {file = "pydantic-1.10.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0ab53b609c11dfc0c060d94335993cc2b95b2150e25583bec37a49b2d6c6c3f"}, - {file = "pydantic-1.10.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9613fadad06b4f3bc5db2653ce2f22e0de84a7c6c293909b48f6ed37b83c61f"}, - {file = "pydantic-1.10.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df7800cb1984d8f6e249351139667a8c50a379009271ee6236138a22a0c0f319"}, - {file = "pydantic-1.10.8-cp310-cp310-musllinux_1_1_i686.whl", 
hash = "sha256:0c6fafa0965b539d7aab0a673a046466d23b86e4b0e8019d25fd53f4df62c277"}, - {file = "pydantic-1.10.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e82d4566fcd527eae8b244fa952d99f2ca3172b7e97add0b43e2d97ee77f81ab"}, - {file = "pydantic-1.10.8-cp310-cp310-win_amd64.whl", hash = "sha256:ab523c31e22943713d80d8d342d23b6f6ac4b792a1e54064a8d0cf78fd64e800"}, - {file = "pydantic-1.10.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:666bdf6066bf6dbc107b30d034615d2627e2121506c555f73f90b54a463d1f33"}, - {file = "pydantic-1.10.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:35db5301b82e8661fa9c505c800d0990bc14e9f36f98932bb1d248c0ac5cada5"}, - {file = "pydantic-1.10.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f90c1e29f447557e9e26afb1c4dbf8768a10cc676e3781b6a577841ade126b85"}, - {file = "pydantic-1.10.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93e766b4a8226e0708ef243e843105bf124e21331694367f95f4e3b4a92bbb3f"}, - {file = "pydantic-1.10.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:88f195f582851e8db960b4a94c3e3ad25692c1c1539e2552f3df7a9e972ef60e"}, - {file = "pydantic-1.10.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:34d327c81e68a1ecb52fe9c8d50c8a9b3e90d3c8ad991bfc8f953fb477d42fb4"}, - {file = "pydantic-1.10.8-cp311-cp311-win_amd64.whl", hash = "sha256:d532bf00f381bd6bc62cabc7d1372096b75a33bc197a312b03f5838b4fb84edd"}, - {file = "pydantic-1.10.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7d5b8641c24886d764a74ec541d2fc2c7fb19f6da2a4001e6d580ba4a38f7878"}, - {file = "pydantic-1.10.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b1f6cb446470b7ddf86c2e57cd119a24959af2b01e552f60705910663af09a4"}, - {file = "pydantic-1.10.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c33b60054b2136aef8cf190cd4c52a3daa20b2263917c49adad20eaf381e823b"}, - {file = "pydantic-1.10.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1952526ba40b220b912cdc43c1c32bcf4a58e3f192fa313ee665916b26befb68"}, - {file = "pydantic-1.10.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:bb14388ec45a7a0dc429e87def6396f9e73c8c77818c927b6a60706603d5f2ea"}, - {file = "pydantic-1.10.8-cp37-cp37m-win_amd64.whl", hash = "sha256:16f8c3e33af1e9bb16c7a91fc7d5fa9fe27298e9f299cff6cb744d89d573d62c"}, - {file = "pydantic-1.10.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1ced8375969673929809d7f36ad322934c35de4af3b5e5b09ec967c21f9f7887"}, - {file = "pydantic-1.10.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:93e6bcfccbd831894a6a434b0aeb1947f9e70b7468f274154d03d71fabb1d7c6"}, - {file = "pydantic-1.10.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:191ba419b605f897ede9892f6c56fb182f40a15d309ef0142212200a10af4c18"}, - {file = "pydantic-1.10.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:052d8654cb65174d6f9490cc9b9a200083a82cf5c3c5d3985db765757eb3b375"}, - {file = "pydantic-1.10.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ceb6a23bf1ba4b837d0cfe378329ad3f351b5897c8d4914ce95b85fba96da5a1"}, - {file = "pydantic-1.10.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f2e754d5566f050954727c77f094e01793bcb5725b663bf628fa6743a5a9108"}, - {file = "pydantic-1.10.8-cp38-cp38-win_amd64.whl", hash = "sha256:6a82d6cda82258efca32b40040228ecf43a548671cb174a1e81477195ed3ed56"}, - {file = 
"pydantic-1.10.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e59417ba8a17265e632af99cc5f35ec309de5980c440c255ab1ca3ae96a3e0e"}, - {file = "pydantic-1.10.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:84d80219c3f8d4cad44575e18404099c76851bc924ce5ab1c4c8bb5e2a2227d0"}, - {file = "pydantic-1.10.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e4148e635994d57d834be1182a44bdb07dd867fa3c2d1b37002000646cc5459"}, - {file = "pydantic-1.10.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12f7b0bf8553e310e530e9f3a2f5734c68699f42218bf3568ef49cd9b0e44df4"}, - {file = "pydantic-1.10.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:42aa0c4b5c3025483240a25b09f3c09a189481ddda2ea3a831a9d25f444e03c1"}, - {file = "pydantic-1.10.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:17aef11cc1b997f9d574b91909fed40761e13fac438d72b81f902226a69dac01"}, - {file = "pydantic-1.10.8-cp39-cp39-win_amd64.whl", hash = "sha256:66a703d1983c675a6e0fed8953b0971c44dba48a929a2000a493c3772eb61a5a"}, - {file = "pydantic-1.10.8-py3-none-any.whl", hash = "sha256:7456eb22ed9aaa24ff3e7b4757da20d9e5ce2a81018c1b3ebd81a0b88a18f3b2"}, - {file = "pydantic-1.10.8.tar.gz", hash = "sha256:1410275520dfa70effadf4c21811d755e7ef9bb1f1d077a21958153a92c8d9ca"}, + {file = "pydantic-1.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ff44c5e89315b15ff1f7fdaf9853770b810936d6b01a7bcecaa227d2f8fe444f"}, + {file = "pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a6c098d4ab5e2d5b3984d3cb2527e2d6099d3de85630c8934efcfdc348a9760e"}, + {file = "pydantic-1.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16928fdc9cb273c6af00d9d5045434c39afba5f42325fb990add2c241402d151"}, + {file = "pydantic-1.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0588788a9a85f3e5e9ebca14211a496409cb3deca5b6971ff37c556d581854e7"}, + {file = "pydantic-1.10.11-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e9baf78b31da2dc3d3f346ef18e58ec5f12f5aaa17ac517e2ffd026a92a87588"}, + {file = "pydantic-1.10.11-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:373c0840f5c2b5b1ccadd9286782852b901055998136287828731868027a724f"}, + {file = "pydantic-1.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:c3339a46bbe6013ef7bdd2844679bfe500347ac5742cd4019a88312aa58a9847"}, + {file = "pydantic-1.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:08a6c32e1c3809fbc49debb96bf833164f3438b3696abf0fbeceb417d123e6eb"}, + {file = "pydantic-1.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a451ccab49971af043ec4e0d207cbc8cbe53dbf148ef9f19599024076fe9c25b"}, + {file = "pydantic-1.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b02d24f7b2b365fed586ed73582c20f353a4c50e4be9ba2c57ab96f8091ddae"}, + {file = "pydantic-1.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f34739a89260dfa420aa3cbd069fbcc794b25bbe5c0a214f8fb29e363484b66"}, + {file = "pydantic-1.10.11-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e297897eb4bebde985f72a46a7552a7556a3dd11e7f76acda0c1093e3dbcf216"}, + {file = "pydantic-1.10.11-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d185819a7a059550ecb85d5134e7d40f2565f3dd94cfd870132c5f91a89cf58c"}, + {file = "pydantic-1.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:4400015f15c9b464c9db2d5d951b6a780102cfa5870f2c036d37c23b56f7fc1b"}, + {file = 
"pydantic-1.10.11-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2417de68290434461a266271fc57274a138510dca19982336639484c73a07af6"}, + {file = "pydantic-1.10.11-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:331c031ba1554b974c98679bd0780d89670d6fd6f53f5d70b10bdc9addee1713"}, + {file = "pydantic-1.10.11-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8268a735a14c308923e8958363e3a3404f6834bb98c11f5ab43251a4e410170c"}, + {file = "pydantic-1.10.11-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:44e51ba599c3ef227e168424e220cd3e544288c57829520dc90ea9cb190c3248"}, + {file = "pydantic-1.10.11-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d7781f1d13b19700b7949c5a639c764a077cbbdd4322ed505b449d3ca8edcb36"}, + {file = "pydantic-1.10.11-cp37-cp37m-win_amd64.whl", hash = "sha256:7522a7666157aa22b812ce14c827574ddccc94f361237ca6ea8bb0d5c38f1629"}, + {file = "pydantic-1.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc64eab9b19cd794a380179ac0e6752335e9555d214cfcb755820333c0784cb3"}, + {file = "pydantic-1.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8dc77064471780262b6a68fe67e013298d130414d5aaf9b562c33987dbd2cf4f"}, + {file = "pydantic-1.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe429898f2c9dd209bd0632a606bddc06f8bce081bbd03d1c775a45886e2c1cb"}, + {file = "pydantic-1.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:192c608ad002a748e4a0bed2ddbcd98f9b56df50a7c24d9a931a8c5dd053bd3d"}, + {file = "pydantic-1.10.11-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ef55392ec4bb5721f4ded1096241e4b7151ba6d50a50a80a2526c854f42e6a2f"}, + {file = "pydantic-1.10.11-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:41e0bb6efe86281623abbeeb0be64eab740c865388ee934cd3e6a358784aca6e"}, + {file = "pydantic-1.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:265a60da42f9f27e0b1014eab8acd3e53bd0bad5c5b4884e98a55f8f596b2c19"}, + {file = "pydantic-1.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:469adf96c8e2c2bbfa655fc7735a2a82f4c543d9fee97bd113a7fb509bf5e622"}, + {file = "pydantic-1.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e6cbfbd010b14c8a905a7b10f9fe090068d1744d46f9e0c021db28daeb8b6de1"}, + {file = "pydantic-1.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abade85268cc92dff86d6effcd917893130f0ff516f3d637f50dadc22ae93999"}, + {file = "pydantic-1.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9738b0f2e6c70f44ee0de53f2089d6002b10c33264abee07bdb5c7f03038303"}, + {file = "pydantic-1.10.11-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:787cf23e5a0cde753f2eabac1b2e73ae3844eb873fd1f5bdbff3048d8dbb7604"}, + {file = "pydantic-1.10.11-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:174899023337b9fc685ac8adaa7b047050616136ccd30e9070627c1aaab53a13"}, + {file = "pydantic-1.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:1954f8778489a04b245a1e7b8b22a9d3ea8ef49337285693cf6959e4b757535e"}, + {file = "pydantic-1.10.11-py3-none-any.whl", hash = "sha256:008c5e266c8aada206d0627a011504e14268a62091450210eda7c07fabe6963e"}, + {file = "pydantic-1.10.11.tar.gz", hash = "sha256:f66d479cf7eb331372c470614be6511eae96f1f120344c25f3f9bb59fb1b5528"}, ] [package.dependencies] @@ -1137,15 +1482,29 @@ typing-extensions = ">=4.2.0" dotenv = ["python-dotenv (>=0.10.4)"] email = ["email-validator (>=1.0.3)"] +[[package]] +name 
= "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + [[package]] name = "pytest" -version = "7.3.1" +version = "7.4.0" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"}, - {file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"}, + {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"}, + {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"}, ] [package.dependencies] @@ -1157,7 +1516,7 @@ pluggy = ">=0.12,<2.0" tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-asyncio" @@ -1179,13 +1538,13 @@ testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy [[package]] name = "pytest-mock" -version = "3.10.0" +version = "3.11.1" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-mock-3.10.0.tar.gz", hash = "sha256:fbbdb085ef7c252a326fd8cdcac0aa3b1333d8811f131bdcc701002e1be7ed4f"}, - {file = "pytest_mock-3.10.0-py3-none-any.whl", hash = "sha256:f4c973eeae0282963eb293eb173ce91b091a79c1334455acfac9ddee8a1c784b"}, + {file = "pytest-mock-3.11.1.tar.gz", hash = "sha256:7f6b125602ac6d743e523ae0bfa71e1a697a2f5534064528c6ff84c2f7c2fc7f"}, + {file = "pytest_mock-3.11.1-py3-none-any.whl", hash = "sha256:21c279fff83d70763b05f8874cc9cfb3fcacd6d354247a976f9529d19f9acf39"}, ] [package.dependencies] @@ -1235,13 +1594,13 @@ files = [ [[package]] name = "redis" -version = "4.5.5" +version = "4.6.0" description = "Python client for Redis database and key-value store" optional = false python-versions = ">=3.7" files = [ - {file = "redis-4.5.5-py3-none-any.whl", hash = "sha256:77929bc7f5dab9adf3acba2d3bb7d7658f1e0c2f1cafe7eb36434e751c471119"}, - {file = "redis-4.5.5.tar.gz", hash = "sha256:dc87a0bdef6c8bfe1ef1e1c40be7034390c2ae02d92dcd0c7ca1729443899880"}, + {file = "redis-4.6.0-py3-none-any.whl", hash = "sha256:e2b03db868160ee4591de3cb90d40ebb50a90dd302138775937f6a42b7ed183c"}, + {file = "redis-4.6.0.tar.gz", hash = "sha256:585dc516b9eb042a619ef0a39c3d7d55fe81bdb4df09a52c9cdde0d07bf1aa7d"}, ] [package.dependencies] @@ -1303,52 +1662,52 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.15" +version = "2.0.19" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:78303719c6f72af97814b0072ad18bee72e70adca8d95cf8fecd59c5e1ddb040"}, - {file = "SQLAlchemy-2.0.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9d810b4aacd5ef4e293aa4ea01f19fca53999e9edcfc4a8ef1146238b30bdc28"}, - {file = "SQLAlchemy-2.0.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fb5d09f1d51480f711b69fe28ad42e4f8b08600a85ab2473baee669e1257800"}, - {file = "SQLAlchemy-2.0.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51b19887c96d405599880da6a7cbdf8545a7e78ec5683e46a43bac8885e32d0f"}, - {file = "SQLAlchemy-2.0.15-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d6b17cb86908e7f88be14007d6afe7d2ab11966e373044137f96a6a4d83eb21c"}, - {file = "SQLAlchemy-2.0.15-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df25052b92bd514357a9b370d74f240db890ea79aaa428fb893520e10ee5bc18"}, - {file = "SQLAlchemy-2.0.15-cp310-cp310-win32.whl", hash = "sha256:55ec62ddc0200b4fee94d11abbec7aa25948d5d21cb8df8807f4bdd3c51ba44b"}, - {file = "SQLAlchemy-2.0.15-cp310-cp310-win_amd64.whl", hash = "sha256:ae1d8deb391ab39cc8f0d5844e588a115ae3717e607d91482023917f920f777f"}, - {file = "SQLAlchemy-2.0.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4670ce853cb25f72115a1bbe366ae13cf3f28fc5c87222df14f8d3d55d51816e"}, - {file = "SQLAlchemy-2.0.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cea7c4a3dfc2ca61f88a2b1ddd6b0bfbd116c9b1a361b3b66fd826034b833142"}, - {file = "SQLAlchemy-2.0.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f5784dfb2d45c19cde03c45c04a54bf47428610106197ed6e6fa79f33bc63d3"}, - {file = "SQLAlchemy-2.0.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b31ebde27575b3b0708673ec14f0c305c4564d995b545148ab7ac0f4d9b847a"}, - {file = "SQLAlchemy-2.0.15-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6b42913a0259267e9ee335da0c36498077799e59c5e332d506e72b4f32de781d"}, - {file = "SQLAlchemy-2.0.15-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6a3f8020e013e9b3b7941dcf20b0fc8f7429daaf7158760846731cbd8caa5e45"}, - {file = "SQLAlchemy-2.0.15-cp311-cp311-win32.whl", hash = "sha256:88ab245ed2c96265441ed2818977be28c840cfa5204ba167425d6c26eb67b7e7"}, - {file = "SQLAlchemy-2.0.15-cp311-cp311-win_amd64.whl", hash = "sha256:5cc48a7fda2b5c5b8860494d6c575db3a101a68416492105fed6591dc8a2728a"}, - {file = "SQLAlchemy-2.0.15-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f6fd3c88ea4b170d13527e93be1945e69facd917661d3725a63470eb683fbffe"}, - {file = "SQLAlchemy-2.0.15-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e885dacb167077df15af2f9ccdacbd7f5dd0d538a6d74b94074f2cefc7bb589"}, - {file = "SQLAlchemy-2.0.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:201a99f922ac8c780b3929128fbd9df901418877c70e160e19adb05665e51c31"}, - {file = "SQLAlchemy-2.0.15-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e17fdcb8971e77c439113642ca8861f9465e21fc693bd3916654ceef3ac26883"}, - {file = "SQLAlchemy-2.0.15-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db269f67ed17b07e80aaa8fba1f650c0d84aa0bdd9d5352e4ac38d5bf47ac568"}, - {file = "SQLAlchemy-2.0.15-cp37-cp37m-win32.whl", hash = "sha256:994a75b197662e0608b6a76935d7c345f7fd874eac0b7093d561033db61b0e8c"}, - {file = "SQLAlchemy-2.0.15-cp37-cp37m-win_amd64.whl", hash = "sha256:4d61731a35eddb0f667774fe15e5a4831e444d066081d1e809e1b8a0e3f97cae"}, - {file = "SQLAlchemy-2.0.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:f7f994a53c0e6b44a2966fd6bfc53e37d34b7dca34e75b6be295de6db598255e"}, - {file = "SQLAlchemy-2.0.15-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:79bfe728219239bdc493950ea4a4d15b02138ecb304771f9024d0d6f5f4e3706"}, - {file = "SQLAlchemy-2.0.15-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d6320a1d175447dce63618ec997a53836de48ed3b44bbe952f0b4b399b19941"}, - {file = "SQLAlchemy-2.0.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f80a9c9a9af0e4bd5080cc0955ce70274c28e9b931ad7e0fb07021afcd32af6"}, - {file = "SQLAlchemy-2.0.15-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4a75fdb9a84072521bb2ebd31eefe1165d4dccea3039dda701a864f4b5daa17f"}, - {file = "SQLAlchemy-2.0.15-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:21c89044fc48a25c2184eba332edeffbbf9367913bb065cd31538235d828f06f"}, - {file = "SQLAlchemy-2.0.15-cp38-cp38-win32.whl", hash = "sha256:1a0754c2d9f0c7982bec0a31138e495ed1f6b8435d7e677c45be60ec18370acf"}, - {file = "SQLAlchemy-2.0.15-cp38-cp38-win_amd64.whl", hash = "sha256:bc5c2b0da46c26c5f73f700834f871d0723e1e882641932468d56833bab09775"}, - {file = "SQLAlchemy-2.0.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:670ecf74ee2e70b917028a06446ad26ff9b1195e84b09c3139c215123d57dc30"}, - {file = "SQLAlchemy-2.0.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d14282bf5b4de87f922db3c70858953fd081ef4f05dba6cca3dd705daffe1cc9"}, - {file = "SQLAlchemy-2.0.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:256b2b9660e51ad7055a9835b12717416cf7288afcf465107413917b6bb2316f"}, - {file = "SQLAlchemy-2.0.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:810199d1c5b43603a9e815ae9487aef3ab1ade7ed9c0c485e12519358929fbfe"}, - {file = "SQLAlchemy-2.0.15-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:536c86ec81ca89291d533ff41a3a05f9e4e88e01906dcee0751fc7082f3e8d6c"}, - {file = "SQLAlchemy-2.0.15-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:435f6807fa6a0597d84741470f19db204a7d34625ea121abd63e8d95f673f0c4"}, - {file = "SQLAlchemy-2.0.15-cp39-cp39-win32.whl", hash = "sha256:da7381a883aee20b7d2ffda17d909b38134b6a625920e65239a1c681881df800"}, - {file = "SQLAlchemy-2.0.15-cp39-cp39-win_amd64.whl", hash = "sha256:788d1772fb8dcd12091ca82809eef504ce0f2c423e45284bc351b872966ff554"}, - {file = "SQLAlchemy-2.0.15-py3-none-any.whl", hash = "sha256:933d30273861fe61f014ce2a7e3c364915f5efe9ed250ec1066ca6ea5942c0bd"}, - {file = "SQLAlchemy-2.0.15.tar.gz", hash = "sha256:2e940a8659ef870ae10e0d9e2a6d5aaddf0ff6e91f7d0d7732afc9e8c4be9bbc"}, + {file = "SQLAlchemy-2.0.19-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9deaae357edc2091a9ed5d25e9ee8bba98bcfae454b3911adeaf159c2e9ca9e3"}, + {file = "SQLAlchemy-2.0.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0bf0fd65b50a330261ec7fe3d091dfc1c577483c96a9fa1e4323e932961aa1b5"}, + {file = "SQLAlchemy-2.0.19-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d90ccc15ba1baa345796a8fb1965223ca7ded2d235ccbef80a47b85cea2d71a"}, + {file = "SQLAlchemy-2.0.19-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb4e688f6784427e5f9479d1a13617f573de8f7d4aa713ba82813bcd16e259d1"}, + {file = "SQLAlchemy-2.0.19-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:584f66e5e1979a7a00f4935015840be627e31ca29ad13f49a6e51e97a3fb8cae"}, + {file = "SQLAlchemy-2.0.19-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2c69ce70047b801d2aba3e5ff3cba32014558966109fecab0c39d16c18510f15"}, + {file = 
"SQLAlchemy-2.0.19-cp310-cp310-win32.whl", hash = "sha256:96f0463573469579d32ad0c91929548d78314ef95c210a8115346271beeeaaa2"}, + {file = "SQLAlchemy-2.0.19-cp310-cp310-win_amd64.whl", hash = "sha256:22bafb1da60c24514c141a7ff852b52f9f573fb933b1e6b5263f0daa28ce6db9"}, + {file = "SQLAlchemy-2.0.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d6894708eeb81f6d8193e996257223b6bb4041cb05a17cd5cf373ed836ef87a2"}, + {file = "SQLAlchemy-2.0.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8f2afd1aafded7362b397581772c670f20ea84d0a780b93a1a1529da7c3d369"}, + {file = "SQLAlchemy-2.0.19-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15afbf5aa76f2241184c1d3b61af1a72ba31ce4161013d7cb5c4c2fca04fd6e"}, + {file = "SQLAlchemy-2.0.19-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fc05b59142445a4efb9c1fd75c334b431d35c304b0e33f4fa0ff1ea4890f92e"}, + {file = "SQLAlchemy-2.0.19-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5831138f0cc06b43edf5f99541c64adf0ab0d41f9a4471fd63b54ae18399e4de"}, + {file = "SQLAlchemy-2.0.19-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3afa8a21a9046917b3a12ffe016ba7ebe7a55a6fc0c7d950beb303c735c3c3ad"}, + {file = "SQLAlchemy-2.0.19-cp311-cp311-win32.whl", hash = "sha256:c896d4e6ab2eba2afa1d56be3d0b936c56d4666e789bfc59d6ae76e9fcf46145"}, + {file = "SQLAlchemy-2.0.19-cp311-cp311-win_amd64.whl", hash = "sha256:024d2f67fb3ec697555e48caeb7147cfe2c08065a4f1a52d93c3d44fc8e6ad1c"}, + {file = "SQLAlchemy-2.0.19-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:89bc2b374ebee1a02fd2eae6fd0570b5ad897ee514e0f84c5c137c942772aa0c"}, + {file = "SQLAlchemy-2.0.19-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd4d410a76c3762511ae075d50f379ae09551d92525aa5bb307f8343bf7c2c12"}, + {file = "SQLAlchemy-2.0.19-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f469f15068cd8351826df4080ffe4cc6377c5bf7d29b5a07b0e717dddb4c7ea2"}, + {file = "SQLAlchemy-2.0.19-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:cda283700c984e699e8ef0fcc5c61f00c9d14b6f65a4f2767c97242513fcdd84"}, + {file = "SQLAlchemy-2.0.19-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:43699eb3f80920cc39a380c159ae21c8a8924fe071bccb68fc509e099420b148"}, + {file = "SQLAlchemy-2.0.19-cp37-cp37m-win32.whl", hash = "sha256:61ada5831db36d897e28eb95f0f81814525e0d7927fb51145526c4e63174920b"}, + {file = "SQLAlchemy-2.0.19-cp37-cp37m-win_amd64.whl", hash = "sha256:57d100a421d9ab4874f51285c059003292433c648df6abe6c9c904e5bd5b0828"}, + {file = "SQLAlchemy-2.0.19-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:16a310f5bc75a5b2ce7cb656d0e76eb13440b8354f927ff15cbaddd2523ee2d1"}, + {file = "SQLAlchemy-2.0.19-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cf7b5e3856cbf1876da4e9d9715546fa26b6e0ba1a682d5ed2fc3ca4c7c3ec5b"}, + {file = "SQLAlchemy-2.0.19-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e7b69d9ced4b53310a87117824b23c509c6fc1f692aa7272d47561347e133b6"}, + {file = "SQLAlchemy-2.0.19-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9eb4575bfa5afc4b066528302bf12083da3175f71b64a43a7c0badda2be365"}, + {file = "SQLAlchemy-2.0.19-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6b54d1ad7a162857bb7c8ef689049c7cd9eae2f38864fc096d62ae10bc100c7d"}, + {file = "SQLAlchemy-2.0.19-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5d6afc41ca0ecf373366fd8e10aee2797128d3ae45eb8467b19da4899bcd1ee0"}, + {file = "SQLAlchemy-2.0.19-cp38-cp38-win32.whl", hash = 
"sha256:430614f18443b58ceb9dedec323ecddc0abb2b34e79d03503b5a7579cd73a531"}, + {file = "SQLAlchemy-2.0.19-cp38-cp38-win_amd64.whl", hash = "sha256:eb60699de43ba1a1f77363f563bb2c652f7748127ba3a774f7cf2c7804aa0d3d"}, + {file = "SQLAlchemy-2.0.19-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a752b7a9aceb0ba173955d4f780c64ee15a1a991f1c52d307d6215c6c73b3a4c"}, + {file = "SQLAlchemy-2.0.19-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7351c05db355da112e056a7b731253cbeffab9dfdb3be1e895368513c7d70106"}, + {file = "SQLAlchemy-2.0.19-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa51ce4aea583b0c6b426f4b0563d3535c1c75986c4373a0987d84d22376585b"}, + {file = "SQLAlchemy-2.0.19-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae7473a67cd82a41decfea58c0eac581209a0aa30f8bc9190926fbf628bb17f7"}, + {file = "SQLAlchemy-2.0.19-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:851a37898a8a39783aab603c7348eb5b20d83c76a14766a43f56e6ad422d1ec8"}, + {file = "SQLAlchemy-2.0.19-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:539010665c90e60c4a1650afe4ab49ca100c74e6aef882466f1de6471d414be7"}, + {file = "SQLAlchemy-2.0.19-cp39-cp39-win32.whl", hash = "sha256:f82c310ddf97b04e1392c33cf9a70909e0ae10a7e2ddc1d64495e3abdc5d19fb"}, + {file = "SQLAlchemy-2.0.19-cp39-cp39-win_amd64.whl", hash = "sha256:8e712cfd2e07b801bc6b60fdf64853bc2bd0af33ca8fa46166a23fe11ce0dbb0"}, + {file = "SQLAlchemy-2.0.19-py3-none-any.whl", hash = "sha256:314145c1389b021a9ad5aa3a18bac6f5d939f9087d7fc5443be28cba19d2c972"}, + {file = "SQLAlchemy-2.0.19.tar.gz", hash = "sha256:77a14fa20264af73ddcdb1e2b9c5a829b8cc6b8304d0f093271980e36c200a3f"}, ] [package.dependencies] @@ -1375,6 +1734,7 @@ postgresql-pg8000 = ["pg8000 (>=1.29.1)"] postgresql-psycopg = ["psycopg (>=3.0.7)"] postgresql-psycopg2binary = ["psycopg2-binary"] postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] sqlcipher = ["sqlcipher3-binary"] @@ -1408,13 +1768,13 @@ files = [ [[package]] name = "types-pyopenssl" -version = "23.1.0.3" +version = "23.2.0.2" description = "Typing stubs for pyOpenSSL" optional = false python-versions = "*" files = [ - {file = "types-pyOpenSSL-23.1.0.3.tar.gz", hash = "sha256:e7211088eff3e20d359888dedecb0994f7181d5cce0f26354dd47ca0484dc8a6"}, - {file = "types_pyOpenSSL-23.1.0.3-py3-none-any.whl", hash = "sha256:ad024b07a1f4bffbca44699543c71efd04733a6c22781fa9673a971e410a3086"}, + {file = "types-pyOpenSSL-23.2.0.2.tar.gz", hash = "sha256:6a010dac9ecd42b582d7dd2cc3e9e40486b79b3b64bb2fffba1474ff96af906d"}, + {file = "types_pyOpenSSL-23.2.0.2-py3-none-any.whl", hash = "sha256:19536aa3debfbe25a918cf0d898e9f5fbbe6f3594a429da7914bf331deb1b342"}, ] [package.dependencies] @@ -1422,13 +1782,13 @@ cryptography = ">=35.0.0" [[package]] name = "types-redis" -version = "4.5.5.2" +version = "4.6.0.3" description = "Typing stubs for redis" optional = false python-versions = "*" files = [ - {file = "types-redis-4.5.5.2.tar.gz", hash = "sha256:2fe82f374d9dddf007deaf23d81fddcfd9523d9522bf11523c5c43bc5b27099e"}, - {file = "types_redis-4.5.5.2-py3-none-any.whl", hash = "sha256:bf8692252038dbe03b007ca4fde87d3ae8e10610854a6858e3bf5d01721a7c4b"}, + {file = "types-redis-4.6.0.3.tar.gz", hash = "sha256:efdef37dc0c04bf5786195651fd694f8bfdd693eac09ec4af46d90f72652558f"}, + {file = "types_redis-4.6.0.3-py3-none-any.whl", hash = "sha256:67c44c14369c33c2a300da2a50b5607c0fc888f7b85eeb7c73e15c78a0f05edd"}, ] [package.dependencies] @@ -1437,13 +1797,13 @@ 
types-pyOpenSSL = "*" [[package]] name = "typing-extensions" -version = "4.6.2" +version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" optional = false python-versions = ">=3.7" files = [ - {file = "typing_extensions-4.6.2-py3-none-any.whl", hash = "sha256:3a8b36f13dd5fdc5d1b16fe317f5668545de77fa0b8e02006381fd49d731ab98"}, - {file = "typing_extensions-4.6.2.tar.gz", hash = "sha256:06006244c70ac8ee83fa8282cb188f697b8db25bc8b4df07be1873c43897060c"}, + {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, + {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, ] [[package]] @@ -1588,18 +1948,19 @@ files = [ [[package]] name = "xlsxwriter" -version = "3.1.1" +version = "3.1.2" description = "A Python module for creating Excel XLSX files." optional = false python-versions = ">=3.6" files = [ - {file = "XlsxWriter-3.1.1-py3-none-any.whl", hash = "sha256:b50e3bd905d7dafa6ea45210e2cc5600b4ccd104a0d3a4d4d7cf813b78426440"}, - {file = "XlsxWriter-3.1.1.tar.gz", hash = "sha256:03459ee76f664470c4c63a8977cab624fb259d0fc1faac64dc9cc6f3cc08f945"}, + {file = "XlsxWriter-3.1.2-py3-none-any.whl", hash = "sha256:331508ff39d610ecdaf979e458840bc1eab6e6a02cfd5d08f044f0f73636236f"}, + {file = "XlsxWriter-3.1.2.tar.gz", hash = "sha256:78751099a770273f1c98b8d6643351f68f98ae8e6acf9d09d37dc6798f8cd3de"}, ] [extras] aws = ["aioaws", "connectorx"] image = ["pillow"] +kafka = ["kafka-python"] pandera = ["pandera"] psql = ["connectorx"] redis = ["redis"] @@ -1608,4 +1969,4 @@ server = ["asgi-correlation-id", "fastapi", "prometheus-fastapi-instrumentator", [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "737bde823d591927755425e68f5ee04024aaedfa6d9348a500eaeac0268dddfb" +content-hash = "01a8f94fdf2e36d23931929ced1d279cd41967f27072ae401ba1d1f0afd1e76e" diff --git a/pyproject.toml b/pyproject.toml index cf0a91e..8a78aa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "aligned" -version = "0.0.20" +version = "0.0.21" description = "A scalable feature store that makes it easy to align offline and online ML systems" authors = ["Mats E. 
Mollestad "] license = "Apache-2.0" @@ -58,13 +58,13 @@ asgi-correlation-id = { version = "^3.0.0", optional = true } pandera = { version = "^0.13.3", optional = true} httpx = "^0.23.0" polars = { version = "^0.17.15", extras = ["all"] } -connectorx = { version = "^0.3.1", optional = true } +connectorx = { version = "^0.3.2-alpha.7", optional = true } pillow = { version = "^9.4.0", optional = true } prometheus-fastapi-instrumentator = { version="^5.9.1", optional = true } # gensim = { version = "4.3.0", optional = true } # openai = { version = "^0.27.2", optional = true } # sentence-transformers = { version = "^2.2.2", optional = true } -kafka-python = "^2.0.2" +kafka-python = { version= "^2.0.2", optional = true } [tool.poetry.extras] aws = ["aioaws", "connectorx"] @@ -73,6 +73,7 @@ redis = ["redis"] server = ["asgi-correlation-id", "fastapi", "uvicorn", "prometheus-fastapi-instrumentator"] pandera = ["pandera"] image = ["pillow"] +kafka = ["kafka-python"] # text = ["gensim", "openai", "sentence-transformers"] [tool.poetry.group.dev.dependencies] @@ -81,6 +82,11 @@ pytest-mock = "^3.8.1" freezegun = "^1.2.2" pytest-asyncio = "^0.20.1" fakeredis = "^2.10.0" +sqlalchemy = "^2.0.19" +printf-log-formatter = "^0.3.0" +isort = "^5.12.0" +black = "^23.7.0" +psycopg2 = "^2.9.6" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/setup.cfg b/setup.cfg index be42865..5a556ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,7 +6,7 @@ inline-quotes = 'double' # W503 line break before binary operator - not Black/PEP8 compatible # SIM106 handle error cases first # TC002 Move third-party import into a type-checking block (not compatible with pydantic) -ignore = E203, W503, SIM106, TC002 +ignore = E203, W503, SIM106, TC002, SIM110, TC001 enable-extensions = TC, TC1 pytest-mark-no-parentheses=true pytest-fixture-no-parentheses=true diff --git a/test_data/credit_history.parquet b/test_data/credit_history.parquet new file mode 100644 index 0000000..5740d65 Binary files /dev/null and b/test_data/credit_history.parquet differ diff --git a/test_data/credit_history_agg.parquet b/test_data/credit_history_agg.parquet new file mode 100644 index 0000000..5740d65 Binary files /dev/null and b/test_data/credit_history_agg.parquet differ diff --git a/test_data/feature-store.json b/test_data/feature-store.json index eb9b60a..a13c10c 100644 --- a/test_data/feature-store.json +++ b/test_data/feature-store.json @@ -1 +1 @@ -{"metadata": {"created_at": "2023-06-22T13:06:07.201140", "name": "feature_store_location.py", "github_url": null}, "feature_views": [{"name": "titanic_parquet", "description": "Some features from the titanic dataset", "tags": {}, "batch_data_source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": 
"age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound", "value": 100.0}, {"name": "lower_bound", "value": 0.0}, {"name": "requierd"}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "derived_features": [{"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "event_triggers": null, "contacts": null, "indexes": []}, {"name": "titanic", "description": "Some features from the titanic dataset", "tags": {}, "batch_data_source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime"}, "description": null, "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound", "value": 100.0}, {"name": "lower_bound", "value": 0.0}, {"name": "requierd"}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", 
"values": ["male", "female"]}]}], "derived_features": [{"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}], "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime"}}, "stream_data_source": {"name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "mappings": {}, "record_coder": {"coder_type": "json", "key": "json"}}, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", 
"tags": null, "constraints": [{"name": "upper_bound", "value": 100.0}, {"name": "lower_bound", "value": 0.0}, {"name": "requierd"}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}], "combined_feature_views": [], "models": [{"name": "titanic", "features": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}], "predictions_view": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "event_timestamp": null, "source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}]}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_folder": null}], "enrichers": []} +{"metadata": {"created_at": "2023-07-31T09:04:35.949675", "name": "feature_store_location.py", "github_url": null}, "feature_views": [{"name": "titanic", "description": "Some features from the titanic dataset", "tags": {}, "batch_data_source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "requierd"}, {"name": "lower_bound", "value": 0.0}, {"name": "upper_bound", "value": 100.0}]}, {"name": 
"sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": 
{"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}], "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "requierd"}, {"name": "lower_bound", "value": 0.0}, {"name": "upper_bound", "value": 100.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}, {"name": "titanic_parquet", "description": "Some features from the titanic dataset", "tags": {}, "batch_data_source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "requierd"}, {"name": "lower_bound", "value": 0.0}, {"name": "upper_bound", "value": 100.0}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}], "derived_features": [{"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": 
"is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}], "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "event_triggers": null, "contacts": null, "indexes": []}], "combined_feature_views": [], "models": [{"name": "titanic", "features": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}], "predictions_view": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}]}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_folder": null}], "enrichers": []} diff --git a/test_data/loan.parquet b/test_data/loan.parquet new file mode 100644 index 0000000..f4d3280 Binary files /dev/null and b/test_data/loan.parquet differ