From 423e8b06a833d24d0718a787fc1799cd287c6305 Mon Sep 17 00:00:00 2001 From: "Mats E. Mollestad" Date: Mon, 1 Apr 2024 23:08:29 +0200 Subject: [PATCH 01/13] Added a way to describe exposed models --- aligned/__init__.py | 2 + aligned/compiler/feature_factory.py | 26 +- aligned/compiler/model.py | 95 ++++-- aligned/compiler/transformation_factory.py | 37 +++ aligned/exposed_model/interface.py | 289 ++++++++++++++++++ aligned/exposed_model/ollama.py | 223 ++++++++++++++ aligned/feature_store.py | 55 +++- aligned/feature_view/feature_view.py | 9 +- .../feature_view/tests/test_joined_source.py | 4 +- aligned/jobs/tests/test_derived_job.py | 2 +- aligned/retrival_job.py | 69 ++++- aligned/schemas/model.py | 2 + aligned/schemas/transformation.py | 104 +++++++ aligned/sources/local.py | 6 +- aligned/tests/test_model_target.py | 14 +- aligned/tests/test_models_as_feature.py | 4 +- conftest.py | 4 +- poetry.lock | 60 ++-- pyproject.toml | 7 +- 19 files changed, 929 insertions(+), 83 deletions(-) create mode 100644 aligned/exposed_model/interface.py create mode 100644 aligned/exposed_model/ollama.py diff --git a/aligned/__init__.py b/aligned/__init__.py index d6e0660..ede3c39 100644 --- a/aligned/__init__.py +++ b/aligned/__init__.py @@ -26,6 +26,7 @@ from aligned.sources.redshift import RedshiftSQLConfig from aligned.sources.s3 import AwsS3Config from aligned.sources.azure_blob_storage import AzureBlobConfig +from aligned.exposed_model.interface import ExposedModel from aligned.schemas.feature import FeatureLocation __all__ = [ @@ -45,6 +46,7 @@ # Streaming Sources 'KafkaConfig', # Types + 'ExposedModel', 'Entity', 'String', 'Bool', diff --git a/aligned/compiler/feature_factory.py b/aligned/compiler/feature_factory.py index 85bf93b..5b5ad2e 100644 --- a/aligned/compiler/feature_factory.py +++ b/aligned/compiler/feature_factory.py @@ -1050,13 +1050,31 @@ def dtype(self) -> FeatureType: def aggregate(self) -> StringAggregation: return StringAggregation(self) - def split(self, pattern: str, max_splits: int | None = None) -> String: - raise NotImplementedError() + def ollama_embedding(self, model: str, host_env: str | None = None) -> Embedding: + from aligned.compiler.transformation_factory import OllamaEmbedding + + feature = Embedding() + feature.transformation = OllamaEmbedding(model, self, host_env) + return feature + + def ollama_generate(self, model: str, system: str | None = None, host_env: str | None = None) -> String: + from aligned.compiler.transformation_factory import OllamaGenerate + + feature = Json() + feature.transformation = OllamaGenerate(model, system or '', self, host_env) + return feature + + def split(self, pattern: str) -> String: + from aligned.compiler.transformation_factory import Split + + feature = self.copy_type() + feature.transformation = Split(pattern, self) + return feature def replace(self, values: dict[str, str]) -> String: from aligned.compiler.transformation_factory import ReplaceFactory - feature = String() + feature = self.copy_type() feature.transformation = ReplaceFactory(values, self) return feature @@ -1091,7 +1109,7 @@ def append(self, other: FeatureFactory | str) -> String: def prepend(self, other: FeatureFactory | str) -> String: from aligned.compiler.transformation_factory import AppendStrings, PrependConstString - feature = String() + feature = self.copy_type() if isinstance(other, FeatureFactory): feature.transformation = AppendStrings(other, self) else: diff --git a/aligned/compiler/model.py b/aligned/compiler/model.py index 55e9e29..0f9d0e7 100644 --- 
a/aligned/compiler/model.py +++ b/aligned/compiler/model.py @@ -23,6 +23,9 @@ from aligned.data_source.batch_data_source import BatchDataSource from aligned.data_source.stream_data_source import StreamDataSource from aligned.feature_view.feature_view import FeatureView, FeatureViewWrapper +from aligned.exposed_model.interface import ExposedModel +from aligned.request.retrival_request import RetrivalRequest +from aligned.retrival_job import ConvertableToRetrivalJob, PredictionJob, RetrivalJob from aligned.schemas.derivied_feature import DerivedFeature from aligned.schemas.feature import Feature, FeatureLocation, FeatureReferance, FeatureType from aligned.schemas.feature_view import CompiledFeatureView @@ -51,14 +54,17 @@ class ModelMetadata: contacts: list[str] | None = field(default=None) tags: list[str] | None = field(default=None) description: str | None = field(default=None) - prediction_source: BatchDataSource | None = field(default=None) - prediction_stream: StreamDataSource | None = field(default=None) + + output_source: BatchDataSource | None = field(default=None) + output_stream: StreamDataSource | None = field(default=None) + application_source: BatchDataSource | None = field(default=None) acceptable_freshness: timedelta | None = field(default=None) unacceptable_freshness: timedelta | None = field(default=None) exposed_at_url: str | None = field(default=None) + exposed_model: ExposedModel | None = field(default=None) dataset_store: DatasetStore | None = field(default=None) @@ -91,6 +97,43 @@ def __call__(self) -> T: def compile(self) -> ModelSchema: return compile_with_metadata(self.contract(), self.metadata) + def predict_over( + self, + values: ConvertableToRetrivalJob | RetrivalJob, + needed_views: list[FeatureViewWrapper | ModelContractWrapper] | None = None, + ) -> PredictionJob: + from aligned import FeatureStore + from aligned.retrival_job import RetrivalJob + + model = self.compile() + model.features.default_features + + if not model.exposed_model: + raise ValueError(f"Model {model.name} does not have an `exposed_model` to use for predictions.") + + if not isinstance(values, RetrivalJob): + features = {feat.as_feature() for feat in model.features.default_features} + request = RetrivalRequest( + name='default', + location=FeatureLocation.model(model.name), + entities=set(), + features=features, + derived_features=set(), + ) + values = RetrivalJob.from_convertable(values, request) + + store = FeatureStore.empty() + + for needed_data in needed_views or []: + if isinstance(needed_data, ModelContractWrapper): + store.add_compiled_model(needed_data.compile()) + else: + store.add_compiled_view(needed_data.compile()) + + store.add_compiled_model(model) + + return store.model(model.name).predict_over(values) + def as_view(self) -> CompiledFeatureView | None: compiled = self.compile() @@ -108,7 +151,7 @@ def filter( condition = where(self.__call__()) - main_source = meta.prediction_source + main_source = meta.output_source if not main_source: raise ValueError( f'Model: {self.metadata.name} needs an `output_source` to use `filter`, got None.'
@@ -119,9 +162,9 @@ def filter( condition._location = FeatureLocation.model(name) if condition.transformation: - meta.prediction_source = FilteredDataSource(main_source, condition.compile()) + meta.output_source = FilteredDataSource(main_source, condition.compile()) else: - meta.prediction_source = FilteredDataSource(main_source, condition.feature()) + meta.output_source = FilteredDataSource(main_source, condition.feature()) if application_source: meta.application_source = application_source @@ -213,37 +256,48 @@ def compile(self) -> FeatureVersionSchema: def model_contract( - name: str, - features: list[FeatureReferencable] | FeatureInputVersions, + input_features: list[FeatureReferencable] | FeatureInputVersions, + name: str | None = None, contacts: list[str] | None = None, tags: list[str] | None = None, description: str | None = None, - prediction_source: BatchDataSource | None = None, - prediction_stream: StreamDataSource | None = None, + output_source: BatchDataSource | None = None, + output_stream: StreamDataSource | None = None, application_source: BatchDataSource | None = None, dataset_store: DatasetStore | StorageFileReference | None = None, exposed_at_url: str | None = None, + exposed_model: ExposedModel | None = None, acceptable_freshness: timedelta | None = None, unacceptable_freshness: timedelta | None = None, ) -> Callable[[Type[T]], ModelContractWrapper[T]]: def decorator(cls: Type[T]) -> ModelContractWrapper[T]: - if isinstance(features, FeatureInputVersions): - input_features = features + if isinstance(input_features, FeatureInputVersions): + features_versions = input_features else: - input_features = FeatureInputVersions(default_version='default', versions={'default': features}) + features_versions = FeatureInputVersions( + default_version='default', versions={'default': input_features} + ) + + used_name = name or str(cls.__name__).lower() + used_description = description or (str(cls.__doc__) if cls.__doc__ else None) + + used_exposed_at_url = exposed_at_url + if exposed_model: + used_exposed_at_url = exposed_model.exposed_at_url or exposed_at_url metadata = ModelMetadata( - name, - input_features, + used_name, + features_versions, contacts=contacts, tags=tags, - description=description, - prediction_source=prediction_source, - prediction_stream=prediction_stream, + description=used_description, + output_source=output_source, + output_stream=output_stream, application_source=application_source, dataset_store=resolve_dataset_store(dataset_store) if dataset_store else None, - exposed_at_url=exposed_at_url, + exposed_at_url=used_exposed_at_url, + exposed_model=exposed_model, acceptable_freshness=acceptable_freshness, unacceptable_freshness=unacceptable_freshness, ) @@ -275,9 +329,9 @@ class MyModel(ModelContract): features=set(), derived_features=set(), model_version_column=None, - source=metadata.prediction_source, + source=metadata.output_source, application_source=metadata.application_source, - stream_source=metadata.prediction_stream, + stream_source=metadata.output_stream, classification_targets=set(), regression_targets=set(), recommendation_targets=set(), @@ -382,4 +436,5 @@ class MyModel(ModelContract): description=metadata.description, dataset_store=metadata.dataset_store, exposed_at_url=metadata.exposed_at_url, + exposed_model=metadata.exposed_model, ) diff --git a/aligned/compiler/transformation_factory.py b/aligned/compiler/transformation_factory.py index e81f9e0..cffd54c 100644 --- a/aligned/compiler/transformation_factory.py +++ b/aligned/compiler/transformation_factory.py @@ -240,6
+240,43 @@ def compile(self) -> Transformation: return SplitTransformation(self.from_feature.name, self.pattern) +@dataclass +class OllamaEmbedding(TransformationFactory): + + model: str + from_feature: FeatureFactory + host_env: str | None + + @property + def using_features(self) -> list[FeatureFactory]: + return [self.from_feature] + + def compile(self) -> Transformation: + from aligned.schemas.transformation import OllamaEmbedding as OllamaEmbeddingTransformation + + return OllamaEmbeddingTransformation(self.from_feature.name, self.model, self.host_env) + + +@dataclass +class OllamaGenerate(TransformationFactory): + + model: str + system: str | None + prompt_feature: FeatureFactory + host_env: str | None + + @property + def using_features(self) -> list[FeatureFactory]: + return [self.prompt_feature] + + def compile(self) -> Transformation: + from aligned.schemas.transformation import OllamaGenerate as OllamaGenerateTransformation + + return OllamaGenerateTransformation( + self.prompt_feature.name, self.model, self.system or '', self.host_env + ) + + # @dataclass # class Split(TransformationFactory): diff --git a/aligned/exposed_model/interface.py b/aligned/exposed_model/interface.py new file mode 100644 index 0000000..7bfffcf --- /dev/null +++ b/aligned/exposed_model/interface.py @@ -0,0 +1,289 @@ +from __future__ import annotations + +import polars as pl +from typing import TYPE_CHECKING +from dataclasses import dataclass +from aligned.retrival_job import RetrivalJob +from aligned.schemas.codable import Codable +from mashumaro.types import SerializableType +import logging + +from aligned.schemas.feature import Feature, FeatureReferance + +if TYPE_CHECKING: + from aligned.feature_store import ModelFeatureStore + +logger = logging.getLogger(__name__) + + +class PredictorFactory: + + supported_predictors: dict[str, type[ExposedModel]] + _shared: PredictorFactory | None = None + + def __init__(self): + self.supported_predictors = {} + + types: list[type[ExposedModel]] = [ + EntityPredictor, + OllamaGeneratePredictor, + OllamaEmbeddingPredictor, + ] + for predictor in types: + self.supported_predictors[predictor.model_type] = predictor + + @classmethod + def shared(cls) -> PredictorFactory: + if cls._shared: + return cls._shared + cls._shared = cls() + return cls._shared + + +class ExposedModel(Codable, SerializableType): + + model_type: str + + @property + def exposed_at_url(self) -> str | None: + return None + + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + raise NotImplementedError(type(self)) + + async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: + raise NotImplementedError(type(self)) + + async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: + raise NotImplementedError(type(self)) + + def _serialize(self) -> dict: + assert ( + self.model_type in PredictorFactory.shared().supported_predictors + ), f'Unknown predictor_type: {self.model_type}' + return self.to_dict() + + @classmethod + def _deserialize(cls, value: dict) -> ExposedModel: + name_type = value['model_type'] + if name_type not in PredictorFactory.shared().supported_predictors: + raise ValueError( + f"Unknown predictor type: '{name_type}'.\nRemember to add the" + ' predictor to the PredictorFactory.supported_predictors if' + ' it is a custom type.'
+ ) + del value['model_type'] + data_class = PredictorFactory.shared().supported_predictors[name_type] + return data_class.from_dict(value) + + @staticmethod + def ollama_generate( + endpoint: str, + model: str, + prompt_template: str, + input_features_versions: str, + ) -> 'OllamaGeneratePredictor': + + return OllamaGeneratePredictor( + endpoint=endpoint, + model_name=model, + prompt_template=prompt_template, + input_features_versions=input_features_versions, + ) + + @staticmethod + def ollama_embedding( + endpoint: str, + model: str, + input_features_versions: str, + prompt_template: str, + embedding_name: str | None = None, + ) -> 'OllamaEmbeddingPredictor': + + return OllamaEmbeddingPredictor( + endpoint=endpoint, + model_name=model, + prompt_template=prompt_template, + input_features_versions=input_features_versions, + embedding_name=embedding_name or 'embedding', + ) + + +@dataclass +class EntityPredictor(ExposedModel): + + endpoint: str + + input_features_versions: str + + model_type: str = 'entity' + + @property + def exposed_at_url(self) -> str | None: + return self.endpoint + + @property + def as_markdown(self) -> str: + return f"""Sending the entities as a column-wise JSON payload to: {self.endpoint}.""" + + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + return store.feature_references_for(self.input_features_versions) + + async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: + return store.using_version(self.input_features_versions).needed_entities() + + async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: + from httpx import AsyncClient + import polars as pl + + async with AsyncClient() as client: + entities = (await values.to_polars()).to_dict(as_series=False) + response = await client.post(self.endpoint, json=entities) + response.raise_for_status() + + dict_data = dict(response.json()) + return pl.DataFrame(data=dict_data) + + +@dataclass +class OllamaGeneratePredictor(ExposedModel): + + endpoint: str + model_name: str + + prompt_template: str + input_features_versions: str + + model_type: str = 'ollama_generate' + + @property + def exposed_at_url(self) -> str | None: + return self.endpoint + + def prompt_template_hash(self) -> str: + from hashlib import sha256 + + return sha256(self.prompt_template.encode(), usedforsecurity=False).hexdigest() + + @property + def as_markdown(self) -> str: + return f"""Sending a `generate` request to an Ollama server located at: {self.endpoint}. + +This will use the model: `{self.model_name}` to generate the responses.
+ +And use the prompt template: +``` +{self.prompt_template} +```""" + + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + return store.feature_references_for(self.input_features_versions) + + async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: + return store.using_version(self.input_features_versions).needed_entities() + + async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: + from ollama import AsyncClient + import polars as pl + + client = AsyncClient(host=self.endpoint) + + entities = await values.to_polars() + + features = store.feature_references_for(self.input_features_versions) + expected_cols = {feat.name for feat in features} + missing_cols = expected_cols - set(entities.columns) + + if missing_cols: + entities = await ( + store.using_version(self.input_features_versions).features_for(values).to_polars() + ) + + prompts = entities + + ret_vals = [] + model_version = f"{self.prompt_template_hash()} -> {self.model_name}" + + for value in prompts.iter_rows(named=True): + prompt = self.prompt_template.format(**value) + + response = await client.generate(self.model_name, prompt, stream=False) + + if isinstance(response, dict): + response['model_version'] = model_version + else: + logger.info(f"Unable to set the model version on the Ollama response. Got: {type(response)}") + + ret_vals.append(response) + + return prompts.hstack(pl.DataFrame(ret_vals)) + + +@dataclass +class OllamaEmbeddingPredictor(ExposedModel): + + endpoint: str + model_name: str + embedding_name: str + + prompt_template: str + input_features_versions: str + + model_type: str = 'ollama_embedding' + + @property + def exposed_at_url(self) -> str | None: + return self.endpoint + + def prompt_template_hash(self) -> str: + from hashlib import sha256 + + return sha256(self.prompt_template.encode(), usedforsecurity=False).hexdigest() + + @property + def as_markdown(self) -> str: + return f"""Sending an `embedding` request to an Ollama server located at: {self.endpoint}.
+ +This will use the model: `{self.model_name}` to generate the embeddings.""" + + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + return store.feature_references_for(self.input_features_versions) + + async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: + return store.using_version(self.input_features_versions).needed_entities() + + async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: + from ollama import AsyncClient + import polars as pl + + client = AsyncClient(host=self.endpoint) + + expected_cols = [feat.name for feat in store.feature_references_for(self.input_features_versions)] + + entities = await values.to_polars() + missing_cols = set(expected_cols) - set(entities.columns) + if missing_cols: + entities = ( + await store.using_version(self.input_features_versions).features_for(values).to_polars() + ) + + prompts = entities + + ret_vals = [] + + for value in prompts.iter_rows(named=True): + prompt = self.prompt_template.format(**value) + + response = await client.embeddings(self.model_name, prompt) + + if isinstance(response, dict): + embedding = response['embedding'] # type: ignore + else: + embedding = response + + ret_vals.append(embedding) + + model_version = f"{self.prompt_template_hash()} -> {self.model_name}" + return prompts.hstack([pl.Series(name=self.embedding_name, values=ret_vals)]).with_columns( + pl.lit(model_version).alias('model_version') + ) diff --git a/aligned/exposed_model/ollama.py b/aligned/exposed_model/ollama.py new file mode 100644 index 0000000..a6197ab --- /dev/null +++ b/aligned/exposed_model/ollama.py @@ -0,0 +1,223 @@ +from aligned.compiler.model import ModelContractWrapper +from aligned.compiler.feature_factory import ( + Embedding, + Entity, + FeatureFactory, + FeatureReferencable, + Int32, + String, + List, + Int64, + EventTimestamp, +) +import logging + +from aligned.data_source.batch_data_source import BatchDataSource +from aligned.exposed_model.interface import ExposedModel + + +logger = logging.getLogger(__name__) + + +class OllamaGeneration: + + model: String + + response: String + created_at: EventTimestamp + + context: List + + load_duration: Int64 + total_duration: Int64 + + prompt_eval_count: Int32 + prompt_eval_duration: Int64 + + eval_count: Int32 + eval_duration: Int64 + + +def ollama_generate_contract( + prompt: FeatureFactory, + contract_name: str, + endpoint: str, + model: str, + entities: list[FeatureFactory] | FeatureFactory, + prediction_source: BatchDataSource | None = None, +) -> ModelContractWrapper[OllamaGeneration]: + from aligned import model_contract, ExposedModel + + @model_contract( + name=contract_name, + input_features=[prompt], + exposed_model=ExposedModel.ollama_generate( + endpoint=endpoint, + model=model, + prompt_template=f"{{{prompt.name}}}", + input_features_versions='default', + ), + output_source=prediction_source, + ) + class OllamaOutput: + model = String().as_model_version() + + input_prompt = String() + + response = String() + created_at = EventTimestamp() + + context = List(Int32()) + + load_duration = Int64() + total_duration = Int64() + + prompt_eval_count = Int32() + prompt_eval_duration = Int64() + + eval_count = Int32() + eval_duration = Int64() + + if not isinstance(entities, list): + entities = [entities] + + for entity in entities: + if isinstance(entity, Entity): + feature = entity._dtype.copy_type() + else: + feature = entity.copy_type() + + new_entity = Entity(feature) + + feature._name = entity.name 
+ new_entity._name = entity.name + + setattr(OllamaOutput.contract, entity.name, new_entity) + + return OllamaOutput # type: ignore + + +def ollama_embedding_contract( + text: FeatureFactory, + contract_name: str, + endpoint: str, + model: str, + entities: list[FeatureFactory] | FeatureFactory, + output_source: BatchDataSource | None = None, +): + from aligned import model_contract, FeatureInputVersions + + @model_contract( + name=contract_name, + input_features=FeatureInputVersions(default_version='default', versions={'default': [text]}), + exposed_model=ExposedModel.ollama_embedding( + endpoint=endpoint, + model=model, + input_features_versions='default', + prompt_template=f"{{{text.name}}}", + embedding_name='embedding', + ), + output_source=output_source, + ) + class OllamaEmbedding: + + embedding = Embedding() + + if not isinstance(entities, list): + entities = [entities] + + for entity in entities: + if isinstance(entity, Entity): + feature = entity._dtype.copy_type() + else: + feature = entity.copy_type() + + new_entity = Entity(feature) + + feature._name = entity.name + new_entity._name = entity.name + + setattr(OllamaEmbedding.contract, entity.name, new_entity) + + return OllamaEmbedding # type: ignore + + +def ollama_classification_contract( + input: list[FeatureReferencable] | FeatureReferencable, + contract_name: str, + endpoint: str, + model: str, + entities: list[FeatureFactory] | FeatureFactory, + ground_truth: FeatureFactory, + output_source: BatchDataSource | None = None, + prompt_template: str | None = None, +): + from aligned import model_contract, ExposedModel + from aligned.schemas.constraints import InDomain + + if not isinstance(input, list): + input = [input] + + allowed_outputs = [] + + if ground_truth.constraints: + for constraint in ground_truth.constraints: + if isinstance(constraint, InDomain): + allowed_outputs = constraint.values + + if not prompt_template: + prompt_template = '' + + if allowed_outputs: + prompt_template += ( + "Your task is to classify the input into one of the following classes: '" + + "', '".join(allowed_outputs) + + "'.\n\n" + ) + + prompt_template += 'You have the following information at your disposal:\n' + + for feature in input: + ref = feature.feature_referance() + prompt_template += f"{ref.name}: {{{ref.name}}}\n" + + prompt_template += ( + '\n\nDo not explain why you think the input belongs to a certain class, ' + 'just provide the class you think the input belongs to. ' + 'If you are unsure about which class it belongs to, return `Unknown`.'
+ ) + + @model_contract( + name=contract_name, + input_features=input, + exposed_model=ExposedModel.ollama_generate( + endpoint=endpoint, model=model, prompt_template=prompt_template, input_features_versions='default' + ), + output_source=output_source, + ) + class OllamaOutput: + model_version = ( + String() + .as_model_version() + .description('This is a combination of the used LLM model, and the prompt template.') + ) + created_at = EventTimestamp() + response = ground_truth.as_classification_label() + + if not isinstance(entities, list): + entities = [entities] + + for entity in entities: + if isinstance(entity, Entity): + feature = entity._dtype.copy_type() + else: + feature = entity.copy_type() + + new_entity = Entity(feature) + + feature._name = entity.name + new_entity._name = entity.name + + setattr(OllamaOutput.contract, entity.name, new_entity) + + return OllamaOutput # type: ignore diff --git a/aligned/feature_store.py b/aligned/feature_store.py index f1fd242..d95b85e 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -25,6 +25,7 @@ from aligned.feature_view.feature_view import FeatureView, FeatureViewWrapper from aligned.request.retrival_request import FeatureRequest, RetrivalRequest from aligned.retrival_job import ( + PredictionJob, SelectColumnsJob, RetrivalJob, StreamAggregationJob, @@ -918,14 +919,40 @@ def using_version(self, version: str) -> ModelFeatureStore: def request( self, except_features: set[str] | None = None, event_timestamp_column: str | None = None ) -> FeatureRequest: + feature_refs = self.raw_string_features(except_features or set()) + if not feature_refs: + raise ValueError(f"No features to request for model '{self.model.name}'") + return self.store.requests_for( - RawStringFeatureRequest(self.raw_string_features(except_features or set())), + RawStringFeatureRequest(feature_refs), event_timestamp_column, ) def needed_entities(self) -> set[Feature]: return self.request().request_result.entities + def feature_references_for(self, version: str) -> list[FeatureReferance]: + return self.model.features.features_for(version) + + def has_exposed_model(self) -> bool: + return self.model.exposed_model is not None + + def predict_over( + self, + entities: ConvertableToRetrivalJob | RetrivalJob, + ) -> PredictionJob: + predictor = self.model.exposed_model + if not predictor: + raise ValueError( + f'Model {self.model.name} has no predictor set. 
' + 'This can be done by setting the `exposed_model` value' + ) + + if not isinstance(entities, RetrivalJob): + entities = RetrivalJob.from_convertable(entities, self.request().needed_requests) + + return PredictionJob(entities, self.model, self.store) + def features_for( self, entities: ConvertableToRetrivalJob | RetrivalJob, event_timestamp_column: str | None = None ) -> RetrivalJob: @@ -956,6 +983,9 @@ def features_for( Returns: RetrivalJob: A retrival job that can be used to fetch the features """ + import polars as pl + import pandas as pd + request = self.request(event_timestamp_column=event_timestamp_column) if isinstance(entities, dict): features = self.raw_string_features(set(entities.keys())) @@ -966,11 +996,26 @@ def features_for( entities, list(features), event_timestamp_column=event_timestamp_column ).with_request(request.needed_requests) - if isinstance(entities, dict): - subset_request = self.request(set(entities.keys()), event_timestamp_column) + if isinstance(entities, (dict, pl.DataFrame, pd.DataFrame)): + + existing_keys = set() + if isinstance(entities, dict): + existing_keys = set(entities.keys()) + elif isinstance(entities, (pl.DataFrame, pd.DataFrame)): + existing_keys = set(entities.columns) + + subset_request = self.request(existing_keys, event_timestamp_column) + + needs_core_features = False + + for req in subset_request.needed_requests: + missing_keys = set(req.feature_names) - existing_keys + if missing_keys: + needs_core_features = True + break - if subset_request.request_result.feature_columns != request.request_result.feature_columns: - job = job.derive_features(request.needed_requests) + if not needs_core_features: + job = RetrivalJob.from_convertable(entities, request).derive_features(request.needed_requests) return job.select_columns(request.features_to_include) diff --git a/aligned/feature_view/feature_view.py b/aligned/feature_view/feature_view.py index 31661bd..6e51a20 100644 --- a/aligned/feature_view/feature_view.py +++ b/aligned/feature_view/feature_view.py @@ -91,8 +91,8 @@ def resolve_source(source: BatchDataSource | FeatureViewWrapper) -> BatchDataSou def feature_view( - name: str, source: BatchDataSource | FeatureViewWrapper, + name: str | None = None, description: str | None = None, stream_source: StreamDataSource | None = None, application_source: BatchDataSource | None = None, @@ -104,10 +104,13 @@ ) -> Callable[[Type[T]], FeatureViewWrapper[T]]: def decorator(cls: Type[T]) -> FeatureViewWrapper[T]: + used_name = name or str(cls.__name__).lower() + used_description = description or (str(cls.__doc__) if cls.__doc__ else None) + metadata = FeatureViewMetadata( - name, + used_name, resolve_source(source), - description=description, + description=used_description, stream_source=stream_source, application_source=application_source, materialized_source=materialized_source, diff --git a/aligned/feature_view/tests/test_joined_source.py b/aligned/feature_view/tests/test_joined_source.py index ad36ddd..dc59118 100644 --- a/aligned/feature_view/tests/test_joined_source.py +++ b/aligned/feature_view/tests/test_joined_source.py @@ -29,8 +29,8 @@ class RightOtherIdData: @model_contract( name='some_model', - features=[], - prediction_source=FileSource.csv_at('test_data/other.csv'), + input_features=[], + output_source=FileSource.csv_at('test_data/other.csv'), ) class ModelData: diff --git a/aligned/jobs/tests/test_derived_job.py b/aligned/jobs/tests/test_derived_job.py index c889a83..281ee3b 100644 --- a/aligned/jobs/tests/test_derived_job.py +++
b/aligned/jobs/tests/test_derived_job.py @@ -101,7 +101,7 @@ class ExpenceAgg: @model_contract( name='model', - features=[ + input_features=[ expences.abs_amount, expences.is_expence, income_agg.total_amount, diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py index ae85cab..1eceb91 100644 --- a/aligned/retrival_job.py +++ b/aligned/retrival_job.py @@ -34,9 +34,10 @@ from aligned.schemas.folder import DatasetMetadata, DatasetStore from aligned.schemas.derivied_feature import AggregatedFeature, AggregateOver - from aligned.schemas.model import EventTrigger + from aligned.schemas.model import EventTrigger, Model from aligned.sources.local import DataFileReference, StorageFileReference from aligned.feature_source import WritableFeatureSource + from aligned.feature_store import FeatureStore logger = logging.getLogger(__name__) @@ -2168,3 +2169,69 @@ async def to_lazy_polars(self) -> pl.LazyFrame: async def to_pandas(self) -> pd.DataFrame: return (await self.to_polars()).to_pandas() + + +@dataclass +class PredictionJob(RetrivalJob): + + job: RetrivalJob + model: Model + store: FeatureStore + + @property + def request_result(self) -> RequestResult: + return self.job.request_result + + @property + def retrival_requests(self) -> list[RetrivalRequest]: + return self.job.retrival_requests + + async def to_pandas(self) -> pd.DataFrame: + return (await self.to_polars()).to_pandas() + + async def to_lazy_polars(self) -> pl.LazyFrame: + predictor = self.model.exposed_model + if not predictor: + raise ValueError('No predictor defined for model') + + df = await predictor.run_polars( + self.job, + self.store.model(self.model.name), + ) + return df.lazy() + + def remove_derived_features(self) -> RetrivalJob: + return self.job.remove_derived_features() + + async def insert_into_output_source(self) -> None: + pred_source = self.model.predictions_view.source + if not pred_source: + raise ValueError('No source defined for predictions view') + + if not isinstance(pred_source, WritableFeatureSource): + raise ValueError('Source for predictions view is not writable') + + req = self.model.predictions_view.request('preds') + await pred_source.insert(self, [req]) + + async def upsert_into_output_source(self) -> None: + pred_source = self.model.predictions_view.source + if not pred_source: + raise ValueError('No source defined for predictions view') + + if not isinstance(pred_source, WritableFeatureSource): + raise ValueError('Source for predictions view is not writable') + + req = self.model.predictions_view.request('preds') + await pred_source.upsert(self, [req]) + + async def overwrite_output_source(self) -> None: + pred_source = self.model.predictions_view.source + if not pred_source: + raise ValueError('No source defined for predictions view') + + if not isinstance(pred_source, WritableFeatureSource): + raise ValueError('Source for predictions view is not writable') + + req = self.model.predictions_view.request('preds') + await pred_source.overwrite(self, [req]) diff --git a/aligned/schemas/model.py b/aligned/schemas/model.py index 5b928e0..3da202c 100644 --- a/aligned/schemas/model.py +++ b/aligned/schemas/model.py @@ -11,6 +11,7 @@ from aligned.schemas.feature_view import CompiledFeatureView, FeatureViewReferenceSource from aligned.schemas.derivied_feature import DerivedFeature from aligned.schemas.folder import DatasetStore +from aligned.exposed_model.interface import ExposedModel from aligned.data_source.stream_data_source import StreamDataSource from aligned.data_source.batch_data_source import
BatchDataSource from aligned.retrival_job import RetrivalJob @@ -156,6 +157,7 @@ class Model(Codable): tags: list[str] | None = field(default=None) dataset_store: DatasetStore | None = field(default=None) exposed_at_url: str | None = field(default=None) + exposed_model: ExposedModel | None = field(default=None) def __hash__(self) -> int: return self.name.__hash__() diff --git a/aligned/schemas/transformation.py b/aligned/schemas/transformation.py index be4b1be..063731d 100644 --- a/aligned/schemas/transformation.py +++ b/aligned/schemas/transformation.py @@ -234,6 +234,8 @@ def __init__(self) -> None: Clip, ArrayContains, ArrayAtIndex, + OllamaGenerate, + OllamaEmbedding, ]: self.add(tran_type) @@ -2156,6 +2157,109 @@ async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | return pl.col(self.key).struct.field(self.field).alias(alias) +@dataclass +class OllamaGenerate(Transformation): + + key: str + model: str + system: str + + host_env: str | None = None + name = 'ollama_generate' + dtype = FeatureType.json() + + async def transform_pandas(self, df: pd.DataFrame) -> pd.Series: + from ollama import AsyncClient + import os + + host = None + if self.host_env: + host = os.getenv(self.host_env) + + client = AsyncClient(host=host) + + response = pd.Series([[]] * df.shape[0]) + + for index, row in df.iterrows(): + response.iloc[index] = await client.generate( + model=self.model, + prompt=row[self.key], + system=self.system, + ) + + return response + + async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr: + def generate_response(values: pl.Series) -> pl.Series: + from ollama import Client + import os + + host = None + if self.host_env: + host = os.getenv(self.host_env) + + client = Client(host=host) + + return pl.Series( + [ + str( + client.generate( + model=self.model, + prompt=value, + system=self.system, + ) + ) + for value in values + ] + ) + + return pl.col(self.key).map_batches(generate_response, return_dtype=pl.String()) + + +@dataclass +class OllamaEmbedding(Transformation): + + key: str + model: str + + host_env: str | None = None + name = 'ollama_embedding' + dtype = FeatureType.embedding() + + async def transform_pandas(self, df: pd.DataFrame) -> pd.Series: + from ollama import AsyncClient + import os + + host = None + if self.host_env: + host = os.getenv(self.host_env) + + client = AsyncClient(host=host) + + response = pd.Series([[]] * df.shape[0]) + + for index, row in df.iterrows(): + response.iloc[index] = (await client.embeddings(self.model, row[self.key]))['embedding'] + + return response + + async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr: + def generate_embedding(values: pl.Series) -> pl.Series: + from ollama import Client + import os + + host = None + if self.host_env: + host = os.getenv(self.host_env) + + client = Client(host=host) + + values = [client.embeddings(self.model, value)['embedding'] for value in values] + return pl.Series(values) + + return pl.col(self.key).map_batches(generate_embedding, return_dtype=pl.List(pl.Float64())) + + @dataclass class JsonPath(Transformation): diff --git a/aligned/sources/local.py b/aligned/sources/local.py index db2f343..0b4489b 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -313,11 +313,11 @@ class ParquetFileSource(BatchDataSource, ColumnFeatureMappable, DataFileReferenc @property def to_markdown(self) -> str: return f'''#### Parquet File - *Renames*: {self.mapping_keys} +*Renames*: {self.mapping_keys} - *File*: {self.path}
+*File*: {self.path} - [Go to file]({self.path})''' +[Go to file]({self.path})''' def job_group_key(self) -> str: return f'{self.type_name}/{self.path}' diff --git a/aligned/tests/test_model_target.py b/aligned/tests/test_model_target.py index c2f1112..dce0676 100644 --- a/aligned/tests/test_model_target.py +++ b/aligned/tests/test_model_target.py @@ -73,7 +73,7 @@ async def test_model_wrapper() -> None: @model_contract( name='test_model', - features=[], + input_features=[], ) class TestModel: id = Int32().as_entity() @@ -82,7 +82,7 @@ class TestModel: test_model_features = TestModel() - @model_contract(name='new_model', features=[test_model_features.a]) + @model_contract(name='new_model', input_features=[test_model_features.a]) class NewModel: id = Int32().as_entity() @@ -120,8 +120,8 @@ async def test_model_insert_predictions() -> None: @model_contract( name='test_model', - features=[], - prediction_source=FileSource.parquet_at(path).with_renames({'some_id': 'id'}), + input_features=[], + output_source=FileSource.parquet_at(path).with_renames({'some_id': 'id'}), ) class TestModel: id = Int32().as_entity() @@ -157,8 +157,8 @@ async def test_model_insert_predictions_csv() -> None: @model_contract( name='test_model', - features=[], - prediction_source=FileSource.csv_at(path).with_renames({'some_id': 'id'}), + input_features=[], + output_source=FileSource.csv_at(path).with_renames({'some_id': 'id'}), ) class TestModel: id = Int32().as_entity() @@ -193,7 +193,7 @@ async def test_model_upsert_predictions() -> None: path = 'test_data/test_model.parquet' - @model_contract(name='test_model', features=[], prediction_source=FileSource.parquet_at(path)) + @model_contract(name='test_model', input_features=[], output_source=FileSource.parquet_at(path)) class TestModel: id = Int32().as_entity() diff --git a/aligned/tests/test_models_as_feature.py b/aligned/tests/test_models_as_feature.py index 6aacbbc..51dc6e5 100644 --- a/aligned/tests/test_models_as_feature.py +++ b/aligned/tests/test_models_as_feature.py @@ -27,7 +27,7 @@ class OtherView: @model_contract( 'test_model', - features=FeatureInputVersions( + input_features=FeatureInputVersions( default_version='v1', versions={ 'v1': [view.feature_a, other.feature_b], @@ -43,7 +43,7 @@ class First: first = First() -@model_contract('second_model', features=[first.target]) +@model_contract('second_model', input_features=[first.target]) class Second: other_id = Int32().as_entity() view_id = Int32().as_entity() diff --git a/conftest.py b/conftest.py index 7c59756..6a4ec04 100644 --- a/conftest.py +++ b/conftest.py @@ -436,7 +436,7 @@ def titanic_model(titanic_feature_view: FeatureView) -> ModelContractWrapper: @model_contract( name='titanic', description='A model predicting if a passenger will survive', - features=[ + input_features=[ features.age, # type: ignore features.sibsp, # type: ignore features.has_siblings, # type: ignore @@ -629,7 +629,7 @@ def titanic_model_scd(titanic_feature_view_scd: FeatureView) -> ModelContractWra @model_contract( 'titanic', description='A model predicting if a passenger will survive', - features=[features.age, features.sibsp, features.has_siblings, features.is_male], # type: ignore + input_features=[features.age, features.sibsp, features.has_siblings, features.is_male], # type: ignore acceptable_freshness=timedelta(days=1), unacceptable_freshness=timedelta(days=2), ) diff --git a/poetry.lock b/poetry.lock index d8ccd8e..b8a6cbd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -556,45 +556,46 @@ files = [ [[package]] name = 
"httpcore" -version = "0.16.3" +version = "1.0.5" description = "A minimal low-level HTTP client." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "httpcore-0.16.3-py3-none-any.whl", hash = "sha256:da1fb708784a938aa084bde4feb8317056c55037247c787bd7e19eb2c2949dc0"}, - {file = "httpcore-0.16.3.tar.gz", hash = "sha256:c5d6f04e2fc530f39e0c077e6a30caa53f1451096120f1f38b954afd0b17c0cb"}, + {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, + {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, ] [package.dependencies] -anyio = ">=3.0,<5.0" certifi = "*" h11 = ">=0.13,<0.15" -sniffio = "==1.*" [package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<0.26.0)"] [[package]] name = "httpx" -version = "0.23.3" +version = "0.27.0" description = "The next generation HTTP client." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "httpx-0.23.3-py3-none-any.whl", hash = "sha256:a211fcce9b1254ea24f0cd6af9869b3d29aba40154e947d2a07bb499b3e310d6"}, - {file = "httpx-0.23.3.tar.gz", hash = "sha256:9818458eb565bb54898ccb9b8b251a28785dd4a55afbc23d0eb410754fe7d0f9"}, + {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, + {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, ] [package.dependencies] +anyio = "*" certifi = "*" -httpcore = ">=0.15.0,<0.17.0" -rfc3986 = {version = ">=1.3,<2", extras = ["idna2008"]} +httpcore = "==1.*" +idna = "*" sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<13)"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -832,6 +833,20 @@ files = [ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] +[[package]] +name = "ollama" +version = "0.1.8" +description = "The official Python client for Ollama." 
+optional = false +python-versions = "<4.0,>=3.8" +files = [ + {file = "ollama-0.1.8-py3-none-any.whl", hash = "sha256:45916aaf99c6e41a73197e9be8a17ea90290938894ac29d2fa855f55cb70d2e3"}, + {file = "ollama-0.1.8.tar.gz", hash = "sha256:148a0ff1ce87c904ad8d137f14dda6919f833e83c61457e5e2d449507c43a1bd"}, +] + +[package.dependencies] +httpx = ">=0.27.0,<0.28.0" + [[package]] name = "packaging" version = "23.2" @@ -1462,23 +1477,6 @@ async-timeout = {version = ">=4.0.2", markers = "python_full_version <= \"3.11.2 hiredis = ["hiredis (>=1.0.0)"] ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] -[[package]] -name = "rfc3986" -version = "1.5.0" -description = "Validating URI References per RFC 3986" -optional = false -python-versions = "*" -files = [ - {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"}, - {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"}, -] - -[package.dependencies] -idna = {version = "*", optional = true, markers = "extra == \"idna2008\""} - -[package.extras] -idna2008 = ["idna"] - [[package]] name = "six" version = "1.16.0" @@ -1836,4 +1834,4 @@ server = ["asgi-correlation-id", "fastapi", "prometheus-fastapi-instrumentator", [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "c221da4b6f89a40e5914fff5320a7cf2039379a7b929518504597bc26af91622" +content-hash = "bebec91e8ad79dfbc1cb1a208404260607ebf7752cc4ba6afbe12fbb788449e7" diff --git a/pyproject.toml b/pyproject.toml index c51058f..a3950a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,6 @@ pydantic = "^2.0.0" prometheus_client = "^0.16.0" asgi-correlation-id = { version = "^3.0.0", optional = true } pandera = { version = "^0.17.0", optional = true} -httpx = "^0.23.0" polars = { version = "^0.20.0", extras = ["pyarrow"] } pillow = { version = "^9.4.0", optional = true } prometheus-fastapi-instrumentator = { version="^5.9.1", optional = true } @@ -68,7 +67,9 @@ prometheus-fastapi-instrumentator = { version="^5.9.1", optional = true } kafka-python = { version= "^2.0.2", optional = true } connectorx = { version = "^0.3.2", optional = true } asyncpg = { version = "^0.29.0", optional = true } -sqlglot = "^22.5.0" +sqlglot = { version = "^22.5.0", optional = true } +ollama = { version = "^0.1.8", optional = true } +httpx = "^0.27.0" [tool.poetry.extras] aws = ["aioaws", "connectorx"] @@ -78,6 +79,8 @@ server = ["asgi-correlation-id", "fastapi", "uvicorn", "prometheus-fastapi-instr pandera = ["pandera"] image = ["pillow"] kafka = ["kafka-python"] +ollama = ["ollama"] +sql = ["sqlglot"] # text = ["gensim", "openai", "sentence-transformers"] [tool.poetry.group.dev.dependencies] From d45fc431d7158ff33f39f01a8d9be100e7c5cd94 Mon Sep 17 00:00:00 2001 From: "Mats E. 
Mollestad" Date: Tue, 2 Apr 2024 21:15:38 +0200 Subject: [PATCH 02/13] Added mlflow exposed model --- .gitignore | 1 + aligned/exposed_model/interface.py | 91 ++ aligned/exposed_model/tests/test_mlflow.py | 59 + aligned/feature_store.py | 27 +- aligned/feature_view/feature_view.py | 2 +- aligned/local/job.py | 13 +- aligned/retrival_job.py | 10 + aligned/sources/local.py | 23 +- aligned/tests/test_models_as_feature.py | 8 +- conftest.py | 2 +- poetry.lock | 1436 +++++++++++++++++--- pyproject.toml | 2 + test_data/credit_history.csv | 14 +- test_data/credit_history.parquet | Bin 1603 -> 1619 bytes test_data/credit_history_agg.parquet | Bin 1603 -> 1619 bytes test_data/credit_history_mater.parquet | Bin 987 -> 987 bytes test_data/data/csv_iso.csv | 6 +- test_data/data/csv_unix.csv | 6 +- test_data/data/parquet_iso.parquet | Bin 1133 -> 1139 bytes test_data/data/parquet_unix.parquet | Bin 1077 -> 1077 bytes test_data/feature-store.json | 2 +- test_data/loan.csv | 14 +- test_data/test_model.csv | 8 +- test_data/test_model.parquet | Bin 598 -> 594 bytes test_data/titanic-sets.json | 2 +- test_data/titanic-test.csv | 42 +- test_data/titanic-train.csv | 122 +- test_data/titanic-validate.csv | 42 +- 28 files changed, 1608 insertions(+), 324 deletions(-) create mode 100644 aligned/exposed_model/tests/test_mlflow.py diff --git a/.gitignore b/.gitignore index f181542..5dd4726 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .DS_STORE test_data/feature-store.json +test_data/mlruns # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/aligned/exposed_model/interface.py b/aligned/exposed_model/interface.py index 7bfffcf..16fb260 100644 --- a/aligned/exposed_model/interface.py +++ b/aligned/exposed_model/interface.py @@ -108,6 +108,24 @@ def ollama_embedding( embedding_name=embedding_name or 'embedding', ) + @staticmethod + def in_memory_mlflow( + model_name: str, + model_alias: str, + prediction_column: str, + model_version_column: str | None = None, + predicted_at_column: str | None = None, + model_contract_version_tag: str | None = None, + ): + return InMemMLFlowAlias( + model_name=model_name, + model_alias=model_alias, + prediction_column=prediction_column, + predicted_at_column=predicted_at_column or 'predicted_at', + model_version_column=model_version_column or 'model_version', + model_contract_version_tag=model_contract_version_tag, + ) + @dataclass class EnitityPredictor(ExposedModel): @@ -287,3 +305,76 @@ async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl. 
return prompts.hstack([pl.Series(name=self.embedding_name, values=ret_vals)]).with_columns( pl.lit(model_version).alias('model_version') ) + + +@dataclass +class InMemMLFlowAlias(ExposedModel): + + model_name: str + model_alias: str + + prediction_column: str + predicted_at_column: str + model_version_column: str + + model_contract_version_tag: str | None + + model_type: str = 'latest_mlflow' + + @property + def exposed_at_url(self) -> str | None: + return None + + @property + def as_markdown(self) -> str: + return f"""Using the latest MLFlow model: `{self.model_name}`.""" + + def get_model_version(self): + from mlflow.tracking import MlflowClient + + mlflow_client = MlflowClient() + + return mlflow_client.get_model_version_by_alias(self.model_name, self.model_alias) + + def contract_version(self, model_version) -> str: + version = 'default' + if self.model_contract_version_tag: + if self.model_contract_version_tag not in model_version.tags: # noqa + raise ValueError( + f"Model contract version tag {self.model_contract_version_tag} not found in model version tags" + ) + else: + version = model_version.tags[self.model_contract_version_tag] + return version + + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + mv = self.get_model_version() + version = self.contract_version(mv) + return store.feature_references_for(version) + + async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: + mv = self.get_model_version() + version = self.contract_version(mv) + return store.using_version(version).needed_entities() + + async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: + import mlflow + import polars as pl + from datetime import datetime, timezone + + model_uri = f"models:/{self.model_name}@{self.model_alias}" + mv = self.get_model_version() + + model = mlflow.pyfunc.load_model(model_uri) + + job = store.features_for(values) + df = await job.to_polars() + features = job.request_result.feature_columns + + predictions = model.predict(df[features]) + + return df.with_columns( + pl.Series(name=self.prediction_column, values=predictions), + pl.lit(mv.run_id).alias(self.model_version_column), + pl.lit(datetime.now(timezone.utc)).alias(self.predicted_at_column), + ) diff --git a/aligned/exposed_model/tests/test_mlflow.py b/aligned/exposed_model/tests/test_mlflow.py new file mode 100644 index 0000000..77b98da --- /dev/null +++ b/aligned/exposed_model/tests/test_mlflow.py @@ -0,0 +1,59 @@ +from contextlib import suppress +import pytest +from aligned import ExposedModel, model_contract, String, Int32, EventTimestamp, feature_view, FileSource + + +@pytest.mark.asyncio +async def test_mlflow() -> None: + from mlflow.tracking import MlflowClient + import mlflow + + mlflow.set_tracking_uri('test_data/mlruns') + + model_name = 'test_model' + model_alias = 'Champion' + + mlflow_client = MlflowClient() + + with suppress(mlflow.exceptions.MlflowException): + mlflow_client.delete_registered_model(model_name) + + def predict(data): + return data * 2 + + mlflow.pyfunc.log_model( + artifact_path='model', python_model=predict, registered_model_name=model_name, input_example=[1, 2, 3] + ) + mlflow_client.set_registered_model_alias(name=model_name, alias=model_alias, version=1) # type: ignore + + @feature_view( + name='input', + source=FileSource.parquet_at('non-existing-data'), + ) + class InputFeatureView: + entity_id = String().as_entity() + x = Int32() + + @model_contract( + input_features=[InputFeatureView().x], + 
exposed_model=ExposedModel.in_memory_mlflow( + model_name=model_name, + model_alias=model_alias, + prediction_column='prediction', + model_version_column='model_version', + predicted_at_column='predicted_at', + ), + ) + class MyModelContract: + entity_id = String().as_entity() + predicted_at = EventTimestamp() + prediction = Int32() + model_version = String().as_model_version() + + preds = await MyModelContract.predict_over( + values={'entity_id': ['a', 'b'], 'x': [1, 2]}, needed_views=[InputFeatureView] + ).to_polars() + + assert preds['prediction'].to_list() == [2, 4] + assert 'model_version' in preds.columns + assert 'predicted_at' in preds.columns diff --git a/aligned/feature_store.py b/aligned/feature_store.py index d95b85e..5c9a0b6 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -1,5 +1,8 @@ from __future__ import annotations +import polars as pl +import pandas as pd + import logging from collections import defaultdict from dataclasses import dataclass, field @@ -429,15 +432,28 @@ def features_for( if view.name == request.location.name: feature_names.update(request.all_feature_names) + if not isinstance(entities, RetrivalJob): + entities = RetrivalJob.from_convertable(entities, requests) + + existing_features = set(entities.loaded_columns) + + loaded_requests = [] + for request_index in range(len(requests.needed_requests)): request = requests.needed_requests[request_index] feature_names.update(request.entity_names) - if isinstance(entities, dict): - # Do not load the features if they already exist as an entity - request.features = {feature for feature in request.features if feature.name not in entities} + if request.features_to_include - existing_features: + request.features = { + feature for feature in request.features if feature.name not in existing_features + } + loaded_requests.append(request) + + if not loaded_requests: + return entities - return self.features_for_request(requests, entities, feature_names) + new_request = FeatureRequest(requests.location, requests.features_to_include, loaded_requests) + return self.features_for_request(new_request, entities, feature_names) def model(self, name: str) -> ModelFeatureStore: """ @@ -983,9 +999,6 @@ def features_for( Returns: RetrivalJob: A retrival job that can be used to fetch the features """ - import polars as pl - import pandas as pd - request = self.request(event_timestamp_column=event_timestamp_column) if isinstance(entities, dict): features = self.raw_string_features(set(entities.keys())) diff --git a/aligned/feature_view/feature_view.py b/aligned/feature_view/feature_view.py index 6e51a20..62b510a 100644 --- a/aligned/feature_view/feature_view.py +++ b/aligned/feature_view/feature_view.py @@ -87,7 +87,7 @@ def resolve_source(source: BatchDataSource | FeatureViewWrapper) -> BatchDataSou elif isinstance(source, BatchDataSource): return source else: - raise ValueError(f'Unable to use source: {source}') + raise ValueError(f'Unable to use source: {type(source)} - {source}') def feature_view( diff --git a/aligned/local/job.py b/aligned/local/job.py index a02bedf..43faa90 100644 --- a/aligned/local/job.py +++ b/aligned/local/job.py @@ -29,6 +29,10 @@ def __init__(self, df: pl.LazyFrame | pd.DataFrame, requests: list[RetrivalReque else: self.df = df + @property + def loaded_columns(self) -> list[str]: + return self.df.columns + @property def retrival_requests(self) -> list[RetrivalRequest]: return self.requests @@ -169,11 +173,18 @@ def decode_timestamps(df: pl.LazyFrame, request: RetrivalRequest, formatter: 
Dat @dataclass class FileFullJob(RetrivalJob): - source: DataFileReference + source: DataFileReference | RetrivalJob request: RetrivalRequest limit: int | None = field(default=None) date_formatter: DateFormatter = field(default_factory=DateFormatter.iso_8601) + @property + def loaded_columns(self) -> list[str]: + if isinstance(self.source, DataFileReference): + return [] + else: + return self.source.loaded_columns + @property def request_result(self) -> RequestResult: return self.request.request_result diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py index 1eceb91..0a64b70 100644 --- a/aligned/retrival_job.py +++ b/aligned/retrival_job.py @@ -466,6 +466,12 @@ class RetrivalJob(ABC): + @property + def loaded_columns(self) -> list[str]: + if isinstance(self, ModificationJob): + return self.job.loaded_columns + return [] + @property def request_result(self) -> RequestResult: if isinstance(self, ModificationJob): @@ -1294,6 +1300,10 @@ class LiteralDictJob(RetrivalJob): data: dict[str, list] requests: list[RetrivalRequest] + @property + def loaded_columns(self) -> list[str]: + return list(self.data.keys()) + @property def request_result(self) -> RequestResult: return RequestResult.from_request_list(self.requests) diff --git a/aligned/sources/local.py b/aligned/sources/local.py index 0b4489b..cb1257a 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -77,7 +77,18 @@ async def data_file_freshness(reference: DataFileReference, column_name: str) -> def create_parent_dir(path: str) -> None: + + parents = [] + + file_path = Path(path) + parent = file_path.parent + + while not parent.is_dir(): + parents.append(parent) + parent = parent.parent + + for parent in reversed(parents): + parent.mkdir(exist_ok=True) - Path(path).parent.mkdir(exist_ok=True) def do_file_exist(path: str) -> bool: @@ -105,6 +116,7 @@ class CsvFileSource(BatchDataSource, ColumnFeatureMappable, DataFileReference, W mapping_keys: dict[str, str] = field(default_factory=dict) csv_config: CsvConfig = field(default_factory=CsvConfig) formatter: DateFormatter = field(default_factory=DateFormatter.iso_8601) + expected_schema: dict[str, FeatureType] | None = field(default=None) type_name: str = 'csv' @@ -232,7 +244,14 @@ def enricher(self) -> CsvFileEnricher: return CsvFileEnricher(file=self.path) def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob: - return FileFullJob(self, request, limit, date_formatter=self.formatter) + with_schema = CsvFileSource( + path=self.path, + mapping_keys=self.mapping_keys, + csv_config=self.csv_config, + formatter=self.formatter, + expected_schema={feat.name: feat.dtype for feat in request.features}, + ) + return FileFullJob(with_schema, request, limit, date_formatter=self.formatter) def all_between_dates( self, request: RetrivalRequest, start_date: datetime, end_date: datetime diff --git a/aligned/tests/test_models_as_feature.py b/aligned/tests/test_models_as_feature.py index 51dc6e5..88359d4 100644 --- a/aligned/tests/test_models_as_feature.py +++ b/aligned/tests/test_models_as_feature.py @@ -4,7 +4,7 @@ from aligned.schemas.feature import FeatureLocation -@feature_view('view', FileSource.csv_at(''), 'test') +@feature_view(source=FileSource.csv_at('')) class View: view_id = Int32().as_entity() @@ -12,7 +12,7 @@ class View: feature_a = String() -@feature_view('other', FileSource.csv_at(''), 'test') +@feature_view(source=FileSource.csv_at('')) class OtherView: other_id =
Int32().as_entity() @@ -26,7 +26,7 @@ class OtherView: @model_contract( - 'test_model', + name='test_model', input_features=FeatureInputVersions( default_version='v1', versions={ @@ -43,7 +43,7 @@ class First: first = First() -@model_contract('second_model', input_features=[first.target]) +@model_contract(name='second_model', input_features=[first.target]) class Second: other_id = Int32().as_entity() view_id = Int32().as_entity() diff --git a/conftest.py b/conftest.py index 6a4ec04..d306d3a 100644 --- a/conftest.py +++ b/conftest.py @@ -627,7 +627,7 @@ def titanic_model_scd(titanic_feature_view_scd: FeatureView) -> ModelContractWra features = titanic_feature_view_scd @model_contract( - 'titanic', + name='titanic', description='A model predicting if a passenger will survive', input_features=[features.age, features.sibsp, features.has_siblings, features.is_male], # type: ignore acceptable_freshness=timedelta(days=1), diff --git a/poetry.lock b/poetry.lock index b8a6cbd..4c3d03d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -28,6 +28,39 @@ files = [ {file = "aiofiles-23.2.1.tar.gz", hash = "sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a"}, ] +[[package]] +name = "alembic" +version = "1.13.1" +description = "A database migration tool for SQLAlchemy." +optional = true +python-versions = ">=3.8" +files = [ + {file = "alembic-1.13.1-py3-none-any.whl", hash = "sha256:2edcc97bed0bd3272611ce3a98d98279e9c209e7186e43e75bbb1b2bdfdbcc43"}, + {file = "alembic-1.13.1.tar.gz", hash = "sha256:4932c8558bf68f2ee92b9bbcb8218671c627064d5b08939437af6d77dc05e595"}, +] + +[package.dependencies] +Mako = "*" +SQLAlchemy = ">=1.3.0" +typing-extensions = ">=4" + +[package.extras] +tz = ["backports.zoneinfo"] + +[[package]] +name = "aniso8601" +version = "9.0.1" +description = "A library for parsing ISO 8601 strings." 
+optional = true +python-versions = "*" +files = [ + {file = "aniso8601-9.0.1-py2.py3-none-any.whl", hash = "sha256:1d2b7ef82963909e93c4f24ce48d4de9e66009a21bf1c1e1c85bdd0812fe412f"}, + {file = "aniso8601-9.0.1.tar.gz", hash = "sha256:72e3117667eedf66951bb2d93f4296a56b94b078a8a95905a052611fb3f1b973"}, +] + +[package.extras] +dev = ["black", "coverage", "isort", "pre-commit", "pyenchant", "pylint"] + [[package]] name = "annotated-types" version = "0.6.0" @@ -80,13 +113,13 @@ celery = ["celery"] [[package]] name = "asgiref" -version = "3.7.2" +version = "3.8.1" description = "ASGI specs, helper code, and adapters" optional = true -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, - {file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, + {file = "asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47"}, + {file = "asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590"}, ] [package.dependencies] @@ -209,6 +242,17 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "blinker" +version = "1.7.0" +description = "Fast, simple object-to-object and broadcast signaling" +optional = true +python-versions = ">=3.8" +files = [ + {file = "blinker-1.7.0-py3-none-any.whl", hash = "sha256:c3f865d4d54db7abc53758a01601cf343fe55b84c1de4e3fa910e420b438d5b9"}, + {file = "blinker-1.7.0.tar.gz", hash = "sha256:e6820ff6fa4e4d1d8e2747c2283749c3f547e4fee112b98555cdcdae32996182"}, +] + [[package]] name = "certifi" version = "2024.2.2" @@ -284,6 +328,105 @@ files = [ [package.dependencies] pycparser = "*" +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = true +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + [[package]] name = "click" version = "8.1.7" @@ -298,6 +441,17 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "cloudpickle" +version = "3.0.0" +description = "Pickler class to extend the standard pickle.Pickler functionality" +optional = true +python-versions = ">=3.8" +files = [ + {file = "cloudpickle-3.0.0-py3-none-any.whl", hash = "sha256:246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7"}, + {file = "cloudpickle-3.0.0.tar.gz", hash = "sha256:996d9a482c6fb4f33c1a35335cf8afd065d2a56e973270364840712d9131a882"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -334,45 +488,108 @@ files = [ {file = "connectorx-0.3.2-cp39-none-win_amd64.whl", hash = "sha256:0b80acca13326856c14ee726b47699011ab1baa10897180240c8783423ca5e8c"}, ] +[[package]] +name = "contourpy" +version = "1.2.0" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = true +python-versions = ">=3.9" +files = [ + {file = "contourpy-1.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0274c1cb63625972c0c007ab14dd9ba9e199c36ae1a231ce45d725cbcbfd10a8"}, + {file = "contourpy-1.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ab459a1cbbf18e8698399c595a01f6dcc5c138220ca3ea9e7e6126232d102bb4"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fdd887f17c2f4572ce548461e4f96396681212d858cae7bd52ba3310bc6f00f"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d16edfc3fc09968e09ddffada434b3bf989bf4911535e04eada58469873e28e"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c203f617abc0dde5792beb586f827021069fb6d403d7f4d5c2b543d87edceb9"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b69303ceb2e4d4f146bf82fda78891ef7bcd80c41bf16bfca3d0d7eb545448aa"}, + {file = "contourpy-1.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:884c3f9d42d7218304bc74a8a7693d172685c84bd7ab2bab1ee567b769696df9"}, + {file = "contourpy-1.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4a1b1208102be6e851f20066bf0e7a96b7d48a07c9b0cfe6d0d4545c2f6cadab"}, + {file = "contourpy-1.2.0-cp310-cp310-win32.whl", hash = "sha256:34b9071c040d6fe45d9826cbbe3727d20d83f1b6110d219b83eb0e2a01d79488"}, + {file = "contourpy-1.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:bd2f1ae63998da104f16a8b788f685e55d65760cd1929518fd94cd682bf03e41"}, + {file = "contourpy-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dd10c26b4eadae44783c45ad6655220426f971c61d9b239e6f7b16d5cdaaa727"}, + {file = 
"contourpy-1.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5c6b28956b7b232ae801406e529ad7b350d3f09a4fde958dfdf3c0520cdde0dd"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebeac59e9e1eb4b84940d076d9f9a6cec0064e241818bcb6e32124cc5c3e377a"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:139d8d2e1c1dd52d78682f505e980f592ba53c9f73bd6be102233e358b401063"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e9dc350fb4c58adc64df3e0703ab076f60aac06e67d48b3848c23647ae4310e"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18fc2b4ed8e4a8fe849d18dce4bd3c7ea637758c6343a1f2bae1e9bd4c9f4686"}, + {file = "contourpy-1.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:16a7380e943a6d52472096cb7ad5264ecee36ed60888e2a3d3814991a0107286"}, + {file = "contourpy-1.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8d8faf05be5ec8e02a4d86f616fc2a0322ff4a4ce26c0f09d9f7fb5330a35c95"}, + {file = "contourpy-1.2.0-cp311-cp311-win32.whl", hash = "sha256:67b7f17679fa62ec82b7e3e611c43a016b887bd64fb933b3ae8638583006c6d6"}, + {file = "contourpy-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:99ad97258985328b4f207a5e777c1b44a83bfe7cf1f87b99f9c11d4ee477c4de"}, + {file = "contourpy-1.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:575bcaf957a25d1194903a10bc9f316c136c19f24e0985a2b9b5608bdf5dbfe0"}, + {file = "contourpy-1.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9e6c93b5b2dbcedad20a2f18ec22cae47da0d705d454308063421a3b290d9ea4"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:464b423bc2a009088f19bdf1f232299e8b6917963e2b7e1d277da5041f33a779"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:68ce4788b7d93e47f84edd3f1f95acdcd142ae60bc0e5493bfd120683d2d4316"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d7d1f8871998cdff5d2ff6a087e5e1780139abe2838e85b0b46b7ae6cc25399"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e739530c662a8d6d42c37c2ed52a6f0932c2d4a3e8c1f90692ad0ce1274abe0"}, + {file = "contourpy-1.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:247b9d16535acaa766d03037d8e8fb20866d054d3c7fbf6fd1f993f11fc60ca0"}, + {file = "contourpy-1.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:461e3ae84cd90b30f8d533f07d87c00379644205b1d33a5ea03381edc4b69431"}, + {file = "contourpy-1.2.0-cp312-cp312-win32.whl", hash = "sha256:1c2559d6cffc94890b0529ea7eeecc20d6fadc1539273aa27faf503eb4656d8f"}, + {file = "contourpy-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:491b1917afdd8638a05b611a56d46587d5a632cabead889a5440f7c638bc6ed9"}, + {file = "contourpy-1.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5fd1810973a375ca0e097dee059c407913ba35723b111df75671a1976efa04bc"}, + {file = "contourpy-1.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:999c71939aad2780f003979b25ac5b8f2df651dac7b38fb8ce6c46ba5abe6ae9"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7caf9b241464c404613512d5594a6e2ff0cc9cb5615c9475cc1d9b514218ae8"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:266270c6f6608340f6c9836a0fb9b367be61dde0c9a9a18d5ece97774105ff3e"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbd50d0a0539ae2e96e537553aff6d02c10ed165ef40c65b0e27e744a0f10af8"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11f8d2554e52f459918f7b8e6aa20ec2a3bce35ce95c1f0ef4ba36fbda306df5"}, + {file = "contourpy-1.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ce96dd400486e80ac7d195b2d800b03e3e6a787e2a522bfb83755938465a819e"}, + {file = "contourpy-1.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6d3364b999c62f539cd403f8123ae426da946e142312a514162adb2addd8d808"}, + {file = "contourpy-1.2.0-cp39-cp39-win32.whl", hash = "sha256:1c88dfb9e0c77612febebb6ac69d44a8d81e3dc60f993215425b62c1161353f4"}, + {file = "contourpy-1.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:78e6ad33cf2e2e80c5dfaaa0beec3d61face0fb650557100ee36db808bfa6843"}, + {file = "contourpy-1.2.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:be16975d94c320432657ad2402f6760990cb640c161ae6da1363051805fa8108"}, + {file = "contourpy-1.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b95a225d4948b26a28c08307a60ac00fb8671b14f2047fc5476613252a129776"}, + {file = "contourpy-1.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0d7e03c0f9a4f90dc18d4e77e9ef4ec7b7bbb437f7f675be8e530d65ae6ef956"}, + {file = "contourpy-1.2.0.tar.gz", hash = "sha256:171f311cb758de7da13fc53af221ae47a5877be5a0843a9fe150818c51ed276a"}, +] + +[package.dependencies] +numpy = ">=1.20,<2.0" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.6.1)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] + [[package]] name = "cryptography" -version = "42.0.4" +version = "42.0.5" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449"}, - {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18"}, - {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2"}, - {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1"}, - {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b"}, - {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1"}, - {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992"}, - {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885"}, - {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824"}, - {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b"}, - {file = "cryptography-42.0.4-cp37-abi3-win32.whl", hash = "sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925"}, - {file = "cryptography-42.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923"}, - {file = "cryptography-42.0.4-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7"}, - {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52"}, - {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a"}, - {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9"}, - {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764"}, - {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff"}, - {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257"}, - {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929"}, - {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0"}, - {file = "cryptography-42.0.4-cp39-abi3-win32.whl", hash = "sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129"}, - {file = "cryptography-42.0.4-cp39-abi3-win_amd64.whl", hash = 
"sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854"}, - {file = "cryptography-42.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298"}, - {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88"}, - {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20"}, - {file = "cryptography-42.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce"}, - {file = "cryptography-42.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74"}, - {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd"}, - {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b"}, - {file = "cryptography-42.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660"}, - {file = "cryptography-42.0.4.tar.gz", hash = "sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb"}, + {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16"}, + {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da"}, + {file = "cryptography-42.0.5-cp37-abi3-win32.whl", hash = "sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74"}, + {file = "cryptography-42.0.5-cp37-abi3-win_amd64.whl", hash = "sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940"}, + {file = "cryptography-42.0.5-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30"}, + {file = "cryptography-42.0.5-cp39-abi3-win32.whl", hash = "sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413"}, + {file = "cryptography-42.0.5-cp39-abi3-win_amd64.whl", hash = "sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd"}, + {file = "cryptography-42.0.5.tar.gz", hash = "sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1"}, ] [package.dependencies] @@ -388,6 +605,21 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "cycler" +version = "0.12.1" +description = "Composable style cycles" +optional = true +python-versions = ">=3.8" +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[package.extras] +docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] +tests = ["pytest", "pytest-cov", "pytest-xdist"] + 
[[package]] name = "dill" version = "0.3.8" @@ -403,6 +635,38 @@ files = [ graph = ["objgraph (>=1.7.2)"] profile = ["gprof2dot (>=2022.7.29)"] +[[package]] +name = "docker" +version = "7.0.0" +description = "A Python library for the Docker Engine API." +optional = true +python-versions = ">=3.8" +files = [ + {file = "docker-7.0.0-py3-none-any.whl", hash = "sha256:12ba681f2777a0ad28ffbcc846a69c31b4dfd9752b47eb425a274ee269c5e14b"}, + {file = "docker-7.0.0.tar.gz", hash = "sha256:323736fb92cd9418fc5e7133bc953e11a9da04f4483f828b527db553f1e7e5a3"}, +] + +[package.dependencies] +packaging = ">=14.0" +pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""} +requests = ">=2.26.0" +urllib3 = ">=1.26.0" + +[package.extras] +ssh = ["paramiko (>=2.4.3)"] +websockets = ["websocket-client (>=1.3.0)"] + +[[package]] +name = "entrypoints" +version = "0.4" +description = "Discover and load entry points from installed packages." +optional = true +python-versions = ">=3.6" +files = [ + {file = "entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f"}, + {file = "entrypoints-0.4.tar.gz", hash = "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4"}, +] + [[package]] name = "exceptiongroup" version = "1.2.0" @@ -419,13 +683,13 @@ test = ["pytest (>=6)"] [[package]] name = "fakeredis" -version = "2.21.1" +version = "2.21.3" description = "Python implementation of redis API, can be used for testing purposes." optional = false python-versions = ">=3.7,<4.0" files = [ - {file = "fakeredis-2.21.1-py3-none-any.whl", hash = "sha256:5d1b113a92c1e5dd6e8055008d9204ace4c125e104f04ac08cca4296bc6c78d4"}, - {file = "fakeredis-2.21.1.tar.gz", hash = "sha256:773bd03c38fe745c0c03c5b4ebb92521a25d3306f903c0ca65706bf65cf19e2a"}, + {file = "fakeredis-2.21.3-py3-none-any.whl", hash = "sha256:033fe5882a20ec308ed0cf67a86c1cd982a1bffa63deb0f52eaa625bd8ce305f"}, + {file = "fakeredis-2.21.3.tar.gz", hash = "sha256:e9e1c309d49d83c4ce1ab6f3ee2e56787f6a5573a305109017bf140334dd396d"}, ] [package.dependencies] @@ -458,6 +722,93 @@ typing-extensions = ">=4.5.0" [package.extras] all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +[[package]] +name = "flask" +version = "3.0.2" +description = "A simple framework for building complex web applications." 
+optional = true +python-versions = ">=3.8" +files = [ + {file = "flask-3.0.2-py3-none-any.whl", hash = "sha256:3232e0e9c850d781933cf0207523d1ece087eb8d87b23777ae38456e2fbe7c6e"}, + {file = "flask-3.0.2.tar.gz", hash = "sha256:822c03f4b799204250a7ee84b1eddc40665395333973dfb9deebfe425fefcb7d"}, +] + +[package.dependencies] +blinker = ">=1.6.2" +click = ">=8.1.3" +itsdangerous = ">=2.1.2" +Jinja2 = ">=3.1.2" +Werkzeug = ">=3.0.0" + +[package.extras] +async = ["asgiref (>=3.2)"] +dotenv = ["python-dotenv"] + +[[package]] +name = "fonttools" +version = "4.50.0" +description = "Tools to manipulate font files" +optional = true +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.50.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effd303fb422f8ce06543a36ca69148471144c534cc25f30e5be752bc4f46736"}, + {file = "fonttools-4.50.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7913992ab836f621d06aabac118fc258b9947a775a607e1a737eb3a91c360335"}, + {file = "fonttools-4.50.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e0a1c5bd2f63da4043b63888534b52c5a1fd7ae187c8ffc64cbb7ae475b9dab"}, + {file = "fonttools-4.50.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d40fc98540fa5360e7ecf2c56ddf3c6e7dd04929543618fd7b5cc76e66390562"}, + {file = "fonttools-4.50.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fff65fbb7afe137bac3113827855e0204482727bddd00a806034ab0d3951d0d"}, + {file = "fonttools-4.50.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1aeae3dd2ee719074a9372c89ad94f7c581903306d76befdaca2a559f802472"}, + {file = "fonttools-4.50.0-cp310-cp310-win32.whl", hash = "sha256:e9623afa319405da33b43c85cceb0585a6f5d3a1d7c604daf4f7e1dd55c03d1f"}, + {file = "fonttools-4.50.0-cp310-cp310-win_amd64.whl", hash = "sha256:778c5f43e7e654ef7fe0605e80894930bc3a7772e2f496238e57218610140f54"}, + {file = "fonttools-4.50.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3dfb102e7f63b78c832e4539969167ffcc0375b013080e6472350965a5fe8048"}, + {file = "fonttools-4.50.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e58fe34cb379ba3d01d5d319d67dd3ce7ca9a47ad044ea2b22635cd2d1247fc"}, + {file = "fonttools-4.50.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c673ab40d15a442a4e6eb09bf007c1dda47c84ac1e2eecbdf359adacb799c24"}, + {file = "fonttools-4.50.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b3ac35cdcd1a4c90c23a5200212c1bb74fa05833cc7c14291d7043a52ca2aaa"}, + {file = "fonttools-4.50.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8844e7a2c5f7ecf977e82eb6b3014f025c8b454e046d941ece05b768be5847ae"}, + {file = "fonttools-4.50.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f849bd3c5c2249b49c98eca5aaebb920d2bfd92b3c69e84ca9bddf133e9f83f0"}, + {file = "fonttools-4.50.0-cp311-cp311-win32.whl", hash = "sha256:39293ff231b36b035575e81c14626dfc14407a20de5262f9596c2cbb199c3625"}, + {file = "fonttools-4.50.0-cp311-cp311-win_amd64.whl", hash = "sha256:c33d5023523b44d3481624f840c8646656a1def7630ca562f222eb3ead16c438"}, + {file = "fonttools-4.50.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b4a886a6dbe60100ba1cd24de962f8cd18139bd32808da80de1fa9f9f27bf1dc"}, + {file = "fonttools-4.50.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b2ca1837bfbe5eafa11313dbc7edada79052709a1fffa10cea691210af4aa1fa"}, + {file = "fonttools-4.50.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a0493dd97ac8977e48ffc1476b932b37c847cbb87fd68673dee5182004906828"}, + {file = "fonttools-4.50.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77844e2f1b0889120b6c222fc49b2b75c3d88b930615e98893b899b9352a27ea"}, + {file = "fonttools-4.50.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3566bfb8c55ed9100afe1ba6f0f12265cd63a1387b9661eb6031a1578a28bad1"}, + {file = "fonttools-4.50.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:35e10ddbc129cf61775d58a14f2d44121178d89874d32cae1eac722e687d9019"}, + {file = "fonttools-4.50.0-cp312-cp312-win32.whl", hash = "sha256:cc8140baf9fa8f9b903f2b393a6c413a220fa990264b215bf48484f3d0bf8710"}, + {file = "fonttools-4.50.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ccc85fd96373ab73c59833b824d7a73846670a0cb1f3afbaee2b2c426a8f931"}, + {file = "fonttools-4.50.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e270a406219af37581d96c810172001ec536e29e5593aa40d4c01cca3e145aa6"}, + {file = "fonttools-4.50.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac2463de667233372e9e1c7e9de3d914b708437ef52a3199fdbf5a60184f190c"}, + {file = "fonttools-4.50.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47abd6669195abe87c22750dbcd366dc3a0648f1b7c93c2baa97429c4dc1506e"}, + {file = "fonttools-4.50.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:074841375e2e3d559aecc86e1224caf78e8b8417bb391e7d2506412538f21adc"}, + {file = "fonttools-4.50.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0743fd2191ad7ab43d78cd747215b12033ddee24fa1e088605a3efe80d6984de"}, + {file = "fonttools-4.50.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3d7080cce7be5ed65bee3496f09f79a82865a514863197ff4d4d177389e981b0"}, + {file = "fonttools-4.50.0-cp38-cp38-win32.whl", hash = "sha256:a467ba4e2eadc1d5cc1a11d355abb945f680473fbe30d15617e104c81f483045"}, + {file = "fonttools-4.50.0-cp38-cp38-win_amd64.whl", hash = "sha256:f77e048f805e00870659d6318fd89ef28ca4ee16a22b4c5e1905b735495fc422"}, + {file = "fonttools-4.50.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6245eafd553c4e9a0708e93be51392bd2288c773523892fbd616d33fd2fda59"}, + {file = "fonttools-4.50.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a4062cc7e8de26f1603323ef3ae2171c9d29c8a9f5e067d555a2813cd5c7a7e0"}, + {file = "fonttools-4.50.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34692850dfd64ba06af61e5791a441f664cb7d21e7b544e8f385718430e8f8e4"}, + {file = "fonttools-4.50.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:678dd95f26a67e02c50dcb5bf250f95231d455642afbc65a3b0bcdacd4e4dd38"}, + {file = "fonttools-4.50.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4f2ce7b0b295fe64ac0a85aef46a0f2614995774bd7bc643b85679c0283287f9"}, + {file = "fonttools-4.50.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d346f4dc2221bfb7ab652d1e37d327578434ce559baf7113b0f55768437fe6a0"}, + {file = "fonttools-4.50.0-cp39-cp39-win32.whl", hash = "sha256:a51eeaf52ba3afd70bf489be20e52fdfafe6c03d652b02477c6ce23c995222f4"}, + {file = "fonttools-4.50.0-cp39-cp39-win_amd64.whl", hash = "sha256:8639be40d583e5d9da67795aa3eeeda0488fb577a1d42ae11a5036f18fb16d93"}, + {file = "fonttools-4.50.0-py3-none-any.whl", hash = "sha256:48fa36da06247aa8282766cfd63efff1bb24e55f020f29a335939ed3844d20d3"}, + {file = "fonttools-4.50.0.tar.gz", hash = "sha256:fa5cf61058c7dbb104c2ac4e782bf1b2016a8cf2f69de6e4dd6a865d2c969bb5"}, +] + +[package.extras] +all = 
["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "pycairo", "scipy"] +lxml = ["lxml (>=4.0)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + [[package]] name = "freezegun" version = "1.4.0" @@ -472,6 +823,83 @@ files = [ [package.dependencies] python-dateutil = ">=2.7" +[[package]] +name = "gitdb" +version = "4.0.11" +description = "Git Object Database" +optional = true +python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.11-py3-none-any.whl", hash = "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4"}, + {file = "gitdb-4.0.11.tar.gz", hash = "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.43" +description = "GitPython is a Python library used to interact with Git repositories" +optional = true +python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.43-py3-none-any.whl", hash = "sha256:eec7ec56b92aad751f9912a73404bc02ba212a23adb2c7098ee668417051a1ff"}, + {file = "GitPython-3.1.43.tar.gz", hash = "sha256:35f314a9f878467f5453cc1fee295c3e18e52f1b99f10f6cf5b1682e968a9e7c"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib-applehelp (>=1.0.2,<=1.0.4)", "sphinxcontrib-devhelp (==1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.0,<=2.0.1)", "sphinxcontrib-qthelp (==1.0.3)", "sphinxcontrib-serializinghtml (==1.1.5)"] +test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] + +[[package]] +name = "graphene" +version = "3.3" +description = "GraphQL Framework for Python" +optional = true +python-versions = "*" +files = [ + {file = "graphene-3.3-py2.py3-none-any.whl", hash = "sha256:bb3810be33b54cb3e6969506671eb72319e8d7ba0d5ca9c8066472f75bf35a38"}, + {file = "graphene-3.3.tar.gz", hash = "sha256:529bf40c2a698954217d3713c6041d69d3f719ad0080857d7ee31327112446b0"}, +] + +[package.dependencies] +aniso8601 = ">=8,<10" +graphql-core = ">=3.1,<3.3" +graphql-relay = ">=3.1,<3.3" + +[package.extras] +dev = ["black (==22.3.0)", "coveralls (>=3.3,<4)", "flake8 (>=4,<5)", "iso8601 (>=1,<2)", "mock (>=4,<5)", "pytest (>=6,<7)", "pytest-asyncio (>=0.16,<2)", "pytest-benchmark (>=3.4,<4)", "pytest-cov (>=3,<4)", "pytest-mock (>=3,<4)", "pytz (==2022.1)", "snapshottest (>=0.6,<1)"] +test = ["coveralls (>=3.3,<4)", "iso8601 (>=1,<2)", "mock (>=4,<5)", "pytest (>=6,<7)", "pytest-asyncio (>=0.16,<2)", "pytest-benchmark (>=3.4,<4)", "pytest-cov (>=3,<4)", "pytest-mock (>=3,<4)", "pytz (==2022.1)", "snapshottest (>=0.6,<1)"] + +[[package]] +name = "graphql-core" +version = "3.2.3" +description = "GraphQL implementation for Python, a port of GraphQL.js, the JavaScript reference implementation for GraphQL." 
+optional = true +python-versions = ">=3.6,<4" +files = [ + {file = "graphql-core-3.2.3.tar.gz", hash = "sha256:06d2aad0ac723e35b1cb47885d3e5c45e956a53bc1b209a9fc5369007fe46676"}, + {file = "graphql_core-3.2.3-py3-none-any.whl", hash = "sha256:5766780452bd5ec8ba133f8bf287dc92713e3868ddd83aee4faab9fc3e303dc3"}, +] + +[[package]] +name = "graphql-relay" +version = "3.2.0" +description = "Relay library for graphql-core" +optional = true +python-versions = ">=3.6,<4" +files = [ + {file = "graphql-relay-3.2.0.tar.gz", hash = "sha256:1ff1c51298356e481a0be009ccdff249832ce53f30559c1338f22a0e0d17250c"}, + {file = "graphql_relay-3.2.0-py3-none-any.whl", hash = "sha256:c9b22bd28b170ba1fe674c74384a8ff30a76c8e26f88ac3aa1584dd3179953e5"}, +] + +[package.dependencies] +graphql-core = ">=3.2,<3.3" + [[package]] name = "greenlet" version = "3.0.3" @@ -543,6 +971,26 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] +[[package]] +name = "gunicorn" +version = "21.2.0" +description = "WSGI HTTP Server for UNIX" +optional = true +python-versions = ">=3.5" +files = [ + {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"}, + {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +eventlet = ["eventlet (>=0.24.1)"] +gevent = ["gevent (>=1.4.0)"] +setproctitle = ["setproctitle"] +tornado = ["tornado (>=0.2)"] + [[package]] name = "h11" version = "0.14.0" @@ -610,6 +1058,25 @@ files = [ {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, ] +[[package]] +name = "importlib-metadata" +version = "7.1.0" +description = "Read metadata from Python packages" +optional = true +python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"}, + {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -635,6 +1102,17 @@ files = [ [package.extras] colors = ["colorama (>=0.4.6)"] +[[package]] +name = "itsdangerous" +version = "2.1.2" +description = "Safely pass data to untrusted environments and back." 
+optional = true +python-versions = ">=3.7" +files = [ + {file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"}, + {file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"}, +] + [[package]] name = "jinja2" version = "3.1.3" @@ -652,6 +1130,17 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "joblib" +version = "1.3.2" +description = "Lightweight pipelining with Python functions" +optional = true +python-versions = ">=3.7" +files = [ + {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, + {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, +] + [[package]] name = "kafka-python" version = "2.0.2" @@ -666,6 +1155,153 @@ files = [ [package.extras] crc32c = ["crc32c"] +[[package]] +name = "kiwisolver" +version = "1.4.5" +description = "A fast implementation of the Cassowary constraint solver" +optional = true +python-versions = ">=3.7" +files = [ + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:05703cf211d585109fcd72207a31bb170a0f22144d68298dc5e61b3c946518af"}, + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:146d14bebb7f1dc4d5fbf74f8a6cb15ac42baadee8912eb84ac0b3b2a3dc6ac3"}, + {file = "kiwisolver-1.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ef7afcd2d281494c0a9101d5c571970708ad911d028137cd558f02b851c08b4"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9eaa8b117dc8337728e834b9c6e2611f10c79e38f65157c4c38e9400286f5cb1"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ec20916e7b4cbfb1f12380e46486ec4bcbaa91a9c448b97023fde0d5bbf9e4ff"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b42c68602539407884cf70d6a480a469b93b81b7701378ba5e2328660c847a"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa12042de0171fad672b6c59df69106d20d5596e4f87b5e8f76df757a7c399aa"}, + {file = "kiwisolver-1.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a40773c71d7ccdd3798f6489aaac9eee213d566850a9533f8d26332d626b82c"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:19df6e621f6d8b4b9c4d45f40a66839294ff2bb235e64d2178f7522d9170ac5b"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:83d78376d0d4fd884e2c114d0621624b73d2aba4e2788182d286309ebdeed770"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e391b1f0a8a5a10ab3b9bb6afcfd74f2175f24f8975fb87ecae700d1503cdee0"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:852542f9481f4a62dbb5dd99e8ab7aedfeb8fb6342349a181d4036877410f525"}, + {file = "kiwisolver-1.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59edc41b24031bc25108e210c0def6f6c2191210492a972d585a06ff246bb79b"}, + {file = "kiwisolver-1.4.5-cp310-cp310-win32.whl", hash = "sha256:a6aa6315319a052b4ee378aa171959c898a6183f15c1e541821c5c59beaa0238"}, + {file = "kiwisolver-1.4.5-cp310-cp310-win_amd64.whl", hash = "sha256:d0ef46024e6a3d79c01ff13801cb19d0cad7fd859b15037aec74315540acc276"}, + {file = 
"kiwisolver-1.4.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:11863aa14a51fd6ec28688d76f1735f8f69ab1fabf388851a595d0721af042f5"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8ab3919a9997ab7ef2fbbed0cc99bb28d3c13e6d4b1ad36e97e482558a91be90"}, + {file = "kiwisolver-1.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fcc700eadbbccbf6bc1bcb9dbe0786b4b1cb91ca0dcda336eef5c2beed37b797"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfdd7c0b105af050eb3d64997809dc21da247cf44e63dc73ff0fd20b96be55a9"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76c6a5964640638cdeaa0c359382e5703e9293030fe730018ca06bc2010c4437"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbea0db94288e29afcc4c28afbf3a7ccaf2d7e027489c449cf7e8f83c6346eb9"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ceec1a6bc6cab1d6ff5d06592a91a692f90ec7505d6463a88a52cc0eb58545da"}, + {file = "kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:040c1aebeda72197ef477a906782b5ab0d387642e93bda547336b8957c61022e"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f91de7223d4c7b793867797bacd1ee53bfe7359bd70d27b7b58a04efbb9436c8"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:faae4860798c31530dd184046a900e652c95513796ef51a12bc086710c2eec4d"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0157420efcb803e71d1b28e2c287518b8808b7cf1ab8af36718fd0a2c453eb0"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:06f54715b7737c2fecdbf140d1afb11a33d59508a47bf11bb38ecf21dc9ab79f"}, + {file = "kiwisolver-1.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fdb7adb641a0d13bdcd4ef48e062363d8a9ad4a182ac7647ec88f695e719ae9f"}, + {file = "kiwisolver-1.4.5-cp311-cp311-win32.whl", hash = "sha256:bb86433b1cfe686da83ce32a9d3a8dd308e85c76b60896d58f082136f10bffac"}, + {file = "kiwisolver-1.4.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c08e1312a9cf1074d17b17728d3dfce2a5125b2d791527f33ffbe805200a355"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:32d5cf40c4f7c7b3ca500f8985eb3fb3a7dfc023215e876f207956b5ea26632a"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f846c260f483d1fd217fe5ed7c173fb109efa6b1fc8381c8b7552c5781756192"}, + {file = "kiwisolver-1.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5ff5cf3571589b6d13bfbfd6bcd7a3f659e42f96b5fd1c4830c4cf21d4f5ef45"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7269d9e5f1084a653d575c7ec012ff57f0c042258bf5db0954bf551c158466e7"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da802a19d6e15dffe4b0c24b38b3af68e6c1a68e6e1d8f30148c83864f3881db"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3aba7311af82e335dd1e36ffff68aaca609ca6290c2cb6d821a39aa075d8e3ff"}, + {file = "kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:763773d53f07244148ccac5b084da5adb90bfaee39c197554f01b286cf869228"}, + {file = 
"kiwisolver-1.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2270953c0d8cdab5d422bee7d2007f043473f9d2999631c86a223c9db56cbd16"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d099e745a512f7e3bbe7249ca835f4d357c586d78d79ae8f1dcd4d8adeb9bda9"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:74db36e14a7d1ce0986fa104f7d5637aea5c82ca6326ed0ec5694280942d1162"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e5bab140c309cb3a6ce373a9e71eb7e4873c70c2dda01df6820474f9889d6d4"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0f114aa76dc1b8f636d077979c0ac22e7cd8f3493abbab152f20eb8d3cda71f3"}, + {file = "kiwisolver-1.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:88a2df29d4724b9237fc0c6eaf2a1adae0cdc0b3e9f4d8e7dc54b16812d2d81a"}, + {file = "kiwisolver-1.4.5-cp312-cp312-win32.whl", hash = "sha256:72d40b33e834371fd330fb1472ca19d9b8327acb79a5821d4008391db8e29f20"}, + {file = "kiwisolver-1.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:2c5674c4e74d939b9d91dda0fae10597ac7521768fec9e399c70a1f27e2ea2d9"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3a2b053a0ab7a3960c98725cfb0bf5b48ba82f64ec95fe06f1d06c99b552e130"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cd32d6c13807e5c66a7cbb79f90b553642f296ae4518a60d8d76243b0ad2898"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59ec7b7c7e1a61061850d53aaf8e93db63dce0c936db1fda2658b70e4a1be709"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da4cfb373035def307905d05041c1d06d8936452fe89d464743ae7fb8371078b"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2400873bccc260b6ae184b2b8a4fec0e4082d30648eadb7c3d9a13405d861e89"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1b04139c4236a0f3aff534479b58f6f849a8b351e1314826c2d230849ed48985"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:4e66e81a5779b65ac21764c295087de82235597a2293d18d943f8e9e32746265"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7931d8f1f67c4be9ba1dd9c451fb0eeca1a25b89e4d3f89e828fe12a519b782a"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:b3f7e75f3015df442238cca659f8baa5f42ce2a8582727981cbfa15fee0ee205"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:bbf1d63eef84b2e8c89011b7f2235b1e0bf7dacc11cac9431fc6468e99ac77fb"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4c380469bd3f970ef677bf2bcba2b6b0b4d5c75e7a020fb863ef75084efad66f"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-win32.whl", hash = "sha256:9408acf3270c4b6baad483865191e3e582b638b1654a007c62e3efe96f09a9a3"}, + {file = "kiwisolver-1.4.5-cp37-cp37m-win_amd64.whl", hash = "sha256:5b94529f9b2591b7af5f3e0e730a4e0a41ea174af35a4fd067775f9bdfeee01a"}, + {file = "kiwisolver-1.4.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:11c7de8f692fc99816e8ac50d1d1aef4f75126eefc33ac79aac02c099fd3db71"}, + {file = "kiwisolver-1.4.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:53abb58632235cd154176ced1ae8f0d29a6657aa1aa9decf50b899b755bc2b93"}, + {file = "kiwisolver-1.4.5-cp38-cp38-macosx_11_0_arm64.whl", 
hash = "sha256:88b9f257ca61b838b6f8094a62418421f87ac2a1069f7e896c36a7d86b5d4c29"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3195782b26fc03aa9c6913d5bad5aeb864bdc372924c093b0f1cebad603dd712"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc579bf0f502e54926519451b920e875f433aceb4624a3646b3252b5caa9e0b6"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a580c91d686376f0f7c295357595c5a026e6cbc3d77b7c36e290201e7c11ecb"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cfe6ab8da05c01ba6fbea630377b5da2cd9bcbc6338510116b01c1bc939a2c18"}, + {file = "kiwisolver-1.4.5-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d2e5a98f0ec99beb3c10e13b387f8db39106d53993f498b295f0c914328b1333"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a51a263952b1429e429ff236d2f5a21c5125437861baeed77f5e1cc2d2c7c6da"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3edd2fa14e68c9be82c5b16689e8d63d89fe927e56debd6e1dbce7a26a17f81b"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:74d1b44c6cfc897df648cc9fdaa09bc3e7679926e6f96df05775d4fb3946571c"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:76d9289ed3f7501012e05abb8358bbb129149dbd173f1f57a1bf1c22d19ab7cc"}, + {file = "kiwisolver-1.4.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:92dea1ffe3714fa8eb6a314d2b3c773208d865a0e0d35e713ec54eea08a66250"}, + {file = "kiwisolver-1.4.5-cp38-cp38-win32.whl", hash = "sha256:5c90ae8c8d32e472be041e76f9d2f2dbff4d0b0be8bd4041770eddb18cf49a4e"}, + {file = "kiwisolver-1.4.5-cp38-cp38-win_amd64.whl", hash = "sha256:c7940c1dc63eb37a67721b10d703247552416f719c4188c54e04334321351ced"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9407b6a5f0d675e8a827ad8742e1d6b49d9c1a1da5d952a67d50ef5f4170b18d"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:15568384086b6df3c65353820a4473575dbad192e35010f622c6ce3eebd57af9"}, + {file = "kiwisolver-1.4.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0dc9db8e79f0036e8173c466d21ef18e1befc02de8bf8aa8dc0813a6dc8a7046"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:cdc8a402aaee9a798b50d8b827d7ecf75edc5fb35ea0f91f213ff927c15f4ff0"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6c3bd3cde54cafb87d74d8db50b909705c62b17c2099b8f2e25b461882e544ff"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:955e8513d07a283056b1396e9a57ceddbd272d9252c14f154d450d227606eb54"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:346f5343b9e3f00b8db8ba359350eb124b98c99efd0b408728ac6ebf38173958"}, + {file = "kiwisolver-1.4.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b9098e0049e88c6a24ff64545cdfc50807818ba6c1b739cae221bbbcbc58aad3"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:00bd361b903dc4bbf4eb165f24d1acbee754fce22ded24c3d56eec268658a5cf"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7b8b454bac16428b22560d0a1cf0a09875339cab69df61d7805bf48919415901"}, + {file = 
"kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f1d072c2eb0ad60d4c183f3fb44ac6f73fb7a8f16a2694a91f988275cbf352f9"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:31a82d498054cac9f6d0b53d02bb85811185bcb477d4b60144f915f3b3126342"}, + {file = "kiwisolver-1.4.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6512cb89e334e4700febbffaaa52761b65b4f5a3cf33f960213d5656cea36a77"}, + {file = "kiwisolver-1.4.5-cp39-cp39-win32.whl", hash = "sha256:9db8ea4c388fdb0f780fe91346fd438657ea602d58348753d9fb265ce1bca67f"}, + {file = "kiwisolver-1.4.5-cp39-cp39-win_amd64.whl", hash = "sha256:59415f46a37f7f2efeec758353dd2eae1b07640d8ca0f0c42548ec4125492635"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5c7b3b3a728dc6faf3fc372ef24f21d1e3cee2ac3e9596691d746e5a536de920"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:620ced262a86244e2be10a676b646f29c34537d0d9cc8eb26c08f53d98013390"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:378a214a1e3bbf5ac4a8708304318b4f890da88c9e6a07699c4ae7174c09a68d"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf7be1207676ac608a50cd08f102f6742dbfc70e8d60c4db1c6897f62f71523"}, + {file = "kiwisolver-1.4.5-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ba55dce0a9b8ff59495ddd050a0225d58bd0983d09f87cfe2b6aec4f2c1234e4"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fd32ea360bcbb92d28933fc05ed09bffcb1704ba3fc7942e81db0fd4f81a7892"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5e7139af55d1688f8b960ee9ad5adafc4ac17c1c473fe07133ac092310d76544"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dced8146011d2bc2e883f9bd68618b8247387f4bbec46d7392b3c3b032640126"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9bf3325c47b11b2e51bca0824ea217c7cd84491d8ac4eefd1e409705ef092bd"}, + {file = "kiwisolver-1.4.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5794cf59533bc3f1b1c821f7206a3617999db9fbefc345360aafe2e067514929"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e368f200bbc2e4f905b8e71eb38b3c04333bddaa6a2464a6355487b02bb7fb09"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d706eba36b4c4d5bc6c6377bb6568098765e990cfc21ee16d13963fab7b3e7"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85267bd1aa8880a9c88a8cb71e18d3d64d2751a790e6ca6c27b8ccc724bcd5ad"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210ef2c3a1f03272649aff1ef992df2e724748918c4bc2d5a90352849eb40bea"}, + {file = "kiwisolver-1.4.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:11d011a7574eb3b82bcc9c1a1d35c1d7075677fdd15de527d91b46bd35e935ee"}, + {file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, +] + +[[package]] +name = "mako" +version = "1.3.2" +description = "A super-fast templating language that borrows the best ideas from the existing templating languages." 
+optional = true +python-versions = ">=3.8" +files = [ + {file = "Mako-1.3.2-py3-none-any.whl", hash = "sha256:32a99d70754dfce237019d17ffe4a282d2d3351b9c476e90d8a60e63f133b80c"}, + {file = "Mako-1.3.2.tar.gz", hash = "sha256:2a0c8ad7f6274271b3bb7467dd37cf9cc6dab4bc19cb69a4ef10669402de698e"}, +] + +[package.dependencies] +MarkupSafe = ">=0.9.2" + +[package.extras] +babel = ["Babel"] +lingua = ["lingua"] +testing = ["pytest"] + +[[package]] +name = "markdown" +version = "3.6" +description = "Python implementation of John Gruber's Markdown." +optional = true +python-versions = ">=3.8" +files = [ + {file = "Markdown-3.6-py3-none-any.whl", hash = "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f"}, + {file = "Markdown-3.6.tar.gz", hash = "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224"}, +] + +[package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + [[package]] name = "markupsafe" version = "2.1.5" @@ -755,15 +1391,115 @@ orjson = ["orjson"] toml = ["tomli (>=1.1.0)", "tomli-w (>=1.0)"] yaml = ["pyyaml (>=3.13)"] +[[package]] +name = "matplotlib" +version = "3.8.3" +description = "Python plotting package" +optional = true +python-versions = ">=3.9" +files = [ + {file = "matplotlib-3.8.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cf60138ccc8004f117ab2a2bad513cc4d122e55864b4fe7adf4db20ca68a078f"}, + {file = "matplotlib-3.8.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5f557156f7116be3340cdeef7f128fa99b0d5d287d5f41a16e169819dcf22357"}, + {file = "matplotlib-3.8.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f386cf162b059809ecfac3bcc491a9ea17da69fa35c8ded8ad154cd4b933d5ec"}, + {file = "matplotlib-3.8.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3c5f96f57b0369c288bf6f9b5274ba45787f7e0589a34d24bdbaf6d3344632f"}, + {file = "matplotlib-3.8.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:83e0f72e2c116ca7e571c57aa29b0fe697d4c6425c4e87c6e994159e0c008635"}, + {file = "matplotlib-3.8.3-cp310-cp310-win_amd64.whl", hash = "sha256:1c5c8290074ba31a41db1dc332dc2b62def469ff33766cbe325d32a3ee291aea"}, + {file = "matplotlib-3.8.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5184e07c7e1d6d1481862ee361905b7059f7fe065fc837f7c3dc11eeb3f2f900"}, + {file = "matplotlib-3.8.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d7e7e0993d0758933b1a241a432b42c2db22dfa37d4108342ab4afb9557cbe3e"}, + {file = "matplotlib-3.8.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04b36ad07eac9740fc76c2aa16edf94e50b297d6eb4c081e3add863de4bb19a7"}, + {file = "matplotlib-3.8.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c42dae72a62f14982f1474f7e5c9959fc4bc70c9de11cc5244c6e766200ba65"}, + {file = "matplotlib-3.8.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bf5932eee0d428192c40b7eac1399d608f5d995f975cdb9d1e6b48539a5ad8d0"}, + {file = "matplotlib-3.8.3-cp311-cp311-win_amd64.whl", hash = "sha256:40321634e3a05ed02abf7c7b47a50be50b53ef3eaa3a573847431a545585b407"}, + {file = "matplotlib-3.8.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:09074f8057917d17ab52c242fdf4916f30e99959c1908958b1fc6032e2d0f6d4"}, + {file = "matplotlib-3.8.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5745f6d0fb5acfabbb2790318db03809a253096e98c91b9a31969df28ee604aa"}, + {file = 
"matplotlib-3.8.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b97653d869a71721b639714b42d87cda4cfee0ee74b47c569e4874c7590c55c5"}, + {file = "matplotlib-3.8.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:242489efdb75b690c9c2e70bb5c6550727058c8a614e4c7716f363c27e10bba1"}, + {file = "matplotlib-3.8.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:83c0653c64b73926730bd9ea14aa0f50f202ba187c307a881673bad4985967b7"}, + {file = "matplotlib-3.8.3-cp312-cp312-win_amd64.whl", hash = "sha256:ef6c1025a570354297d6c15f7d0f296d95f88bd3850066b7f1e7b4f2f4c13a39"}, + {file = "matplotlib-3.8.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c4af3f7317f8a1009bbb2d0bf23dfaba859eb7dd4ccbd604eba146dccaaaf0a4"}, + {file = "matplotlib-3.8.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4c6e00a65d017d26009bac6808f637b75ceade3e1ff91a138576f6b3065eeeba"}, + {file = "matplotlib-3.8.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7b49ab49a3bea17802df6872f8d44f664ba8f9be0632a60c99b20b6db2165b7"}, + {file = "matplotlib-3.8.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6728dde0a3997396b053602dbd907a9bd64ec7d5cf99e728b404083698d3ca01"}, + {file = "matplotlib-3.8.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:813925d08fb86aba139f2d31864928d67511f64e5945ca909ad5bc09a96189bb"}, + {file = "matplotlib-3.8.3-cp39-cp39-win_amd64.whl", hash = "sha256:cd3a0c2be76f4e7be03d34a14d49ded6acf22ef61f88da600a18a5cd8b3c5f3c"}, + {file = "matplotlib-3.8.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fa93695d5c08544f4a0dfd0965f378e7afc410d8672816aff1e81be1f45dbf2e"}, + {file = "matplotlib-3.8.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9764df0e8778f06414b9d281a75235c1e85071f64bb5d71564b97c1306a2afc"}, + {file = "matplotlib-3.8.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:5e431a09e6fab4012b01fc155db0ce6dccacdbabe8198197f523a4ef4805eb26"}, + {file = "matplotlib-3.8.3.tar.gz", hash = "sha256:7b416239e9ae38be54b028abbf9048aff5054a9aba5416bef0bd17f9162ce161"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +kiwisolver = ">=1.3.1" +numpy = ">=1.21,<2" +packaging = ">=20.0" +pillow = ">=8" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" + +[[package]] +name = "mlflow" +version = "2.11.3" +description = "MLflow: A Platform for ML Development and Productionization" +optional = true +python-versions = ">=3.8" +files = [ + {file = "mlflow-2.11.3-py3-none-any.whl", hash = "sha256:6af105162b6f7f2a69cace48c2af7adbecfada5e6386f1ce7d1bbbffd6e09953"}, + {file = "mlflow-2.11.3.tar.gz", hash = "sha256:621b7e311e890b79719c2e7777286d8e08d38dd04d5ab080966990bd4a55febb"}, +] + +[package.dependencies] +alembic = "<1.10.0 || >1.10.0,<2" +click = ">=7.0,<9" +cloudpickle = "<4" +docker = ">=4.0.0,<8" +entrypoints = "<1" +Flask = "<4" +gitpython = ">=3.1.9,<4" +graphene = "<4" +gunicorn = {version = "<22", markers = "platform_system != \"Windows\""} +importlib-metadata = ">=3.7.0,<4.7.0 || >4.7.0,<8" +Jinja2 = [ + {version = ">=2.11,<4", markers = "platform_system != \"Windows\""}, + {version = ">=3.0,<4", markers = "platform_system == \"Windows\""}, +] +markdown = ">=3.3,<4" +matplotlib = "<4" +numpy = "<2" +packaging = "<24" +pandas = "<3" +protobuf = ">=3.12.0,<5" +pyarrow = ">=4.0.0,<16" +pytz = "<2025" +pyyaml = ">=5.1,<7" +querystring-parser = "<2" +requests = ">=2.17.3,<3" +scikit-learn = "<2" +scipy = "<2" 
+sqlalchemy = ">=1.4.0,<3" +sqlparse = ">=0.4.0,<1" +waitress = {version = "<4", markers = "platform_system == \"Windows\""} + +[package.extras] +aliyun-oss = ["aliyunstoreplugin"] +databricks = ["azure-storage-file-datalake (>12)", "boto3 (>1)", "botocore", "google-cloud-storage (>=1.30.0)"] +extras = ["azureml-core (>=1.2.0)", "boto3", "botocore", "google-cloud-storage (>=1.30.0)", "kubernetes", "mlserver (>=1.2.0,!=1.3.1,<1.4.0)", "mlserver-mlflow (>=1.2.0,!=1.3.1,<1.4.0)", "prometheus-flask-exporter", "pyarrow", "pysftp", "requests-auth-aws-sigv4", "virtualenv"] +gateway = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +genai = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +sqlserver = ["mlflow-dbstore"] +xethub = ["mlflow-xethub"] + [[package]] name = "multimethod" -version = "1.11.1" +version = "1.11.2" description = "Multiple argument dispatching." optional = true python-versions = ">=3.9" files = [ - {file = "multimethod-1.11.1-py3-none-any.whl", hash = "sha256:939703e1667725169722269f3bb6d078ce414526a16475028ea3eb3b8a5fc4ef"}, - {file = "multimethod-1.11.1.tar.gz", hash = "sha256:e2bfd270a30ee4b56e64b94b8353ed10f7e7aa769cedf6cb8f1d429ce50d72ea"}, + {file = "multimethod-1.11.2-py3-none-any.whl", hash = "sha256:cb338f09395c0ee87d36c7691cdd794d13d8864358082cf1205f812edd5ce05a"}, + {file = "multimethod-1.11.2.tar.gz", hash = "sha256:7f2a4863967142e6db68632fef9cd79053c09670ba0c5f113301e245140bba5c"}, ] [[package]] @@ -837,7 +1573,7 @@ files = [ name = "ollama" version = "0.1.8" description = "The official Python client for Ollama." 
-optional = false +optional = true python-versions = "<4.0,>=3.8" files = [ {file = "ollama-0.1.8-py3-none-any.whl", hash = "sha256:45916aaf99c6e41a73197e9be8a17ea90290938894ac29d2fa855f55cb70d2e3"}, @@ -1088,28 +1824,30 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "polars" -version = "0.20.10" +version = "0.20.18" description = "Blazingly fast DataFrame library" optional = false python-versions = ">=3.8" files = [ - {file = "polars-0.20.10-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:14b126dbe626c8df34a9cc1449dea270dbafd64deff88fc3620046e69e06f84c"}, - {file = "polars-0.20.10-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6d5f485dba006aa1ce443980b351a5cb8ff481cbbc51343debfbf66fb9594269"}, - {file = "polars-0.20.10-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff934fe816856db7b72565b35abf1656db485772cd3bc5631071cef7ec1d10c7"}, - {file = "polars-0.20.10-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:f5b7222ca39a4cbd286d9927d4924d2bc2ce6d7fc83a256bfd20b4199482722f"}, - {file = "polars-0.20.10-cp38-abi3-win_amd64.whl", hash = "sha256:082a22c0c1bfa1fe0c24198e646ffb19478b893f594ecf8e330c7cdc136f6e6b"}, - {file = "polars-0.20.10.tar.gz", hash = "sha256:ab32a232916df61c9377edcb5893d0b1624d810444d8fa627f9885d33819a8b7"}, + {file = "polars-0.20.18-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e305f5e6c0b8dc37fe0ff3bb1143a8bf0341134e0b23dec7c50a148f426acceb"}, + {file = "polars-0.20.18-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:26716f074301f583da9af93108d57da631622d6496cbcbb8c08476180953f408"}, + {file = "polars-0.20.18-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b3843f69228df68cb82e39647c212fde58671c064c25a0c4d544f9446160a7e"}, + {file = "polars-0.20.18-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:4b775e9677d0050775243400def1f5de4dd02b5ee220873406abc4028228525e"}, + {file = "polars-0.20.18-cp38-abi3-win_amd64.whl", hash = "sha256:73b81b9582c48f0ca4ae08c0adc56917b0c55682044bedf0eccd3f94e4e39169"}, + {file = "polars-0.20.18.tar.gz", hash = "sha256:8a321cbdbb459e3c0cc1af2ce6ac930d0d3b5ccbeb2dd3e4237ad07d487fd290"}, ] [package.dependencies] pyarrow = {version = ">=7.0.0", optional = true, markers = "extra == \"pyarrow\""} [package.extras] -adbc = ["adbc_driver_sqlite"] -all = ["polars[adbc,cloudpickle,connectorx,deltalake,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"] +adbc = ["adbc-driver-manager", "adbc-driver-sqlite"] +all = ["polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"] +async = ["nest-asyncio"] cloudpickle = ["cloudpickle"] connectorx = ["connectorx (>=0.3.2)"] deltalake = ["deltalake (>=0.14.0)"] +fastexcel = ["fastexcel (>=0.9)"] fsspec = ["fsspec"] gevent = ["gevent"] matplotlib = ["matplotlib"] @@ -1122,7 +1860,7 @@ pydantic = ["pydantic"] pyiceberg = ["pyiceberg (>=0.5.0)"] pyxlsb = ["pyxlsb (>=1.0)"] sqlalchemy = ["pandas", "sqlalchemy"] -timezone = ["backports.zoneinfo", "tzdata"] +timezone = ["backports-zoneinfo", "tzdata"] xlsx2csv = ["xlsx2csv (>=0.8.0)"] xlsxwriter = ["xlsxwriter"] @@ -1174,6 +1912,26 @@ files = [ fastapi = ">=0.38.1,<1.0.0" prometheus-client = ">=0.8.0,<1.0.0" +[[package]] +name = "protobuf" +version = "4.25.3" +description = "" +optional = true +python-versions = ">=3.8" +files = [ + {file = "protobuf-4.25.3-cp310-abi3-win32.whl", hash = 
"sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa"}, + {file = "protobuf-4.25.3-cp310-abi3-win_amd64.whl", hash = "sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8"}, + {file = "protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c"}, + {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019"}, + {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d"}, + {file = "protobuf-4.25.3-cp38-cp38-win32.whl", hash = "sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2"}, + {file = "protobuf-4.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4"}, + {file = "protobuf-4.25.3-cp39-cp39-win32.whl", hash = "sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4"}, + {file = "protobuf-4.25.3-cp39-cp39-win_amd64.whl", hash = "sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c"}, + {file = "protobuf-4.25.3-py3-none-any.whl", hash = "sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9"}, + {file = "protobuf-4.25.3.tar.gz", hash = "sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"}, +] + [[package]] name = "psycopg2" version = "2.9.9" @@ -1196,47 +1954,47 @@ files = [ [[package]] name = "pyarrow" -version = "15.0.0" +version = "15.0.2" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" files = [ - {file = "pyarrow-15.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0a524532fd6dd482edaa563b686d754c70417c2f72742a8c990b322d4c03a15d"}, - {file = "pyarrow-15.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a6bdb314affa9c2e0d5dddf3d9cbb9ef4a8dddaa68669975287d47ece67642"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66958fd1771a4d4b754cd385835e66a3ef6b12611e001d4e5edfcef5f30391e2"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f500956a49aadd907eaa21d4fff75f73954605eaa41f61cb94fb008cf2e00c6"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6f87d9c4f09e049c2cade559643424da84c43a35068f2a1c4653dc5b1408a929"}, - {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:85239b9f93278e130d86c0e6bb455dcb66fc3fd891398b9d45ace8799a871a1e"}, - {file = "pyarrow-15.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b8d43e31ca16aa6e12402fcb1e14352d0d809de70edd185c7650fe80e0769e3"}, - {file = "pyarrow-15.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:fa7cd198280dbd0c988df525e50e35b5d16873e2cdae2aaaa6363cdb64e3eec5"}, - {file = "pyarrow-15.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8780b1a29d3c8b21ba6b191305a2a607de2e30dab399776ff0aa09131e266340"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0ec198ccc680f6c92723fadcb97b74f07c45ff3fdec9dd765deb04955ccf19"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036a7209c235588c2f07477fe75c07e6caced9b7b61bb897c8d4e52c4b5f9555"}, - {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2bd8a0e5296797faf9a3294e9fa2dc67aa7f10ae2207920dbebb785c77e9dbe5"}, - 
{file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e8ebed6053dbe76883a822d4e8da36860f479d55a762bd9e70d8494aed87113e"}, - {file = "pyarrow-15.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d53a9d1b2b5bd7d5e4cd84d018e2a45bc9baaa68f7e6e3ebed45649900ba99"}, - {file = "pyarrow-15.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9950a9c9df24090d3d558b43b97753b8f5867fb8e521f29876aa021c52fda351"}, - {file = "pyarrow-15.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:003d680b5e422d0204e7287bb3fa775b332b3fce2996aa69e9adea23f5c8f970"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f75fce89dad10c95f4bf590b765e3ae98bcc5ba9f6ce75adb828a334e26a3d40"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca9cb0039923bec49b4fe23803807e4ef39576a2bec59c32b11296464623dc2"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ed5a78ed29d171d0acc26a305a4b7f83c122d54ff5270810ac23c75813585e4"}, - {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6eda9e117f0402dfcd3cd6ec9bfee89ac5071c48fc83a84f3075b60efa96747f"}, - {file = "pyarrow-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a3a6180c0e8f2727e6f1b1c87c72d3254cac909e609f35f22532e4115461177"}, - {file = "pyarrow-15.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:19a8918045993349b207de72d4576af0191beef03ea655d8bdb13762f0cd6eac"}, - {file = "pyarrow-15.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0ec076b32bacb6666e8813a22e6e5a7ef1314c8069d4ff345efa6246bc38593"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5db1769e5d0a77eb92344c7382d6543bea1164cca3704f84aa44e26c67e320fb"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2617e3bf9df2a00020dd1c1c6dce5cc343d979efe10bc401c0632b0eef6ef5b"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:d31c1d45060180131caf10f0f698e3a782db333a422038bf7fe01dace18b3a31"}, - {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c8c287d1d479de8269398b34282e206844abb3208224dbdd7166d580804674b7"}, - {file = "pyarrow-15.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:07eb7f07dc9ecbb8dace0f58f009d3a29ee58682fcdc91337dfeb51ea618a75b"}, - {file = "pyarrow-15.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:47af7036f64fce990bb8a5948c04722e4e3ea3e13b1007ef52dfe0aa8f23cf7f"}, - {file = "pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93768ccfff85cf044c418bfeeafce9a8bb0cee091bd8fd19011aff91e58de540"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6ee87fd6892700960d90abb7b17a72a5abb3b64ee0fe8db6c782bcc2d0dc0b4"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:001fca027738c5f6be0b7a3159cc7ba16a5c52486db18160909a0831b063c4e4"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1c48648f64aec09accf44140dccb92f4f94394b8d79976c426a5b79b11d4fa7"}, - {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:972a0141be402bb18e3201448c8ae62958c9c7923dfaa3b3d4530c835ac81aed"}, - {file = "pyarrow-15.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f01fc5cf49081426429127aa2d427d9d98e1cb94a32cb961d583a70b7c4504e6"}, - {file = "pyarrow-15.0.0.tar.gz", hash = 
"sha256:876858f549d540898f927eba4ef77cd549ad8d24baa3207cf1b72e5788b50e83"}, + {file = "pyarrow-15.0.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:88b340f0a1d05b5ccc3d2d986279045655b1fe8e41aba6ca44ea28da0d1455d8"}, + {file = "pyarrow-15.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eaa8f96cecf32da508e6c7f69bb8401f03745c050c1dd42ec2596f2e98deecac"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23c6753ed4f6adb8461e7c383e418391b8d8453c5d67e17f416c3a5d5709afbd"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f639c059035011db8c0497e541a8a45d98a58dbe34dc8fadd0ef128f2cee46e5"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:290e36a59a0993e9a5224ed2fb3e53375770f07379a0ea03ee2fce2e6d30b423"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06c2bb2a98bc792f040bef31ad3e9be6a63d0cb39189227c08a7d955db96816e"}, + {file = "pyarrow-15.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:f7a197f3670606a960ddc12adbe8075cea5f707ad7bf0dffa09637fdbb89f76c"}, + {file = "pyarrow-15.0.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5f8bc839ea36b1f99984c78e06e7a06054693dc2af8920f6fb416b5bca9944e4"}, + {file = "pyarrow-15.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f5e81dfb4e519baa6b4c80410421528c214427e77ca0ea9461eb4097c328fa33"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a4f240852b302a7af4646c8bfe9950c4691a419847001178662a98915fd7ee7"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e7d9cfb5a1e648e172428c7a42b744610956f3b70f524aa3a6c02a448ba853e"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2d4f905209de70c0eb5b2de6763104d5a9a37430f137678edfb9a675bac9cd98"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:90adb99e8ce5f36fbecbbc422e7dcbcbed07d985eed6062e459e23f9e71fd197"}, + {file = "pyarrow-15.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:b116e7fd7889294cbd24eb90cd9bdd3850be3738d61297855a71ac3b8124ee38"}, + {file = "pyarrow-15.0.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:25335e6f1f07fdaa026a61c758ee7d19ce824a866b27bba744348fa73bb5a440"}, + {file = "pyarrow-15.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:90f19e976d9c3d8e73c80be84ddbe2f830b6304e4c576349d9360e335cd627fc"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a22366249bf5fd40ddacc4f03cd3160f2d7c247692945afb1899bab8a140ddfb"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2a335198f886b07e4b5ea16d08ee06557e07db54a8400cc0d03c7f6a22f785f"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e6d459c0c22f0b9c810a3917a1de3ee704b021a5fb8b3bacf968eece6df098f"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:033b7cad32198754d93465dcfb71d0ba7cb7cd5c9afd7052cab7214676eec38b"}, + {file = "pyarrow-15.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:29850d050379d6e8b5a693098f4de7fd6a2bea4365bfd073d7c57c57b95041ee"}, + {file = "pyarrow-15.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:7167107d7fb6dcadb375b4b691b7e316f4368f39f6f45405a05535d7ad5e5058"}, + {file = "pyarrow-15.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:e85241b44cc3d365ef950432a1b3bd44ac54626f37b2e3a0cc89c20e45dfd8bf"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:248723e4ed3255fcd73edcecc209744d58a9ca852e4cf3d2577811b6d4b59818"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ff3bdfe6f1b81ca5b73b70a8d482d37a766433823e0c21e22d1d7dde76ca33f"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:f3d77463dee7e9f284ef42d341689b459a63ff2e75cee2b9302058d0d98fe142"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:8c1faf2482fb89766e79745670cbca04e7018497d85be9242d5350cba21357e1"}, + {file = "pyarrow-15.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:28f3016958a8e45a1069303a4a4f6a7d4910643fc08adb1e2e4a7ff056272ad3"}, + {file = "pyarrow-15.0.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:89722cb64286ab3d4daf168386f6968c126057b8c7ec3ef96302e81d8cdb8ae4"}, + {file = "pyarrow-15.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cd0ba387705044b3ac77b1b317165c0498299b08261d8122c96051024f953cd5"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad2459bf1f22b6a5cdcc27ebfd99307d5526b62d217b984b9f5c974651398832"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58922e4bfece8b02abf7159f1f53a8f4d9f8e08f2d988109126c17c3bb261f22"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:adccc81d3dc0478ea0b498807b39a8d41628fa9210729b2f718b78cb997c7c91"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:8bd2baa5fe531571847983f36a30ddbf65261ef23e496862ece83bdceb70420d"}, + {file = "pyarrow-15.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6669799a1d4ca9da9c7e06ef48368320f5856f36f9a4dd31a11839dda3f6cc8c"}, + {file = "pyarrow-15.0.2.tar.gz", hash = "sha256:9c9bc803cb3b7bfacc1e96ffbfd923601065d9d3f911179d81e72d99fd74a3d9"}, ] [package.dependencies] @@ -1244,24 +2002,24 @@ numpy = ">=1.16.6,<2" [[package]] name = "pycparser" -version = "2.21" +version = "2.22" description = "C parser in Python" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.8" files = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] [[package]] name = "pydantic" -version = "2.6.2" +version = "2.6.4" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.6.2-py3-none-any.whl", hash = "sha256:37a5432e54b12fecaa1049c5195f3d860a10e01bdfd24f1840ef14bd0d3aeab3"}, - {file = "pydantic-2.6.2.tar.gz", hash = "sha256:a09be1c3d28f3abe37f8a78af58284b236a92ce520105ddc91a6d29ea1176ba7"}, + {file = "pydantic-2.6.4-py3-none-any.whl", hash = "sha256:cc46fce86607580867bdc3361ad462bab9c222ef042d3da86f2fb333e1d916c5"}, + {file = "pydantic-2.6.4.tar.gz", hash = "sha256:b1704e0847db01817624a6b86766967f552dd9dbf3afba4004409f908dcc84e6"}, ] [package.dependencies] @@ -1363,15 +2121,29 @@ files = [ 
[package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pyparsing" +version = "3.1.2" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = true +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"}, + {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + [[package]] name = "pytest" -version = "8.0.1" +version = "8.1.1" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.0.1-py3-none-any.whl", hash = "sha256:3e4f16fe1c0a9dc9d9389161c127c3edc5d810c38d6793042fb81d9f48a59fca"}, - {file = "pytest-8.0.1.tar.gz", hash = "sha256:267f6563751877d772019b13aacbe4e860d73fe8f651f28112e9ac37de7513ae"}, + {file = "pytest-8.1.1-py3-none-any.whl", hash = "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7"}, + {file = "pytest-8.1.1.tar.gz", hash = "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044"}, ] [package.dependencies] @@ -1379,11 +2151,11 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" -pluggy = ">=1.3.0,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} +pluggy = ">=1.4,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-asyncio" @@ -1405,30 +2177,30 @@ testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy [[package]] name = "pytest-mock" -version = "3.12.0" +version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, - {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, + {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, + {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, ] [package.dependencies] -pytest = ">=5.0" +pytest = ">=6.2.5" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] [[package]] name = "python-dateutil" -version = "2.8.2" +version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = 
"sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, ] [package.dependencies] @@ -1459,6 +2231,102 @@ files = [ {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = true +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = true +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = 
"sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = 
"PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "querystring-parser" +version = "1.2.4" +description = "QueryString parser for Python/Django that correctly handles nested dictionaries" +optional = true +python-versions = "*" +files = [ + {file = "querystring_parser-1.2.4-py2.py3-none-any.whl", hash = "sha256:d2fa90765eaf0de96c8b087872991a10238e89ba015ae59fedfed6bd61c242a0"}, + {file = "querystring_parser-1.2.4.tar.gz", hash = "sha256:644fce1cffe0530453b43a83a38094dbe422ccba8c9b2f2a1c00280e14ca8a62"}, +] + +[package.dependencies] +six = "*" + [[package]] name = "redis" version = "4.6.0" @@ -1477,6 +2345,111 @@ async-timeout = {version 
= ">=4.0.2", markers = "python_full_version <= \"3.11.2 hiredis = ["hiredis (>=1.0.0)"] ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = true +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "scikit-learn" +version = "1.4.1.post1" +description = "A set of python modules for machine learning and data mining" +optional = true +python-versions = ">=3.9" +files = [ + {file = "scikit-learn-1.4.1.post1.tar.gz", hash = "sha256:93d3d496ff1965470f9977d05e5ec3376fb1e63b10e4fda5e39d23c2d8969a30"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c540aaf44729ab5cd4bd5e394f2b375e65ceaea9cdd8c195788e70433d91bbc5"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4310bff71aa98b45b46cd26fa641309deb73a5d1c0461d181587ad4f30ea3c36"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f43dd527dabff5521af2786a2f8de5ba381e182ec7292663508901cf6ceaf6e"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c02e27d65b0c7dc32f2c5eb601aaf5530b7a02bfbe92438188624524878336f2"}, + {file = "scikit_learn-1.4.1.post1-cp310-cp310-win_amd64.whl", hash = "sha256:629e09f772ad42f657ca60a1a52342eef786218dd20cf1369a3b8d085e55ef8f"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6145dfd9605b0b50ae72cdf72b61a2acd87501369a763b0d73d004710ebb76b5"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1afed6951bc9d2053c6ee9a518a466cbc9b07c6a3f9d43bfe734192b6125d508"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce03506ccf5f96b7e9030fea7eb148999b254c44c10182ac55857bc9b5d4815f"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ba516fcdc73d60e7f48cbb0bccb9acbdb21807de3651531208aac73c758e3ab"}, + {file = "scikit_learn-1.4.1.post1-cp311-cp311-win_amd64.whl", hash = "sha256:78cd27b4669513b50db4f683ef41ea35b5dddc797bd2bbd990d49897fd1c8a46"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a1e289f33f613cefe6707dead50db31930530dc386b6ccff176c786335a7b01c"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:0df87de9ce1c0140f2818beef310fb2e2afdc1e66fc9ad587965577f17733649"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:712c1c69c45b58ef21635360b3d0a680ff7d83ac95b6f9b82cf9294070cda710"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1754b0c2409d6ed5a3380512d0adcf182a01363c669033a2b55cca429ed86a81"}, + {file = "scikit_learn-1.4.1.post1-cp312-cp312-win_amd64.whl", hash = 
"sha256:1d491ef66e37f4e812db7e6c8286520c2c3fc61b34bf5e59b67b4ce528de93af"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:aa0029b78ef59af22cfbd833e8ace8526e4df90212db7ceccbea582ebb5d6794"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:14e4c88436ac96bf69eb6d746ac76a574c314a23c6961b7d344b38877f20fee1"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7cd3a77c32879311f2aa93466d3c288c955ef71d191503cf0677c3340ae8ae0"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a3ee19211ded1a52ee37b0a7b373a8bfc66f95353af058a210b692bd4cda0dd"}, + {file = "scikit_learn-1.4.1.post1-cp39-cp39-win_amd64.whl", hash = "sha256:234b6bda70fdcae9e4abbbe028582ce99c280458665a155eed0b820599377d25"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5,<2.0" +scipy = ">=1.6.0" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "pandas (>=1.1.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.15.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=23.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.19.12)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.12.0" +description = "Fundamental algorithms for scientific computing in Python" +optional = true +python-versions = ">=3.9" +files = [ + {file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"}, + {file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"}, + {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"}, + {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"}, + {file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"}, + {file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"}, + {file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"}, + {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"}, + {file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"}, + {file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"}, + {file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"}, + {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"}, + {file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"}, + {file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"}, + {file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"}, + {file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"}, + {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"}, + {file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"}, + {file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"}, + {file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<1.29.0" + +[package.extras] +dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] +test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + [[package]] name = "six" version = "1.16.0" @@ -1488,15 +2461,26 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "smmap" +version = "5.0.1" +description = "A pure Python implementation of a sliding window memory map manager" +optional = true +python-versions = ">=3.7" +files = [ + {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, + {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, +] + [[package]] name = "sniffio" -version = "1.3.0" +version = "1.3.1" description = "Sniff out which async library your 
code is running under" optional = false python-versions = ">=3.7" files = [ - {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, - {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] [[package]] @@ -1512,60 +2496,60 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.27" +version = "2.0.29" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d04e579e911562f1055d26dab1868d3e0bb905db3bccf664ee8ad109f035618a"}, - {file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa67d821c1fd268a5a87922ef4940442513b4e6c377553506b9db3b83beebbd8"}, - {file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c7a596d0be71b7baa037f4ac10d5e057d276f65a9a611c46970f012752ebf2d"}, - {file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:954d9735ee9c3fa74874c830d089a815b7b48df6f6b6e357a74130e478dbd951"}, - {file = "SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5cd20f58c29bbf2680039ff9f569fa6d21453fbd2fa84dbdb4092f006424c2e6"}, - {file = "SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:03f448ffb731b48323bda68bcc93152f751436ad6037f18a42b7e16af9e91c07"}, - {file = "SQLAlchemy-2.0.27-cp310-cp310-win32.whl", hash = "sha256:d997c5938a08b5e172c30583ba6b8aad657ed9901fc24caf3a7152eeccb2f1b4"}, - {file = "SQLAlchemy-2.0.27-cp310-cp310-win_amd64.whl", hash = "sha256:eb15ef40b833f5b2f19eeae65d65e191f039e71790dd565c2af2a3783f72262f"}, - {file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c5bad7c60a392850d2f0fee8f355953abaec878c483dd7c3836e0089f046bf6"}, - {file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3012ab65ea42de1be81fff5fb28d6db893ef978950afc8130ba707179b4284a"}, - {file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbcd77c4d94b23e0753c5ed8deba8c69f331d4fd83f68bfc9db58bc8983f49cd"}, - {file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d177b7e82f6dd5e1aebd24d9c3297c70ce09cd1d5d37b43e53f39514379c029c"}, - {file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:680b9a36029b30cf063698755d277885d4a0eab70a2c7c6e71aab601323cba45"}, - {file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1306102f6d9e625cebaca3d4c9c8f10588735ef877f0360b5cdb4fdfd3fd7131"}, - {file = "SQLAlchemy-2.0.27-cp311-cp311-win32.whl", hash = "sha256:5b78aa9f4f68212248aaf8943d84c0ff0f74efc65a661c2fc68b82d498311fd5"}, - {file = "SQLAlchemy-2.0.27-cp311-cp311-win_amd64.whl", hash = "sha256:15e19a84b84528f52a68143439d0c7a3a69befcd4f50b8ef9b7b69d2628ae7c4"}, - {file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0de1263aac858f288a80b2071990f02082c51d88335a1db0d589237a3435fe71"}, - {file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce850db091bf7d2a1f2fdb615220b968aeff3849007b1204bf6e3e50a57b3d32"}, - {file = 
"SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dfc936870507da96aebb43e664ae3a71a7b96278382bcfe84d277b88e379b18"}, - {file = "SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4fbe6a766301f2e8a4519f4500fe74ef0a8509a59e07a4085458f26228cd7cc"}, - {file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4535c49d961fe9a77392e3a630a626af5baa967172d42732b7a43496c8b28876"}, - {file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0fb3bffc0ced37e5aa4ac2416f56d6d858f46d4da70c09bb731a246e70bff4d5"}, - {file = "SQLAlchemy-2.0.27-cp312-cp312-win32.whl", hash = "sha256:7f470327d06400a0aa7926b375b8e8c3c31d335e0884f509fe272b3c700a7254"}, - {file = "SQLAlchemy-2.0.27-cp312-cp312-win_amd64.whl", hash = "sha256:f9374e270e2553653d710ece397df67db9d19c60d2647bcd35bfc616f1622dcd"}, - {file = "SQLAlchemy-2.0.27-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e97cf143d74a7a5a0f143aa34039b4fecf11343eed66538610debc438685db4a"}, - {file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7b5a3e2120982b8b6bd1d5d99e3025339f7fb8b8267551c679afb39e9c7c7f1"}, - {file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e36aa62b765cf9f43a003233a8c2d7ffdeb55bc62eaa0a0380475b228663a38f"}, - {file = "SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5ada0438f5b74c3952d916c199367c29ee4d6858edff18eab783b3978d0db16d"}, - {file = "SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b1d9d1bfd96eef3c3faedb73f486c89e44e64e40e5bfec304ee163de01cf996f"}, - {file = "SQLAlchemy-2.0.27-cp37-cp37m-win32.whl", hash = "sha256:ca891af9f3289d24a490a5fde664ea04fe2f4984cd97e26de7442a4251bd4b7c"}, - {file = "SQLAlchemy-2.0.27-cp37-cp37m-win_amd64.whl", hash = "sha256:fd8aafda7cdff03b905d4426b714601c0978725a19efc39f5f207b86d188ba01"}, - {file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ec1f5a328464daf7a1e4e385e4f5652dd9b1d12405075ccba1df842f7774b4fc"}, - {file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ad862295ad3f644e3c2c0d8b10a988e1600d3123ecb48702d2c0f26771f1c396"}, - {file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48217be1de7d29a5600b5c513f3f7664b21d32e596d69582be0a94e36b8309cb"}, - {file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e56afce6431450442f3ab5973156289bd5ec33dd618941283847c9fd5ff06bf"}, - {file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:611068511b5531304137bcd7fe8117c985d1b828eb86043bd944cebb7fae3910"}, - {file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b86abba762ecfeea359112b2bb4490802b340850bbee1948f785141a5e020de8"}, - {file = "SQLAlchemy-2.0.27-cp38-cp38-win32.whl", hash = "sha256:30d81cc1192dc693d49d5671cd40cdec596b885b0ce3b72f323888ab1c3863d5"}, - {file = "SQLAlchemy-2.0.27-cp38-cp38-win_amd64.whl", hash = "sha256:120af1e49d614d2525ac247f6123841589b029c318b9afbfc9e2b70e22e1827d"}, - {file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d07ee7793f2aeb9b80ec8ceb96bc8cc08a2aec8a1b152da1955d64e4825fcbac"}, - {file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cb0845e934647232b6ff5150df37ceffd0b67b754b9fdbb095233deebcddbd4a"}, - {file = 
"SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fc19ae2e07a067663dd24fca55f8ed06a288384f0e6e3910420bf4b1270cc51"}, - {file = "SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b90053be91973a6fb6020a6e44382c97739736a5a9d74e08cc29b196639eb979"}, - {file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2f5c9dfb0b9ab5e3a8a00249534bdd838d943ec4cfb9abe176a6c33408430230"}, - {file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:33e8bde8fff203de50399b9039c4e14e42d4d227759155c21f8da4a47fc8053c"}, - {file = "SQLAlchemy-2.0.27-cp39-cp39-win32.whl", hash = "sha256:d873c21b356bfaf1589b89090a4011e6532582b3a8ea568a00e0c3aab09399dd"}, - {file = "SQLAlchemy-2.0.27-cp39-cp39-win_amd64.whl", hash = "sha256:ff2f1b7c963961d41403b650842dc2039175b906ab2093635d8319bef0b7d620"}, - {file = "SQLAlchemy-2.0.27-py3-none-any.whl", hash = "sha256:1ab4e0448018d01b142c916cc7119ca573803a4745cfe341b8f95657812700ac"}, - {file = "SQLAlchemy-2.0.27.tar.gz", hash = "sha256:86a6ed69a71fe6b88bf9331594fa390a2adda4a49b5c06f98e47bf0d392534f8"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c142852ae192e9fe5aad5c350ea6befe9db14370b34047e1f0f7cf99e63c63b"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:99a1e69d4e26f71e750e9ad6fdc8614fbddb67cfe2173a3628a2566034e223c7"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ef3fbccb4058355053c51b82fd3501a6e13dd808c8d8cd2561e610c5456013c"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d6753305936eddc8ed190e006b7bb33a8f50b9854823485eed3a886857ab8d1"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0f3ca96af060a5250a8ad5a63699180bc780c2edf8abf96c58af175921df847a"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c4520047006b1d3f0d89e0532978c0688219857eb2fee7c48052560ae76aca1e"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-win32.whl", hash = "sha256:b2a0e3cf0caac2085ff172c3faacd1e00c376e6884b5bc4dd5b6b84623e29e4f"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-win_amd64.whl", hash = "sha256:01d10638a37460616708062a40c7b55f73e4d35eaa146781c683e0fa7f6c43fb"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:308ef9cb41d099099fffc9d35781638986870b29f744382904bf9c7dadd08513"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:296195df68326a48385e7a96e877bc19aa210e485fa381c5246bc0234c36c78e"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a13b917b4ffe5a0a31b83d051d60477819ddf18276852ea68037a144a506efb9"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f6d971255d9ddbd3189e2e79d743ff4845c07f0633adfd1de3f63d930dbe673"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:61405ea2d563407d316c63a7b5271ae5d274a2a9fbcd01b0aa5503635699fa1e"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:de7202ffe4d4a8c1e3cde1c03e01c1a3772c92858837e8f3879b497158e4cb44"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-win32.whl", hash = "sha256:b5d7ed79df55a731749ce65ec20d666d82b185fa4898430b17cb90c892741520"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-win_amd64.whl", hash = 
"sha256:205f5a2b39d7c380cbc3b5dcc8f2762fb5bcb716838e2d26ccbc54330775b003"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d96710d834a6fb31e21381c6d7b76ec729bd08c75a25a5184b1089141356171f"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:52de4736404e53c5c6a91ef2698c01e52333988ebdc218f14c833237a0804f1b"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c7b02525ede2a164c5fa5014915ba3591730f2cc831f5be9ff3b7fd3e30958e"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dfefdb3e54cd15f5d56fd5ae32f1da2d95d78319c1f6dfb9bcd0eb15d603d5d"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a88913000da9205b13f6f195f0813b6ffd8a0c0c2bd58d499e00a30eb508870c"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fecd5089c4be1bcc37c35e9aa678938d2888845a134dd016de457b942cf5a758"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-win32.whl", hash = "sha256:8197d6f7a3d2b468861ebb4c9f998b9df9e358d6e1cf9c2a01061cb9b6cf4e41"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-win_amd64.whl", hash = "sha256:9b19836ccca0d321e237560e475fd99c3d8655d03da80c845c4da20dda31b6e1"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:87a1d53a5382cdbbf4b7619f107cc862c1b0a4feb29000922db72e5a66a5ffc0"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a0732dffe32333211801b28339d2a0babc1971bc90a983e3035e7b0d6f06b93"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90453597a753322d6aa770c5935887ab1fc49cc4c4fdd436901308383d698b4b"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ea311d4ee9a8fa67f139c088ae9f905fcf0277d6cd75c310a21a88bf85e130f5"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5f20cb0a63a3e0ec4e169aa8890e32b949c8145983afa13a708bc4b0a1f30e03"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-win32.whl", hash = "sha256:e5bbe55e8552019c6463709b39634a5fc55e080d0827e2a3a11e18eb73f5cdbd"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-win_amd64.whl", hash = "sha256:c2f9c762a2735600654c654bf48dad388b888f8ce387b095806480e6e4ff6907"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7e614d7a25a43a9f54fcce4675c12761b248547f3d41b195e8010ca7297c369c"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:471fcb39c6adf37f820350c28aac4a7df9d3940c6548b624a642852e727ea586"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:988569c8732f54ad3234cf9c561364221a9e943b78dc7a4aaf35ccc2265f1930"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dddaae9b81c88083e6437de95c41e86823d150f4ee94bf24e158a4526cbead01"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:334184d1ab8f4c87f9652b048af3f7abea1c809dfe526fb0435348a6fef3d380"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:38b624e5cf02a69b113c8047cf7f66b5dfe4a2ca07ff8b8716da4f1b3ae81567"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-win32.whl", hash = "sha256:bab41acf151cd68bc2b466deae5deeb9e8ae9c50ad113444151ad965d5bf685b"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-win_amd64.whl", hash = 
"sha256:52c8011088305476691b8750c60e03b87910a123cfd9ad48576d6414b6ec2a1d"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3071ad498896907a5ef756206b9dc750f8e57352113c19272bdfdc429c7bd7de"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dba622396a3170974f81bad49aacebd243455ec3cc70615aeaef9e9613b5bca5"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b184e3de58009cc0bf32e20f137f1ec75a32470f5fede06c58f6c355ed42a72"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c37f1050feb91f3d6c32f864d8e114ff5545a4a7afe56778d76a9aec62638ba"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bda7ce59b06d0f09afe22c56714c65c957b1068dee3d5e74d743edec7daba552"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:25664e18bef6dc45015b08f99c63952a53a0a61f61f2e48a9e70cec27e55f699"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-win32.whl", hash = "sha256:77d29cb6c34b14af8a484e831ab530c0f7188f8efed1c6a833a2c674bf3c26ec"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-win_amd64.whl", hash = "sha256:04c487305ab035a9548f573763915189fc0fe0824d9ba28433196f8436f1449c"}, + {file = "SQLAlchemy-2.0.29-py3-none-any.whl", hash = "sha256:dc4ee2d4ee43251905f88637d5281a8d52e916a021384ec10758826f5cbae305"}, + {file = "SQLAlchemy-2.0.29.tar.gz", hash = "sha256:bd9566b8e58cabd700bc367b60e90d9349cd16f0984973f98a9a09f9c64e86f0"}, ] [package.dependencies] @@ -1601,7 +2585,7 @@ sqlcipher = ["sqlcipher3_binary"] name = "sqlglot" version = "22.5.0" description = "An easily customizable SQL parser and transpiler" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "sqlglot-22.5.0-py3-none-any.whl", hash = "sha256:ef11f7e56e93732aca3caab3c74a6c11489e383a43c2ac5b5f86cc85517ef9f3"}, @@ -1612,6 +2596,22 @@ files = [ dev = ["duckdb (>=0.6)", "maturin (>=1.4,<2.0)", "mypy", "pandas", "pandas-stubs", "pdoc", "pre-commit", "pyspark", "python-dateutil", "ruff", "types-python-dateutil", "typing-extensions"] rs = ["sqlglotrs (==0.1.2)"] +[[package]] +name = "sqlparse" +version = "0.4.4" +description = "A non-validating SQL parser." 
+optional = true +python-versions = ">=3.5" +files = [ + {file = "sqlparse-0.4.4-py3-none-any.whl", hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"}, + {file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"}, +] + +[package.extras] +dev = ["build", "flake8"] +doc = ["sphinx"] +test = ["pytest", "pytest-cov"] + [[package]] name = "starlette" version = "0.27.0" @@ -1629,6 +2629,17 @@ anyio = ">=3.4.0,<5" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] +[[package]] +name = "threadpoolctl" +version = "3.4.0" +description = "threadpoolctl" +optional = true +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.4.0-py3-none-any.whl", hash = "sha256:8f4c689a65b23e5ed825c8436a92b818aac005e0f3715f6a1664d7c7ee29d262"}, + {file = "threadpoolctl-3.4.0.tar.gz", hash = "sha256:f11b491a03661d6dd7ef692dd422ab34185d982466c49c8f98c8f716b5c93196"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -1642,17 +2653,17 @@ files = [ [[package]] name = "typeguard" -version = "4.1.5" +version = "4.2.1" description = "Run-time type checker for Python" optional = true python-versions = ">=3.8" files = [ - {file = "typeguard-4.1.5-py3-none-any.whl", hash = "sha256:8923e55f8873caec136c892c3bed1f676eae7be57cdb94819281b3d3bc9c0953"}, - {file = "typeguard-4.1.5.tar.gz", hash = "sha256:ea0a113bbc111bcffc90789ebb215625c963411f7096a7e9062d4e4630c155fd"}, + {file = "typeguard-4.2.1-py3-none-any.whl", hash = "sha256:7da3bd46e61f03e0852f8d251dcbdc2a336aa495d7daff01e092b55327796eb8"}, + {file = "typeguard-4.2.1.tar.gz", hash = "sha256:c556a1b95948230510070ca53fa0341fb0964611bd05d598d87fb52115d65fee"}, ] [package.dependencies] -typing-extensions = {version = ">=4.7.0", markers = "python_version < \"3.12\""} +typing-extensions = {version = ">=4.10.0", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)"] @@ -1660,13 +2671,13 @@ test = ["coverage[toml] (>=7)", "mypy (>=1.2.0)", "pytest (>=7)"] [[package]] name = "types-pyopenssl" -version = "24.0.0.20240130" +version = "24.0.0.20240311" description = "Typing stubs for pyOpenSSL" optional = false python-versions = ">=3.8" files = [ - {file = "types-pyOpenSSL-24.0.0.20240130.tar.gz", hash = "sha256:c812e5c1c35249f75ef5935708b2a997d62abf9745be222e5f94b9595472ab25"}, - {file = "types_pyOpenSSL-24.0.0.20240130-py3-none-any.whl", hash = "sha256:24a255458b5b8a7fca8139cf56f2a8ad5a4f1a5f711b73a5bb9cb50dc688fab5"}, + {file = "types-pyOpenSSL-24.0.0.20240311.tar.gz", hash = "sha256:7bca00cfc4e7ef9c5d2663c6a1c068c35798e59670595439f6296e7ba3d58083"}, + {file = "types_pyOpenSSL-24.0.0.20240311-py3-none-any.whl", hash = "sha256:6e8e8bfad34924067333232c93f7fc4b369856d8bea0d5c9d1808cb290ab1972"}, ] [package.dependencies] @@ -1674,13 +2685,13 @@ cryptography = ">=35.0.0" [[package]] name = "types-redis" -version = "4.6.0.20240218" +version = "4.6.0.20240311" description = "Typing stubs for redis" optional = false python-versions = ">=3.8" files = [ - {file = "types-redis-4.6.0.20240218.tar.gz", hash = "sha256:5103d7e690e5c74c974a161317b2d59ac2303cf8bef24175b04c2a4c3486cb39"}, - {file = "types_redis-4.6.0.20240218-py3-none-any.whl", hash = "sha256:dc9c45a068240e33a04302aec5655cf41e80f91eecffccbb2df215b2f6fc375d"}, + {file = "types-redis-4.6.0.20240311.tar.gz", hash = "sha256:e049bbdff0e0a1f8e701b64636811291d21bff79bf1e7850850a44055224a85f"}, + {file = 
"types_redis-4.6.0.20240311-py3-none-any.whl", hash = "sha256:6b9d68a29aba1ee400c823d8e5fe88675282eb69d7211e72fe65dbe54b33daca"}, ] [package.dependencies] @@ -1689,13 +2700,13 @@ types-pyOpenSSL = "*" [[package]] name = "typing-extensions" -version = "4.9.0" +version = "4.10.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, + {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, + {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, ] [[package]] @@ -1724,6 +2735,23 @@ files = [ {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] +[[package]] +name = "urllib3" +version = "2.2.1" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = true +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + [[package]] name = "uvicorn" version = "0.17.6" @@ -1743,6 +2771,38 @@ h11 = ">=0.8" [package.extras] standard = ["PyYAML (>=5.1)", "colorama (>=0.4)", "httptools (>=0.4.0)", "python-dotenv (>=0.13)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchgod (>=0.6)", "websockets (>=10.0)"] +[[package]] +name = "waitress" +version = "3.0.0" +description = "Waitress WSGI server" +optional = true +python-versions = ">=3.8.0" +files = [ + {file = "waitress-3.0.0-py3-none-any.whl", hash = "sha256:2a06f242f4ba0cc563444ca3d1998959447477363a2d7e9b8b4d75d35cfd1669"}, + {file = "waitress-3.0.0.tar.gz", hash = "sha256:005da479b04134cdd9dd602d1ee7c49d79de0537610d653674cc6cbde222b8a1"}, +] + +[package.extras] +docs = ["Sphinx (>=1.8.1)", "docutils", "pylons-sphinx-themes (>=1.0.9)"] +testing = ["coverage (>=5.0)", "pytest", "pytest-cov"] + +[[package]] +name = "werkzeug" +version = "3.0.2" +description = "The comprehensive WSGI web application library." 
+optional = true +python-versions = ">=3.8" +files = [ + {file = "werkzeug-3.0.2-py3-none-any.whl", hash = "sha256:3aac3f5da756f93030740bc235d3e09449efcf65f2f55e3602e1d851b8f48795"}, + {file = "werkzeug-3.0.2.tar.gz", hash = "sha256:e39b645a6ac92822588e7b39a692e7828724ceae0b0d702ef96701f90e70128d"}, +] + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + [[package]] name = "wrapt" version = "1.16.0" @@ -1822,16 +2882,34 @@ files = [ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] +[[package]] +name = "zipp" +version = "3.18.1" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = true +python-versions = ">=3.8" +files = [ + {file = "zipp-3.18.1-py3-none-any.whl", hash = "sha256:206f5a15f2af3dbaee80769fb7dc6f249695e940acca08dfb2a4769fe61e538b"}, + {file = "zipp-3.18.1.tar.gz", hash = "sha256:2884ed22e7d8961de1c9a05142eb69a247f120291bc0206a00a7642f09b5b715"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + [extras] aws = ["aioaws", "connectorx"] image = ["pillow"] kafka = ["kafka-python"] +mlflow = ["mlflow"] +ollama = ["ollama"] pandera = ["pandera"] psql = ["asyncpg", "connectorx"] redis = ["redis"] server = ["asgi-correlation-id", "fastapi", "prometheus-fastapi-instrumentator", "uvicorn"] +sql = ["sqlglot"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "bebec91e8ad79dfbc1cb1a208404260607ebf7752cc4ba6afbe12fbb788449e7" +content-hash = "c6ee03e810fbdaa78ecf10add3178b72b3f8c6a78e0d584de3171b548ea7f73f" diff --git a/pyproject.toml b/pyproject.toml index a3950a1..afc0647 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ asyncpg = { version = "^0.29.0", optional = true } sqlglot = { version = "^22.5.0", optional = true } ollama = { version = "^0.1.8", optional = true } httpx = "^0.27.0" +mlflow = { version = "^2.11.3", optional = true } [tool.poetry.extras] aws = ["aioaws", "connectorx"] @@ -81,6 +82,7 @@ image = ["pillow"] kafka = ["kafka-python"] ollama = ["ollama"] sql = ["sqlglot"] +mlflow = ["mlflow"] # text = ["gensim", "openai", "sentence-transformers"] [tool.poetry.group.dev.dependencies] diff --git a/test_data/credit_history.csv b/test_data/credit_history.csv index a45c2b9..6a4b502 100644 --- a/test_data/credit_history.csv +++ b/test_data/credit_history.csv @@ -1,7 +1,7 @@ -due_sum,event_timestamp,student_loan_due,dob_ssn,credit_card_due,bankruptcies -30747,2020-04-26 18:01:04.746575+00:00,22328,19530219_5179,8419,0 -5459,2020-04-26 18:01:04.746575+00:00,2515,19520816_8737,2944,0 -33833,2020-04-26 18:01:04.746575+00:00,33000,19860413_2537,833,0 -54891,2020-04-27 18:01:04.746575+00:00,48955,19530219_5179,5936,0 -11076,2020-04-27 18:01:04.746575+00:00,9501,19520816_8737,1575,0 -41773,2020-04-27 18:01:04.746575+00:00,35510,19860413_2537,6263,0 +event_timestamp,credit_card_due,student_loan_due,bankruptcies,due_sum,dob_ssn +2020-04-26 18:01:04.746575+00:00,8419,22328,0,30747,19530219_5179 +2020-04-26 18:01:04.746575+00:00,2944,2515,0,5459,19520816_8737 +2020-04-26 18:01:04.746575+00:00,833,33000,0,33833,19860413_2537 +2020-04-27 
18:01:04.746575+00:00,5936,48955,0,54891,19530219_5179 +2020-04-27 18:01:04.746575+00:00,1575,9501,0,11076,19520816_8737 +2020-04-27 18:01:04.746575+00:00,6263,35510,0,41773,19860413_2537 diff --git a/test_data/credit_history.parquet b/test_data/credit_history.parquet index 505bcd357d054008feaf95ada6476c79012d462c..08fd32bbecdce15c481f5baad480d3028562a4c0 100644 Binary files a/test_data/credit_history.parquet and b/test_data/credit_history.parquet differ diff --git a/test_data/titanic-sets.json index b746620..b3d6a3a 100644 --- a/test_data/titanic-sets.json +++ b/test_data/titanic-sets.json @@ -1 +1 @@ -{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"},
"description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}, {"name": "optional"}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}, {"name": "optional"}]}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} +{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, 
{"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} diff --git a/test_data/titanic-test.csv b/test_data/titanic-test.csv index f06ce07..8c048ed 100644 --- a/test_data/titanic-test.csv +++ b/test_data/titanic-test.csv @@ -1,21 +1,21 @@ -is_female,name,survived,cabin,is_mr,is_male,has_siblings,sibsp,passenger_id,age,sex 
-False,"Sirayanian, Mr. Orsen",False,,True,True,False,0,61,22.0,male -True,"Icard, Miss. Amelie",True,B28,False,False,False,0,62,38.0,female -False,"Harris, Mr. Henry Birkhardt",False,C83,True,True,True,1,63,45.0,male -False,"Skoog, Master. Harald",False,,False,True,True,3,64,4.0,male -False,"Stewart, Mr. Albert A",False,,True,True,False,0,65,,male -False,"Moubarek, Master. Gerios",True,,False,True,True,1,66,,male -True,"Nye, Mrs. (Elizabeth Ramell)",True,F33,True,False,False,0,67,29.0,female -False,"Crease, Mr. Ernest James",False,,True,True,False,0,68,19.0,male -True,"Andersson, Miss. Erna Alexandra",True,,False,False,True,4,69,17.0,female -False,"Kink, Mr. Vincenz",False,,True,True,True,2,70,26.0,male -False,"Jenkin, Mr. Stephen Curnow",False,,True,True,False,0,71,32.0,male -True,"Goodwin, Miss. Lillian Amy",False,,False,False,True,5,72,16.0,female -False,"Hood, Mr. Ambrose Jr",False,,True,True,False,0,73,21.0,male -False,"Chronopoulos, Mr. Apostolos",False,,True,True,True,1,74,26.0,male -False,"Bing, Mr. Lee",True,,True,True,False,0,75,32.0,male -False,"Moen, Mr. Sigurd Hansen",False,F G73,True,True,False,0,76,25.0,male -False,"Staneff, Mr. Ivan",False,,True,True,False,0,77,,male -False,"Moutal, Mr. Rahamin Haim",False,,True,True,False,0,78,,male -False,"Caldwell, Master. Alden Gates",True,,False,True,False,0,79,0.83,male -True,"Dowdell, Miss. Elizabeth",True,,False,False,False,0,80,30.0,female +age,sex,sibsp,is_mr,name,has_siblings,passenger_id,survived,is_male,is_female,cabin +22.0,male,0,True,"Sirayanian, Mr. Orsen",False,61,False,True,False, +38.0,female,0,False,"Icard, Miss. Amelie",False,62,True,False,True,B28 +45.0,male,1,True,"Harris, Mr. Henry Birkhardt",True,63,False,True,False,C83 +4.0,male,3,False,"Skoog, Master. Harald",True,64,False,True,False, +,male,0,True,"Stewart, Mr. Albert A",False,65,False,True,False, +,male,1,False,"Moubarek, Master. Gerios",True,66,True,True,False, +29.0,female,0,True,"Nye, Mrs. (Elizabeth Ramell)",False,67,True,False,True,F33 +19.0,male,0,True,"Crease, Mr. Ernest James",False,68,False,True,False, +17.0,female,4,False,"Andersson, Miss. Erna Alexandra",True,69,True,False,True, +26.0,male,2,True,"Kink, Mr. Vincenz",True,70,False,True,False, +32.0,male,0,True,"Jenkin, Mr. Stephen Curnow",False,71,False,True,False, +16.0,female,5,False,"Goodwin, Miss. Lillian Amy",True,72,False,False,True, +21.0,male,0,True,"Hood, Mr. Ambrose Jr",False,73,False,True,False, +26.0,male,1,True,"Chronopoulos, Mr. Apostolos",True,74,False,True,False, +32.0,male,0,True,"Bing, Mr. Lee",False,75,True,True,False, +25.0,male,0,True,"Moen, Mr. Sigurd Hansen",False,76,False,True,False,F G73 +,male,0,True,"Staneff, Mr. Ivan",False,77,False,True,False, +,male,0,True,"Moutal, Mr. Rahamin Haim",False,78,False,True,False, +0.83,male,0,False,"Caldwell, Master. Alden Gates",False,79,True,True,False, +30.0,female,0,False,"Dowdell, Miss. Elizabeth",False,80,True,False,True, diff --git a/test_data/titanic-train.csv b/test_data/titanic-train.csv index 3966e0b..f9b262c 100644 --- a/test_data/titanic-train.csv +++ b/test_data/titanic-train.csv @@ -1,61 +1,61 @@ -is_female,name,survived,cabin,is_mr,is_male,has_siblings,sibsp,passenger_id,age,sex -False,"Braund, Mr. Owen Harris",False,,True,True,True,1,1,22.0,male -True,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",True,C85,True,False,True,1,2,38.0,female -True,"Heikkinen, Miss. Laina",True,,False,False,False,0,3,26.0,female -True,"Futrelle, Mrs. 
Jacques Heath (Lily May Peel)",True,C123,True,False,True,1,4,35.0,female -False,"Allen, Mr. William Henry",False,,True,True,False,0,5,35.0,male -False,"Moran, Mr. James",False,,True,True,False,0,6,,male -False,"McCarthy, Mr. Timothy J",False,E46,True,False,False,0,7,54.0,other -False,"Palsson, Master. Gosta Leonard",False,,False,True,True,3,8,2.0,male -True,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",True,,True,False,False,0,9,27.0,female -True,"Nasser, Mrs. Nicholas (Adele Achem)",True,,True,False,True,1,10,14.0,female -True,"Sandstrom, Miss. Marguerite Rut",True,G6,False,False,True,1,11,4.0,female -True,"Bonnell, Miss. Elizabeth",True,C103,False,False,False,0,12,58.0,female -False,"Saundercock, Mr. William Henry",False,,True,True,False,0,13,20.0,male -False,"Andersson, Mr. Anders Johan",False,,True,True,True,1,14,39.0,male -True,"Vestrom, Miss. Hulda Amanda Adolfina",False,,False,False,False,0,15,14.0,female -True,"Hewlett, Mrs. (Mary D Kingcome) ",True,,True,False,False,0,16,55.0,female -False,"Rice, Master. Eugene",False,,False,True,True,4,17,2.0,male -False,"Williams, Mr. Charles Eugene",True,,True,True,False,0,18,,male -True,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",False,,True,False,True,1,19,31.0,female -True,"Masselmani, Mrs. Fatima",True,,True,False,False,0,20,,female -False,"Fynney, Mr. Joseph J",False,,True,True,False,0,21,35.0,male -False,"Beesley, Mr. Lawrence",True,D56,True,True,False,0,22,34.0,male -True,"McGowan, Miss. Anna ""Annie""",True,,False,False,False,0,23,15.0,female -False,"Sloper, Mr. William Thompson",True,A6,True,True,False,0,24,28.0,male -True,"Palsson, Miss. Torborg Danira",False,,False,False,True,3,25,8.0,female -True,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",True,,True,False,True,1,26,38.0,female -False,"Emir, Mr. Farred Chehab",False,,True,True,False,0,27,,male -False,"Fortune, Mr. Charles Alexander",False,C23 C25 C27,True,True,True,3,28,19.0,male -True,"O'Dwyer, Miss. Ellen ""Nellie""",True,,False,False,False,0,29,,female -False,"Todoroff, Mr. Lalio",False,,True,True,False,0,30,,male -False,"Uruchurtu, Don. Manuel E",False,,False,True,False,0,31,40.0,male -True,"Spencer, Mrs. William Augustus (Marie Eugenie)",True,B78,True,False,True,1,32,,female -True,"Glynn, Miss. Mary Agatha",True,,False,False,False,0,33,,female -False,"Wheadon, Mr. Edward H",False,,True,True,False,0,34,66.0,male -False,"Meyer, Mr. Edgar Joseph",False,,True,True,True,1,35,28.0,male -False,"Holverson, Mr. Alexander Oskar",False,,True,True,True,1,36,42.0,male -False,"Mamee, Mr. Hanna",True,,True,True,False,0,37,,male -False,"Cann, Mr. Ernest Charles",False,,True,True,False,0,38,21.0,male -True,"Vander Planke, Miss. Augusta Maria",False,,False,False,True,2,39,18.0,female -True,"Nicola-Yarred, Miss. Jamila",True,,False,False,True,1,40,14.0,female -True,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",False,,True,False,True,1,41,40.0,female -True,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",False,,True,False,True,1,42,27.0,female -False,"Kraeff, Mr. Theodor",False,,True,True,False,0,43,,male -True,"Laroche, Miss. Simonne Marie Anne Andree",True,,False,False,True,1,44,3.0,female -True,"Devaney, Miss. Margaret Delia",True,,False,False,False,0,45,19.0,female -False,"Rogers, Mr. William John",False,,True,True,False,0,46,,male -False,"Lennon, Mr. Denis",False,,True,True,True,1,47,,male -True,"O'Driscoll, Miss. Bridget",True,,False,False,False,0,48,,female -False,"Samaan, Mr. 
Youssef",False,,True,True,True,2,49,,male -True,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",False,,True,False,True,1,50,18.0,female -False,"Panula, Master. Juha Niilo",False,,False,True,True,4,51,7.0,male -False,"Nosworthy, Mr. Richard Cater",False,,True,True,False,0,52,21.0,male -True,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",True,D33,True,False,True,1,53,49.0,female -True,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",True,,True,False,True,1,54,29.0,female -False,"Ostby, Mr. Engelhart Cornelius",False,B30,True,True,False,0,55,65.0,male -False,"Woolner, Mr. Hugh",True,C52,True,True,False,0,56,,male -True,"Rugg, Miss. Emily",True,,False,False,False,0,57,21.0,female -False,"Novel, Mr. Mansouer",False,,True,True,False,0,58,28.5,male -True,"West, Miss. Constance Mirium",True,,False,False,True,1,59,5.0,female -False,"Goodwin, Master. William Frederick",False,,False,True,True,5,60,11.0,male +age,sex,sibsp,is_mr,name,has_siblings,passenger_id,survived,is_male,is_female,cabin +22.0,male,1,True,"Braund, Mr. Owen Harris",True,1,False,True,False, +38.0,female,1,True,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",True,2,True,False,True,C85 +26.0,female,0,False,"Heikkinen, Miss. Laina",False,3,True,False,True, +35.0,female,1,True,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",True,4,True,False,True,C123 +35.0,male,0,True,"Allen, Mr. William Henry",False,5,False,True,False, +,male,0,True,"Moran, Mr. James",False,6,False,True,False, +54.0,other,0,True,"McCarthy, Mr. Timothy J",False,7,False,False,False,E46 +2.0,male,3,False,"Palsson, Master. Gosta Leonard",True,8,False,True,False, +27.0,female,0,True,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",False,9,True,False,True, +14.0,female,1,True,"Nasser, Mrs. Nicholas (Adele Achem)",True,10,True,False,True, +4.0,female,1,False,"Sandstrom, Miss. Marguerite Rut",True,11,True,False,True,G6 +58.0,female,0,False,"Bonnell, Miss. Elizabeth",False,12,True,False,True,C103 +20.0,male,0,True,"Saundercock, Mr. William Henry",False,13,False,True,False, +39.0,male,1,True,"Andersson, Mr. Anders Johan",True,14,False,True,False, +14.0,female,0,False,"Vestrom, Miss. Hulda Amanda Adolfina",False,15,False,False,True, +55.0,female,0,True,"Hewlett, Mrs. (Mary D Kingcome) ",False,16,True,False,True, +2.0,male,4,False,"Rice, Master. Eugene",True,17,False,True,False, +,male,0,True,"Williams, Mr. Charles Eugene",False,18,True,True,False, +31.0,female,1,True,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",True,19,False,False,True, +,female,0,True,"Masselmani, Mrs. Fatima",False,20,True,False,True, +35.0,male,0,True,"Fynney, Mr. Joseph J",False,21,False,True,False, +34.0,male,0,True,"Beesley, Mr. Lawrence",False,22,True,True,False,D56 +15.0,female,0,False,"McGowan, Miss. Anna ""Annie""",False,23,True,False,True, +28.0,male,0,True,"Sloper, Mr. William Thompson",False,24,True,True,False,A6 +8.0,female,3,False,"Palsson, Miss. Torborg Danira",True,25,False,False,True, +38.0,female,1,True,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",True,26,True,False,True, +,male,0,True,"Emir, Mr. Farred Chehab",False,27,False,True,False, +19.0,male,3,True,"Fortune, Mr. Charles Alexander",True,28,False,True,False,C23 C25 C27 +,female,0,False,"O'Dwyer, Miss. Ellen ""Nellie""",False,29,True,False,True, +,male,0,True,"Todoroff, Mr. Lalio",False,30,False,True,False, +40.0,male,0,False,"Uruchurtu, Don. Manuel E",False,31,False,True,False, +,female,1,True,"Spencer, Mrs. William Augustus (Marie Eugenie)",True,32,True,False,True,B78 +,female,0,False,"Glynn, Miss. 
Mary Agatha",False,33,True,False,True, +66.0,male,0,True,"Wheadon, Mr. Edward H",False,34,False,True,False, +28.0,male,1,True,"Meyer, Mr. Edgar Joseph",True,35,False,True,False, +42.0,male,1,True,"Holverson, Mr. Alexander Oskar",True,36,False,True,False, +,male,0,True,"Mamee, Mr. Hanna",False,37,True,True,False, +21.0,male,0,True,"Cann, Mr. Ernest Charles",False,38,False,True,False, +18.0,female,2,False,"Vander Planke, Miss. Augusta Maria",True,39,False,False,True, +14.0,female,1,False,"Nicola-Yarred, Miss. Jamila",True,40,True,False,True, +40.0,female,1,True,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",True,41,False,False,True, +27.0,female,1,True,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",True,42,False,False,True, +,male,0,True,"Kraeff, Mr. Theodor",False,43,False,True,False, +3.0,female,1,False,"Laroche, Miss. Simonne Marie Anne Andree",True,44,True,False,True, +19.0,female,0,False,"Devaney, Miss. Margaret Delia",False,45,True,False,True, +,male,0,True,"Rogers, Mr. William John",False,46,False,True,False, +,male,1,True,"Lennon, Mr. Denis",True,47,False,True,False, +,female,0,False,"O'Driscoll, Miss. Bridget",False,48,True,False,True, +,male,2,True,"Samaan, Mr. Youssef",True,49,False,True,False, +18.0,female,1,True,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",True,50,False,False,True, +7.0,male,4,False,"Panula, Master. Juha Niilo",True,51,False,True,False, +21.0,male,0,True,"Nosworthy, Mr. Richard Cater",False,52,False,True,False, +49.0,female,1,True,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",True,53,True,False,True,D33 +29.0,female,1,True,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",True,54,True,False,True, +65.0,male,0,True,"Ostby, Mr. Engelhart Cornelius",False,55,False,True,False,B30 +,male,0,True,"Woolner, Mr. Hugh",False,56,True,True,False,C52 +21.0,female,0,False,"Rugg, Miss. Emily",False,57,True,False,True, +28.5,male,0,True,"Novel, Mr. Mansouer",False,58,False,True,False, +5.0,female,1,False,"West, Miss. Constance Mirium",True,59,True,False,True, +11.0,male,5,False,"Goodwin, Master. William Frederick",True,60,False,True,False, diff --git a/test_data/titanic-validate.csv b/test_data/titanic-validate.csv index 32ffe50..10e2e9e 100644 --- a/test_data/titanic-validate.csv +++ b/test_data/titanic-validate.csv @@ -1,21 +1,21 @@ -is_female,name,survived,cabin,is_mr,is_male,has_siblings,sibsp,passenger_id,age,sex -False,"Waelens, Mr. Achille",False,,True,True,False,0,81,22.0,male -False,"Sheerlinck, Mr. Jan Baptist",True,,True,True,False,0,82,29.0,male -True,"McDermott, Miss. Brigdet Delia",True,,False,False,False,0,83,,female -False,"Carrau, Mr. Francisco M",False,,True,True,False,0,84,28.0,male -True,"Ilett, Miss. Bertha",True,,False,False,False,0,85,17.0,female -True,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",True,,True,False,True,3,86,33.0,female -False,"Ford, Mr. William Neal",False,,True,True,True,1,87,16.0,male -False,"Slocovski, Mr. Selman Francis",False,,True,True,False,0,88,,male -True,"Fortune, Miss. Mabel Helen",True,C23 C25 C27,False,False,True,3,89,23.0,female -False,"Celotti, Mr. Francesco",False,,True,True,False,0,90,24.0,male -False,"Christmann, Mr. Emil",False,,True,True,False,0,91,29.0,male -False,"Andreasson, Mr. Paul Edvin",False,,True,True,False,0,92,20.0,male -False,"Chaffee, Mr. Herbert Fuller",False,E31,True,True,True,1,93,46.0,male -False,"Dean, Mr. Bertram Frank",False,,True,True,True,1,94,26.0,male -False,"Coxon, Mr. Daniel",False,,True,True,False,0,95,59.0,male -False,"Shorney, Mr. 
Charles Joseph",False,,True,True,False,0,96,,male -False,"Goldschmidt, Mr. George B",False,A5,True,True,False,0,97,71.0,male -False,"Greenfield, Mr. William Bertram",True,D10 D12,True,True,False,0,98,23.0,male -True,"Doling, Mrs. John T (Ada Julia Bone)",True,,True,False,False,0,99,34.0,female -False,"Kantor, Mr. Sinai",False,,True,True,True,1,100,34.0,male +age,sex,sibsp,is_mr,name,has_siblings,passenger_id,survived,is_male,is_female,cabin +22.0,male,0,True,"Waelens, Mr. Achille",False,81,False,True,False, +29.0,male,0,True,"Sheerlinck, Mr. Jan Baptist",False,82,True,True,False, +,female,0,False,"McDermott, Miss. Brigdet Delia",False,83,True,False,True, +28.0,male,0,True,"Carrau, Mr. Francisco M",False,84,False,True,False, +17.0,female,0,False,"Ilett, Miss. Bertha",False,85,True,False,True, +33.0,female,3,True,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",True,86,True,False,True, +16.0,male,1,True,"Ford, Mr. William Neal",True,87,False,True,False, +,male,0,True,"Slocovski, Mr. Selman Francis",False,88,False,True,False, +23.0,female,3,False,"Fortune, Miss. Mabel Helen",True,89,True,False,True,C23 C25 C27 +24.0,male,0,True,"Celotti, Mr. Francesco",False,90,False,True,False, +29.0,male,0,True,"Christmann, Mr. Emil",False,91,False,True,False, +20.0,male,0,True,"Andreasson, Mr. Paul Edvin",False,92,False,True,False, +46.0,male,1,True,"Chaffee, Mr. Herbert Fuller",True,93,False,True,False,E31 +26.0,male,1,True,"Dean, Mr. Bertram Frank",True,94,False,True,False, +59.0,male,0,True,"Coxon, Mr. Daniel",False,95,False,True,False, +,male,0,True,"Shorney, Mr. Charles Joseph",False,96,False,True,False, +71.0,male,0,True,"Goldschmidt, Mr. George B",False,97,False,True,False,A5 +23.0,male,0,True,"Greenfield, Mr. William Bertram",False,98,True,True,False,D10 D12 +34.0,female,0,True,"Doling, Mrs. John T (Ada Julia Bone)",False,99,True,False,True, +34.0,male,1,True,"Kantor, Mr. Sinai",True,100,False,True,False, From 6f9fe9f15fabc18dac60a9bb2030cbfbde0fd44f Mon Sep 17 00:00:00 2001 From: "Mats E. Mollestad" Date: Tue, 2 Apr 2024 22:10:11 +0200 Subject: [PATCH 03/13] Added dtype info to scan csv --- aligned/sources/local.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/aligned/sources/local.py b/aligned/sources/local.py index cb1257a..a6de12b 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -150,7 +150,14 @@ async def to_lazy_polars(self) -> pl.LazyFrame: raise UnableToFindFileException(self.path) try: - return pl.scan_csv(self.path, separator=self.csv_config.seperator, try_parse_dates=True) + schema: dict[str, pl.PolarsDataType] | None = None + if self.expected_schema: + schema = { # type: ignore + name: dtype.polars_type + for name, dtype + in self.expected_schema.items() if not dtype.is_datetime + } + return pl.scan_csv(self.path, dtypes=schema, separator=self.csv_config.seperator, try_parse_dates=True) except OSError: raise UnableToFindFileException(self.path) @@ -164,7 +171,7 @@ async def upsert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non potential_timestamps = request.all_features if request.event_timestamp: - potential_timestamps.add(request.event_timestamp) + potential_timestamps.add(request.event_timestamp.as_feature()) for feature in potential_timestamps: if feature.dtype.name == 'datetime': From a56538eda81e4a00c49a0494897a6deb132118cf Mon Sep 17 00:00:00 2001 From: "Mats E. 
Mollestad" Date: Tue, 2 Apr 2024 22:33:11 +0200 Subject: [PATCH 04/13] Fixed parsing bugs --- aligned/jobs/tests/test_derived_job.py | 2 +- aligned/sources/local.py | 26 ++++-- test_data/credit_history.csv | 14 +-- test_data/credit_history_mater.parquet | Bin 987 -> 988 bytes test_data/data/csv_iso.csv | 6 +- test_data/data/csv_unix.csv | 6 +- test_data/data/parquet_iso.parquet | Bin 1139 -> 1136 bytes test_data/data/parquet_unix.parquet | Bin 1077 -> 1077 bytes test_data/feature-store.json | 2 +- test_data/loan.csv | 14 +-- test_data/test_model.csv | 8 +- test_data/test_model.parquet | Bin 594 -> 598 bytes test_data/titanic-sets.json | 2 +- test_data/titanic-test.csv | 42 ++++----- test_data/titanic-train.csv | 122 ++++++++++++------------- test_data/titanic-validate.csv | 42 ++++----- 16 files changed, 149 insertions(+), 137 deletions(-) diff --git a/aligned/jobs/tests/test_derived_job.py b/aligned/jobs/tests/test_derived_job.py index 281ee3b..9ec717a 100644 --- a/aligned/jobs/tests/test_derived_job.py +++ b/aligned/jobs/tests/test_derived_job.py @@ -165,7 +165,7 @@ async def test_model_with_label_multiple_views() -> None: 'user_id': ['b', 'b', 'a', 'a'], 'total_amount': [109.0, 109.0, 120.0, 120.0], 'is_expence': [True, True, True, True], - 'abs_amount': [20, 100, 20, 100], + 'abs_amount': [20.0, 100.0, 20.0, 100.0], 'amount': [-20.0, -100.0, -20.0, -100.0], } ) diff --git a/aligned/sources/local.py b/aligned/sources/local.py index a6de12b..50be6eb 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -152,12 +152,15 @@ async def to_lazy_polars(self) -> pl.LazyFrame: try: schema: dict[str, pl.PolarsDataType] | None = None if self.expected_schema: - schema = { # type: ignore - name: dtype.polars_type - for name, dtype - in self.expected_schema.items() if not dtype.is_datetime - } - return pl.scan_csv(self.path, dtypes=schema, separator=self.csv_config.seperator, try_parse_dates=True) + schema = { # type: ignore + name: dtype.polars_type + for name, dtype in self.expected_schema.items() + if not dtype.is_datetime + } + + return pl.scan_csv( + self.path, dtypes=schema, separator=self.csv_config.seperator, try_parse_dates=True + ) except OSError: raise UnableToFindFileException(self.path) @@ -251,12 +254,21 @@ def enricher(self) -> CsvFileEnricher: return CsvFileEnricher(file=self.path) def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob: + from aligned.schemas.constraints import Optional + + optional_constraint = Optional() + with_schema = CsvFileSource( path=self.path, mapping_keys=self.mapping_keys, csv_config=self.csv_config, formatter=self.formatter, - expected_schema={feat.name: feat.dtype for feat in request.features}, + expected_schema={ + feat.name: feat.dtype + for feat in request.features + if not (feat.constraints and optional_constraint in feat.constraints) + and not feat.name.isdigit() + }, ) return FileFullJob(with_schema, request, limit, date_formatter=self.formatter) diff --git a/test_data/credit_history.csv b/test_data/credit_history.csv index 6a4b502..2965743 100644 --- a/test_data/credit_history.csv +++ b/test_data/credit_history.csv @@ -1,7 +1,7 @@ -event_timestamp,credit_card_due,student_loan_due,bankruptcies,due_sum,dob_ssn -2020-04-26 18:01:04.746575+00:00,8419,22328,0,30747,19530219_5179 -2020-04-26 18:01:04.746575+00:00,2944,2515,0,5459,19520816_8737 -2020-04-26 18:01:04.746575+00:00,833,33000,0,33833,19860413_2537 -2020-04-27 18:01:04.746575+00:00,5936,48955,0,54891,19530219_5179 -2020-04-27 
18:01:04.746575+00:00,1575,9501,0,11076,19520816_8737 -2020-04-27 18:01:04.746575+00:00,6263,35510,0,41773,19860413_2537 +dob_ssn,student_loan_due,credit_card_due,event_timestamp,due_sum,bankruptcies +19530219_5179,22328,8419,2020-04-26 18:01:04.746575+00:00,30747,0 +19520816_8737,2515,2944,2020-04-26 18:01:04.746575+00:00,5459,0 +19860413_2537,33000,833,2020-04-26 18:01:04.746575+00:00,33833,0 +19530219_5179,48955,5936,2020-04-27 18:01:04.746575+00:00,54891,0 +19520816_8737,9501,1575,2020-04-27 18:01:04.746575+00:00,11076,0 +19860413_2537,35510,6263,2020-04-27 18:01:04.746575+00:00,41773,0 diff --git a/test_data/credit_history_mater.parquet b/test_data/credit_history_mater.parquet index f31408c811c6e794f9f0b7c0b6d22468c360489d..466952bed2650ced37d3c47a4f0865e39e7a143b 100644 GIT binary patch delta 443 zcmcc3euteaz%j^Blua~CG-l#%DJcsx0~15zcq3C|b5218QzHWlL$i1bb5TK+i64}> zI~dhC7=VgRM9n593iHb_0KtQu4;&adbQmhOPi)d-Nm4t-I5A3|bH{E6Ms^*B$x;AGf2vaafo&>iUBnMnWq>v!I4|bN=1{P5<2DLRzG84O1 z`9SVq5IY7IdcZt+2GHR^SBTvK3QSgH%Hhv<1cERS;TQ~x&VnlM<-85Cot{m=nNJI z83qIaMLiABYdHbDPF5W{RH1_n@!0RWwzYz_bb delta 447 zcmcb^ew&>oz%j^h;sH6%xCp?4I~skC9{IetFJ{?I4i{J0DE!)#FZ3+s4Qs z$|jm68aI(kO4`)Oz{1ch-oo72oKsN2!py+L&^X@6R8UaEbfTS-Zx^H5D<%d>88Hsg z1Th8%5NjJFocW4Lw2KiWEXpG3K z8-blGc7#P#j6v-Svy3Q@q$&fOB-{j1CNVa#X^dhJcZ%I&5tW#%#FWDm=?DZtAi^=*(PjWqVkiE_^| jh%oZYNX`x7PK)vg%ZV`dGfWDcyq8&=V-^zw11Q!2yoYpE diff --git a/test_data/data/csv_iso.csv b/test_data/data/csv_iso.csv index 03ad4f6..23ad6d2 100644 --- a/test_data/data/csv_iso.csv +++ b/test_data/data/csv_iso.csv @@ -1,4 +1,4 @@ id,other,et,timestamp -1,foo,2024-04-02T19:24:20.581529+UTC,2024-04-02T19:24:20.581654+UTC -2,bar,2024-04-01T19:24:20.581651+UTC,2024-04-03T19:24:20.581654+UTC -3,baz,2024-03-31T19:24:20.581653+UTC,2024-04-04T19:24:20.581655+UTC +1,foo,2024-04-02T20:31:42.129+UTC,2024-04-02T20:31:42.129150+UTC +2,bar,2024-04-01T20:31:42.129146+UTC,2024-04-03T20:31:42.129150+UTC +3,baz,2024-03-31T20:31:42.129149+UTC,2024-04-04T20:31:42.129151+UTC diff --git a/test_data/data/csv_unix.csv b/test_data/data/csv_unix.csv index 709ed2f..1a26852 100644 --- a/test_data/data/csv_unix.csv +++ b/test_data/data/csv_unix.csv @@ -1,4 +1,4 @@ id,other,et,timestamp -1,foo,1712085860581529,1712085860581654 -2,bar,1711999460581651,1712172260581654 -3,baz,1711913060581653,1712258660581655 +1,foo,1712089902129000,1712089902129150 +2,bar,1712003502129146,1712176302129150 +3,baz,1711917102129149,1712262702129151 diff --git a/test_data/data/parquet_iso.parquet b/test_data/data/parquet_iso.parquet index 4771f5f03d1e06bef97e11ce5392331a1e8bb230..875d30453f84177ec7bdb8749eaa98635d81879f 100644 GIT binary patch delta 282 zcmey&@quH))XYAw~vP#)eiV zMtX)umfE2q&T@M43=D=w@(dh?CT3?O~#&${!0$@o|9!XUOHc1Jl)Dlr9F*dPZ zjA9!mGcxL{Bb#DqY5*}viGjhm2}qb&0reY##3m;)$!fH*Fi6UXafrr=0bK%QO=5&I zmoSNLV1x>{O`gM8%J^ck5RV4HW?}`@ zZaO)DQBk9Zg+WqAj6*a|jDZ2fn#2faE@2Yg#t0SenLLxRl=0nUekNtDCN@zq2DMwv vGT7{9R9nKtAU1FiG1CC?W1><>|GV Ii&2{y05?AoA^-pY diff --git a/test_data/feature-store.json b/test_data/feature-store.json index 696292c..da12c37 100644 --- a/test_data/feature-store.json +++ b/test_data/feature-store.json @@ -1 +1 @@ -{"metadata": {"created_at": "2024-04-02T19:24:22.440920", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": 
{"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime-UTC"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, 
"dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime-UTC"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}, {"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}, "date_formatter": {"name": "noop"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": 
"feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 
86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null, "exposed_model": null}], "enrichers": []} +{"metadata": {"created_at": "2024-04-02T20:31:44.459387", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime-UTC"}, "description": null, "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}], "derived_features": [{"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], 
"transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime-UTC"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}, {"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}, "date_formatter": {"name": "noop"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": 
"cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}], "derived_features": [{"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, 
"stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null, "exposed_model": null}], "enrichers": []} diff --git a/test_data/loan.csv b/test_data/loan.csv index fdcbfe8..c3bc530 100644 --- a/test_data/loan.csv +++ b/test_data/loan.csv @@ -1,7 +1,7 @@ -event_timestamp,loan_id,loan_status,personal_income,loan_amount -2020-04-26 18:01:04.746575+00:00,10000,True,59000,35000 -2020-04-26 18:01:04.746575+00:00,10001,False,9600,1000 -2020-04-26 18:01:04.746575+00:00,10002,True,9600,5500 -2020-04-27 18:01:04.746575+00:00,10000,True,65500,35000 -2020-04-27 18:01:04.746575+00:00,10001,True,54400,35000 -2020-04-27 18:01:04.746575+00:00,10002,True,9900,2500 +loan_id,loan_status,personal_income,loan_amount,event_timestamp +10000,True,59000,35000,2020-04-26 18:01:04.746575+00:00 +10001,False,9600,1000,2020-04-26 18:01:04.746575+00:00 +10002,True,9600,5500,2020-04-26 18:01:04.746575+00:00 +10000,True,65500,35000,2020-04-27 18:01:04.746575+00:00 +10001,True,54400,35000,2020-04-27 18:01:04.746575+00:00 +10002,True,9900,2500,2020-04-27 18:01:04.746575+00:00 diff --git a/test_data/test_model.csv b/test_data/test_model.csv index decf326..9bfa2bd 100644 --- a/test_data/test_model.csv +++ b/test_data/test_model.csv @@ -1,7 +1,7 @@ -a,some_id -10,1 -14,2 -20,3 +some_id,a +1,10 +2,14 +3,20 1,1 2,2 3,3 diff --git a/test_data/test_model.parquet b/test_data/test_model.parquet index 45000b8c6a01b7b74e9d929351f025a63b3e1646..3d63e93e325d66955310baa075a964e3afa02562 100644 GIT binary patch delta 223 zcmcb_a*btzoG>#3CnEzBC%Xd!3j+i&POOri=q)2G0+QiFmf@OcC}lr~kwH>Mj6*a- z45%E)n!^ZTin2)Bc(4@Z=a-1GFo?3KGD$FHra%~si40;Kk}{GyY6UVA8$}d27{oR( ziHb0&wJ^y5wW%_&NlGv#iZY3Dh!u!ssLh$o!KlXB!=w)a<`$F98RxURIyyQ!WjId0 q%NU}X4rIIMWIHJ61?1->78OhC$S^Pf5yL`828IB~AVUBtBqe14 delta 247 zcmcb{a*1VvoG>2)CnEz3C%Xd!7Xt){Ostlg=p!S{1eali$}mqfl&+t}$RH^r#vz&^ z1{4LdW-&sTqAZd&9xO%q`6Z$(45BQmOcIQV5Sl47g+Yu%Qbtlotw08-OqGF65-2Ik zB*r0DAeN!V!63GQNmPVEt%(UP4b%gd2ihohgh^D4LF@se*kln#Db5ZieGo7=nQYED wpWV~Z(b1_OYVuvi5XC?s+uhZXL4qwHKPR!MSW-uZfdPmZ<^nwt;22~G0OQ~%+yDRo diff --git a/test_data/titanic-sets.json b/test_data/titanic-sets.json index b3d6a3a..e8dbef8 100644 --- a/test_data/titanic-sets.json +++ b/test_data/titanic-sets.json @@ -1 +1 @@ -{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": 
"optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} +{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": 
null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "optional"}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "train_size_fraction": 0.6, "test_size_fraction": 
0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} diff --git a/test_data/titanic-test.csv b/test_data/titanic-test.csv index 8c048ed..c23e49e 100644 --- a/test_data/titanic-test.csv +++ b/test_data/titanic-test.csv @@ -1,21 +1,21 @@ -age,sex,sibsp,is_mr,name,has_siblings,passenger_id,survived,is_male,is_female,cabin -22.0,male,0,True,"Sirayanian, Mr. Orsen",False,61,False,True,False, -38.0,female,0,False,"Icard, Miss. Amelie",False,62,True,False,True,B28 -45.0,male,1,True,"Harris, Mr. Henry Birkhardt",True,63,False,True,False,C83 -4.0,male,3,False,"Skoog, Master. Harald",True,64,False,True,False, -,male,0,True,"Stewart, Mr. Albert A",False,65,False,True,False, -,male,1,False,"Moubarek, Master. Gerios",True,66,True,True,False, -29.0,female,0,True,"Nye, Mrs. (Elizabeth Ramell)",False,67,True,False,True,F33 -19.0,male,0,True,"Crease, Mr. Ernest James",False,68,False,True,False, -17.0,female,4,False,"Andersson, Miss. Erna Alexandra",True,69,True,False,True, -26.0,male,2,True,"Kink, Mr. Vincenz",True,70,False,True,False, -32.0,male,0,True,"Jenkin, Mr. Stephen Curnow",False,71,False,True,False, -16.0,female,5,False,"Goodwin, Miss. Lillian Amy",True,72,False,False,True, -21.0,male,0,True,"Hood, Mr. Ambrose Jr",False,73,False,True,False, -26.0,male,1,True,"Chronopoulos, Mr. Apostolos",True,74,False,True,False, -32.0,male,0,True,"Bing, Mr. Lee",False,75,True,True,False, -25.0,male,0,True,"Moen, Mr. Sigurd Hansen",False,76,False,True,False,F G73 -,male,0,True,"Staneff, Mr. Ivan",False,77,False,True,False, -,male,0,True,"Moutal, Mr. Rahamin Haim",False,78,False,True,False, -0.83,male,0,False,"Caldwell, Master. Alden Gates",False,79,True,True,False, -30.0,female,0,False,"Dowdell, Miss. Elizabeth",False,80,True,False,True, +age,name,passenger_id,has_siblings,sex,is_female,cabin,is_male,survived,sibsp,is_mr +22.0,"Sirayanian, Mr. Orsen",61,False,male,False,,True,False,0,True +38.0,"Icard, Miss. Amelie",62,False,female,True,B28,False,True,0,False +45.0,"Harris, Mr. Henry Birkhardt",63,True,male,False,C83,True,False,1,True +4.0,"Skoog, Master. Harald",64,True,male,False,,True,False,3,False +,"Stewart, Mr. Albert A",65,False,male,False,,True,False,0,True +,"Moubarek, Master. Gerios",66,True,male,False,,True,True,1,False +29.0,"Nye, Mrs. (Elizabeth Ramell)",67,False,female,True,F33,False,True,0,True +19.0,"Crease, Mr. Ernest James",68,False,male,False,,True,False,0,True +17.0,"Andersson, Miss. Erna Alexandra",69,True,female,True,,False,True,4,False +26.0,"Kink, Mr. Vincenz",70,True,male,False,,True,False,2,True +32.0,"Jenkin, Mr. Stephen Curnow",71,False,male,False,,True,False,0,True +16.0,"Goodwin, Miss. Lillian Amy",72,True,female,True,,False,False,5,False +21.0,"Hood, Mr. Ambrose Jr",73,False,male,False,,True,False,0,True +26.0,"Chronopoulos, Mr. Apostolos",74,True,male,False,,True,False,1,True +32.0,"Bing, Mr. Lee",75,False,male,False,,True,True,0,True +25.0,"Moen, Mr. Sigurd Hansen",76,False,male,False,F G73,True,False,0,True +,"Staneff, Mr. Ivan",77,False,male,False,,True,False,0,True +,"Moutal, Mr. Rahamin Haim",78,False,male,False,,True,False,0,True +0.83,"Caldwell, Master. Alden Gates",79,False,male,False,,True,True,0,False +30.0,"Dowdell, Miss. 
Elizabeth",80,False,female,True,,False,True,0,False diff --git a/test_data/titanic-train.csv b/test_data/titanic-train.csv index f9b262c..c3a8cf0 100644 --- a/test_data/titanic-train.csv +++ b/test_data/titanic-train.csv @@ -1,61 +1,61 @@ -age,sex,sibsp,is_mr,name,has_siblings,passenger_id,survived,is_male,is_female,cabin -22.0,male,1,True,"Braund, Mr. Owen Harris",True,1,False,True,False, -38.0,female,1,True,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",True,2,True,False,True,C85 -26.0,female,0,False,"Heikkinen, Miss. Laina",False,3,True,False,True, -35.0,female,1,True,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",True,4,True,False,True,C123 -35.0,male,0,True,"Allen, Mr. William Henry",False,5,False,True,False, -,male,0,True,"Moran, Mr. James",False,6,False,True,False, -54.0,other,0,True,"McCarthy, Mr. Timothy J",False,7,False,False,False,E46 -2.0,male,3,False,"Palsson, Master. Gosta Leonard",True,8,False,True,False, -27.0,female,0,True,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",False,9,True,False,True, -14.0,female,1,True,"Nasser, Mrs. Nicholas (Adele Achem)",True,10,True,False,True, -4.0,female,1,False,"Sandstrom, Miss. Marguerite Rut",True,11,True,False,True,G6 -58.0,female,0,False,"Bonnell, Miss. Elizabeth",False,12,True,False,True,C103 -20.0,male,0,True,"Saundercock, Mr. William Henry",False,13,False,True,False, -39.0,male,1,True,"Andersson, Mr. Anders Johan",True,14,False,True,False, -14.0,female,0,False,"Vestrom, Miss. Hulda Amanda Adolfina",False,15,False,False,True, -55.0,female,0,True,"Hewlett, Mrs. (Mary D Kingcome) ",False,16,True,False,True, -2.0,male,4,False,"Rice, Master. Eugene",True,17,False,True,False, -,male,0,True,"Williams, Mr. Charles Eugene",False,18,True,True,False, -31.0,female,1,True,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",True,19,False,False,True, -,female,0,True,"Masselmani, Mrs. Fatima",False,20,True,False,True, -35.0,male,0,True,"Fynney, Mr. Joseph J",False,21,False,True,False, -34.0,male,0,True,"Beesley, Mr. Lawrence",False,22,True,True,False,D56 -15.0,female,0,False,"McGowan, Miss. Anna ""Annie""",False,23,True,False,True, -28.0,male,0,True,"Sloper, Mr. William Thompson",False,24,True,True,False,A6 -8.0,female,3,False,"Palsson, Miss. Torborg Danira",True,25,False,False,True, -38.0,female,1,True,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",True,26,True,False,True, -,male,0,True,"Emir, Mr. Farred Chehab",False,27,False,True,False, -19.0,male,3,True,"Fortune, Mr. Charles Alexander",True,28,False,True,False,C23 C25 C27 -,female,0,False,"O'Dwyer, Miss. Ellen ""Nellie""",False,29,True,False,True, -,male,0,True,"Todoroff, Mr. Lalio",False,30,False,True,False, -40.0,male,0,False,"Uruchurtu, Don. Manuel E",False,31,False,True,False, -,female,1,True,"Spencer, Mrs. William Augustus (Marie Eugenie)",True,32,True,False,True,B78 -,female,0,False,"Glynn, Miss. Mary Agatha",False,33,True,False,True, -66.0,male,0,True,"Wheadon, Mr. Edward H",False,34,False,True,False, -28.0,male,1,True,"Meyer, Mr. Edgar Joseph",True,35,False,True,False, -42.0,male,1,True,"Holverson, Mr. Alexander Oskar",True,36,False,True,False, -,male,0,True,"Mamee, Mr. Hanna",False,37,True,True,False, -21.0,male,0,True,"Cann, Mr. Ernest Charles",False,38,False,True,False, -18.0,female,2,False,"Vander Planke, Miss. Augusta Maria",True,39,False,False,True, -14.0,female,1,False,"Nicola-Yarred, Miss. Jamila",True,40,True,False,True, -40.0,female,1,True,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",True,41,False,False,True, -27.0,female,1,True,"Turpin, Mrs. 
William John Robert (Dorothy Ann Wonnacott)",True,42,False,False,True, -,male,0,True,"Kraeff, Mr. Theodor",False,43,False,True,False, -3.0,female,1,False,"Laroche, Miss. Simonne Marie Anne Andree",True,44,True,False,True, -19.0,female,0,False,"Devaney, Miss. Margaret Delia",False,45,True,False,True, -,male,0,True,"Rogers, Mr. William John",False,46,False,True,False, -,male,1,True,"Lennon, Mr. Denis",True,47,False,True,False, -,female,0,False,"O'Driscoll, Miss. Bridget",False,48,True,False,True, -,male,2,True,"Samaan, Mr. Youssef",True,49,False,True,False, -18.0,female,1,True,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",True,50,False,False,True, -7.0,male,4,False,"Panula, Master. Juha Niilo",True,51,False,True,False, -21.0,male,0,True,"Nosworthy, Mr. Richard Cater",False,52,False,True,False, -49.0,female,1,True,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",True,53,True,False,True,D33 -29.0,female,1,True,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",True,54,True,False,True, -65.0,male,0,True,"Ostby, Mr. Engelhart Cornelius",False,55,False,True,False,B30 -,male,0,True,"Woolner, Mr. Hugh",False,56,True,True,False,C52 -21.0,female,0,False,"Rugg, Miss. Emily",False,57,True,False,True, -28.5,male,0,True,"Novel, Mr. Mansouer",False,58,False,True,False, -5.0,female,1,False,"West, Miss. Constance Mirium",True,59,True,False,True, -11.0,male,5,False,"Goodwin, Master. William Frederick",True,60,False,True,False, +age,name,passenger_id,has_siblings,sex,is_female,cabin,is_male,survived,sibsp,is_mr +22.0,"Braund, Mr. Owen Harris",1,True,male,False,,True,False,1,True +38.0,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",2,True,female,True,C85,False,True,1,True +26.0,"Heikkinen, Miss. Laina",3,False,female,True,,False,True,0,False +35.0,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",4,True,female,True,C123,False,True,1,True +35.0,"Allen, Mr. William Henry",5,False,male,False,,True,False,0,True +,"Moran, Mr. James",6,False,male,False,,True,False,0,True +54.0,"McCarthy, Mr. Timothy J",7,False,other,False,E46,False,False,0,True +2.0,"Palsson, Master. Gosta Leonard",8,True,male,False,,True,False,3,False +27.0,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",9,False,female,True,,False,True,0,True +14.0,"Nasser, Mrs. Nicholas (Adele Achem)",10,True,female,True,,False,True,1,True +4.0,"Sandstrom, Miss. Marguerite Rut",11,True,female,True,G6,False,True,1,False +58.0,"Bonnell, Miss. Elizabeth",12,False,female,True,C103,False,True,0,False +20.0,"Saundercock, Mr. William Henry",13,False,male,False,,True,False,0,True +39.0,"Andersson, Mr. Anders Johan",14,True,male,False,,True,False,1,True +14.0,"Vestrom, Miss. Hulda Amanda Adolfina",15,False,female,True,,False,False,0,False +55.0,"Hewlett, Mrs. (Mary D Kingcome) ",16,False,female,True,,False,True,0,True +2.0,"Rice, Master. Eugene",17,True,male,False,,True,False,4,False +,"Williams, Mr. Charles Eugene",18,False,male,False,,True,True,0,True +31.0,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",19,True,female,True,,False,False,1,True +,"Masselmani, Mrs. Fatima",20,False,female,True,,False,True,0,True +35.0,"Fynney, Mr. Joseph J",21,False,male,False,,True,False,0,True +34.0,"Beesley, Mr. Lawrence",22,False,male,False,D56,True,True,0,True +15.0,"McGowan, Miss. Anna ""Annie""",23,False,female,True,,False,True,0,False +28.0,"Sloper, Mr. William Thompson",24,False,male,False,A6,True,True,0,True +8.0,"Palsson, Miss. Torborg Danira",25,True,female,True,,False,False,3,False +38.0,"Asplund, Mrs. 
Carl Oscar (Selma Augusta Emilia Johansson)",26,True,female,True,,False,True,1,True +,"Emir, Mr. Farred Chehab",27,False,male,False,,True,False,0,True +19.0,"Fortune, Mr. Charles Alexander",28,True,male,False,C23 C25 C27,True,False,3,True +,"O'Dwyer, Miss. Ellen ""Nellie""",29,False,female,True,,False,True,0,False +,"Todoroff, Mr. Lalio",30,False,male,False,,True,False,0,True +40.0,"Uruchurtu, Don. Manuel E",31,False,male,False,,True,False,0,False +,"Spencer, Mrs. William Augustus (Marie Eugenie)",32,True,female,True,B78,False,True,1,True +,"Glynn, Miss. Mary Agatha",33,False,female,True,,False,True,0,False +66.0,"Wheadon, Mr. Edward H",34,False,male,False,,True,False,0,True +28.0,"Meyer, Mr. Edgar Joseph",35,True,male,False,,True,False,1,True +42.0,"Holverson, Mr. Alexander Oskar",36,True,male,False,,True,False,1,True +,"Mamee, Mr. Hanna",37,False,male,False,,True,True,0,True +21.0,"Cann, Mr. Ernest Charles",38,False,male,False,,True,False,0,True +18.0,"Vander Planke, Miss. Augusta Maria",39,True,female,True,,False,False,2,False +14.0,"Nicola-Yarred, Miss. Jamila",40,True,female,True,,False,True,1,False +40.0,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",41,True,female,True,,False,False,1,True +27.0,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",42,True,female,True,,False,False,1,True +,"Kraeff, Mr. Theodor",43,False,male,False,,True,False,0,True +3.0,"Laroche, Miss. Simonne Marie Anne Andree",44,True,female,True,,False,True,1,False +19.0,"Devaney, Miss. Margaret Delia",45,False,female,True,,False,True,0,False +,"Rogers, Mr. William John",46,False,male,False,,True,False,0,True +,"Lennon, Mr. Denis",47,True,male,False,,True,False,1,True +,"O'Driscoll, Miss. Bridget",48,False,female,True,,False,True,0,False +,"Samaan, Mr. Youssef",49,True,male,False,,True,False,2,True +18.0,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",50,True,female,True,,False,False,1,True +7.0,"Panula, Master. Juha Niilo",51,True,male,False,,True,False,4,False +21.0,"Nosworthy, Mr. Richard Cater",52,False,male,False,,True,False,0,True +49.0,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",53,True,female,True,D33,False,True,1,True +29.0,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",54,True,female,True,,False,True,1,True +65.0,"Ostby, Mr. Engelhart Cornelius",55,False,male,False,B30,True,False,0,True +,"Woolner, Mr. Hugh",56,False,male,False,C52,True,True,0,True +21.0,"Rugg, Miss. Emily",57,False,female,True,,False,True,0,False +28.5,"Novel, Mr. Mansouer",58,False,male,False,,True,False,0,True +5.0,"West, Miss. Constance Mirium",59,True,female,True,,False,True,1,False +11.0,"Goodwin, Master. William Frederick",60,True,male,False,,True,False,5,False diff --git a/test_data/titanic-validate.csv b/test_data/titanic-validate.csv index 10e2e9e..9078b2e 100644 --- a/test_data/titanic-validate.csv +++ b/test_data/titanic-validate.csv @@ -1,21 +1,21 @@ -age,sex,sibsp,is_mr,name,has_siblings,passenger_id,survived,is_male,is_female,cabin -22.0,male,0,True,"Waelens, Mr. Achille",False,81,False,True,False, -29.0,male,0,True,"Sheerlinck, Mr. Jan Baptist",False,82,True,True,False, -,female,0,False,"McDermott, Miss. Brigdet Delia",False,83,True,False,True, -28.0,male,0,True,"Carrau, Mr. Francisco M",False,84,False,True,False, -17.0,female,0,False,"Ilett, Miss. Bertha",False,85,True,False,True, -33.0,female,3,True,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",True,86,True,False,True, -16.0,male,1,True,"Ford, Mr. William Neal",True,87,False,True,False, -,male,0,True,"Slocovski, Mr. 
Selman Francis",False,88,False,True,False, -23.0,female,3,False,"Fortune, Miss. Mabel Helen",True,89,True,False,True,C23 C25 C27 -24.0,male,0,True,"Celotti, Mr. Francesco",False,90,False,True,False, -29.0,male,0,True,"Christmann, Mr. Emil",False,91,False,True,False, -20.0,male,0,True,"Andreasson, Mr. Paul Edvin",False,92,False,True,False, -46.0,male,1,True,"Chaffee, Mr. Herbert Fuller",True,93,False,True,False,E31 -26.0,male,1,True,"Dean, Mr. Bertram Frank",True,94,False,True,False, -59.0,male,0,True,"Coxon, Mr. Daniel",False,95,False,True,False, -,male,0,True,"Shorney, Mr. Charles Joseph",False,96,False,True,False, -71.0,male,0,True,"Goldschmidt, Mr. George B",False,97,False,True,False,A5 -23.0,male,0,True,"Greenfield, Mr. William Bertram",False,98,True,True,False,D10 D12 -34.0,female,0,True,"Doling, Mrs. John T (Ada Julia Bone)",False,99,True,False,True, -34.0,male,1,True,"Kantor, Mr. Sinai",True,100,False,True,False, +age,name,passenger_id,has_siblings,sex,is_female,cabin,is_male,survived,sibsp,is_mr +22.0,"Waelens, Mr. Achille",81,False,male,False,,True,False,0,True +29.0,"Sheerlinck, Mr. Jan Baptist",82,False,male,False,,True,True,0,True +,"McDermott, Miss. Brigdet Delia",83,False,female,True,,False,True,0,False +28.0,"Carrau, Mr. Francisco M",84,False,male,False,,True,False,0,True +17.0,"Ilett, Miss. Bertha",85,False,female,True,,False,True,0,False +33.0,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",86,True,female,True,,False,True,3,True +16.0,"Ford, Mr. William Neal",87,True,male,False,,True,False,1,True +,"Slocovski, Mr. Selman Francis",88,False,male,False,,True,False,0,True +23.0,"Fortune, Miss. Mabel Helen",89,True,female,True,C23 C25 C27,False,True,3,False +24.0,"Celotti, Mr. Francesco",90,False,male,False,,True,False,0,True +29.0,"Christmann, Mr. Emil",91,False,male,False,,True,False,0,True +20.0,"Andreasson, Mr. Paul Edvin",92,False,male,False,,True,False,0,True +46.0,"Chaffee, Mr. Herbert Fuller",93,True,male,False,E31,True,False,1,True +26.0,"Dean, Mr. Bertram Frank",94,True,male,False,,True,False,1,True +59.0,"Coxon, Mr. Daniel",95,False,male,False,,True,False,0,True +,"Shorney, Mr. Charles Joseph",96,False,male,False,,True,False,0,True +71.0,"Goldschmidt, Mr. George B",97,False,male,False,A5,True,False,0,True +23.0,"Greenfield, Mr. William Bertram",98,False,male,False,D10 D12,True,True,0,True +34.0,"Doling, Mrs. John T (Ada Julia Bone)",99,False,female,True,,False,True,0,True +34.0,"Kantor, Mr. Sinai",100,True,male,False,,True,False,1,True From f642af6037026713a80de7fd87595a4eaca8f2c3 Mon Sep 17 00:00:00 2001 From: "Mats E. Mollestad" Date: Tue, 2 Apr 2024 23:19:58 +0200 Subject: [PATCH 05/13] Added a way to load predictions from different model versions --- aligned/feature_store.py | 33 +++++++++++++++++------ aligned/schemas/model.py | 16 ++++++++--- aligned/tests/test_models_as_feature.py | 36 +++++++++++++++++++++++++ test_data/model_preds.csv | 5 ++++ 4 files changed, 79 insertions(+), 11 deletions(-) create mode 100644 test_data/model_preds.csv diff --git a/aligned/feature_store.py b/aligned/feature_store.py index 5c9a0b6..5ad39a0 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -398,6 +398,7 @@ def features_for( entities: ConvertableToRetrivalJob | RetrivalJob, features: list[str], event_timestamp_column: str | None = None, + model_version_as_entity: bool | None = None, ) -> RetrivalJob: """ Returns a set of features given a set of entities. 
@@ -417,7 +418,7 @@ def features_for( """ feature_request = RawStringFeatureRequest(features=set(features)) - requests = self.requests_for(feature_request, event_timestamp_column) + requests = self.requests_for(feature_request, event_timestamp_column, model_version_as_entity) feature_names = set() @@ -480,6 +481,7 @@ def _requests_for( combined_feature_views: dict[str, CompiledCombinedFeatureView], models: dict[str, ModelSchema], event_timestamp_column: str | None = None, + model_version_as_entity: bool | None = None, ) -> FeatureRequest: features = feature_request.grouped_features @@ -488,13 +490,16 @@ def _requests_for( for location in feature_request.locations: location_name = location.name + if location.location == 'model': model = models[location_name] view = model.predictions_view if len(features[location]) == 1 and list(features[location])[0] == '*': - request = view.request(location_name) + request = view.request(location_name, model_version_as_entity or False) else: - request = view.request_for(features[location], location_name) + request = view.request_for( + features[location], location_name, model_version_as_entity or False + ) requests.append(request) entity_names.update(request.entity_names) @@ -526,9 +531,11 @@ def _requests_for( raise ValueError(f'Unable to find: {location_name}') if len(features[location]) == 1 and list(features[location])[0] == '*': - sub_request = feature_view.request(location_name) + sub_request = feature_view.request(location_name, model_version_as_entity or False) else: - sub_request = feature_view.request_for(features[location], location_name) + sub_request = feature_view.request_for( + features[location], location_name, model_version_as_entity or False + ) requests.append(sub_request) entity_names.update(sub_request.entity_names) @@ -560,7 +567,10 @@ def requests_for_features( return self.requests_for(RawStringFeatureRequest(set(features)), event_timestamp_column) def requests_for( - self, feature_request: RawStringFeatureRequest, event_timestamp_column: str | None = None + self, + feature_request: RawStringFeatureRequest, + event_timestamp_column: str | None = None, + model_version_as_entity: bool | None = None, ) -> FeatureRequest: return FeatureStore._requests_for( feature_request, @@ -568,6 +578,7 @@ def requests_for( self.combined_feature_views, self.models, event_timestamp_column=event_timestamp_column, + model_version_as_entity=model_version_as_entity, ) def feature_view(self, view: str) -> FeatureViewStore: @@ -1133,12 +1144,18 @@ def process_features(self, input: RetrivalJob | ConvertableToRetrivalJob) -> Ret ) def predictions_for( - self, entities: ConvertableToRetrivalJob | RetrivalJob, event_timestamp_column: str | None = None + self, + entities: ConvertableToRetrivalJob | RetrivalJob, + event_timestamp_column: str | None = None, + model_version_as_entity: bool | None = None, ) -> RetrivalJob: location_id = self.location.identifier return self.store.features_for( - entities, features=[f'{location_id}:*'], event_timestamp_column=event_timestamp_column + entities, + features=[f'{location_id}:*'], + event_timestamp_column=event_timestamp_column, + model_version_as_entity=model_version_as_entity, ) def predictions_between(self, start_date: datetime, end_date: datetime) -> RetrivalJob: diff --git a/aligned/schemas/model.py b/aligned/schemas/model.py index 3da202c..5ecae49 100644 --- a/aligned/schemas/model.py +++ b/aligned/schemas/model.py @@ -104,19 +104,29 @@ def full_schema(self) -> set[Feature]: return schema - def request(self, name: 
str) -> RetrivalRequest: + def request(self, name: str, model_version_as_entity: bool = False) -> RetrivalRequest: + entities = self.entities + + if model_version_as_entity and self.model_version_column: + entities = entities.union({self.model_version_column}) + return RetrivalRequest( name=name, location=FeatureLocation.model(name), - entities=self.entities, + entities=entities, features=self.features, derived_features=self.derived_features, event_timestamp=self.event_timestamp, ) - def request_for(self, features: set[str], name: str) -> RetrivalRequest: + def request_for( + self, features: set[str], name: str, model_version_as_entity: bool = False + ) -> RetrivalRequest: entities = self.entities + if model_version_as_entity and self.model_version_column: + entities = entities.union({self.model_version_column}) + return RetrivalRequest( name=name, location=FeatureLocation.model(name), diff --git a/aligned/tests/test_models_as_feature.py b/aligned/tests/test_models_as_feature.py index 88359d4..1f04d46 100644 --- a/aligned/tests/test_models_as_feature.py +++ b/aligned/tests/test_models_as_feature.py @@ -1,3 +1,4 @@ +import pytest from aligned import Bool, FeatureStore, FileSource, Int32, String from aligned.feature_view.feature_view import feature_view from aligned.compiler.model import FeatureInputVersions, model_contract @@ -43,6 +44,13 @@ class First: first = First() +@model_contract(name='first_with_versions', input_features=[view.feature_a, other.feature_b]) +class FirstWithVersions: + some_id = Int32().as_entity() + target = other.is_true.as_classification_label() + model_version = String().as_model_version() + + @model_contract(name='second_model', input_features=[first.target]) class Second: other_id = Int32().as_entity() @@ -81,3 +89,31 @@ def test_model_version() -> None: model_request = store.model('test_model').using_version('v2').request() assert model_request.features_to_include == {'feature_a', 'is_true', 'feature_b', 'view_id', 'other_id'} + + +@pytest.mark.asyncio +async def test_load_preds_with_different_model_version() -> None: + import polars as pl + + store = FeatureStore.experimental() + store.add_model(FirstWithVersions) + + source = FileSource.csv_at('test_data/model_preds.csv') + + await source.write_polars( + pl.DataFrame( + {'some_id': [1, 2, 1, 2], 'target': [0, 1, 1, 1], 'model_version': ['v1', 'v1', 'v2', 'v2']} + ).lazy() + ) + + model_store = store.model('first_with_versions').using_source(source) + + df = await model_store.predictions_for({'some_id': [1, 2], 'model_version': ['v2', 'v2']}).to_polars() + + assert df['target'].to_list() == [False, True] + + new_df = await model_store.predictions_for( + {'some_id': [1, 2], 'model_version': ['v2', 'v2']}, model_version_as_entity=True + ).to_polars() + + assert new_df['target'].to_list() == [True, True] diff --git a/test_data/model_preds.csv b/test_data/model_preds.csv new file mode 100644 index 0000000..c4bc418 --- /dev/null +++ b/test_data/model_preds.csv @@ -0,0 +1,5 @@ +some_id,target,model_version +1,0,v1 +2,1,v1 +1,1,v2 +2,1,v2 From a46cca03bd4db4ba3af5780140767e2f6ebbe4df Mon Sep 17 00:00:00 2001 From: "Mats E. 
Mollestad" Date: Sun, 7 Apr 2024 18:39:14 +0200 Subject: [PATCH 06/13] Added stack source and other minor improvments --- README.md | 2 +- aligned/__init__.py | 12 ++ aligned/compiler/feature_factory.py | 59 +++++- aligned/data_source/batch_data_source.py | 236 ++++++++++++++++++++++- aligned/feature_store.py | 6 + aligned/feature_view/feature_view.py | 9 + aligned/local/job.py | 4 +- aligned/retrival_job.py | 46 ++++- aligned/schemas/constraints.py | 8 +- aligned/schemas/feature.py | 28 +++ aligned/schemas/feature_view.py | 2 +- aligned/schemas/model.py | 3 + aligned/sources/azure_blob_storage.py | 1 - aligned/sources/local.py | 4 + aligned/tests/test_transformations.py | 20 ++ 15 files changed, 429 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 7e8c07e..7d1ab25 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ A data managment tool for ML applications. -Similar to have DBT is a data managment tool for business analytics, will Aligned manage ML projects. +Similar to how DBT is a data managment tool for business analytics, will Aligned manage ML projects. Aligned does this through two things. 1. A light weight data managment system. Making it possible to query a data lake and databases. diff --git a/aligned/__init__.py b/aligned/__init__.py index ede3c39..7e56e40 100644 --- a/aligned/__init__.py +++ b/aligned/__init__.py @@ -4,8 +4,14 @@ Entity, EventTimestamp, Float, + Int8, + Int16, Int32, Int64, + UInt8, + UInt16, + UInt32, + UInt64, Json, String, Timestamp, @@ -52,6 +58,12 @@ 'Bool', 'Entity', 'UUID', + 'UInt8', + 'UInt16', + 'UInt32', + 'UInt64', + 'Int8', + 'Int16', 'Int32', 'Int64', 'Float', diff --git a/aligned/compiler/feature_factory.py b/aligned/compiler/feature_factory.py index 5b5ad2e..820ee4f 100644 --- a/aligned/compiler/feature_factory.py +++ b/aligned/compiler/feature_factory.py @@ -533,13 +533,12 @@ def transformed_using_features_polars( def transform_polars( self, expression: pl.Expr, - using_features: list[FeatureFactory] | None = None, as_dtype: T | None = None, ) -> T: from aligned.compiler.transformation_factory import PolarsTransformationFactory dtype: FeatureFactory = as_dtype or self.copy_type() # type: ignore [assignment] - dtype.transformation = PolarsTransformationFactory(dtype, expression, using_features or [self]) + dtype.transformation = PolarsTransformationFactory(dtype, expression, [self]) return dtype # type: ignore [return-value] def polars_aggregation(self, aggregation: pl.Expr, as_type: T) -> T: @@ -930,6 +929,62 @@ def aggregate(self) -> ArithmeticAggregation: return ArithmeticAggregation(self) +class UInt8(ArithmeticFeature, CouldBeEntityFeature, CouldBeModelVersion, CategoricalEncodableFeature): + def copy_type(self) -> UInt8: + if self.constraints and Optional() in self.constraints: + return UInt8().is_optional() + return UInt8() + + @property + def dtype(self) -> FeatureType: + return FeatureType.uint8() + + def aggregate(self) -> ArithmeticAggregation: + return ArithmeticAggregation(self) + + +class UInt16(ArithmeticFeature, CouldBeEntityFeature, CouldBeModelVersion, CategoricalEncodableFeature): + def copy_type(self) -> UInt16: + if self.constraints and Optional() in self.constraints: + return UInt16().is_optional() + return UInt16() + + @property + def dtype(self) -> FeatureType: + return FeatureType.uint16() + + def aggregate(self) -> ArithmeticAggregation: + return ArithmeticAggregation(self) + + +class UInt32(ArithmeticFeature, CouldBeEntityFeature, CouldBeModelVersion, CategoricalEncodableFeature): + def 
copy_type(self) -> UInt32: + if self.constraints and Optional() in self.constraints: + return UInt32().is_optional() + return UInt32() + + @property + def dtype(self) -> FeatureType: + return FeatureType.uint32() + + def aggregate(self) -> ArithmeticAggregation: + return ArithmeticAggregation(self) + + +class UInt64(ArithmeticFeature, CouldBeEntityFeature, CouldBeModelVersion, CategoricalEncodableFeature): + def copy_type(self) -> UInt64: + if self.constraints and Optional() in self.constraints: + return UInt64().is_optional() + return UInt64() + + @property + def dtype(self) -> FeatureType: + return FeatureType.uint64() + + def aggregate(self) -> ArithmeticAggregation: + return ArithmeticAggregation(self) + + class Int8(ArithmeticFeature, CouldBeEntityFeature, CouldBeModelVersion, CategoricalEncodableFeature): def copy_type(self) -> Int8: if self.constraints and Optional() in self.constraints: diff --git a/aligned/data_source/batch_data_source.py b/aligned/data_source/batch_data_source.py index 4848c6d..1a00753 100644 --- a/aligned/data_source/batch_data_source.py +++ b/aligned/data_source/batch_data_source.py @@ -1,8 +1,10 @@ from __future__ import annotations from copy import copy +from datetime import timedelta, timezone, datetime from typing import TYPE_CHECKING, TypeVar, Any, Callable, Coroutine from dataclasses import dataclass +from uuid import uuid4 from mashumaro.types import SerializableType from aligned.data_file import DataFileReference @@ -13,11 +15,14 @@ from aligned.request.retrival_request import RequestResult, RetrivalRequest from aligned.compiler.feature_factory import FeatureFactory from polars.type_aliases import TimeUnit +import polars as pl + +import logging + +logger = logging.getLogger(__name__) if TYPE_CHECKING: from aligned.retrival_job import RetrivalJob - from datetime import datetime - import polars as pl class BatchDataSourceFactory: @@ -60,6 +65,7 @@ def __init__(self) -> None: FeatureViewReferenceSource, CustomMethodDataSource, ModelSource, + StackSource, ] self.supported_data_sources = {source.type_name: source for source in source_types} @@ -729,6 +735,92 @@ def depends_on(self) -> set[FeatureLocation]: return self.source.depends_on().union(self.right_source.depends_on()) +@dataclass +class StackSource(BatchDataSource): + + top: BatchDataSource + bottom: BatchDataSource + + source_column: str | None = None + + type_name: str = 'stack' + + @property + def source_column_config(self): + from aligned.retrival_job import StackSourceColumn + + if not self.source_column: + return None + + return StackSourceColumn( + top_source_name=self.top.source_id(), + bottom_source_name=self.bottom.source_id(), + source_column=self.source_column, + ) + + def sub_request(self, request: RetrivalRequest, config) -> RetrivalRequest: + return RetrivalRequest( + name=request.name, + location=request.location, + features={feature for feature in request.features if feature.name != config.source_column}, + entities=request.entities, + derived_features={ + feature + for feature in request.derived_features + if not any(dep.name == config.source_column for dep in feature.depending_on) + }, + aggregated_features=request.aggregated_features, + event_timestamp_request=request.event_timestamp_request, + features_to_include=request.features_to_include - {config.source_column}, + ) + + def job_group_key(self) -> str: + return f'stack/{self.top.job_group_key()}/{self.bottom.job_group_key()}' + + async def schema(self) -> dict[str, FeatureType]: + top_schema = await self.top.schema() + 
bottom_schema = await self.bottom.schema() + + return {**top_schema, **bottom_schema} + + def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob: + from aligned.retrival_job import StackJob + + config = self.source_column_config + + sub_request = request + + if config: + sub_request = self.sub_request(request, config) + + return ( + StackJob( + top=self.top.all_data(sub_request, int(limit / 2) if limit else None), + bottom=self.bottom.all_data(sub_request, int(limit / 2) if limit else None), + source_column=self.source_column_config, + ) + .with_request([request]) + .derive_features([request]) + ) + + def all_between_dates( + self, request: RetrivalRequest, start_date: datetime, end_date: datetime + ) -> RetrivalJob: + from aligned.retrival_job import StackJob + + top = self.top.all_between_dates(request, start_date, end_date) + bottom = self.bottom.all_between_dates(request, start_date, end_date) + + return StackJob( + top=top, + bottom=bottom, + source_column=self.source_column_config, + ) + + def depends_on(self) -> set[FeatureLocation]: + return self.top.depends_on().union(self.bottom.depends_on()) + + @dataclass class JoinDataSource(BatchDataSource): @@ -846,3 +938,143 @@ def columns_for(self, features: list[Feature]) -> list[str]: def feature_identifier_for(self, columns: list[str]) -> list[str]: reverse_map = {v: k for k, v in self.mapping_keys.items()} return [reverse_map.get(column, column) for column in columns] + + +def data_for_request(request: RetrivalRequest, size: int) -> pl.DataFrame: + from aligned.schemas.constraints import ( + InDomain, + LowerBound, + LowerBoundInclusive, + Unique, + UpperBound, + UpperBoundInclusive, + Optional, + ) + import numpy as np + + needed_features = request.features.union(request.entities) + schema = {feature.name: feature.dtype.polars_type for feature in needed_features} + + exprs = [] + + for feature in needed_features: + dtype = feature.dtype + + choices: list[Any] | None = None + max_value: float | None = None + min_value: float | None = None + + is_optional = False + is_unique = False + + for constraints in feature.constraints or set(): + if isinstance(constraints, InDomain): + choices = constraints.values + elif isinstance(constraints, LowerBound): + min_value = constraints.value + elif isinstance(constraints, LowerBoundInclusive): + min_value = constraints.value + elif isinstance(constraints, UpperBound): + max_value = constraints.value + elif isinstance(constraints, UpperBoundInclusive): + max_value = constraints.value + elif isinstance(constraints, Unique): + is_unique = True + elif isinstance(constraints, Optional): + is_optional = True + + if dtype.is_numeric: + if is_unique: + values = np.arange(0, size, dtype=dtype.pandas_type) + else: + values = np.random.random(size) + + # Scale into [min_value, max_value) when both bounds are set, + # otherwise apply the single bound that exists. + if max_value is not None and min_value is not None: + values = values * (max_value - min_value) + min_value + elif max_value is not None: + values = values * max_value + elif min_value is not None: + values = values + min_value + elif dtype.is_datetime: + values = [ + datetime.now(tz=timezone.utc) - np.random.random() * timedelta(days=365) for _ in range(size) + ] + else: + if choices: + values = np.random.choice(choices, size=size) + else: + values = np.random.choice(list('abcde'), size=size) + + if is_optional: + values = np.where(np.random.random(size) > 0.5, values, np.NaN) + + exprs.append(pl.lit(values).alias(feature.name)) + + return pl.DataFrame(exprs, schema=schema) + + +class DummyDataBatchSource(BatchDataSource): + """ + The DummyDataBatchSource is a data source that generates random data for a given request.
+ This can be useful for testing and development purposes. + + It will use the data types and constraints defined on a feature to generate the data. + + ```python + from aligned import feature_view, Bool, Float, Int64, String, DummyDataBatchSource + + @feature_view( + source=DummyDataBatchSource(), + ) + class MyView: + passenger_id = Int64().as_entity() + survived = Bool() + age = Float().lower_bound(0).upper_bound(100) + name = String() + sex = String().accepted_values(["male", "female"]) + ``` + """ + + type_name: str = 'dummy_data' + + def job_group_key(self) -> str: + return str(uuid4()) + + @classmethod + def multi_source_features_for( + cls: type[T], facts: RetrivalJob, requests: list[tuple[T, RetrivalRequest]] + ) -> RetrivalJob: + async def random_features_for(facts: RetrivalJob, request: RetrivalRequest) -> pl.LazyFrame: + df = await facts.to_polars() + return data_for_request(request, df.height).lazy() + + return CustomMethodDataSource.from_methods( + features_for=random_features_for, + ).features_for(facts, requests[0][1]) + + def all_data(self, request: RetrivalRequest, limit: int | None = None) -> RetrivalJob: + from aligned import CustomMethodDataSource + + async def all_data(request: RetrivalRequest, limit: int | None = None) -> pl.LazyFrame: + return data_for_request(request, limit or 100).lazy() + + return CustomMethodDataSource.from_methods(all_data=all_data).all_data(request, limit) + + def all_between_dates( + self, request: RetrivalRequest, start_date: datetime, end_date: datetime + ) -> RetrivalJob: + from aligned import CustomMethodDataSource + + async def between_date( + request: RetrivalRequest, start_date: datetime, end_date: datetime + ) -> pl.LazyFrame: + return data_for_request(request, 100).lazy() + + return CustomMethodDataSource.from_methods(all_between_dates=between_date).all_between_dates( + request, start_date, end_date + ) + + async def schema(self) -> dict[str, FeatureType]: + return {} + + def depends_on(self) -> set[FeatureLocation]: + return set() diff --git a/aligned/feature_store.py b/aligned/feature_store.py index 5ad39a0..d7146fd 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -1534,7 +1534,13 @@ def with_optimised_write(self) -> FeatureViewStore: features_in_models = self.store.model_features_for(self.view.name) return self.select(features_in_models) + def select_columns(self, columns: list[str]) -> RetrivalJob: + return self.all_columns().select_columns(set(columns)) + def all(self, limit: int | None = None) -> RetrivalJob: + return self.all_columns(limit) + + def all_columns(self, limit: int | None = None) -> RetrivalJob: if not isinstance(self.source, RangeFeatureSource): raise ValueError(f'The source ({self.source}) needs to conform to RangeFeatureSource') diff --git a/aligned/feature_view/feature_view.py b/aligned/feature_view/feature_view.py index 62b510a..0809a25 100644 --- a/aligned/feature_view/feature_view.py +++ b/aligned/feature_view/feature_view.py @@ -156,6 +156,15 @@ def compile(self) -> CompiledFeatureView: view = set_location_for_features_in(view, FeatureLocation.feature_view(self.metadata.name)) return FeatureView.compile_with_metadata(view, self.metadata) + def vstack( + self, source: BatchDataSource | FeatureViewWrapper, source_column: str | None = None + ) -> BatchDataSource: + from aligned.data_source.batch_data_source import StackSource + + return StackSource( + top=resolve_source(self), bottom=resolve_source(source), source_column=source_column + ) + def filter( self, name: str, where: Callable[[T], Bool],
materialize_source: BatchDataSource | None = None ) -> FeatureViewWrapper[T]: diff --git a/aligned/local/job.py b/aligned/local/job.py index 43faa90..bd4b577 100644 --- a/aligned/local/job.py +++ b/aligned/local/job.py @@ -233,7 +233,9 @@ async def file_transform_polars(self, df: pl.LazyFrame) -> pl.LazyFrame: if optional_features: df = df.with_columns([pl.lit(None).alias(feature.name) for feature in optional_features]) - df = df.rename(mapping=renames) + if renames: + df = df.rename(mapping=renames) + df = decode_timestamps(df, self.request, self.date_formatter) if self.request.aggregated_features: diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py index 0a64b70..41efe99 100644 --- a/aligned/retrival_job.py +++ b/aligned/retrival_job.py @@ -28,6 +28,7 @@ TrainTestValidateSet, ) from aligned.validation.interface import Validator +from aligned.feature_source import WritableFeatureSource if TYPE_CHECKING: from typing import AsyncIterator @@ -36,7 +37,6 @@ from aligned.schemas.derivied_feature import AggregatedFeature, AggregateOver from aligned.schemas.model import EventTrigger, Model from aligned.sources.local import DataFileReference, StorageFileReference - from aligned.feature_source import WritableFeatureSource from aligned.feature_store import FeatureStore @@ -1039,6 +1039,50 @@ def describe(self) -> str: return f'Aggregating over {self.job.describe()}' +@dataclass +class StackSourceColumn: + top_source_name: str + bottom_source_name: str + source_column: str + + +@dataclass +class StackJob(RetrivalJob): + + top: RetrivalJob + bottom: RetrivalJob + + source_column: StackSourceColumn | None + + @property + def request_result(self) -> RequestResult: + return self.top.request_result + + @property + def retrival_requests(self) -> list[RetrivalRequest]: + return RetrivalRequest.combine(self.top.retrival_requests + self.bottom.retrival_requests) + + async def to_lazy_polars(self) -> pl.LazyFrame: + top = await self.top.to_lazy_polars() + bottom = await self.bottom.to_lazy_polars() + + if self.source_column: + top = top.with_columns( + pl.lit(self.source_column.top_source_name).alias(self.source_column.source_column) + ) + bottom = bottom.with_columns( + pl.lit(self.source_column.bottom_source_name).alias(self.source_column.source_column) + ) + + return top.collect().vstack(bottom.collect()).lazy() + + async def to_pandas(self) -> pd.DataFrame: + return (await self.to_lazy_polars()).collect().to_pandas() + + def describe(self) -> str: + return f'Stacking {self.top.describe()} on top of {self.bottom.describe()}' + + @dataclass class JoinAsofJob(RetrivalJob): diff --git a/aligned/schemas/constraints.py b/aligned/schemas/constraints.py index fe89eca..e585abc 100644 --- a/aligned/schemas/constraints.py +++ b/aligned/schemas/constraints.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Optional +from typing import Optional as OptionalType from mashumaro.types import SerializableType @@ -29,7 +29,7 @@ class SupportedConstraints: types: dict[str, type[Constraint]] - _shared: Optional['SupportedConstraints'] = None + _shared: OptionalType['SupportedConstraints'] = None def __init__(self) -> None: self.types = {} @@ -44,6 +44,10 @@ def __init__(self) -> None: InDomain, MaxLength, MinLength, + StartsWith, + EndsWith, + Unique, + Regex, ]: self.add(tran_type) diff --git a/aligned/schemas/feature.py b/aligned/schemas/feature.py index f6bd096..f1be91d 100644 --- a/aligned/schemas/feature.py +++ b/aligned/schemas/feature.py @@ -16,6 +16,10 @@ ('int16', pl.Int16), ('int32', 
pl.Int32), ('int64', pl.Int64), + ('uint8', pl.UInt8), + ('uint16', pl.UInt16), + ('uint32', pl.UInt32), + ('uint64', pl.UInt64), ('float', pl.Float64), ('float', pl.Float32), ('double', pl.Float64), @@ -45,6 +49,10 @@ def is_numeric(self) -> bool: 'int16', 'int32', 'int64', + 'uint8', + 'uint16', + 'uint32', + 'uint64', 'float', 'double', } # Can be represented as an int @@ -156,6 +164,10 @@ def feature_factory(self) -> ff.FeatureFactory: 'int16': ff.Int16(), 'int32': ff.Int32(), 'int64': ff.Int64(), + 'uint8': ff.UInt8(), + 'uint16': ff.UInt16(), + 'uint32': ff.UInt32(), + 'uint64': ff.UInt64(), 'float': ff.Float(), 'double': ff.Float(), 'bool': ff.Bool(), @@ -208,6 +220,22 @@ def from_polars(polars_type: pl.DataType) -> FeatureType: def string() -> FeatureType: return FeatureType(name='string') + @staticmethod + def uint8() -> FeatureType: + return FeatureType(name='uint8') + + @staticmethod + def uint16() -> FeatureType: + return FeatureType(name='uint16') + + @staticmethod + def uint32() -> FeatureType: + return FeatureType(name='uint32') + + @staticmethod + def uint64() -> FeatureType: + return FeatureType(name='uint64') + @staticmethod def int8() -> FeatureType: return FeatureType(name='int8') diff --git a/aligned/schemas/feature_view.py b/aligned/schemas/feature_view.py index ad374d8..465f99e 100644 --- a/aligned/schemas/feature_view.py +++ b/aligned/schemas/feature_view.py @@ -327,7 +327,7 @@ class FeatureViewReferenceSource(BatchDataSource): type_name = 'view_ref' def job_group_key(self) -> str: - return self.view.name + return FeatureLocation.feature_view(self.view.name).identifier async def schema(self) -> dict[str, FeatureType]: if self.view.materialized_source: diff --git a/aligned/schemas/model.py b/aligned/schemas/model.py index 5ecae49..3c063eb 100644 --- a/aligned/schemas/model.py +++ b/aligned/schemas/model.py @@ -201,6 +201,9 @@ class ModelSource(BatchDataSource): type_name: str = 'model_source' + def job_group_key(self) -> str: + return FeatureLocation.model(self.pred_view.name).identifier + async def schema(self) -> dict[str, FeatureType]: if self.model.predictions_view.source: return await self.model.predictions_view.source.schema() diff --git a/aligned/sources/azure_blob_storage.py b/aligned/sources/azure_blob_storage.py index 267ec1e..aeef512 100644 --- a/aligned/sources/azure_blob_storage.py +++ b/aligned/sources/azure_blob_storage.py @@ -447,7 +447,6 @@ async def write_pandas(self, df: pd.DataFrame) -> None: async def write_polars(self, df: pl.LazyFrame) -> None: url = f"az://{self.path}" creds = self.config.read_creds() - df.collect().write_parquet(url, storage_options=creds) df.collect().to_pandas().to_parquet(url, storage_options=creds) @classmethod diff --git a/aligned/sources/local.py b/aligned/sources/local.py index 50be6eb..ee1d2a1 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -158,6 +158,10 @@ async def to_lazy_polars(self) -> pl.LazyFrame: if not dtype.is_datetime } + if self.mapping_keys: + reverse_mapping = {v: k for k, v in self.mapping_keys.items()} + schema = {reverse_mapping.get(name, name): dtype for name, dtype in schema.items()} + return pl.scan_csv( self.path, dtypes=schema, separator=self.csv_config.seperator, try_parse_dates=True ) diff --git a/aligned/tests/test_transformations.py b/aligned/tests/test_transformations.py index cf9a652..eb8c688 100644 --- a/aligned/tests/test_transformations.py +++ b/aligned/tests/test_transformations.py @@ -135,3 +135,23 @@ class Titanic: data = await 
Titanic.query().all().to_pandas() # type: ignore assert data['cabin'].isnull().sum() == 0 + + +@pytest.mark.asyncio +async def test_fill_optional_column_bug(titanic_source: CsvFileSource) -> None: + @feature_view(name='test_fill', source=titanic_source) + class TestFill: + + passenger_id = String().as_entity() + + cabin = String().is_optional() + some_new_column = Int32().is_optional().fill_na(0) + some_string = String().is_optional().fill_na('some_string') + + is_male = cabin == 'male' + + df = await TestFill.query().all().to_polars() + + assert df['some_new_column'].is_null().sum() == 0 + assert df['some_string'].is_null().sum() == 0 From 23d5061c4bfe794963437d792e01c1c4ecaeb585 Mon Sep 17 00:00:00 2001 From: "Mats E. Mollestad" Date: Sun, 7 Apr 2024 19:04:34 +0200 Subject: [PATCH 07/13] Fixed circular import --- aligned/retrival_job.py | 8 +++- test_data/credit_history.csv | 14 +++--- test_data/credit_history_mater.parquet | Bin 988 -> 983 bytes test_data/data/csv_iso.csv | 6 +-- test_data/data/csv_unix.csv | 6 +-- test_data/data/parquet_iso.parquet | Bin 1136 -> 1131 bytes test_data/data/parquet_unix.parquet | Bin 1077 -> 1077 bytes test_data/feature-store.json | 2 +- test_data/loan.csv | 14 +++--- test_data/test_model.csv | 8 ++-- test_data/test_model.parquet | Bin 598 -> 594 bytes test_data/titanic-sets.json | 1 - test_data/titanic-test.csv | 21 --------- test_data/titanic-train.csv | 61 ------------------------- test_data/titanic-validate.csv | 21 --------- 15 files changed, 32 insertions(+), 130 deletions(-) delete mode 100644 test_data/titanic-sets.json delete mode 100644 test_data/titanic-test.csv delete mode 100644 test_data/titanic-train.csv delete mode 100644 test_data/titanic-validate.csv diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py index 41efe99..ec30a70 100644 --- a/aligned/retrival_job.py +++ b/aligned/retrival_job.py @@ -28,11 +28,11 @@ TrainTestValidateSet, ) from aligned.validation.interface import Validator -from aligned.feature_source import WritableFeatureSource if TYPE_CHECKING: from typing import AsyncIterator from aligned.schemas.folder import DatasetMetadata, DatasetStore + from aligned.feature_source import WritableFeatureSource from aligned.schemas.derivied_feature import AggregatedFeature, AggregateOver from aligned.schemas.model import EventTrigger, Model @@ -2258,6 +2258,8 @@ def remove_derived_features(self) -> RetrivalJob: return self.job.remove_derived_features() async def insert_into_output_source(self) -> None: + from aligned.feature_source import WritableFeatureSource + pred_source = self.model.predictions_view.source if not pred_source: raise ValueError('No source defined for predictions view') @@ -2269,6 +2271,8 @@ async def insert_into_output_source(self) -> None: async def upsert_into_output_source(self) -> None: + from aligned.feature_source import WritableFeatureSource + pred_source = self.model.predictions_view.source if not pred_source: raise ValueError('No source defined for predictions view') @@ -2280,6 +2284,8 @@ async def upsert_into_output_source(self) -> None: async def overwrite_output_source(self) -> None: + from aligned.feature_source import WritableFeatureSource + pred_source = self.model.predictions_view.source if not pred_source: raise ValueError('No source defined for predictions view') diff --git a/test_data/credit_history.csv b/test_data/credit_history.csv index 2965743..9571ca4 100644 --- a/test_data/credit_history.csv +++
b/test_data/credit_history.csv @@ -1,7 +1,7 @@ -dob_ssn,student_loan_due,credit_card_due,event_timestamp,due_sum,bankruptcies -19530219_5179,22328,8419,2020-04-26 18:01:04.746575+00:00,30747,0 -19520816_8737,2515,2944,2020-04-26 18:01:04.746575+00:00,5459,0 -19860413_2537,33000,833,2020-04-26 18:01:04.746575+00:00,33833,0 -19530219_5179,48955,5936,2020-04-27 18:01:04.746575+00:00,54891,0 -19520816_8737,9501,1575,2020-04-27 18:01:04.746575+00:00,11076,0 -19860413_2537,35510,6263,2020-04-27 18:01:04.746575+00:00,41773,0 +event_timestamp,credit_card_due,dob_ssn,due_sum,bankruptcies,student_loan_due +2020-04-26 18:01:04.746575+00:00,8419,19530219_5179,30747,0,22328 +2020-04-26 18:01:04.746575+00:00,2944,19520816_8737,5459,0,2515 +2020-04-26 18:01:04.746575+00:00,833,19860413_2537,33833,0,33000 +2020-04-27 18:01:04.746575+00:00,5936,19530219_5179,54891,0,48955 +2020-04-27 18:01:04.746575+00:00,1575,19520816_8737,11076,0,9501 +2020-04-27 18:01:04.746575+00:00,6263,19860413_2537,41773,0,35510 diff --git a/test_data/credit_history_mater.parquet b/test_data/credit_history_mater.parquet index 466952bed2650ced37d3c47a4f0865e39e7a143b..224e36f5dfe5725c8ef0513e1d89343bf4ff3f67 100644 GIT binary patch delta 283 zcmcb^ex02qz%j^h;sH6%itP@J>^cv2KA706$CaSQ!63>enk5=HkxNS2)X2cX&@A4< z+}NB`P{G2?z{Jov-pEu?P{VYhof1nIquQ>C`xH2rZv>gZuw(bc=VrVKVhjunk}_hu z7)84nCu=dvPL5`j5#mv0l3-8CPl_)t&QoRZkzrt%T*|1#m@)YPqnaVeQn3vzq9P1x zN0?+pc_dXC*d*b~MVZ9d#HKNdfox-B5Ie#mDlz#3V<6`bCVdd7@SN<$JBV;}4V^5_EGJn7RA}Mq=oske1XAH1R2G?IS(R*LGMR}vbMk&>QI2U$ J3=E(U0RVNZNLBy< delta 348 zcmcc4euv#Iz%j^Blua~CG$sZ}F@Q0PD1#sa12em^11|#ugQ2B`nSqI+alDbKu{o!p zf~k>#g`ruzg}JDp%ETE;x*d#a93XurqGp&zNXP(9V0f_efdeCl4nxKEiQ0P1NouDi z?o;60vD<->U58=$#);3(csm%yfa)Y=#7;4aCQa62l%1T+C^2~nzm8ALcng2|vjM@J_|N7r;mS1=vu=m`>c0ZTeM>IVQdl?Q^PJU~KG m?v|Mm#(q^v>5jGx5^MqaIf+HZk~Tn3LJ-4jCI$vjkOBa{_)p^i diff --git a/test_data/data/csv_iso.csv b/test_data/data/csv_iso.csv index 23ad6d2..0d53936 100644 --- a/test_data/data/csv_iso.csv +++ b/test_data/data/csv_iso.csv @@ -1,4 +1,4 @@ id,other,et,timestamp -1,foo,2024-04-02T20:31:42.129+UTC,2024-04-02T20:31:42.129150+UTC -2,bar,2024-04-01T20:31:42.129146+UTC,2024-04-03T20:31:42.129150+UTC -3,baz,2024-03-31T20:31:42.129149+UTC,2024-04-04T20:31:42.129151+UTC +1,foo,2024-04-07T17:03:43.252072+UTC,2024-04-07T17:03:43.252290+UTC +2,bar,2024-04-06T17:03:43.252286+UTC,2024-04-08T17:03:43.252290+UTC +3,baz,2024-04-05T17:03:43.252289+UTC,2024-04-09T17:03:43.252290+UTC diff --git a/test_data/data/csv_unix.csv b/test_data/data/csv_unix.csv index 1a26852..395a9bc 100644 --- a/test_data/data/csv_unix.csv +++ b/test_data/data/csv_unix.csv @@ -1,4 +1,4 @@ id,other,et,timestamp -1,foo,1712089902129000,1712089902129150 -2,bar,1712003502129146,1712176302129150 -3,baz,1711917102129149,1712262702129151 +1,foo,1712509423252072,1712509423252290 +2,bar,1712423023252286,1712595823252290 +3,baz,1712336623252289,1712682223252290 diff --git a/test_data/data/parquet_iso.parquet b/test_data/data/parquet_iso.parquet index 875d30453f84177ec7bdb8749eaa98635d81879f..04fcf4e9a2301227ec07d7c573175b46a42c8ef8 100644 GIT binary patch delta 272 zcmeys@tR}8RMrcOqD_pGxfm4#%tH*#tqhED3chQSOb&TCPuX-ObnuIqCitKK!z%#8fs)| z05{a4322(7pAy65I3_8*X^d(eEDVw|VjQAzVhju*)+9zaa|x5^CPt`m2aD*m$#WQs z81GHyXHwSs#40MrpmvQ}2B&kxI@m-d7}VxXj$x7!h1esu03;-~gH?=8jYF(ub1&0l FMgX%OHCF%t delta 293 zcmaFO@quH))cRA5q795OqHLlJV9X-QAjrVL%${5$&A`B5WME{XYXC$>Aw~vP#)eiV zMtX)umfE2q&T@M43=D=w@(dh?CT3?O~#&${!0$@o|9!XUOHc1Jl)Dlr9F*dPZ zjA9!Y)s`@U?6|-vT7s}|vM8gtCaUR%rUnoTlo%L{n}CFg70_Hmkl18@Mk$s>jB0I@ 
za~WlMH!z9;-6$y|*2W^bX!3N%BE}b!xtNr7{;-OQF{oW(mci)^u}N&A5)5i9SY<#a cih@jJVGvsd5)|9SD#oV9A=bCKgXu9N0G`-66951J diff --git a/test_data/data/parquet_unix.parquet b/test_data/data/parquet_unix.parquet index e2d0a768052b7a92fb0815530689af0fa16958d7..fe01b657b08f40d0eb4c7a4d7c801935fe448910 100644 GIT binary patch delta 58 zcmdnWv6W-OTG3B8R=0??;7$j>cLAF`^R>8H+l7hR8StZ#X#vDC4xb{rQ^B IT#VYx0D2u1`2YX_ delta 58 zcmdnWv6W-OTG9Ch|CB`87=GE$e82~!|K2}%=$FXEL&l>2pfdmdyUUmZW&X#@TX}Eh IV$@~^0Nn2u;s5{u diff --git a/test_data/feature-store.json b/test_data/feature-store.json index da12c37..91ff6fb 100644 --- a/test_data/feature-store.json +++ b/test_data/feature-store.json @@ -1 +1 @@ -{"metadata": {"created_at": "2024-04-02T20:31:44.459387", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime-UTC"}, "description": null, "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}], "derived_features": [{"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": 
"titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime-UTC"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}, {"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}, "date_formatter": {"name": "noop"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, 
"description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}], "derived_features": [{"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, 
"description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null, "exposed_model": null}], "enrichers": []} +{"metadata": {"created_at": "2024-04-07T17:03:46.274215", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}, "date_formatter": {"name": "noop"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", 
"dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}, {"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime-UTC"}, "description": null, "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": 
{"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime-UTC"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": 
"float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null, "exposed_model": null}], "enrichers": []} diff --git a/test_data/loan.csv b/test_data/loan.csv index c3bc530..b5f71ff 100644 --- a/test_data/loan.csv +++ b/test_data/loan.csv @@ -1,7 +1,7 @@ -loan_id,loan_status,personal_income,loan_amount,event_timestamp -10000,True,59000,35000,2020-04-26 18:01:04.746575+00:00 -10001,False,9600,1000,2020-04-26 18:01:04.746575+00:00 -10002,True,9600,5500,2020-04-26 18:01:04.746575+00:00 -10000,True,65500,35000,2020-04-27 18:01:04.746575+00:00 -10001,True,54400,35000,2020-04-27 18:01:04.746575+00:00 -10002,True,9900,2500,2020-04-27 18:01:04.746575+00:00 +personal_income,event_timestamp,loan_status,loan_amount,loan_id +59000,2020-04-26 18:01:04.746575+00:00,True,35000,10000 +9600,2020-04-26 18:01:04.746575+00:00,False,1000,10001 +9600,2020-04-26 18:01:04.746575+00:00,True,5500,10002 +65500,2020-04-27 18:01:04.746575+00:00,True,35000,10000 +54400,2020-04-27 18:01:04.746575+00:00,True,35000,10001 +9900,2020-04-27 18:01:04.746575+00:00,True,2500,10002 diff --git a/test_data/test_model.csv b/test_data/test_model.csv index 9bfa2bd..decf326 100644 --- a/test_data/test_model.csv +++ b/test_data/test_model.csv @@ -1,7 +1,7 @@ -some_id,a -1,10 -2,14 -3,20 +a,some_id +10,1 +14,2 +20,3 1,1 2,2 3,3 diff --git a/test_data/test_model.parquet b/test_data/test_model.parquet index 3d63e93e325d66955310baa075a964e3afa02562..e71f3c3fc09b3bfa0928ed8ca20493bfa0da04b8 100644 GIT binary patch delta 231 zcmcb{a*1Vv95)LCCnEzNC%XfK$i!l$i9RwA5hjob^F&SM`dN$&k}_f(q8VZg3?SAl zMhH`sMbgHDr6@nYM3jX=ltqc}uK01?AnMh1oe#~?!hiBKoO delta 207 zcmcb_a*btz95*uqCnEzBC%Xd!%fuq(iQY2YA|Meyh=`_={TxOHNf|K?(F`#L1`ulw 
zBZMi+B5C8nQk0)xBFe%b%A(37!IYT-VK62#h;c~DNb0B+$V_Y$QQ%+@+rT6$!l2f| zBm>l@%D^Tm!I&t@B*r0DAeNyvXEFz)8fOoaJ_wjwOg3kn&+h8z=;)N;IQcGPh-x~J k?VgkE=xA%pAi)-ppOaWrEU6>IzyL%H3mF+00vv-30n)D}N&o-= diff --git a/test_data/titanic-sets.json b/test_data/titanic-sets.json deleted file mode 100644 index e8dbef8..0000000 --- a/test_data/titanic-sets.json +++ /dev/null @@ -1 +0,0 @@ -{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "optional"}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": 
"infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} diff --git a/test_data/titanic-test.csv b/test_data/titanic-test.csv deleted file mode 100644 index c23e49e..0000000 --- a/test_data/titanic-test.csv +++ /dev/null @@ -1,21 +0,0 @@ -age,name,passenger_id,has_siblings,sex,is_female,cabin,is_male,survived,sibsp,is_mr -22.0,"Sirayanian, Mr. Orsen",61,False,male,False,,True,False,0,True -38.0,"Icard, Miss. Amelie",62,False,female,True,B28,False,True,0,False -45.0,"Harris, Mr. Henry Birkhardt",63,True,male,False,C83,True,False,1,True -4.0,"Skoog, Master. Harald",64,True,male,False,,True,False,3,False -,"Stewart, Mr. Albert A",65,False,male,False,,True,False,0,True -,"Moubarek, Master. Gerios",66,True,male,False,,True,True,1,False -29.0,"Nye, Mrs. (Elizabeth Ramell)",67,False,female,True,F33,False,True,0,True -19.0,"Crease, Mr. Ernest James",68,False,male,False,,True,False,0,True -17.0,"Andersson, Miss. Erna Alexandra",69,True,female,True,,False,True,4,False -26.0,"Kink, Mr. Vincenz",70,True,male,False,,True,False,2,True -32.0,"Jenkin, Mr. Stephen Curnow",71,False,male,False,,True,False,0,True -16.0,"Goodwin, Miss. Lillian Amy",72,True,female,True,,False,False,5,False -21.0,"Hood, Mr. Ambrose Jr",73,False,male,False,,True,False,0,True -26.0,"Chronopoulos, Mr. Apostolos",74,True,male,False,,True,False,1,True -32.0,"Bing, Mr. Lee",75,False,male,False,,True,True,0,True -25.0,"Moen, Mr. Sigurd Hansen",76,False,male,False,F G73,True,False,0,True -,"Staneff, Mr. Ivan",77,False,male,False,,True,False,0,True -,"Moutal, Mr. Rahamin Haim",78,False,male,False,,True,False,0,True -0.83,"Caldwell, Master. Alden Gates",79,False,male,False,,True,True,0,False -30.0,"Dowdell, Miss. Elizabeth",80,False,female,True,,False,True,0,False diff --git a/test_data/titanic-train.csv b/test_data/titanic-train.csv deleted file mode 100644 index c3a8cf0..0000000 --- a/test_data/titanic-train.csv +++ /dev/null @@ -1,61 +0,0 @@ -age,name,passenger_id,has_siblings,sex,is_female,cabin,is_male,survived,sibsp,is_mr -22.0,"Braund, Mr. Owen Harris",1,True,male,False,,True,False,1,True -38.0,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",2,True,female,True,C85,False,True,1,True -26.0,"Heikkinen, Miss. Laina",3,False,female,True,,False,True,0,False -35.0,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",4,True,female,True,C123,False,True,1,True -35.0,"Allen, Mr. William Henry",5,False,male,False,,True,False,0,True -,"Moran, Mr. James",6,False,male,False,,True,False,0,True -54.0,"McCarthy, Mr. Timothy J",7,False,other,False,E46,False,False,0,True -2.0,"Palsson, Master. Gosta Leonard",8,True,male,False,,True,False,3,False -27.0,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",9,False,female,True,,False,True,0,True -14.0,"Nasser, Mrs. Nicholas (Adele Achem)",10,True,female,True,,False,True,1,True -4.0,"Sandstrom, Miss. 
Marguerite Rut",11,True,female,True,G6,False,True,1,False -58.0,"Bonnell, Miss. Elizabeth",12,False,female,True,C103,False,True,0,False -20.0,"Saundercock, Mr. William Henry",13,False,male,False,,True,False,0,True -39.0,"Andersson, Mr. Anders Johan",14,True,male,False,,True,False,1,True -14.0,"Vestrom, Miss. Hulda Amanda Adolfina",15,False,female,True,,False,False,0,False -55.0,"Hewlett, Mrs. (Mary D Kingcome) ",16,False,female,True,,False,True,0,True -2.0,"Rice, Master. Eugene",17,True,male,False,,True,False,4,False -,"Williams, Mr. Charles Eugene",18,False,male,False,,True,True,0,True -31.0,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",19,True,female,True,,False,False,1,True -,"Masselmani, Mrs. Fatima",20,False,female,True,,False,True,0,True -35.0,"Fynney, Mr. Joseph J",21,False,male,False,,True,False,0,True -34.0,"Beesley, Mr. Lawrence",22,False,male,False,D56,True,True,0,True -15.0,"McGowan, Miss. Anna ""Annie""",23,False,female,True,,False,True,0,False -28.0,"Sloper, Mr. William Thompson",24,False,male,False,A6,True,True,0,True -8.0,"Palsson, Miss. Torborg Danira",25,True,female,True,,False,False,3,False -38.0,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",26,True,female,True,,False,True,1,True -,"Emir, Mr. Farred Chehab",27,False,male,False,,True,False,0,True -19.0,"Fortune, Mr. Charles Alexander",28,True,male,False,C23 C25 C27,True,False,3,True -,"O'Dwyer, Miss. Ellen ""Nellie""",29,False,female,True,,False,True,0,False -,"Todoroff, Mr. Lalio",30,False,male,False,,True,False,0,True -40.0,"Uruchurtu, Don. Manuel E",31,False,male,False,,True,False,0,False -,"Spencer, Mrs. William Augustus (Marie Eugenie)",32,True,female,True,B78,False,True,1,True -,"Glynn, Miss. Mary Agatha",33,False,female,True,,False,True,0,False -66.0,"Wheadon, Mr. Edward H",34,False,male,False,,True,False,0,True -28.0,"Meyer, Mr. Edgar Joseph",35,True,male,False,,True,False,1,True -42.0,"Holverson, Mr. Alexander Oskar",36,True,male,False,,True,False,1,True -,"Mamee, Mr. Hanna",37,False,male,False,,True,True,0,True -21.0,"Cann, Mr. Ernest Charles",38,False,male,False,,True,False,0,True -18.0,"Vander Planke, Miss. Augusta Maria",39,True,female,True,,False,False,2,False -14.0,"Nicola-Yarred, Miss. Jamila",40,True,female,True,,False,True,1,False -40.0,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",41,True,female,True,,False,False,1,True -27.0,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",42,True,female,True,,False,False,1,True -,"Kraeff, Mr. Theodor",43,False,male,False,,True,False,0,True -3.0,"Laroche, Miss. Simonne Marie Anne Andree",44,True,female,True,,False,True,1,False -19.0,"Devaney, Miss. Margaret Delia",45,False,female,True,,False,True,0,False -,"Rogers, Mr. William John",46,False,male,False,,True,False,0,True -,"Lennon, Mr. Denis",47,True,male,False,,True,False,1,True -,"O'Driscoll, Miss. Bridget",48,False,female,True,,False,True,0,False -,"Samaan, Mr. Youssef",49,True,male,False,,True,False,2,True -18.0,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",50,True,female,True,,False,False,1,True -7.0,"Panula, Master. Juha Niilo",51,True,male,False,,True,False,4,False -21.0,"Nosworthy, Mr. Richard Cater",52,False,male,False,,True,False,0,True -49.0,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",53,True,female,True,D33,False,True,1,True -29.0,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",54,True,female,True,,False,True,1,True -65.0,"Ostby, Mr. Engelhart Cornelius",55,False,male,False,B30,True,False,0,True -,"Woolner, Mr. 
Hugh",56,False,male,False,C52,True,True,0,True -21.0,"Rugg, Miss. Emily",57,False,female,True,,False,True,0,False -28.5,"Novel, Mr. Mansouer",58,False,male,False,,True,False,0,True -5.0,"West, Miss. Constance Mirium",59,True,female,True,,False,True,1,False -11.0,"Goodwin, Master. William Frederick",60,True,male,False,,True,False,5,False diff --git a/test_data/titanic-validate.csv b/test_data/titanic-validate.csv deleted file mode 100644 index 9078b2e..0000000 --- a/test_data/titanic-validate.csv +++ /dev/null @@ -1,21 +0,0 @@ -age,name,passenger_id,has_siblings,sex,is_female,cabin,is_male,survived,sibsp,is_mr -22.0,"Waelens, Mr. Achille",81,False,male,False,,True,False,0,True -29.0,"Sheerlinck, Mr. Jan Baptist",82,False,male,False,,True,True,0,True -,"McDermott, Miss. Brigdet Delia",83,False,female,True,,False,True,0,False -28.0,"Carrau, Mr. Francisco M",84,False,male,False,,True,False,0,True -17.0,"Ilett, Miss. Bertha",85,False,female,True,,False,True,0,False -33.0,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",86,True,female,True,,False,True,3,True -16.0,"Ford, Mr. William Neal",87,True,male,False,,True,False,1,True -,"Slocovski, Mr. Selman Francis",88,False,male,False,,True,False,0,True -23.0,"Fortune, Miss. Mabel Helen",89,True,female,True,C23 C25 C27,False,True,3,False -24.0,"Celotti, Mr. Francesco",90,False,male,False,,True,False,0,True -29.0,"Christmann, Mr. Emil",91,False,male,False,,True,False,0,True -20.0,"Andreasson, Mr. Paul Edvin",92,False,male,False,,True,False,0,True -46.0,"Chaffee, Mr. Herbert Fuller",93,True,male,False,E31,True,False,1,True -26.0,"Dean, Mr. Bertram Frank",94,True,male,False,,True,False,1,True -59.0,"Coxon, Mr. Daniel",95,False,male,False,,True,False,0,True -,"Shorney, Mr. Charles Joseph",96,False,male,False,,True,False,0,True -71.0,"Goldschmidt, Mr. George B",97,False,male,False,A5,True,False,0,True -23.0,"Greenfield, Mr. William Bertram",98,False,male,False,D10 D12,True,True,0,True -34.0,"Doling, Mrs. John T (Ada Julia Bone)",99,False,female,True,,False,True,0,True -34.0,"Kantor, Mr. Sinai",100,True,male,False,,True,False,1,True From bd2a12ab2fd4b07a9103e886660d5d20be0e529c Mon Sep 17 00:00:00 2001 From: "Mats E. 
Mollestad" Date: Tue, 9 Apr 2024 09:25:24 +0200 Subject: [PATCH 08/13] A lot of goodies --- aligned/compiler/model.py | 7 +- aligned/data_source/batch_data_source.py | 31 +++++ aligned/exposed_model/interface.py | 144 ++++++++++++++++++++--- aligned/feature_store.py | 28 +++++ aligned/feature_view/feature_view.py | 11 +- aligned/retrival_job.py | 123 +++++++++++++++++-- aligned/sources/local.py | 39 +++++- 7 files changed, 351 insertions(+), 32 deletions(-) diff --git a/aligned/compiler/model.py b/aligned/compiler/model.py index 0f9d0e7..30a5d97 100644 --- a/aligned/compiler/model.py +++ b/aligned/compiler/model.py @@ -280,7 +280,12 @@ def decorator(cls: Type[T]) -> ModelContractWrapper[T]: ) used_name = name or str(cls.__name__).lower() - used_description = description or str(cls.__doc__) + + used_description = None + if description: + used_description = description + elif cls.__doc__: + used_description = str(cls.__doc__) used_exposed_at_url = exposed_at_url if exposed_model: diff --git a/aligned/data_source/batch_data_source.py b/aligned/data_source/batch_data_source.py index 1a00753..8054ef7 100644 --- a/aligned/data_source/batch_data_source.py +++ b/aligned/data_source/batch_data_source.py @@ -803,6 +803,37 @@ def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob: .derive_features([request]) ) + @classmethod + def multi_source_features_for( + cls, facts: RetrivalJob, requests: list[tuple[StackSource, RetrivalRequest]] + ) -> RetrivalJob: + sources = {source.job_group_key() for source, _ in requests} + if len(sources) != 1: + raise ValueError(f'Only able to load one {requests} at a time') + + source = requests[0][0] + if not isinstance(source, cls): + raise ValueError(f'Only {cls} is supported, recived: {source}') + + return source.features_for(facts, requests[0][1]) + + def features_for(self, facts: RetrivalJob, request: RetrivalRequest) -> RetrivalJob: + from aligned.local.job import FileFactualJob + from aligned.retrival_job import StackJob + + config = self.source_column_config + sub_request = request + + if config: + sub_request = self.sub_request(request, config) + + top = self.top.features_for(facts, sub_request) + bottom = self.bottom.features_for(facts, sub_request) + + stack_job = StackJob(top=top, bottom=bottom, source_column=config) + + return FileFactualJob(stack_job, [request], facts) + def all_between_dates( self, request: RetrivalRequest, start_date: datetime, end_date: datetime ) -> RetrivalJob: diff --git a/aligned/exposed_model/interface.py b/aligned/exposed_model/interface.py index 16fb260..31dcfb3 100644 --- a/aligned/exposed_model/interface.py +++ b/aligned/exposed_model/interface.py @@ -2,7 +2,7 @@ import polars as pl from typing import TYPE_CHECKING -from dataclasses import dataclass +from dataclasses import dataclass, field from aligned.retrival_job import RetrivalJob from aligned.schemas.codable import Codable from mashumaro.types import SerializableType @@ -112,20 +112,30 @@ def ollama_embedding( def in_memory_mlflow( model_name: str, model_alias: str, - prediction_column: str, - model_version_column: str | None = None, - predicted_at_column: str | None = None, model_contract_version_tag: str | None = None, ): return InMemMLFlowAlias( model_name=model_name, model_alias=model_alias, - prediction_column=prediction_column, - predicted_at_column=predicted_at_column or 'predicted_at', - model_version_column=model_version_column or 'model_version', model_contract_version_tag=model_contract_version_tag, ) + @staticmethod + def mlflow_server( 
diff --git a/aligned/exposed_model/interface.py b/aligned/exposed_model/interface.py
index 16fb260..31dcfb3 100644
--- a/aligned/exposed_model/interface.py
+++ b/aligned/exposed_model/interface.py
@@ -2,7 +2,7 @@
 import polars as pl
 from typing import TYPE_CHECKING
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from aligned.retrival_job import RetrivalJob
 from aligned.schemas.codable import Codable
 from mashumaro.types import SerializableType
@@ -112,20 +112,30 @@ def ollama_embedding(
     def in_memory_mlflow(
         model_name: str,
         model_alias: str,
-        prediction_column: str,
-        model_version_column: str | None = None,
-        predicted_at_column: str | None = None,
         model_contract_version_tag: str | None = None,
     ):
         return InMemMLFlowAlias(
             model_name=model_name,
             model_alias=model_alias,
-            prediction_column=prediction_column,
-            predicted_at_column=predicted_at_column or 'predicted_at',
-            model_version_column=model_version_column or 'model_version',
             model_contract_version_tag=model_contract_version_tag,
         )
 
+    @staticmethod
+    def mlflow_server(
+        host: str,
+        model_name: str,
+        model_alias: str,
+        model_contract_version_tag: str | None = None,
+        timeout: int = 30,
+    ):
+        return MLFlowServer(
+            host=host,
+            model_name=model_name,
+            model_alias=model_alias,
+            model_contract_version_tag=model_contract_version_tag,
+            timeout=timeout,
+        )
+
 
 @dataclass
 class EnitityPredictor(ExposedModel):
@@ -313,10 +323,6 @@ class InMemMLFlowAlias(ExposedModel):
 
     model_name: str
     model_alias: str
-    prediction_column: str
-    predicted_at_column: str
-    model_version_column: str
-
     model_contract_version_tag: str | None
 
     model_type: str = 'latest_mlflow'
@@ -341,7 +347,8 @@ def contract_version(self, model_version) -> str:
         if self.model_contract_version_tag:
             if self.model_contract_version_tag not in model_version.tags:  # noqa
                 raise ValueError(
-                    f"Model contract version tag {self.model_contract_version_tag} not found in model version tags"
+                    f"Model contract version tag {self.model_contract_version_tag} not "
+                    'found in model version tags'
                 )
             else:
                 version = model_version.tags[self.model_contract_version_tag]
@@ -362,6 +369,14 @@ async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.
         import polars as pl
         from datetime import datetime, timezone
 
+        pred_label = list(store.model.predictions_view.labels())[0]
+        pred_at = store.model.predictions_view.event_timestamp
+        model_version_column = store.model.predictions_view.model_version_column
+        mv = None
+
+        if model_version_column:
+            mv = self.get_model_version()
+
         model_uri = f"models:/{self.model_name}@{self.model_alias}"
 
         mv = self.get_model_version()
@@ -369,12 +384,109 @@
         job = store.features_for(values)
         df = await job.to_polars()
 
-        features = job.request_result.feature_columns
+        features = job.request_result.feature_columns
         predictions = model.predict(df[features])
 
+        if pred_at:
+            df = df.with_columns(
+                pl.lit(datetime.now(timezone.utc)).alias(pred_at.name),
+            )
+
+        if mv and model_version_column:
+            df = df.with_columns(
+                pl.lit(mv.run_id).alias(model_version_column.name),
+            )
+
+        return df.with_columns(
+            pl.Series(name=pred_label.name, values=predictions),
+        )
+
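
With the prediction column, model-version column, and predicted-at timestamp now read from the contract's prediction view, the MLflow factories only need the registry coordinates. A hedged sketch of a contract wired to the new `mlflow_server` factory; the view and feature names are hypothetical, and `in_memory_mlflow` would be wired the same way:

```python
from aligned import model_contract, ExposedModel, Int32, String, EventTimestamp

titanic = Titanic()  # an existing @feature_view class (assumed)

@model_contract(
    name='titanic',
    input_features=[titanic.age, titanic.sibsp],
    exposed_model=ExposedModel.mlflow_server(
        host='http://my-mlflow-server:8000',
        model_name='titanic',
        model_alias='champion',
    ),
)
class TitanicModel:
    passenger_id = Int32().as_entity()

    # These columns are now discovered from the prediction view,
    # not passed to the ExposedModel factory.
    predicted_at = EventTimestamp()
    model_version = String().as_model_version()

    will_survive = titanic.survived.as_classification_label()
```
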
+
+@dataclass
+class MLFlowServer(ExposedModel):
+
+    host: str
+
+    model_name: str
+    model_alias: str
+    model_contract_version_tag: str | None
+
+    timeout: int = field(default=30)
+
+    model_type: str = 'latest_mlflow'
+
+    @property
+    def exposed_at_url(self) -> str | None:
+        return self.host
+
+    @property
+    def as_markdown(self) -> str:
+        return f"""Using the latest MLFlow model: `{self.model_name}`."""
+
+    def get_model_version(self):
+        from mlflow.tracking import MlflowClient
+
+        mlflow_client = MlflowClient()
+        return mlflow_client.get_model_version_by_alias(self.model_name, self.model_alias)
+
+    def contract_version(self, model_version) -> str:
+        version = 'default'
+        if self.model_contract_version_tag:
+            if self.model_contract_version_tag not in model_version.tags:  # noqa
+                raise ValueError(
+                    f"Model contract version tag {self.model_contract_version_tag} not "
+                    'found in model version tags'
+                )
+            else:
+                version = model_version.tags[self.model_contract_version_tag]
+        return version
+
+    async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]:
+        mv = self.get_model_version()
+        version = self.contract_version(mv)
+        return store.feature_references_for(version)
+
+    async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]:
+        mv = self.get_model_version()
+        version = self.contract_version(mv)
+        return store.using_version(version).needed_entities()
+
+    async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame:
+        import polars as pl
+        from httpx import AsyncClient
+        from datetime import datetime, timezone
+
+        pred_label = list(store.model.predictions_view.labels())[0]
+        pred_at = store.model.predictions_view.event_timestamp
+        model_version_column = store.model.predictions_view.model_version_column
+        mv = None
+
+        if model_version_column:
+            mv = self.get_model_version()
+
+        job = store.features_for(values)
+        df = await job.to_polars()
+
+        features = job.request_result.feature_columns
+
+        async with AsyncClient(timeout=self.timeout) as client:
+            response = await client.post(
+                f'{self.host}/invocations', json={'dataframe_records': df[features].to_dicts()}
+            )
+            response.raise_for_status()
+            preds = response.json()['predictions']
+
+        if pred_at:
+            df = df.with_columns(
+                pl.lit(datetime.now(timezone.utc)).alias(pred_at.name),
+            )
+
+        if mv and model_version_column:
+            df = df.with_columns(
+                pl.lit(mv.run_id).alias(model_version_column.name),
+            )
+
         return df.with_columns(
-            pl.Series(name=self.prediction_column, values=predictions),
-            pl.lit(mv.run_id).alias(self.model_version_column),
-            pl.lit(datetime.now(timezone.utc)).alias(self.predicted_at_column),
+            pl.Series(name=pred_label.name, values=preds),
         )

diff --git a/aligned/feature_store.py b/aligned/feature_store.py
index d7146fd..177e405 100644
--- a/aligned/feature_store.py
+++ b/aligned/feature_store.py
@@ -917,6 +917,34 @@ async def upsert_into(
         else:
             raise ValueError(f'The source {type(source)} do not support writes')
 
+    async def overwrite(
+        self, location: FeatureLocation | str, values: ConvertableToRetrivalJob | RetrivalJob
+    ) -> None:
+        if isinstance(location, str):
+            used_location = FeatureLocation.from_string(location)
+        elif isinstance(location, FeatureLocation):
+            used_location = location
+        else:
+            raise ValueError(f'Location was of an unsupported type: {type(location)}')
+
+        source: FeatureSource | BatchDataSource = self.feature_source
+
+        if isinstance(source, BatchFeatureSource):
+            source = source.sources[used_location.identifier]
+
+        write_request = self.write_request_for(used_location)
+
+        if not isinstance(values, RetrivalJob):
+            values = RetrivalJob.from_convertable(values, write_request)
+
+        if isinstance(source, WritableFeatureSource):
+            await source.overwrite(values, [write_request])
+        elif isinstance(source, DataFileReference):
+            df = (await values.to_lazy_polars()).select(write_request.all_returned_columns)
+            await source.write_polars(df)
+        else:
+            raise ValueError(f'The source {type(source)} does not support writes')
+
 
 @dataclass
 class ModelFeatureStore:

diff --git a/aligned/feature_view/feature_view.py b/aligned/feature_view/feature_view.py
index 0809a25..da3cff1 100644
--- a/aligned/feature_view/feature_view.py
+++ b/aligned/feature_view/feature_view.py
@@ -236,11 +236,20 @@ def join_asof(
             right_on=right_on,
         )
 
-    def with_source(self, named: str, source: BatchDataSource | FeatureViewWrapper) -> FeatureViewWrapper[T]:
+    def with_source(
+        self,
+        named: str,
+        source: BatchDataSource | FeatureViewWrapper,
+        materialized_source: BatchDataSource | None = None,
+    ) -> FeatureViewWrapper[T]:
 
         meta = copy.deepcopy(self.metadata)
         meta.name = named
         meta.source = resolve_source(source)
+        meta.materialized_source = None
+
+        if materialized_source:
+            meta.materialized_source = resolve_source(materialized_source)
 
         return FeatureViewWrapper(meta, self.view)
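
Together, the new `materialized_source` parameter and the store's `overwrite` make it possible to keep a faster materialized copy next to the source of truth and refresh it in one call. A rough sketch under assumed names; the `Titanic` view, the Postgres config, and the parquet path are all hypothetical:

```python
from aligned import FeatureStore, FileSource, PostgreSQLConfig

psql = PostgreSQLConfig('PSQL_URL')  # assumed batch source of truth

# Reads are served from the materialized parquet file when it is present.
ProdTitanic = Titanic.with_source(
    named='titanic_prod',
    source=psql.table('titanic'),
    materialized_source=FileSource.parquet_at('data/titanic_materialized.parquet'),
)

# Later: rebuild the materialized copy from the batch source and overwrite it.
store = await FeatureStore.from_dir('.')  # assumes the contracts live in this repo
fresh = await store.feature_view('titanic_prod').all().to_polars()
await store.overwrite('feature_view:titanic_prod', fresh)
```
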
diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py
index ec30a70..1f24801 100644
--- a/aligned/retrival_job.py
+++ b/aligned/retrival_job.py
@@ -183,17 +183,19 @@ class TrainTestValidateJob:
 
     target_columns: set[str]
 
+    should_filter_out_null_targets: bool = True
+
     @property
     def train(self) -> SupervisedJob:
-        return SupervisedJob(self.train_job, self.target_columns)
+        return SupervisedJob(self.train_job, self.target_columns, self.should_filter_out_null_targets)
 
     @property
     def test(self) -> SupervisedJob:
-        return SupervisedJob(self.test_job, self.target_columns)
+        return SupervisedJob(self.test_job, self.target_columns, self.should_filter_out_null_targets)
 
     @property
     def validate(self) -> SupervisedJob:
-        return SupervisedJob(self.validate_job, self.target_columns)
+        return SupervisedJob(self.validate_job, self.target_columns, self.should_filter_out_null_targets)
 
     def store_dataset(
         self,
@@ -262,6 +264,9 @@ async def update_metadata() -> None:
         )
 
 
+SplitterCallable = Callable[[pl.DataFrame], tuple[pl.DataFrame, pl.DataFrame]]
+
+
 @dataclass
 class SupervisedJob:
 
@@ -334,20 +339,62 @@ def train_test(self, train_size: float) -> TrainTestJob:
             target_columns=self.target_columns,
         )
 
-    def train_test_validate(self, train_size: float, validate_size: float) -> TrainTestValidateJob:
+    def train_test_validate(
+        self,
+        train_size: float,
+        validate_size: float,
+        splitter_factory: Callable[[SplitConfig], SplitterCallable] | None = None,
+    ) -> TrainTestValidateJob:
 
-        cached_job = InMemoryCacheJob(self.job)
+        job_to_cache = self.job
+        if self.should_filter_out_null_targets:
+            job_to_cache = self.job.polars_method(lambda df: df.drop_nulls(self.target_columns))
 
         event_timestamp = self.job.request_result.event_timestamp
 
-        test_ratio_start = train_size
-        validate_ratio_start = test_ratio_start + validate_size
+        leftover_size = 1 - train_size
+
+        train_config = SplitConfig(
+            left_size=train_size,
+            right_size=leftover_size,
+            event_timestamp_column=event_timestamp,
+            target_columns=list(self.target_columns),
+        )
+        test_config = SplitConfig(
+            left_size=(leftover_size - validate_size) / leftover_size,
+            right_size=validate_size / leftover_size,
+            event_timestamp_column=event_timestamp,
+            target_columns=list(self.target_columns),
+        )
+
+        if splitter_factory:
+            train_splitter = splitter_factory(train_config)
+            validate_splitter = splitter_factory(test_config)
+        else:
+
+            def train_splitter(df: pl.DataFrame) -> tuple[pl.DataFrame, pl.DataFrame]:
+                return (
+                    subset_polars(df, 0, train_config.left_size, event_timestamp),
+                    subset_polars(df, train_config.left_size, 1, event_timestamp),
+                )
+
+            def validate_splitter(df: pl.DataFrame) -> tuple[pl.DataFrame, pl.DataFrame]:
+                return (
+                    subset_polars(df, 0, test_config.left_size, event_timestamp),
+                    subset_polars(df, test_config.left_size, 1, event_timestamp),
+                )
+
+        train_job, rem_job = job_to_cache.split(train_splitter, (train_size, 1 - train_size))
+        test_job, validate_job = rem_job.split(
+            validate_splitter, (1 - train_size - validate_size, validate_size)
+        )
 
         return TrainTestValidateJob(
-            train_job=SubsetJob(cached_job, 0, train_size, event_timestamp),
-            test_job=SubsetJob(cached_job, train_size, validate_ratio_start, event_timestamp),
-            validate_job=SubsetJob(cached_job, validate_ratio_start, 1, event_timestamp),
+            train_job=train_job,
+            test_job=test_job,
+            validate_job=validate_job,
             target_columns=self.target_columns,
+            should_filter_out_null_targets=self.should_filter_out_null_targets,
         )
 
     def with_subfeatures(self) -> SupervisedJob:
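
`train_test_validate` now accepts a `splitter_factory`, so the default timestamp or row-order subsetting can be swapped for any custom strategy. A minimal sketch of a shuffling splitter, assuming a `SupervisedJob` named `supervised` is already in scope:

```python
import polars as pl

def shuffled_splitter(config: SplitConfig) -> SplitterCallable:
    """Splits rows randomly instead of by row order or event timestamp."""

    def split(df: pl.DataFrame) -> tuple[pl.DataFrame, pl.DataFrame]:
        shuffled = df.sample(fraction=1.0, shuffle=True, seed=42)
        cutoff = int(shuffled.height * config.left_size)
        return shuffled[:cutoff], shuffled[cutoff:]

    return split

datasets = supervised.train_test_validate(
    train_size=0.6,
    validate_size=0.2,
    splitter_factory=shuffled_splitter,
)
# datasets.train, datasets.test, and datasets.validate are SupervisedJobs
# that all read from the same in-memory split cache.
```
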
@@ -551,6 +598,15 @@ def return_invalid(self, should_return_validation: bool | None = None) -> Retriv
             should_return_validation = False
         return ReturnInvalidJob(self, should_return_validation)
 
+    def split(
+        self,
+        splitter: Callable[[pl.DataFrame], tuple[pl.DataFrame, pl.DataFrame]],
+        dataset_sizes: tuple[float, float],
+    ) -> tuple[RetrivalJob, RetrivalJob]:
+
+        job = InMemorySplitCacheJob(self, splitter, dataset_sizes, 0)
+        return (job, job.with_dataset_index(1))
+
     def join(
         self, job: RetrivalJob, method: str, left_on: str | list[str], right_on: str | list[str]
     ) -> RetrivalJob:
@@ -979,6 +1035,51 @@ async def to_pandas(self) -> pd.DataFrame:
         return (await self.to_lazy_polars()).collect().to_pandas()
 
 
+@dataclass
+class SplitConfig:
+
+    left_size: float
+    right_size: float
+
+    event_timestamp_column: str | None = None
+    target_columns: list[str] | None = None
+
+
+@dataclass
+class InMemorySplitCacheJob(RetrivalJob, ModificationJob):
+
+    job: RetrivalJob
+
+    splitter: Callable[[pl.DataFrame], tuple[pl.DataFrame, pl.DataFrame]]
+    dataset_sizes: tuple[float, float]
+    dataset_index: int
+
+    cached_data: tuple[pl.DataFrame, pl.DataFrame] | None = None
+
+    @property
+    def fraction(self) -> float:
+        return self.dataset_sizes[self.dataset_index]
+
+    async def to_lazy_polars(self) -> pl.LazyFrame:
+        cache = self
+
+        if isinstance(self.job, InMemorySplitCacheJob):
+            cache = self.job
+
+        if cache.cached_data is not None:
+            return cache.cached_data[self.dataset_index].lazy()
+
+        data = (await self.job.to_lazy_polars()).collect()
+        self.cached_data = self.splitter(data)
+        return self.cached_data[self.dataset_index].lazy()
+
+    def with_dataset_index(self, dataset_index: int) -> InMemorySplitCacheJob:
+        return InMemorySplitCacheJob(self, self.splitter, self.dataset_sizes, dataset_index, self.cached_data)
+
+    async def to_pandas(self) -> pd.DataFrame:
+        return (await self.to_lazy_polars()).collect().to_pandas()
+
+
 @dataclass
 class InMemoryCacheJob(RetrivalJob, ModificationJob):
 
@@ -1074,7 +1175,7 @@ async def to_lazy_polars(self) -> pl.LazyFrame:
             pl.lit(self.source_column.bottom_source_name).alias(self.source_column.source_column)
         )
 
-        return top.collect().vstack(bottom.collect()).lazy()
+        return top.select(top.columns).collect().vstack(bottom.select(top.columns).collect()).lazy()
 
     async def to_pandas(self) -> pd.DataFrame:
         return (await self.to_lazy_polars()).collect().to_pandas()
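
The lower-level building block is the new `RetrivalJob.split`, which returns two jobs backed by one shared `InMemorySplitCacheJob`, so the source is only materialized once. A small sketch, assuming a `RetrivalJob` named `job` whose result has a `passenger_id` column:

```python
import polars as pl

def by_even_id(df: pl.DataFrame) -> tuple[pl.DataFrame, pl.DataFrame]:
    mask = df['passenger_id'] % 2 == 0
    return df.filter(mask), df.filter(~mask)

left, right = job.split(by_even_id, dataset_sizes=(0.5, 0.5))

left_df = await left.to_polars()    # computes the split and caches both frames
right_df = await right.to_polars()  # should be served from the shared cache
```
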
diff --git a/aligned/sources/local.py b/aligned/sources/local.py
index ee1d2a1..92a7fee 100644
--- a/aligned/sources/local.py
+++ b/aligned/sources/local.py
@@ -126,6 +126,20 @@ def job_group_key(self) -> str:
     def __hash__(self) -> int:
         return hash(self.job_group_key())
 
+    def to_markdown(self) -> str:
+        return f"""### CSV File
+
+*Renames*: {self.mapping_keys}
+
+*File*: {self.path}
+
+*CSV Config*: {self.csv_config}
+
+*Datetime Formatter*: {self.formatter}
+
+[Go to file]({self.path})
+"""
+
     async def read_pandas(self) -> pd.DataFrame:
         try:
             return pd.read_csv(
@@ -181,7 +195,7 @@ async def upsert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non
             potential_timestamps.add(request.event_timestamp.as_feature())
 
         for feature in potential_timestamps:
-            if feature.dtype.name == 'datetime':
+            if feature.dtype.is_datetime:
                 data = data.with_columns(self.formatter.encode_polars(feature.name))
 
         if self.mapping_keys:
@@ -206,7 +220,7 @@ async def insert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non
         data = (await job.to_lazy_polars()).select(request.all_returned_columns)
         for feature in request.features:
-            if feature.dtype.name == 'datetime':
+            if feature.dtype.is_datetime:
                 data = data.with_columns(self.formatter.encode_polars(feature.name))
 
         if self.mapping_keys:
@@ -221,6 +235,25 @@ async def insert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non
 
         await self.write_polars(write_df)
 
+    async def overwrite(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> None:
+
+        if len(requests) != 1:
+            raise ValueError('CSV files only support one write request as of now')
+
+        request = requests[0]
+
+        data = (await job.to_lazy_polars()).select(request.all_returned_columns)
+        for feature in request.features:
+            if feature.dtype.is_datetime:
+                data = data.with_columns(self.formatter.encode_polars(feature.name))
+
+        if self.mapping_keys:
+            columns = self.feature_identifier_for(data.columns)
+            data = data.rename(dict(zip(data.columns, columns)))
+
+        logger.error(f'Overwriting {self.path} with {data.columns}')
+        await self.write_polars(data)
+
     async def write_pandas(self, df: pd.DataFrame) -> None:
         create_parent_dir(self.path)
         df.to_csv(
@@ -269,7 +302,7 @@ def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob:
             formatter=self.formatter,
             expected_schema={
                 feat.name: feat.dtype
-                for feat in request.features
+                for feat in request.features.union(request.entities)
                 if not (feat.constraints and optional_constraint in feat.constraints)
                 and not feat.name.isdigit()
             },

From a03b3ef5c1fb31dbc28d734707d534984fadb2cd Mon Sep 17 00:00:00 2001
From: "Mats E. Mollestad" <mats@mollestad.no>
Date: Sun, 14 Apr 2024 01:06:59 +0200
Subject: [PATCH 09/13] A lot of improvements

---
 aligned/__init__.py                           |   3 +-
 aligned/cli.py                                | 133 +-----------------
 aligned/compiler/aggregation_factory.py       |  54 +++----
 aligned/compiler/feature_factory.py           |  42 +++---
 aligned/compiler/model.py                     |  37 ++++-
 aligned/compiler/vector_index_factory.py      |   2 +-
 aligned/exposed_model/interface.py            |  19 +--
 aligned/exposed_model/ollama.py               |  20 ++-
 aligned/feature_store.py                      |  70 +++++----
 aligned/feature_view/combined_view.py         |   4 +-
 aligned/feature_view/feature_view.py          |  14 +-
 .../feature_view/tests/test_brest_cancer.py   |   4 +-
 .../test_brest_cancer_event_timestamp.py      |   6 +-
 .../feature_view/tests/test_combined_view.py  |   6 +-
 aligned/jobs/tests/test_derived_job.py        |   6 +-
 aligned/local/tests/test_jobs.py              |   4 +-
 aligned/retrival_job.py                       |  50 ++++++-
 aligned/schemas/constraint_types.py           |  13 ++
 aligned/schemas/constraints.py                |  18 +++
 aligned/schemas/derivied_feature.py           |  14 +-
 aligned/schemas/feature.py                    |  19 ++-
 aligned/schemas/folder.py                     |  25 +++-
 aligned/schemas/model.py                      |  14 +-
 aligned/schemas/target.py                     |  10 +-
 aligned/server.py                             |  12 +-
 aligned/sources/local.py                      |  16 +--
 aligned/sources/tests/test_parquet.py         |  10 +-
 aligned/sources/tests/test_psql.py            |   8 +-
 aligned/tests/test_cached_parquet.py          |   4 +-
 aligned/tests/test_model_target.py            |  18 +--
 aligned/tests/test_models_as_feature.py       |   8 +-
 aligned/tests/test_source_validation.py       |   7 +-
 aligned/tests/test_train_test_validate_set.py |   8 +-
 aligned/tests/test_transformations.py         |   5 +-
 .../tests/test_pandera_validator.py           |   8 +-
 aligned/worker.py                             |   4 +-
 conftest.py                                   |  47 ++++---
 pyproject.toml                                |   4 -
 setup.cfg                                     |   2 +-
 39 files changed, 381 insertions(+), 367 deletions(-)
 create mode 100644 aligned/schemas/constraint_types.py

diff --git a/aligned/__init__.py b/aligned/__init__.py
index 7e56e40..3f646cf 100644
--- a/aligned/__init__.py
+++ b/aligned/__init__.py
@@ -22,7 +22,7 @@ from aligned.compiler.model import model_contract, FeatureInputVersions
 from aligned.data_source.stream_data_source import HttpStreamSource
 from
aligned.data_source.batch_data_source import CustomMethodDataSource -from aligned.feature_store import FeatureStore +from aligned.feature_store import ContractStore, FeatureStore from aligned.feature_view import feature_view, combined_feature_view, check_schema from aligned.schemas.text_vectoriser import EmbeddingModel from aligned.sources.kafka import KafkaConfig @@ -36,6 +36,7 @@ from aligned.schemas.feature import FeatureLocation __all__ = [ + 'ContractStore', 'FeatureStore', 'feature_view', # Batch Data sources diff --git a/aligned/cli.py b/aligned/cli.py index cdac83c..f04820e 100644 --- a/aligned/cli.py +++ b/aligned/cli.py @@ -3,7 +3,6 @@ import os import sys from contextlib import suppress -from dataclasses import dataclass from functools import wraps from pathlib import Path from typing import Any @@ -12,8 +11,6 @@ from pytz import utc # type: ignore from aligned.compiler.repo_reader import RepoReader, RepoReference -from aligned.schemas.codable import Codable -from aligned.schemas.feature import Feature from aligned.worker import StreamWorker from collections.abc import Callable from datetime import datetime @@ -263,132 +260,6 @@ async def serve_worker_command(repo_path: str, worker_path: str, env_file: str) await worker.start() -@dataclass -class CategoricalFeatureSummary(Codable): - missing_percentage: float - unique_values: int - values: list[str] - value_count: list[int] - - -@dataclass -class NumericFeatureSummary(Codable): - missing_percentage: float - mean: float | None - median: float | None - std: float | None - lowest: float | None - highests: float | None - histogram_count: list[int] - histogram_splits: list[float] - - -@dataclass -class ProfilingResult(Codable): - numeric_features: dict[str, NumericFeatureSummary] - categorical_features: dict[str, CategoricalFeatureSummary] - - -# Should add some way of profiling models, not feature views. -# Or maybe both -@cli.command('profile') -@coro -@click.option( - '--repo-path', - default='.', - help='The path to the repo', -) -@click.option( - '--reference-file', - default='feature_store_location.py', - help='The file defining where to read the feature store from', -) -@click.option('--output', default='profiling-result.json') -@click.option('--dataset-size', default=10000) -@click.option( - '--env-file', - default='.env', - help='The path to env variables', -) -async def profile(repo_path: str, reference_file: str, env_file: str, output: str, dataset_size: int) -> None: - import numpy as np - from pandas import DataFrame - - from aligned import FeatureStore - - # Make sure modules can be read, and that the env is set - dir = Path.cwd() if repo_path == '.' 
else Path(repo_path).absolute() - sys.path.append(str(dir)) - env_file_path = dir / env_file - load_envs(env_file_path) - - online_store: FeatureStore = await FeatureStore.from_reference_at_path(repo_path, reference_file) - feature_store = online_store.offline_store() - - results = ProfilingResult(numeric_features={}, categorical_features={}) - - for feature_view_name in sorted(feature_store.feature_views.keys()): - click.echo(f'Profiling: {feature_view_name}') - feature_view = feature_store.feature_view(feature_view_name) - data_set: DataFrame = feature_view.all(limit=dataset_size).to_pandas() - - all_features: list[Feature] = list(feature_view.view.features) + list( - feature_view.view.derived_features - ) - for feature in all_features: - - data_slice = data_set[feature.name] - - reference = f'{feature_view_name}:{feature.name}' - - if (not feature.dtype.is_numeric) or feature.dtype.name == 'bool': - unique_values = data_slice.unique() - filter_unique_nan_values = [ - value - for value in unique_values - if not ( - str(value).lower() == 'nan' or str(value).lower() == 'nat' or str(value) == '' - ) - ] - - results.categorical_features[reference] = CategoricalFeatureSummary( - missing_percentage=(data_slice.isna() | data_slice.isnull()).sum() / data_slice.shape[0], - unique_values=unique_values.shape[0], - values=[str(value) for value in filter_unique_nan_values], - value_count=data_slice.value_counts()[filter_unique_nan_values].tolist(), - ) - else: - description = data_slice.describe() - n_bins = np.min([50, len(data_slice.unique())]) - max_value = description['max'] - min_value = description['min'] - - if np.isnan(max_value): - continue - - width = (max_value - min_value) / n_bins - - if width <= 0: - histogram = [description['count']] - cuts = [] - else: - cuts = np.arange(start=min_value, stop=max_value + width, step=width) - histogram, _ = np.histogram(data_slice.loc[~data_slice.isna()].values, cuts) - - results.numeric_features[reference] = NumericFeatureSummary( - missing_percentage=(data_slice.isna() | data_slice.isnull()).sum() / data_slice.shape[0], - mean=description['mean'] if not np.isnan(description['mean']) else None, - median=description['50%'] if not np.isnan(description['50%']) else None, - std=description['std'] if not np.isnan(description['std']) else None, - lowest=description['min'] if not np.isnan(description['min']) else None, - highests=description['max'] if not np.isnan(description['max']) else None, - histogram_count=list(histogram), - histogram_splits=list(cuts), - ) - - Path(output).write_bytes(results.to_json().encode('utf-8')) - - @cli.command('create-indexes') @coro @click.option( @@ -407,7 +278,7 @@ async def profile(repo_path: str, reference_file: str, env_file: str, output: st help='The path to env variables', ) async def create_indexes(repo_path: str, reference_file: str, env_file: str) -> None: - from aligned import FeatureStore, FileSource + from aligned import ContractStore, FileSource setup_logger() @@ -432,7 +303,7 @@ async def create_indexes(repo_path: str, reference_file: str, env_file: str) -> click.echo(f'No repo file found at {dir}. 
Returning without creating indexes') return - feature_store = FeatureStore.from_definition(repo_def) + feature_store = ContractStore.from_definition(repo_def) for feature_view_name in sorted(feature_store.feature_views.keys()): view = feature_store.feature_views[feature_view_name] diff --git a/aligned/compiler/aggregation_factory.py b/aligned/compiler/aggregation_factory.py index d701bff..0f8aa3c 100644 --- a/aligned/compiler/aggregation_factory.py +++ b/aligned/compiler/aggregation_factory.py @@ -6,7 +6,7 @@ from aligned.compiler.feature_factory import ( AggregationTransformationFactory, FeatureFactory, - FeatureReferance, + FeatureReference, String, TransformationFactory, ) @@ -15,8 +15,8 @@ def aggregate_over( - group_by: list[FeatureReferance], - time_column: FeatureReferance | None, + group_by: list[FeatureReference], + time_column: FeatureReference | None, time_window: timedelta | None, every_interval: timedelta | None, offset_interval: timedelta | None, @@ -54,12 +54,12 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import ConcatStringAggregation return ConcatStringAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, separator=self.separator or '', ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -82,11 +82,11 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import SumAggregation return SumAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -109,11 +109,11 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import MeanAggregation return MeanAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -136,11 +136,11 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import MinAggregation return MinAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -163,11 +163,11 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import MaxAggregation return MaxAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: 
list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -190,11 +190,11 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import CountAggregation return CountAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -217,11 +217,11 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import CountDistinctAggregation return CountDistinctAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -244,11 +244,11 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import StdAggregation return StdAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -271,11 +271,11 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import VarianceAggregation return VarianceAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -298,11 +298,11 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import MedianAggregation return MedianAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -326,12 +326,12 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import PercentileAggregation return PercentileAggregation( - key=self.feature.feature_referance().name, + key=self.feature.feature_reference().name, percentile=self.percentile, ) def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over( group_by, time_column, self.time_window, self.every_interval, self.offset_interval, None @@ -350,7 +350,7 @@ def 
using_features(self) -> list[FeatureFactory]: return self._using_features def aggregate_over( - self, group_by: list[FeatureReferance], time_column: FeatureReferance | None + self, group_by: list[FeatureReference], time_column: FeatureReference | None ) -> AggregateOver: return aggregate_over(group_by, time_column, None, None, None, None) @@ -363,7 +363,7 @@ def compile(self) -> Transformation: from aligned.schemas.transformation import PolarsFunctionTransformation, PolarsLambdaTransformation if isinstance(self.method, pl.Expr): - method = lambda df, alias: self.method # type: ignore + method = lambda df, alias: self.method # noqa: E731 code = '' return PolarsLambdaTransformation(method=dill.dumps(method), code=code, dtype=self.dtype.dtype) else: diff --git a/aligned/compiler/feature_factory.py b/aligned/compiler/feature_factory.py index 820ee4f..9be3cab 100644 --- a/aligned/compiler/feature_factory.py +++ b/aligned/compiler/feature_factory.py @@ -25,7 +25,7 @@ from aligned.schemas.derivied_feature import DerivedFeature, AggregateOver from aligned.schemas.event_trigger import EventTrigger as EventTriggerSchema from aligned.schemas.feature import EventTimestamp as EventTimestampFeature -from aligned.schemas.feature import Feature, FeatureLocation, FeatureReferance, FeatureType +from aligned.schemas.feature import Feature, FeatureLocation, FeatureReference, FeatureType from aligned.schemas.literal_value import LiteralValue from aligned.schemas.target import ClassificationTarget as ClassificationTargetSchemas from aligned.schemas.target import ClassTargetProbability @@ -65,7 +65,7 @@ def using_features(self) -> list[FeatureFactory]: class AggregationTransformationFactory: def aggregate_over( - self, group_by: list[FeatureReferance], time_columns: FeatureReferance | None + self, group_by: list[FeatureReference], time_columns: FeatureReference | None ) -> AggregateOver: raise NotImplementedError(type(self)) @@ -99,8 +99,8 @@ def compile(self) -> ClassTargetProbability: class FeatureReferencable: - def feature_referance(self) -> FeatureReferance: - pass + def feature_reference(self) -> FeatureReference: + raise NotImplementedError(type(self)) def compile_hidden_features( @@ -184,24 +184,24 @@ class RecommendationTarget(FeatureReferencable): def __set_name__(self, owner, name): self._name = name - def feature_referance(self) -> FeatureReferance: + def feature_reference(self) -> FeatureReference: if not self._name: raise ValueError('Missing name, can not create reference') if not self._location: raise ValueError('Missing location, can not create reference') - return FeatureReferance(self._name, self._location, self.feature.dtype) + return FeatureReference(self._name, self._location, self.feature.dtype) def estemating_rank(self, feature: FeatureFactory) -> RecommendationTarget: self.rank_feature = feature return self def compile(self) -> RecommendationTargetSchemas: - self_ref = self.feature_referance() + self_ref = self.feature_reference() return RecommendationTargetSchemas( - self.feature.feature_referance(), + self.feature.feature_reference(), feature=self_ref.as_feature(), - estimating_rank=self.rank_feature.feature_referance() if self.rank_feature else None, + estimating_rank=self.rank_feature.feature_reference() if self.rank_feature else None, ) @@ -216,12 +216,12 @@ class RegressionLabel(FeatureReferencable): def __set_name__(self, owner, name): self._name = name - def feature_referance(self) -> FeatureReferance: + def feature_reference(self) -> FeatureReference: if not self._name: raise 
ValueError('Missing name, can not create reference') if not self._location: raise ValueError('Missing location, can not create reference') - return FeatureReferance(self._name, self._location, self.feature.dtype) + return FeatureReference(self._name, self._location, self.feature.dtype) def listen_to_ground_truth_event(self, stream: StreamDataSource) -> RegressionLabel: return RegressionLabel( @@ -253,7 +253,7 @@ def compile(self) -> RegressionTargetSchemas: on_ground_truth_event = event.event return RegressionTargetSchemas( - self.feature.feature_referance(), + self.feature.feature_reference(), feature=Feature(self._name, self.feature.dtype), on_ground_truth_event=on_ground_truth_event, event_trigger=trigger, @@ -271,12 +271,12 @@ class ClassificationLabel(FeatureReferencable): def __set_name__(self, owner, name): self._name = name - def feature_referance(self) -> FeatureReferance: + def feature_reference(self) -> FeatureReference: if not self._name: raise ValueError('Missing name, can not create reference') if not self._location: raise ValueError('Missing location, can not create reference') - return FeatureReferance(self._name, self._location, self.feature.dtype) + return FeatureReference(self._name, self._location, self.feature.dtype) def listen_to_ground_truth_event(self, stream: StreamDataSource) -> ClassificationLabel: return ClassificationLabel( @@ -335,7 +335,7 @@ def compile(self) -> ClassificationTargetSchemas: on_ground_truth_event = event.event return ClassificationTargetSchemas( - self.feature.feature_referance(), + self.feature.feature_reference(), feature=Feature(self._name, self.feature.dtype), on_ground_truth_event=on_ground_truth_event, event_trigger=trigger, @@ -386,13 +386,13 @@ def depending_on_names(self) -> list[str]: return [] return [feat._name for feat in self.transformation.using_features if feat._name] - def feature_referance(self) -> FeatureReferance: + def feature_reference(self) -> FeatureReference: if not self._location: raise ValueError( f'_location is not set for {self.name}. ' 'Therefore, making it impossible to create a referance.' 
) - return FeatureReferance(self.name, self._location, self.dtype) + return FeatureReference(self.name, self._location, self.dtype) def feature(self) -> Feature: return Feature( @@ -441,7 +441,7 @@ def compile(self) -> DerivedFeature: return DerivedFeature( name=self.name, dtype=self.dtype, - depending_on={feat.feature_referance() for feat in self.transformation.using_features}, + depending_on={feat.feature_reference() for feat in self.transformation.using_features}, transformation=self.transformation.compile(), depth=self.depth(), description=self._description, @@ -586,6 +586,12 @@ def is_not_null(self) -> Bool: instance.transformation = NotNullFactory(self) return instance + def referencing(self, entity: FeatureFactory) -> FeatureFactory: + from aligned.schemas.constraint_types import ReferencingColumn + + self._add_constraint(ReferencingColumn(entity.feature_reference())) + return self + class CouldBeModelVersion: def as_model_version(self) -> ModelVersion: diff --git a/aligned/compiler/model.py b/aligned/compiler/model.py index 30a5d97..2df1f54 100644 --- a/aligned/compiler/model.py +++ b/aligned/compiler/model.py @@ -27,7 +27,7 @@ from aligned.request.retrival_request import RetrivalRequest from aligned.retrival_job import ConvertableToRetrivalJob, PredictionJob, RetrivalJob from aligned.schemas.derivied_feature import DerivedFeature -from aligned.schemas.feature import Feature, FeatureLocation, FeatureReferance, FeatureType +from aligned.schemas.feature import Feature, FeatureLocation, FeatureReference, FeatureType from aligned.schemas.feature_view import CompiledFeatureView from aligned.schemas.literal_value import LiteralValue from aligned.schemas.model import Model as ModelSchema @@ -102,7 +102,7 @@ def predict_over( values: ConvertableToRetrivalJob | RetrivalJob, needed_views: list[FeatureViewWrapper | ModelContractWrapper] | None = None, ) -> PredictionJob: - from aligned import FeatureStore + from aligned import ContractStore from aligned.retrival_job import RetrivalJob model = self.compile() @@ -122,7 +122,7 @@ def predict_over( ) values = RetrivalJob.from_convertable(values, request) - store = FeatureStore.empty() + store = ContractStore.empty() for needed_data in needed_views or []: if isinstance(needed_data, ModelContractWrapper): @@ -249,14 +249,15 @@ def compile(self) -> FeatureVersionSchema: return FeatureVersionSchema( default_version=self.default_version, versions={ - version: [feature.feature_referance() for feature in features] + version: [feature.feature_reference() for feature in features] for version, features in self.versions.items() }, ) def model_contract( - input_features: list[FeatureReferencable] | FeatureInputVersions, + input_features: list[FeatureReferencable | FeatureViewWrapper | ModelContractWrapper] + | FeatureInputVersions, name: str | None = None, contacts: list[str] | None = None, tags: list[str] | None = None, @@ -275,8 +276,30 @@ def decorator(cls: Type[T]) -> ModelContractWrapper[T]: if isinstance(input_features, FeatureInputVersions): features_versions = input_features else: + unwrapped_input_features: list[FeatureReferencable] = [] + + for feature in input_features: + if isinstance(feature, FeatureViewWrapper): + compiled_view = feature.compile() + request = compiled_view.request_all + features = [ + feat.as_reference(FeatureLocation.feature_view(compiled_view.name)) + for feat in request.request_result.features + ] + unwrapped_input_features.extend(features) + elif isinstance(feature, ModelContractWrapper): + compiled_model = 
feature.compile() + request = compiled_model.predictions_view.request('') + features = [ + feat.as_reference(FeatureLocation.model(compiled_model.name)) + for feat in request.request_result.features + ] + unwrapped_input_features.extend(features) + else: + unwrapped_input_features.append(feature) + features_versions = FeatureInputVersions( - default_version='default', versions={'default': input_features} + default_version='default', versions={'default': unwrapped_input_features} ) used_name = name or str(cls.__name__).lower() @@ -422,7 +445,7 @@ class MyModel(ModelContract): dtype=transformation.dtype, transformation=transformation, depending_on={ - FeatureReferance(feat, FeatureLocation.model(metadata.name), dtype=FeatureType.float()) + FeatureReference(feat, FeatureLocation.model(metadata.name), dtype=FeatureType.float()) for feat in transformation.column_mappings.keys() }, depth=1, diff --git a/aligned/compiler/vector_index_factory.py b/aligned/compiler/vector_index_factory.py index 3721eca..dc48c15 100644 --- a/aligned/compiler/vector_index_factory.py +++ b/aligned/compiler/vector_index_factory.py @@ -3,10 +3,10 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from aligned.schemas.feature import Feature, FeatureLocation from aligned.schemas.vector_storage import VectorIndex, VectorStorage if TYPE_CHECKING: + from aligned.schemas.feature import Feature, FeatureLocation from aligned.compiler.feature_factory import FeatureFactory diff --git a/aligned/exposed_model/interface.py b/aligned/exposed_model/interface.py index 31dcfb3..158518c 100644 --- a/aligned/exposed_model/interface.py +++ b/aligned/exposed_model/interface.py @@ -8,7 +8,7 @@ from mashumaro.types import SerializableType import logging -from aligned.schemas.feature import Feature, FeatureReferance +from aligned.schemas.feature import Feature, FeatureReference if TYPE_CHECKING: from aligned.feature_store import ModelFeatureStore @@ -48,7 +48,7 @@ class ExposedModel(Codable, SerializableType): def exposed_at_url(self) -> str | None: return None - async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: raise NotImplementedError(type(self)) async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: @@ -154,7 +154,7 @@ def exposed_at_url(self) -> str | None: def as_markdown(self) -> str: return f"""Sending entities to as a JSON payload stored column wise: {self.endpoint}.""" - async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: return store.feature_references_for(self.input_features_versions) async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: @@ -204,7 +204,7 @@ def as_markdown(self) -> str: {self.prompt_template} ```""" - async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: return store.feature_references_for(self.input_features_versions) async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: @@ -274,7 +274,7 @@ def as_markdown(self) -> str: This will use the model: `{self.model_name}` to generate the embeddings.""" - async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: return 
store.feature_references_for(self.input_features_versions) async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: @@ -354,7 +354,7 @@ def contract_version(self, model_version) -> str: version = model_version.tags[self.model_contract_version_tag] return version - async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: mv = self.get_model_version() version = self.contract_version(mv) return store.feature_references_for(version) @@ -414,7 +414,7 @@ class MLFlowServer(ExposedModel): timeout: int = field(default=30) - model_type: str = 'latest_mlflow' + model_type: str = 'mlflow_server' @property def exposed_at_url(self) -> str | None: @@ -422,7 +422,8 @@ def exposed_at_url(self) -> str | None: @property def as_markdown(self) -> str: - return f"""Using the latest MLFlow model: `{self.model_name}`.""" + return f"""Using an MLFlow server at `{self.host}`. +Assumes that it serves the model `{self.model_name}` with alias `{self.model_alias}`, and will load the features needed for that model based on the input version defined at the tag `{self.model_contract_version_tag}`.""" # noqa: E501 def get_model_version(self): from mlflow.tracking import MlflowClient @@ -442,7 +443,7 @@ def contract_version(self, model_version) -> str: version = model_version.tags[self.model_contract_version_tag] return version - async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReferance]: + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: mv = self.get_model_version() version = self.contract_version(mv) return store.feature_references_for(version) diff --git a/aligned/exposed_model/ollama.py b/aligned/exposed_model/ollama.py index a6197ab..3bcf0f7 100644 --- a/aligned/exposed_model/ollama.py +++ b/aligned/exposed_model/ollama.py @@ -98,23 +98,35 @@ class OllamaOutput: def ollama_embedding_contract( - text: FeatureFactory, + input: FeatureFactory | list[FeatureFactory], contract_name: str, endpoint: str, model: str, entities: list[FeatureFactory] | FeatureFactory, output_source: BatchDataSource | None = None, + prompt_template: str | None = None, ): from aligned import model_contract, FeatureInputVersions + if isinstance(input, FeatureFactory) and prompt_template is None: + prompt_template = f"{{{input.name}}}" + + if prompt_template is None: + raise ValueError('prompt_template must be provided if input is a list') + + if not isinstance(input, list): + input = [input] + @model_contract( name=contract_name, - input_features=FeatureInputVersions(default_version='default', versions={'default': [text]}), + input_features=FeatureInputVersions( + default_version='default', versions={'default': input} # type: ignore + ), exposed_model=ExposedModel.ollama_embedding( endpoint=endpoint, model=model, input_features_versions='default', - prompt_template=f"{{{text.name}}}", + prompt_template=prompt_template, embedding_name='embedding', ), output_source=output_source, @@ -178,7 +190,7 @@ def ollama_classification_contract( prompt_template += 'You have the following information at your disposal:\n' for feature in input: - ref = feature.feature_referance() + ref = feature.feature_reference() prompt_template += f"{ref.name}: {{{ref.name}}}\n" prompt_template += ( diff --git a/aligned/feature_store.py b/aligned/feature_store.py index 177e405..ebebd57 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -36,8 +36,9 @@ 
ConvertableToRetrivalJob, CustomLazyPolarsJob, ) -from aligned.schemas.feature import FeatureLocation, Feature, FeatureReferance +from aligned.schemas.feature import FeatureLocation, Feature, FeatureReference from aligned.schemas.feature_view import CompiledFeatureView +from aligned.schemas.folder import DatasetStore from aligned.schemas.model import EventTrigger from aligned.schemas.model import Model as ModelSchema from aligned.schemas.repo_definition import EnricherReference, RepoDefinition, RepoMetadata @@ -97,7 +98,7 @@ def unpack_feature(feature: str) -> tuple[FeatureLocation, str]: raise ValueError(f'Unable to decode {splits}') -class FeatureStore: +class ContractStore: feature_source: FeatureSource feature_views: dict[str, CompiledFeatureView] @@ -121,12 +122,12 @@ def __init__( self.models = models @staticmethod - def empty() -> FeatureStore: + def empty() -> ContractStore: """ Creates a feature store with no features or models. ```python - store = FeatureStore.empty() + store = ContractStore.empty() store.add_compiled_view(MyFeatureView.compile()) store.add_compiled_model(MyModel.compile()) @@ -134,15 +135,15 @@ def __init__( df = await store.execute_sql("SELECT * FROM my_view LIMIT 10").to_polars() ``` """ - return FeatureStore.from_definition( + return ContractStore.from_definition( RepoDefinition( metadata=RepoMetadata(created_at=datetime.utcnow(), name='experimental'), ) ) @staticmethod - def experimental() -> FeatureStore: - return FeatureStore.empty() + def experimental() -> ContractStore: + return ContractStore.empty() @staticmethod def register_enrichers(enrichers: list[EnricherReference]) -> None: @@ -181,7 +182,7 @@ def set_module(path: str, module_class: DynamicEnricher) -> None: ) @staticmethod - def from_definition(repo: RepoDefinition) -> FeatureStore: + def from_definition(repo: RepoDefinition) -> ContractStore: """Creates a feature store based on a repo definition A feature source can also be defined if wanted, otherwise the batch source will be used for reads @@ -201,7 +202,7 @@ def from_definition(repo: RepoDefinition) -> FeatureStore: feature_views = {fv.name: fv for fv in repo.feature_views} combined_feature_views = {fv.name: fv for fv in repo.combined_feature_views} - FeatureStore.register_enrichers(repo.enrichers) + ContractStore.register_enrichers(repo.enrichers) sources = { FeatureLocation.feature_view(view.name).identifier: view.materialized_source if view.materialized_source @@ -213,7 +214,7 @@ def from_definition(repo: RepoDefinition) -> FeatureStore: if model.predictions_view.source is not None } - return FeatureStore( + return ContractStore( feature_views=feature_views, combined_feature_views=combined_feature_views, models={model.name: model for model in repo.models}, @@ -229,11 +230,11 @@ def repo_definition(self) -> RepoDefinition: enrichers=[], ) - def combine(self, other: FeatureStore) -> FeatureStore: + def combine(self, other: ContractStore) -> ContractStore: """ Combines two different feature stores together. """ - new_store = FeatureStore.empty() + new_store = ContractStore.empty() for view in self.feature_views.values(): new_store.add_view(view) @@ -252,7 +253,7 @@ def combine(self, other: FeatureStore) -> FeatureStore: @staticmethod async def from_reference_at_path( path: str = '.', reference_file: str = 'feature_store_location.py' ) -> ContractStore: """Looks for a file reference struct, and loads the associated repo. 
This can be used for changing which feature store definitions @@ -268,10 +269,10 @@ async def from_reference_at_path( FeatureStore: A feature store based on the feature references """ repo_def = await RepoDefinition.from_reference_at_path(path, reference_file) - return FeatureStore.from_definition(repo_def) + return ContractStore.from_definition(repo_def) @staticmethod - async def from_dir(path: str = '.') -> FeatureStore: + async def from_dir(path: str = '.') -> ContractStore: """Reads and generates a feature store based on the given directory's content. This will read the feature views, services, etc. in a given repo and generate a feature store. @@ -288,7 +289,7 @@ async def from_dir(path: str = '.') -> FeatureStore: Returns: FeatureStore: The generated feature store """ definition = await RepoDefinition.from_path(path) - return FeatureStore.from_definition(definition) + return ContractStore.from_definition(definition) def execute_sql(self, query: str) -> RetrivalJob: import polars as pl @@ -572,7 +573,7 @@ def requests_for( event_timestamp_column: str | None = None, model_version_as_entity: bool | None = None, ) -> FeatureRequest: - return FeatureStore._requests_for( + return ContractStore._requests_for( feature_request, self.feature_views, self.combined_feature_views, @@ -690,7 +691,7 @@ def add_compiled_model(self, model: ModelSchema) -> None: FeatureLocation.model(model.name).identifier ] = model.predictions_view.source - def with_source(self, source: FeatureSourceable = None) -> FeatureStore: + def with_source(self, source: FeatureSourceable = None) -> ContractStore: """ Creates a new instance of a feature store, but changes where to fetch the features from @@ -726,14 +727,14 @@ def with_source(self, source: FeatureSourceable = None) -> FeatureStore: f'or FeatureSourceFactory. Got: {type(source)}' ) - return FeatureStore( + return ContractStore( feature_views=self.feature_views, combined_feature_views=self.combined_feature_views, models=self.models, feature_source=feature_source, ) - def update_source_for(self, location: FeatureLocation | str, source: BatchDataSource) -> FeatureStore: + def update_source_for(self, location: FeatureLocation | str, source: BatchDataSource) -> ContractStore: if not isinstance(self.feature_source, BatchFeatureSource): raise ValueError( f'.update_source_for(...) needs a `BatchFeatureSource`, got {type(self.feature_source)}' ) @@ -745,14 +746,14 @@ def update_source_for(self, location: FeatureLocation | str, source: BatchDataSo new_source = self.feature_source new_source.sources[location.identifier] = source - return FeatureStore( + return ContractStore( feature_views=self.feature_views, combined_feature_views=self.combined_feature_views, models=self.models, feature_source=new_source, ) - def offline_store(self) -> FeatureStore: + def offline_store(self) -> ContractStore: """ Will set the source to the defined batch sources. @@ -761,7 +762,7 @@ def offline_store(self) -> FeatureStore: """ return self.with_source() - def use_application_sources(self) -> FeatureStore: + def use_application_sources(self) -> ContractStore: """ Selects features from the application source if added. Otherwise, we will default back to the batch source. 
@@ -777,7 +778,7 @@ def use_application_sources(self) -> FeatureStore: for model in set(self.models.values()) if model.predictions_view.source is not None } - return FeatureStore( + return ContractStore( feature_views=self.feature_views, combined_feature_views=self.combined_feature_views, models=self.models, @@ -946,17 +947,24 @@ async def overwrite( raise ValueError(f'The source {type(source)} does not support writes') +FeatureStore = ContractStore + + @dataclass class ModelFeatureStore: model: ModelSchema - store: FeatureStore + store: ContractStore selected_version: str | None = None @property def location(self) -> FeatureLocation: return FeatureLocation.model(self.model.name) + @property + def dataset_store(self) -> DatasetStore | None: + return self.model.dataset_store + def raw_string_features(self, except_features: set[str]) -> set[str]: version = self.selected_version or self.model.features.default_version @@ -986,7 +994,7 @@ def request( def needed_entities(self) -> set[Feature]: return self.request().request_result.entities - def feature_references_for(self, version: str) -> list[FeatureReferance]: + def feature_references_for(self, version: str) -> list[FeatureReference]: return self.model.features.features_for(version) def has_exposed_model(self) -> bool: @@ -1069,7 +1077,7 @@ def features_for( if not needs_core_features: job = RetrivalJob.from_convertable(entities, request).derive_features(request.needed_requests) - return job.select_columns(request.features_to_include) + return job async def freshness(self) -> dict[FeatureLocation, datetime | None]: from aligned.schemas.feature import EventTimestamp @@ -1082,7 +1090,7 @@ async def freshness(self) -> dict[FeatureLocation, datetime | None]: return await self.store.feature_source.freshness_for(locs) - def with_labels(self, label_refs: set[FeatureReferance] | None = None) -> SupervisedModelFeatureStore: + def with_labels(self, label_refs: set[FeatureReference] | None = None) -> SupervisedModelFeatureStore: """Will also load the labels for the model ```python @@ -1346,8 +1354,8 @@ class TaxiEta: class SupervisedModelFeatureStore: model: ModelSchema - store: FeatureStore - labels_estimates_refs: set[FeatureReferance] + store: ContractStore + labels_estimates_refs: set[FeatureReference] selected_version: str | None = None @@ -1499,7 +1507,7 @@ def predictions_for( @dataclass class FeatureViewStore: - store: FeatureStore + store: ContractStore view: CompiledFeatureView event_triggers: set[EventTrigger] = field(default_factory=set) feature_filter: set[str] | None = field(default=None) diff --git a/aligned/feature_view/combined_view.py b/aligned/feature_view/combined_view.py index a18ab93..cc87d78 100644 --- a/aligned/feature_view/combined_view.py +++ b/aligned/feature_view/combined_view.py @@ -60,9 +60,9 @@ class SomeView: Returns: FeatureViewStore: Returns a queryable `FeatureViewStore` containing the feature view """ - from aligned import FeatureStore + from aligned import ContractStore - store = FeatureStore.experimental() + store = ContractStore.experimental() store.add_combined_view(self.compile()) return store.feature_view(self.metadata.name) diff --git a/aligned/feature_view/feature_view.py b/aligned/feature_view/feature_view.py index da3cff1..1778b6f 100644 --- a/aligned/feature_view/feature_view.py +++ b/aligned/feature_view/feature_view.py @@ -31,7 +31,7 @@ from aligned.schemas.derivied_feature import ( AggregatedFeature, ) -from aligned.schemas.feature import FeatureLocation, FeatureReferance +from aligned.schemas.feature 
import FeatureLocation, FeatureReference from aligned.schemas.feature_view import CompiledFeatureView from aligned.compiler.feature_factory import FeatureFactory @@ -316,9 +316,9 @@ class SomeView(FeatureView): Returns: FeatureViewStore: Returns a queryable `FeatureViewStore` containing the feature view """ - from aligned import FeatureStore + from aligned import ContractStore - store = FeatureStore.experimental() + store = ContractStore.experimental() store.add_compiled_view(self.compile()) return store.feature_view(self.metadata.name) @@ -605,9 +605,9 @@ def sort_key(x: tuple[int, FeatureFactory]) -> int: raise ValueError(f'FeatureView {metadata.name} must contain at least one Entity') loc = FeatureLocation.feature_view(view.name) - aggregation_group_by = [FeatureReferance(entity.name, loc, entity.dtype) for entity in view.entities] + aggregation_group_by = [FeatureReference(entity.name, loc, entity.dtype) for entity in view.entities] event_timestamp_ref = ( - FeatureReferance(view.event_timestamp.name, loc, view.event_timestamp.dtype) + FeatureReference(view.event_timestamp.name, loc, view.event_timestamp.dtype) if view.event_timestamp else None ) @@ -649,10 +649,10 @@ class SomeView(FeatureView): Returns: FeatureViewStore: Returns a queryable `FeatureViewStore` containing the feature view """ - from aligned import FeatureStore + from aligned import ContractStore self = cls() - store = FeatureStore.experimental() + store = ContractStore.experimental() store.add_feature_view(self) return store.feature_view(self.metadata.name) diff --git a/aligned/feature_view/tests/test_brest_cancer.py b/aligned/feature_view/tests/test_brest_cancer.py index c6e313e..0b50593 100644 --- a/aligned/feature_view/tests/test_brest_cancer.py +++ b/aligned/feature_view/tests/test_brest_cancer.py @@ -1,12 +1,12 @@ import pytest -from aligned import FeatureStore +from aligned import ContractStore from aligned.feature_view.feature_view import FeatureView @pytest.mark.asyncio async def test_all_features( - breast_scan_without_timestamp_feature_store: FeatureStore, + breast_scan_without_timestamp_feature_store: ContractStore, breast_scan_feature_viewout_with_datetime: FeatureView, ) -> None: store = breast_scan_without_timestamp_feature_store diff --git a/aligned/feature_view/tests/test_brest_cancer_event_timestamp.py b/aligned/feature_view/tests/test_brest_cancer_event_timestamp.py index fb9f11e..f17cb4c 100644 --- a/aligned/feature_view/tests/test_brest_cancer_event_timestamp.py +++ b/aligned/feature_view/tests/test_brest_cancer_event_timestamp.py @@ -2,14 +2,14 @@ import pytest -from aligned import FeatureStore +from aligned import ContractStore from aligned.feature_view.feature_view import FeatureView @pytest.mark.asyncio async def test_between_datetime_features( breast_scan_feature_view_with_datetime: FeatureView, - breast_scan_with_timestamp_feature_store: FeatureStore, + breast_scan_with_timestamp_feature_store: ContractStore, ) -> None: feature_view = breast_scan_feature_view_with_datetime store = breast_scan_with_timestamp_feature_store @@ -34,7 +34,7 @@ async def test_between_datetime_features( @pytest.mark.asyncio async def test_between_datetime_features_with_aggregation( breast_scan_feature_view_with_datetime_and_aggregation: FeatureView, - breast_scan_with_timestamp_and_aggregation_feature_store: FeatureStore, + breast_scan_with_timestamp_and_aggregation_feature_store: ContractStore, ) -> None: feature_view = breast_scan_feature_view_with_datetime_and_aggregation store = 
breast_scan_with_timestamp_and_aggregation_feature_store diff --git a/aligned/feature_view/tests/test_combined_view.py b/aligned/feature_view/tests/test_combined_view.py index 3d58fa8..68f44db 100644 --- a/aligned/feature_view/tests/test_combined_view.py +++ b/aligned/feature_view/tests/test_combined_view.py @@ -1,10 +1,10 @@ import pytest -from aligned import FeatureStore, feature_view, Int32, Int64, FileSource +from aligned import ContractStore, feature_view, Int32, Int64, FileSource @pytest.mark.asyncio -async def test_combined_view(combined_feature_store: FeatureStore) -> None: +async def test_combined_view(combined_feature_store: ContractStore) -> None: entities = {'passenger_id': [1, 2, 3, 4, None], 'scan_id': [842302, 84300903, 843786, None, 842301]} result_job = combined_feature_store.features_for( @@ -24,7 +24,7 @@ async def test_combined_view(combined_feature_store: FeatureStore) -> None: @pytest.mark.asyncio -async def test_combined_view_get_all_features(combined_feature_store: FeatureStore) -> None: +async def test_combined_view_get_all_features(combined_feature_store: ContractStore) -> None: entities = {'passenger_id': [1, 2, 3, 4, None], 'scan_id': [842302, 84300903, 843786, None, 842301]} result = await combined_feature_store.features_for(entities, features=['combined:*']).to_pandas() diff --git a/aligned/jobs/tests/test_derived_job.py b/aligned/jobs/tests/test_derived_job.py index 9ec717a..a32df67 100644 --- a/aligned/jobs/tests/test_derived_job.py +++ b/aligned/jobs/tests/test_derived_job.py @@ -5,7 +5,7 @@ from aligned import feature_view, Float, String, FileSource from aligned.compiler.model import model_contract -from aligned.feature_store import FeatureStore +from aligned.feature_store import ContractStore from aligned.local.job import FileFullJob from aligned.retrival_job import DerivedFeatureJob, RetrivalRequest from aligned.sources.local import LiteralReference @@ -113,8 +113,8 @@ class Model: pred_amount = expences.amount.as_regression_label() -def feature_store() -> FeatureStore: - store = FeatureStore.experimental() +def feature_store() -> ContractStore: + store = ContractStore.experimental() views = [Transaction, Expences, Income, ExpenceAgg, IncomeAgg] for view in views: diff --git a/aligned/local/tests/test_jobs.py b/aligned/local/tests/test_jobs.py index f75d5bc..b450a07 100644 --- a/aligned/local/tests/test_jobs.py +++ b/aligned/local/tests/test_jobs.py @@ -1,7 +1,7 @@ import pandas as pd import pytest -from aligned import FeatureStore, FileSource +from aligned import ContractStore, FileSource from aligned.local.job import FileFullJob from aligned.retrival_job import RetrivalRequest from aligned.sources.local import LiteralReference @@ -39,7 +39,7 @@ async def test_file_full_job_polars(retrival_request_without_derived: RetrivalRe @pytest.mark.asyncio -async def test_write_and_read_feature_store(titanic_feature_store_scd: FeatureStore) -> None: +async def test_write_and_read_feature_store(titanic_feature_store_scd: ContractStore) -> None: source = FileSource.json_at('test_data/feature-store.json') definition = titanic_feature_store_scd.repo_definition() await source.write(definition.to_json().encode('utf-8')) diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py index 1f24801..b979c09 100644 --- a/aligned/retrival_job.py +++ b/aligned/retrival_job.py @@ -31,13 +31,14 @@ if TYPE_CHECKING: from typing import AsyncIterator + from aligned.sources.local import Directory from aligned.schemas.folder import DatasetMetadata, DatasetStore from 
aligned.feature_source import WritableFeatureSource from aligned.schemas.derivied_feature import AggregatedFeature, AggregateOver from aligned.schemas.model import EventTrigger, Model from aligned.sources.local import DataFileReference, StorageFileReference - from aligned.feature_store import FeatureStore + from aligned.feature_store import ContractStore logger = logging.getLogger(__name__) @@ -197,22 +198,59 @@ def test(self) -> SupervisedJob: def validate(self) -> SupervisedJob: return SupervisedJob(self.validate_job, self.target_columns, self.should_filter_out_null_targets) + def store_dataset_at_directory( + self, + directory: Directory, + dataset_store: DatasetStore | StorageFileReference | None, + metadata: DatasetMetadata | None = None, + ) -> TrainTestValidateJob: + from uuid import uuid4 + from aligned.schemas.folder import DatasetMetadata + + if not dataset_store: + logger.info('No dataset store provided, skipping dataset storage.') + return self + + if not metadata: + metadata = DatasetMetadata( + id=str(uuid4()), + name='train_test_validate - ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + description='A train, test and validation dataset.', + ) + + run_dir = directory.sub_directory(metadata.id) + return self.store_dataset( + dataset_store=dataset_store, + train_source=run_dir.parquet_at('train.parquet'), # type: ignore + test_source=run_dir.parquet_at('test.parquet'), # type: ignore + validate_source=run_dir.parquet_at('validate.parquet'), # type: ignore + metadata=metadata, + ) + def store_dataset( self, dataset_store: DatasetStore | StorageFileReference, - metadata: DatasetMetadata, train_source: DataFileReference, test_source: DataFileReference, validate_source: DataFileReference, + metadata: DatasetMetadata | None = None, train_size: float | None = None, test_size: float | None = None, validation_size: float | None = None, ) -> TrainTestValidateJob: - from aligned.schemas.folder import TrainDatasetMetadata, JsonDatasetStore + from aligned.schemas.folder import TrainDatasetMetadata, JsonDatasetStore, DatasetMetadata from aligned.data_source.batch_data_source import BatchDataSource - from aligned.sources.local import StorageFileReference + from aligned.sources.local import StorageFileSource + from uuid import uuid4 + + if metadata is None: + metadata = DatasetMetadata( + id=str(uuid4()), + name='train_test_validate - ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + description='A train, test and validation dataset.', + ) - if isinstance(dataset_store, StorageFileReference): + if isinstance(dataset_store, StorageFileSource): data_store = JsonDatasetStore(dataset_store) else: data_store = dataset_store @@ -2331,7 +2369,7 @@ class PredictionJob(RetrivalJob): job: RetrivalJob model: Model - store: FeatureStore + store: ContractStore @property def request_result(self) -> RequestResult: diff --git a/aligned/schemas/constraint_types.py b/aligned/schemas/constraint_types.py new file mode 100644 index 0000000..7b89b58 --- /dev/null +++ b/aligned/schemas/constraint_types.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass +from aligned.schemas.constraints import Constraint +from aligned.schemas.feature import FeatureReference + + +@dataclass +class ReferencingColumn(Constraint): + + value: FeatureReference + name = 'referencing_column' + + def __hash__(self) -> int: + return hash(self.name) diff --git a/aligned/schemas/constraints.py b/aligned/schemas/constraints.py index e585abc..f265083 100644 --- a/aligned/schemas/constraints.py +++ b/aligned/schemas/constraints.py @@ 
-32,6 +32,23 @@ class SupportedConstraints: _shared: OptionalType['SupportedConstraints'] = None def __init__(self) -> None: + from aligned.schemas.constraint_types import ( + LowerBound, + LowerBoundInclusive, + UpperBound, + UpperBoundInclusive, + Required, + Optional, + InDomain, + MaxLength, + MinLength, + StartsWith, + EndsWith, + Unique, + Regex, + ReferencingColumn, + ) + self.types = {} for tran_type in [ @@ -48,6 +65,7 @@ def __init__(self) -> None: EndsWith, Unique, Regex, + ReferencingColumn, ]: self.add(tran_type) diff --git a/aligned/schemas/derivied_feature.py b/aligned/schemas/derivied_feature.py index 0c67044..1d4874b 100644 --- a/aligned/schemas/derivied_feature.py +++ b/aligned/schemas/derivied_feature.py @@ -4,13 +4,13 @@ from datetime import timedelta from aligned.schemas.codable import Codable -from aligned.schemas.feature import Constraint, Feature, FeatureLocation, FeatureReferance, FeatureType +from aligned.schemas.feature import Constraint, Feature, FeatureLocation, FeatureReference, FeatureType from aligned.schemas.transformation import Transformation class DerivedFeature(Feature): - depending_on: set[FeatureReferance] + depending_on: set[FeatureReference] transformation: Transformation depth: int = 1 @@ -18,7 +18,7 @@ def __init__( self, name: str, dtype: FeatureType, - depending_on: set[FeatureReferance], + depending_on: set[FeatureReference], transformation: Transformation, depth: int, description: str | None = None, @@ -38,7 +38,7 @@ def __pre_serialize__(self) -> DerivedFeature: from aligned.schemas.transformation import SupportedTransformations for feature in self.depending_on: - assert isinstance(feature, FeatureReferance) + assert isinstance(feature, FeatureReference) assert isinstance(self.transformation, Transformation) assert self.transformation.name in SupportedTransformations.shared().types @@ -67,7 +67,7 @@ def feature(self) -> Feature: @dataclass class AggregationTimeWindow(Codable): time_window: timedelta - time_column: FeatureReferance + time_column: FeatureReference every_interval: timedelta | None = field(default=None) offset_interval: timedelta | None = field(default=None) @@ -78,7 +78,7 @@ def __hash__(self) -> int: @dataclass class AggregateOver(Codable): - group_by: list[FeatureReferance] + group_by: list[FeatureReference] window: AggregationTimeWindow | None = field(default=None) condition: DerivedFeature | None = field(default=None) @@ -106,7 +106,7 @@ def __hash__(self) -> int: return self.derived_feature.name.__hash__() @property - def depending_on(self) -> set[FeatureReferance]: + def depending_on(self) -> set[FeatureReference]: return self.derived_feature.depending_on @property diff --git a/aligned/schemas/feature.py b/aligned/schemas/feature.py index f1be91d..ddd749e 100644 --- a/aligned/schemas/feature.py +++ b/aligned/schemas/feature.py @@ -1,15 +1,18 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Literal +from typing import Literal, TYPE_CHECKING from zoneinfo import ZoneInfo import polars as pl -import aligned.compiler.feature_factory as ff from aligned.schemas.codable import Codable from aligned.schemas.constraints import Constraint +if TYPE_CHECKING: + from aligned.compiler.feature_factory import FeatureFactory + + NAME_POLARS_MAPPING = [ ('string', pl.Utf8), ('int8', pl.Int8), @@ -148,7 +151,8 @@ def polars_type(self) -> type: raise ValueError(f'Unable to find a value that can represent {self.name}') @property - def feature_factory(self) -> ff.FeatureFactory: + def 
feature_factory(self) -> FeatureFactory: + from aligned.compiler import feature_factory as ff if self.name.startswith('datetime-'): time_zone = self.name.split('-')[1] @@ -320,8 +324,8 @@ def renamed(self, new_name: str) -> Feature: constraints=self.constraints, ) - def as_reference(self, location: FeatureLocation) -> FeatureReferance: - return FeatureReferance( + def as_reference(self, location: FeatureLocation) -> FeatureReference: + return FeatureReference( name=self.name, location=location, dtype=self.dtype, @@ -397,7 +401,7 @@ def from_string(string: str) -> FeatureLocation: @dataclass -class FeatureReferance(Codable): +class FeatureReference(Codable): name: str location: FeatureLocation dtype: FeatureType @@ -418,3 +422,6 @@ def __hash__(self) -> int: @property def identifier(self) -> str: return f'{self.location.identifier}:{self.name}' + + def feature_reference(self) -> FeatureReference: + return self diff --git a/aligned/schemas/folder.py b/aligned/schemas/folder.py index 934dfa0..f50e22a 100644 --- a/aligned/schemas/folder.py +++ b/aligned/schemas/folder.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from typing import Protocol from mashumaro.types import SerializableType from aligned.data_source.batch_data_source import BatchDataSource @@ -12,7 +13,7 @@ class DatasetStorageFactory: - supported_stores: dict[str, type[DatasetStore]] = dict() + supported_stores: dict[str, type[DatasetStore]] = {} _shared: DatasetStorageFactory | None = None @@ -31,6 +32,13 @@ def shared(cls) -> DatasetStorageFactory: return cls._shared +class DatasetMetadataInterface(Protocol): + id: str + name: str | None + description: str | None + tags: list[str] | None + + @dataclass class DatasetMetadata(Codable): @@ -40,6 +48,15 @@ class DatasetMetadata(Codable): tags: list[str] | None = field(default=None) +@dataclass +class SingleDatasetMetadata(Codable): + id: str + source: BatchDataSource + name: str | None = field(default=None) + description: str | None = field(default=None) + tags: list[str] | None = field(default=None) + + @dataclass class TrainDatasetMetadata(Codable): @@ -66,15 +83,15 @@ class TrainDatasetMetadata(Codable): @dataclass class GroupedDatasetList(Codable): - raw_data: list[DatasetMetadata] + raw_data: list[SingleDatasetMetadata] train_test: list[TrainDatasetMetadata] train_test_validation: list[TrainDatasetMetadata] - active_learning: list[DatasetMetadata] + active_learning: list[SingleDatasetMetadata] @property - def all(self) -> list[DatasetMetadata]: + def all(self) -> list[DatasetMetadataInterface]: return self.raw_data + self.train_test + self.train_test_validation + self.active_learning diff --git a/aligned/schemas/model.py b/aligned/schemas/model.py index 3c063eb..bd4b58f 100644 --- a/aligned/schemas/model.py +++ b/aligned/schemas/model.py @@ -5,7 +5,7 @@ from aligned.request.retrival_request import FeatureRequest, RetrivalRequest from aligned.schemas.codable import Codable from aligned.schemas.feature import FeatureLocation, FeatureType -from aligned.schemas.feature import EventTimestamp, Feature, FeatureReferance +from aligned.schemas.feature import EventTimestamp, Feature, FeatureReference from aligned.schemas.event_trigger import EventTrigger from aligned.schemas.target import ClassificationTarget, RecommendationTarget, RegressionTarget from aligned.schemas.feature_view import CompiledFeatureView, FeatureViewReferenceSource @@ -23,19 +23,19 @@ class FeatureInputVersions(Codable): default_version: str - versions: dict[str, 
list[FeatureReferance]] + versions: dict[str, list[FeatureReference]] - def features_for(self, version: str) -> list[FeatureReferance]: + def features_for(self, version: str) -> list[FeatureReference]: return self.versions.get(version, []) @property - def default_features(self) -> list[FeatureReferance]: + def default_features(self) -> list[FeatureReference]: return self.features_for(self.default_version) @dataclass class Target(Codable): - estimating: FeatureReferance + estimating: FeatureReference feature: Feature on_ground_truth_event: StreamDataSource | None = field(default=None) @@ -136,7 +136,7 @@ def request_for( event_timestamp=self.event_timestamp, ) - def labels_estimates_refs(self) -> set[FeatureReferance]: + def labels_estimates_refs(self) -> set[FeatureReference]: if self.classification_targets: return {feature.estimating for feature in self.classification_targets} elif self.regression_targets: @@ -172,7 +172,7 @@ class Model(Codable): def __hash__(self) -> int: return self.name.__hash__() - def feature_references(self, version: str | None = None) -> set[FeatureReferance]: + def feature_references(self, version: str | None = None) -> set[FeatureReference]: return set(self.features.features_for(version or self.features.default_version)) @property diff --git a/aligned/schemas/target.py b/aligned/schemas/target.py index 48444a0..2976372 100644 --- a/aligned/schemas/target.py +++ b/aligned/schemas/target.py @@ -3,13 +3,13 @@ from aligned.data_source.stream_data_source import StreamDataSource from aligned.schemas.codable import Codable from aligned.schemas.event_trigger import EventTrigger -from aligned.schemas.feature import Feature, FeatureReferance +from aligned.schemas.feature import Feature, FeatureReference from aligned.schemas.literal_value import LiteralValue @dataclass class RegressionTarget(Codable): - estimating: FeatureReferance + estimating: FeatureReference feature: Feature on_ground_truth_event: StreamDataSource | None = field(default=None) @@ -35,7 +35,7 @@ def __hash__(self) -> int: @dataclass class ClassificationTarget(Codable): - estimating: FeatureReferance + estimating: FeatureReference feature: Feature on_ground_truth_event: StreamDataSource | None = field(default=None) @@ -52,10 +52,10 @@ def __hash__(self) -> int: @dataclass class RecommendationTarget(Codable): - estimating: FeatureReferance + estimating: FeatureReference feature: Feature - estimating_rank: FeatureReferance | None = field(default=None) + estimating_rank: FeatureReference | None = field(default=None) def __hash__(self) -> int: return self.feature.name.__hash__() diff --git a/aligned/server.py b/aligned/server.py index da06fbf..9a843c4 100644 --- a/aligned/server.py +++ b/aligned/server.py @@ -13,7 +13,7 @@ from aligned.data_source.stream_data_source import HttpStreamSource from aligned.feature_source import WritableFeatureSource -from aligned.feature_store import FeatureStore +from aligned.feature_store import ContractStore from aligned.schemas.feature import Feature from aligned.schemas.feature_view import CompiledFeatureView from aligned.sources.local import StorageFileReference @@ -35,7 +35,7 @@ class TopicInfo: class FastAPIServer: @staticmethod - def write_to_topic_path(topic: TopicInfo, feature_store: FeatureStore, app: FastAPI) -> None: + def write_to_topic_path(topic: TopicInfo, feature_store: ContractStore, app: FastAPI) -> None: required_features: set[Feature] = set() for view in topic.views: @@ -95,7 +95,7 @@ def find_path_variable(values: dict, key: str) -> Any: ) @staticmethod - 
def feature_view_path(name: str, feature_store: FeatureStore, app: FastAPI) -> None: + def feature_view_path(name: str, feature_store: ContractStore, app: FastAPI) -> None: @app.post(f'/feature-views/{name}/all') async def all(limit: int | None = None) -> dict: df = await feature_store.feature_view(name).all(limit=limit).to_pandas() @@ -103,7 +103,7 @@ async def all(limit: int | None = None) -> dict: return df.to_dict('list') @staticmethod - def model_path(name: str, feature_store: FeatureStore, app: FastAPI) -> None: + def model_path(name: str, feature_store: ContractStore, app: FastAPI) -> None: from aligned.feature_store import RawStringFeatureRequest model = feature_store.models[name] @@ -168,7 +168,7 @@ async def get_model(entity_values: dict) -> str: return Response(content=f'{{{body}}}', media_type='application/json') @staticmethod - def app(feature_store: FeatureStore, auth_tokens: list[str] | None = None) -> FastAPI: + def app(feature_store: ContractStore, auth_tokens: list[str] | None = None) -> FastAPI: from asgi_correlation_id import CorrelationIdMiddleware from fastapi import FastAPI from fastapi.middleware import Middleware @@ -243,7 +243,7 @@ async def features(payload: APIFeatureRequest) -> dict: @staticmethod def run( - feature_store: FeatureStore, + feature_store: ContractStore, host: str | None = None, port: int | None = None, workers: int | None = None, diff --git a/aligned/sources/local.py b/aligned/sources/local.py index 92a7fee..1d641bd 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -27,7 +27,7 @@ if TYPE_CHECKING: from datetime import datetime from aligned.schemas.repo_definition import RepoDefinition - from aligned.feature_store import FeatureStore + from aligned.feature_store import ContractStore logger = logging.getLogger(__name__) @@ -37,10 +37,10 @@ class AsRepoDefinition: async def as_repo_definition(self) -> RepoDefinition: raise NotImplementedError() - async def feature_store(self) -> FeatureStore: - from aligned.feature_store import FeatureStore + async def feature_store(self) -> ContractStore: + from aligned.feature_store import ContractStore - return FeatureStore.from_definition(await self.as_repo_definition()) + return ContractStore.from_definition(await self.as_repo_definition()) class StorageFileReference(AsRepoDefinition): @@ -83,9 +83,9 @@ def create_parent_dir(path: str) -> None: file_path = Path(path) parent = file_path.parent - while not parent.is_dir(): + while not parent.exists(): parents.append(parent) - parent = file_path.parent + parent = parent.parent for parent in reversed(parents): parent.mkdir(exist_ok=True) @@ -138,7 +138,7 @@ def to_markdown(self) -> str: *Datetime Formatter*: {self.formatter} [Go to file]({self.path}) -""" +""" # noqa async def read_pandas(self) -> pd.DataFrame: try: @@ -392,7 +392,7 @@ def to_markdown(self) -> str: *File*: {self.path} -[Go to file]({self.path})''' +[Go to file]({self.path})''' # noqa def job_group_key(self) -> str: return f'{self.type_name}/{self.path}' diff --git a/aligned/sources/tests/test_parquet.py b/aligned/sources/tests/test_parquet.py index 7d999d4..0303557 100644 --- a/aligned/sources/tests/test_parquet.py +++ b/aligned/sources/tests/test_parquet.py @@ -2,7 +2,7 @@ import polars as pl from pathlib import Path -from aligned import FeatureStore, FileSource, feature_view, Int32 +from aligned import ContractStore, FileSource, feature_view, Int32 from aligned.feature_view.feature_view import FeatureView from aligned.schemas.date_formatter import DateFormatter from conftest 
import DataTest @@ -11,7 +11,7 @@ @pytest.mark.asyncio async def test_read_parquet(point_in_time_data_test: DataTest) -> None: - store = FeatureStore.experimental() + store = ContractStore.experimental() for source in point_in_time_data_test.sources: view = source.view @@ -37,7 +37,7 @@ async def test_read_parquet(point_in_time_data_test: DataTest) -> None: @pytest.mark.asyncio async def test_parquest(point_in_time_data_test: DataTest) -> None: - store = FeatureStore.experimental() + store = ContractStore.experimental() for source in point_in_time_data_test.sources: view = source.view @@ -74,7 +74,7 @@ async def test_parquet_without_event_timestamp( point_in_time_data_test_wituout_event_timestamp: DataTest, ) -> None: - store = FeatureStore.experimental() + store = ContractStore.experimental() for source in point_in_time_data_test_wituout_event_timestamp.sources: view = source.view @@ -108,7 +108,7 @@ async def test_parquet_without_event_timestamp( @pytest.mark.asyncio async def test_read_csv(point_in_time_data_test: DataTest) -> None: - store = FeatureStore.experimental() + store = ContractStore.experimental() for source in point_in_time_data_test.sources: view = source.view diff --git a/aligned/sources/tests/test_psql.py b/aligned/sources/tests/test_psql.py index df89ed1..9396d68 100644 --- a/aligned/sources/tests/test_psql.py +++ b/aligned/sources/tests/test_psql.py @@ -2,7 +2,7 @@ import pytest -from aligned import FeatureStore, PostgreSQLConfig +from aligned import ContractStore, PostgreSQLConfig from aligned.feature_view.feature_view import FeatureView from conftest import DataTest import platform @@ -24,7 +24,7 @@ async def test_postgresql(point_in_time_data_test: DataTest, psql: PostgreSQLCon psql_database = environ['PSQL_DATABASE_TEST'] - store = FeatureStore.experimental() + store = ContractStore.experimental() for source in point_in_time_data_test.sources: view = source.view @@ -58,7 +58,7 @@ async def test_postgresql(point_in_time_data_test: DataTest, psql: PostgreSQLCon platform.uname().machine.startswith('arm'), reason='Needs psycopg2 which is not supported on arm' ) @pytest.mark.asyncio -async def test_postgresql_write(titanic_feature_store: FeatureStore, psql: PostgreSQLConfig) -> None: +async def test_postgresql_write(titanic_feature_store: ContractStore, psql: PostgreSQLConfig) -> None: import polars as pl from polars.testing import assert_frame_equal @@ -98,7 +98,7 @@ async def test_postgresql_without_event( psql_database = environ['PSQL_DATABASE_TEST'] - store = FeatureStore.experimental() + store = ContractStore.experimental() for source in point_in_time_data_test_wituout_event_timestamp.sources: view = source.view diff --git a/aligned/tests/test_cached_parquet.py b/aligned/tests/test_cached_parquet.py index 3c0d47e..3067e02 100644 --- a/aligned/tests/test_cached_parquet.py +++ b/aligned/tests/test_cached_parquet.py @@ -1,10 +1,10 @@ import pytest -from aligned import FeatureStore, FileSource +from aligned import ContractStore, FileSource @pytest.mark.asyncio -async def test_cached_at(titanic_feature_store: FeatureStore) -> None: +async def test_cached_at(titanic_feature_store: ContractStore) -> None: """ Checks that we load all rows from the cached file. 
""" diff --git a/aligned/tests/test_model_target.py b/aligned/tests/test_model_target.py index dce0676..b0fe647 100644 --- a/aligned/tests/test_model_target.py +++ b/aligned/tests/test_model_target.py @@ -4,12 +4,12 @@ import polars as pl import pytest -from aligned import FeatureStore, model_contract, String, Int32 +from aligned import ContractStore, model_contract, String, Int32 from aligned.schemas.feature import FeatureLocation @pytest.mark.asyncio -async def test_titanic_model_with_targets(titanic_feature_store: FeatureStore) -> None: +async def test_titanic_model_with_targets(titanic_feature_store: ContractStore) -> None: entity_list = [1, 4, 5, 6, 7, 30, 31, 2] @@ -28,7 +28,7 @@ async def test_titanic_model_with_targets(titanic_feature_store: FeatureStore) - @pytest.mark.asyncio -async def test_titanic_model_with_targets_and_scd(titanic_feature_store_scd: FeatureStore) -> None: +async def test_titanic_model_with_targets_and_scd(titanic_feature_store_scd: ContractStore) -> None: entities = pl.DataFrame( { @@ -114,7 +114,7 @@ async def test_model_insert_predictions() -> None: """ Test the insert (aka. ish append) method on the feature store. """ - from aligned import FileSource, FeatureStore + from aligned import FileSource, ContractStore path = 'test_data/test_model.parquet' @@ -128,7 +128,7 @@ class TestModel: a = Int32() - store = FeatureStore.experimental() + store = ContractStore.experimental() initial_frame = pl.DataFrame({'id': [1, 2, 3], 'a': [1, 2, 3]}) initial_frame.write_parquet(path) @@ -151,7 +151,7 @@ async def test_model_insert_predictions_csv() -> None: """ Test the insert (aka. ish append) method on the feature store. """ - from aligned import FileSource, FeatureStore + from aligned import FileSource, ContractStore path = 'test_data/test_model.csv' @@ -165,7 +165,7 @@ class TestModel: a = Int32() - store = FeatureStore.experimental() + store = ContractStore.experimental() initial_frame = pl.DataFrame({'some_id': [1, 2, 3], 'a': [1, 2, 3]}) initial_frame.write_csv(path) @@ -189,7 +189,7 @@ async def test_model_upsert_predictions() -> None: """ Test the insert (aka. ish append) method on the feature store. 
""" - from aligned import FileSource, FeatureStore + from aligned import FileSource, ContractStore path = 'test_data/test_model.parquet' @@ -199,7 +199,7 @@ class TestModel: a = Int32() - store = FeatureStore.experimental() + store = ContractStore.experimental() initial_frame = pl.DataFrame({'id': [1, 2, 3, 4], 'a': [1, 2, 3, 4]}) initial_frame.write_parquet(path) diff --git a/aligned/tests/test_models_as_feature.py b/aligned/tests/test_models_as_feature.py index 1f04d46..de4f80f 100644 --- a/aligned/tests/test_models_as_feature.py +++ b/aligned/tests/test_models_as_feature.py @@ -1,5 +1,5 @@ import pytest -from aligned import Bool, FeatureStore, FileSource, Int32, String +from aligned import Bool, ContractStore, FileSource, Int32, String from aligned.feature_view.feature_view import feature_view from aligned.compiler.model import FeatureInputVersions, model_contract from aligned.schemas.feature import FeatureLocation @@ -68,7 +68,7 @@ def test_model_referenced_as_feature() -> None: def test_model_request() -> None: - store = FeatureStore.experimental() + store = ContractStore.experimental() store.add_feature_view(View) # type: ignore store.add_feature_view(OtherView) # type: ignore store.add_model(First) @@ -80,7 +80,7 @@ def test_model_request() -> None: def test_model_version() -> None: - store = FeatureStore.experimental() + store = ContractStore.experimental() store.add_feature_view(View) # type: ignore store.add_feature_view(OtherView) # type: ignore store.add_model(First) @@ -95,7 +95,7 @@ def test_model_version() -> None: async def test_load_preds_with_different_model_version() -> None: import polars as pl - store = FeatureStore.experimental() + store = ContractStore.experimental() store.add_model(FirstWithVersions) source = FileSource.csv_at('test_data/model_preds.csv') diff --git a/aligned/tests/test_source_validation.py b/aligned/tests/test_source_validation.py index c44631d..e25abb9 100644 --- a/aligned/tests/test_source_validation.py +++ b/aligned/tests/test_source_validation.py @@ -1,12 +1,12 @@ import pytest -from aligned import FeatureStore, FileSource +from aligned import ContractStore, FileSource from aligned.schemas.feature import FeatureType, FeatureLocation from aligned.source_validation import validate_sources_in @pytest.mark.asyncio -async def test_source_validation(titanic_feature_store: FeatureStore) -> None: +async def test_source_validation(titanic_feature_store: ContractStore) -> None: source = FileSource.parquet_at('test_data/titanic.parquet') @@ -21,8 +21,7 @@ async def test_source_validation(titanic_feature_store: FeatureStore) -> None: @pytest.mark.asyncio async def test_schema_loading() -> None: source = FileSource.parquet_at('test_data/titanic.parquet') - schema = await source.schema() - dtype_schema = {key: feature for key, feature in schema.items()} + dtype_schema = await source.schema() assert dtype_schema == { 'passenger_id': FeatureType(name='int64'), 'survived': FeatureType(name='int64'), diff --git a/aligned/tests/test_train_test_validate_set.py b/aligned/tests/test_train_test_validate_set.py index 53e6815..f002dae 100644 --- a/aligned/tests/test_train_test_validate_set.py +++ b/aligned/tests/test_train_test_validate_set.py @@ -1,7 +1,7 @@ import pytest from pathlib import Path -from aligned.feature_store import FeatureStore +from aligned.feature_store import ContractStore from aligned.retrival_job import split from aligned.schemas.folder import DatasetMetadata from aligned.sources.local import CsvFileSource, FileSource @@ -25,7 +25,7 @@ async def 
test_split(scan_with_datetime: CsvFileSource) -> None: @pytest.mark.asyncio -async def test_train_test_validate_set(titanic_feature_store: FeatureStore) -> None: +async def test_train_test_validate_set(titanic_feature_store: ContractStore) -> None: dataset_size = 100 train_fraction = 0.6 @@ -55,7 +55,7 @@ async def test_train_test_validate_set(titanic_feature_store: FeatureStore) -> N @pytest.mark.asyncio -async def test_train_test_validate_set_new(titanic_feature_store: FeatureStore) -> None: +async def test_train_test_validate_set_new(titanic_feature_store: ContractStore) -> None: from aligned.schemas.folder import JsonDatasetStore unlink_paths = [ @@ -101,7 +101,7 @@ async def test_train_test_validate_set_new(titanic_feature_store: FeatureStore) store = JsonDatasetStore(dataset_store) datasets = await store.list_datasets() - assert store.to_json() != None + assert store.to_json() is not None assert len(datasets.train_test_validation) == 1 train_dataset = datasets.train_test_validation[0] diff --git a/aligned/tests/test_transformations.py b/aligned/tests/test_transformations.py index eb8c688..ef1b335 100644 --- a/aligned/tests/test_transformations.py +++ b/aligned/tests/test_transformations.py @@ -1,7 +1,7 @@ import pytest from aligned.compiler.feature_factory import EventTimestamp, Int32, String, Float -from aligned.feature_store import FeatureStore +from aligned.feature_store import ContractStore from aligned.feature_view.feature_view import feature_view from aligned.schemas.transformation import SupportedTransformations from aligned.sources.local import FileSource, CsvFileSource @@ -24,7 +24,7 @@ async def test_polars_transformation() -> None: @pytest.mark.asyncio -async def test_transformations_in_feture_view(alot_of_transforation_feature_store: FeatureStore) -> None: +async def test_transformations_in_feture_view(alot_of_transforation_feature_store: ContractStore) -> None: store = alot_of_transforation_feature_store amount = 100 @@ -154,4 +154,3 @@ class TestFill: assert df['some_new_column'].isnull().sum() == 0 assert df['some_string'].isnull().sum() == 0 - assert False diff --git a/aligned/validation/tests/test_pandera_validator.py b/aligned/validation/tests/test_pandera_validator.py index 8438ce5..0b10eeb 100644 --- a/aligned/validation/tests/test_pandera_validator.py +++ b/aligned/validation/tests/test_pandera_validator.py @@ -1,11 +1,11 @@ import pytest -from aligned import FeatureStore +from aligned import ContractStore from aligned.validation.pandera import PanderaValidator @pytest.mark.asyncio -async def test_validate_valid_feature_view(titanic_feature_store: FeatureStore) -> None: +async def test_validate_valid_feature_view(titanic_feature_store: ContractStore) -> None: original = await titanic_feature_store.feature_view('titanic').all(limit=5).to_pandas() validated_df = ( await titanic_feature_store.feature_view('titanic') @@ -18,7 +18,7 @@ async def test_validate_valid_feature_view(titanic_feature_store: FeatureStore) @pytest.mark.asyncio -async def test_validate_invalid_feature_view(titanic_feature_store: FeatureStore) -> None: +async def test_validate_invalid_feature_view(titanic_feature_store: ContractStore) -> None: validated_df = ( await titanic_feature_store.feature_view('titanic') .all(limit=20) @@ -30,7 +30,7 @@ async def test_validate_invalid_feature_view(titanic_feature_store: FeatureStore @pytest.mark.asyncio -async def test_return_invalid_rows(titanic_feature_store: FeatureStore) -> None: +async def test_return_invalid_rows(titanic_feature_store: 
ContractStore) -> None: validated_job = titanic_feature_store.feature_view('titanic').all(limit=20).return_invalid() validated_df = await validated_job.to_pandas() diff --git a/aligned/worker.py b/aligned/worker.py index 03b5dcd..eb45460 100644 --- a/aligned/worker.py +++ b/aligned/worker.py @@ -14,7 +14,7 @@ from aligned.data_source.batch_data_source import ColumnFeatureMappable from aligned.data_source.stream_data_source import StreamDataSource from aligned.feature_source import WritableFeatureSource -from aligned.feature_store import FeatureStore, FeatureViewStore, ModelFeatureStore +from aligned.feature_store import ContractStore, FeatureViewStore, ModelFeatureStore from aligned.retrival_job import RetrivalJob, StreamAggregationJob from aligned.sources.local import AsRepoDefinition from aligned.streams.interface import ReadableStream @@ -132,7 +132,7 @@ def prune_unused_features(self, should_prune_unused_features: bool | None = None self.should_prune_unused_features = should_prune_unused_features return self - def feature_views_by_topic(self, store: FeatureStore) -> dict[str, list[FeatureViewStore]]: + def feature_views_by_topic(self, store: ContractStore) -> dict[str, list[FeatureViewStore]]: from aligned.data_source.stream_data_source import HttpStreamSource feature_views_to_process = self.views_to_process or set() diff --git a/conftest.py b/conftest.py index d306d3a..8359b51 100644 --- a/conftest.py +++ b/conftest.py @@ -20,11 +20,11 @@ ) from aligned.feature_view.feature_view import FeatureView, FeatureViewMetadata from aligned.compiler.model import model_contract, ModelContractWrapper -from aligned.feature_store import FeatureStore +from aligned.feature_store import ContractStore from aligned.feature_view.combined_view import CombinedFeatureView, CombinedFeatureViewMetadata from aligned.retrival_job import DerivedFeatureJob, RetrivalJob, RetrivalRequest from aligned.schemas.derivied_feature import DerivedFeature -from aligned.schemas.feature import Feature, FeatureLocation, FeatureReferance, FeatureType +from aligned.schemas.feature import Feature, FeatureLocation, FeatureReference, FeatureType from aligned.schemas.record_coders import JsonRecordCoder from aligned.sources.local import CsvFileSource, FileFullJob, LiteralReference, ParquetFileSource @@ -72,12 +72,12 @@ def retrival_request_with_derived() -> RetrivalRequest: name='c+d', dtype=FeatureType.int32(), depending_on={ - FeatureReferance( + FeatureReference( name='c', location=FeatureLocation.feature_view('test_with_ts'), dtype=FeatureType.int32(), ), - FeatureReferance( + FeatureReference( name='d', location=FeatureLocation.feature_view('test_with_ts'), dtype=FeatureType.int32(), @@ -131,12 +131,12 @@ def combined_retrival_request() -> RetrivalRequest: name='a+c+d', dtype=FeatureType.int32(), depending_on={ - FeatureReferance( + FeatureReference( name='c+d', location=FeatureLocation.feature_view('test_with_ts'), dtype=FeatureType.int32(), ), - FeatureReferance( + FeatureReference( name='a', location=FeatureLocation.feature_view('test'), dtype=FeatureType.int32() ), }, @@ -212,8 +212,8 @@ class BreastDiagnoseFeatureView(FeatureView): @pytest_asyncio.fixture async def breast_scan_without_timestamp_feature_store( breast_scan_feature_viewout_with_datetime: FeatureView, -) -> FeatureStore: - store = FeatureStore.empty() +) -> ContractStore: + store = ContractStore.empty() store.add_feature_view(breast_scan_feature_viewout_with_datetime) return store @@ -346,8 +346,8 @@ class BreastDiagnoseFeatureView(FeatureView): 
@pytest_asyncio.fixture async def breast_scan_with_timestamp_feature_store( breast_scan_feature_view_with_datetime: FeatureView, -) -> FeatureStore: - store = FeatureStore.empty() +) -> ContractStore: + store = ContractStore.empty() store.add_feature_view(breast_scan_feature_view_with_datetime) return store @@ -355,8 +355,8 @@ async def breast_scan_with_timestamp_feature_store( @pytest_asyncio.fixture async def breast_scan_with_timestamp_and_aggregation_feature_store( breast_scan_feature_view_with_datetime_and_aggregation: FeatureView, -) -> FeatureStore: - store = FeatureStore.empty() +) -> ContractStore: + store = ContractStore.empty() store.add_feature_view(breast_scan_feature_view_with_datetime_and_aggregation) return store @@ -490,8 +490,8 @@ async def titanic_feature_store( titanic_feature_view: FeatureView, titanic_feature_view_parquet: FeatureView, titanic_model: ModelContractWrapper, -) -> FeatureStore: - feature_store = FeatureStore.empty() +) -> ContractStore: + feature_store = ContractStore.empty() feature_store.add_feature_view(titanic_feature_view) feature_store.add_feature_view(titanic_feature_view_parquet) feature_store.add_model(titanic_model) @@ -539,8 +539,8 @@ class TitanicPassenger(FeatureView): @pytest_asyncio.fixture async def alot_of_transforation_feature_store( alot_of_transforations_feature_view: FeatureView, -) -> FeatureStore: - feature_store = FeatureStore.empty() +) -> ContractStore: + feature_store = ContractStore.empty() feature_store.add_feature_view(alot_of_transforations_feature_view) return feature_store @@ -569,8 +569,8 @@ async def combined_feature_store( titanic_feature_view: FeatureView, breast_scan_feature_viewout_with_datetime: FeatureView, combined_view: CombinedFeatureView, -) -> FeatureStore: - feature_store = FeatureStore.empty() +) -> ContractStore: + feature_store = ContractStore.empty() feature_store.add_feature_view(titanic_feature_view) feature_store.add_feature_view(breast_scan_feature_viewout_with_datetime) feature_store.add_combined_feature_view(combined_view) @@ -629,7 +629,12 @@ def titanic_model_scd(titanic_feature_view_scd: FeatureView) -> ModelContractWra @model_contract( name='titanic', description='A model predicting if a passenger will survive', - input_features=[features.age, features.sibsp, features.has_siblings, features.is_male], # type: ignore + input_features=[ + features.age, # type: ignore + features.sibsp, # type: ignore + features.has_siblings, # type: ignore + features.is_male, # type: ignore + ], acceptable_freshness=timedelta(days=1), unacceptable_freshness=timedelta(days=2), ) @@ -646,8 +651,8 @@ async def titanic_feature_store_scd( titanic_feature_view_scd: FeatureView, titanic_feature_view_parquet: FeatureView, titanic_model_scd: ModelContractWrapper, -) -> FeatureStore: - feature_store = FeatureStore.empty() +) -> ContractStore: + feature_store = ContractStore.empty() feature_store.add_feature_view(titanic_feature_view_scd) feature_store.add_feature_view(titanic_feature_view_parquet) feature_store.add_model(titanic_model_scd) diff --git a/pyproject.toml b/pyproject.toml index afc0647..af9c0f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,9 +61,6 @@ pandera = { version = "^0.17.0", optional = true} polars = { version = "^0.20.0", extras = ["pyarrow"] } pillow = { version = "^9.4.0", optional = true } prometheus-fastapi-instrumentator = { version="^5.9.1", optional = true } -# gensim = { version = "4.3.0", optional = true } -# openai = { version = "^0.27.2", optional = true } -# sentence-transformers = { 
version = "^2.2.2", optional = true } kafka-python = { version= "^2.0.2", optional = true } connectorx = { version = "^0.3.2", optional = true } asyncpg = { version = "^0.29.0", optional = true } @@ -83,7 +80,6 @@ kafka = ["kafka-python"] ollama = ["ollama"] sql = ["sqlglot"] mlflow = ["mlflow"] -# text = ["gensim", "openai", "sentence-transformers"] [tool.poetry.group.dev.dependencies] types-redis = "^4.2.6" diff --git a/setup.cfg b/setup.cfg index 5833453..e924da7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,7 +6,7 @@ inline-quotes = 'double' # W503 line break before binary operator - not Black/PEP8 compatible # SIM106 handle error cases first # TC002 Move third-party import into a type-checking block (not compatible with pydantic) -ignore = E203, W503, SIM106, TC002, SIM110, TC001, E231, E201, E202, E241 +ignore = E203, W503, SIM106, TC002, SIM110, TC001, E231, E201, E202, E241, TC003 enable-extensions = TC, TC1 pytest-mark-no-parentheses=true pytest-fixture-no-parentheses=true From 6ca638832f8564e53de92fb4d6c92b90ecb8d126 Mon Sep 17 00:00:00 2001 From: "Mats E. Mollestad" Date: Sun, 14 Apr 2024 01:35:13 +0200 Subject: [PATCH 10/13] Updated tests --- aligned/exposed_model/tests/test_mlflow.py | 7 +- .../feature_view/tests/test_combined_view.py | 1 + aligned/schemas/constraints.py | 13 ---- aligned/tests/test_transformations.py | 4 +- conftest.py | 7 +- test_data/credit_history.csv | 14 ++-- test_data/credit_history_mater.parquet | Bin 983 -> 984 bytes test_data/data/csv_iso.csv | 6 +- test_data/data/csv_unix.csv | 6 +- test_data/data/parquet_iso.parquet | Bin 1131 -> 1138 bytes test_data/data/parquet_unix.parquet | Bin 1077 -> 1077 bytes test_data/feature-store.json | 2 +- test_data/loan.csv | 14 ++-- test_data/test_model.parquet | Bin 594 -> 594 bytes test_data/titanic-sets.json | 1 + test_data/titanic-test.csv | 21 ++++++ test_data/titanic-train.csv | 61 ++++++++++++++++++ test_data/titanic-validate.csv | 21 ++++++ 18 files changed, 135 insertions(+), 43 deletions(-) create mode 100644 test_data/titanic-sets.json create mode 100644 test_data/titanic-test.csv create mode 100644 test_data/titanic-train.csv create mode 100644 test_data/titanic-validate.csv diff --git a/aligned/exposed_model/tests/test_mlflow.py b/aligned/exposed_model/tests/test_mlflow.py index 77b98da..cf32269 100644 --- a/aligned/exposed_model/tests/test_mlflow.py +++ b/aligned/exposed_model/tests/test_mlflow.py @@ -34,20 +34,19 @@ class InputFeatureView: entity_id = String().as_entity() x = Int32() + input = InputFeatureView() + @model_contract( input_features=[InputFeatureView().x], exposed_model=ExposedModel.in_memory_mlflow( model_name=model_name, model_alias=model_alias, - prediction_column='prediction', - model_version_column='model_version', - predicted_at_column='predicted_at', ), ) class MyModelContract: entity_id = String().as_entity() predicted_at = EventTimestamp() - prediction = Int32() + prediction = input.x.as_regression_target() model_version = String().as_model_version() preds = await MyModelContract.predict_over( diff --git a/aligned/feature_view/tests/test_combined_view.py b/aligned/feature_view/tests/test_combined_view.py index 68f44db..5194793 100644 --- a/aligned/feature_view/tests/test_combined_view.py +++ b/aligned/feature_view/tests/test_combined_view.py @@ -99,4 +99,5 @@ class TestRef: result = await TestRef.query().all().to_pandas() # type: ignore result['new_feature'] = result['new_feature'].astype('int64') + result['some_id'] = result['some_id'].astype('int64') assert 
result[expected_df.columns].equals(expected_df) diff --git a/aligned/schemas/constraints.py b/aligned/schemas/constraints.py index f265083..4778bff 100644 --- a/aligned/schemas/constraints.py +++ b/aligned/schemas/constraints.py @@ -33,19 +33,6 @@ class SupportedConstraints: def __init__(self) -> None: from aligned.schemas.constraint_types import ( - LowerBound, - LowerBoundInclusive, - UpperBound, - UpperBoundInclusive, - Required, - Optional, - InDomain, - MaxLength, - MinLength, - StartsWith, - EndsWith, - Unique, - Regex, ReferencingColumn, ) diff --git a/aligned/tests/test_transformations.py b/aligned/tests/test_transformations.py index ef1b335..0bdf233 100644 --- a/aligned/tests/test_transformations.py +++ b/aligned/tests/test_transformations.py @@ -152,5 +152,5 @@ class TestFill: df = await TestFill.query().all().to_polars() - assert df['some_new_column'].isnull().sum() == 0 - assert df['some_string'].isnull().sum() == 0 + assert df['some_new_column'].is_null().sum() == 0 + assert df['some_string'].is_null().sum() == 0 diff --git a/conftest.py b/conftest.py index 8359b51..97f8a90 100644 --- a/conftest.py +++ b/conftest.py @@ -16,6 +16,7 @@ Int64, RedisConfig, String, + Int8, EmbeddingModel, ) from aligned.feature_view.feature_view import FeatureView, FeatureViewMetadata @@ -405,14 +406,14 @@ class TitanicPassenger(FeatureView): name='titanic', description='Some features from the titanic dataset', source=titanic_source ) - passenger_id = Entity(dtype=Int32()) + passenger_id = Int32().as_entity() # Input values age = Float().lower_bound(0).upper_bound(100).description('A float as some have decimals') name = String().is_optional() sex = String().is_optional().accepted_values(['male', 'female']) - survived = Bool().description('If the passenger survived') + survived = Int8().description('If the passenger survived') sibsp = ( Int32().is_optional().lower_bound(0).upper_bound(20).description('Number of siblings on titanic') @@ -512,7 +513,7 @@ class TitanicPassenger(FeatureView): age = Float() name = String() sex = String() - survived = Bool() + survived = Int8() sibsp = Int32() cabin = String().fill_na('Nada') diff --git a/test_data/credit_history.csv b/test_data/credit_history.csv index 9571ca4..3e39ac1 100644 --- a/test_data/credit_history.csv +++ b/test_data/credit_history.csv @@ -1,7 +1,7 @@ -event_timestamp,credit_card_due,dob_ssn,due_sum,bankruptcies,student_loan_due -2020-04-26 18:01:04.746575+00:00,8419,19530219_5179,30747,0,22328 -2020-04-26 18:01:04.746575+00:00,2944,19520816_8737,5459,0,2515 -2020-04-26 18:01:04.746575+00:00,833,19860413_2537,33833,0,33000 -2020-04-27 18:01:04.746575+00:00,5936,19530219_5179,54891,0,48955 -2020-04-27 18:01:04.746575+00:00,1575,19520816_8737,11076,0,9501 -2020-04-27 18:01:04.746575+00:00,6263,19860413_2537,41773,0,35510 +student_loan_due,bankruptcies,event_timestamp,credit_card_due,due_sum,dob_ssn +22328,0,1587924064746575,8419,30747,19530219_5179 +2515,0,1587924064746575,2944,5459,19520816_8737 +33000,0,1587924064746575,833,33833,19860413_2537 +48955,0,1588010464746575,5936,54891,19530219_5179 +9501,0,1588010464746575,1575,11076,19520816_8737 +35510,0,1588010464746575,6263,41773,19860413_2537 diff --git a/test_data/credit_history_mater.parquet b/test_data/credit_history_mater.parquet index 224e36f5dfe5725c8ef0513e1d89343bf4ff3f67..f60043753c1ddb105e77bdb9224c28776f6068f0 100644 GIT binary patch delta 314 zcmcc4euI6229H7dLk9*Rc(C(j`#^#gHFiI%*Fsi*`Vvv*(;}A^{1DXqD z?P7#8UonaHFitjQ)RW*+Ws(q1E=o_<*?1Ai^;cOa=kvoE#lp(;Z#GbfBXrNZbW1>FB5*0Mt|-2$J#u2}QYE 
UW=0tMRVAfQp2L(qIh?rx0N+(pW&i*H delta 299 zcmcb?ew}@S2G4_?4;&bPz##ph!{k|vnrxzMqFJJG6L+ghaSAF}m>HND8pj)%3JPkN z8XFiHTE?3inp;l(&nUs##i+K6abk=rH^YwIU?Unh&rIx5Q)pmjkdzVQ5KRyR8Vh9Y zVifISgt8l$MH42QG3x2?s4_{gr{pKa7Z>NLGWf^zIAi^C)IJ$;T&SsXAtkT!lw{Qh1bONbx S4=Rhyv8+lqGMOC2+y(#?zfAW4 diff --git a/test_data/data/csv_iso.csv b/test_data/data/csv_iso.csv index 0d53936..d8a94e5 100644 --- a/test_data/data/csv_iso.csv +++ b/test_data/data/csv_iso.csv @@ -1,4 +1,4 @@ id,other,et,timestamp -1,foo,2024-04-07T17:03:43.252072+UTC,2024-04-07T17:03:43.252290+UTC -2,bar,2024-04-06T17:03:43.252286+UTC,2024-04-08T17:03:43.252290+UTC -3,baz,2024-04-05T17:03:43.252289+UTC,2024-04-09T17:03:43.252290+UTC +1,foo,2024-04-13T23:34:35.782584+UTC,2024-04-13T23:34:35.782691+UTC +2,bar,2024-04-12T23:34:35.782688+UTC,2024-04-14T23:34:35.782691+UTC +3,baz,2024-04-11T23:34:35.782690+UTC,2024-04-15T23:34:35.782692+UTC diff --git a/test_data/data/csv_unix.csv b/test_data/data/csv_unix.csv index 395a9bc..d1da430 100644 --- a/test_data/data/csv_unix.csv +++ b/test_data/data/csv_unix.csv @@ -1,4 +1,4 @@ id,other,et,timestamp -1,foo,1712509423252072,1712509423252290 -2,bar,1712423023252286,1712595823252290 -3,baz,1712336623252289,1712682223252290 +1,foo,1713051275782584,1713051275782691 +2,bar,1712964875782688,1713137675782691 +3,baz,1712878475782690,1713224075782692 diff --git a/test_data/data/parquet_iso.parquet b/test_data/data/parquet_iso.parquet index 04fcf4e9a2301227ec07d7c573175b46a42c8ef8..e4aa5b6cead546e61a71dbbf5c530dab529ca1d8 100644 GIT binary patch delta 256 zcmaFO@rh%?45k*w$=r+zL59X5M#fggCRWC#dgc~JrWPjJp&`z4dP)ooMn*~u9A*|4 zPC&MynG%DDr2$Azlt)sPflX3^DYZnDNsLXbfk|u&quLTC22nQA3yh*AAY--Aj5V_~ zgd1zp1T@doN{Kx} delta 271 zcmeyw@tR}845lW=$vlh-fd=LwhUQiV##ScAdPb&32IfZEp&`z4dP)ooW=2X397Yyq zPC&M)l@f!1B}ht?M^cr6O;Um>wM3Ljj7_Y8No*6N+7c!PQ8v*FjG`GJQ&rGRHL^5- zn`+SnG|tjbiD7aAla$^xMzszW21yw)4$(L<1_lso5+j_sgh_N0BUHG9MReNad5lGj z_a+N6DQkUV6%}JpyT&Yo(>-DxY@!kjYV#(?G0BKR>=9c45)#|ND#oV9A=a|FpXo6p E04!!S;Q#;t diff --git a/test_data/data/parquet_unix.parquet b/test_data/data/parquet_unix.parquet index fe01b657b08f40d0eb4c7a4d7c801935fe448910..25b679ba4e65e5152d977fb0634c8ccd090a9387 100644 GIT binary patch delta 59 zcmdnWv6W-O8qpn@$F4Amu`wucz2Ejul#M}YmH5nC6Azk*DQ7_>luyn5BnDKVqE#xO LEw-7PQHvP>lEM>W delta 59 zcmdnWv6W-O8qthTH&(ZZvN70AX6!2f(vHSj1u+v3nus}lhDbOCZ#X#vDB-lZ{rMWv J&D@My%mA9E6=47X diff --git a/test_data/feature-store.json b/test_data/feature-store.json index 91ff6fb..761d99e 100644 --- a/test_data/feature-store.json +++ b/test_data/feature-store.json @@ -1 +1 @@ -{"metadata": {"created_at": "2024-04-07T17:03:46.274215", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}, "date_formatter": {"name": "noop"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", 
"tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}, {"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger 
survived", "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime-UTC"}, "description": null, "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime-UTC"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, 
"acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null, "exposed_model": null}], "enrichers": []} +{"metadata": {"created_at": "2024-04-13T23:34:37.998308", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": 
false}, "date_formatter": {"name": "noop"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}], "derived_features": [{"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}, {"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "sex", "dtype": 
{"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "updated_at", "dtype": {"name": "datetime-UTC"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, 
"dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime-UTC"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], 
"recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null, "exposed_model": null}], "enrichers": []} diff --git a/test_data/loan.csv b/test_data/loan.csv index b5f71ff..6467696 100644 --- a/test_data/loan.csv +++ b/test_data/loan.csv @@ -1,7 +1,7 @@ -personal_income,event_timestamp,loan_status,loan_amount,loan_id -59000,2020-04-26 18:01:04.746575+00:00,True,35000,10000 -9600,2020-04-26 18:01:04.746575+00:00,False,1000,10001 -9600,2020-04-26 18:01:04.746575+00:00,True,5500,10002 -65500,2020-04-27 18:01:04.746575+00:00,True,35000,10000 -54400,2020-04-27 18:01:04.746575+00:00,True,35000,10001 -9900,2020-04-27 18:01:04.746575+00:00,True,2500,10002 +loan_id,event_timestamp,loan_status,loan_amount,personal_income +10000,1587924064746575,True,35000,59000 +10001,1587924064746575,False,1000,9600 +10002,1587924064746575,True,5500,9600 +10000,1588010464746575,True,35000,65500 +10001,1588010464746575,True,35000,54400 +10002,1588010464746575,True,2500,9900 diff --git a/test_data/test_model.parquet b/test_data/test_model.parquet index e71f3c3fc09b3bfa0928ed8ca20493bfa0da04b8..79719b2dabde4ab3b300fe3b141cd30188e34fec 100644 GIT binary patch delta 53 ucmcb_a*1VvoG=#yCnEzNC%XfK2m=JLOf)nTW&{Z{frOc1!W*k@FaiL$3I=Nc delta 53 tcmcb_a*1VvoG=RmCnEzNC%XfK2m=IgO*Axv2s43%nW4gr8>? Date: Mon, 15 Apr 2024 16:51:14 +0200 Subject: [PATCH 11/13] Updated readme and added a few more tests --- README.md | 54 ++++- aligned/data_source/batch_data_source.py | 59 ++++- .../data_source/tests/test_batch_source.py | 50 ++++ aligned/exposed_model/interface.py | 194 +-------------- aligned/exposed_model/mlflow.py | 224 ++++++++++++++++++ aligned/local/job.py | 6 + aligned/request/retrival_request.py | 21 +- aligned/schemas/feature_view.py | 3 + aligned/schemas/model.py | 3 + 9 files changed, 426 insertions(+), 188 deletions(-) create mode 100644 aligned/data_source/tests/test_batch_source.py create mode 100644 aligned/exposed_model/mlflow.py diff --git a/README.md b/README.md index 7d1ab25..3f77250 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,8 @@ Bellow are some of the features Aligned offers: - [Data Quality Assurance](#data-quality) - [Easy Data Loading](#access-data) - [Feature Store](https://matsmoll.github.io/posts/understanding-the-chaotic-landscape-of-mlops#feature-store) -- [Load Form Multiple Sources](#fast-development) +- [Exposing Models](#exposed-models) +- [Load From Multiple Sources](#fast-development) - [Feature Server](#feature-server) - [Stream Processing](#stream-worker) @@ -91,12 +92,12 @@ All this is described through a `model_contract`, as shown bellow. ```python @model_contract( name="eta_taxi", - features=[ + input_features=[ trips.eucledian_distance, trips.number_of_passengers, traffic.expected_delay ], - prediction_source=FileSource.delta_at("titanic_model/predictions") + output_source=FileSource.delta_at("titanic_model/predictions") ) class EtaTaxi: trip_id = Int32().as_entity() @@ -183,6 +184,51 @@ class TitanicPassenger: is_male, is_female = sex.one_hot_encode(['male', 'female']) ``` +### Exposed models + +Aligned mainly focuses on defining the expected input and output of different models. However, this in itself makes it hard to use the models. This is why Aligned makes it possible to define how our ML models are exposed by setting an `exposed_model` attribute. 
+
+
+```python
+from aligned.exposed_model.mlflow import mlflow_server
+
+@model_contract(
+    name="eta_taxi",
+    exposed_model=mlflow_server(
+        host="http://localhost:8000",
+    ),
+    ...
+)
+class EtaTaxi:
+    trip_id = Int32().as_entity()
+    predicted_at = EventTimestamp()
+    predicted_duration = trips.duration.as_regression_target()
+```
+
+This also makes it possible to get predictions with the following command:
+
+```python
+await store.model("eta_taxi").predict_over({
+    "trip_id": [...]
+}).to_polars()
+```
+
+Or store them directly in the `output_source` with something like:
+
+```python
+await store.model("eta_taxi").predict_over({
+    "trip_id": [...]
+}).upsert_into_output_source()
+```
+
+Some of the existing implementations are:
+- MLFlow Server
+- Run an MLFlow model in memory
+- Ollama completion endpoint
+- Ollama embedding endpoint
+- Send entities to a generic endpoint
+
+
 ### Fast development

 Fast, iterative exploration is important in ML. This is why Aligned also makes it easy to combine and test multiple sources.
@@ -226,7 +272,7 @@ location = LocationFeatures()

 @model_contract(
     name="titanic",
-    features=[ # aka. the model input
+    input_features=[
         passenger.constant_filled_age,
         passenger.ordinal_sex,
         passenger.sibsp,
diff --git a/aligned/data_source/batch_data_source.py b/aligned/data_source/batch_data_source.py
index 8054ef7..4c28bdd 100644
--- a/aligned/data_source/batch_data_source.py
+++ b/aligned/data_source/batch_data_source.py
@@ -2,7 +2,7 @@
 from copy import copy
 from datetime import timedelta, timezone, datetime
-from typing import TYPE_CHECKING, TypeVar, Any, Callable, Coroutine
+from typing import TYPE_CHECKING, Awaitable, TypeVar, Any, Callable, Coroutine
 from dataclasses import dataclass
 from uuid import uuid4
@@ -120,6 +120,49 @@ def _serialize(self) -> dict:
     def __hash__(self) -> int:
         return hash(self.job_group_key())

+    def transform_with_polars(
+        self,
+        method: Callable[[pl.LazyFrame], Awaitable[pl.LazyFrame]] | Callable[[pl.LazyFrame], pl.LazyFrame],
+    ) -> BatchDataSource:
+        async def all(request: RetrivalRequest, limit: int | None) -> pl.LazyFrame:
+            import inspect
+
+            df = await self.all_data(request, limit).to_lazy_polars()
+
+            if inspect.iscoroutinefunction(method):
+                return await method(df)
+            else:
+                return method(df)
+
+        async def all_between_dates(
+            request: RetrivalRequest, start_date: datetime, end_date: datetime
+        ) -> pl.LazyFrame:
+            import inspect
+
+            df = await self.all_between_dates(request, start_date, end_date).to_lazy_polars()
+
+            if inspect.iscoroutinefunction(method):
+                return await method(df)
+            else:
+                return method(df)
+
+        async def features_for(entities: RetrivalJob, request: RetrivalRequest) -> pl.LazyFrame:
+            import inspect
+
+            df = await self.features_for(entities, request).to_lazy_polars()
+
+            if inspect.iscoroutinefunction(method):
+                return await method(df)
+            else:
+                return method(df)
+
+        return CustomMethodDataSource.from_methods(
+            all_data=all,
+            all_between_dates=all_between_dates,
+            features_for=features_for,
+            depends_on_sources=self.location_id(),
+        )
+
     def contains_config(self, config: Any) -> bool:
         """
         Checks if a data source contains a source config.
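The `transform_with_polars` method added above turns any existing batch source into a derived source that stays serializable. As a minimal sketch of how it might be called from user code (the `FileSource.csv_at` source, the file path, and the column names are assumptions for illustration, not part of this patch):

```python
import polars as pl

from aligned import FileSource

# A hypothetical CSV source, assumed to have `fare` and `tip` columns
trips = FileSource.csv_at('test_data/trips.csv')

# A sync lambda works here, and an `async def` would too, since the
# wrapper checks `inspect.iscoroutinefunction` before deciding to await
with_total = trips.transform_with_polars(
    lambda df: df.with_columns(total_cost=pl.col('fare') + pl.col('tip'))
)
```

Because `CustomMethodDataSource.from_methods` pickles the callables with `dill`, the wrapped source survives a serialize/deserialize round trip, which is exactly what the new `test_batch_source.py` tests below verify.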
@@ -161,6 +204,9 @@ def _deserialize(cls, value: dict) -> BatchDataSource: data_class = BatchDataSourceFactory.shared().supported_data_sources[name_type] return data_class.from_dict(value) + def all_columns(self, limit: int | None = None) -> RetrivalJob: + return self.all(RequestResult.empty(), limit=limit) + def all(self, result: RequestResult, limit: int | None = None) -> RetrivalJob: return self.all_data( result.as_retrival_request('read_all', location=FeatureLocation.feature_view('read_all')), @@ -193,7 +239,7 @@ def all_between_dates( return FileDateJob(self, request=request, start_date=start_date, end_date=end_date) - raise NotImplementedError() + raise NotImplementedError(type(self)) @classmethod def multi_source_features_for( @@ -298,6 +344,9 @@ async def freshness(self, event_timestamp: EventTimestamp) -> datetime | None: def filter(self, condition: DerivedFeature | Feature) -> BatchDataSource: return FilteredDataSource(self, condition) + def location_id(self) -> set[FeatureLocation]: + return self.depends_on() + def depends_on(self) -> set[FeatureLocation]: return set() @@ -308,6 +357,7 @@ class CustomMethodDataSource(BatchDataSource): all_data_method: bytes all_between_dates_method: bytes features_for_method: bytes + depends_on_sources: set[FeatureLocation] | None = None type_name: str = 'custom_method' @@ -363,6 +413,7 @@ def from_methods( | None = None, features_for: Callable[[RetrivalJob, RetrivalRequest], Coroutine[None, None, pl.LazyFrame]] | None = None, + depends_on_sources: set[FeatureLocation] | None = None, ) -> 'CustomMethodDataSource': import dill @@ -379,12 +430,16 @@ def from_methods( all_data_method=dill.dumps(all_data), all_between_dates_method=dill.dumps(all_between_dates), features_for_method=dill.dumps(features_for), + depends_on_sources=depends_on_sources, ) @staticmethod def default_throw(**kwargs: Any) -> pl.LazyFrame: raise NotImplementedError('No method is defined for this data source.') + def depends_on(self) -> set[FeatureLocation]: + return self.depends_on_sources or set() + @dataclass class FilteredDataSource(BatchDataSource): diff --git a/aligned/data_source/tests/test_batch_source.py b/aligned/data_source/tests/test_batch_source.py new file mode 100644 index 0000000..4f129f1 --- /dev/null +++ b/aligned/data_source/tests/test_batch_source.py @@ -0,0 +1,50 @@ +import polars as pl +import json +from aligned.data_source.batch_data_source import BatchDataSource +from aligned.request.retrival_request import RetrivalRequest +from aligned.sources.local import CsvFileSource +import pytest + + +@pytest.mark.asyncio +async def test_custom_transformation_as_lambda(scan_without_datetime: CsvFileSource) -> None: + + new_source = scan_without_datetime.transform_with_polars( + lambda df: df.with_columns(bucket=pl.col('id').mod(3)) + .groupby('bucket') + .agg( + pl.col('radius_mean').sum().alias('sum_radius_mean'), + ) + ) + + df = await new_source.all_data(RetrivalRequest.all_data(), limit=None).to_polars() + + source_as_json = new_source.to_json() + + ds = BatchDataSource._deserialize(json.loads(source_as_json)) + new_df = await ds.all_data(RetrivalRequest.all_data(), limit=None).to_polars() + + assert new_df.sort('bucket').equals(df.sort('bucket').select(new_df.columns)) + + +@pytest.mark.asyncio +async def test_custom_transformation_as_function(scan_without_datetime: CsvFileSource) -> None: + async def custom_function(df: pl.LazyFrame) -> pl.LazyFrame: + return ( + df.with_columns(bucket=pl.col('id').mod(3)) + .groupby('bucket') + .agg( + 
pl.col('radius_mean').sum().alias('sum_radius_mean'), + ) + ) + + new_source = scan_without_datetime.transform_with_polars(custom_function) + + df = await new_source.all_data(RetrivalRequest.all_data(), limit=None).to_polars() + + source_as_json = new_source.to_json() + + ds = BatchDataSource._deserialize(json.loads(source_as_json)) + new_df = await ds.all_data(RetrivalRequest.all_data(), limit=None).to_polars() + + assert new_df.sort('bucket').equals(df.sort('bucket').select(new_df.columns)) diff --git a/aligned/exposed_model/interface.py b/aligned/exposed_model/interface.py index 158518c..c072975 100644 --- a/aligned/exposed_model/interface.py +++ b/aligned/exposed_model/interface.py @@ -2,7 +2,7 @@ import polars as pl from typing import TYPE_CHECKING -from dataclasses import dataclass, field +from dataclasses import dataclass from aligned.retrival_job import RetrivalJob from aligned.schemas.codable import Codable from mashumaro.types import SerializableType @@ -22,12 +22,16 @@ class PredictorFactory: _shared: PredictorFactory | None = None def __init__(self): + from aligned.exposed_model.mlflow import MLFlowServer, InMemMLFlowAlias + self.supported_predictors = {} types: list[type[ExposedModel]] = [ EnitityPredictor, OllamaGeneratePredictor, OllamaEmbeddingPredictor, + MLFlowServer, + InMemMLFlowAlias, ] for predictor in types: self.supported_predictors[predictor.model_type] = predictor @@ -114,7 +118,9 @@ def in_memory_mlflow( model_alias: str, model_contract_version_tag: str | None = None, ): - return InMemMLFlowAlias( + from aligned.exposed_model.mlflow import in_memory_mlflow + + return in_memory_mlflow( model_name=model_name, model_alias=model_alias, model_contract_version_tag=model_contract_version_tag, @@ -123,12 +129,14 @@ def in_memory_mlflow( @staticmethod def mlflow_server( host: str, - model_name: str, - model_alias: str, + model_alias: str | None = None, + model_name: str | None = None, model_contract_version_tag: str | None = None, timeout: int = 30, ): - return MLFlowServer( + from aligned.exposed_model.mlflow import mlflow_server + + return mlflow_server( host=host, model_name=model_name, model_alias=model_alias, @@ -315,179 +323,3 @@ async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl. 
return prompts.hstack([pl.Series(name=self.embedding_name, values=ret_vals)]).with_columns( pl.lit(model_version).alias('model_version') ) - - -@dataclass -class InMemMLFlowAlias(ExposedModel): - - model_name: str - model_alias: str - - model_contract_version_tag: str | None - - model_type: str = 'latest_mlflow' - - @property - def exposed_at_url(self) -> str | None: - return None - - @property - def as_markdown(self) -> str: - return f"""Using the latest MLFlow model: `{self.model_name}`.""" - - def get_model_version(self): - from mlflow.tracking import MlflowClient - - mlflow_client = MlflowClient() - - return mlflow_client.get_model_version_by_alias(self.model_name, self.model_alias) - - def contract_version(self, model_version) -> str: - version = 'default' - if self.model_contract_version_tag: - if self.model_contract_version_tag not in model_version.tags: # noqa - raise ValueError( - f"Model contract version tag {self.model_contract_version_tag} not " - 'found in model version tags' - ) - else: - version = model_version.tags[self.model_contract_version_tag] - return version - - async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: - mv = self.get_model_version() - version = self.contract_version(mv) - return store.feature_references_for(version) - - async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: - mv = self.get_model_version() - version = self.contract_version(mv) - return store.using_version(version).needed_entities() - - async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: - import mlflow - import polars as pl - from datetime import datetime, timezone - - pred_label = list(store.model.predictions_view.labels())[0] - pred_at = store.model.predictions_view.event_timestamp - model_version_column = store.model.predictions_view.model_version_column - mv = None - - if model_version_column: - mv = self.get_model_version() - - model_uri = f"models:/{self.model_name}@{self.model_alias}" - mv = self.get_model_version() - - model = mlflow.pyfunc.load_model(model_uri) - - job = store.features_for(values) - df = await job.to_polars() - - features = job.request_result.feature_columns - predictions = model.predict(df[features]) - - if pred_at: - df = df.with_columns( - pl.lit(datetime.now(timezone.utc)).alias(pred_at.name), - ) - - if mv and model_version_column: - df = df.with_columns( - pl.lit(mv.run_id).alias(model_version_column.name), - ) - - return df.with_columns( - pl.Series(name=pred_label.name, values=predictions), - ) - - -@dataclass -class MLFlowServer(ExposedModel): - - host: str - - model_name: str - model_alias: str - model_contract_version_tag: str | None - - timeout: int = field(default=30) - - model_type: str = 'mlflow_server' - - @property - def exposed_at_url(self) -> str | None: - return self.host - - @property - def as_markdown(self) -> str: - return f"""Using a MLFlow server at `{self.host}`. 
-Assumes that it is the model: `{self.model_name}` with alias: `{self.model_alias}`, and will load the features needed for that model based on the input version defined at tag `{self.model_contract_version_tag}`.""" # noqa: E501
-
-    def get_model_version(self):
-        from mlflow.tracking import MlflowClient
-
-        mlflow_client = MlflowClient()
-        return mlflow_client.get_model_version_by_alias(self.model_name, self.model_alias)
-
-    def contract_version(self, model_version) -> str:
-        version = 'default'
-        if self.model_contract_version_tag:
-            if self.model_contract_version_tag not in model_version.tags:  # noqa
-                raise ValueError(
-                    f"Model contract version tag {self.model_contract_version_tag} not "
-                    'found in model version tags'
-                )
-            else:
-                version = model_version.tags[self.model_contract_version_tag]
-        return version
-
-    async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]:
-        mv = self.get_model_version()
-        version = self.contract_version(mv)
-        return store.feature_references_for(version)
-
-    async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]:
-        mv = self.get_model_version()
-        version = self.contract_version(mv)
-        return store.using_version(version).needed_entities()
-
-    async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame:
-        import polars as pl
-        from httpx import AsyncClient
-        from datetime import datetime, timezone
-
-        pred_label = list(store.model.predictions_view.labels())[0]
-        pred_at = store.model.predictions_view.event_timestamp
-        model_version_column = store.model.predictions_view.model_version_column
-        mv = None
-
-        if model_version_column:
-            mv = self.get_model_version()
-
-        job = store.features_for(values)
-        df = await job.to_polars()
-
-        features = job.request_result.feature_columns
-
-        async with AsyncClient(timeout=self.timeout) as client:
-            response = await client.post(
-                f'{self.host}/invocations', json={'dataframe_records': df[features].to_dicts()}
-            )
-            response.raise_for_status()
-            preds = response.json()['predictions']
-
-        if pred_at:
-            df = df.with_columns(
-                pl.lit(datetime.now(timezone.utc)).alias(pred_at.name),
-            )
-
-        if mv and model_version_column:
-            df = df.with_columns(
-                pl.lit(mv.run_id).alias(model_version_column.name),
-            )
-
-        return df.with_columns(
-            pl.Series(name=pred_label.name, values=preds),
-        )
diff --git a/aligned/exposed_model/mlflow.py b/aligned/exposed_model/mlflow.py
new file mode 100644
index 0000000..323b3c0
--- /dev/null
+++ b/aligned/exposed_model/mlflow.py
@@ -0,0 +1,224 @@
+from __future__ import annotations
+
+import polars as pl
+from typing import TYPE_CHECKING
+from dataclasses import dataclass, field
+from aligned.retrival_job import RetrivalJob
+from aligned.exposed_model.interface import ExposedModel
+
+from aligned.schemas.feature import Feature, FeatureReference
+
+if TYPE_CHECKING:
+    from aligned.feature_store import ModelFeatureStore
+
+
+def in_memory_mlflow(
+    model_name: str,
+    model_alias: str,
+    model_contract_version_tag: str | None = None,
+):
+    """A model that is loaded from MLFlow using the given model name and alias.
+
+    This will also run the model in memory, so it needs the `mlflow` package installed, but no running MLFlow server.
+ """ + return InMemMLFlowAlias( + model_name=model_name, + model_alias=model_alias, + model_contract_version_tag=model_contract_version_tag, + ) + + +def mlflow_server( + host: str, + model_alias: str | None = None, + model_name: str | None = None, + model_contract_version_tag: str | None = None, + timeout: int = 30, +): + """Calls an MLFlow server to get the model predictions. + + This will load the model version from mlflow to get the expected model contract, and version name. + """ + return MLFlowServer( + host=host, + model_name=model_name, + model_alias=model_alias or 'champion', + model_contract_version_tag=model_contract_version_tag, + timeout=timeout, + ) + + +@dataclass +class InMemMLFlowAlias(ExposedModel): + + model_name: str + model_alias: str + + model_contract_version_tag: str | None + + model_type: str = 'latest_mlflow' + + @property + def exposed_at_url(self) -> str | None: + return None + + @property + def as_markdown(self) -> str: + return f"""Using the latest MLFlow model: `{self.model_name}`.""" + + def get_model_version(self): + from mlflow.tracking import MlflowClient + + mlflow_client = MlflowClient() + + return mlflow_client.get_model_version_by_alias(self.model_name, self.model_alias) + + def contract_version(self, model_version) -> str: + version = 'default' + if self.model_contract_version_tag: + if self.model_contract_version_tag not in model_version.tags: # noqa + raise ValueError( + f"Model contract version tag {self.model_contract_version_tag} not " + 'found in model version tags' + ) + else: + version = model_version.tags[self.model_contract_version_tag] + return version + + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: + mv = self.get_model_version() + version = self.contract_version(mv) + return store.feature_references_for(version) + + async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: + mv = self.get_model_version() + version = self.contract_version(mv) + return store.using_version(version).needed_entities() + + async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: + import mlflow + import polars as pl + from datetime import datetime, timezone + + pred_label = list(store.model.predictions_view.labels())[0] + pred_at = store.model.predictions_view.event_timestamp + model_version_column = store.model.predictions_view.model_version_column + mv = None + + if model_version_column: + mv = self.get_model_version() + + model_uri = f"models:/{self.model_name}@{self.model_alias}" + mv = self.get_model_version() + + model = mlflow.pyfunc.load_model(model_uri) + + job = store.features_for(values) + df = await job.to_polars() + + features = job.request_result.feature_columns + predictions = model.predict(df[features]) + + if pred_at: + df = df.with_columns( + pl.lit(datetime.now(timezone.utc)).alias(pred_at.name), + ) + + if mv and model_version_column: + df = df.with_columns( + pl.lit(mv.run_id).alias(model_version_column.name), + ) + + return df.with_columns( + pl.Series(name=pred_label.name, values=predictions), + ) + + +@dataclass +class MLFlowServer(ExposedModel): + + host: str + + model_alias: str + model_name: str | None + model_contract_version_tag: str | None + + timeout: int = field(default=30) + + model_type: str = 'mlflow_server' + + @property + def exposed_at_url(self) -> str | None: + return self.host + + @property + def as_markdown(self) -> str: + return f"""Using a MLFlow server at `{self.host}`. 
+Assumes that it is the model: `{self.model_name}` with alias: `{self.model_alias}`, and will load the features needed for that model based on the input version defined at tag `{self.model_contract_version_tag}`.""" # noqa: E501 + + def get_model_version(self, model_name: str): + from mlflow.tracking import MlflowClient + + mlflow_client = MlflowClient() + return mlflow_client.get_model_version_by_alias(self.model_name or model_name, self.model_alias) + + def contract_version(self, model_version) -> str: + version = 'default' + if self.model_contract_version_tag: + if self.model_contract_version_tag not in model_version.tags: # noqa + raise ValueError( + f"Model contract version tag {self.model_contract_version_tag} not " + 'found in model version tags' + ) + else: + version = model_version.tags[self.model_contract_version_tag] + return version + + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: + mv = self.get_model_version(store.model.name) + version = self.contract_version(mv) + return store.feature_references_for(version) + + async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: + mv = self.get_model_version(store.model.name) + version = self.contract_version(mv) + return store.using_version(version).needed_entities() + + async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: + import polars as pl + from httpx import AsyncClient + from datetime import datetime, timezone + + pred_label = list(store.model.predictions_view.labels())[0] + pred_at = store.model.predictions_view.event_timestamp + model_version_column = store.model.predictions_view.model_version_column + mv = None + + if model_version_column: + mv = self.get_model_version(store.model.name) + + job = store.features_for(values) + df = await job.to_polars() + + features = job.request_result.feature_columns + + async with AsyncClient(timeout=self.timeout) as client: + response = await client.post( + f'{self.host}/invocations', json={'dataframe_records': df[features].to_dicts()} + ) + response.raise_for_status() + preds = response.json()['predictions'] + + if pred_at: + df = df.with_columns( + pl.lit(datetime.now(timezone.utc)).alias(pred_at.name), + ) + + if mv and model_version_column: + df = df.with_columns( + pl.lit(mv.run_id).alias(model_version_column.name), + ) + + return df.with_columns( + pl.Series(name=pred_label.name, values=preds), + ) diff --git a/aligned/local/job.py b/aligned/local/job.py index bd4b577..1081457 100644 --- a/aligned/local/job.py +++ b/aligned/local/job.py @@ -199,6 +199,9 @@ def describe(self) -> str: async def file_transform_polars(self, df: pl.LazyFrame) -> pl.LazyFrame: from aligned.data_source.batch_data_source import ColumnFeatureMappable + if not self.request.features_to_include: + return df + if self.request.aggregated_features: first_feature = list(self.request.aggregated_features)[0] if first_feature.name in df.columns: @@ -274,6 +277,9 @@ def retrival_requests(self) -> list[RetrivalRequest]: def file_transform_polars(self, df: pl.LazyFrame) -> pl.LazyFrame: from aligned.data_source.batch_data_source import ColumnFeatureMappable + if not self.request.features_to_include: + return df + entity_names = self.request.entity_names all_names = list(self.request.all_required_feature_names.union(entity_names)) diff --git a/aligned/request/retrival_request.py b/aligned/request/retrival_request.py index 35b63ca..91a1c62 100644 --- a/aligned/request/retrival_request.py +++ b/aligned/request/retrival_request.py 
@@ -207,7 +207,7 @@ def without_event_timestamp(self, name_sufix: str | None = None) -> 'RetrivalReq request = EventTimestampRequest(self.event_timestamp_request.event_timestamp, None) return RetrivalRequest( - name=f'{self.name}{name_sufix or ""}', + name=f'{self.name}{name_sufix or ' '}', location=self.location, entities=self.entities, features=self.features, @@ -230,6 +230,21 @@ def with_event_timestamp_column(self, column: str) -> 'RetrivalRequest': event_timestamp_request=et_request, ) + @staticmethod + def all_data() -> 'RetrivalRequest': + """ + This is a hack to tell aligned that we want all the data, and no filtering should be done. + """ + return RetrivalRequest( + name='', + location=FeatureLocation.feature_view(''), + entities=set(), + features=set(), + derived_features=set(), + aggregated_features=set(), + event_timestamp_request=None, + ) + @staticmethod def combine(requests: list['RetrivalRequest']) -> list['RetrivalRequest']: grouped_requests: dict[FeatureLocation, RetrivalRequest] = {} @@ -340,6 +355,10 @@ def filter_features(self, features_to_include: set[str]) -> 'RequestResult': event_timestamp=self.event_timestamp, ) + @staticmethod + def all_data() -> 'RequestResult': + return RequestResult(entities=set(), features=set(), event_timestamp=None) + @staticmethod def from_request(request: RetrivalRequest) -> 'RequestResult': return RequestResult( diff --git a/aligned/schemas/feature_view.py b/aligned/schemas/feature_view.py index 465f99e..9b88bc9 100644 --- a/aligned/schemas/feature_view.py +++ b/aligned/schemas/feature_view.py @@ -329,6 +329,9 @@ class FeatureViewReferenceSource(BatchDataSource): def job_group_key(self) -> str: return FeatureLocation.feature_view(self.view.name).identifier + def location_id(self) -> set[FeatureLocation]: + return {FeatureLocation.feature_view(self.view.name)} + async def schema(self) -> dict[str, FeatureType]: if self.view.materialized_source: return await self.view.materialized_source.schema() diff --git a/aligned/schemas/model.py b/aligned/schemas/model.py index bd4b58f..af17211 100644 --- a/aligned/schemas/model.py +++ b/aligned/schemas/model.py @@ -204,6 +204,9 @@ class ModelSource(BatchDataSource): def job_group_key(self) -> str: return FeatureLocation.model(self.pred_view.name).identifier + def location_id(self) -> set[FeatureLocation]: + return {FeatureLocation.model(self.model.name)} + async def schema(self) -> dict[str, FeatureType]: if self.model.predictions_view.source: return await self.model.predictions_view.source.schema() From a04a0954cccec1a064c0b42c8a791f430843453b Mon Sep 17 00:00:00 2001 From: "Mats E. 
Mollestad" Date: Mon, 15 Apr 2024 21:29:49 +0200 Subject: [PATCH 12/13] Fixed tests --- aligned/feature_view/feature_view.py | 7 +++++++ aligned/request/retrival_request.py | 6 +++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/aligned/feature_view/feature_view.py b/aligned/feature_view/feature_view.py index 1778b6f..ff84108 100644 --- a/aligned/feature_view/feature_view.py +++ b/aligned/feature_view/feature_view.py @@ -418,6 +418,13 @@ def drop_invalid(self, data: ConvertableData, validator: Validator | None = None else: raise ValueError(f'Invalid data type: {type(data)}') + def as_source(self, renames: dict[str, str] | None = None) -> BatchDataSource: + from aligned.schemas.feature_view import FeatureViewReferenceSource + + return FeatureViewReferenceSource( + self.compile(), FeatureLocation.feature_view(self.metadata.name), renames=renames or {} + ) + class FeatureView(ABC): """ diff --git a/aligned/request/retrival_request.py b/aligned/request/retrival_request.py index 91a1c62..69dc347 100644 --- a/aligned/request/retrival_request.py +++ b/aligned/request/retrival_request.py @@ -206,8 +206,12 @@ def without_event_timestamp(self, name_sufix: str | None = None) -> 'RetrivalReq if self.event_timestamp_request: request = EventTimestampRequest(self.event_timestamp_request.event_timestamp, None) + name = self.name + if name_sufix: + name = f'{name}{name_sufix}' + return RetrivalRequest( - name=f'{self.name}{name_sufix or ' '}', + name=name, location=self.location, entities=self.entities, features=self.features, From 9eab0df6e37bf5c8a11fc53eb1f649290421d709 Mon Sep 17 00:00:00 2001 From: "Mats E. Mollestad" Date: Tue, 16 Apr 2024 21:38:57 +0200 Subject: [PATCH 13/13] Updated Readme --- README.md | 178 ++++------------------------ aligned/exposed_model/interface.py | 151 +----------------------- aligned/exposed_model/ollama.py | 183 +++++++++++++++++++++++++++++ aligned/feature_store.py | 2 +- aligned/sources/local.py | 5 +- pyproject.toml | 2 +- 6 files changed, 215 insertions(+), 306 deletions(-) diff --git a/README.md b/README.md index 3f77250..a625ca5 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ View the [`MatsMoll/aligned-example` repo](https://github.com/MatsMoll/aligned-e Or see how you could query a file in a data lake. ```python -store = await FeatureStore.from_dir(".") +store = await ContractStore.from_dir(".") df = await store.execute_sql("SELECT * FROM titanic LIMIT 10").to_polars() ``` @@ -50,12 +50,8 @@ Bellow are some of the features Aligned offers: - [Model Performance Monitoring](https://aligned-managed-web.vercel.app/) - [Data Freshness](#data-freshness) - [Data Quality Assurance](#data-quality) -- [Easy Data Loading](#access-data) - [Feature Store](https://matsmoll.github.io/posts/understanding-the-chaotic-landscape-of-mlops#feature-store) - [Exposing Models](#exposed-models) -- [Load From Multiple Sources](#fast-development) -- [Feature Server](#feature-server) -- [Stream Processing](#stream-worker) All from the simple API of defining @@ -112,20 +108,19 @@ Alinged makes handling data sources easy, as you do not have to think about how Furthermore, Aligned makes it easy to switch parts of the business logic to a local setup for debugging purposes. 
 ```python
-from aligned import FileSource, AwsS3Config, AzureBlobConfig, Directory
-import os
+from typing import Literal
+
+from aligned import FileSource, AwsS3Config, AzureBlobConfig
 
-root_directory: Directory = FileSource.directory("my-awesome-project")
-
-if os.getenv("USE_AWS", "false").lower() == "true":
+dir_type: Literal["local", "aws", "azure"] = ...
+if dir_type == "aws":
     aws_config = AwsS3Config(...)
     root_directory = aws_config.directory("my-awesome-project")
 
-elif os.getenv("USE_AZURE", "false").lower() == "true":
-
+elif dir_type == "azure":
     azure_config = AzureBlobConfig(...)
     root_directory = azure_config.directory("my-awesome-project")
 
+else:
+    root_directory = FileSource.directory("my-awesome-project")
+
 taxi_project = root_directory.sub_directory("eta_taxi")
 
@@ -228,65 +223,6 @@ Some of the existing implementations are:
 
 - Ollama embedded endpoint
 - Send entities to generic endpoint
 
-
-### Fast development
-
-Making iterativ and fast exploration in ML is important. This is why Aligned also makes it super easy to combine, and test multiple sources.
-
-```python
-my_db = PostgreSQLConfig.localhost()
-
-aws_bucket = AwsS3Config(...)
-
-@feature_view(
-    name="passengers",
-    description="...",
-    source=my_db.table("passengers")
-)
-class TitanicPassenger:
-
-    passenger_id = Int32().as_entity()
-
-    # Some features
-    ...
-
-# Change data source
-passenger_view = TitanicPassenger.query()
-
-psql_passengers = await passenger_view.all().to_pandas()
-aws_passengers = await passenger_view.using_source(
-    aws_bucket.parquet_at("passengers.parquet")
-).to_pandas()
-
-```
-
-## Describe Models
-
-Usually will you need to combine multiple features for each model.
-This is where a `Model` comes in.
-Here can you define which features should be exposed.
-
-```python
-passenger = TitanicPassenger()
-location = LocationFeatures()
-
-@model_contract(
-    name="titanic",
-    input_features=[
-        passenger.constant_filled_age,
-        passenger.ordinal_sex,
-        passenger.sibsp,
-
-        location.distance_to_shore,
-        location.distance_to_closest_boat
-    ]
-)
-class Titanic:
-
-    # Referencing the passenger's survived feature as the target
-    did_survive = passenger.survived.as_classification_target()
-```
-
 ## Data Freshness
 
 Making sure a source contains fresh data is crucial to creating proper ML applications. Therefore, Aligned provides an easy way to check how fresh a source is.
@@ -318,66 +254,6 @@ if freshness < datetime.now() - timedelta(days=2):
     raise ValueError("Too old data to create an ML model")
 ```
 
-## Access Data
-
-You can easily create a feature store that contains all your feature definitions.
-This can then be used to genreate data sets, setup an instce to serve features, DAG's etc.
-
-```python
-store = await FileSource.json_at("./feature-store.json").feature_store()
-
-# Select all features from a single feature view
-df = await store.all_for("passenger", limit=100).to_pandas()
-```
-
-### Centraliced Feature Store Definition
-You would often share the features with other coworkers, or split them into different stages, like `staging`, `shadow`, or `production`.
-One option is therefore to reference the storage you use, and load the `FeatureStore` from there.
-
-```python
-aws_bucket = AwsS3Config(...)
-store = await aws_bucket.json_at("production.json").feature_store()
-
-# This switches from the production online store to the offline store
-# Aka. the batch sources defined on the feature views
-experimental_store = store.offline_store()
-```
-This json file can be generated by running `aligned apply`.
-
-### Select multiple feature views
-
-```python
-df = await store.features_for({
-    "passenger_id": [1, 50, 110]
-}, features=[
-    "passenger:scaled_age",
-    "passenger:is_male",
-    "passenger:sibsp"
-
-    "other_features:distance_to_closest_boat",
-]).to_polars()
-```
-
-### Model Service
-
-Selecting features for a model is super simple.
-
-
-```python
-df = await store.model("titanic_model").features_for({
-    "passenger_id": [1, 50, 110]
-}).to_pandas()
-```
-
-### Feature View
-
-If you want to only select features for a specific feature view, then this is also possible.
-
-```python
-prev_30_days = await store.feature_view("match").previous(days=30).to_pandas()
-sample_of_20 = await store.feature_view("match").all(limit=20).to_pandas()
-```
-
 ## Data quality
 Aligned will make sure all the different features get formatted as the correct datatype. In addition, Aligned will also make sure that the returned features align with defined constraints.
@@ -409,40 +285,28 @@ df = await store.model("titanic_model").features_for({
 ).to_pandas()
 ```
 
-## Feature Server
-
-You can define how to serve your features with the `FeatureServer`. Here can you define where you want to load, and potentially write your features to.
+## Contract Store
 
-By default will it `aligned` look for a file called `server.py`, and a `FeatureServer` object called `server`. However, this can be defined manually as well.
+Aligned collects all the feature views and model contracts in a contract store. You can generate this in a few different ways, and each method serves different use cases.
 
-```python
-from aligned import RedisConfig, FileSource
-from aligned.schemas.repo_definition import FeatureServer
+For experimental use cases, `await ContractStore.from_dir(".")` will probably make the most sense. However, this will scan the full directory, which can lead to slow startup times.
 
-store = FileSource.json_at("feature-store.json")
+Therefore, it is also possible to manually add the different feature views and contracts with the following.
 
-server = FeatureServer.from_reference(
-    store,
-    RedisConfig.localhost()
-)
+```python
+store = ContractStore.empty()
+store.add_feature_view(MyView)
+store.add_model(MyModel)
 ```
 
-Then run `aligned serve`, and a FastAPI server will start. Here can you push new features, which then transforms and stores the features, or just fetch them.
-
-## Stream Worker
-
-You can also setup stream processing with a similar structure. However, here will a `StreamWorker` be used.
-
-by default will `aligned` look for a `worker.py` file with an object called `worker`. An example would be the following.
+This makes it possible to define different contracts per project or team. As a result, you can also combine different stores:
 
 ```python
-from aligned import RedisConfig, FileSource
-from aligned.schemas.repo_definition import FeatureServer
+combined_store = recommendation_store.combined_with(forecasting_store)
+```
 
-store = FileSource.json_at("feature-store.json")
+Lastly, we can also load all the features from a serializable format, such as a JSON file.
-server = FeatureServer.from_reference( - store, - RedisConfig.localhost() -) +```python +await FileSource.json_at("contracts.json").as_contract_store() ``` diff --git a/aligned/exposed_model/interface.py b/aligned/exposed_model/interface.py index c072975..7e6e81e 100644 --- a/aligned/exposed_model/interface.py +++ b/aligned/exposed_model/interface.py @@ -23,6 +23,7 @@ class PredictorFactory: def __init__(self): from aligned.exposed_model.mlflow import MLFlowServer, InMemMLFlowAlias + from aligned.exposed_model.ollama import OllamaGeneratePredictor, OllamaEmbeddingPredictor self.supported_predictors = {} @@ -86,7 +87,8 @@ def ollama_generate( model: str, prompt_template: str, input_features_versions: str, - ) -> 'OllamaGeneratePredictor': + ) -> 'ExposedModel': + from aligned.exposed_model.ollama import OllamaGeneratePredictor return OllamaGeneratePredictor( endpoint=endpoint, @@ -102,7 +104,8 @@ def ollama_embedding( input_features_versions: str, prompt_template: str, embedding_name: str | None = None, - ) -> 'OllamaEmbeddingPredictor': + ) -> 'ExposedModel': + from aligned.exposed_model.ollama import OllamaEmbeddingPredictor return OllamaEmbeddingPredictor( endpoint=endpoint, @@ -179,147 +182,3 @@ async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl. dict_data = dict(response.json()) return pl.DataFrame(data=dict_data) - - -@dataclass -class OllamaGeneratePredictor(ExposedModel): - - endpoint: str - model_name: str - - prompt_template: str - input_features_versions: str - - model_type: str = 'ollama_generate' - - @property - def exposed_at_url(self) -> str | None: - return self.endpoint - - def prompt_template_hash(self) -> str: - from hashlib import sha256 - - return sha256(self.prompt_template.encode(), usedforsecurity=False).hexdigest() - - @property - def as_markdown(self) -> str: - return f"""Sending a `generate` request to an Ollama server located at: {self.endpoint}. - -This will use the model: `{self.model_name}` to generate the responses. - -And use the prompt template: -``` -{self.prompt_template} -```""" - - async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: - return store.feature_references_for(self.input_features_versions) - - async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: - return store.using_version(self.input_features_versions).needed_entities() - - async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: - from ollama import AsyncClient - import polars as pl - - client = AsyncClient(host=self.endpoint) - - entities = await values.to_polars() - - features = store.feature_references_for(self.input_features_versions) - expected_cols = {feat.name for feat in features} - missing_cols = expected_cols - set(entities.columns) - - if missing_cols: - entities = await ( - store.using_version(self.input_features_versions).features_for(values).to_polars() - ) - - prompts = entities - - ret_vals = [] - model_version = f"{self.prompt_template_hash()} -> {self.model_name}" - - for value in prompts.iter_rows(named=True): - prompt = self.prompt_template.format(**value) - - response = await client.generate(self.model_name, prompt, stream=False) - - if isinstance(response, dict): - response['model_version'] = model_version - else: - logger.info(f"Unable to log prompt to the Ollama response. 
Got: {type(response)}") - - ret_vals.append(response) - - return prompts.hstack(pl.DataFrame(ret_vals)) - - -@dataclass -class OllamaEmbeddingPredictor(ExposedModel): - - endpoint: str - model_name: str - embedding_name: str - - prompt_template: str - input_features_versions: str - - model_type: str = 'ollama_embedding' - - @property - def exposed_at_url(self) -> str | None: - return self.endpoint - - def prompt_template_hash(self) -> str: - from hashlib import sha256 - - return sha256(self.prompt_template.encode(), usedforsecurity=False).hexdigest() - - @property - def as_markdown(self) -> str: - return f"""Sending a `embedding` request to an Ollama server located at: {self.endpoint}. - -This will use the model: `{self.model_name}` to generate the embeddings.""" - - async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: - return store.feature_references_for(self.input_features_versions) - - async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: - return store.using_version(self.input_features_versions).needed_entities() - - async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: - from ollama import AsyncClient - import polars as pl - - client = AsyncClient(host=self.endpoint) - - expected_cols = [feat.name for feat in store.feature_references_for(self.input_features_versions)] - - entities = await values.to_polars() - missing_cols = set(expected_cols) - set(entities.columns) - if missing_cols: - entities = ( - await store.using_version(self.input_features_versions).features_for(values).to_polars() - ) - - prompts = entities - - ret_vals = [] - - for value in prompts.iter_rows(named=True): - prompt = self.prompt_template.format(**value) - - response = await client.embeddings(self.model_name, prompt) - - if isinstance(response, dict): - embedding = response['embedding'] # type: ignore - else: - embedding = response - - ret_vals.append(embedding) - - model_version = f"{self.prompt_template_hash()} -> {self.model_name}" - return prompts.hstack([pl.Series(name=self.embedding_name, values=ret_vals)]).with_columns( - pl.lit(model_version).alias('model_version') - ) diff --git a/aligned/exposed_model/ollama.py b/aligned/exposed_model/ollama.py index 3bcf0f7..24b47e5 100644 --- a/aligned/exposed_model/ollama.py +++ b/aligned/exposed_model/ollama.py @@ -1,3 +1,5 @@ +from dataclasses import dataclass +from typing import TYPE_CHECKING from aligned.compiler.model import ModelContractWrapper from aligned.compiler.feature_factory import ( Embedding, @@ -14,11 +16,192 @@ from aligned.data_source.batch_data_source import BatchDataSource from aligned.exposed_model.interface import ExposedModel +from aligned.schemas.feature import Feature, FeatureReference +from aligned.retrival_job import RetrivalJob +import polars as pl +if TYPE_CHECKING: + from aligned.feature_store import ModelFeatureStore logger = logging.getLogger(__name__) +@dataclass +class OllamaGeneratePredictor(ExposedModel): + + endpoint: str + model_name: str + + prompt_template: str + input_features_versions: str + + model_type: str = 'ollama_generate' + + @property + def exposed_at_url(self) -> str | None: + return self.endpoint + + def prompt_template_hash(self) -> str: + from hashlib import sha256 + + return sha256(self.prompt_template.encode(), usedforsecurity=False).hexdigest() + + @property + def as_markdown(self) -> str: + return f"""Sending a `generate` request to an Ollama server located at: {self.endpoint}. 
+
+This will use the model: `{self.model_name}` to generate the responses.
+
+And use the prompt template:
+```
+{self.prompt_template}
+```"""
+
+    async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]:
+        return store.feature_references_for(self.input_features_versions)
+
+    async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]:
+        return store.using_version(self.input_features_versions).needed_entities()
+
+    async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame:
+        from ollama import AsyncClient
+        import polars as pl
+
+        client = AsyncClient(host=self.endpoint)
+
+        entities = await values.to_polars()
+
+        features = store.feature_references_for(self.input_features_versions)
+        expected_cols = {feat.name for feat in features}
+        missing_cols = expected_cols - set(entities.columns)
+
+        if missing_cols:
+            entities = await (
+                store.using_version(self.input_features_versions).features_for(values).to_polars()
+            )
+
+        prompts = entities
+
+        ret_vals = []
+        model_version = f"{self.prompt_template_hash()} -> {self.model_name}"
+
+        for value in prompts.iter_rows(named=True):
+            prompt = self.prompt_template.format(**value)
+
+            response = await client.generate(self.model_name, prompt, stream=False)
+
+            if isinstance(response, dict):
+                response['model_version'] = model_version
+            else:
+                logger.info(f"Unable to log prompt to the Ollama response. Got: {type(response)}")
+
+            ret_vals.append(response)
+
+        return prompts.hstack(pl.DataFrame(ret_vals))
+
+
+@dataclass
+class OllamaEmbeddingPredictor(ExposedModel):
+
+    endpoint: str
+    model_name: str
+    embedding_name: str
+
+    prompt_template: str
+    input_features_versions: str
+
+    model_type: str = 'ollama_embedding'
+
+    @property
+    def exposed_at_url(self) -> str | None:
+        return self.endpoint
+
+    def prompt_template_hash(self) -> str:
+        from hashlib import sha256
+
+        return sha256(self.prompt_template.encode(), usedforsecurity=False).hexdigest()
+
+    @property
+    def as_markdown(self) -> str:
+        return f"""Sending an `embedding` request to an Ollama server located at: {self.endpoint}.
+ +This will use the model: `{self.model_name}` to generate the embeddings.""" + + async def needed_features(self, store: ModelFeatureStore) -> list[FeatureReference]: + return store.feature_references_for(self.input_features_versions) + + async def needed_entities(self, store: ModelFeatureStore) -> set[Feature]: + return store.using_version(self.input_features_versions).needed_entities() + + async def run_polars(self, values: RetrivalJob, store: ModelFeatureStore) -> pl.DataFrame: + from ollama import AsyncClient + import polars as pl + + client = AsyncClient(host=self.endpoint) + + expected_cols = [feat.name for feat in store.feature_references_for(self.input_features_versions)] + + entities = await values.to_polars() + missing_cols = set(expected_cols) - set(entities.columns) + if missing_cols: + entities = ( + await store.using_version(self.input_features_versions).features_for(values).to_polars() + ) + + prompts = entities + + ret_vals = [] + + for value in prompts.iter_rows(named=True): + prompt = self.prompt_template.format(**value) + + response = await client.embeddings(self.model_name, prompt) + + if isinstance(response, dict): + embedding = response['embedding'] # type: ignore + else: + embedding = response + + ret_vals.append(embedding) + + model_version = f"{self.prompt_template_hash()} -> {self.model_name}" + return prompts.hstack([pl.Series(name=self.embedding_name, values=ret_vals)]).with_columns( + pl.lit(model_version).alias('model_version') + ) + + +def ollama_generate( + endpoint: str, + model: str, + prompt_template: str, + input_features_versions: str, +) -> 'OllamaGeneratePredictor': + + return OllamaGeneratePredictor( + endpoint=endpoint, + model_name=model, + prompt_template=prompt_template, + input_features_versions=input_features_versions, + ) + + +def ollama_embedding( + endpoint: str, + model: str, + input_features_versions: str, + prompt_template: str, + embedding_name: str | None = None, +) -> 'OllamaEmbeddingPredictor': + + return OllamaEmbeddingPredictor( + endpoint=endpoint, + model_name=model, + prompt_template=prompt_template, + input_features_versions=input_features_versions, + embedding_name=embedding_name or 'embedding', + ) + + class OllamaGeneration: model: String diff --git a/aligned/feature_store.py b/aligned/feature_store.py index ebebd57..93a0779 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -230,7 +230,7 @@ def repo_definition(self) -> RepoDefinition: enrichers=[], ) - def combine(self, other: ContractStore) -> ContractStore: + def combined_with(self, other: ContractStore) -> ContractStore: """ Combines two different feature stores together. 
""" diff --git a/aligned/sources/local.py b/aligned/sources/local.py index 1d641bd..5053872 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -37,11 +37,14 @@ class AsRepoDefinition: async def as_repo_definition(self) -> RepoDefinition: raise NotImplementedError() - async def feature_store(self) -> ContractStore: + async def as_contract_store(self) -> ContractStore: from aligned.feature_store import ContractStore return ContractStore.from_definition(await self.as_repo_definition()) + async def feature_store(self) -> ContractStore: + return await self.as_contract_store() + class StorageFileReference(AsRepoDefinition): """ diff --git a/pyproject.toml b/pyproject.toml index af9c0f5..251b052 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "aligned" -version = "0.0.92" +version = "0.0.93" description = "A data managment and lineage tool for ML applications." authors = ["Mats E. Mollestad "] license = "Apache-2.0"