diff --git a/.gitignore b/.gitignore
index ac13d492..61d8d83c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@ mlruns
 test_data/feature-store.json
 test_data/mlruns
+test_data/temp
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/README.md b/README.md
index a44ad6d8..1ac1160a 100644
--- a/README.md
+++ b/README.md
@@ -98,7 +98,7 @@ All this is described through a `model_contract`, as shown bellow.
 )
 class EtaTaxi:
     trip_id = Int32().as_entity()
-    predicted_at = EventTimestamp()
+    predicted_at = ValidFrom()
     predicted_duration = trips.duration.as_regression_target()
 ```
diff --git a/aligned/data_source/batch_data_source.py b/aligned/data_source/batch_data_source.py
index c68549ba..b7e31b97 100644
--- a/aligned/data_source/batch_data_source.py
+++ b/aligned/data_source/batch_data_source.py
@@ -33,7 +33,12 @@ class BatchDataSourceFactory:
     _shared: BatchDataSourceFactory | None = None
 
     def __init__(self) -> None:
-        from aligned.sources.local import CsvFileSource, ParquetFileSource, DeltaFileSource
+        from aligned.sources.local import (
+            CsvFileSource,
+            ParquetFileSource,
+            DeltaFileSource,
+            PartitionedParquetFileSource,
+        )
         from aligned.sources.psql import PostgreSQLDataSource
         from aligned.sources.redshift import RedshiftSQLDataSource
         from aligned.sources.s3 import AwsS3CsvDataSource, AwsS3ParquetDataSource
@@ -49,6 +54,7 @@ def __init__(self) -> None:
             PostgreSQLDataSource,
             # File Sources
             ParquetFileSource,
+            PartitionedParquetFileSource,
             CsvFileSource,
             DeltaFileSource,
             # Aws Sources
diff --git a/aligned/feature_view/feature_view.py b/aligned/feature_view/feature_view.py
index ff841083..1c63dc85 100644
--- a/aligned/feature_view/feature_view.py
+++ b/aligned/feature_view/feature_view.py
@@ -57,6 +57,7 @@ class FeatureViewMetadata:
     stream_source: StreamDataSource | None = field(default=None)
    application_source: BatchDataSource | None = field(default=None)
    materialized_source: BatchDataSource | None = field(default=None)
+    materialize_from: datetime | None = field(default=None)
    contacts: list[str] | None = field(default=None)
    tags: list[str] | None = field(default=None)
    acceptable_freshness: timedelta | None = field(default=None)
@@ -72,6 +73,7 @@ def from_compiled(view: CompiledFeatureView) -> FeatureViewMetadata:
             stream_source=view.stream_data_source,
             application_source=view.application_source,
             materialized_source=view.materialized_source,
+            materialize_from=view.materialize_from,
             acceptable_freshness=view.acceptable_freshness,
             unacceptable_freshness=view.unacceptable_freshness,
         )
@@ -97,6 +99,7 @@ def feature_view(
     stream_source: StreamDataSource | None = None,
     application_source: BatchDataSource | None = None,
     materialized_source: BatchDataSource | None = None,
+    materialize_from: datetime | None = None,
     contacts: list[str] | None = None,
     tags: list[str] | None = None,
     acceptable_freshness: timedelta | None = None,
@@ -114,6 +117,7 @@ def decorator(cls: Type[T]) -> FeatureViewWrapper[T]:
             stream_source=stream_source,
             application_source=application_source,
             materialized_source=materialized_source,
+            materialize_from=materialize_from,
             contacts=contacts,
             tags=tags,
             acceptable_freshness=acceptable_freshness,
@@ -508,6 +512,7 @@ def compile_with_metadata(feature_view: Any, metadata: FeatureViewMetadata) -> C
         stream_data_source=metadata.stream_source,
         application_source=metadata.application_source,
         materialized_source=metadata.materialized_source,
+        materialize_from=metadata.materialize_from,
         acceptable_freshness=metadata.acceptable_freshness,
         unacceptable_freshness=metadata.unacceptable_freshness,
         indexes=[],
diff --git a/aligned/local/tests/test_jobs.py b/aligned/local/tests/test_jobs.py
index b450a07b..50d0436f 100644
--- a/aligned/local/tests/test_jobs.py
+++ b/aligned/local/tests/test_jobs.py
@@ -40,7 +40,7 @@ async def test_file_full_job_polars(retrival_request_without_derived: RetrivalRe
 
 @pytest.mark.asyncio
 async def test_write_and_read_feature_store(titanic_feature_store_scd: ContractStore) -> None:
-    source = FileSource.json_at('test_data/feature-store.json')
+    source = FileSource.json_at('test_data/temp/feature-store.json')
     definition = titanic_feature_store_scd.repo_definition()
     await source.write(definition.to_json().encode('utf-8'))
     store = await source.feature_store()
diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py
index e816df11..9a25fe1d 100644
--- a/aligned/retrival_job.py
+++ b/aligned/retrival_job.py
@@ -931,9 +931,12 @@ def ignore_event_timestamp(self) -> RetrivalJob:
             return self.copy_with(self.job.ignore_event_timestamp())
         raise NotImplementedError('Not implemented ignore_event_timestamp')
 
-    def polars_method(self, polars_method: Callable[[pl.LazyFrame], pl.LazyFrame]) -> RetrivalJob:
+    def transform_polars(self, polars_method: Callable[[pl.LazyFrame], pl.LazyFrame]) -> RetrivalJob:
         return CustomPolarsJob(self, polars_method)
 
+    def polars_method(self, polars_method: Callable[[pl.LazyFrame], pl.LazyFrame]) -> RetrivalJob:
+        return self.transform_polars(polars_method)
+
     @staticmethod
     def from_dict(data: dict[str, list], request: list[RetrivalRequest] | RetrivalRequest) -> RetrivalJob:
         if isinstance(request, RetrivalRequest):
diff --git a/aligned/schemas/feature_view.py b/aligned/schemas/feature_view.py
index 9b88bc9a..7f1a67cd 100644
--- a/aligned/schemas/feature_view.py
+++ b/aligned/schemas/feature_view.py
@@ -37,6 +37,8 @@ class CompiledFeatureView(Codable):
 
     application_source: BatchDataSource | None = field(default=None)
     materialized_source: BatchDataSource | None = field(default=None)
+    materialize_from: datetime | None = field(default=None)
+
     acceptable_freshness: timedelta | None = field(default=None)
     unacceptable_freshness: timedelta | None = field(default=None)
 
diff --git a/aligned/sources/azure_blob_storage.py b/aligned/sources/azure_blob_storage.py
index aeef5127..7cdd8bbf 100644
--- a/aligned/sources/azure_blob_storage.py
+++ b/aligned/sources/azure_blob_storage.py
@@ -22,6 +22,7 @@
     ParquetConfig,
     StorageFileReference,
     Directory,
+    PartitionedParquetFileSource,
     data_file_freshness,
 )
 from aligned.storage import Storage
@@ -88,6 +89,16 @@ def parquet_at(
             self, path, mapping_keys=mapping_keys or {}, date_formatter=date_formatter or DateFormatter.noop()
         )
 
+    def partitioned_parquet_at(
+        self,
+        directory: str,
+        partition_keys: list[str],
+        mapping_keys: dict[str, str] | None = None,
+        config: ParquetConfig | None = None,
+        date_formatter: DateFormatter | None = None,
+    ) -> PartitionedParquetFileSource:
+        raise NotImplementedError(type(self))
+
     def csv_at(
         self,
         path: str,
diff --git a/aligned/sources/local.py b/aligned/sources/local.py
index 50538721..f8d2154f 100644
--- a/aligned/sources/local.py
+++ b/aligned/sources/local.py
@@ -375,6 +375,109 @@ class ParquetConfig(Codable):
     should_write_index: bool = field(default=False)
 
 
+@dataclass
+class PartitionedParquetFileSource(BatchDataSource, ColumnFeatureMappable, DataFileReference):
+    """
+    A source pointing to a directory of Parquet files, partitioned on one or more columns
+    """
+
+    directory: str
+    partition_keys: list[str]
+    mapping_keys: dict[str, str] = field(default_factory=dict)
+    config: ParquetConfig = field(default_factory=ParquetConfig)
+    date_formatter: DateFormatter = field(default_factory=lambda: DateFormatter.noop())
+
+    type_name: str = 'partition_parquet'
+
+    @property
+    def to_markdown(self) -> str:
+        return f'''#### Partitioned Parquet File
+*Partition keys*: {self.partition_keys}
+
+*Renames*: {self.mapping_keys}
+
+*Directory*: {self.directory}
+
+[Go to directory]({self.directory})'''  # noqa
+
+    def job_group_key(self) -> str:
+        return f'{self.type_name}/{self.directory}'
+
+    def __hash__(self) -> int:
+        return hash(self.job_group_key())
+
+    async def to_pandas(self) -> pd.DataFrame:
+        return (await self.to_lazy_polars()).collect().to_pandas()
+
+    async def to_lazy_polars(self) -> pl.LazyFrame:
+
+        glob_path = f'{self.directory}/**/*.parquet'
+        try:
+            return pl.scan_parquet(glob_path, retries=3)
+        except OSError:
+            raise UnableToFindFileException(self.directory)
+
+    async def write_polars(self, df: pl.LazyFrame) -> None:
+        create_parent_dir(self.directory)
+        df.collect().write_parquet(
+            self.directory,
+            compression=self.config.compression,
+            use_pyarrow=True,
+            pyarrow_options={
+                'partition_cols': self.partition_keys,
+            },
+        )
+
+    def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob:
+        return FileFullJob(self, request, limit, date_formatter=self.date_formatter)
+
+    def all_between_dates(
+        self, request: RetrivalRequest, start_date: datetime, end_date: datetime
+    ) -> RetrivalJob:
+        return FileDateJob(
+            source=self,
+            request=request,
+            start_date=start_date,
+            end_date=end_date,
+            date_formatter=self.date_formatter,
+        )
+
+    @classmethod
+    def multi_source_features_for(
+        cls, facts: RetrivalJob, requests: list[tuple[PartitionedParquetFileSource, RetrivalRequest]]
+    ) -> RetrivalJob:
+
+        source = requests[0][0]
+        if not isinstance(source, cls):
+            raise ValueError(f'Only {cls} is supported, received: {source}')
+
+        # Group based on config
+        return FileFactualJob(
+            source=source,
+            requests=[request for _, request in requests],
+            facts=facts,
+            date_formatter=source.date_formatter,
+        )
+
+    async def schema(self) -> dict[str, FeatureType]:
+        # The partition values live in the directory names, so read the
+        # schema from the data files themselves through a glob scan.
+        glob_path = f'{self.directory}/**/*.parquet'
+        parquet_schema = pl.scan_parquet(glob_path).schema
+
+        return {name: FeatureType.from_polars(pl_type) for name, pl_type in parquet_schema.items()}
+
+    async def feature_view_code(self, view_name: str) -> str:
+        from aligned.feature_view.feature_view import FeatureView
+
+        raw_schema = await self.schema()
+        schema = {name: feat.feature_factory for name, feat in raw_schema.items()}
+        data_source_code = f'FileSource.partitioned_parquet_at("{self.directory}", {self.partition_keys})'
+        return FeatureView.feature_view_code_template(
+            schema, data_source_code, view_name, 'from aligned import FileSource'
+        )
+
+
 @dataclass
 class ParquetFileSource(BatchDataSource, ColumnFeatureMappable, DataFileReference):
     """
@@ -642,6 +745,16 @@ def csv_at(
     ) -> BatchDataSource:
         ...
 
+    def partitioned_parquet_at(
+        self,
+        directory: str,
+        partition_keys: list[str],
+        mapping_keys: dict[str, str] | None = None,
+        config: ParquetConfig | None = None,
+        date_formatter: DateFormatter | None = None,
+    ) -> PartitionedParquetFileSource:
+        ...
+
     def parquet_at(
         self, path: str, mapping_keys: dict[str, str] | None = None, config: ParquetConfig | None = None
     ) -> BatchDataSource:
         ...
 
@@ -688,6 +801,23 @@ def parquet_at(
             path=self.path_string(path), mapping_keys=mapping_keys or {}, config=config or ParquetConfig()
         )
 
+    def partitioned_parquet_at(
+        self,
+        directory: str,
+        partition_keys: list[str],
+        mapping_keys: dict[str, str] | None = None,
+        config: ParquetConfig | None = None,
+        date_formatter: DateFormatter | None = None,
+    ) -> PartitionedParquetFileSource:
+
+        return PartitionedParquetFileSource(
+            directory=self.path_string(directory),
+            partition_keys=partition_keys,
+            mapping_keys=mapping_keys or {},
+            config=config or ParquetConfig(),
+            date_formatter=date_formatter or DateFormatter.noop(),
+        )
+
     def delta_at(
         self, path: str, mapping_keys: dict[str, str] | None = None, config: DeltaFileConfig | None = None
     ) -> DeltaFileSource:
@@ -729,6 +859,23 @@ def csv_at(
             formatter=date_formatter or DateFormatter.iso_8601(),
         )
 
+    @staticmethod
+    def partitioned_parquet_at(
+        directory: str,
+        partition_keys: list[str],
+        mapping_keys: dict[str, str] | None = None,
+        config: ParquetConfig | None = None,
+        date_formatter: DateFormatter | None = None,
+    ) -> PartitionedParquetFileSource:
+
+        return PartitionedParquetFileSource(
+            directory=directory,
+            partition_keys=partition_keys,
+            mapping_keys=mapping_keys or {},
+            config=config or ParquetConfig(),
+            date_formatter=date_formatter or DateFormatter.noop(),
+        )
+
     @staticmethod
     def parquet_at(
         path: str,
diff --git a/aligned/sources/s3.py b/aligned/sources/s3.py
index 12f0a9af..7c7a50fa 100644
--- a/aligned/sources/s3.py
+++ b/aligned/sources/s3.py
@@ -16,9 +16,11 @@
     CsvConfig,
     DataFileReference,
     ParquetConfig,
+    PartitionedParquetFileSource,
     StorageFileReference,
     Directory,
     DeltaFileConfig,
+    DateFormatter,
 )
 from aligned.storage import Storage
 
@@ -114,6 +116,16 @@ def parquet_at(
             parquet_config=config or ParquetConfig(),
         )
 
+    def partitioned_parquet_at(
+        self,
+        directory: str,
+        partition_keys: list[str],
+        mapping_keys: dict[str, str] | None = None,
+        config: ParquetConfig | None = None,
+        date_formatter: DateFormatter | None = None,
+    ) -> PartitionedParquetFileSource:
+        raise NotImplementedError(type(self))
+
     def delta_at(
         self, path: str, mapping_keys: dict[str, str] | None = None, config: DeltaFileConfig | None = None
     ) -> BatchDataSource:
diff --git a/aligned/sources/tests/test_parquet.py b/aligned/sources/tests/test_parquet.py
index 03035573..0b5f79a3 100644
--- a/aligned/sources/tests/test_parquet.py
+++ b/aligned/sources/tests/test_parquet.py
@@ -35,7 +35,53 @@ async def test_read_parquet(point_in_time_data_test: DataTest) -> None:
 
 
 @pytest.mark.asyncio
-async def test_parquest(point_in_time_data_test: DataTest) -> None:
+async def test_partition_parquet(point_in_time_data_test: DataTest) -> None:
+    store = ContractStore.experimental()
+
+    agg_features: list[str] = []
+
+    for source in point_in_time_data_test.sources:
+        view = source.view
+        view_name = view.metadata.name
+
+        compiled = view.compile()
+
+        if '_agg' in view_name:
+            agg_features.extend([feat.name for feat in compiled.aggregated_features])
+            continue
+
+        entities = compiled.entitiy_names
+
+        file_source = FileSource.partitioned_parquet_at(
+            f'test_data/temp/{view_name}',
+            partition_keys=list(entities),
+        )
+        await file_source.write_polars(source.data.lazy())
+
+        view.metadata = FeatureView.metadata_with(  # type: ignore
+            name=view.metadata.name,
+            description=view.metadata.description,
+            batch_source=file_source,
+        )
+        store.add_feature_view(view)
+
+    job = store.features_for(
+        point_in_time_data_test.entities,
+        [feat for feat in point_in_time_data_test.feature_reference if '_agg' not in feat],
+        event_timestamp_column='event_timestamp',
+    )
+    data = (await job.to_lazy_polars()).collect()
+
+    expected = point_in_time_data_test.expected_output.drop(agg_features)
+    assert expected.shape == data.shape, f'Expected: {expected.shape}\nGot: {data.shape}'
+    assert set(expected.columns) == set(data.columns), f'Expected: {expected.columns}\nGot: {data.columns}'
+
+    ordered_columns = data.select(expected.columns)
+    assert ordered_columns.equals(expected), f'Expected: {expected}\nGot: {ordered_columns}'
+
+
+@pytest.mark.asyncio
+async def test_parquet(point_in_time_data_test: DataTest) -> None:
 
     store = ContractStore.experimental()
diff --git a/pyproject.toml b/pyproject.toml
index a7904b5e..f076a95a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "aligned"
-version = "0.0.96"
+version = "0.0.97"
 description = "A data managment and lineage tool for ML applications."
 authors = ["Mats E. Mollestad <mats@mollestad.no>"]
 license = "Apache-2.0"
diff --git a/test_data/feature-store.json b/test_data/feature-store.json
deleted file mode 100644
index 62816617..00000000
--- a/test_data/feature-store.json
+++ /dev/null
@@ -1 +0,0 @@
-{"metadata": {"created_at": "2024-05-01T19:45:33.202948", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "%Y-%m-%dT%H:%M:%S%.f+%Z", "time_unit": null, "time_zone": null, "name": "string_form"}, "expected_schema": null}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime-UTC"}, "description": null, "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1},
{"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "name_embedding", "dtype": {"name": "embedding-0"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding-768"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime-UTC"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding-0"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": 
"REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}, {"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}, "date_formatter": {"name": "noop"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": 
{"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "event_timestamp": null, "model_version_column": null, "is_shadow_model_flag": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null, "exposed_model": null}], "enrichers": []}