diff --git a/aligned/feature_store.py b/aligned/feature_store.py index cbce7fe..38ce8a5 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -676,22 +676,20 @@ async def insert_into( import polars as pl columns = write_request.all_returned_columns - new_df = (await values.to_lazy_polars()).select(columns) - try: - if isinstance(source, ColumnFeatureMappable): - new_cols = source.feature_identifier_for(columns) - - mappings = dict(zip(columns, new_cols)) - values = values.rename(mappings) - columns = new_cols - existing_df = (await source.to_lazy_polars()).rename(mappings) - else: - existing_df = await source.to_lazy_polars() + try: + existing_df = await source.to_lazy_polars() write_df = pl.concat([new_df, existing_df.select(columns)], how='vertical_relaxed') except UnableToFindFileException: write_df = new_df + + if isinstance(source, ColumnFeatureMappable): + new_cols = source.feature_identifier_for(columns) + + mappings = dict(zip(columns, new_cols)) + write_df = write_df.rename(mappings) + await source.write_polars(write_df) else: raise ValueError(f'The source {type(source)} do not support writes') diff --git a/aligned/sources/local.py b/aligned/sources/local.py index f338d46..e79d210 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -149,7 +149,7 @@ async def upsert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non request = requests[0] - data = await job.to_lazy_polars() + data = (await job.to_lazy_polars()).select(request.all_returned_columns) potential_timestamps = request.all_features if request.event_timestamp: @@ -160,8 +160,8 @@ async def upsert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non data = data.with_columns(self.formatter.encode_polars(feature.name)) if self.mapping_keys: - mapping = {self.mapping_keys.get(name, name): name for name in data.columns} - data = data.rename(mapping) + columns = self.feature_identifier_for(data.columns) + data = data.rename(dict(zip(data.columns, columns))) new_df = data.select(request.all_returned_columns) entities = list(request.entity_names) @@ -179,23 +179,22 @@ async def insert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non request = requests[0] - data = await job.to_lazy_polars() + data = (await job.to_lazy_polars()).select(request.all_returned_columns) for feature in request.features: if feature.dtype.name == 'datetime': data = data.with_columns(self.formatter.encode_polars(feature.name)) if self.mapping_keys: - mapping = {self.mapping_keys.get(name, name): name for name in data.columns} - data = data.rename(mapping) + columns = self.feature_identifier_for(data.columns) + data = data.rename(dict(zip(data.columns, columns))) try: existing_df = await self.to_lazy_polars() - write_df = pl.concat([data, existing_df.select(data.columns)], how='vertical_relaxed') except UnableToFindFileException: write_df = data - await self.write_polars(write_df.select(request.all_returned_columns)) + await self.write_polars(write_df) async def write_pandas(self, df: pd.DataFrame) -> None: create_parent_dir(self.path) diff --git a/aligned/tests/test_model_target.py b/aligned/tests/test_model_target.py index a08c5be..c2f1112 100644 --- a/aligned/tests/test_model_target.py +++ b/aligned/tests/test_model_target.py @@ -166,7 +166,8 @@ class TestModel: a = Int32() store = FeatureStore.experimental() - initial_frame = pl.DataFrame({'id': [1, 2, 3], 'a': [1, 2, 3]}) + + initial_frame = pl.DataFrame({'some_id': [1, 2, 3], 'a': [1, 2, 3]}) initial_frame.write_csv(path) expected_frame = pl.DataFrame({'id': [1, 2, 3, 1, 2, 3], 'a': [10, 14, 20, 1, 2, 3]}) @@ -175,7 +176,7 @@ class TestModel: await store.insert_into(FeatureLocation.model('test_model'), {'id': [1, 2, 3], 'a': [10, 14, 20]}) - preds = await store.model('test_model').all_predictions().to_polars() + preds = await store.model('test_model').all_predictions().log_each_job().to_polars() stored_data = pl.read_csv(path).select(id=pl.col('some_id'), a=pl.col('a')) assert stored_data.equals(expected_frame) diff --git a/test_data/credit_history.csv b/test_data/credit_history.csv index 23af17c..b2a681c 100644 --- a/test_data/credit_history.csv +++ b/test_data/credit_history.csv @@ -1,7 +1,7 @@ -due_sum,dob_ssn,credit_card_due,bankruptcies,student_loan_due,event_timestamp -30747,19530219_5179,8419,0,22328,1587924064746575 -5459,19520816_8737,2944,0,2515,1587924064746575 -33833,19860413_2537,833,0,33000,1587924064746575 -54891,19530219_5179,5936,0,48955,1588010464746575 -11076,19520816_8737,1575,0,9501,1588010464746575 -41773,19860413_2537,6263,0,35510,1588010464746575 +due_sum,student_loan_due,credit_card_due,event_timestamp,bankruptcies,dob_ssn +30747,22328,8419,1587924064746575,0,19530219_5179 +5459,2515,2944,1587924064746575,0,19520816_8737 +33833,33000,833,1587924064746575,0,19860413_2537 +54891,48955,5936,1588010464746575,0,19530219_5179 +11076,9501,1575,1588010464746575,0,19520816_8737 +41773,35510,6263,1588010464746575,0,19860413_2537 diff --git a/test_data/credit_history_mater.parquet b/test_data/credit_history_mater.parquet index b6416ab..3ff9af6 100644 Binary files a/test_data/credit_history_mater.parquet and b/test_data/credit_history_mater.parquet differ diff --git a/test_data/feature-store.json b/test_data/feature-store.json index 9665338..a9353a2 100644 --- a/test_data/feature-store.json +++ b/test_data/feature-store.json @@ -1 +1 @@ -{"metadata": {"created_at": "2024-03-02T10:34:30.080269", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic_parquet", "tags": {}, "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}], "derived_features": [{"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}, {"name": "titanic", "tags": {}, "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null}], "enrichers": []} +{"metadata": {"created_at": "2024-03-05T16:45:11.675666", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "derived_features": [{"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}, {"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "updated_at", "dtype": {"name": "datetime"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null}], "enrichers": []} diff --git a/test_data/loan.csv b/test_data/loan.csv index f7165d8..9add4e6 100644 --- a/test_data/loan.csv +++ b/test_data/loan.csv @@ -1,7 +1,7 @@ -loan_amount,loan_status,loan_id,event_timestamp,personal_income -35000,True,10000,1587924064746575,59000 -1000,False,10001,1587924064746575,9600 -5500,True,10002,1587924064746575,9600 -35000,True,10000,1588010464746575,65500 -35000,True,10001,1588010464746575,54400 -2500,True,10002,1588010464746575,9900 +loan_id,loan_amount,event_timestamp,loan_status,personal_income +10000,35000,1587924064746575,True,59000 +10001,1000,1587924064746575,False,9600 +10002,5500,1587924064746575,True,9600 +10000,35000,1588010464746575,True,65500 +10001,35000,1588010464746575,True,54400 +10002,2500,1588010464746575,True,9900 diff --git a/test_data/test_model.csv b/test_data/test_model.csv index 9bfa2bd..decf326 100644 --- a/test_data/test_model.csv +++ b/test_data/test_model.csv @@ -1,7 +1,7 @@ -some_id,a -1,10 -2,14 -3,20 +a,some_id +10,1 +14,2 +20,3 1,1 2,2 3,3 diff --git a/test_data/titanic-sets.json b/test_data/titanic-sets.json index 2b344e4..5a9ac45 100644 --- a/test_data/titanic-sets.json +++ b/test_data/titanic-sets.json @@ -1 +1 @@ -{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} +{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} diff --git a/test_data/titanic-test.csv b/test_data/titanic-test.csv index c83fe88..f6706c0 100644 --- a/test_data/titanic-test.csv +++ b/test_data/titanic-test.csv @@ -1,21 +1,21 @@ -name,sibsp,is_male,cabin,age,passenger_id,survived,has_siblings,is_female,is_mr,sex -"Sirayanian, Mr. Orsen",0,True,,22.0,61,False,False,False,True,male -"Icard, Miss. Amelie",0,False,B28,38.0,62,True,False,True,False,female -"Harris, Mr. Henry Birkhardt",1,True,C83,45.0,63,False,True,False,True,male -"Skoog, Master. Harald",3,True,,4.0,64,False,True,False,False,male -"Stewart, Mr. Albert A",0,True,,,65,False,False,False,True,male -"Moubarek, Master. Gerios",1,True,,,66,True,True,False,False,male -"Nye, Mrs. (Elizabeth Ramell)",0,False,F33,29.0,67,True,False,True,True,female -"Crease, Mr. Ernest James",0,True,,19.0,68,False,False,False,True,male -"Andersson, Miss. Erna Alexandra",4,False,,17.0,69,True,True,True,False,female -"Kink, Mr. Vincenz",2,True,,26.0,70,False,True,False,True,male -"Jenkin, Mr. Stephen Curnow",0,True,,32.0,71,False,False,False,True,male -"Goodwin, Miss. Lillian Amy",5,False,,16.0,72,False,True,True,False,female -"Hood, Mr. Ambrose Jr",0,True,,21.0,73,False,False,False,True,male -"Chronopoulos, Mr. Apostolos",1,True,,26.0,74,False,True,False,True,male -"Bing, Mr. Lee",0,True,,32.0,75,True,False,False,True,male -"Moen, Mr. Sigurd Hansen",0,True,F G73,25.0,76,False,False,False,True,male -"Staneff, Mr. Ivan",0,True,,,77,False,False,False,True,male -"Moutal, Mr. Rahamin Haim",0,True,,,78,False,False,False,True,male -"Caldwell, Master. Alden Gates",0,True,,0.83,79,True,False,False,False,male -"Dowdell, Miss. Elizabeth",0,False,,30.0,80,True,False,True,False,female +age,cabin,name,sibsp,survived,sex,has_siblings,is_male,is_mr,is_female,passenger_id +22.0,,"Sirayanian, Mr. Orsen",0,False,male,False,True,True,False,61 +38.0,B28,"Icard, Miss. Amelie",0,True,female,False,False,False,True,62 +45.0,C83,"Harris, Mr. Henry Birkhardt",1,False,male,True,True,True,False,63 +4.0,,"Skoog, Master. Harald",3,False,male,True,True,False,False,64 +,,"Stewart, Mr. Albert A",0,False,male,False,True,True,False,65 +,,"Moubarek, Master. Gerios",1,True,male,True,True,False,False,66 +29.0,F33,"Nye, Mrs. (Elizabeth Ramell)",0,True,female,False,False,True,True,67 +19.0,,"Crease, Mr. Ernest James",0,False,male,False,True,True,False,68 +17.0,,"Andersson, Miss. Erna Alexandra",4,True,female,True,False,False,True,69 +26.0,,"Kink, Mr. Vincenz",2,False,male,True,True,True,False,70 +32.0,,"Jenkin, Mr. Stephen Curnow",0,False,male,False,True,True,False,71 +16.0,,"Goodwin, Miss. Lillian Amy",5,False,female,True,False,False,True,72 +21.0,,"Hood, Mr. Ambrose Jr",0,False,male,False,True,True,False,73 +26.0,,"Chronopoulos, Mr. Apostolos",1,False,male,True,True,True,False,74 +32.0,,"Bing, Mr. Lee",0,True,male,False,True,True,False,75 +25.0,F G73,"Moen, Mr. Sigurd Hansen",0,False,male,False,True,True,False,76 +,,"Staneff, Mr. Ivan",0,False,male,False,True,True,False,77 +,,"Moutal, Mr. Rahamin Haim",0,False,male,False,True,True,False,78 +0.83,,"Caldwell, Master. Alden Gates",0,True,male,False,True,False,False,79 +30.0,,"Dowdell, Miss. Elizabeth",0,True,female,False,False,False,True,80 diff --git a/test_data/titanic-train.csv b/test_data/titanic-train.csv index a957a41..f847eae 100644 --- a/test_data/titanic-train.csv +++ b/test_data/titanic-train.csv @@ -1,61 +1,61 @@ -name,sibsp,is_male,cabin,age,passenger_id,survived,has_siblings,is_female,is_mr,sex -"Braund, Mr. Owen Harris",1,True,,22.0,1,False,True,False,True,male -"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",1,False,C85,38.0,2,True,True,True,True,female -"Heikkinen, Miss. Laina",0,False,,26.0,3,True,False,True,False,female -"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,False,C123,35.0,4,True,True,True,True,female -"Allen, Mr. William Henry",0,True,,35.0,5,False,False,False,True,male -"Moran, Mr. James",0,True,,,6,False,False,False,True,male -"McCarthy, Mr. Timothy J",0,False,E46,54.0,7,False,False,False,True,other -"Palsson, Master. Gosta Leonard",3,True,,2.0,8,False,True,False,False,male -"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",0,False,,27.0,9,True,False,True,True,female -"Nasser, Mrs. Nicholas (Adele Achem)",1,False,,14.0,10,True,True,True,True,female -"Sandstrom, Miss. Marguerite Rut",1,False,G6,4.0,11,True,True,True,False,female -"Bonnell, Miss. Elizabeth",0,False,C103,58.0,12,True,False,True,False,female -"Saundercock, Mr. William Henry",0,True,,20.0,13,False,False,False,True,male -"Andersson, Mr. Anders Johan",1,True,,39.0,14,False,True,False,True,male -"Vestrom, Miss. Hulda Amanda Adolfina",0,False,,14.0,15,False,False,True,False,female -"Hewlett, Mrs. (Mary D Kingcome) ",0,False,,55.0,16,True,False,True,True,female -"Rice, Master. Eugene",4,True,,2.0,17,False,True,False,False,male -"Williams, Mr. Charles Eugene",0,True,,,18,True,False,False,True,male -"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",1,False,,31.0,19,False,True,True,True,female -"Masselmani, Mrs. Fatima",0,False,,,20,True,False,True,True,female -"Fynney, Mr. Joseph J",0,True,,35.0,21,False,False,False,True,male -"Beesley, Mr. Lawrence",0,True,D56,34.0,22,True,False,False,True,male -"McGowan, Miss. Anna ""Annie""",0,False,,15.0,23,True,False,True,False,female -"Sloper, Mr. William Thompson",0,True,A6,28.0,24,True,False,False,True,male -"Palsson, Miss. Torborg Danira",3,False,,8.0,25,False,True,True,False,female -"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",1,False,,38.0,26,True,True,True,True,female -"Emir, Mr. Farred Chehab",0,True,,,27,False,False,False,True,male -"Fortune, Mr. Charles Alexander",3,True,C23 C25 C27,19.0,28,False,True,False,True,male -"O'Dwyer, Miss. Ellen ""Nellie""",0,False,,,29,True,False,True,False,female -"Todoroff, Mr. Lalio",0,True,,,30,False,False,False,True,male -"Uruchurtu, Don. Manuel E",0,True,,40.0,31,False,False,False,False,male -"Spencer, Mrs. William Augustus (Marie Eugenie)",1,False,B78,,32,True,True,True,True,female -"Glynn, Miss. Mary Agatha",0,False,,,33,True,False,True,False,female -"Wheadon, Mr. Edward H",0,True,,66.0,34,False,False,False,True,male -"Meyer, Mr. Edgar Joseph",1,True,,28.0,35,False,True,False,True,male -"Holverson, Mr. Alexander Oskar",1,True,,42.0,36,False,True,False,True,male -"Mamee, Mr. Hanna",0,True,,,37,True,False,False,True,male -"Cann, Mr. Ernest Charles",0,True,,21.0,38,False,False,False,True,male -"Vander Planke, Miss. Augusta Maria",2,False,,18.0,39,False,True,True,False,female -"Nicola-Yarred, Miss. Jamila",1,False,,14.0,40,True,True,True,False,female -"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",1,False,,40.0,41,False,True,True,True,female -"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",1,False,,27.0,42,False,True,True,True,female -"Kraeff, Mr. Theodor",0,True,,,43,False,False,False,True,male -"Laroche, Miss. Simonne Marie Anne Andree",1,False,,3.0,44,True,True,True,False,female -"Devaney, Miss. Margaret Delia",0,False,,19.0,45,True,False,True,False,female -"Rogers, Mr. William John",0,True,,,46,False,False,False,True,male -"Lennon, Mr. Denis",1,True,,,47,False,True,False,True,male -"O'Driscoll, Miss. Bridget",0,False,,,48,True,False,True,False,female -"Samaan, Mr. Youssef",2,True,,,49,False,True,False,True,male -"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",1,False,,18.0,50,False,True,True,True,female -"Panula, Master. Juha Niilo",4,True,,7.0,51,False,True,False,False,male -"Nosworthy, Mr. Richard Cater",0,True,,21.0,52,False,False,False,True,male -"Harper, Mrs. Henry Sleeper (Myna Haxtun)",1,False,D33,49.0,53,True,True,True,True,female -"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",1,False,,29.0,54,True,True,True,True,female -"Ostby, Mr. Engelhart Cornelius",0,True,B30,65.0,55,False,False,False,True,male -"Woolner, Mr. Hugh",0,True,C52,,56,True,False,False,True,male -"Rugg, Miss. Emily",0,False,,21.0,57,True,False,True,False,female -"Novel, Mr. Mansouer",0,True,,28.5,58,False,False,False,True,male -"West, Miss. Constance Mirium",1,False,,5.0,59,True,True,True,False,female -"Goodwin, Master. William Frederick",5,True,,11.0,60,False,True,False,False,male +age,cabin,name,sibsp,survived,sex,has_siblings,is_male,is_mr,is_female,passenger_id +22.0,,"Braund, Mr. Owen Harris",1,False,male,True,True,True,False,1 +38.0,C85,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",1,True,female,True,False,True,True,2 +26.0,,"Heikkinen, Miss. Laina",0,True,female,False,False,False,True,3 +35.0,C123,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,True,female,True,False,True,True,4 +35.0,,"Allen, Mr. William Henry",0,False,male,False,True,True,False,5 +,,"Moran, Mr. James",0,False,male,False,True,True,False,6 +54.0,E46,"McCarthy, Mr. Timothy J",0,False,other,False,False,True,False,7 +2.0,,"Palsson, Master. Gosta Leonard",3,False,male,True,True,False,False,8 +27.0,,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",0,True,female,False,False,True,True,9 +14.0,,"Nasser, Mrs. Nicholas (Adele Achem)",1,True,female,True,False,True,True,10 +4.0,G6,"Sandstrom, Miss. Marguerite Rut",1,True,female,True,False,False,True,11 +58.0,C103,"Bonnell, Miss. Elizabeth",0,True,female,False,False,False,True,12 +20.0,,"Saundercock, Mr. William Henry",0,False,male,False,True,True,False,13 +39.0,,"Andersson, Mr. Anders Johan",1,False,male,True,True,True,False,14 +14.0,,"Vestrom, Miss. Hulda Amanda Adolfina",0,False,female,False,False,False,True,15 +55.0,,"Hewlett, Mrs. (Mary D Kingcome) ",0,True,female,False,False,True,True,16 +2.0,,"Rice, Master. Eugene",4,False,male,True,True,False,False,17 +,,"Williams, Mr. Charles Eugene",0,True,male,False,True,True,False,18 +31.0,,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",1,False,female,True,False,True,True,19 +,,"Masselmani, Mrs. Fatima",0,True,female,False,False,True,True,20 +35.0,,"Fynney, Mr. Joseph J",0,False,male,False,True,True,False,21 +34.0,D56,"Beesley, Mr. Lawrence",0,True,male,False,True,True,False,22 +15.0,,"McGowan, Miss. Anna ""Annie""",0,True,female,False,False,False,True,23 +28.0,A6,"Sloper, Mr. William Thompson",0,True,male,False,True,True,False,24 +8.0,,"Palsson, Miss. Torborg Danira",3,False,female,True,False,False,True,25 +38.0,,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",1,True,female,True,False,True,True,26 +,,"Emir, Mr. Farred Chehab",0,False,male,False,True,True,False,27 +19.0,C23 C25 C27,"Fortune, Mr. Charles Alexander",3,False,male,True,True,True,False,28 +,,"O'Dwyer, Miss. Ellen ""Nellie""",0,True,female,False,False,False,True,29 +,,"Todoroff, Mr. Lalio",0,False,male,False,True,True,False,30 +40.0,,"Uruchurtu, Don. Manuel E",0,False,male,False,True,False,False,31 +,B78,"Spencer, Mrs. William Augustus (Marie Eugenie)",1,True,female,True,False,True,True,32 +,,"Glynn, Miss. Mary Agatha",0,True,female,False,False,False,True,33 +66.0,,"Wheadon, Mr. Edward H",0,False,male,False,True,True,False,34 +28.0,,"Meyer, Mr. Edgar Joseph",1,False,male,True,True,True,False,35 +42.0,,"Holverson, Mr. Alexander Oskar",1,False,male,True,True,True,False,36 +,,"Mamee, Mr. Hanna",0,True,male,False,True,True,False,37 +21.0,,"Cann, Mr. Ernest Charles",0,False,male,False,True,True,False,38 +18.0,,"Vander Planke, Miss. Augusta Maria",2,False,female,True,False,False,True,39 +14.0,,"Nicola-Yarred, Miss. Jamila",1,True,female,True,False,False,True,40 +40.0,,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",1,False,female,True,False,True,True,41 +27.0,,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",1,False,female,True,False,True,True,42 +,,"Kraeff, Mr. Theodor",0,False,male,False,True,True,False,43 +3.0,,"Laroche, Miss. Simonne Marie Anne Andree",1,True,female,True,False,False,True,44 +19.0,,"Devaney, Miss. Margaret Delia",0,True,female,False,False,False,True,45 +,,"Rogers, Mr. William John",0,False,male,False,True,True,False,46 +,,"Lennon, Mr. Denis",1,False,male,True,True,True,False,47 +,,"O'Driscoll, Miss. Bridget",0,True,female,False,False,False,True,48 +,,"Samaan, Mr. Youssef",2,False,male,True,True,True,False,49 +18.0,,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",1,False,female,True,False,True,True,50 +7.0,,"Panula, Master. Juha Niilo",4,False,male,True,True,False,False,51 +21.0,,"Nosworthy, Mr. Richard Cater",0,False,male,False,True,True,False,52 +49.0,D33,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",1,True,female,True,False,True,True,53 +29.0,,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",1,True,female,True,False,True,True,54 +65.0,B30,"Ostby, Mr. Engelhart Cornelius",0,False,male,False,True,True,False,55 +,C52,"Woolner, Mr. Hugh",0,True,male,False,True,True,False,56 +21.0,,"Rugg, Miss. Emily",0,True,female,False,False,False,True,57 +28.5,,"Novel, Mr. Mansouer",0,False,male,False,True,True,False,58 +5.0,,"West, Miss. Constance Mirium",1,True,female,True,False,False,True,59 +11.0,,"Goodwin, Master. William Frederick",5,False,male,True,True,False,False,60 diff --git a/test_data/titanic-validate.csv b/test_data/titanic-validate.csv index dfdcb80..9dad6a8 100644 --- a/test_data/titanic-validate.csv +++ b/test_data/titanic-validate.csv @@ -1,21 +1,21 @@ -name,sibsp,is_male,cabin,age,passenger_id,survived,has_siblings,is_female,is_mr,sex -"Waelens, Mr. Achille",0,True,,22.0,81,False,False,False,True,male -"Sheerlinck, Mr. Jan Baptist",0,True,,29.0,82,True,False,False,True,male -"McDermott, Miss. Brigdet Delia",0,False,,,83,True,False,True,False,female -"Carrau, Mr. Francisco M",0,True,,28.0,84,False,False,False,True,male -"Ilett, Miss. Bertha",0,False,,17.0,85,True,False,True,False,female -"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",3,False,,33.0,86,True,True,True,True,female -"Ford, Mr. William Neal",1,True,,16.0,87,False,True,False,True,male -"Slocovski, Mr. Selman Francis",0,True,,,88,False,False,False,True,male -"Fortune, Miss. Mabel Helen",3,False,C23 C25 C27,23.0,89,True,True,True,False,female -"Celotti, Mr. Francesco",0,True,,24.0,90,False,False,False,True,male -"Christmann, Mr. Emil",0,True,,29.0,91,False,False,False,True,male -"Andreasson, Mr. Paul Edvin",0,True,,20.0,92,False,False,False,True,male -"Chaffee, Mr. Herbert Fuller",1,True,E31,46.0,93,False,True,False,True,male -"Dean, Mr. Bertram Frank",1,True,,26.0,94,False,True,False,True,male -"Coxon, Mr. Daniel",0,True,,59.0,95,False,False,False,True,male -"Shorney, Mr. Charles Joseph",0,True,,,96,False,False,False,True,male -"Goldschmidt, Mr. George B",0,True,A5,71.0,97,False,False,False,True,male -"Greenfield, Mr. William Bertram",0,True,D10 D12,23.0,98,True,False,False,True,male -"Doling, Mrs. John T (Ada Julia Bone)",0,False,,34.0,99,True,False,True,True,female -"Kantor, Mr. Sinai",1,True,,34.0,100,False,True,False,True,male +age,cabin,name,sibsp,survived,sex,has_siblings,is_male,is_mr,is_female,passenger_id +22.0,,"Waelens, Mr. Achille",0,False,male,False,True,True,False,81 +29.0,,"Sheerlinck, Mr. Jan Baptist",0,True,male,False,True,True,False,82 +,,"McDermott, Miss. Brigdet Delia",0,True,female,False,False,False,True,83 +28.0,,"Carrau, Mr. Francisco M",0,False,male,False,True,True,False,84 +17.0,,"Ilett, Miss. Bertha",0,True,female,False,False,False,True,85 +33.0,,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",3,True,female,True,False,True,True,86 +16.0,,"Ford, Mr. William Neal",1,False,male,True,True,True,False,87 +,,"Slocovski, Mr. Selman Francis",0,False,male,False,True,True,False,88 +23.0,C23 C25 C27,"Fortune, Miss. Mabel Helen",3,True,female,True,False,False,True,89 +24.0,,"Celotti, Mr. Francesco",0,False,male,False,True,True,False,90 +29.0,,"Christmann, Mr. Emil",0,False,male,False,True,True,False,91 +20.0,,"Andreasson, Mr. Paul Edvin",0,False,male,False,True,True,False,92 +46.0,E31,"Chaffee, Mr. Herbert Fuller",1,False,male,True,True,True,False,93 +26.0,,"Dean, Mr. Bertram Frank",1,False,male,True,True,True,False,94 +59.0,,"Coxon, Mr. Daniel",0,False,male,False,True,True,False,95 +,,"Shorney, Mr. Charles Joseph",0,False,male,False,True,True,False,96 +71.0,A5,"Goldschmidt, Mr. George B",0,False,male,False,True,True,False,97 +23.0,D10 D12,"Greenfield, Mr. William Bertram",0,True,male,False,True,True,False,98 +34.0,,"Doling, Mrs. John T (Ada Julia Bone)",0,True,female,False,False,True,True,99 +34.0,,"Kantor, Mr. Sinai",1,False,male,True,True,True,False,100