Skip to content

Commit

Permalink
fixed test
Browse files (browse the repository at this point in the history)
  • Loading branch information
MatsMoll committed Mar 5, 2024
1 parent 9d5737a commit 413655e
Show file tree
Hide file tree
Showing 12 changed files with 142 additions and 144 deletions.
20 changes: 9 additions & 11 deletions aligned/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,22 +676,20 @@ async def insert_into(
import polars as pl

columns = write_request.all_returned_columns

new_df = (await values.to_lazy_polars()).select(columns)
try:
if isinstance(source, ColumnFeatureMappable):
new_cols = source.feature_identifier_for(columns)

mappings = dict(zip(columns, new_cols))
values = values.rename(mappings)
columns = new_cols
existing_df = (await source.to_lazy_polars()).rename(mappings)
else:
existing_df = await source.to_lazy_polars()

try:
existing_df = await source.to_lazy_polars()
write_df = pl.concat([new_df, existing_df.select(columns)], how='vertical_relaxed')
except UnableToFindFileException:
write_df = new_df

if isinstance(source, ColumnFeatureMappable):
new_cols = source.feature_identifier_for(columns)

mappings = dict(zip(columns, new_cols))
write_df = write_df.rename(mappings)

await source.write_polars(write_df)
else:
raise ValueError(f'The source {type(source)} do not support writes')
Expand Down
15 changes: 7 additions & 8 deletions aligned/sources/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ async def upsert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non

request = requests[0]

data = await job.to_lazy_polars()
data = (await job.to_lazy_polars()).select(request.all_returned_columns)
potential_timestamps = request.all_features

if request.event_timestamp:
Expand All @@ -160,8 +160,8 @@ async def upsert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non
data = data.with_columns(self.formatter.encode_polars(feature.name))

if self.mapping_keys:
mapping = {self.mapping_keys.get(name, name): name for name in data.columns}
data = data.rename(mapping)
columns = self.feature_identifier_for(data.columns)
data = data.rename(dict(zip(data.columns, columns)))

new_df = data.select(request.all_returned_columns)
entities = list(request.entity_names)
Expand All @@ -179,23 +179,22 @@ async def insert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non

request = requests[0]

data = await job.to_lazy_polars()
data = (await job.to_lazy_polars()).select(request.all_returned_columns)
for feature in request.features:
if feature.dtype.name == 'datetime':
data = data.with_columns(self.formatter.encode_polars(feature.name))

if self.mapping_keys:
mapping = {self.mapping_keys.get(name, name): name for name in data.columns}
data = data.rename(mapping)
columns = self.feature_identifier_for(data.columns)
data = data.rename(dict(zip(data.columns, columns)))

try:
existing_df = await self.to_lazy_polars()

write_df = pl.concat([data, existing_df.select(data.columns)], how='vertical_relaxed')
except UnableToFindFileException:
write_df = data

await self.write_polars(write_df.select(request.all_returned_columns))
await self.write_polars(write_df)

async def write_pandas(self, df: pd.DataFrame) -> None:
create_parent_dir(self.path)
Expand Down
5 changes: 3 additions & 2 deletions aligned/tests/test_model_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@ class TestModel:
a = Int32()

store = FeatureStore.experimental()
initial_frame = pl.DataFrame({'id': [1, 2, 3], 'a': [1, 2, 3]})

initial_frame = pl.DataFrame({'some_id': [1, 2, 3], 'a': [1, 2, 3]})
initial_frame.write_csv(path)

expected_frame = pl.DataFrame({'id': [1, 2, 3, 1, 2, 3], 'a': [10, 14, 20, 1, 2, 3]})
Expand All @@ -175,7 +176,7 @@ class TestModel:

await store.insert_into(FeatureLocation.model('test_model'), {'id': [1, 2, 3], 'a': [10, 14, 20]})

preds = await store.model('test_model').all_predictions().to_polars()
preds = await store.model('test_model').all_predictions().log_each_job().to_polars()

stored_data = pl.read_csv(path).select(id=pl.col('some_id'), a=pl.col('a'))
assert stored_data.equals(expected_frame)
Expand Down
14 changes: 7 additions & 7 deletions test_data/credit_history.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
due_sum,dob_ssn,credit_card_due,bankruptcies,student_loan_due,event_timestamp
30747,19530219_5179,8419,0,22328,1587924064746575
5459,19520816_8737,2944,0,2515,1587924064746575
33833,19860413_2537,833,0,33000,1587924064746575
54891,19530219_5179,5936,0,48955,1588010464746575
11076,19520816_8737,1575,0,9501,1588010464746575
41773,19860413_2537,6263,0,35510,1588010464746575
due_sum,student_loan_due,credit_card_due,event_timestamp,bankruptcies,dob_ssn
30747,22328,8419,1587924064746575,0,19530219_5179
5459,2515,2944,1587924064746575,0,19520816_8737
33833,33000,833,1587924064746575,0,19860413_2537
54891,48955,5936,1588010464746575,0,19530219_5179
11076,9501,1575,1588010464746575,0,19520816_8737
41773,35510,6263,1588010464746575,0,19860413_2537
Binary file modified test_data/credit_history_mater.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion test_data/feature-store.json

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions test_data/loan.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
loan_amount,loan_status,loan_id,event_timestamp,personal_income
35000,True,10000,1587924064746575,59000
1000,False,10001,1587924064746575,9600
5500,True,10002,1587924064746575,9600
35000,True,10000,1588010464746575,65500
35000,True,10001,1588010464746575,54400
2500,True,10002,1588010464746575,9900
loan_id,loan_amount,event_timestamp,loan_status,personal_income
10000,35000,1587924064746575,True,59000
10001,1000,1587924064746575,False,9600
10002,5500,1587924064746575,True,9600
10000,35000,1588010464746575,True,65500
10001,35000,1588010464746575,True,54400
10002,2500,1588010464746575,True,9900
8 changes: 4 additions & 4 deletions test_data/test_model.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
some_id,a
1,10
2,14
3,20
a,some_id
10,1
14,2
20,3
1,1
2,2
3,3
2 changes: 1 addition & 1 deletion test_data/titanic-sets.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": 
"feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", 
"dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
42 changes: 21 additions & 21 deletions test_data/titanic-test.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
name,sibsp,is_male,cabin,age,passenger_id,survived,has_siblings,is_female,is_mr,sex
"Sirayanian, Mr. Orsen",0,True,,22.0,61,False,False,False,True,male
"Icard, Miss. Amelie",0,False,B28,38.0,62,True,False,True,False,female
"Harris, Mr. Henry Birkhardt",1,True,C83,45.0,63,False,True,False,True,male
"Skoog, Master. Harald",3,True,,4.0,64,False,True,False,False,male
"Stewart, Mr. Albert A",0,True,,,65,False,False,False,True,male
"Moubarek, Master. Gerios",1,True,,,66,True,True,False,False,male
"Nye, Mrs. (Elizabeth Ramell)",0,False,F33,29.0,67,True,False,True,True,female
"Crease, Mr. Ernest James",0,True,,19.0,68,False,False,False,True,male
"Andersson, Miss. Erna Alexandra",4,False,,17.0,69,True,True,True,False,female
"Kink, Mr. Vincenz",2,True,,26.0,70,False,True,False,True,male
"Jenkin, Mr. Stephen Curnow",0,True,,32.0,71,False,False,False,True,male
"Goodwin, Miss. Lillian Amy",5,False,,16.0,72,False,True,True,False,female
"Hood, Mr. Ambrose Jr",0,True,,21.0,73,False,False,False,True,male
"Chronopoulos, Mr. Apostolos",1,True,,26.0,74,False,True,False,True,male
"Bing, Mr. Lee",0,True,,32.0,75,True,False,False,True,male
"Moen, Mr. Sigurd Hansen",0,True,F G73,25.0,76,False,False,False,True,male
"Staneff, Mr. Ivan",0,True,,,77,False,False,False,True,male
"Moutal, Mr. Rahamin Haim",0,True,,,78,False,False,False,True,male
"Caldwell, Master. Alden Gates",0,True,,0.83,79,True,False,False,False,male
"Dowdell, Miss. Elizabeth",0,False,,30.0,80,True,False,True,False,female
age,cabin,name,sibsp,survived,sex,has_siblings,is_male,is_mr,is_female,passenger_id
22.0,,"Sirayanian, Mr. Orsen",0,False,male,False,True,True,False,61
38.0,B28,"Icard, Miss. Amelie",0,True,female,False,False,False,True,62
45.0,C83,"Harris, Mr. Henry Birkhardt",1,False,male,True,True,True,False,63
4.0,,"Skoog, Master. Harald",3,False,male,True,True,False,False,64
,,"Stewart, Mr. Albert A",0,False,male,False,True,True,False,65
,,"Moubarek, Master. Gerios",1,True,male,True,True,False,False,66
29.0,F33,"Nye, Mrs. (Elizabeth Ramell)",0,True,female,False,False,True,True,67
19.0,,"Crease, Mr. Ernest James",0,False,male,False,True,True,False,68
17.0,,"Andersson, Miss. Erna Alexandra",4,True,female,True,False,False,True,69
26.0,,"Kink, Mr. Vincenz",2,False,male,True,True,True,False,70
32.0,,"Jenkin, Mr. Stephen Curnow",0,False,male,False,True,True,False,71
16.0,,"Goodwin, Miss. Lillian Amy",5,False,female,True,False,False,True,72
21.0,,"Hood, Mr. Ambrose Jr",0,False,male,False,True,True,False,73
26.0,,"Chronopoulos, Mr. Apostolos",1,False,male,True,True,True,False,74
32.0,,"Bing, Mr. Lee",0,True,male,False,True,True,False,75
25.0,F G73,"Moen, Mr. Sigurd Hansen",0,False,male,False,True,True,False,76
,,"Staneff, Mr. Ivan",0,False,male,False,True,True,False,77
,,"Moutal, Mr. Rahamin Haim",0,False,male,False,True,True,False,78
0.83,,"Caldwell, Master. Alden Gates",0,True,male,False,True,False,False,79
30.0,,"Dowdell, Miss. Elizabeth",0,True,female,False,False,False,True,80
Loading

0 comments on commit 413655e

Please sign in to comment.