Skip to content

Commit

Permalink
Updated polars
Browse files Browse the repository at this point in the history
  • Loading branch information
MatsMoll committed Jan 6, 2024
1 parent b85860b commit 7ffc51c
Show file tree
Hide file tree
Showing 13 changed files with 522 additions and 1,043 deletions.
12 changes: 5 additions & 7 deletions aligned/schemas/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,12 +643,10 @@ async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame |

@staticmethod
def test_definition() -> TransformationTestDefinition:
- from numpy import nan
-
return TransformationTestDefinition(
GreaterThenValue(key='x', value=2),
- input={'x': [1, 2, 3, nan]},
- output=[False, False, True, False],
+ input={'x': [1, 2, 3]},
+ output=[False, False, True],
)


Expand All @@ -673,8 +671,8 @@ def test_definition() -> TransformationTestDefinition:

return TransformationTestDefinition(
GreaterThen(left_key='x', right_key='y'),
- input={'x': [1, 2, 3, nan, 5], 'y': [3, 2, 1, 5, nan]},
- output=[False, False, True, False, False],
+ input={'x': [1, 2, 3, 5], 'y': [3, 2, 1, nan]},
+ output=[False, False, True, False],
)


Expand Down Expand Up @@ -2149,7 +2147,7 @@ async def transform_pandas(self, df: pd.DataFrame) -> pd.Series:

async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr:
if df.schema[self.key].is_(pl.Utf8):
- return await JsonPath(self.key, self.field).transform_polars(df, alias)
+ return await JsonPath(self.key, f'$.{self.field}').transform_polars(df, alias)
else:
return pl.col(self.key).struct.field(self.field).alias(alias)

Expand Down
1,339 changes: 410 additions & 929 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "aligned"
- version = "0.0.59"
+ version = "0.0.60"
description = "A data managment and lineage tool for ML applications."
authors = ["Mats E. Mollestad <mats@mollestad.no>"]
license = "Apache-2.0"
Expand Down Expand Up @@ -60,7 +60,7 @@ prometheus_client = "^0.16.0"
asgi-correlation-id = { version = "^3.0.0", optional = true }
pandera = { version = "^0.17.0", optional = true}
httpx = "^0.23.0"
- polars = { version = "^0.18.0", extras = ["all"] }
+ polars = { version = "^0.20.0" }
pillow = { version = "^9.4.0", optional = true }
prometheus-fastapi-instrumentator = { version="^5.9.1", optional = true }
# gensim = { version = "4.3.0", optional = true }
Expand Down
Binary file modified test_data/credit_history.parquet
Binary file not shown.
Binary file modified test_data/credit_history_agg.parquet
Binary file not shown.
Binary file modified test_data/credit_history_mater.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion test_data/feature-store.json

Large diffs are not rendered by default.

Binary file modified test_data/loan.parquet
Binary file not shown.
Binary file modified test_data/test_model.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion test_data/titanic-sets.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}, {"name": "optional"}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}, {"name": "optional"}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", 
"location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, 
"key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}, {"name": "optional"}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}, {"name": "optional"}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
42 changes: 21 additions & 21 deletions test_data/titanic-test.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
is_male,sex,is_mr,survived,has_siblings,name,age,sibsp,cabin,is_female,passenger_id
True,male,True,False,False,"Sirayanian, Mr. Orsen",22.0,0,,False,61
False,female,False,True,False,"Icard, Miss. Amelie",38.0,0,B28,True,62
True,male,True,False,True,"Harris, Mr. Henry Birkhardt",45.0,1,C83,False,63
True,male,False,False,True,"Skoog, Master. Harald",4.0,3,,False,64
True,male,True,False,False,"Stewart, Mr. Albert A",,0,,False,65
True,male,False,True,True,"Moubarek, Master. Gerios",,1,,False,66
False,female,True,True,False,"Nye, Mrs. (Elizabeth Ramell)",29.0,0,F33,True,67
True,male,True,False,False,"Crease, Mr. Ernest James",19.0,0,,False,68
False,female,False,True,True,"Andersson, Miss. Erna Alexandra",17.0,4,,True,69
True,male,True,False,True,"Kink, Mr. Vincenz",26.0,2,,False,70
True,male,True,False,False,"Jenkin, Mr. Stephen Curnow",32.0,0,,False,71
False,female,False,False,True,"Goodwin, Miss. Lillian Amy",16.0,5,,True,72
True,male,True,False,False,"Hood, Mr. Ambrose Jr",21.0,0,,False,73
True,male,True,False,True,"Chronopoulos, Mr. Apostolos",26.0,1,,False,74
True,male,True,True,False,"Bing, Mr. Lee",32.0,0,,False,75
True,male,True,False,False,"Moen, Mr. Sigurd Hansen",25.0,0,F G73,False,76
True,male,True,False,False,"Staneff, Mr. Ivan",,0,,False,77
True,male,True,False,False,"Moutal, Mr. Rahamin Haim",,0,,False,78
True,male,False,True,False,"Caldwell, Master. Alden Gates",0.83,0,,False,79
False,female,False,True,False,"Dowdell, Miss. Elizabeth",30.0,0,,True,80
cabin,has_siblings,is_female,is_mr,name,survived,age,is_male,sex,sibsp,passenger_id
,False,False,True,"Sirayanian, Mr. Orsen",False,22.0,True,male,0,61
B28,False,True,False,"Icard, Miss. Amelie",True,38.0,False,female,0,62
C83,True,False,True,"Harris, Mr. Henry Birkhardt",False,45.0,True,male,1,63
,True,False,False,"Skoog, Master. Harald",False,4.0,True,male,3,64
,False,False,True,"Stewart, Mr. Albert A",False,,True,male,0,65
,True,False,False,"Moubarek, Master. Gerios",True,,True,male,1,66
F33,False,True,True,"Nye, Mrs. (Elizabeth Ramell)",True,29.0,False,female,0,67
,False,False,True,"Crease, Mr. Ernest James",False,19.0,True,male,0,68
,True,True,False,"Andersson, Miss. Erna Alexandra",True,17.0,False,female,4,69
,True,False,True,"Kink, Mr. Vincenz",False,26.0,True,male,2,70
,False,False,True,"Jenkin, Mr. Stephen Curnow",False,32.0,True,male,0,71
,True,True,False,"Goodwin, Miss. Lillian Amy",False,16.0,False,female,5,72
,False,False,True,"Hood, Mr. Ambrose Jr",False,21.0,True,male,0,73
,True,False,True,"Chronopoulos, Mr. Apostolos",False,26.0,True,male,1,74
,False,False,True,"Bing, Mr. Lee",True,32.0,True,male,0,75
F G73,False,False,True,"Moen, Mr. Sigurd Hansen",False,25.0,True,male,0,76
,False,False,True,"Staneff, Mr. Ivan",False,,True,male,0,77
,False,False,True,"Moutal, Mr. Rahamin Haim",False,,True,male,0,78
,False,False,False,"Caldwell, Master. Alden Gates",True,0.83,True,male,0,79
,False,True,False,"Dowdell, Miss. Elizabeth",True,30.0,False,female,0,80
Loading

0 comments on commit 7ffc51c

Please sign in to comment.