Skip to content

Commit

Permalink
Minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
MatsMoll committed Jan 28, 2024
1 parent 84203c6 commit 42173c9
Show file tree
Hide file tree
Showing 11 changed files with 120 additions and 116 deletions.
2 changes: 2 additions & 0 deletions aligned/retrival_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,8 @@ def fill_missing_columns(self) -> RetrivalJob:
return FillMissingColumnsJob(self)

def rename(self, mappings: dict[str, str]) -> RetrivalJob:
if not mappings:
return self
return RenameJob(self, mappings)

def drop_duplicate_entities(self) -> RetrivalJob:
Expand Down
14 changes: 9 additions & 5 deletions aligned/schemas/feature_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ def __hash__(self) -> int:
class FeatureViewReferenceSource(BatchDataSource):

view: CompiledFeatureView
renames: dict[str, str] = field(default_factory=dict)

type_name = 'view_ref'

Expand Down Expand Up @@ -373,7 +374,7 @@ def multi_source_features_for(
else:
available_features = sub_job.derive_features([request])

return FileFactualJob(available_features, [request], facts)
return FileFactualJob(available_features, [request], facts).rename(source.renames)

def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob:
sub_source = self.view.materialized_source or self.view.source
Expand All @@ -382,9 +383,11 @@ def all_data(self, request: RetrivalRequest, limit: int | None) -> RetrivalJob:

core_job = sub_source.all_data(sub_req, limit=limit)
if request.aggregated_features:
return core_job.aggregate(request).derive_features([request])
job = core_job.aggregate(request)
else:
return core_job.derive_features().derive_features([request])
job = core_job.derive_features()

return job.derive_features([request]).rename(self.renames)

def all_between_dates(
self, request: RetrivalRequest, start_date: datetime, end_date: datetime
Expand All @@ -395,9 +398,10 @@ def all_between_dates(

core_job = sub_source.all_between_dates(sub_req, start_date, end_date)
if request.aggregated_features:
return core_job.aggregate(request).derive_features([request])
job = core_job.aggregate(request)
else:
return core_job.derive_features().derive_features([request])
job = core_job.derive_features()
return job.derive_features([request]).rename(self.renames)

def depends_on(self) -> set[FeatureLocation]:
return {FeatureLocation.feature_view(self.view.name)}
Expand Down
8 changes: 3 additions & 5 deletions aligned/schemas/text_vectoriser.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,9 @@ async def load_model(self):
async def vectorise_polars(self, texts: pl.LazyFrame, text_key: str, output_key: str) -> pl.LazyFrame:
if self.loaded_model is None:
await self.load_model()
return texts.with_columns(
pl.Series(
self.loaded_model.encode(texts.select(pl.col(text_key)).collect().to_series().to_list())
).alias(output_key)
)
return pl.Series(
self.loaded_model.encode(texts.select(pl.col(text_key)).collect().to_series().to_list())
).alias(output_key)

async def vectorise_pandas(self, texts: pd.Series) -> pd.Series:
if self.loaded_model is None:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "aligned"
version = "0.0.63"
version = "0.0.64"
description = "A data management and lineage tool for ML applications."
authors = ["Mats E. Mollestad <mats@mollestad.no>"]
license = "Apache-2.0"
Expand Down
Binary file modified test_data/credit_history_mater.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion test_data/feature-store.json

Large diffs are not rendered by default.

Binary file modified test_data/test_model.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion test_data/titanic-sets.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "optional"}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": 
"age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}, {"name": "optional"}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": 
"optional"}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
42 changes: 21 additions & 21 deletions test_data/titanic-test.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
survived,has_siblings,sex,sibsp,is_male,name,passenger_id,is_mr,age,is_female,cabin
False,False,male,0,True,"Sirayanian, Mr. Orsen",61,True,22.0,False,
True,False,female,0,False,"Icard, Miss. Amelie",62,False,38.0,True,B28
False,True,male,1,True,"Harris, Mr. Henry Birkhardt",63,True,45.0,False,C83
False,True,male,3,True,"Skoog, Master. Harald",64,False,4.0,False,
False,False,male,0,True,"Stewart, Mr. Albert A",65,True,,False,
True,True,male,1,True,"Moubarek, Master. Gerios",66,False,,False,
True,False,female,0,False,"Nye, Mrs. (Elizabeth Ramell)",67,True,29.0,True,F33
False,False,male,0,True,"Crease, Mr. Ernest James",68,True,19.0,False,
True,True,female,4,False,"Andersson, Miss. Erna Alexandra",69,False,17.0,True,
False,True,male,2,True,"Kink, Mr. Vincenz",70,True,26.0,False,
False,False,male,0,True,"Jenkin, Mr. Stephen Curnow",71,True,32.0,False,
False,True,female,5,False,"Goodwin, Miss. Lillian Amy",72,False,16.0,True,
False,False,male,0,True,"Hood, Mr. Ambrose Jr",73,True,21.0,False,
False,True,male,1,True,"Chronopoulos, Mr. Apostolos",74,True,26.0,False,
True,False,male,0,True,"Bing, Mr. Lee",75,True,32.0,False,
False,False,male,0,True,"Moen, Mr. Sigurd Hansen",76,True,25.0,False,F G73
False,False,male,0,True,"Staneff, Mr. Ivan",77,True,,False,
False,False,male,0,True,"Moutal, Mr. Rahamin Haim",78,True,,False,
True,False,male,0,True,"Caldwell, Master. Alden Gates",79,False,0.83,False,
True,False,female,0,False,"Dowdell, Miss. Elizabeth",80,False,30.0,True,
has_siblings,is_female,is_male,name,sibsp,sex,cabin,survived,is_mr,passenger_id,age
False,False,True,"Sirayanian, Mr. Orsen",0,male,,False,True,61,22.0
False,True,False,"Icard, Miss. Amelie",0,female,B28,True,False,62,38.0
True,False,True,"Harris, Mr. Henry Birkhardt",1,male,C83,False,True,63,45.0
True,False,True,"Skoog, Master. Harald",3,male,,False,False,64,4.0
False,False,True,"Stewart, Mr. Albert A",0,male,,False,True,65,
True,False,True,"Moubarek, Master. Gerios",1,male,,True,False,66,
False,True,False,"Nye, Mrs. (Elizabeth Ramell)",0,female,F33,True,True,67,29.0
False,False,True,"Crease, Mr. Ernest James",0,male,,False,True,68,19.0
True,True,False,"Andersson, Miss. Erna Alexandra",4,female,,True,False,69,17.0
True,False,True,"Kink, Mr. Vincenz",2,male,,False,True,70,26.0
False,False,True,"Jenkin, Mr. Stephen Curnow",0,male,,False,True,71,32.0
True,True,False,"Goodwin, Miss. Lillian Amy",5,female,,False,False,72,16.0
False,False,True,"Hood, Mr. Ambrose Jr",0,male,,False,True,73,21.0
True,False,True,"Chronopoulos, Mr. Apostolos",1,male,,False,True,74,26.0
False,False,True,"Bing, Mr. Lee",0,male,,True,True,75,32.0
False,False,True,"Moen, Mr. Sigurd Hansen",0,male,F G73,False,True,76,25.0
False,False,True,"Staneff, Mr. Ivan",0,male,,False,True,77,
False,False,True,"Moutal, Mr. Rahamin Haim",0,male,,False,True,78,
False,False,True,"Caldwell, Master. Alden Gates",0,male,,True,False,79,0.83
False,True,False,"Dowdell, Miss. Elizabeth",0,female,,True,False,80,30.0
Loading

0 comments on commit 42173c9

Please sign in to comment.