Skip to content

Commit

Permalink
Added recommendation target
Browse files Browse the repository at this point in the history
  • Loading branch information
MatsMoll committed Jan 16, 2024
1 parent 2155a6c commit b12b722
Show file tree
Hide file tree
Showing 14 changed files with 185 additions and 117 deletions.
37 changes: 37 additions & 0 deletions aligned/compiler/feature_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from aligned.schemas.target import ClassificationTarget as ClassificationTargetSchemas
from aligned.schemas.target import ClassTargetProbability
from aligned.schemas.target import RegressionTarget as RegressionTargetSchemas
from aligned.schemas.target import RecommendationTarget as RecommendationTargetSchemas
from aligned.schemas.transformation import EmbeddingModel, Transformation
from aligned.schemas.vector_storage import VectorStorage

Expand Down Expand Up @@ -171,6 +172,39 @@ def sort_key(x: tuple[int, FeatureFactory]) -> int:
return features, derived_features


@dataclass
class RecommendationTarget(FeatureReferencable):
    """Compile-time declaration of a recommendation target.

    Holds the feature factory the target is built from and, optionally, a
    second feature factory describing the rank. ``compile`` turns the
    declaration into its schema counterpart (``RecommendationTargetSchemas``).

    ``_name`` is filled in by ``__set_name__`` when the instance is assigned
    as a class attribute; ``_location`` is not set anywhere in this class and
    must be assigned by the caller before a reference can be created.
    """

    # Feature factory whose reference is passed as the first positional
    # argument of the compiled schema target.
    feature: FeatureFactory
    # Optional feature factory compiled into the schema's ``estimating_rank``.
    rank_feature: FeatureFactory | None = field(default=None)

    _name: str | None = field(default=None)
    _location: FeatureLocation | None = field(default=None)

    def __set_name__(self, owner: type, name: str) -> None:
        """Record the attribute name this target was assigned to."""
        self._name = name

    def feature_referance(self) -> FeatureReferance:
        """Return a reference to this target.

        The reference uses this target's own name and location together with
        the dtype of the wrapped ``feature``.

        Raises:
            ValueError: if the name or the location has not been set.
        """
        if not self._name:
            raise ValueError('Missing name, can not create reference')
        if not self._location:
            raise ValueError('Missing location, can not create reference')
        return FeatureReferance(self._name, self._location, self.feature.dtype)

    def estimating_rank(self, feature: FeatureFactory) -> RecommendationTarget:
        """Set the rank feature and return ``self`` so calls can be chained."""
        self.rank_feature = feature
        return self

    def estemating_rank(self, feature: FeatureFactory) -> RecommendationTarget:
        """Misspelled alias of ``estimating_rank``.

        Kept so existing callers that use the original spelling keep working.
        """
        return self.estimating_rank(feature)

    def compile(self) -> RecommendationTargetSchemas:
        """Compile this declaration into its schema representation.

        Raises:
            ValueError: propagated from ``feature_referance`` when the name
                or location is missing.
        """
        # Resolve our own reference first so a missing name/location fails
        # before anything else is evaluated.
        self_ref = self.feature_referance()

        return RecommendationTargetSchemas(
            self.feature.feature_referance(),
            feature=self_ref.as_feature(),
            estimating_rank=self.rank_feature.feature_referance() if self.rank_feature else None,
        )


@dataclass
class RegressionLabel(FeatureReferencable):
feature: FeatureFactory
Expand Down Expand Up @@ -375,6 +409,9 @@ def as_classification_label(self) -> ClassificationLabel:
def as_regression_label(self) -> RegressionLabel:
    """Wrap this object in a ``RegressionLabel`` and return it."""
    label = RegressionLabel(self)
    return label

def as_recommendation_target(self) -> RecommendationTarget:
    """Wrap this object in a ``RecommendationTarget`` and return it."""
    target = RecommendationTarget(self)
    return target

def compile(self) -> DerivedFeature:

if not self.transformation:
Expand Down
5 changes: 5 additions & 0 deletions aligned/compiler/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
EventTimestamp,
FeatureFactory,
FeatureReferencable,
RecommendationTarget,
RegressionLabel,
TargetProbability,
ModelVersion,
Expand Down Expand Up @@ -203,6 +204,7 @@ class MyModel(ModelContract):
stream_source=metadata.prediction_stream,
classification_targets=set(),
regression_targets=set(),
recommendation_targets=set(),
)
probability_features: dict[str, set[TargetProbability]] = {}

Expand Down Expand Up @@ -261,6 +263,8 @@ class MyModel(ModelContract):
probability_features[feature_name] = probability_features.get(feature_name, set()).union(
{feature}
)
elif isinstance(feature, RecommendationTarget):
inference_view.recommendation_targets.add(feature.compile())
elif isinstance(feature, Entity):
inference_view.entities.add(feature.feature())
elif isinstance(feature, FeatureFactory):
Expand Down Expand Up @@ -299,4 +303,5 @@ class MyModel(ModelContract):
tags=metadata.tags,
description=metadata.description,
dataset_store=metadata.dataset_store,
exposed_at_url=metadata.exposed_at_url,
)
8 changes: 2 additions & 6 deletions aligned/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from prometheus_client import Histogram

from aligned.compiler.model import ModelContractWrapper
from aligned.schemas.model import FeatureInputVersions
from aligned.data_file import DataFileReference, upsert_on_column
from aligned.data_source.batch_data_source import BatchDataSource
from aligned.enricher import Enricher
Expand Down Expand Up @@ -736,11 +735,8 @@ def location(self) -> FeatureLocation:

def raw_string_features(self, except_features: set[str]) -> set[str]:

if isinstance(self.model.features, FeatureInputVersions):
version = self.selected_version or self.model.features.default_version
features = self.model.features.features_for(version)
else:
features = self.model.features
version = self.selected_version or self.model.features.default_version
features = self.model.features.features_for(version)

return {
f'{feature.location.identifier}:{feature.name}'
Expand Down
16 changes: 16 additions & 0 deletions aligned/schemas/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,13 @@ def renamed(self, new_name: str) -> Feature:
constraints=self.constraints,
)

def as_reference(self, location: FeatureLocation) -> FeatureReferance:
    """Create a ``FeatureReferance`` pointing at this feature.

    The reference reuses this feature's ``name`` and ``dtype``; the caller
    supplies the ``location``.
    """
    feature_name = self.name
    feature_dtype = self.dtype
    return FeatureReferance(name=feature_name, location=location, dtype=feature_dtype)

def __hash__(self) -> int:
    """Hash on ``name`` only; no other attribute contributes to the hash."""
    key = self.name
    return hash(key)

Expand Down Expand Up @@ -354,6 +361,15 @@ class FeatureReferance(Codable):
dtype: FeatureType
# is_derived: bool

def as_feature(self) -> Feature:
    """Create a plain ``Feature`` from this reference.

    Only ``name`` and ``dtype`` are carried over; ``description``, ``tags``
    and ``constraints`` are all set to ``None``.
    """
    feature_name = self.name
    feature_dtype = self.dtype
    return Feature(
        name=feature_name,
        dtype=feature_dtype,
        description=None,
        tags=None,
        constraints=None,
    )

def __hash__(self) -> int:
    """Hash on ``name`` only; ``location`` and ``dtype`` are not included."""
    key = self.name
    return hash(key)

Expand Down
12 changes: 7 additions & 5 deletions aligned/schemas/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from aligned.schemas.feature import EventTimestamp, Feature, FeatureReferance
from aligned.data_source.stream_data_source import StreamDataSource
from aligned.schemas.event_trigger import EventTrigger
from aligned.schemas.target import ClassificationTarget, RegressionTarget
from aligned.schemas.target import ClassificationTarget, RecommendationTarget, RegressionTarget
from aligned.schemas.derivied_feature import DerivedFeature
from aligned.data_source.batch_data_source import BatchDataSource
from aligned.schemas.folder import DatasetStore
Expand Down Expand Up @@ -57,6 +57,7 @@ class PredictionsView(Codable):

regression_targets: set[RegressionTarget] | None = field(default=None)
classification_targets: set[ClassificationTarget] | None = field(default=None)
recommendation_targets: set[RecommendationTarget] | None = field(default=None)

@property
def full_schema(self) -> set[Feature]:
Expand Down Expand Up @@ -110,6 +111,8 @@ def labels_estimates_refs(self) -> set[FeatureReferance]:
return {feature.estimating for feature in self.classification_targets}
elif self.regression_targets:
return {feature.estimating for feature in self.regression_targets}
elif self.recommendation_targets:
return {feature.estimating for feature in self.recommendation_targets}
else:
raise ValueError('Found no targets in the model')

Expand All @@ -118,6 +121,8 @@ def labels(self) -> set[Feature]:
return {feature.feature for feature in self.classification_targets}
elif self.regression_targets:
return {feature.feature for feature in self.regression_targets}
elif self.recommendation_targets:
return {feature.feature for feature in self.recommendation_targets}
else:
raise ValueError('Found no targets in the model')

Expand All @@ -137,10 +142,7 @@ def __hash__(self) -> int:
return self.name.__hash__()

def feature_references(self, version: str | None = None) -> set[FeatureReferance]:
if isinstance(self.features, FeatureInputVersions):
return set(self.features.features_for(version or self.features.default_version))
else:
return self.features
return set(self.features.features_for(version or self.features.default_version))

@property
def request_all_predictions(self) -> FeatureRequest:
Expand Down
12 changes: 12 additions & 0 deletions aligned/schemas/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,15 @@ class ClassificationTarget(Codable):

def __hash__(self) -> int:
    """Hash on the name of the wrapped ``feature`` only."""
    target_name = self.feature.name
    return target_name.__hash__()


@dataclass
class RecommendationTarget(Codable):
    """Schema record describing a recommendation target.

    Instances hash on the name of ``feature`` only, so two targets whose
    features share a name produce the same hash.
    """

    # Reference to the feature this target estimates.
    estimating: FeatureReferance
    # The target itself, expressed as a feature.
    feature: Feature

    # Optional reference to a rank feature; ``None`` when no rank is given.
    estimating_rank: FeatureReferance | None = field(default=None)

    def __hash__(self) -> int:
        """Return the hash of the target feature's name."""
        target_name = self.feature.name
        return target_name.__hash__()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "aligned"
version = "0.0.61"
version = "0.0.62"
description = "A data managment and lineage tool for ML applications."
authors = ["Mats E. Mollestad <mats@mollestad.no>"]
license = "Apache-2.0"
Expand Down
Binary file modified test_data/credit_history_mater.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion test_data/feature-store.json

Large diffs are not rendered by default.

Binary file modified test_data/test_model.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion test_data/titanic-sets.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": 
"string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}, {"name": "optional"}]}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "optional"}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": 
"age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
42 changes: 21 additions & 21 deletions test_data/titanic-test.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
passenger_id,has_siblings,is_mr,age,cabin,name,sibsp,is_male,is_female,survived,sex
61,False,True,22.0,,"Sirayanian, Mr. Orsen",0,True,False,False,male
62,False,False,38.0,B28,"Icard, Miss. Amelie",0,False,True,True,female
63,True,True,45.0,C83,"Harris, Mr. Henry Birkhardt",1,True,False,False,male
64,True,False,4.0,,"Skoog, Master. Harald",3,True,False,False,male
65,False,True,,,"Stewart, Mr. Albert A",0,True,False,False,male
66,True,False,,,"Moubarek, Master. Gerios",1,True,False,True,male
67,False,True,29.0,F33,"Nye, Mrs. (Elizabeth Ramell)",0,False,True,True,female
68,False,True,19.0,,"Crease, Mr. Ernest James",0,True,False,False,male
69,True,False,17.0,,"Andersson, Miss. Erna Alexandra",4,False,True,True,female
70,True,True,26.0,,"Kink, Mr. Vincenz",2,True,False,False,male
71,False,True,32.0,,"Jenkin, Mr. Stephen Curnow",0,True,False,False,male
72,True,False,16.0,,"Goodwin, Miss. Lillian Amy",5,False,True,False,female
73,False,True,21.0,,"Hood, Mr. Ambrose Jr",0,True,False,False,male
74,True,True,26.0,,"Chronopoulos, Mr. Apostolos",1,True,False,False,male
75,False,True,32.0,,"Bing, Mr. Lee",0,True,False,True,male
76,False,True,25.0,F G73,"Moen, Mr. Sigurd Hansen",0,True,False,False,male
77,False,True,,,"Staneff, Mr. Ivan",0,True,False,False,male
78,False,True,,,"Moutal, Mr. Rahamin Haim",0,True,False,False,male
79,False,False,0.83,,"Caldwell, Master. Alden Gates",0,True,False,True,male
80,False,False,30.0,,"Dowdell, Miss. Elizabeth",0,False,True,True,female
survived,has_siblings,sex,sibsp,is_male,name,passenger_id,is_mr,age,is_female,cabin
False,False,male,0,True,"Sirayanian, Mr. Orsen",61,True,22.0,False,
True,False,female,0,False,"Icard, Miss. Amelie",62,False,38.0,True,B28
False,True,male,1,True,"Harris, Mr. Henry Birkhardt",63,True,45.0,False,C83
False,True,male,3,True,"Skoog, Master. Harald",64,False,4.0,False,
False,False,male,0,True,"Stewart, Mr. Albert A",65,True,,False,
True,True,male,1,True,"Moubarek, Master. Gerios",66,False,,False,
True,False,female,0,False,"Nye, Mrs. (Elizabeth Ramell)",67,True,29.0,True,F33
False,False,male,0,True,"Crease, Mr. Ernest James",68,True,19.0,False,
True,True,female,4,False,"Andersson, Miss. Erna Alexandra",69,False,17.0,True,
False,True,male,2,True,"Kink, Mr. Vincenz",70,True,26.0,False,
False,False,male,0,True,"Jenkin, Mr. Stephen Curnow",71,True,32.0,False,
False,True,female,5,False,"Goodwin, Miss. Lillian Amy",72,False,16.0,True,
False,False,male,0,True,"Hood, Mr. Ambrose Jr",73,True,21.0,False,
False,True,male,1,True,"Chronopoulos, Mr. Apostolos",74,True,26.0,False,
True,False,male,0,True,"Bing, Mr. Lee",75,True,32.0,False,
False,False,male,0,True,"Moen, Mr. Sigurd Hansen",76,True,25.0,False,F G73
False,False,male,0,True,"Staneff, Mr. Ivan",77,True,,False,
False,False,male,0,True,"Moutal, Mr. Rahamin Haim",78,True,,False,
True,False,male,0,True,"Caldwell, Master. Alden Gates",79,False,0.83,False,
True,False,female,0,False,"Dowdell, Miss. Elizabeth",80,False,30.0,True,
Loading

0 comments on commit b12b722

Please sign in to comment.