From acfba6fb6478dd7724102a29322075b103a9565e Mon Sep 17 00:00:00 2001 From: "Mats E. Mollestad" Date: Tue, 26 Dec 2023 00:16:31 +0100 Subject: [PATCH] Added feature versions --- aligned/__init__.py | 3 +- aligned/compiler/model.py | 26 ++++- aligned/feature_store.py | 14 ++- aligned/schemas/model.py | 16 +++- aligned/tests/test_models_as_feature.py | 26 ++++- pyproject.toml | 2 +- test_data/credit_history_mater.parquet | Bin 1016 -> 1018 bytes test_data/feature-store.json | 2 +- test_data/test_model.parquet | Bin 628 -> 624 bytes test_data/titanic-sets.json | 2 +- test_data/titanic-test.csv | 42 ++++---- test_data/titanic-train.csv | 122 ++++++++++++------------ test_data/titanic-validate.csv | 42 ++++---- 13 files changed, 183 insertions(+), 114 deletions(-) diff --git a/aligned/__init__.py b/aligned/__init__.py index 65146f6..25be87c 100644 --- a/aligned/__init__.py +++ b/aligned/__init__.py @@ -11,7 +11,7 @@ Timestamp, CustomAggregation, ) -from aligned.compiler.model import model_contract +from aligned.compiler.model import model_contract, FeatureInputVersions from aligned.data_source.stream_data_source import HttpStreamSource from aligned.feature_store import FeatureStore from aligned.feature_view import ( @@ -61,4 +61,5 @@ 'CustomAggregation', # Schemas 'FeatureLocation', + 'FeatureInputVersions', ] diff --git a/aligned/compiler/model.py b/aligned/compiler/model.py index 864d34c..545fdf6 100644 --- a/aligned/compiler/model.py +++ b/aligned/compiler/model.py @@ -26,6 +26,7 @@ from aligned.schemas.folder import DatasetStore, JsonDatasetStore from aligned.schemas.literal_value import LiteralValue from aligned.schemas.model import Model as ModelSchema +from aligned.schemas.model import FeatureInputVersions as FeatureVersionSchema from aligned.schemas.model import PredictionsView from aligned.schemas.target import ClassificationTarget as ClassificationTargetSchema from aligned.schemas.target import RegressionTarget as RegressionTargetSchema @@ -41,7 +42,7 @@ @dataclass class ModelMetadata: name: str - features: list[FeatureReferencable] + features: list[FeatureReferencable] | FeatureInputVersions # Will log the feature inputs to a model. Therefore, enabling log and wait etc. # feature_logger: WritableBatchSource | None = field(default=None) contacts: list[str] | None = field(default=None) @@ -120,9 +121,25 @@ def resolve_dataset_store(dataset_store: DatasetStore | StorageFileReference) -> return JsonDatasetStore(dataset_store) +@dataclass +class FeatureInputVersions: + + default_version: str + versions: dict[str, list[FeatureReferencable]] + + def compile(self) -> FeatureVersionSchema: + return FeatureVersionSchema( + default_version=self.default_version, + versions={ + version: [feature.feature_referance() for feature in features] + for version, features in self.versions.items() + }, + ) + + def model_contract( name: str, - features: list[FeatureReferencable], + features: list[FeatureReferencable] | FeatureInputVersions, contacts: list[str] | None = None, tags: dict[str, str] | None = None, description: str | None = None, @@ -240,7 +257,10 @@ class MyModel(ModelContract): inference_view.features.add(feature.feature()) # Needs to run after the feature views have compiled - features: set[FeatureReferance] = {feature.feature_referance() for feature in metadata.features} + if isinstance(metadata.features, FeatureInputVersions): + features = metadata.features.compile() + else: + features = {feature.feature_referance() for feature in metadata.features} for target, probabilities in probability_features.items(): from aligned.schemas.transformation import MapArgMax diff --git a/aligned/feature_store.py b/aligned/feature_store.py index 36f6cf0..e1ebff0 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -10,6 +10,7 @@ from prometheus_client import Histogram from aligned.compiler.model import ModelContractWrapper +from aligned.schemas.model import FeatureInputVersions from aligned.data_file import DataFileReference, upsert_on_column from aligned.data_source.batch_data_source import BatchDataSource from aligned.enricher import Enricher @@ -724,18 +725,29 @@ class ModelFeatureStore: model: ModelSchema store: FeatureStore + selected_version: str | None = None @property def location(self) -> FeatureLocation: return FeatureLocation.model(self.model.name) def raw_string_features(self, except_features: set[str]) -> set[str]: + + if isinstance(self.model.features, FeatureInputVersions): + version = self.selected_version or self.model.features.default_version + features = self.model.features.features_for(version) + else: + features = self.model.features + return { f'{feature.location.identifier}:{feature.name}' - for feature in self.model.features + for feature in features if feature.name not in except_features } + def using_version(self, version: str) -> ModelFeatureStore: + return ModelFeatureStore(self.model, self.store, version) + def request( self, except_features: set[str] | None = None, event_timestamp_column: str | None = None ) -> FeatureRequest: diff --git a/aligned/schemas/model.py b/aligned/schemas/model.py index 6d7f63c..7e0b8a3 100644 --- a/aligned/schemas/model.py +++ b/aligned/schemas/model.py @@ -15,6 +15,20 @@ logger = logging.getLogger(__name__) +@dataclass +class FeatureInputVersions(Codable): + + default_version: str + versions: dict[str, list[FeatureReferance]] + + def features_for(self, version: str) -> list[FeatureReferance]: + return self.versions.get(version, []) + + @property + def default_features(self) -> list[FeatureReferance]: + return self.features_for(self.default_version) + + @dataclass class Target(Codable): estimating: FeatureReferance @@ -111,7 +125,7 @@ def labels(self) -> set[Feature]: @dataclass class Model(Codable): name: str - features: set[FeatureReferance] + features: set[FeatureReferance] | FeatureInputVersions predictions_view: PredictionsView description: str | None = field(default=None) contacts: list[str] | None = field(default=None) diff --git a/aligned/tests/test_models_as_feature.py b/aligned/tests/test_models_as_feature.py index f6763a5..fbbf35f 100644 --- a/aligned/tests/test_models_as_feature.py +++ b/aligned/tests/test_models_as_feature.py @@ -1,6 +1,6 @@ from aligned import Bool, FeatureStore, FileSource, Int32, String from aligned.feature_view.feature_view import feature_view -from aligned.compiler.model import model_contract +from aligned.compiler.model import FeatureInputVersions, model_contract from aligned.schemas.feature import FeatureLocation @@ -25,7 +25,16 @@ class OtherView: other = OtherView() -@model_contract('test_model', features=[view.feature_a, other.feature_b]) +@model_contract( + 'test_model', + features=FeatureInputVersions( + default_version='v1', + versions={ + 'v1': [view.feature_a, other.feature_b], + 'v2': [view.feature_a, other.feature_b, other.is_true], + }, + ), +) class First: target = other.is_true.as_classification_label() @@ -43,6 +52,7 @@ class Second: def test_model_referenced_as_feature() -> None: model = Second.compile() # type: ignore + assert isinstance(model.features, set) feature = list(model.features)[0] assert feature.location == FeatureLocation.model('test_model') @@ -60,3 +70,15 @@ def test_model_request() -> None: model_request = store.model('test_model').request() assert model_request.features_to_include == {'feature_a', 'feature_b', 'view_id', 'other_id'} + + +def test_model_version() -> None: + store = FeatureStore.experimental() + store.add_feature_view(View) # type: ignore + store.add_feature_view(OtherView) # type: ignore + store.add_model(First) + + assert len(store.feature_views) == 2 + + model_request = store.model('test_model').using_version('v2').request() + assert model_request.features_to_include == {'feature_a', 'is_true', 'feature_b', 'view_id', 'other_id'} diff --git a/pyproject.toml b/pyproject.toml index da1b729..e55a47e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "aligned" -version = "0.0.55" +version = "0.0.56" description = "A data managment and lineage tool for ML applications." authors = ["Mats E. Mollestad "] license = "Apache-2.0" diff --git a/test_data/credit_history_mater.parquet b/test_data/credit_history_mater.parquet index 8604240f02f4657214eaaf8a42092361f0bbd16f..da99097c07a043485cb70e11dd281d8cbb9549b8 100644 GIT binary patch delta 481 zcmeyt{)?R@z%j^hVzh#asj-2Pp=G?Op}8fmpo)=!g`ruzg}Jdgr=W&~nSqI+alDb~ z#QCy}9257+3mK$8bYSGrVW`;dzyJgfc0QQ+UW4)6#Jvi_4V-7dQp-1jr5JYXc9=Yy zQQ0U-t%aFEQbvqJw1-g)s0YYA$0(WvWwtOwn4&C_J{~MZ`S~THY?Bii4OK+BRGB1% zlZ#SQGE3r<6N^&fQ%Y0gi%WAE#MmTdByH5DFv?6^D9yvcAhwM~RE$Aw1Cz|e59&PU z7#YOQfCZj0PoBl7A_BHw>=8&v>>868n;M7Mg~_^1No@I!KoB;0DwBV`YdMhV?pGNV z9#oLxW|6l53`W?7XIZs2Ghps%lA1yb*r4kFw^grlnqkO*{i@^o|p)2@!r zU~!OPKoC#_)MNn^iga{JcXV}h3Iz%}Ri$`@Wk-dZ8blaXltqS{rbT&#spLc$8RkTW zn*bHK6a#hX`+y99m=+0QItB$!-p(W|r4MpQInYi=M-Py2l)GhSgt1>$Qu^e5%tjpB Km>3v9@d^N=cYk02 delta 551 zcmeyx{)62lz%j^Blugt`)FcK-F@Q0PD1#sa12ek>0|O%?$Bx|&jO;oMoM#*uKw$Ys zhsi$}m01$hI2b0fD5zPO8JHLv#~Ya%n{x_km>L@x8Cu4h8k$@33aS_xSQwhcTbLW0 zPoB#t&A4si0R^cCJ0CbOa_AVOKZKZ9vE4zGOH!4AO>%M~qqv@H;KPCoA z88Hsg1TmncK-M-!(H=%9`yZ2N5{N0vBI)D7Qk0)xBFe@f%B9LQ`9GuRWDiC|4ly=K z8A%(pjLF{_Rd_*872CohD#D<4h-va1Mj4T9j0|EYSVScl)E+R)AXzVsY`xe67Ev(< zv1>pVsd0#%o2<{2#1`oY1mTmXGx^uM1p=8Keg-MwK{-izL1{_umKjMomf0!cVQER> zVHrv3KzaWveSLk4VvsIR5a9$8baZk960VMp!H$lBAUep=5iITtl5})*u>fk)cLFO( z2J)STqlQGR}jC<}uqiz<@@Vl@%D^Tm!I&t@B*r0DAeNyvXEFz)8fOoaJ_wjwOg3kn&+h8z=;)N;IQcGP2zNS= Z?VgkE=xA#@*@DS{V?QGULx5wDApj?|BZB|{ diff --git a/test_data/titanic-sets.json b/test_data/titanic-sets.json index 8ddf5e7..bb482a1 100644 --- a/test_data/titanic-sets.json +++ b/test_data/titanic-sets.json @@ -1 +1 @@ -{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} +{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} diff --git a/test_data/titanic-test.csv b/test_data/titanic-test.csv index a42042e..515a1ad 100644 --- a/test_data/titanic-test.csv +++ b/test_data/titanic-test.csv @@ -1,21 +1,21 @@ -name,sex,is_female,is_mr,is_male,age,sibsp,has_siblings,passenger_id,cabin,survived -"Sirayanian, Mr. Orsen",male,False,True,True,22.0,0,False,61,,False -"Icard, Miss. Amelie",female,True,False,False,38.0,0,False,62,B28,True -"Harris, Mr. Henry Birkhardt",male,False,True,True,45.0,1,True,63,C83,False -"Skoog, Master. Harald",male,False,False,True,4.0,3,True,64,,False -"Stewart, Mr. Albert A",male,False,True,True,,0,False,65,,False -"Moubarek, Master. Gerios",male,False,False,True,,1,True,66,,True -"Nye, Mrs. (Elizabeth Ramell)",female,True,True,False,29.0,0,False,67,F33,True -"Crease, Mr. Ernest James",male,False,True,True,19.0,0,False,68,,False -"Andersson, Miss. Erna Alexandra",female,True,False,False,17.0,4,True,69,,True -"Kink, Mr. Vincenz",male,False,True,True,26.0,2,True,70,,False -"Jenkin, Mr. Stephen Curnow",male,False,True,True,32.0,0,False,71,,False -"Goodwin, Miss. Lillian Amy",female,True,False,False,16.0,5,True,72,,False -"Hood, Mr. Ambrose Jr",male,False,True,True,21.0,0,False,73,,False -"Chronopoulos, Mr. Apostolos",male,False,True,True,26.0,1,True,74,,False -"Bing, Mr. Lee",male,False,True,True,32.0,0,False,75,,True -"Moen, Mr. Sigurd Hansen",male,False,True,True,25.0,0,False,76,F G73,False -"Staneff, Mr. Ivan",male,False,True,True,,0,False,77,,False -"Moutal, Mr. Rahamin Haim",male,False,True,True,,0,False,78,,False -"Caldwell, Master. Alden Gates",male,False,False,True,0.83,0,False,79,,True -"Dowdell, Miss. Elizabeth",female,True,False,False,30.0,0,False,80,,True +is_male,age,name,is_mr,has_siblings,cabin,is_female,passenger_id,survived,sex,sibsp +True,22.0,"Sirayanian, Mr. Orsen",True,False,,False,61,False,male,0 +False,38.0,"Icard, Miss. Amelie",False,False,B28,True,62,True,female,0 +True,45.0,"Harris, Mr. Henry Birkhardt",True,True,C83,False,63,False,male,1 +True,4.0,"Skoog, Master. Harald",False,True,,False,64,False,male,3 +True,,"Stewart, Mr. Albert A",True,False,,False,65,False,male,0 +True,,"Moubarek, Master. Gerios",False,True,,False,66,True,male,1 +False,29.0,"Nye, Mrs. (Elizabeth Ramell)",True,False,F33,True,67,True,female,0 +True,19.0,"Crease, Mr. Ernest James",True,False,,False,68,False,male,0 +False,17.0,"Andersson, Miss. Erna Alexandra",False,True,,True,69,True,female,4 +True,26.0,"Kink, Mr. Vincenz",True,True,,False,70,False,male,2 +True,32.0,"Jenkin, Mr. Stephen Curnow",True,False,,False,71,False,male,0 +False,16.0,"Goodwin, Miss. Lillian Amy",False,True,,True,72,False,female,5 +True,21.0,"Hood, Mr. Ambrose Jr",True,False,,False,73,False,male,0 +True,26.0,"Chronopoulos, Mr. Apostolos",True,True,,False,74,False,male,1 +True,32.0,"Bing, Mr. Lee",True,False,,False,75,True,male,0 +True,25.0,"Moen, Mr. Sigurd Hansen",True,False,F G73,False,76,False,male,0 +True,,"Staneff, Mr. Ivan",True,False,,False,77,False,male,0 +True,,"Moutal, Mr. Rahamin Haim",True,False,,False,78,False,male,0 +True,0.83,"Caldwell, Master. Alden Gates",False,False,,False,79,True,male,0 +False,30.0,"Dowdell, Miss. Elizabeth",False,False,,True,80,True,female,0 diff --git a/test_data/titanic-train.csv b/test_data/titanic-train.csv index 336326e..4c1b0c8 100644 --- a/test_data/titanic-train.csv +++ b/test_data/titanic-train.csv @@ -1,61 +1,61 @@ -name,sex,is_female,is_mr,is_male,age,sibsp,has_siblings,passenger_id,cabin,survived -"Braund, Mr. Owen Harris",male,False,True,True,22.0,1,True,1,,False -"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,True,True,False,38.0,1,True,2,C85,True -"Heikkinen, Miss. Laina",female,True,False,False,26.0,0,False,3,,True -"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,True,True,False,35.0,1,True,4,C123,True -"Allen, Mr. William Henry",male,False,True,True,35.0,0,False,5,,False -"Moran, Mr. James",male,False,True,True,,0,False,6,,False -"McCarthy, Mr. Timothy J",other,False,True,False,54.0,0,False,7,E46,False -"Palsson, Master. Gosta Leonard",male,False,False,True,2.0,3,True,8,,False -"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,True,True,False,27.0,0,False,9,,True -"Nasser, Mrs. Nicholas (Adele Achem)",female,True,True,False,14.0,1,True,10,,True -"Sandstrom, Miss. Marguerite Rut",female,True,False,False,4.0,1,True,11,G6,True -"Bonnell, Miss. Elizabeth",female,True,False,False,58.0,0,False,12,C103,True -"Saundercock, Mr. William Henry",male,False,True,True,20.0,0,False,13,,False -"Andersson, Mr. Anders Johan",male,False,True,True,39.0,1,True,14,,False -"Vestrom, Miss. Hulda Amanda Adolfina",female,True,False,False,14.0,0,False,15,,False -"Hewlett, Mrs. (Mary D Kingcome) ",female,True,True,False,55.0,0,False,16,,True -"Rice, Master. Eugene",male,False,False,True,2.0,4,True,17,,False -"Williams, Mr. Charles Eugene",male,False,True,True,,0,False,18,,True -"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,True,True,False,31.0,1,True,19,,False -"Masselmani, Mrs. Fatima",female,True,True,False,,0,False,20,,True -"Fynney, Mr. Joseph J",male,False,True,True,35.0,0,False,21,,False -"Beesley, Mr. Lawrence",male,False,True,True,34.0,0,False,22,D56,True -"McGowan, Miss. Anna ""Annie""",female,True,False,False,15.0,0,False,23,,True -"Sloper, Mr. William Thompson",male,False,True,True,28.0,0,False,24,A6,True -"Palsson, Miss. Torborg Danira",female,True,False,False,8.0,3,True,25,,False -"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,True,True,False,38.0,1,True,26,,True -"Emir, Mr. Farred Chehab",male,False,True,True,,0,False,27,,False -"Fortune, Mr. Charles Alexander",male,False,True,True,19.0,3,True,28,C23 C25 C27,False -"O'Dwyer, Miss. Ellen ""Nellie""",female,True,False,False,,0,False,29,,True -"Todoroff, Mr. Lalio",male,False,True,True,,0,False,30,,False -"Uruchurtu, Don. Manuel E",male,False,False,True,40.0,0,False,31,,False -"Spencer, Mrs. William Augustus (Marie Eugenie)",female,True,True,False,,1,True,32,B78,True -"Glynn, Miss. Mary Agatha",female,True,False,False,,0,False,33,,True -"Wheadon, Mr. Edward H",male,False,True,True,66.0,0,False,34,,False -"Meyer, Mr. Edgar Joseph",male,False,True,True,28.0,1,True,35,,False -"Holverson, Mr. Alexander Oskar",male,False,True,True,42.0,1,True,36,,False -"Mamee, Mr. Hanna",male,False,True,True,,0,False,37,,True -"Cann, Mr. Ernest Charles",male,False,True,True,21.0,0,False,38,,False -"Vander Planke, Miss. Augusta Maria",female,True,False,False,18.0,2,True,39,,False -"Nicola-Yarred, Miss. Jamila",female,True,False,False,14.0,1,True,40,,True -"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,True,True,False,40.0,1,True,41,,False -"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,True,True,False,27.0,1,True,42,,False -"Kraeff, Mr. Theodor",male,False,True,True,,0,False,43,,False -"Laroche, Miss. Simonne Marie Anne Andree",female,True,False,False,3.0,1,True,44,,True -"Devaney, Miss. Margaret Delia",female,True,False,False,19.0,0,False,45,,True -"Rogers, Mr. William John",male,False,True,True,,0,False,46,,False -"Lennon, Mr. Denis",male,False,True,True,,1,True,47,,False -"O'Driscoll, Miss. Bridget",female,True,False,False,,0,False,48,,True -"Samaan, Mr. Youssef",male,False,True,True,,2,True,49,,False -"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,True,True,False,18.0,1,True,50,,False -"Panula, Master. Juha Niilo",male,False,False,True,7.0,4,True,51,,False -"Nosworthy, Mr. Richard Cater",male,False,True,True,21.0,0,False,52,,False -"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,True,True,False,49.0,1,True,53,D33,True -"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,True,True,False,29.0,1,True,54,,True -"Ostby, Mr. Engelhart Cornelius",male,False,True,True,65.0,0,False,55,B30,False -"Woolner, Mr. Hugh",male,False,True,True,,0,False,56,C52,True -"Rugg, Miss. Emily",female,True,False,False,21.0,0,False,57,,True -"Novel, Mr. Mansouer",male,False,True,True,28.5,0,False,58,,False -"West, Miss. Constance Mirium",female,True,False,False,5.0,1,True,59,,True -"Goodwin, Master. William Frederick",male,False,False,True,11.0,5,True,60,,False +is_male,age,name,is_mr,has_siblings,cabin,is_female,passenger_id,survived,sex,sibsp +True,22.0,"Braund, Mr. Owen Harris",True,True,,False,1,False,male,1 +False,38.0,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",True,True,C85,True,2,True,female,1 +False,26.0,"Heikkinen, Miss. Laina",False,False,,True,3,True,female,0 +False,35.0,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",True,True,C123,True,4,True,female,1 +True,35.0,"Allen, Mr. William Henry",True,False,,False,5,False,male,0 +True,,"Moran, Mr. James",True,False,,False,6,False,male,0 +False,54.0,"McCarthy, Mr. Timothy J",True,False,E46,False,7,False,other,0 +True,2.0,"Palsson, Master. Gosta Leonard",False,True,,False,8,False,male,3 +False,27.0,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",True,False,,True,9,True,female,0 +False,14.0,"Nasser, Mrs. Nicholas (Adele Achem)",True,True,,True,10,True,female,1 +False,4.0,"Sandstrom, Miss. Marguerite Rut",False,True,G6,True,11,True,female,1 +False,58.0,"Bonnell, Miss. Elizabeth",False,False,C103,True,12,True,female,0 +True,20.0,"Saundercock, Mr. William Henry",True,False,,False,13,False,male,0 +True,39.0,"Andersson, Mr. Anders Johan",True,True,,False,14,False,male,1 +False,14.0,"Vestrom, Miss. Hulda Amanda Adolfina",False,False,,True,15,False,female,0 +False,55.0,"Hewlett, Mrs. (Mary D Kingcome) ",True,False,,True,16,True,female,0 +True,2.0,"Rice, Master. Eugene",False,True,,False,17,False,male,4 +True,,"Williams, Mr. Charles Eugene",True,False,,False,18,True,male,0 +False,31.0,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",True,True,,True,19,False,female,1 +False,,"Masselmani, Mrs. Fatima",True,False,,True,20,True,female,0 +True,35.0,"Fynney, Mr. Joseph J",True,False,,False,21,False,male,0 +True,34.0,"Beesley, Mr. Lawrence",True,False,D56,False,22,True,male,0 +False,15.0,"McGowan, Miss. Anna ""Annie""",False,False,,True,23,True,female,0 +True,28.0,"Sloper, Mr. William Thompson",True,False,A6,False,24,True,male,0 +False,8.0,"Palsson, Miss. Torborg Danira",False,True,,True,25,False,female,3 +False,38.0,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",True,True,,True,26,True,female,1 +True,,"Emir, Mr. Farred Chehab",True,False,,False,27,False,male,0 +True,19.0,"Fortune, Mr. Charles Alexander",True,True,C23 C25 C27,False,28,False,male,3 +False,,"O'Dwyer, Miss. Ellen ""Nellie""",False,False,,True,29,True,female,0 +True,,"Todoroff, Mr. Lalio",True,False,,False,30,False,male,0 +True,40.0,"Uruchurtu, Don. Manuel E",False,False,,False,31,False,male,0 +False,,"Spencer, Mrs. William Augustus (Marie Eugenie)",True,True,B78,True,32,True,female,1 +False,,"Glynn, Miss. Mary Agatha",False,False,,True,33,True,female,0 +True,66.0,"Wheadon, Mr. Edward H",True,False,,False,34,False,male,0 +True,28.0,"Meyer, Mr. Edgar Joseph",True,True,,False,35,False,male,1 +True,42.0,"Holverson, Mr. Alexander Oskar",True,True,,False,36,False,male,1 +True,,"Mamee, Mr. Hanna",True,False,,False,37,True,male,0 +True,21.0,"Cann, Mr. Ernest Charles",True,False,,False,38,False,male,0 +False,18.0,"Vander Planke, Miss. Augusta Maria",False,True,,True,39,False,female,2 +False,14.0,"Nicola-Yarred, Miss. Jamila",False,True,,True,40,True,female,1 +False,40.0,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",True,True,,True,41,False,female,1 +False,27.0,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",True,True,,True,42,False,female,1 +True,,"Kraeff, Mr. Theodor",True,False,,False,43,False,male,0 +False,3.0,"Laroche, Miss. Simonne Marie Anne Andree",False,True,,True,44,True,female,1 +False,19.0,"Devaney, Miss. Margaret Delia",False,False,,True,45,True,female,0 +True,,"Rogers, Mr. William John",True,False,,False,46,False,male,0 +True,,"Lennon, Mr. Denis",True,True,,False,47,False,male,1 +False,,"O'Driscoll, Miss. Bridget",False,False,,True,48,True,female,0 +True,,"Samaan, Mr. Youssef",True,True,,False,49,False,male,2 +False,18.0,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",True,True,,True,50,False,female,1 +True,7.0,"Panula, Master. Juha Niilo",False,True,,False,51,False,male,4 +True,21.0,"Nosworthy, Mr. Richard Cater",True,False,,False,52,False,male,0 +False,49.0,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",True,True,D33,True,53,True,female,1 +False,29.0,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",True,True,,True,54,True,female,1 +True,65.0,"Ostby, Mr. Engelhart Cornelius",True,False,B30,False,55,False,male,0 +True,,"Woolner, Mr. Hugh",True,False,C52,False,56,True,male,0 +False,21.0,"Rugg, Miss. Emily",False,False,,True,57,True,female,0 +True,28.5,"Novel, Mr. Mansouer",True,False,,False,58,False,male,0 +False,5.0,"West, Miss. Constance Mirium",False,True,,True,59,True,female,1 +True,11.0,"Goodwin, Master. William Frederick",False,True,,False,60,False,male,5 diff --git a/test_data/titanic-validate.csv b/test_data/titanic-validate.csv index 2c6ef7e..d1fb16d 100644 --- a/test_data/titanic-validate.csv +++ b/test_data/titanic-validate.csv @@ -1,21 +1,21 @@ -name,sex,is_female,is_mr,is_male,age,sibsp,has_siblings,passenger_id,cabin,survived -"Waelens, Mr. Achille",male,False,True,True,22.0,0,False,81,,False -"Sheerlinck, Mr. Jan Baptist",male,False,True,True,29.0,0,False,82,,True -"McDermott, Miss. Brigdet Delia",female,True,False,False,,0,False,83,,True -"Carrau, Mr. Francisco M",male,False,True,True,28.0,0,False,84,,False -"Ilett, Miss. Bertha",female,True,False,False,17.0,0,False,85,,True -"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,True,True,False,33.0,3,True,86,,True -"Ford, Mr. William Neal",male,False,True,True,16.0,1,True,87,,False -"Slocovski, Mr. Selman Francis",male,False,True,True,,0,False,88,,False -"Fortune, Miss. Mabel Helen",female,True,False,False,23.0,3,True,89,C23 C25 C27,True -"Celotti, Mr. Francesco",male,False,True,True,24.0,0,False,90,,False -"Christmann, Mr. Emil",male,False,True,True,29.0,0,False,91,,False -"Andreasson, Mr. Paul Edvin",male,False,True,True,20.0,0,False,92,,False -"Chaffee, Mr. Herbert Fuller",male,False,True,True,46.0,1,True,93,E31,False -"Dean, Mr. Bertram Frank",male,False,True,True,26.0,1,True,94,,False -"Coxon, Mr. Daniel",male,False,True,True,59.0,0,False,95,,False -"Shorney, Mr. Charles Joseph",male,False,True,True,,0,False,96,,False -"Goldschmidt, Mr. George B",male,False,True,True,71.0,0,False,97,A5,False -"Greenfield, Mr. William Bertram",male,False,True,True,23.0,0,False,98,D10 D12,True -"Doling, Mrs. John T (Ada Julia Bone)",female,True,True,False,34.0,0,False,99,,True -"Kantor, Mr. Sinai",male,False,True,True,34.0,1,True,100,,False +is_male,age,name,is_mr,has_siblings,cabin,is_female,passenger_id,survived,sex,sibsp +True,22.0,"Waelens, Mr. Achille",True,False,,False,81,False,male,0 +True,29.0,"Sheerlinck, Mr. Jan Baptist",True,False,,False,82,True,male,0 +False,,"McDermott, Miss. Brigdet Delia",False,False,,True,83,True,female,0 +True,28.0,"Carrau, Mr. Francisco M",True,False,,False,84,False,male,0 +False,17.0,"Ilett, Miss. Bertha",False,False,,True,85,True,female,0 +False,33.0,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",True,True,,True,86,True,female,3 +True,16.0,"Ford, Mr. William Neal",True,True,,False,87,False,male,1 +True,,"Slocovski, Mr. Selman Francis",True,False,,False,88,False,male,0 +False,23.0,"Fortune, Miss. Mabel Helen",False,True,C23 C25 C27,True,89,True,female,3 +True,24.0,"Celotti, Mr. Francesco",True,False,,False,90,False,male,0 +True,29.0,"Christmann, Mr. Emil",True,False,,False,91,False,male,0 +True,20.0,"Andreasson, Mr. Paul Edvin",True,False,,False,92,False,male,0 +True,46.0,"Chaffee, Mr. Herbert Fuller",True,True,E31,False,93,False,male,1 +True,26.0,"Dean, Mr. Bertram Frank",True,True,,False,94,False,male,1 +True,59.0,"Coxon, Mr. Daniel",True,False,,False,95,False,male,0 +True,,"Shorney, Mr. Charles Joseph",True,False,,False,96,False,male,0 +True,71.0,"Goldschmidt, Mr. George B",True,False,A5,False,97,False,male,0 +True,23.0,"Greenfield, Mr. William Bertram",True,False,D10 D12,False,98,True,male,0 +False,34.0,"Doling, Mrs. John T (Ada Julia Bone)",True,False,,True,99,True,female,0 +True,34.0,"Kantor, Mr. Sinai",True,True,,False,100,False,male,1