From 0aa536494dd1ee41a817ac708e85f1d54a7aaf57 Mon Sep 17 00:00:00 2001 From: "Mats E. Mollestad" Date: Sun, 3 Mar 2024 20:28:26 +0100 Subject: [PATCH] Minor bug fixes --- aligned/compiler/model.py | 3 +- aligned/schemas/folder.py | 10 +- aligned/sources/local.py | 18 ++- aligned/tests/test_train_test_validate_set.py | 5 +- pyproject.toml | 2 +- test_data/titanic-sets.json | 2 +- test_data/titanic-test.csv | 42 +++--- test_data/titanic-train.csv | 122 +++++++++--------- test_data/titanic-validate.csv | 42 +++--- 9 files changed, 127 insertions(+), 119 deletions(-) diff --git a/aligned/compiler/model.py b/aligned/compiler/model.py index 9e5f6b0..4c0030f 100644 --- a/aligned/compiler/model.py +++ b/aligned/compiler/model.py @@ -26,7 +26,6 @@ from aligned.schemas.derivied_feature import DerivedFeature from aligned.schemas.feature import Feature, FeatureLocation, FeatureReferance, FeatureType from aligned.schemas.feature_view import CompiledFeatureView -from aligned.schemas.folder import DatasetStore, JsonDatasetStore from aligned.schemas.literal_value import LiteralValue from aligned.schemas.model import Model as ModelSchema from aligned.schemas.model import FeatureInputVersions as FeatureVersionSchema @@ -36,6 +35,7 @@ if TYPE_CHECKING: from aligned.sources.local import StorageFileReference + from aligned.schemas.folder import DatasetStore logger = logging.getLogger(__name__) @@ -199,6 +199,7 @@ def join_asof(self, view: FeatureViewWrapper, on_left: list[str], on_right: list def resolve_dataset_store(dataset_store: DatasetStore | StorageFileReference) -> DatasetStore: + from aligned.schemas.folder import DatasetStore, JsonDatasetStore if isinstance(dataset_store, DatasetStore): return dataset_store diff --git a/aligned/schemas/folder.py b/aligned/schemas/folder.py index c64240b..934dfa0 100644 --- a/aligned/schemas/folder.py +++ b/aligned/schemas/folder.py @@ -1,22 +1,18 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from dataclasses import dataclass, field from mashumaro.types import SerializableType from aligned.data_source.batch_data_source import BatchDataSource from aligned.request.retrival_request import RequestResult +from aligned.sources.local import StorageFileSource from aligned.schemas.codable import Codable -if TYPE_CHECKING: - from aligned.sources.local import StorageFileReference - class DatasetStorageFactory: - supported_stores: dict[str, type[DatasetStore]] + supported_stores: dict[str, type[DatasetStore]] = dict() _shared: DatasetStorageFactory | None = None @@ -130,7 +126,7 @@ async def delete_metadata_for(self, dataset_id: str) -> DatasetMetadata | None: @dataclass class JsonDatasetStore(DatasetStore): - source: StorageFileReference + source: StorageFileSource name = 'json' async def list_datasets(self) -> GroupedDatasetList: diff --git a/aligned/sources/local.py b/aligned/sources/local.py index d7dc7ef..a2ccf64 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -20,15 +20,15 @@ from aligned.s3.storage import FileStorage, HttpStorage from aligned.schemas.codable import Codable from aligned.schemas.feature import EventTimestamp, FeatureType -from aligned.schemas.repo_definition import RepoDefinition from aligned.storage import Storage -from aligned.feature_store import FeatureStore from aligned.feature_source import WritableFeatureSource from aligned.schemas.date_formatter import DateFormatter if TYPE_CHECKING: from aligned.compiler.feature_factory import FeatureFactory from datetime import datetime + from aligned.schemas.repo_definition import RepoDefinition + from aligned.feature_store import FeatureStore logger = logging.getLogger(__name__) @@ -39,6 +39,8 @@ async def as_repo_definition(self) -> RepoDefinition: raise NotImplementedError() async def feature_store(self) -> FeatureStore: + from aligned.feature_store import FeatureStore + return FeatureStore.from_definition(await self.as_repo_definition()) @@ -56,6 +58,8 @@ async def write(self, content: bytes) -> None: raise NotImplementedError(type(self)) async def as_repo_definition(self) -> RepoDefinition: + from aligned.schemas.repo_definition import RepoDefinition + file = await self.read() return RepoDefinition.from_json(file) @@ -270,7 +274,7 @@ async def write_pandas(self, df: pd.DataFrame) -> None: async def to_lazy_polars(self) -> pl.LazyFrame: - if not do_file_exist(self.path): + if (not self.path.startswith('http')) and (not do_file_exist(self.path)): raise UnableToFindFileException(self.path) try: @@ -307,7 +311,11 @@ def multi_source_features_for( ) async def schema(self) -> dict[str, FeatureFactory]: - parquet_schema = pl.read_parquet_schema(self.path) + if self.path.startswith('http'): + parquet_schema = pl.scan_parquet(self.path).schema + else: + parquet_schema = pl.read_parquet_schema(self.path) + return { name: FeatureType.from_polars(pl_type).feature_factory for name, pl_type in parquet_schema.items() } @@ -407,7 +415,7 @@ async def upsert(self, job: RetrivalJob, requests: list[RetrivalRequest]) -> Non @dataclass -class StorageFileSource(StorageFileReference): +class StorageFileSource(StorageFileReference, Codable): path: str diff --git a/aligned/tests/test_train_test_validate_set.py b/aligned/tests/test_train_test_validate_set.py index 02113a0..53e6815 100644 --- a/aligned/tests/test_train_test_validate_set.py +++ b/aligned/tests/test_train_test_validate_set.py @@ -98,7 +98,10 @@ async def test_train_test_validate_set_new(titanic_feature_store: FeatureStore) test = await dataset.test.to_pandas() validate = await dataset.validate.to_pandas() - datasets = await JsonDatasetStore(dataset_store).list_datasets() + store = JsonDatasetStore(dataset_store) + datasets = await store.list_datasets() + + assert store.to_json() != None assert len(datasets.train_test_validation) == 1 train_dataset = datasets.train_test_validation[0] diff --git a/pyproject.toml b/pyproject.toml index 143724c..93a8482 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "aligned" -version = "0.0.73" +version = "0.0.74" description = "A data managment and lineage tool for ML applications." authors = ["Mats E. Mollestad "] license = "Apache-2.0" diff --git a/test_data/titanic-sets.json b/test_data/titanic-sets.json index 986ba66..2b344e4 100644 --- a/test_data/titanic-sets.json +++ b/test_data/titanic-sets.json @@ -1 +1 @@ -{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}, {"name": "optional"}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} +{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} diff --git a/test_data/titanic-test.csv b/test_data/titanic-test.csv index 6aac248..c83fe88 100644 --- a/test_data/titanic-test.csv +++ b/test_data/titanic-test.csv @@ -1,21 +1,21 @@ -has_siblings,name,sex,cabin,sibsp,age,is_female,is_male,is_mr,survived,passenger_id -False,"Sirayanian, Mr. Orsen",male,,0,22.0,False,True,True,False,61 -False,"Icard, Miss. Amelie",female,B28,0,38.0,True,False,False,True,62 -True,"Harris, Mr. Henry Birkhardt",male,C83,1,45.0,False,True,True,False,63 -True,"Skoog, Master. Harald",male,,3,4.0,False,True,False,False,64 -False,"Stewart, Mr. Albert A",male,,0,,False,True,True,False,65 -True,"Moubarek, Master. Gerios",male,,1,,False,True,False,True,66 -False,"Nye, Mrs. (Elizabeth Ramell)",female,F33,0,29.0,True,False,True,True,67 -False,"Crease, Mr. Ernest James",male,,0,19.0,False,True,True,False,68 -True,"Andersson, Miss. Erna Alexandra",female,,4,17.0,True,False,False,True,69 -True,"Kink, Mr. Vincenz",male,,2,26.0,False,True,True,False,70 -False,"Jenkin, Mr. Stephen Curnow",male,,0,32.0,False,True,True,False,71 -True,"Goodwin, Miss. Lillian Amy",female,,5,16.0,True,False,False,False,72 -False,"Hood, Mr. Ambrose Jr",male,,0,21.0,False,True,True,False,73 -True,"Chronopoulos, Mr. Apostolos",male,,1,26.0,False,True,True,False,74 -False,"Bing, Mr. Lee",male,,0,32.0,False,True,True,True,75 -False,"Moen, Mr. Sigurd Hansen",male,F G73,0,25.0,False,True,True,False,76 -False,"Staneff, Mr. Ivan",male,,0,,False,True,True,False,77 -False,"Moutal, Mr. Rahamin Haim",male,,0,,False,True,True,False,78 -False,"Caldwell, Master. Alden Gates",male,,0,0.83,False,True,False,True,79 -False,"Dowdell, Miss. Elizabeth",female,,0,30.0,True,False,False,True,80 +name,sibsp,is_male,cabin,age,passenger_id,survived,has_siblings,is_female,is_mr,sex +"Sirayanian, Mr. Orsen",0,True,,22.0,61,False,False,False,True,male +"Icard, Miss. Amelie",0,False,B28,38.0,62,True,False,True,False,female +"Harris, Mr. Henry Birkhardt",1,True,C83,45.0,63,False,True,False,True,male +"Skoog, Master. Harald",3,True,,4.0,64,False,True,False,False,male +"Stewart, Mr. Albert A",0,True,,,65,False,False,False,True,male +"Moubarek, Master. Gerios",1,True,,,66,True,True,False,False,male +"Nye, Mrs. (Elizabeth Ramell)",0,False,F33,29.0,67,True,False,True,True,female +"Crease, Mr. Ernest James",0,True,,19.0,68,False,False,False,True,male +"Andersson, Miss. Erna Alexandra",4,False,,17.0,69,True,True,True,False,female +"Kink, Mr. Vincenz",2,True,,26.0,70,False,True,False,True,male +"Jenkin, Mr. Stephen Curnow",0,True,,32.0,71,False,False,False,True,male +"Goodwin, Miss. Lillian Amy",5,False,,16.0,72,False,True,True,False,female +"Hood, Mr. Ambrose Jr",0,True,,21.0,73,False,False,False,True,male +"Chronopoulos, Mr. Apostolos",1,True,,26.0,74,False,True,False,True,male +"Bing, Mr. Lee",0,True,,32.0,75,True,False,False,True,male +"Moen, Mr. Sigurd Hansen",0,True,F G73,25.0,76,False,False,False,True,male +"Staneff, Mr. Ivan",0,True,,,77,False,False,False,True,male +"Moutal, Mr. Rahamin Haim",0,True,,,78,False,False,False,True,male +"Caldwell, Master. Alden Gates",0,True,,0.83,79,True,False,False,False,male +"Dowdell, Miss. Elizabeth",0,False,,30.0,80,True,False,True,False,female diff --git a/test_data/titanic-train.csv b/test_data/titanic-train.csv index 9ad5e1c..a957a41 100644 --- a/test_data/titanic-train.csv +++ b/test_data/titanic-train.csv @@ -1,61 +1,61 @@ -has_siblings,name,sex,cabin,sibsp,age,is_female,is_male,is_mr,survived,passenger_id -True,"Braund, Mr. Owen Harris",male,,1,22.0,False,True,True,False,1 -True,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,C85,1,38.0,True,False,True,True,2 -False,"Heikkinen, Miss. Laina",female,,0,26.0,True,False,False,True,3 -True,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,C123,1,35.0,True,False,True,True,4 -False,"Allen, Mr. William Henry",male,,0,35.0,False,True,True,False,5 -False,"Moran, Mr. James",male,,0,,False,True,True,False,6 -False,"McCarthy, Mr. Timothy J",other,E46,0,54.0,False,False,True,False,7 -True,"Palsson, Master. Gosta Leonard",male,,3,2.0,False,True,False,False,8 -False,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,,0,27.0,True,False,True,True,9 -True,"Nasser, Mrs. Nicholas (Adele Achem)",female,,1,14.0,True,False,True,True,10 -True,"Sandstrom, Miss. Marguerite Rut",female,G6,1,4.0,True,False,False,True,11 -False,"Bonnell, Miss. Elizabeth",female,C103,0,58.0,True,False,False,True,12 -False,"Saundercock, Mr. William Henry",male,,0,20.0,False,True,True,False,13 -True,"Andersson, Mr. Anders Johan",male,,1,39.0,False,True,True,False,14 -False,"Vestrom, Miss. Hulda Amanda Adolfina",female,,0,14.0,True,False,False,False,15 -False,"Hewlett, Mrs. (Mary D Kingcome) ",female,,0,55.0,True,False,True,True,16 -True,"Rice, Master. Eugene",male,,4,2.0,False,True,False,False,17 -False,"Williams, Mr. Charles Eugene",male,,0,,False,True,True,True,18 -True,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,,1,31.0,True,False,True,False,19 -False,"Masselmani, Mrs. Fatima",female,,0,,True,False,True,True,20 -False,"Fynney, Mr. Joseph J",male,,0,35.0,False,True,True,False,21 -False,"Beesley, Mr. Lawrence",male,D56,0,34.0,False,True,True,True,22 -False,"McGowan, Miss. Anna ""Annie""",female,,0,15.0,True,False,False,True,23 -False,"Sloper, Mr. William Thompson",male,A6,0,28.0,False,True,True,True,24 -True,"Palsson, Miss. Torborg Danira",female,,3,8.0,True,False,False,False,25 -True,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,,1,38.0,True,False,True,True,26 -False,"Emir, Mr. Farred Chehab",male,,0,,False,True,True,False,27 -True,"Fortune, Mr. Charles Alexander",male,C23 C25 C27,3,19.0,False,True,True,False,28 -False,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,,True,False,False,True,29 -False,"Todoroff, Mr. Lalio",male,,0,,False,True,True,False,30 -False,"Uruchurtu, Don. Manuel E",male,,0,40.0,False,True,False,False,31 -True,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,B78,1,,True,False,True,True,32 -False,"Glynn, Miss. Mary Agatha",female,,0,,True,False,False,True,33 -False,"Wheadon, Mr. Edward H",male,,0,66.0,False,True,True,False,34 -True,"Meyer, Mr. Edgar Joseph",male,,1,28.0,False,True,True,False,35 -True,"Holverson, Mr. Alexander Oskar",male,,1,42.0,False,True,True,False,36 -False,"Mamee, Mr. Hanna",male,,0,,False,True,True,True,37 -False,"Cann, Mr. Ernest Charles",male,,0,21.0,False,True,True,False,38 -True,"Vander Planke, Miss. Augusta Maria",female,,2,18.0,True,False,False,False,39 -True,"Nicola-Yarred, Miss. Jamila",female,,1,14.0,True,False,False,True,40 -True,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,,1,40.0,True,False,True,False,41 -True,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,,1,27.0,True,False,True,False,42 -False,"Kraeff, Mr. Theodor",male,,0,,False,True,True,False,43 -True,"Laroche, Miss. Simonne Marie Anne Andree",female,,1,3.0,True,False,False,True,44 -False,"Devaney, Miss. Margaret Delia",female,,0,19.0,True,False,False,True,45 -False,"Rogers, Mr. William John",male,,0,,False,True,True,False,46 -True,"Lennon, Mr. Denis",male,,1,,False,True,True,False,47 -False,"O'Driscoll, Miss. Bridget",female,,0,,True,False,False,True,48 -True,"Samaan, Mr. Youssef",male,,2,,False,True,True,False,49 -True,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,,1,18.0,True,False,True,False,50 -True,"Panula, Master. Juha Niilo",male,,4,7.0,False,True,False,False,51 -False,"Nosworthy, Mr. Richard Cater",male,,0,21.0,False,True,True,False,52 -True,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,D33,1,49.0,True,False,True,True,53 -True,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,,1,29.0,True,False,True,True,54 -False,"Ostby, Mr. Engelhart Cornelius",male,B30,0,65.0,False,True,True,False,55 -False,"Woolner, Mr. Hugh",male,C52,0,,False,True,True,True,56 -False,"Rugg, Miss. Emily",female,,0,21.0,True,False,False,True,57 -False,"Novel, Mr. Mansouer",male,,0,28.5,False,True,True,False,58 -True,"West, Miss. Constance Mirium",female,,1,5.0,True,False,False,True,59 -True,"Goodwin, Master. William Frederick",male,,5,11.0,False,True,False,False,60 +name,sibsp,is_male,cabin,age,passenger_id,survived,has_siblings,is_female,is_mr,sex +"Braund, Mr. Owen Harris",1,True,,22.0,1,False,True,False,True,male +"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",1,False,C85,38.0,2,True,True,True,True,female +"Heikkinen, Miss. Laina",0,False,,26.0,3,True,False,True,False,female +"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,False,C123,35.0,4,True,True,True,True,female +"Allen, Mr. William Henry",0,True,,35.0,5,False,False,False,True,male +"Moran, Mr. James",0,True,,,6,False,False,False,True,male +"McCarthy, Mr. Timothy J",0,False,E46,54.0,7,False,False,False,True,other +"Palsson, Master. Gosta Leonard",3,True,,2.0,8,False,True,False,False,male +"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",0,False,,27.0,9,True,False,True,True,female +"Nasser, Mrs. Nicholas (Adele Achem)",1,False,,14.0,10,True,True,True,True,female +"Sandstrom, Miss. Marguerite Rut",1,False,G6,4.0,11,True,True,True,False,female +"Bonnell, Miss. Elizabeth",0,False,C103,58.0,12,True,False,True,False,female +"Saundercock, Mr. William Henry",0,True,,20.0,13,False,False,False,True,male +"Andersson, Mr. Anders Johan",1,True,,39.0,14,False,True,False,True,male +"Vestrom, Miss. Hulda Amanda Adolfina",0,False,,14.0,15,False,False,True,False,female +"Hewlett, Mrs. (Mary D Kingcome) ",0,False,,55.0,16,True,False,True,True,female +"Rice, Master. Eugene",4,True,,2.0,17,False,True,False,False,male +"Williams, Mr. Charles Eugene",0,True,,,18,True,False,False,True,male +"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",1,False,,31.0,19,False,True,True,True,female +"Masselmani, Mrs. Fatima",0,False,,,20,True,False,True,True,female +"Fynney, Mr. Joseph J",0,True,,35.0,21,False,False,False,True,male +"Beesley, Mr. Lawrence",0,True,D56,34.0,22,True,False,False,True,male +"McGowan, Miss. Anna ""Annie""",0,False,,15.0,23,True,False,True,False,female +"Sloper, Mr. William Thompson",0,True,A6,28.0,24,True,False,False,True,male +"Palsson, Miss. Torborg Danira",3,False,,8.0,25,False,True,True,False,female +"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",1,False,,38.0,26,True,True,True,True,female +"Emir, Mr. Farred Chehab",0,True,,,27,False,False,False,True,male +"Fortune, Mr. Charles Alexander",3,True,C23 C25 C27,19.0,28,False,True,False,True,male +"O'Dwyer, Miss. Ellen ""Nellie""",0,False,,,29,True,False,True,False,female +"Todoroff, Mr. Lalio",0,True,,,30,False,False,False,True,male +"Uruchurtu, Don. Manuel E",0,True,,40.0,31,False,False,False,False,male +"Spencer, Mrs. William Augustus (Marie Eugenie)",1,False,B78,,32,True,True,True,True,female +"Glynn, Miss. Mary Agatha",0,False,,,33,True,False,True,False,female +"Wheadon, Mr. Edward H",0,True,,66.0,34,False,False,False,True,male +"Meyer, Mr. Edgar Joseph",1,True,,28.0,35,False,True,False,True,male +"Holverson, Mr. Alexander Oskar",1,True,,42.0,36,False,True,False,True,male +"Mamee, Mr. Hanna",0,True,,,37,True,False,False,True,male +"Cann, Mr. Ernest Charles",0,True,,21.0,38,False,False,False,True,male +"Vander Planke, Miss. Augusta Maria",2,False,,18.0,39,False,True,True,False,female +"Nicola-Yarred, Miss. Jamila",1,False,,14.0,40,True,True,True,False,female +"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",1,False,,40.0,41,False,True,True,True,female +"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",1,False,,27.0,42,False,True,True,True,female +"Kraeff, Mr. Theodor",0,True,,,43,False,False,False,True,male +"Laroche, Miss. Simonne Marie Anne Andree",1,False,,3.0,44,True,True,True,False,female +"Devaney, Miss. Margaret Delia",0,False,,19.0,45,True,False,True,False,female +"Rogers, Mr. William John",0,True,,,46,False,False,False,True,male +"Lennon, Mr. Denis",1,True,,,47,False,True,False,True,male +"O'Driscoll, Miss. Bridget",0,False,,,48,True,False,True,False,female +"Samaan, Mr. Youssef",2,True,,,49,False,True,False,True,male +"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",1,False,,18.0,50,False,True,True,True,female +"Panula, Master. Juha Niilo",4,True,,7.0,51,False,True,False,False,male +"Nosworthy, Mr. Richard Cater",0,True,,21.0,52,False,False,False,True,male +"Harper, Mrs. Henry Sleeper (Myna Haxtun)",1,False,D33,49.0,53,True,True,True,True,female +"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",1,False,,29.0,54,True,True,True,True,female +"Ostby, Mr. Engelhart Cornelius",0,True,B30,65.0,55,False,False,False,True,male +"Woolner, Mr. Hugh",0,True,C52,,56,True,False,False,True,male +"Rugg, Miss. Emily",0,False,,21.0,57,True,False,True,False,female +"Novel, Mr. Mansouer",0,True,,28.5,58,False,False,False,True,male +"West, Miss. Constance Mirium",1,False,,5.0,59,True,True,True,False,female +"Goodwin, Master. William Frederick",5,True,,11.0,60,False,True,False,False,male diff --git a/test_data/titanic-validate.csv b/test_data/titanic-validate.csv index 6df9c0d..dfdcb80 100644 --- a/test_data/titanic-validate.csv +++ b/test_data/titanic-validate.csv @@ -1,21 +1,21 @@ -has_siblings,name,sex,cabin,sibsp,age,is_female,is_male,is_mr,survived,passenger_id -False,"Waelens, Mr. Achille",male,,0,22.0,False,True,True,False,81 -False,"Sheerlinck, Mr. Jan Baptist",male,,0,29.0,False,True,True,True,82 -False,"McDermott, Miss. Brigdet Delia",female,,0,,True,False,False,True,83 -False,"Carrau, Mr. Francisco M",male,,0,28.0,False,True,True,False,84 -False,"Ilett, Miss. Bertha",female,,0,17.0,True,False,False,True,85 -True,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,,3,33.0,True,False,True,True,86 -True,"Ford, Mr. William Neal",male,,1,16.0,False,True,True,False,87 -False,"Slocovski, Mr. Selman Francis",male,,0,,False,True,True,False,88 -True,"Fortune, Miss. Mabel Helen",female,C23 C25 C27,3,23.0,True,False,False,True,89 -False,"Celotti, Mr. Francesco",male,,0,24.0,False,True,True,False,90 -False,"Christmann, Mr. Emil",male,,0,29.0,False,True,True,False,91 -False,"Andreasson, Mr. Paul Edvin",male,,0,20.0,False,True,True,False,92 -True,"Chaffee, Mr. Herbert Fuller",male,E31,1,46.0,False,True,True,False,93 -True,"Dean, Mr. Bertram Frank",male,,1,26.0,False,True,True,False,94 -False,"Coxon, Mr. Daniel",male,,0,59.0,False,True,True,False,95 -False,"Shorney, Mr. Charles Joseph",male,,0,,False,True,True,False,96 -False,"Goldschmidt, Mr. George B",male,A5,0,71.0,False,True,True,False,97 -False,"Greenfield, Mr. William Bertram",male,D10 D12,0,23.0,False,True,True,True,98 -False,"Doling, Mrs. John T (Ada Julia Bone)",female,,0,34.0,True,False,True,True,99 -True,"Kantor, Mr. Sinai",male,,1,34.0,False,True,True,False,100 +name,sibsp,is_male,cabin,age,passenger_id,survived,has_siblings,is_female,is_mr,sex +"Waelens, Mr. Achille",0,True,,22.0,81,False,False,False,True,male +"Sheerlinck, Mr. Jan Baptist",0,True,,29.0,82,True,False,False,True,male +"McDermott, Miss. Brigdet Delia",0,False,,,83,True,False,True,False,female +"Carrau, Mr. Francisco M",0,True,,28.0,84,False,False,False,True,male +"Ilett, Miss. Bertha",0,False,,17.0,85,True,False,True,False,female +"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",3,False,,33.0,86,True,True,True,True,female +"Ford, Mr. William Neal",1,True,,16.0,87,False,True,False,True,male +"Slocovski, Mr. Selman Francis",0,True,,,88,False,False,False,True,male +"Fortune, Miss. Mabel Helen",3,False,C23 C25 C27,23.0,89,True,True,True,False,female +"Celotti, Mr. Francesco",0,True,,24.0,90,False,False,False,True,male +"Christmann, Mr. Emil",0,True,,29.0,91,False,False,False,True,male +"Andreasson, Mr. Paul Edvin",0,True,,20.0,92,False,False,False,True,male +"Chaffee, Mr. Herbert Fuller",1,True,E31,46.0,93,False,True,False,True,male +"Dean, Mr. Bertram Frank",1,True,,26.0,94,False,True,False,True,male +"Coxon, Mr. Daniel",0,True,,59.0,95,False,False,False,True,male +"Shorney, Mr. Charles Joseph",0,True,,,96,False,False,False,True,male +"Goldschmidt, Mr. George B",0,True,A5,71.0,97,False,False,False,True,male +"Greenfield, Mr. William Bertram",0,True,D10 D12,23.0,98,True,False,False,True,male +"Doling, Mrs. John T (Ada Julia Bone)",0,False,,34.0,99,True,False,True,True,female +"Kantor, Mr. Sinai",1,True,,34.0,100,False,True,False,True,male