diff --git a/aligned/compiler/feature_factory.py b/aligned/compiler/feature_factory.py index 710e295..9788997 100644 --- a/aligned/compiler/feature_factory.py +++ b/aligned/compiler/feature_factory.py @@ -1125,21 +1125,33 @@ def aggregate(self) -> CategoricalAggregation: class Timestamp(DateFeature, ArithmeticFeature): + + time_zone: str | None + + def __init__(self, time_zone: str | None = 'UTC') -> None: + self.time_zone = time_zone + @property def dtype(self) -> FeatureType: - return FeatureType.datetime() + from zoneinfo import ZoneInfo + + return FeatureType.datetime(ZoneInfo(self.time_zone) if self.time_zone else None) class EventTimestamp(DateFeature, ArithmeticFeature): ttl: timedelta | None + time_zone: str | None @property def dtype(self) -> FeatureType: - return FeatureType.datetime() + from zoneinfo import ZoneInfo + + return FeatureType.datetime(ZoneInfo(self.time_zone) if self.time_zone else None) - def __init__(self, ttl: timedelta | None = None): + def __init__(self, ttl: timedelta | None = None, time_zone: str | None = 'UTC') -> None: self.ttl = ttl + self.time_zone = time_zone def event_timestamp(self) -> EventTimestampFeature: return EventTimestampFeature( diff --git a/aligned/data_source/batch_data_source.py b/aligned/data_source/batch_data_source.py index b303a02..5574c3f 100644 --- a/aligned/data_source/batch_data_source.py +++ b/aligned/data_source/batch_data_source.py @@ -9,7 +9,7 @@ from aligned.schemas.codable import Codable from aligned.schemas.derivied_feature import DerivedFeature -from aligned.schemas.feature import EventTimestamp, Feature, FeatureLocation +from aligned.schemas.feature import EventTimestamp, Feature, FeatureLocation, FeatureType from aligned.request.retrival_request import RequestResult, RetrivalRequest from aligned.compiler.feature_factory import FeatureFactory from polars.type_aliases import TimeUnit @@ -203,7 +203,7 @@ def multi_source_features_for( def features_for(self, facts: RetrivalJob, request: RetrivalRequest) -> RetrivalJob: return type(self).multi_source_features_for(facts, [(self, request)]) - async def schema(self) -> dict[str, FeatureFactory]: + async def schema(self) -> dict[str, FeatureType]: """Returns the schema for the data source ```python @@ -259,7 +259,8 @@ class MyView(FeatureView): from aligned.feature_view.feature_view import FeatureView schema = await self.schema() - return FeatureView.feature_view_code_template(schema, f'{self}', view_name) + feature_types = {name: feature_type.feature_factory for name, feature_type in schema.items()} + return FeatureView.feature_view_code_template(feature_types, f'{self}', view_name) async def freshness(self, event_timestamp: EventTimestamp) -> datetime | None: """ @@ -378,6 +379,9 @@ class FilteredDataSource(BatchDataSource): def job_group_key(self) -> str: return f'subset/{self.source.job_group_key()}' + async def schema(self) -> dict[str, FeatureType]: + return await self.source.schema() + @classmethod def multi_source_features_for( cls: type[FilteredDataSource], @@ -599,6 +603,12 @@ class JoinAsofDataSource(BatchDataSource): type_name: str = 'join_asof' + async def schema(self) -> dict[str, FeatureType]: + left_schema = await self.source.schema() + right_schema = await self.right_source.schema() + + return {**left_schema, **right_schema} + def job_group_key(self) -> str: return f'join/{self.source.job_group_key()}' @@ -720,6 +730,12 @@ class JoinDataSource(BatchDataSource): type_name: str = 'join' + async def schema(self) -> dict[str, FeatureType]: + left_schema = await self.source.schema() + right_schema = await self.right_source.schema() + + return {**left_schema, **right_schema} + def job_group_key(self) -> str: return f'join/{self.source.job_group_key()}' diff --git a/aligned/feature_view/feature_view.py b/aligned/feature_view/feature_view.py index 8733d2c..31661bd 100644 --- a/aligned/feature_view/feature_view.py +++ b/aligned/feature_view/feature_view.py @@ -728,7 +728,7 @@ def my_function(data: Annotated[pd.DataFrame, MyView]): """ def decorator(func: Callable) -> Callable: - def func_wrapper(*args, **kwargs) -> Any: + def func_wrapper(*args, **kwargs) -> Any: # type: ignore from typing import _AnnotatedAlias # type: ignore params_to_check = { diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py index 43a4da6..0551626 100644 --- a/aligned/retrival_job.py +++ b/aligned/retrival_job.py @@ -299,7 +299,7 @@ async def to_polars(self) -> SupervisedDataSet[pl.DataFrame]: async def to_lazy_polars(self) -> SupervisedDataSet[pl.LazyFrame]: data = await self.job.to_lazy_polars() if self.should_filter_out_null_targets: - data = data.drop_nulls([column for column in self.target_columns]) + data = data.drop_nulls(list(self.target_columns)) features = [ feature.name @@ -1782,9 +1782,7 @@ async def to_pandas(self) -> pd.DataFrame: df[feature.name] = df[feature.name].apply( lambda x: json.loads(x) if isinstance(x, str) else x ) - elif feature.dtype == FeatureType.json(): - pass - elif feature.dtype == FeatureType.datetime(): + elif (feature.dtype == FeatureType.json()) or feature.dtype.is_datetime: pass else: if feature.dtype.is_numeric: @@ -1829,9 +1827,7 @@ async def to_lazy_polars(self) -> pl.LazyFrame: dtype = df.select(feature.name).dtypes[0] if dtype == pl.Utf8: df = df.with_columns(pl.col(feature.name).str.json_extract(pl.List(pl.Utf8))) - elif feature.dtype == FeatureType.json(): - pass - elif feature.dtype == FeatureType.datetime(): + elif (feature.dtype == FeatureType.json()) or feature.dtype.is_datetime: pass else: df = df.with_columns(pl.col(feature.name).cast(feature.dtype.polars_type, strict=False)) diff --git a/aligned/schemas/feature.py b/aligned/schemas/feature.py index 3a912b8..d84150b 100644 --- a/aligned/schemas/feature.py +++ b/aligned/schemas/feature.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import Literal +from zoneinfo import ZoneInfo import polars as pl @@ -9,24 +10,25 @@ from aligned.schemas.codable import Codable from aligned.schemas.constraints import Constraint -NAME_POLARS_MAPPING = { - 'string': pl.Utf8, - 'int8': pl.Int8, - 'int16': pl.Int16, - 'int32': pl.Int32, - 'int64': pl.Int64, - 'float': pl.Float64, - 'double': pl.Float64, - 'bool': pl.Boolean, - 'date': pl.Date, - 'datetime': pl.Datetime, - 'time': pl.Time, - 'timedelta': pl.Duration, - 'uuid': pl.Utf8, - 'array': pl.List(pl.Utf8), - 'embedding': pl.List, - 'json': pl.Utf8, -} +NAME_POLARS_MAPPING = [ + ('string', pl.Utf8), + ('int8', pl.Int8), + ('int16', pl.Int16), + ('int32', pl.Int32), + ('int64', pl.Int64), + ('float', pl.Float64), + ('float', pl.Float32), + ('double', pl.Float64), + ('bool', pl.Boolean), + ('date', pl.Date), + ('datetime', pl.Datetime), + ('time', pl.Time), + ('timedelta', pl.Duration), + ('uuid', pl.Utf8), + ('array', pl.List(pl.Utf8)), + ('embedding', pl.List), + ('json', pl.Utf8), +] @dataclass @@ -47,6 +49,10 @@ def is_numeric(self) -> bool: 'double', } # Can be represented as an int + @property + def is_datetime(self) -> bool: + return self.name.startswith('datetime') + @property def python_type(self) -> type: from datetime import date, datetime, time, timedelta @@ -98,10 +104,27 @@ def pandas_type(self) -> str | type: @property def polars_type(self) -> type: - return NAME_POLARS_MAPPING[self.name] + if self.name.startswith('datetime-'): + time_zone = self.name.split('-')[1] + return pl.Datetime(time_zone=time_zone) # type: ignore + + for name, dtype in NAME_POLARS_MAPPING: + if name == self.name: + return dtype + + raise ValueError(f'Unable to find a value that can represent {self.name}') @property def feature_factory(self) -> ff.FeatureFactory: + + if self.name.startswith('datetime-'): + time_zone = self.name.split('-')[1] + return ff.Timestamp(time_zone=time_zone) + + if self.name.startswith('array-'): + sub_type = '-'.join(self.name.split('-')[1:]) + return ff.List(FeatureType(name=sub_type).feature_factory) + return { 'string': ff.String(), 'int8': ff.Int8(), @@ -135,9 +158,25 @@ def __pre_serialize__(self) -> FeatureType: @staticmethod def from_polars(polars_type: pl.DataType) -> FeatureType: - for name, dtype in NAME_POLARS_MAPPING.items(): + if isinstance(polars_type, pl.Datetime): + if polars_type.time_zone: + return FeatureType(name=f'datetime-{polars_type.time_zone}') + return FeatureType(name='datetime') + + if isinstance(polars_type, pl.List): + if polars_type.inner: + sub_type = FeatureType.from_polars(polars_type.inner) # type: ignore + return FeatureType(name=f'array-{sub_type.name}') + + return FeatureType(name='array') + + if isinstance(polars_type, pl.Struct): + return FeatureType(name='json') + + for name, dtype in NAME_POLARS_MAPPING: if polars_type.is_(dtype): return FeatureType(name=name) + raise ValueError(f'Unable to find a value that can represent {polars_type}') @staticmethod @@ -181,8 +220,10 @@ def uuid() -> FeatureType: return FeatureType(name='uuid') @staticmethod - def datetime() -> FeatureType: - return FeatureType(name='datetime') + def datetime(tz: ZoneInfo | None = ZoneInfo('UTC')) -> FeatureType: + if not tz: + return FeatureType(name='datetime') + return FeatureType(name=f'datetime-{tz.key}') @staticmethod def json() -> FeatureType: diff --git a/aligned/schemas/feature_view.py b/aligned/schemas/feature_view.py index 5b94d6d..ad374d8 100644 --- a/aligned/schemas/feature_view.py +++ b/aligned/schemas/feature_view.py @@ -12,7 +12,7 @@ from aligned.schemas.codable import Codable from aligned.schemas.derivied_feature import AggregatedFeature, DerivedFeature from aligned.schemas.event_trigger import EventTrigger -from aligned.schemas.feature import EventTimestamp, Feature, FeatureLocation +from aligned.schemas.feature import EventTimestamp, Feature, FeatureLocation, FeatureType from aligned.schemas.vector_storage import VectorIndex if TYPE_CHECKING: @@ -329,6 +329,12 @@ class FeatureViewReferenceSource(BatchDataSource): def job_group_key(self) -> str: return self.view.name + async def schema(self) -> dict[str, FeatureType]: + if self.view.materialized_source: + return await self.view.materialized_source.schema() + + return await self.view.source.schema() + def sub_request(self, request: RetrivalRequest) -> RetrivalRequest: sub_references: set[str] = request.entity_names.union(request.feature_names) diff --git a/aligned/schemas/model.py b/aligned/schemas/model.py index 0394ba9..43f8c2f 100644 --- a/aligned/schemas/model.py +++ b/aligned/schemas/model.py @@ -4,7 +4,7 @@ from aligned.request.retrival_request import FeatureRequest, RetrivalRequest from aligned.schemas.codable import Codable -from aligned.schemas.feature import FeatureLocation +from aligned.schemas.feature import FeatureLocation, FeatureType from aligned.schemas.feature import EventTimestamp, Feature, FeatureReferance from aligned.schemas.event_trigger import EventTrigger from aligned.schemas.target import ClassificationTarget, RecommendationTarget, RegressionTarget @@ -176,6 +176,11 @@ class ModelSource(BatchDataSource): type_name: str = 'model_source' + async def schema(self) -> dict[str, FeatureType]: + if self.model.predictions_view.source: + return await self.model.predictions_view.source.schema() + return {} + def source(self) -> FeatureViewReferenceSource: return FeatureViewReferenceSource(self.pred_view, FeatureLocation.model(self.pred_view.name)) diff --git a/aligned/sources/local.py b/aligned/sources/local.py index e79d210..17e2816 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -25,7 +25,6 @@ from aligned.schemas.date_formatter import DateFormatter if TYPE_CHECKING: - from aligned.compiler.feature_factory import FeatureFactory from datetime import datetime from aligned.schemas.repo_definition import RepoDefinition from aligned.feature_store import FeatureStore @@ -266,14 +265,15 @@ def multi_source_features_for( date_formatter=source.formatter, ) - async def schema(self) -> dict[str, FeatureFactory]: + async def schema(self) -> dict[str, FeatureType]: df = await self.to_lazy_polars() - return {name: FeatureType.from_polars(pl_type).feature_factory for name, pl_type in df.schema.items()} + return {name: FeatureType.from_polars(pl_type) for name, pl_type in df.schema.items()} async def feature_view_code(self, view_name: str) -> str: from aligned.feature_view.feature_view import FeatureView - schema = await self.schema() + raw_schema = await self.schema() + schema = {name: feat.feature_factory for name, feat in raw_schema.items()} data_source_code = f'FileSource.csv_at("{self.path}", csv_config={self.csv_config})' return FeatureView.feature_view_code_template( schema, @@ -370,20 +370,19 @@ def multi_source_features_for( facts=facts, ) - async def schema(self) -> dict[str, FeatureFactory]: + async def schema(self) -> dict[str, FeatureType]: if self.path.startswith('http'): parquet_schema = pl.scan_parquet(self.path).schema else: parquet_schema = pl.read_parquet_schema(self.path) - return { - name: FeatureType.from_polars(pl_type).feature_factory for name, pl_type in parquet_schema.items() - } + return {name: FeatureType.from_polars(pl_type) for name, pl_type in parquet_schema.items()} async def feature_view_code(self, view_name: str) -> str: from aligned.feature_view.feature_view import FeatureView - schema = await self.schema() + raw_schema = await self.schema() + schema = {name: feat.feature_factory for name, feat in raw_schema.items()} data_source_code = f'FileSource.parquet_at("{self.path}")' return FeatureView.feature_view_code_template( schema, data_source_code, view_name, 'from aligned import FileSource' @@ -436,16 +435,15 @@ async def write_polars(self, df: pl.LazyFrame) -> None: self.path, mode=self.config.mode, overwrite_schema=self.config.overwrite_schema ) - async def schema(self) -> dict[str, FeatureFactory]: + async def schema(self) -> dict[str, FeatureType]: parquet_schema = pl.read_delta(self.path).schema - return { - name: FeatureType.from_polars(pl_type).feature_factory for name, pl_type in parquet_schema.items() - } + return {name: FeatureType.from_polars(pl_type) for name, pl_type in parquet_schema.items()} async def feature_view_code(self, view_name: str) -> str: from aligned.feature_view.feature_view import FeatureView - schema = await self.schema() + raw_schema = await self.schema() + schema = {name: feat.feature_factory for name, feat in raw_schema.items()} data_source_code = f'FileSource.parquet_at("{self.path}")' return FeatureView.feature_view_code_template( schema, data_source_code, view_name, 'from aligned import FileSource' diff --git a/aligned/sources/psql.py b/aligned/sources/psql.py index df6b1a6..1b1defc 100644 --- a/aligned/sources/psql.py +++ b/aligned/sources/psql.py @@ -10,8 +10,9 @@ from aligned.schemas.codable import Codable from datetime import datetime +from aligned.schemas.feature import FeatureType + if TYPE_CHECKING: - from aligned.compiler.feature_factory import FeatureFactory from aligned.enricher import Enricher from aligned.schemas.feature import EventTimestamp @@ -110,11 +111,9 @@ def multi_source_features_for( facts=facts, ) - async def schema(self) -> dict[str, FeatureFactory]: + async def schema(self) -> dict[str, FeatureType]: import polars as pl - import aligned.compiler.feature_factory as ff - config = self.config schema = config.schema or 'public' table = self.table @@ -136,18 +135,18 @@ async def schema(self) -> dict[str, FeatureFactory]: AND table_name = '{table}'""" df = pl.read_database(sql_query, connection=self.config.url, engine='adbc') psql_types = { - 'uuid': ff.UUID(), - 'timestamp with time zone': ff.Timestamp(), - 'timestamp without time zone': ff.Timestamp(), - 'character varying': ff.String(), - 'text': ff.String(), - 'integer': ff.Int64(), - 'float': ff.Float(), - 'date': ff.Timestamp(), - 'boolean': ff.Bool(), - 'jsonb': ff.Json(), - 'smallint': ff.Int32(), - 'numeric': ff.Float(), + 'uuid': FeatureType.uuid(), + 'timestamp with time zone': FeatureType.datetime(), + 'timestamp without time zone': FeatureType.datetime(None), + 'character varying': FeatureType.string(), + 'text': FeatureType.string(), + 'integer': FeatureType.int32(), + 'float': FeatureType.float(), + 'date': FeatureType.date(), + 'boolean': FeatureType.bool(), + 'jsonb': FeatureType.json(), + 'smallint': FeatureType.int16(), + 'numeric': FeatureType.float(), } values = df.select(['column_name', 'data_type']).to_dicts() return {value['column_name']: psql_types[value['data_type']] for value in values} diff --git a/aligned/tests/test_source_validation.py b/aligned/tests/test_source_validation.py index fab7f96..c44631d 100644 --- a/aligned/tests/test_source_validation.py +++ b/aligned/tests/test_source_validation.py @@ -18,30 +18,11 @@ async def test_source_validation(titanic_feature_store: FeatureStore) -> None: assert {FeatureLocation.feature_view('titanic_parquet'): True} == validation -# @pytest.mark.asyncio -# async def test_source_validation_psql(titanic_feature_view: FeatureView) -> None: -# -# if 'PSQL_DATABASE_TEST' not in environ: -# environ['PSQL_DATABASE_TEST'] = 'postgresql://postgres:postgres@localhost:5433/aligned-test' -# -# psql_config = PostgreSQLConfig('PSQL_DATABASE_TEST') -# titanic_feature_view.metadata.source = psql_config.table('titanic') -# -# store = FeatureStore.experimental() -# store.add_feature_view(titanic_feature_view) -# views = store.views_with_config(psql_config) -# -# assert len(views) == 1 -# validation = await validate_sources_in(views) -# -# assert {FeatureLocation.feature_view('titanic'): False} == validation - - @pytest.mark.asyncio async def test_schema_loading() -> None: source = FileSource.parquet_at('test_data/titanic.parquet') schema = await source.schema() - dtype_schema = {key: feature.dtype for key, feature in schema.items()} + dtype_schema = {key: feature for key, feature in schema.items()} assert dtype_schema == { 'passenger_id': FeatureType(name='int64'), 'survived': FeatureType(name='int64'), diff --git a/aligned/validation/pandera.py b/aligned/validation/pandera.py index a66f30f..87d54c1 100644 --- a/aligned/validation/pandera.py +++ b/aligned/validation/pandera.py @@ -41,7 +41,7 @@ def _column_for(self, feature: Feature) -> Column: if feature.constraints is None: return Column( feature.dtype.pandas_type if feature.dtype in self.datatype_check else None, - nullable=True, + nullable=False, coerce=True, ) diff --git a/pyproject.toml b/pyproject.toml index 536fa0a..479cf63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "aligned" -version = "0.0.79" +version = "0.0.80" description = "A data managment and lineage tool for ML applications." authors = ["Mats E. Mollestad "] license = "Apache-2.0" diff --git a/test_data/credit_history.csv b/test_data/credit_history.csv index 81ff56a..8f41bd9 100644 --- a/test_data/credit_history.csv +++ b/test_data/credit_history.csv @@ -1,7 +1,7 @@ -bankruptcies,event_timestamp,student_loan_due,credit_card_due,dob_ssn,due_sum -0,1587924064746575,22328,8419,19530219_5179,30747 -0,1587924064746575,2515,2944,19520816_8737,5459 -0,1587924064746575,33000,833,19860413_2537,33833 -0,1588010464746575,48955,5936,19530219_5179,54891 -0,1588010464746575,9501,1575,19520816_8737,11076 -0,1588010464746575,35510,6263,19860413_2537,41773 +student_loan_due,bankruptcies,credit_card_due,event_timestamp,dob_ssn,due_sum +22328,0,8419,2020-04-26 18:01:04.746575+00:00,19530219_5179,30747 +2515,0,2944,2020-04-26 18:01:04.746575+00:00,19520816_8737,5459 +33000,0,833,2020-04-26 18:01:04.746575+00:00,19860413_2537,33833 +48955,0,5936,2020-04-27 18:01:04.746575+00:00,19530219_5179,54891 +9501,0,1575,2020-04-27 18:01:04.746575+00:00,19520816_8737,11076 +35510,0,6263,2020-04-27 18:01:04.746575+00:00,19860413_2537,41773 diff --git a/test_data/credit_history_mater.parquet b/test_data/credit_history_mater.parquet index 5bd897b..02e451b 100644 Binary files a/test_data/credit_history_mater.parquet and b/test_data/credit_history_mater.parquet differ diff --git a/test_data/feature-store.json b/test_data/feature-store.json index d4d62f2..2c4916d 100644 --- a/test_data/feature-store.json +++ b/test_data/feature-store.json @@ -1 +1 @@ -{"metadata": {"created_at": "2024-03-12T18:22:07.588714", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "updated_at", "dtype": {"name": "datetime"}, "description": null, "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}], "derived_features": [{"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}, {"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}], "derived_features": [{"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null}], "enrichers": []} +{"metadata": {"created_at": "2024-03-13T20:02:39.692563", "name": "feature_store_location.py", "repo_url": null, "github_url": null}, "feature_views": [{"name": "titanic_parquet", "source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": []}, {"name": "titanic", "source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime-UTC"}, "description": null, "tags": null, "constraints": null}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}], "tags": null, "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime-UTC"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "materialized_source": null, "acceptable_freshness": null, "unacceptable_freshness": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}], "combined_feature_views": [], "models": [{"name": "titanic", "features": {"default_version": "default", "versions": {"default": [{"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}]}}, "predictions_view": {"entities": [], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "application_source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}], "recommendation_targets": [], "acceptable_freshness": 86400.0, "unacceptable_freshness": 172800.0}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_store": null, "exposed_at_url": null}], "enrichers": []} diff --git a/test_data/loan.csv b/test_data/loan.csv index ee17382..0af05d3 100644 --- a/test_data/loan.csv +++ b/test_data/loan.csv @@ -1,7 +1,7 @@ -loan_id,loan_amount,personal_income,loan_status,event_timestamp -10000,35000,59000,True,1587924064746575 -10001,1000,9600,False,1587924064746575 -10002,5500,9600,True,1587924064746575 -10000,35000,65500,True,1588010464746575 -10001,35000,54400,True,1588010464746575 -10002,2500,9900,True,1588010464746575 +event_timestamp,loan_id,personal_income,loan_status,loan_amount +2020-04-26 18:01:04.746575+00:00,10000,59000,True,35000 +2020-04-26 18:01:04.746575+00:00,10001,9600,False,1000 +2020-04-26 18:01:04.746575+00:00,10002,9600,True,5500 +2020-04-27 18:01:04.746575+00:00,10000,65500,True,35000 +2020-04-27 18:01:04.746575+00:00,10001,54400,True,35000 +2020-04-27 18:01:04.746575+00:00,10002,9900,True,2500 diff --git a/test_data/test_model.parquet b/test_data/test_model.parquet index aa314b5..3c0ae11 100644 Binary files a/test_data/test_model.parquet and b/test_data/test_model.parquet differ diff --git a/test_data/titanic-sets.json b/test_data/titanic-sets.json index a51bda3..c8cfa68 100644 --- a/test_data/titanic-sets.json +++ b/test_data/titanic-sets.json @@ -1 +1 @@ -{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}, {"name": "optional"}]}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} +{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}, {"name": "optional"}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}, {"name": "optional"}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}, "formatter": {"date_format": "yyyy-MM-ddTHH:mm:ssZ", "time_unit": null, "time_zone": null, "name": "string_form"}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []} diff --git a/test_data/titanic-test.csv b/test_data/titanic-test.csv index 4f5d957..a05cf94 100644 --- a/test_data/titanic-test.csv +++ b/test_data/titanic-test.csv @@ -1,21 +1,21 @@ -sibsp,is_female,survived,name,age,is_male,sex,cabin,has_siblings,passenger_id,is_mr -0,False,False,"Sirayanian, Mr. Orsen",22.0,True,male,,False,61,True -0,True,True,"Icard, Miss. Amelie",38.0,False,female,B28,False,62,False -1,False,False,"Harris, Mr. Henry Birkhardt",45.0,True,male,C83,True,63,True -3,False,False,"Skoog, Master. Harald",4.0,True,male,,True,64,False -0,False,False,"Stewart, Mr. Albert A",,True,male,,False,65,True -1,False,True,"Moubarek, Master. Gerios",,True,male,,True,66,False -0,True,True,"Nye, Mrs. (Elizabeth Ramell)",29.0,False,female,F33,False,67,True -0,False,False,"Crease, Mr. Ernest James",19.0,True,male,,False,68,True -4,True,True,"Andersson, Miss. Erna Alexandra",17.0,False,female,,True,69,False -2,False,False,"Kink, Mr. Vincenz",26.0,True,male,,True,70,True -0,False,False,"Jenkin, Mr. Stephen Curnow",32.0,True,male,,False,71,True -5,True,False,"Goodwin, Miss. Lillian Amy",16.0,False,female,,True,72,False -0,False,False,"Hood, Mr. Ambrose Jr",21.0,True,male,,False,73,True -1,False,False,"Chronopoulos, Mr. Apostolos",26.0,True,male,,True,74,True -0,False,True,"Bing, Mr. Lee",32.0,True,male,,False,75,True -0,False,False,"Moen, Mr. Sigurd Hansen",25.0,True,male,F G73,False,76,True -0,False,False,"Staneff, Mr. Ivan",,True,male,,False,77,True -0,False,False,"Moutal, Mr. Rahamin Haim",,True,male,,False,78,True -0,False,True,"Caldwell, Master. Alden Gates",0.83,True,male,,False,79,False -0,True,True,"Dowdell, Miss. Elizabeth",30.0,False,female,,False,80,False +age,sibsp,survived,is_mr,sex,is_male,name,passenger_id,has_siblings,cabin,is_female +22.0,0,False,True,male,True,"Sirayanian, Mr. Orsen",61,False,,False +38.0,0,True,False,female,False,"Icard, Miss. Amelie",62,False,B28,True +45.0,1,False,True,male,True,"Harris, Mr. Henry Birkhardt",63,True,C83,False +4.0,3,False,False,male,True,"Skoog, Master. Harald",64,True,,False +,0,False,True,male,True,"Stewart, Mr. Albert A",65,False,,False +,1,True,False,male,True,"Moubarek, Master. Gerios",66,True,,False +29.0,0,True,True,female,False,"Nye, Mrs. (Elizabeth Ramell)",67,False,F33,True +19.0,0,False,True,male,True,"Crease, Mr. Ernest James",68,False,,False +17.0,4,True,False,female,False,"Andersson, Miss. Erna Alexandra",69,True,,True +26.0,2,False,True,male,True,"Kink, Mr. Vincenz",70,True,,False +32.0,0,False,True,male,True,"Jenkin, Mr. Stephen Curnow",71,False,,False +16.0,5,False,False,female,False,"Goodwin, Miss. Lillian Amy",72,True,,True +21.0,0,False,True,male,True,"Hood, Mr. Ambrose Jr",73,False,,False +26.0,1,False,True,male,True,"Chronopoulos, Mr. Apostolos",74,True,,False +32.0,0,True,True,male,True,"Bing, Mr. Lee",75,False,,False +25.0,0,False,True,male,True,"Moen, Mr. Sigurd Hansen",76,False,F G73,False +,0,False,True,male,True,"Staneff, Mr. Ivan",77,False,,False +,0,False,True,male,True,"Moutal, Mr. Rahamin Haim",78,False,,False +0.83,0,True,False,male,True,"Caldwell, Master. Alden Gates",79,False,,False +30.0,0,True,False,female,False,"Dowdell, Miss. Elizabeth",80,False,,True diff --git a/test_data/titanic-train.csv b/test_data/titanic-train.csv index 13870a0..9759d8f 100644 --- a/test_data/titanic-train.csv +++ b/test_data/titanic-train.csv @@ -1,61 +1,61 @@ -sibsp,is_female,survived,name,age,is_male,sex,cabin,has_siblings,passenger_id,is_mr -1,False,False,"Braund, Mr. Owen Harris",22.0,True,male,,True,1,True -1,True,True,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",38.0,False,female,C85,True,2,True -0,True,True,"Heikkinen, Miss. Laina",26.0,False,female,,False,3,False -1,True,True,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,False,female,C123,True,4,True -0,False,False,"Allen, Mr. William Henry",35.0,True,male,,False,5,True -0,False,False,"Moran, Mr. James",,True,male,,False,6,True -0,False,False,"McCarthy, Mr. Timothy J",54.0,False,other,E46,False,7,True -3,False,False,"Palsson, Master. Gosta Leonard",2.0,True,male,,True,8,False -0,True,True,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",27.0,False,female,,False,9,True -1,True,True,"Nasser, Mrs. Nicholas (Adele Achem)",14.0,False,female,,True,10,True -1,True,True,"Sandstrom, Miss. Marguerite Rut",4.0,False,female,G6,True,11,False -0,True,True,"Bonnell, Miss. Elizabeth",58.0,False,female,C103,False,12,False -0,False,False,"Saundercock, Mr. William Henry",20.0,True,male,,False,13,True -1,False,False,"Andersson, Mr. Anders Johan",39.0,True,male,,True,14,True -0,True,False,"Vestrom, Miss. Hulda Amanda Adolfina",14.0,False,female,,False,15,False -0,True,True,"Hewlett, Mrs. (Mary D Kingcome) ",55.0,False,female,,False,16,True -4,False,False,"Rice, Master. Eugene",2.0,True,male,,True,17,False -0,False,True,"Williams, Mr. Charles Eugene",,True,male,,False,18,True -1,True,False,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",31.0,False,female,,True,19,True -0,True,True,"Masselmani, Mrs. Fatima",,False,female,,False,20,True -0,False,False,"Fynney, Mr. Joseph J",35.0,True,male,,False,21,True -0,False,True,"Beesley, Mr. Lawrence",34.0,True,male,D56,False,22,True -0,True,True,"McGowan, Miss. Anna ""Annie""",15.0,False,female,,False,23,False -0,False,True,"Sloper, Mr. William Thompson",28.0,True,male,A6,False,24,True -3,True,False,"Palsson, Miss. Torborg Danira",8.0,False,female,,True,25,False -1,True,True,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",38.0,False,female,,True,26,True -0,False,False,"Emir, Mr. Farred Chehab",,True,male,,False,27,True -3,False,False,"Fortune, Mr. Charles Alexander",19.0,True,male,C23 C25 C27,True,28,True -0,True,True,"O'Dwyer, Miss. Ellen ""Nellie""",,False,female,,False,29,False -0,False,False,"Todoroff, Mr. Lalio",,True,male,,False,30,True -0,False,False,"Uruchurtu, Don. Manuel E",40.0,True,male,,False,31,False -1,True,True,"Spencer, Mrs. William Augustus (Marie Eugenie)",,False,female,B78,True,32,True -0,True,True,"Glynn, Miss. Mary Agatha",,False,female,,False,33,False -0,False,False,"Wheadon, Mr. Edward H",66.0,True,male,,False,34,True -1,False,False,"Meyer, Mr. Edgar Joseph",28.0,True,male,,True,35,True -1,False,False,"Holverson, Mr. Alexander Oskar",42.0,True,male,,True,36,True -0,False,True,"Mamee, Mr. Hanna",,True,male,,False,37,True -0,False,False,"Cann, Mr. Ernest Charles",21.0,True,male,,False,38,True -2,True,False,"Vander Planke, Miss. Augusta Maria",18.0,False,female,,True,39,False -1,True,True,"Nicola-Yarred, Miss. Jamila",14.0,False,female,,True,40,False -1,True,False,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",40.0,False,female,,True,41,True -1,True,False,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",27.0,False,female,,True,42,True -0,False,False,"Kraeff, Mr. Theodor",,True,male,,False,43,True -1,True,True,"Laroche, Miss. Simonne Marie Anne Andree",3.0,False,female,,True,44,False -0,True,True,"Devaney, Miss. Margaret Delia",19.0,False,female,,False,45,False -0,False,False,"Rogers, Mr. William John",,True,male,,False,46,True -1,False,False,"Lennon, Mr. Denis",,True,male,,True,47,True -0,True,True,"O'Driscoll, Miss. Bridget",,False,female,,False,48,False -2,False,False,"Samaan, Mr. Youssef",,True,male,,True,49,True -1,True,False,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",18.0,False,female,,True,50,True -4,False,False,"Panula, Master. Juha Niilo",7.0,True,male,,True,51,False -0,False,False,"Nosworthy, Mr. Richard Cater",21.0,True,male,,False,52,True -1,True,True,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",49.0,False,female,D33,True,53,True -1,True,True,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",29.0,False,female,,True,54,True -0,False,False,"Ostby, Mr. Engelhart Cornelius",65.0,True,male,B30,False,55,True -0,False,True,"Woolner, Mr. Hugh",,True,male,C52,False,56,True -0,True,True,"Rugg, Miss. Emily",21.0,False,female,,False,57,False -0,False,False,"Novel, Mr. Mansouer",28.5,True,male,,False,58,True -1,True,True,"West, Miss. Constance Mirium",5.0,False,female,,True,59,False -5,False,False,"Goodwin, Master. William Frederick",11.0,True,male,,True,60,False +age,sibsp,survived,is_mr,sex,is_male,name,passenger_id,has_siblings,cabin,is_female +22.0,1,False,True,male,True,"Braund, Mr. Owen Harris",1,True,,False +38.0,1,True,True,female,False,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",2,True,C85,True +26.0,0,True,False,female,False,"Heikkinen, Miss. Laina",3,False,,True +35.0,1,True,True,female,False,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",4,True,C123,True +35.0,0,False,True,male,True,"Allen, Mr. William Henry",5,False,,False +,0,False,True,male,True,"Moran, Mr. James",6,False,,False +54.0,0,False,True,other,False,"McCarthy, Mr. Timothy J",7,False,E46,False +2.0,3,False,False,male,True,"Palsson, Master. Gosta Leonard",8,True,,False +27.0,0,True,True,female,False,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",9,False,,True +14.0,1,True,True,female,False,"Nasser, Mrs. Nicholas (Adele Achem)",10,True,,True +4.0,1,True,False,female,False,"Sandstrom, Miss. Marguerite Rut",11,True,G6,True +58.0,0,True,False,female,False,"Bonnell, Miss. Elizabeth",12,False,C103,True +20.0,0,False,True,male,True,"Saundercock, Mr. William Henry",13,False,,False +39.0,1,False,True,male,True,"Andersson, Mr. Anders Johan",14,True,,False +14.0,0,False,False,female,False,"Vestrom, Miss. Hulda Amanda Adolfina",15,False,,True +55.0,0,True,True,female,False,"Hewlett, Mrs. (Mary D Kingcome) ",16,False,,True +2.0,4,False,False,male,True,"Rice, Master. Eugene",17,True,,False +,0,True,True,male,True,"Williams, Mr. Charles Eugene",18,False,,False +31.0,1,False,True,female,False,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",19,True,,True +,0,True,True,female,False,"Masselmani, Mrs. Fatima",20,False,,True +35.0,0,False,True,male,True,"Fynney, Mr. Joseph J",21,False,,False +34.0,0,True,True,male,True,"Beesley, Mr. Lawrence",22,False,D56,False +15.0,0,True,False,female,False,"McGowan, Miss. Anna ""Annie""",23,False,,True +28.0,0,True,True,male,True,"Sloper, Mr. William Thompson",24,False,A6,False +8.0,3,False,False,female,False,"Palsson, Miss. Torborg Danira",25,True,,True +38.0,1,True,True,female,False,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",26,True,,True +,0,False,True,male,True,"Emir, Mr. Farred Chehab",27,False,,False +19.0,3,False,True,male,True,"Fortune, Mr. Charles Alexander",28,True,C23 C25 C27,False +,0,True,False,female,False,"O'Dwyer, Miss. Ellen ""Nellie""",29,False,,True +,0,False,True,male,True,"Todoroff, Mr. Lalio",30,False,,False +40.0,0,False,False,male,True,"Uruchurtu, Don. Manuel E",31,False,,False +,1,True,True,female,False,"Spencer, Mrs. William Augustus (Marie Eugenie)",32,True,B78,True +,0,True,False,female,False,"Glynn, Miss. Mary Agatha",33,False,,True +66.0,0,False,True,male,True,"Wheadon, Mr. Edward H",34,False,,False +28.0,1,False,True,male,True,"Meyer, Mr. Edgar Joseph",35,True,,False +42.0,1,False,True,male,True,"Holverson, Mr. Alexander Oskar",36,True,,False +,0,True,True,male,True,"Mamee, Mr. Hanna",37,False,,False +21.0,0,False,True,male,True,"Cann, Mr. Ernest Charles",38,False,,False +18.0,2,False,False,female,False,"Vander Planke, Miss. Augusta Maria",39,True,,True +14.0,1,True,False,female,False,"Nicola-Yarred, Miss. Jamila",40,True,,True +40.0,1,False,True,female,False,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",41,True,,True +27.0,1,False,True,female,False,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",42,True,,True +,0,False,True,male,True,"Kraeff, Mr. Theodor",43,False,,False +3.0,1,True,False,female,False,"Laroche, Miss. Simonne Marie Anne Andree",44,True,,True +19.0,0,True,False,female,False,"Devaney, Miss. Margaret Delia",45,False,,True +,0,False,True,male,True,"Rogers, Mr. William John",46,False,,False +,1,False,True,male,True,"Lennon, Mr. Denis",47,True,,False +,0,True,False,female,False,"O'Driscoll, Miss. Bridget",48,False,,True +,2,False,True,male,True,"Samaan, Mr. Youssef",49,True,,False +18.0,1,False,True,female,False,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",50,True,,True +7.0,4,False,False,male,True,"Panula, Master. Juha Niilo",51,True,,False +21.0,0,False,True,male,True,"Nosworthy, Mr. Richard Cater",52,False,,False +49.0,1,True,True,female,False,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",53,True,D33,True +29.0,1,True,True,female,False,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",54,True,,True +65.0,0,False,True,male,True,"Ostby, Mr. Engelhart Cornelius",55,False,B30,False +,0,True,True,male,True,"Woolner, Mr. Hugh",56,False,C52,False +21.0,0,True,False,female,False,"Rugg, Miss. Emily",57,False,,True +28.5,0,False,True,male,True,"Novel, Mr. Mansouer",58,False,,False +5.0,1,True,False,female,False,"West, Miss. Constance Mirium",59,True,,True +11.0,5,False,False,male,True,"Goodwin, Master. William Frederick",60,True,,False diff --git a/test_data/titanic-validate.csv b/test_data/titanic-validate.csv index e56c86f..523e1e7 100644 --- a/test_data/titanic-validate.csv +++ b/test_data/titanic-validate.csv @@ -1,21 +1,21 @@ -sibsp,is_female,survived,name,age,is_male,sex,cabin,has_siblings,passenger_id,is_mr -0,False,False,"Waelens, Mr. Achille",22.0,True,male,,False,81,True -0,False,True,"Sheerlinck, Mr. Jan Baptist",29.0,True,male,,False,82,True -0,True,True,"McDermott, Miss. Brigdet Delia",,False,female,,False,83,False -0,False,False,"Carrau, Mr. Francisco M",28.0,True,male,,False,84,True -0,True,True,"Ilett, Miss. Bertha",17.0,False,female,,False,85,False -3,True,True,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",33.0,False,female,,True,86,True -1,False,False,"Ford, Mr. William Neal",16.0,True,male,,True,87,True -0,False,False,"Slocovski, Mr. Selman Francis",,True,male,,False,88,True -3,True,True,"Fortune, Miss. Mabel Helen",23.0,False,female,C23 C25 C27,True,89,False -0,False,False,"Celotti, Mr. Francesco",24.0,True,male,,False,90,True -0,False,False,"Christmann, Mr. Emil",29.0,True,male,,False,91,True -0,False,False,"Andreasson, Mr. Paul Edvin",20.0,True,male,,False,92,True -1,False,False,"Chaffee, Mr. Herbert Fuller",46.0,True,male,E31,True,93,True -1,False,False,"Dean, Mr. Bertram Frank",26.0,True,male,,True,94,True -0,False,False,"Coxon, Mr. Daniel",59.0,True,male,,False,95,True -0,False,False,"Shorney, Mr. Charles Joseph",,True,male,,False,96,True -0,False,False,"Goldschmidt, Mr. George B",71.0,True,male,A5,False,97,True -0,False,True,"Greenfield, Mr. William Bertram",23.0,True,male,D10 D12,False,98,True -0,True,True,"Doling, Mrs. John T (Ada Julia Bone)",34.0,False,female,,False,99,True -1,False,False,"Kantor, Mr. Sinai",34.0,True,male,,True,100,True +age,sibsp,survived,is_mr,sex,is_male,name,passenger_id,has_siblings,cabin,is_female +22.0,0,False,True,male,True,"Waelens, Mr. Achille",81,False,,False +29.0,0,True,True,male,True,"Sheerlinck, Mr. Jan Baptist",82,False,,False +,0,True,False,female,False,"McDermott, Miss. Brigdet Delia",83,False,,True +28.0,0,False,True,male,True,"Carrau, Mr. Francisco M",84,False,,False +17.0,0,True,False,female,False,"Ilett, Miss. Bertha",85,False,,True +33.0,3,True,True,female,False,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",86,True,,True +16.0,1,False,True,male,True,"Ford, Mr. William Neal",87,True,,False +,0,False,True,male,True,"Slocovski, Mr. Selman Francis",88,False,,False +23.0,3,True,False,female,False,"Fortune, Miss. Mabel Helen",89,True,C23 C25 C27,True +24.0,0,False,True,male,True,"Celotti, Mr. Francesco",90,False,,False +29.0,0,False,True,male,True,"Christmann, Mr. Emil",91,False,,False +20.0,0,False,True,male,True,"Andreasson, Mr. Paul Edvin",92,False,,False +46.0,1,False,True,male,True,"Chaffee, Mr. Herbert Fuller",93,True,E31,False +26.0,1,False,True,male,True,"Dean, Mr. Bertram Frank",94,True,,False +59.0,0,False,True,male,True,"Coxon, Mr. Daniel",95,False,,False +,0,False,True,male,True,"Shorney, Mr. Charles Joseph",96,False,,False +71.0,0,False,True,male,True,"Goldschmidt, Mr. George B",97,False,A5,False +23.0,0,True,True,male,True,"Greenfield, Mr. William Bertram",98,False,D10 D12,False +34.0,0,True,True,female,False,"Doling, Mrs. John T (Ada Julia Bone)",99,False,,True +34.0,1,False,True,male,True,"Kantor, Mr. Sinai",100,True,,False