Skip to content

Commit

Permalink
More lost changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
MatsMoll committed Jan 5, 2024
1 parent abb4088 commit b85860b
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 3 deletions.
4 changes: 4 additions & 0 deletions aligned/retrival_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -1694,6 +1694,8 @@ async def to_pandas(self) -> pd.DataFrame:
df[feature.name] = df[feature.name].apply(
lambda x: json.loads(x) if isinstance(x, str) else x
)
elif feature.dtype == FeatureType.json():
pass
else:
if feature.dtype.is_numeric:
df[feature.name] = pd.to_numeric(df[feature.name], errors='coerce').astype(
Expand Down Expand Up @@ -1740,6 +1742,8 @@ async def to_polars(self) -> pl.LazyFrame:
dtype = df.select(feature.name).dtypes[0]
if dtype == pl.Utf8:
df = df.with_columns(pl.col(feature.name).str.json_extract(pl.List(pl.Utf8)))
elif feature.dtype == FeatureType.json():
pass
else:
df = df.with_columns(pl.col(feature.name).cast(feature.dtype.polars_type, strict=False))

Expand Down
11 changes: 9 additions & 2 deletions aligned/schemas/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2141,10 +2141,17 @@ class StructField(Transformation):
dtype = FeatureType.string()

async def transform_pandas(self, df: pd.DataFrame) -> pd.Series:
return df[self.key].apply(lambda x: x[self.field])
return (
(await self.transform_polars(pl.from_pandas(df).lazy(), 'feature'))
.collect()
.to_pandas()['feature']
)

async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr:
return pl.col(self.key).struct.field(self.field).alias(alias)
if df.schema[self.key].is_(pl.Utf8):
return await JsonPath(self.key, self.field).transform_polars(df, alias)
else:
return pl.col(self.key).struct.field(self.field).alias(alias)


@dataclass
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "aligned"
version = "0.0.58"
version = "0.0.59"
description = "A data managment and lineage tool for ML applications."
authors = ["Mats E. Mollestad <mats@mollestad.no>"]
license = "Apache-2.0"
Expand Down

0 comments on commit b85860b

Please sign in to comment.