diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py index 9d0f603..9dc876a 100644 --- a/aligned/retrival_job.py +++ b/aligned/retrival_job.py @@ -1694,6 +1694,8 @@ async def to_pandas(self) -> pd.DataFrame: df[feature.name] = df[feature.name].apply( lambda x: json.loads(x) if isinstance(x, str) else x ) + elif feature.dtype == FeatureType.json(): + pass else: if feature.dtype.is_numeric: df[feature.name] = pd.to_numeric(df[feature.name], errors='coerce').astype( @@ -1740,6 +1742,8 @@ async def to_polars(self) -> pl.LazyFrame: dtype = df.select(feature.name).dtypes[0] if dtype == pl.Utf8: df = df.with_columns(pl.col(feature.name).str.json_extract(pl.List(pl.Utf8))) + elif feature.dtype == FeatureType.json(): + pass else: df = df.with_columns(pl.col(feature.name).cast(feature.dtype.polars_type, strict=False)) diff --git a/aligned/schemas/transformation.py b/aligned/schemas/transformation.py index dc16be7..97d4187 100644 --- a/aligned/schemas/transformation.py +++ b/aligned/schemas/transformation.py @@ -2141,10 +2141,17 @@ class StructField(Transformation): dtype = FeatureType.string() async def transform_pandas(self, df: pd.DataFrame) -> pd.Series: - return df[self.key].apply(lambda x: x[self.field]) + return ( + (await self.transform_polars(pl.from_pandas(df).lazy(), 'feature')) + .collect() + .to_pandas()['feature'] + ) async def transform_polars(self, df: pl.LazyFrame, alias: str) -> pl.LazyFrame | pl.Expr: - return pl.col(self.key).struct.field(self.field).alias(alias) + if df.schema[self.key].is_(pl.Utf8): + return await JsonPath(self.key, self.field).transform_polars(df, alias) + else: + return pl.col(self.key).struct.field(self.field).alias(alias) @dataclass diff --git a/pyproject.toml b/pyproject.toml index b8caf0e..99a4e6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "aligned" -version = "0.0.58" +version = "0.0.59" description = "A data managment and lineage tool for ML applications." authors = ["Mats E. Mollestad "] license = "Apache-2.0"