Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable schema inference in read_csv to infer correct datatypes #35

Merged
merged 1 commit into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions app/utils/dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ async def get_dataframe_honouring_encoding_async(
pl.DataFrame: polars Dataframe object
"""
try:
df = pl.read_csv(source, null_values="NA", infer_schema_length=0)
df = pl.read_csv(source, null_values="NA", infer_schema_length=None)
except (UnicodeDecodeError, pl_exc.ComputeError) as err:
logger.warning(f"File encoding is not default: {err}")
logger.warning("Trying to read file with proper encoding")
Expand All @@ -59,7 +59,7 @@ async def get_dataframe_honouring_encoding_async(
source,
null_values="NA",
encoding=encoding,
infer_schema_length=0,
infer_schema_length=None,
)
return df

Expand All @@ -78,7 +78,7 @@ def get_dataframe_honouring_encoding(
pl.DataFrame: polars Dataframe object
"""
try:
df = pl.read_csv(source, null_values="NA", infer_schema_length=0)
df = pl.read_csv(source, null_values="NA", infer_schema_length=None)
except (UnicodeDecodeError, pl_exc.ComputeError) as err:
logger.error(f"Could not interpret File encoding : {err}")
encoding = get_encoding(obj=source, is_object=is_object)
Expand All @@ -87,7 +87,7 @@ def get_dataframe_honouring_encoding(
source,
null_values="NA",
encoding=encoding,
infer_schema_length=0,
infer_schema_length=None,
)
return df

Expand Down
8 changes: 7 additions & 1 deletion app/utils/profile_segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from numpy import bool_
from pandas import DataFrame
from pydantic import parse_obj_as
from ydata_profiling import ProfileReport

from app.core.config import Settings
from app.models.analysis import Analysis
Expand Down Expand Up @@ -38,7 +39,9 @@ def json_conversion_objects(obj):


class ProfileSegments:
def __init__(self, pandas_profile, columns=None, round_to=3):
def __init__(
self, pandas_profile: ProfileReport, columns=None, round_to=3
):
"""
Pass pandas profile of a dataset as argument
"""
Expand Down Expand Up @@ -93,6 +96,9 @@ def package(self) -> Dict:
def samples(self) -> List[Sample]:
# get samples
samples = self.profile_description.sample
import logging

logging.error(samples)
for sample in samples:
sample.data = sample.data.round(decimals=self.round_to).to_json()
# * 'head' and 'tail' are returned as dataset sample
Expand Down
Loading