Skip to content

Commit

Permalink
Ensure schema generation is robust to nans and await describe_data call
Browse files Browse the repository at this point in the history
  • Loading branch information
philippjfr committed Oct 16, 2024
1 parent c2ea8c9 commit d767527
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
3 changes: 1 addition & 2 deletions lumen/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,7 @@ def describe_data_sync(df):
"stats": df_describe_dict,
}

- data = asyncio.to_thread(describe_data_sync, df)
- return data
+ return await asyncio.to_thread(describe_data_sync, df)


def clean_sql(sql_expr):
Expand Down
7 changes: 5 additions & 2 deletions lumen/sources/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import TYPE_CHECKING, Any

import duckdb
+ import pandas as pd
import param

from ..config import config
Expand Down Expand Up @@ -321,8 +322,10 @@ def get_schema(
cast = str
else:
cast = lambda v: v
- schema[col]['inclusiveMinimum'] = cast(minmax_data[f'{col}_min'].iloc[0])
- schema[col]['inclusiveMaximum'] = cast(minmax_data[f'{col}_max'].iloc[0])
+ min_data = minmax_data[f'{col}_min'].iloc[0]
+ schema[col]['inclusiveMinimum'] = min_data if pd.isna(min_data) else cast(min_data)
+ max_data = minmax_data[f'{col}_max'].iloc[0]
+ schema[col]['inclusiveMaximum'] = max_data if pd.isna(max_data) else cast(max_data)

count_expr = SQLCount().apply(sql_expr)
count_expr = ' '.join(count_expr.splitlines())
Expand Down

0 comments on commit d767527

Please sign in to comment.