Ensure schema generation is robust to NaNs and await the describe_data call

holoviz · Oct 16, 2024 · d767527 · d767527
1 parent c2ea8c9
commit d767527
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 4 deletions.
diff --git a/lumen/ai/utils.py b/lumen/ai/utils.py
@@ -241,8 +241,7 @@ def describe_data_sync(df):
             "stats": df_describe_dict,
         }
 
-    data = asyncio.to_thread(describe_data_sync, df)
-    return data
+    return await asyncio.to_thread(describe_data_sync, df)
 
 
 def clean_sql(sql_expr):

diff --git a/lumen/sources/duckdb.py b/lumen/sources/duckdb.py
@@ -5,6 +5,7 @@
 from typing import TYPE_CHECKING, Any
 
 import duckdb
+import pandas as pd
 import param
 
 from ..config import config
@@ -321,8 +322,10 @@ def get_schema(
                     cast = str
                 else:
                     cast = lambda v: v
-                schema[col]['inclusiveMinimum'] = cast(minmax_data[f'{col}_min'].iloc[0])
-                schema[col]['inclusiveMaximum'] = cast(minmax_data[f'{col}_max'].iloc[0])
+                min_data = minmax_data[f'{col}_min'].iloc[0]
+                schema[col]['inclusiveMinimum'] = min_data if pd.isna(min_data) else cast(min_data)
+                max_data = minmax_data[f'{col}_max'].iloc[0]
+                schema[col]['inclusiveMaximum'] = max_data if pd.isna(max_data) else cast(max_data)
 
             count_expr = SQLCount().apply(sql_expr)
             count_expr = ' '.join(count_expr.splitlines())