Fix pandas 1.5.3 compatibility issues

staskh · staskh · commit ec925d1c01e5 · 2025-09-10T21:15:29.000+03:00
- Remove include_groups parameter from all groupby().apply() calls
- Add explicit column selection to prevent deprecation warnings
- Update AUC function to work with pandas 1.5.3
- Update GRADE function to work with pandas 1.5.3
- Update episode_calculation function to work with pandas 1.5.3
- Bump version to 0.3.1

The include_groups parameter was introduced in pandas 2.2.0, so passing it
raises 'unexpected keyword argument' errors in pandas 1.5.3. This fix:

1. Removes all include_groups=False parameters
2. Adds explicit column selection (e.g. [['gl', 'gl_next']]) to each groupby operation
3. Maintains the same functionality while ensuring compatibility

All functions now work correctly with pandas 1.5.3:
- iglu.auc() - AUC calculation
- iglu.grade() - GRADE score calculation
- iglu.episode_calculation() - Episode analysis

Resolves compatibility issues for users with older pandas versions.
diff --git a/iglu_python/auc.py b/iglu_python/auc.py
@@ -86,8 +86,8 @@ def auc_single(subject_data: pd.DataFrame | pd.Series, tz: str = "") -> float:
     if is_iglu_r_compatible():
         input_data["day"] = input_data["time"].dt.floor("d")
         input_data["gl_next"] = input_data["gl"].shift(-1)
-        each_day_area = input_data.groupby("day").apply(
-            lambda x: np.nansum((dt0 / 60) * (x["gl"].values + x["gl_next"].values) / 2), include_groups=False
+        each_day_area = input_data.groupby("day")[["gl", "gl_next"]].apply(
+            lambda x: np.nansum((dt0 / 60) * (x["gl"].values + x["gl_next"].values) / 2)
         )
         # calculate number of not nan trapezoids in total (number of not nan gl and gl_next)
         n_trapezoids = (~np.isnan(input_data["gl"]) & ~np.isnan(input_data["gl_next"])).sum()
@@ -102,8 +102,8 @@ def auc_single(subject_data: pd.DataFrame | pd.Series, tz: str = "") -> float:
         input_data["gl_next"] = input_data["gl"].shift(-1)
 
         # Calculate AUC for each hour using trapezoidal rule (mg*min/dL)
-        hourly_auc = input_data.groupby("hour").apply(
-            lambda x: np.nansum((dt0 / 60) * (x["gl"].values + x["gl_next"].values) / 2), include_groups=False
+        hourly_auc = input_data.groupby("hour")[["gl", "gl_next"]].apply(
+            lambda x: np.nansum((dt0 / 60) * (x["gl"].values + x["gl_next"].values) / 2)
         )
         # 0 mean no data in this hour, replace with nan
         hourly_auc = hourly_auc.replace(0, np.nan)
diff --git a/iglu_python/episode_calculation.py b/iglu_python/episode_calculation.py
@@ -292,7 +292,7 @@ def episode_single(
 
     # Classify events for each segment
     ep_per_seg = (
-        segment_data.groupby("segment")
+        segment_data.groupby("segment")[["gl"]]
         .apply(
             lambda x: pd.DataFrame(
                 {
@@ -302,8 +302,7 @@ def episode_single(
                     "lv2_hyper": event_class(x, "hyper", lv2_hyper, dur_idx, end_idx),
                     "ext_hypo": event_class(x, "hypo", lv1_hypo, int(120 / dt0) + 1, end_idx),
                 }
-            ),
-            include_groups=False,
+            )
         )
         .reset_index()
         .drop(columns=["level_1"])
@@ -383,7 +382,7 @@ def event_class(
 
     # Group by event and calculate start/end positions
     annotated_grouped = (
-        annotated.groupby("event")
+        annotated.groupby("event")[["level"]]
         .apply(
             lambda x: pd.DataFrame(
                 {
@@ -399,8 +398,7 @@ def event_class(
                         ["end" if (not x["level"].iloc[0] and len(x) >= end_duration) else None] + [None] * (len(x) - 1)
                     ),
                 }
-            ),
-            include_groups=False,
+            )
         )
         .reset_index()
         .drop(columns=["level_1"])
@@ -462,12 +460,16 @@ def lv1_excl(data: pd.DataFrame) -> np.ndarray:
     lv2 = [column for column in data.columns if column.startswith("lv2")]
     lv2_first = lv2[0]
     # Group by segment and lv1
-    grouped = data.groupby(["segment", lv1_first])
+    grouped = data.groupby(["segment", lv1_first])[
+        [
+            lv1_first,
+            lv2_first,
+        ]
+    ]
 
     # Calculate exclusive labels
     excl = grouped.apply(
-        lambda x: pd.DataFrame({"excl": [0 if (x[lv2_first].values > 0).any() else x[lv1_first].iloc[0]] * len(x)}),
-        include_groups=False,
+        lambda x: pd.DataFrame({"excl": [0 if (x[lv2_first].values > 0).any() else x[lv1_first].iloc[0]] * len(x)})
     )
 
     excl = excl.reset_index()
diff --git a/iglu_python/grade.py b/iglu_python/grade.py
@@ -61,11 +61,7 @@ def grade(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
     data = check_data_columns(data)
 
     # Calculate GRADE score for each subject
-    result = (
-        data.groupby("id")
-        .apply(lambda x: np.mean(_grade_formula(x["gl"].dropna())), include_groups=False)
-        .reset_index()
-    )
+    result = data.groupby("id")[["gl"]].apply(lambda x: np.mean(_grade_formula(x["gl"].dropna()))).reset_index()
     result.columns = ["id", "GRADE"]
 
     return result
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "iglu_python"
-version = "0.3.0"
+version = "0.3.1"
 description = "Python implementation of the iglu package for continuous glucose monitoring data analysis"
 readme = "README.md"
 requires-python = ">=3.11"