Skip to content

Commit bec5fa5

Browse files
committed
support for Series, list and ndarray -> return float or dict
1 parent 3614774 commit bec5fa5

16 files changed

+284
-248
lines changed

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,14 @@ Unless noted, IGLU-R test compatibility is considered successful if it achieves
5757
| mage | Mean Amplitude of Glycemic Excursions||✅ only Series(DatetimeIndex) returns float || See algorithm at [MAGE](https://irinagain.github.io/iglu/articles/MAGE.html) |
5858
| mean_glu | Mean glucose value || ✅ returns float|
5959
| median_glu |Median glucose value||✅ returns float |
60-
| modd | Mean of Daily Differences||
61-
| pgs | Personal Glycemic State || || |
62-
| quantile_glu |glucose level quantiles||
63-
| range_glu |glucose level range||
64-
| roc | Rate of Change||
60+
| modd | Mean of Daily Differences|| ✅ only Series(DatetimeIndex) returns float|
61+
| pgs | Personal Glycemic State ||✅ only Series(DatetimeIndex) returns float| ||
62+
| quantile_glu |glucose level quantiles||✅ returns List[float] |
63+
| range_glu |glucose level range||✅ returns float|
64+
| roc | Rate of Change||🟡 always returns DataFrame|
6565
| sd_glu | standard deviation of glucose values| ✅ | ✅ returns float
66-
| sd_measures |various standard deviation subtypes||
67-
| sd_roc | standard deviation of the rate of change|| |||
66+
| sd_measures |various standard deviation subtypes||✅ only Series(DatetimeIndex) returns Dict[str,float]|
67+
| sd_roc | standard deviation of the rate of change||✅ only Series(DatetimeIndex) returns float ||
6868
| summary_glu | summary glucose level||
6969
| process_data | Data Pre-Processor ||
7070
| CGMS2DayByDay |Interpolate glucose input||

iglu_python/modd.py

Lines changed: 37 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88

99
def modd(
10-
data: Union[pd.DataFrame, pd.Series], lag: int = 1, tz: str = ""
11-
) -> pd.DataFrame:
10+
data: Union[pd.DataFrame, pd.Series, np.ndarray, list], lag: int = 1, tz: str = ""
11+
) -> pd.DataFrame|float:
1212
"""
1313
Calculate Mean of Daily Differences (MODD).
1414
@@ -18,19 +18,21 @@ def modd(
1818
1919
Parameters
2020
----------
21-
data : Union[pd.DataFrame, pd.Series]
22-
DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values
21+
data : Union[pd.DataFrame, pd.Series, np.ndarray, list]
22+
DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values,
23+
or a numpy array or list of glucose values
2324
lag : int, default=1
2425
Integer indicating which lag (# days) to use. Default is 1.
2526
tz : str, default=""
2627
Time zone to use for datetime conversion. Empty string means use local time zone.
2728
2829
Returns
2930
-------
30-
pd.DataFrame
31+
pd.DataFrame|float
3132
DataFrame with columns:
3233
- id: subject identifier (if DataFrame input)
33-
- MODD: Mean of Daily Differences value
34+
- MODD: Mean of Daily Differences value.
35+
If a Series of glucose values (with a DatetimeIndex) is passed, a float is returned instead.
3436
3537
References
3638
----------
@@ -56,55 +58,40 @@ def modd(
5658
0 45.0
5759
"""
5860

59-
def modd_single(data: pd.DataFrame) -> float:
60-
"""Calculate MODD for a single subject"""
61-
# Convert data to day-by-day format
62-
data_ip = CGMS2DayByDay(data, tz=tz)
63-
gl_by_id_ip = data_ip[0].flatten() # Get interpolated glucose values
64-
dt0 = data_ip[2] # Get time frequency
65-
66-
# Calculate absolute differences with specified lag
67-
# lag is in days, so we need to convert to minutes and divide of dt0 frequency
68-
shift = int(lag * 24 * 60 / dt0) # Convert lag to minutes and divide by dt0
69-
# Shift array by lag and calculate differences
70-
abs_diffs = np.abs(gl_by_id_ip[shift:] - gl_by_id_ip[:-shift])
71-
# Remove NaNs
72-
abs_diffs = abs_diffs[~np.isnan(abs_diffs)] # Remove NaNs
73-
74-
# Calculate mean of absolute differences, ignoring NaN values
75-
if len(abs_diffs) == 0:
76-
modd_val = np.nan
77-
else:
78-
modd_val = np.nanmean(abs_diffs)
79-
80-
return float(modd_val) if not pd.isna(modd_val) else np.nan
81-
8261
# Handle Series input
8362
if isinstance(data, pd.Series):
8463
if not isinstance(data.index, pd.DatetimeIndex):
8564
raise ValueError("Series must have a DatetimeIndex")
86-
data_df = pd.DataFrame(
87-
{
88-
"id": ["subject1"] * len(data.values),
89-
"time": data.index,
90-
"gl": data.values,
91-
}
92-
)
93-
94-
modd_val = modd_single(data_df)
95-
return pd.DataFrame({"MODD": [modd_val]})
65+
return modd_single(data, lag, tz)
9666

9767
# Handle DataFrame input
9868
data = check_data_columns(data)
9969

100-
# Calculate MODD for each subject
101-
result = []
102-
for subject in data["id"].unique():
103-
subject_data = data[data["id"] == subject].copy()
104-
if len(subject_data.dropna(subset=["gl"])) == 0:
105-
continue
106-
107-
modd_val = modd_single(subject_data)
108-
result.append({"id": subject, "MODD": modd_val})
109-
110-
return pd.DataFrame(result)
70+
data.set_index('time', drop=True, inplace=True)
71+
out = data.groupby('id').agg(
72+
MODD = ("gl", lambda x: modd_single(x, lag, tz))
73+
).reset_index()
74+
return out
75+
76+
def modd_single(data: pd.Series, lag: int = 1, tz: str = "") -> float:
77+
"""Calculate MODD for a single subject"""
78+
# Convert data to day-by-day format
79+
data_ip = CGMS2DayByDay(data, tz=tz)
80+
gl_by_id_ip = data_ip[0].flatten() # Get interpolated glucose values
81+
dt0 = data_ip[2] # Get time frequency
82+
83+
# Calculate absolute differences with specified lag
84+
# lag is in days, so convert it to minutes and divide by dt0 to get the shift in samples
85+
shift = int(lag * 24 * 60 / dt0) # Convert lag to minutes and divide by dt0
86+
# Shift array by lag and calculate differences
87+
abs_diffs = np.abs(gl_by_id_ip[shift:] - gl_by_id_ip[:-shift])
88+
# Remove NaNs
89+
abs_diffs = abs_diffs[~np.isnan(abs_diffs)] # Remove NaNs
90+
91+
# Calculate mean of absolute differences, ignoring NaN values
92+
if len(abs_diffs) == 0:
93+
modd_val = np.nan
94+
else:
95+
modd_val = np.nanmean(abs_diffs)
96+
97+
return float(modd_val) if not pd.isna(modd_val) else np.nan

iglu_python/pgs.py

Lines changed: 46 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
def pgs(
1414
data: Union[pd.DataFrame, pd.Series], dur_length: int = 20, end_length: int = 30
15-
) -> pd.DataFrame:
15+
) -> pd.DataFrame|float:
1616
"""
1717
Calculate Personal Glycemic State (PGS).
1818
@@ -23,7 +23,8 @@ def pgs(
2323
Parameters
2424
----------
2525
data : Union[pd.DataFrame, pd.Series]
26-
DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values.
26+
DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values,
27+
or a numpy array or list of glucose values
2728
Should only be data for 1 subject. In case multiple subject ids are detected,
2829
a warning is produced and only 1st subject is used.
2930
dur_length : int, optional
@@ -36,9 +37,9 @@ def pgs(
3637
3738
Returns
3839
-------
39-
pd.DataFrame
40+
pd.DataFrame|float
4041
DataFrame with 1 row for each subject, a column for subject id and a column
41-
for PGS value.
42+
for PGS value. If a Series of glucose values (with a DatetimeIndex) is passed, a float is returned instead.
4243
4344
Notes
4445
-----
@@ -82,57 +83,48 @@ def pgs(
8283
if isinstance(data, pd.Series):
8384
if not isinstance(data.index, pd.DatetimeIndex):
8485
raise ValueError("Series must have a DatetimeIndex")
85-
data = pd.DataFrame(
86-
{
87-
"id": ["subject1"] * len(data.values),
88-
"time": data.index,
89-
"gl": data.values,
90-
}
91-
)
86+
return pgs_single(data, dur_length, end_length)
9287

9388
# Handle DataFrame input
9489
data = check_data_columns(data)
90+
data.set_index('time', drop=True, inplace=True)
91+
92+
out = data.groupby('id').agg(
93+
PGS = ("gl", lambda x: pgs_single(x, dur_length, end_length))
94+
).reset_index()
95+
return out
96+
97+
def pgs_single(gl: pd.Series, dur_length: int = 20, end_length: int = 30) -> float:
98+
"""Calculate PGS for a single subject"""
99+
# Calculate components
100+
gvp_val = gvp(gl)
101+
mean_val = mean_glu(gl)
102+
ptir_val = in_range_percent(gl, target_ranges=[[70, 180]])['in_range_70_180']
103+
104+
# Calculate episode components
105+
eps = episode_calculation(
106+
gl,
107+
lv1_hypo=70,
108+
lv2_hypo=54,
109+
dur_length=dur_length,
110+
end_length=end_length,
111+
)
112+
n54 = eps["avg_ep_per_day"].iloc[1] * 7 # Convert to weekly episodes
113+
n70 = eps["avg_ep_per_day"].iloc[5] * 7 # Use lv1 exclusive, not lv1 super set
114+
115+
# Calculate PGS components
116+
f_gvp = 1 + (9 / (1 + np.exp(-0.049 * (gvp_val - 65.47))))
117+
f_ptir = 1 + (9 / (1 + np.exp(0.0833 * (ptir_val - 55.04))))
118+
f_mg = 1 + 9 * (
119+
(1 / (1 + np.exp(0.1139 * (mean_val - 72.08))))
120+
+ (1 / (1 + np.exp(-0.09195 * (mean_val - 157.57))))
121+
)
122+
123+
f_h54 = 0.5 + 4.5 * (1 - np.exp(-0.91093 * n54))
124+
f_h70 = 0.5714 * n70 + 0.625 if n70 <= 7.65 else 5
125+
126+
# Calculate final PGS score
127+
pgs_score = f_gvp + f_ptir + f_mg + f_h54 + f_h70
128+
129+
return pgs_score
95130

96-
def pgs_single(subj_data: pd.DataFrame) -> float:
97-
"""Calculate PGS for a single subject"""
98-
# Calculate components
99-
gvp_val = gvp(subj_data)["GVP"].iloc[0]
100-
mean_val = mean_glu(subj_data)["mean"].iloc[0]
101-
ptir_val = in_range_percent(subj_data, target_ranges=[[70, 180]])["in_range_70_180"].iloc[0]
102-
103-
# Calculate episode components
104-
eps = episode_calculation(
105-
subj_data,
106-
lv1_hypo=70,
107-
lv2_hypo=54,
108-
dur_length=dur_length,
109-
end_length=end_length,
110-
)
111-
n54 = eps["avg_ep_per_day"].iloc[1] * 7 # Convert to weekly episodes
112-
n70 = eps["avg_ep_per_day"].iloc[5] * 7 # Use lv1 exclusive, not lv1 super set
113-
114-
# Calculate PGS components
115-
f_gvp = 1 + (9 / (1 + np.exp(-0.049 * (gvp_val - 65.47))))
116-
f_ptir = 1 + (9 / (1 + np.exp(0.0833 * (ptir_val - 55.04))))
117-
f_mg = 1 + 9 * (
118-
(1 / (1 + np.exp(0.1139 * (mean_val - 72.08))))
119-
+ (1 / (1 + np.exp(-0.09195 * (mean_val - 157.57))))
120-
)
121-
122-
f_h54 = 0.5 + 4.5 * (1 - np.exp(-0.91093 * n54))
123-
f_h70 = 0.5714 * n70 + 0.625 if n70 <= 7.65 else 5
124-
125-
# Calculate final PGS score
126-
pgs_score = f_gvp + f_ptir + f_mg + f_h54 + f_h70
127-
128-
return pgs_score
129-
130-
131-
# Calculate PGS for each subject
132-
results = []
133-
for subject_id in data["id"].unique():
134-
subject_data = data[data["id"] == subject_id].copy()
135-
pgs_value = pgs_single(subject_data)
136-
results.append({"id": subject_id, "PGS": pgs_value})
137-
138-
return pd.DataFrame(results)

iglu_python/quantile_glu.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88

99
def quantile_glu(
10-
data: Union[pd.DataFrame, pd.Series], quantiles: List[float] = None
11-
) -> pd.DataFrame:
10+
data: Union[pd.DataFrame, pd.Series, np.ndarray, list], quantiles: List[float] = None
11+
) -> pd.DataFrame|list[float]:
1212
"""
1313
Calculate glucose level quantiles.
1414
@@ -18,17 +18,17 @@ def quantile_glu(
1818
1919
Parameters
2020
----------
21-
data : Union[pd.DataFrame, pd.Series]
22-
DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values
21+
data : Union[pd.DataFrame, pd.Series, np.ndarray, list]
22+
DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values,
23+
or a numpy array or list of glucose values
2324
quantiles : List[float], default=[0, 25, 50, 75, 100]
2425
List of quantile values between 0 and 100
2526
2627
Returns
2728
-------
28-
pd.DataFrame
29+
pd.DataFrame|list[float]
2930
DataFrame with 1 row for each subject, a column for subject id and a column
30-
for each quantile. If a Series of glucose values is passed, then a DataFrame
31-
without the subject id is returned.
31+
for each quantile. If a Series, numpy array, or list of glucose values is passed, a list of floats is returned instead.
3232
3333
Notes
3434
-----
@@ -58,10 +58,12 @@ def quantile_glu(
5858
# Handle Series input
5959
if quantiles is None:
6060
quantiles = [0, 25, 50, 75, 100]
61-
if isinstance(data, pd.Series):
61+
if isinstance(data, (pd.Series, np.ndarray, list)):
62+
if isinstance(data, (np.ndarray, list)):
63+
data = pd.Series(data)
6264
# Calculate quantiles for Series
6365
quantile_vals = np.quantile(data.dropna(), np.array(quantiles) / 100)
64-
return pd.DataFrame([quantile_vals], columns=quantiles)
66+
return quantile_vals.tolist()
6567

6668
# Handle DataFrame input
6769
data = check_data_columns(data)

iglu_python/range_glu.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from typing import Union
22

3+
import numpy as np
34
import pandas as pd
45

56
from .utils import check_data_columns
67

78

8-
def range_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
9+
def range_glu(data: Union[pd.DataFrame, pd.Series, np.ndarray, list]) -> pd.DataFrame|float:
910
"""
1011
Calculate glucose level range.
1112
@@ -14,15 +15,16 @@ def range_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
1415
1516
Parameters
1617
----------
17-
data : Union[pd.DataFrame, pd.Series]
18-
DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values
18+
data : Union[pd.DataFrame, pd.Series, np.ndarray, list]
19+
DataFrame with columns 'id', 'time', and 'gl', or a Series of glucose values,
20+
or a numpy array or list of glucose values
1921
2022
Returns
2123
-------
22-
pd.DataFrame
24+
pd.DataFrame|float
2325
DataFrame with columns:
2426
- id: subject identifier (if DataFrame input)
25-
- range: range of glucose values (max - min)
27+
- range: range of glucose values (max - min). If a Series, numpy array, or list of glucose values is passed, a float is returned instead.
2628
2729
Examples
2830
--------
@@ -43,10 +45,12 @@ def range_glu(data: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
4345
0 70
4446
"""
4547
# Handle Series input
46-
if isinstance(data, pd.Series):
48+
if isinstance(data, (pd.Series, np.ndarray, list)):
49+
if isinstance(data, (np.ndarray, list)):
50+
data = pd.Series(data)
4751
# Calculate range for Series
48-
range_val = data.max() - data.min()
49-
return pd.DataFrame({"range": [range_val]})
52+
range_val = float(data.max() - data.min())
53+
return range_val
5054

5155
# Handle DataFrame input
5256
data = check_data_columns(data)

iglu_python/roc.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -118,17 +118,16 @@ def roc_single(data: pd.DataFrame,
118118

119119
# Handle Series input
120120
if isinstance(data, pd.Series):
121-
data = data.dropna()
122-
if len(data) == 0:
123-
return pd.DataFrame({"ROC": [np.nan]})
121+
if not isinstance(data.index, pd.DatetimeIndex):
122+
raise ValueError("Series input must have a datetime index")
123+
if len(data.dropna()) == 0:
124+
return pd.DataFrame({"roc": [np.nan]})
124125

125126
# Convert Series to DataFrame format
126127
data = pd.DataFrame(
127128
{
128129
"id": ["subject1"] * len(data),
129-
"time": pd.date_range(
130-
start="2020-01-01", periods=len(data), freq=f"{dt0}min"
131-
),
130+
"time": data.index,
132131
"gl": data.values,
133132
}
134133
)

0 commit comments

Comments
 (0)