Skip to content

Commit 17cb51d

Browse files
committed
Fix plot_daily for missing dates and add timezone/DST tests
- Fix bug in plot_daily: handle timeseries with missing dates correctly
  - Changed from resample('D') to groupby(normalize()) to only include days with data
  - Prevents KeyError when trying to get groups for days without data
- Add comprehensive unit tests for plot_daily:
  - Test missing dates (non-continuous timeseries)
  - Test multiple missing dates
  - Test single day in middle of range
  - Test timezone-aware (non-UTC) timeseries
  - Test DST spring forward (losing an hour)
  - Test DST fall back (gaining an hour)
- Bump version to 0.4.3
1 parent 1c7a730 commit 17cb51d

File tree

4 files changed

+319
-6
lines changed

4 files changed

+319
-6
lines changed

iglu_python/extension/plots.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,12 @@ def plot_daily(cgm_timeseries: pd.Series, lower: int = 70, upper: int = 140) ->
2222
plt.Figure object
2323
"""
2424
# divide cgm_timeseries into list of daily series
25-
cgm_daily_group = cgm_timeseries.resample("D")
26-
cgm_timeseries_daily = {day: cgm_daily_group.get_group(day) for day in cgm_daily_group.groups}
25+
# Group by date (normalize to date for grouping) and only include days with data
26+
cgm_timeseries_daily = {}
27+
for date, day_data in cgm_timeseries.groupby(cgm_timeseries.index.normalize()):
28+
if len(day_data) > 0:
29+
# The group key is already a Timestamp normalized to midnight; use it as the dict key
30+
cgm_timeseries_daily[date] = day_data
2731

2832
# plot each day separately
2933
# Create one figure with subplots for each day

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "iglu_python"
7-
version = "0.4.2"
7+
version = "0.4.3"
88
description = "Python implementation of the iglu package for continuous glucose monitoring data analysis"
99
readme = "README.md"
1010
requires-python = ">=3.11"

tests/test_plots.py

Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import os
22
import tempfile
33
from pathlib import Path
4+
from datetime import datetime
45

56
import numpy as np
67
import pandas as pd
78
import pytest
89
import matplotlib.pyplot as plt
10+
import pytz
911

1012
# Import the module to test
1113
from iglu_python.extension.plots import plot_daily, plot_statistics
@@ -359,6 +361,315 @@ def test_plot_daily_very_high_glucose_values():
359361
plt.close(fig)
360362

361363

364+
def test_plot_daily_missing_dates():
365+
"""Test plot_daily with timeseries that has missing dates (non-continuous)"""
366+
# Create data for Jan 1 and Jan 3, but skip Jan 2 (missing date)
367+
day1_start = pd.Timestamp('2023-01-01 00:00:00')
368+
day1_end = pd.Timestamp('2023-01-01 23:59:59')
369+
day1_index = pd.date_range(start=day1_start, end=day1_end, freq='2h')
370+
371+
day3_start = pd.Timestamp('2023-01-03 00:00:00')
372+
day3_end = pd.Timestamp('2023-01-03 23:59:59')
373+
day3_index = pd.date_range(start=day3_start, end=day3_end, freq='2h')
374+
375+
# Combine indices (note: Jan 2 is missing)
376+
time_index = day1_index.union(day3_index)
377+
378+
# Create glucose values
379+
np.random.seed(42)
380+
glucose_values = 120 + np.random.normal(0, 20, len(time_index))
381+
glucose_values = np.clip(glucose_values, 50, 400)
382+
383+
data_with_missing_dates = pd.Series(glucose_values, index=time_index)
384+
385+
# Verify that the data indeed has missing dates
386+
all_dates = pd.date_range(start='2023-01-01', end='2023-01-03', freq='D')
387+
dates_in_data = set(data_with_missing_dates.index.date)
388+
dates_in_range = set(all_dates.date)
389+
missing_dates = dates_in_range - dates_in_data
390+
assert len(missing_dates) > 0, "Test data should have missing dates"
391+
392+
# Test that plot_daily handles missing dates correctly
393+
fig = plot_daily(data_with_missing_dates)
394+
395+
assert isinstance(fig, plt.Figure)
396+
397+
# Should only plot days that have data (2 days: Jan 1 and Jan 3)
398+
assert len(fig.axes) == 2, "Should have 2 subplots for 2 days with data"
399+
400+
# Check that the subplots have the expected dates
401+
expected_dates = ['2023-01-01', '2023-01-03']
402+
for i, ax in enumerate(fig.axes):
403+
assert f'Day: {expected_dates[i]}' in ax.get_title()
404+
405+
# Verify that each subplot has data
406+
for ax in fig.axes:
407+
lines = ax.get_lines()
408+
assert len(lines) > 0, "Each subplot should have at least one line"
409+
# Check that the line has data points
410+
for line in lines:
411+
x_data = line.get_xdata()
412+
y_data = line.get_ydata()
413+
assert len(x_data) > 0, "Line should have x data"
414+
assert len(y_data) > 0, "Line should have y data"
415+
416+
plt.close(fig)
417+
418+
419+
def test_plot_daily_missing_dates_multiple_gaps():
420+
"""Test plot_daily with timeseries that has multiple missing dates"""
421+
# Create data for Jan 1, Jan 3, and Jan 5, skipping Jan 2 and Jan 4
422+
day1_index = pd.date_range(start='2023-01-01 00:00:00', end='2023-01-01 23:59:59', freq='3h')
423+
day3_index = pd.date_range(start='2023-01-03 00:00:00', end='2023-01-03 23:59:59', freq='3h')
424+
day5_index = pd.date_range(start='2023-01-05 00:00:00', end='2023-01-05 23:59:59', freq='3h')
425+
426+
# Combine indices (Jan 2 and Jan 4 are missing)
427+
time_index = day1_index.union(day3_index).union(day5_index)
428+
429+
# Create glucose values
430+
np.random.seed(123)
431+
glucose_values = 120 + np.random.normal(0, 20, len(time_index))
432+
glucose_values = np.clip(glucose_values, 50, 400)
433+
434+
data_with_multiple_gaps = pd.Series(glucose_values, index=time_index)
435+
436+
# Test that plot_daily handles multiple missing dates correctly
437+
fig = plot_daily(data_with_multiple_gaps)
438+
439+
assert isinstance(fig, plt.Figure)
440+
441+
# Should only plot days that have data (3 days: Jan 1, Jan 3, Jan 5)
442+
assert len(fig.axes) == 3, "Should have 3 subplots for 3 days with data"
443+
444+
# Check that the subplots have the expected dates
445+
expected_dates = ['2023-01-01', '2023-01-03', '2023-01-05']
446+
for i, ax in enumerate(fig.axes):
447+
assert f'Day: {expected_dates[i]}' in ax.get_title()
448+
449+
plt.close(fig)
450+
451+
452+
def test_plot_daily_missing_dates_single_day_in_middle():
453+
"""Test plot_daily with timeseries that has data only for one day in the middle of a range"""
454+
# Create data only for Jan 2 (the series contains no timestamps for Jan 1 or Jan 3)
455+
day2_index = pd.date_range(start='2023-01-02 00:00:00', end='2023-01-02 23:59:59', freq='4h')
456+
457+
# Create glucose values
458+
np.random.seed(456)
459+
glucose_values = 120 + np.random.normal(0, 20, len(day2_index))
460+
glucose_values = np.clip(glucose_values, 50, 400)
461+
462+
data_single_day = pd.Series(glucose_values, index=day2_index)
463+
464+
# Test that plot_daily handles this correctly
465+
fig = plot_daily(data_single_day)
466+
467+
assert isinstance(fig, plt.Figure)
468+
469+
# Should only plot the one day that has data
470+
assert len(fig.axes) == 1, "Should have 1 subplot for 1 day with data"
471+
472+
# Check that the subplot has the expected date
473+
ax = fig.axes[0]
474+
assert 'Day: 2023-01-02' in ax.get_title()
475+
476+
plt.close(fig)
477+
478+
479+
def test_plot_daily_timezone_aware():
480+
"""Test plot_daily with timezone-aware (non-UTC) timeseries"""
481+
# Use US/Eastern timezone (UTC-5 or UTC-4 depending on DST)
482+
tz = pytz.timezone('US/Eastern')
483+
484+
# Create data for 2 days in summer (EDT, UTC-4)
485+
# Day 1: June 15, 2024
486+
day1_timestamps = [
487+
tz.localize(datetime(2024, 6, 15, 0, 0)), # Midnight EDT
488+
tz.localize(datetime(2024, 6, 15, 6, 0)), # 6 AM EDT
489+
tz.localize(datetime(2024, 6, 15, 12, 0)), # Noon EDT
490+
tz.localize(datetime(2024, 6, 15, 18, 0)), # 6 PM EDT
491+
]
492+
493+
# Day 2: June 16, 2024
494+
day2_timestamps = [
495+
tz.localize(datetime(2024, 6, 16, 0, 0)), # Midnight EDT
496+
tz.localize(datetime(2024, 6, 16, 6, 0)), # 6 AM EDT
497+
tz.localize(datetime(2024, 6, 16, 12, 0)), # Noon EDT
498+
tz.localize(datetime(2024, 6, 16, 18, 0)), # 6 PM EDT
499+
]
500+
501+
# Combine timestamps
502+
all_timestamps = day1_timestamps + day2_timestamps
503+
504+
# Create glucose values
505+
np.random.seed(789)
506+
glucose_values = 120 + np.random.normal(0, 20, len(all_timestamps))
507+
glucose_values = np.clip(glucose_values, 50, 400)
508+
509+
# Create timezone-aware Series
510+
data_tz_aware = pd.Series(glucose_values, index=all_timestamps)
511+
512+
# Verify timezone awareness
513+
assert data_tz_aware.index.tz is not None, "Index should be timezone-aware"
514+
assert str(data_tz_aware.index.tz) == 'US/Eastern', "Index should be in US/Eastern timezone"
515+
516+
# Test that plot_daily handles timezone-aware data correctly
517+
fig = plot_daily(data_tz_aware)
518+
519+
assert isinstance(fig, plt.Figure)
520+
521+
# Should plot 2 days
522+
assert len(fig.axes) == 2, "Should have 2 subplots for 2 days"
523+
524+
# Check that the subplots have the expected dates (in local timezone)
525+
expected_dates = ['2024-06-15', '2024-06-16']
526+
for i, ax in enumerate(fig.axes):
527+
assert f'Day: {expected_dates[i]}' in ax.get_title()
528+
529+
# Verify that each subplot has data
530+
for ax in fig.axes:
531+
lines = ax.get_lines()
532+
assert len(lines) > 0, "Each subplot should have at least one line"
533+
for line in lines:
534+
x_data = line.get_xdata()
535+
y_data = line.get_ydata()
536+
assert len(x_data) > 0, "Line should have x data"
537+
assert len(y_data) > 0, "Line should have y data"
538+
539+
plt.close(fig)
540+
541+
542+
def test_plot_daily_dst_spring_forward():
543+
"""Test plot_daily with timezone-aware timeseries during DST spring forward (losing an hour)"""
544+
# US/Eastern: DST spring forward happens on March 10, 2024 at 2:00 AM
545+
# Clocks jump from 1:59 AM EST to 3:00 AM EDT (losing 1 hour)
546+
tz = pytz.timezone('US/Eastern')
547+
548+
# Create data spanning the DST transition
549+
# Before DST: March 10, 2024 1:00 AM EST (UTC-5)
550+
# After DST: March 10, 2024 3:00 AM EDT (UTC-4) - note: 2:00 AM doesn't exist!
551+
timestamps = [
552+
tz.localize(datetime(2024, 3, 10, 0, 0)), # 12:00 AM EST
553+
tz.localize(datetime(2024, 3, 10, 0, 30)), # 12:30 AM EST
554+
tz.localize(datetime(2024, 3, 10, 1, 0)), # 1:00 AM EST
555+
tz.localize(datetime(2024, 3, 10, 1, 30)), # 1:30 AM EST
556+
# Note: 2:00 AM EST doesn't exist - it becomes 3:00 AM EDT
557+
tz.localize(datetime(2024, 3, 10, 3, 0)), # 3:00 AM EDT (spring forward)
558+
tz.localize(datetime(2024, 3, 10, 3, 30)), # 3:30 AM EDT
559+
tz.localize(datetime(2024, 3, 10, 12, 0)), # Noon EDT
560+
tz.localize(datetime(2024, 3, 10, 18, 0)), # 6 PM EDT
561+
]
562+
563+
# Create glucose values
564+
np.random.seed(101)
565+
glucose_values = 120 + np.random.normal(0, 20, len(timestamps))
566+
glucose_values = np.clip(glucose_values, 50, 400)
567+
568+
# Create timezone-aware Series
569+
data_dst_spring = pd.Series(glucose_values, index=timestamps)
570+
571+
# Verify timezone awareness and that we have the DST transition
572+
assert data_dst_spring.index.tz is not None, "Index should be timezone-aware"
573+
574+
# Verify that timestamps are monotonically increasing (even with DST transition)
575+
assert data_dst_spring.index.is_monotonic_increasing, "Timestamps should be monotonically increasing"
576+
577+
# Check that we have both EST and EDT timestamps
578+
offsets = [ts.utcoffset().total_seconds() / 3600 for ts in timestamps]
579+
assert -5.0 in offsets, "Should have EST timestamps (UTC-5)"
580+
assert -4.0 in offsets, "Should have EDT timestamps (UTC-4)"
581+
582+
# Test that plot_daily handles DST spring forward correctly
583+
fig = plot_daily(data_dst_spring)
584+
585+
assert isinstance(fig, plt.Figure)
586+
587+
# Should plot 1 day (all data is on March 10, even with DST transition)
588+
assert len(fig.axes) == 1, "Should have 1 subplot for 1 day (DST transition within same day)"
589+
590+
# Check that the subplot has the expected date
591+
ax = fig.axes[0]
592+
assert 'Day: 2024-03-10' in ax.get_title()
593+
594+
# Verify that the subplot has data
595+
lines = ax.get_lines()
596+
assert len(lines) > 0, "Subplot should have at least one line"
597+
for line in lines:
598+
x_data = line.get_xdata()
599+
y_data = line.get_ydata()
600+
assert len(x_data) > 0, "Line should have x data"
601+
assert len(y_data) > 0, "Line should have y data"
602+
603+
plt.close(fig)
604+
605+
606+
def test_plot_daily_dst_fall_back():
607+
"""Test plot_daily with timezone-aware timeseries during DST fall back (gaining an hour)"""
608+
# US/Eastern: DST fall back happens on November 3, 2024 at 2:00 AM
609+
# Clocks jump from 1:59 AM EDT back to 1:00 AM EST (gaining 1 hour)
610+
tz = pytz.timezone('US/Eastern')
611+
612+
# Create data spanning the DST transition
613+
# Before fall back: November 3, 2024 1:00 AM EDT (UTC-4)
613+
# After fall back: November 3, 2024 1:00 AM EST (UTC-5) - note: 1:00 AM happens twice!
615+
# Important: The first 1:00 AM EDT (5:00 AM UTC) comes BEFORE the second 1:00 AM EST (6:00 AM UTC)
616+
timestamps = [
617+
tz.localize(datetime(2024, 11, 3, 0, 0)), # 12:00 AM EDT (4:00 AM UTC)
618+
tz.localize(datetime(2024, 11, 3, 0, 30)), # 12:30 AM EDT (4:30 AM UTC)
619+
tz.localize(datetime(2024, 11, 3, 1, 0), is_dst=True), # 1:00 AM EDT (5:00 AM UTC) - first occurrence
620+
tz.localize(datetime(2024, 11, 3, 1, 30), is_dst=True), # 1:30 AM EDT (5:30 AM UTC)
621+
# Note: 2:00 AM EDT becomes 1:00 AM EST (fall back)
622+
# The second 1:00 AM EST (6:00 AM UTC) comes after the first 1:00 AM EDT
623+
tz.localize(datetime(2024, 11, 3, 1, 0), is_dst=False), # 1:00 AM EST (6:00 AM UTC) - second occurrence after fall back
624+
tz.localize(datetime(2024, 11, 3, 1, 30), is_dst=False), # 1:30 AM EST (6:30 AM UTC)
625+
tz.localize(datetime(2024, 11, 3, 12, 0), is_dst=False), # Noon EST (17:00 UTC)
626+
tz.localize(datetime(2024, 11, 3, 18, 0), is_dst=False), # 6 PM EST (23:00 UTC)
627+
]
628+
629+
# Create glucose values
630+
np.random.seed(202)
631+
glucose_values = 120 + np.random.normal(0, 20, len(timestamps))
632+
glucose_values = np.clip(glucose_values, 50, 400)
633+
634+
# Create timezone-aware Series
635+
data_dst_fall = pd.Series(glucose_values, index=timestamps)
636+
637+
# Verify timezone awareness and that we have the DST transition
638+
assert data_dst_fall.index.tz is not None, "Index should be timezone-aware"
639+
640+
# Verify that timestamps are monotonically increasing (even with DST transition)
641+
# Note: The timestamps should be in chronological order (UTC time)
642+
assert data_dst_fall.index.is_monotonic_increasing, "Timestamps should be monotonically increasing"
643+
644+
# Check that we have both EDT and EST timestamps
645+
offsets = [ts.utcoffset().total_seconds() / 3600 for ts in timestamps]
646+
assert -4.0 in offsets, "Should have EDT timestamps (UTC-4)"
647+
assert -5.0 in offsets, "Should have EST timestamps (UTC-5)"
648+
649+
# Test that plot_daily handles DST fall back correctly
650+
fig = plot_daily(data_dst_fall)
651+
652+
assert isinstance(fig, plt.Figure)
653+
654+
# Should plot 1 day (all data is on November 3, even with DST transition)
655+
assert len(fig.axes) == 1, "Should have 1 subplot for 1 day (DST transition within same day)"
656+
657+
# Check that the subplot has the expected date
658+
ax = fig.axes[0]
659+
assert 'Day: 2024-11-03' in ax.get_title()
660+
661+
# Verify that the subplot has data
662+
lines = ax.get_lines()
663+
assert len(lines) > 0, "Subplot should have at least one line"
664+
for line in lines:
665+
x_data = line.get_xdata()
666+
y_data = line.get_ydata()
667+
assert len(x_data) > 0, "Line should have x data"
668+
assert len(y_data) > 0, "Line should have y data"
669+
670+
plt.close(fig)
671+
672+
362673
# Tests for plot_statistics function
363674
def test_plot_statistics_returns_figure(multi_day_statistics_data):
364675
"""Test that plot_statistics returns a matplotlib Figure object"""

uv.lock

Lines changed: 1 addition & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)