|
1 | 1 | import os |
2 | 2 | import tempfile |
3 | 3 | from pathlib import Path |
| 4 | +from datetime import datetime |
4 | 5 |
|
5 | 6 | import numpy as np |
6 | 7 | import pandas as pd |
7 | 8 | import pytest |
8 | 9 | import matplotlib.pyplot as plt |
| 10 | +import pytz |
9 | 11 |
|
10 | 12 | # Import the module to test |
11 | 13 | from iglu_python.extension.plots import plot_daily, plot_statistics |
@@ -359,6 +361,315 @@ def test_plot_daily_very_high_glucose_values(): |
359 | 361 | plt.close(fig) |
360 | 362 |
|
361 | 363 |
|
def test_plot_daily_missing_dates():
    """Check plot_daily on a series whose index skips a whole calendar day.

    The series holds two-hourly readings for 2023-01-01 and 2023-01-03 only.
    The test expects one subplot per day that has data, each titled with its
    date and containing at least one non-empty line.
    """
    # Two-hourly samples for Jan 1 and Jan 3; Jan 2 is deliberately absent.
    day1_index = pd.date_range(
        start=pd.Timestamp('2023-01-01 00:00:00'),
        end=pd.Timestamp('2023-01-01 23:59:59'),
        freq='2h',
    )
    day3_index = pd.date_range(
        start=pd.Timestamp('2023-01-03 00:00:00'),
        end=pd.Timestamp('2023-01-03 23:59:59'),
        freq='2h',
    )
    time_index = day1_index.union(day3_index)

    # Seeded so the synthetic glucose trace is reproducible.
    np.random.seed(42)
    glucose_values = np.clip(
        120 + np.random.normal(0, 20, len(time_index)), 50, 400
    )
    data_with_missing_dates = pd.Series(glucose_values, index=time_index)

    # Sanity-check the fixture: at least one day of the Jan 1-3 range
    # must have no samples at all.
    all_dates = pd.date_range(start='2023-01-01', end='2023-01-03', freq='D')
    missing_dates = set(all_dates.date) - set(data_with_missing_dates.index.date)
    assert len(missing_dates) > 0, "Test data should have missing dates"

    fig = plot_daily(data_with_missing_dates)
    assert isinstance(fig, plt.Figure)

    try:
        # Only days that carry data get a subplot (Jan 1 and Jan 3).
        assert len(fig.axes) == 2, "Should have 2 subplots for 2 days with data"

        expected_dates = ['2023-01-01', '2023-01-03']
        for ax, expected_date in zip(fig.axes, expected_dates):
            assert f'Day: {expected_date}' in ax.get_title()

        # Every subplot must contain at least one line with actual points.
        for ax in fig.axes:
            lines = ax.get_lines()
            assert len(lines) > 0, "Each subplot should have at least one line"
            for line in lines:
                assert len(line.get_xdata()) > 0, "Line should have x data"
                assert len(line.get_ydata()) > 0, "Line should have y data"
    finally:
        # Close the figure even when an assertion fails, so it does not
        # stay registered with pyplot for the rest of the test session.
        plt.close(fig)
| 417 | + |
| 418 | + |
def test_plot_daily_missing_dates_multiple_gaps():
    """Check plot_daily on a series with several non-adjacent missing days.

    Readings exist every three hours on 2023-01-01, 2023-01-03 and
    2023-01-05; Jan 2 and Jan 4 have no samples. The test expects exactly
    three subplots, titled with the three days that have data, in order.
    """
    # Three-hourly samples on the odd days only (Jan 2 and Jan 4 are gaps).
    day1_index = pd.date_range(
        start='2023-01-01 00:00:00', end='2023-01-01 23:59:59', freq='3h'
    )
    day3_index = pd.date_range(
        start='2023-01-03 00:00:00', end='2023-01-03 23:59:59', freq='3h'
    )
    day5_index = pd.date_range(
        start='2023-01-05 00:00:00', end='2023-01-05 23:59:59', freq='3h'
    )
    time_index = day1_index.union(day3_index).union(day5_index)

    # Seeded so the synthetic glucose trace is reproducible.
    np.random.seed(123)
    glucose_values = np.clip(
        120 + np.random.normal(0, 20, len(time_index)), 50, 400
    )
    data_with_multiple_gaps = pd.Series(glucose_values, index=time_index)

    fig = plot_daily(data_with_multiple_gaps)
    assert isinstance(fig, plt.Figure)

    try:
        # Only days that carry data get a subplot (Jan 1, Jan 3, Jan 5).
        assert len(fig.axes) == 3, "Should have 3 subplots for 3 days with data"

        expected_dates = ['2023-01-01', '2023-01-03', '2023-01-05']
        for ax, expected_date in zip(fig.axes, expected_dates):
            assert f'Day: {expected_date}' in ax.get_title()
    finally:
        # Close the figure even when an assertion fails, so it does not
        # stay registered with pyplot for the rest of the test session.
        plt.close(fig)
| 450 | + |
| 451 | + |
def test_plot_daily_missing_dates_single_day_in_middle():
    """Check plot_daily on a series whose samples all fall on one day.

    The index contains four-hourly readings for 2023-01-02 only; no
    timestamps from the neighbouring days are present. The test expects a
    single subplot titled with that day.
    """
    # NOTE(review): the test name suggests a Jan 1-3 range with data only in
    # the middle, but the index built here holds Jan 2 samples and nothing
    # else. Confirm whether surrounding empty days were meant to be included.
    day2_index = pd.date_range(
        start='2023-01-02 00:00:00', end='2023-01-02 23:59:59', freq='4h'
    )

    # Seeded so the synthetic glucose trace is reproducible.
    np.random.seed(456)
    glucose_values = np.clip(
        120 + np.random.normal(0, 20, len(day2_index)), 50, 400
    )
    data_single_day = pd.Series(glucose_values, index=day2_index)

    fig = plot_daily(data_single_day)
    assert isinstance(fig, plt.Figure)

    try:
        # One day with data -> one subplot.
        assert len(fig.axes) == 1, "Should have 1 subplot for 1 day with data"

        ax = fig.axes[0]
        assert 'Day: 2023-01-02' in ax.get_title()
    finally:
        # Close the figure even when an assertion fails, so it does not
        # stay registered with pyplot for the rest of the test session.
        plt.close(fig)
| 477 | + |
| 478 | + |
def test_plot_daily_timezone_aware():
    """Check plot_daily on a timezone-aware, non-UTC series.

    Readings are taken at local midnight, 6 AM, noon and 6 PM US/Eastern on
    2024-06-15 and 2024-06-16 (summer, so EDT / UTC-4). The test expects two
    subplots titled with the local calendar dates, each containing at least
    one non-empty line.
    """
    tz = pytz.timezone('US/Eastern')

    # Four local-time samples per day, day 1 first, then day 2.
    all_timestamps = [
        tz.localize(datetime(2024, 6, day, hour, 0))
        for day in (15, 16)
        for hour in (0, 6, 12, 18)
    ]

    # Seeded so the synthetic glucose trace is reproducible.
    np.random.seed(789)
    glucose_values = np.clip(
        120 + np.random.normal(0, 20, len(all_timestamps)), 50, 400
    )
    data_tz_aware = pd.Series(glucose_values, index=all_timestamps)

    # Sanity-check the fixture: the index must have kept its timezone.
    assert data_tz_aware.index.tz is not None, "Index should be timezone-aware"
    assert str(data_tz_aware.index.tz) == 'US/Eastern', "Index should be in US/Eastern timezone"

    fig = plot_daily(data_tz_aware)
    assert isinstance(fig, plt.Figure)

    try:
        assert len(fig.axes) == 2, "Should have 2 subplots for 2 days"

        # Titles are expected to use the local (US/Eastern) calendar date.
        expected_dates = ['2024-06-15', '2024-06-16']
        for ax, expected_date in zip(fig.axes, expected_dates):
            assert f'Day: {expected_date}' in ax.get_title()

        # Every subplot must contain at least one line with actual points.
        for ax in fig.axes:
            lines = ax.get_lines()
            assert len(lines) > 0, "Each subplot should have at least one line"
            for line in lines:
                assert len(line.get_xdata()) > 0, "Line should have x data"
                assert len(line.get_ydata()) > 0, "Line should have y data"
    finally:
        # Close the figure even when an assertion fails, so it does not
        # stay registered with pyplot for the rest of the test session.
        plt.close(fig)
| 540 | + |
| 541 | + |
def test_plot_daily_dst_spring_forward():
    """Check plot_daily across the US/Eastern spring-forward DST transition.

    On 2024-03-10 local clocks jump from 1:59 AM EST straight to 3:00 AM
    EDT, so the 2 AM hour does not exist. The series has samples on both
    sides of that jump, all on the same local date. The test expects a
    single subplot for that date containing at least one non-empty line.
    """
    tz = pytz.timezone('US/Eastern')

    # (hour, minute) in local wall-clock time. The first four are EST
    # (UTC-5); from 3:00 AM onwards the samples are EDT (UTC-4). No sample
    # is placed in the non-existent 2 AM hour.
    local_times = [
        (0, 0),
        (0, 30),
        (1, 0),
        (1, 30),
        (3, 0),
        (3, 30),
        (12, 0),
        (18, 0),
    ]
    timestamps = [
        tz.localize(datetime(2024, 3, 10, hour, minute))
        for hour, minute in local_times
    ]

    # Seeded so the synthetic glucose trace is reproducible.
    np.random.seed(101)
    glucose_values = np.clip(
        120 + np.random.normal(0, 20, len(timestamps)), 50, 400
    )
    data_dst_spring = pd.Series(glucose_values, index=timestamps)

    # Sanity-check the fixture: tz-aware, chronologically ordered, and
    # containing samples on both sides of the transition.
    assert data_dst_spring.index.tz is not None, "Index should be timezone-aware"
    assert data_dst_spring.index.is_monotonic_increasing, "Timestamps should be monotonically increasing"

    offsets = [ts.utcoffset().total_seconds() / 3600 for ts in timestamps]
    assert -5.0 in offsets, "Should have EST timestamps (UTC-5)"
    assert -4.0 in offsets, "Should have EDT timestamps (UTC-4)"

    fig = plot_daily(data_dst_spring)
    assert isinstance(fig, plt.Figure)

    try:
        # All samples share the local date March 10 despite the offset change.
        assert len(fig.axes) == 1, "Should have 1 subplot for 1 day (DST transition within same day)"

        ax = fig.axes[0]
        assert 'Day: 2024-03-10' in ax.get_title()

        lines = ax.get_lines()
        assert len(lines) > 0, "Subplot should have at least one line"
        for line in lines:
            assert len(line.get_xdata()) > 0, "Line should have x data"
            assert len(line.get_ydata()) > 0, "Line should have y data"
    finally:
        # Close the figure even when an assertion fails, so it does not
        # stay registered with pyplot for the rest of the test session.
        plt.close(fig)
| 604 | + |
| 605 | + |
def test_plot_daily_dst_fall_back():
    """Check plot_daily across the US/Eastern fall-back DST transition.

    On 2024-11-03 local clocks go from 1:59 AM EDT back to 1:00 AM EST, so
    the 1 AM hour occurs twice. The series includes samples from both
    occurrences, all on the same local date. The test expects a single
    subplot for that date containing at least one non-empty line.
    """
    tz = pytz.timezone('US/Eastern')

    # The 1 AM wall-clock hour is ambiguous on this date; is_dst selects
    # which occurrence is meant. The EDT occurrence (UTC-4) comes first in
    # absolute time, the EST occurrence (UTC-5) one hour later.
    timestamps = [
        tz.localize(datetime(2024, 11, 3, 0, 0)),                  # 12:00 AM EDT (04:00 UTC)
        tz.localize(datetime(2024, 11, 3, 0, 30)),                 # 12:30 AM EDT (04:30 UTC)
        tz.localize(datetime(2024, 11, 3, 1, 0), is_dst=True),     # 1:00 AM EDT (05:00 UTC), first occurrence
        tz.localize(datetime(2024, 11, 3, 1, 30), is_dst=True),    # 1:30 AM EDT (05:30 UTC)
        tz.localize(datetime(2024, 11, 3, 1, 0), is_dst=False),    # 1:00 AM EST (06:00 UTC), second occurrence
        tz.localize(datetime(2024, 11, 3, 1, 30), is_dst=False),   # 1:30 AM EST (06:30 UTC)
        tz.localize(datetime(2024, 11, 3, 12, 0), is_dst=False),   # Noon EST (17:00 UTC)
        tz.localize(datetime(2024, 11, 3, 18, 0), is_dst=False),   # 6 PM EST (23:00 UTC)
    ]

    # Seeded so the synthetic glucose trace is reproducible.
    np.random.seed(202)
    glucose_values = np.clip(
        120 + np.random.normal(0, 20, len(timestamps)), 50, 400
    )
    data_dst_fall = pd.Series(glucose_values, index=timestamps)

    # Sanity-check the fixture: tz-aware, ordered by absolute (UTC) time
    # even though wall-clock time repeats, and containing both offsets.
    assert data_dst_fall.index.tz is not None, "Index should be timezone-aware"
    assert data_dst_fall.index.is_monotonic_increasing, "Timestamps should be monotonically increasing"

    offsets = [ts.utcoffset().total_seconds() / 3600 for ts in timestamps]
    assert -4.0 in offsets, "Should have EDT timestamps (UTC-4)"
    assert -5.0 in offsets, "Should have EST timestamps (UTC-5)"

    fig = plot_daily(data_dst_fall)
    assert isinstance(fig, plt.Figure)

    try:
        # All samples share the local date November 3 despite the offset change.
        assert len(fig.axes) == 1, "Should have 1 subplot for 1 day (DST transition within same day)"

        ax = fig.axes[0]
        assert 'Day: 2024-11-03' in ax.get_title()

        lines = ax.get_lines()
        assert len(lines) > 0, "Subplot should have at least one line"
        for line in lines:
            assert len(line.get_xdata()) > 0, "Line should have x data"
            assert len(line.get_ydata()) > 0, "Line should have y data"
    finally:
        # Close the figure even when an assertion fails, so it does not
        # stay registered with pyplot for the rest of the test session.
        plt.close(fig)
| 671 | + |
| 672 | + |
362 | 673 | # Tests for plot_statistics function |
363 | 674 | def test_plot_statistics_returns_figure(multi_day_statistics_data): |
364 | 675 | """Test that plot_statistics returns a matplotlib Figure object""" |
|
0 commit comments