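Ruff cleanup: sort imports, strip trailing whitespace, replace redundant list comprehensions with list(range(...))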

staskh · staskh · commit 3aecc1015135 · 2025-06-25T13:54:02.000+03:00
diff --git a/iglu_python/__init__.py b/iglu_python/__init__.py
@@ -9,6 +9,7 @@
 from .cv_measures import cv_measures
 from .ea1c import ea1c
 from .episode_calculation import episode_calculation
+from .extension.load_data import load_dexcom, load_libre
 from .gmi import gmi
 from .grade import grade
 from .grade_eugly import grade_eugly
@@ -41,7 +42,6 @@
 from .sd_roc import sd_roc
 from .summary_glu import summary_glu
 from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, is_iglu_r_compatible, set_iglu_r_compatible
-from .extension.load_data import load_libre, load_dexcom
 
 __all__ = [
     "above_percent",
diff --git a/iglu_python/extension/load_data.py b/iglu_python/extension/load_data.py
@@ -4,6 +4,7 @@
 """
 
 from pathlib import Path
+
 import pandas as pd
 
 
@@ -43,7 +44,7 @@ def load_libre(file_path: str) -> pd.Series:
         format = '%m-%d-%Y %H:%M'
     elif 'Historic Glucose mmol/L' in df.columns:
         df = df.loc[:, ('Device Timestamp', 'Historic Glucose mmol/L', 'Scan Glucose mmol/L')]
-        format = '%d-%m-%Y %I:%M %p' 
+        format = '%d-%m-%Y %I:%M %p'
         convert = True
     else:
         df = df = df.loc[:, ('Device Timestamp', 'Historic Glucose mg/dL', 'Scan Glucose mg/dL')]
@@ -56,10 +57,10 @@ def load_libre(file_path: str) -> pd.Series:
 
     # Convert glucose values to numeric
     df['glc'] = pd.to_numeric(df['glc'], errors='coerce')
-    
+
     # convert to mg/dL if needed
     if convert:
-        df['glc'] = df['glc'] * 18.01559    
+        df['glc'] = df['glc'] * 18.01559
 
     # Drop NaN values and sort by 'time'
     df = df.dropna(subset=['time', 'glc']).sort_values('time').reset_index(drop=True)
@@ -97,46 +98,46 @@ def load_dexcom(file_path: str) -> pd.Series:
     # Drop top rows
     df = df.iloc[1:]
     df.reset_index(inplace=True, drop=True)
-    
+
     # Find timestamp column
     timestamp_cols = [col for col in df.columns if 'Timestamp' in str(col)]
     if not timestamp_cols:
         raise ValueError("No timestamp column found in Dexcom data")
     timestamp_col = timestamp_cols[0]
-    
+
     # Find glucose column
     glucose_cols = [col for col in df.columns if 'Glucose' in str(col)]
     if not glucose_cols:
         raise ValueError("No glucose column found in Dexcom data")
     glucose_col = glucose_cols[0]
-    
+
     # Check if conversion is needed (mmol/L to mg/dL)
     convert = False
     if 'mmol/L' in str(glucose_col):
         convert = True
-    
+
     # Select relevant columns
     df = df.loc[:, [timestamp_col, glucose_col]]
-    
+
     # Rename columns
     df.columns = ['time', 'glc']
-    
+
     # Convert 'time' column to datetime
     df['time'] = pd.to_datetime(df['time'], errors='coerce')
-    
+
     # Convert glucose values to numeric
     df['glc'] = pd.to_numeric(df['glc'], errors='coerce')
-    
+
     # Convert to mg/dL if needed
     if convert:
         df['glc'] = df['glc'] * 18.01559
-    
+
     # Drop NaN values and sort by 'time'
     df = df.dropna(subset=['time', 'glc']).sort_values('time').reset_index(drop=True)
-    
+
     # Convert into timeseries
     timeseries = df.set_index('time')['glc']
-    
+
     return timeseries
 
 
@@ -159,23 +160,23 @@ def _open_file(filepath: str) -> pd.DataFrame:
     if not Path(filepath).exists():
         raise FileNotFoundError(f"File not found: {filepath}")
 
-    
+
     # Get file extension using basename
     extension = Path(filepath).suffix
-    
+
     try:
         if extension == '.csv':
             # Assume that the user uploaded a CSV file
-            df = pd.read_csv(filepath, header=None, names=[i for i in range(0, 20)])
+            df = pd.read_csv(filepath, header=None, names=list(range(0, 20)))
         elif extension == '.xls' or extension == '.xlsx':
             # Assume that the user uploaded an Excel file
-            df = pd.read_excel(filepath, header=None, names=[i for i in range(0, 20)])
+            df = pd.read_excel(filepath, header=None, names=list(range(0, 20)))
         elif extension == '.txt' or extension == '.tsv':
             # Assume that the user uploaded a text file
-            df = pd.read_table(filepath, header=None, names=[i for i in range(0, 20)])
+            df = pd.read_table(filepath, header=None, names=list(range(0, 20)))
         else:
             raise ValueError(f"Unsupported file extension: {extension}")
-        
+
         return df
     except Exception as e:
-        raise ValueError(f"Error reading file: {filepath}") from e
+        raise ValueError(f"Error reading file: {filepath}") from e
diff --git a/tests/test_load_data.py b/tests/test_load_data.py
@@ -4,15 +4,17 @@
 Tests the functionality of loading CGM data from device-specific files.
 """
 
-import pytest
-import pandas as pd
-import numpy as np
-from pathlib import Path
-import tempfile
 import os
+import tempfile
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pytest
 
 # Import the module to test
-from iglu_python import load_libre, load_dexcom
+from iglu_python import load_dexcom, load_libre
+
 
 @pytest.fixture(scope="module")
 def test_data_paths():
@@ -136,7 +138,7 @@ def test_load_dexcom_glucose_statistics(test_data_paths):
     ts_01 = load_dexcom(str(test_data_paths['dexcom_eur_01']))
     ts_02 = load_dexcom(str(test_data_paths['dexcom_eur_02']))
     ts_03 = load_dexcom(str(test_data_paths['dexcom_eur_03']))
-    
+
     for ts in [ts_01, ts_02, ts_03]:
         # Convert to numeric for statistics
         numeric_values = pd.to_numeric(ts, errors='coerce').dropna()
@@ -186,7 +188,7 @@ def test_load_libre_time_interval(test_data_paths):
     expected_interval = pd.Timedelta(minutes=15)
     tolerance = pd.Timedelta(minutes=5)
     close_intervals = time_diffs[abs(time_diffs - expected_interval) <= tolerance]
-    assert len(close_intervals) / len(time_diffs) > 0.8  # At least 80% should be close 
+    assert len(close_intervals) / len(time_diffs) > 0.8  # At least 80% should be close
 
 def test_load_dexcom_time_interval(test_data_paths):
     timeseries = load_dexcom(str(test_data_paths['dexcom_eur_01']))
@@ -211,4 +213,4 @@ def test_load_dexcom_numeric_values(test_data_paths):
     # Check that all values are numeric
     assert pd.api.types.is_numeric_dtype(timeseries)
     # Check that there are no NaN values (all should be valid numbers)
-    assert not timeseries.isna().any()
+    assert not timeseries.isna().any()