diff --git a/.gitignore b/.gitignore index e646466..48f6fc2 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ source/ poetry.lock .coverage coverage.xml + +*.backup \ No newline at end of file diff --git a/README.rst b/README.rst index 257d7dc..6a11ad8 100644 --- a/README.rst +++ b/README.rst @@ -50,6 +50,21 @@ implemented while others are still work in progress. The following code snippet should give you a brief overview and idea on how to use this package. Further examples and more information on the functions can be found in the documentation. +.. code-block:: python + + # Load data from file + data, sample_freq = paat.read_gt3x('path/to/gt3x/file') + + # Annotate the acceleration data + data = paat.annotate(data, sample_freq) + + +The annotation function is a convenience function that applies the standard +workflow. However, *paat* gives you the freedom to adjust your pipeline as you +wish and combine it with other packages if required. Each step in *paat* is +defined as an own function, so you can adjust everything. For example, here is +a more detailed annotation pipeline: + .. code-block:: python # Load data from file diff --git a/paat/__init__.py b/paat/__init__.py index d893567..4c8f496 100644 --- a/paat/__init__.py +++ b/paat/__init__.py @@ -22,17 +22,19 @@ import platform from importlib import metadata -from pip._vendor import pkg_resources +import pkg_resources import toml from . import estimates, features, io, preprocessing, sleep, wear_time # Expose API functions from .estimates import calculate_pa_levels, create_activity_column -from .features import calculate_actigraph_counts, calculate_vector_magnitude, calculate_brond_counts +from .features import calculate_actigraph_counts, calculate_vector_magnitude, calculate_brond_counts, calculate_enmo from .io import read_gt3x from .sleep import detect_sleep_weitz2022, detect_sleep_triaxial_weitz2022, detect_time_in_bed_weitz2024 from .wear_time import detect_non_wear_time_naive, detect_non_wear_time_hees2011, detect_non_wear_time_syed2021 +from .visualizations import visualize +from .analysis import annotate, summary try: __version__ = metadata.version(__package__) @@ -40,7 +42,6 @@ __version__ = toml.load("pyproject.toml")["tool"]["poetry"]["version"] + "dev" - def sysinfo(): """ Prints system the dependency information diff --git a/paat/analysis.py b/paat/analysis.py new file mode 100644 index 0000000..3ba6fd8 --- /dev/null +++ b/paat/analysis.py @@ -0,0 +1,61 @@ +import pandas as pd + +from .estimates import calculate_pa_levels, create_activity_column +from .sleep import detect_sleep_weitz2022, detect_sleep_triaxial_weitz2022 +from .wear_time import detect_non_wear_time_syed2021 + + +def annotate(data, sample_freq): + """ + Function to annotate the raw acceleration data. Performs the standard + pipeline of paat's functions. + + Parameters + ---------- + data : DataFrame + a DataFrame containg the raw acceleration data + sample_freq : int + the sampling frequency in which the data was recorded + + Returns + ------- + data : DataFrame + the raw acceleration data plus a new column holding the activity labels + """ + + # Detect non-wear time + data.loc[:, "Non Wear Time"] = detect_non_wear_time_syed2021(data, sample_freq) + + # Detect sleep episodes + data.loc[:, "Time in bed"] = detect_sleep_triaxial_weitz2022(data, sample_freq)[:len(data)] + + # Classify moderate-to-vigorous and sedentary behavior + data.loc[:, ["MVPA", "SB"]] = calculate_pa_levels(data, sample_freq) + + # Merge the activity columns into one labelled column. columns indicates the + # importance of the columns, later names are more important and will be kept + data.loc[:, "Activity"] = create_activity_column(data, columns=["SB", "MVPA", "Time in bed", "Non Wear Time"]) + + # Remove the other columns after merging + return data[["X", "Y", "Z", "Activity"]] + + +def summary(data, sample_freq): + """ + Create a daily summary of the DataFrame + + Parameters + ---------- + data : DataFrame + a DataFrame containg the raw acceleration data + sample_freq : int + the sampling frequency in which the data was recorded + level : str + a string indicating to which level the data should be aggregated + + Returns + ------- + agg : DataFrame + the aggregated data holding the minutes spend in each activity + """ + return pd.get_dummies(data["Activity"]).resample("D").sum() / (sample_freq * 60) \ No newline at end of file diff --git a/paat/features.py b/paat/features.py index f1a5268..15e3057 100644 --- a/paat/features.py +++ b/paat/features.py @@ -22,7 +22,6 @@ -0.046015, 0.036283, -0.012977, -0.0046262, 0.012835, -0.0093762, 0.0034485, -0.00080972, -0.00019623]) -["Y", "X", "Z"] def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=False, dtype=np.float32): r""" @@ -37,7 +36,7 @@ def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=Fal round_negative_to_zero : Boolean (optional) If set to True, round negative values to zero dtype : np.dtype (optional) - set the data type of the return array. Standard float 16, but can be set to better precision + set the data type of the return array. Standard float 32, but can be set to better precision Returns @@ -78,6 +77,30 @@ def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=Fal return vector_magnitude.reshape(data.shape[0], 1) +def calculate_enmo(data, dtype=np.float32): + """ + Calculate the Euclidean norm minus one from raw acceleration data. + This function is a wrapper of `calculate_vector_magnitude`. + + Parameters + ---------- + data : array_like + numpy array with acceleration data + dtype : np.dtype (optional) + set the data type of the return array. Standard float 32, but can be set to better precision + + + Returns + ------- + vector_magnitude : np.array (acceleration values, 1)(np.float) + numpy array with vector magnitude of the acceleration + """ + if isinstance(data, pd.DataFrame): + data = data[["Y", "X", "Z"]].values + + return calculate_vector_magnitude(data, minus_one=True, round_negative_to_zero=True) + + def calculate_frequency_features(data, win_len=60, win_step=60, sample_rate=100, nfft=512, nfilt=40): """ Calculate frequency features from raw acceleration signal. @@ -210,14 +233,14 @@ def _calc_one_axis_brond_counts(acc, sample_freq, epoch_length, deadband=0.068, resampled_data = resampy.resample(acc, sample_freq, target_hz) # Step 1: Aliasing filter (0.01-7hz) - B2, A2 = signal.butter(4, np.array([0.01, 7])/(target_hz/2), btype='bandpass') + B2, A2 = signal.butter(4, np.array([0.01, 7]) / (target_hz / 2), btype='bandpass') dataf = signal.filtfilt(B2, A2, resampled_data) # Step 2: ActiGraph filter filtered = signal.lfilter(0.965 * B, A, dataf) # Step 3, 4 & 5: Downsample to 10hz, clip at peak (2.13g) and rectify - rectified = np.abs(np.clip(filtered[::3], a_min=-1*peak, a_max=peak)) + rectified = np.abs(np.clip(filtered[::3], a_min=-1 * peak, a_max=peak)) # Step 6 & 7: Dead-band and convert to 8bit resolution downsampled = np.where(rectified < deadband, 0, rectified) // adcResolution @@ -262,7 +285,6 @@ def calculate_brond_counts(data, sample_freq, epoch_length): if isinstance(epoch_length, str): epoch_length = pd.Timedelta(epoch_length).seconds - counts = pd.DataFrame({"Y": _calc_one_axis_brond_counts(data["Y"].values, sample_freq, epoch_length), "X": _calc_one_axis_brond_counts(data["X"].values, sample_freq, epoch_length), "Z": _calc_one_axis_brond_counts(data["Z"].values, sample_freq, epoch_length)}, @@ -293,6 +315,6 @@ def calculate_actigraph_counts(data, sample_freq, epoch_length): sec_per_epoch = pd.Timedelta(epoch_length).seconds counts = get_counts(data[["Y", "X", "Z"]].values, sample_freq, sec_per_epoch) - index = data.resample(epoch_length).mean().index + index = data.resample(epoch_length).mean(numeric_only=True).index counts = pd.DataFrame(counts, columns=["Y", "X", "Z"], index=index[:len(counts)]) return counts diff --git a/paat/visualizations.py b/paat/visualizations.py new file mode 100644 index 0000000..3447b34 --- /dev/null +++ b/paat/visualizations.py @@ -0,0 +1,67 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.patches import Rectangle, Patch +import seaborn as sns + +from .features import calculate_enmo + +COLOR = {'Non Wear Time': "grey", 'Time in bed': "blue", 'SB': "green", 'LPA': "yellow", 'MVPA': "red"} + + +def visualize(data, show_date=False, title=None, file_path=None): + + data["ENMO"] = calculate_enmo(data).copy() + data = data.resample("1min").apply({"X": "mean", "Y": "mean", "Z": "mean", "ENMO": "mean", "Activity": lambda x: x.value_counts().idxmax()}) + + n_days = len(data.groupby(data.index.day)) + ymax = data["ENMO"].max() * 1.05 + min_per_day = 1440 + + fig = plt.figure(figsize=(8.27, 11.69)) #figsize=(10, n_days * 3)) + axes = fig.subplots(n_days, 1, sharex=True) + plt.subplots_adjust(hspace=.3) + + if title: + fig.suptitle(title, fontsize=14) + + for ii, (_, day) in enumerate(data.groupby(data.index.day)): + + offset = (day.index[0] - pd.Timestamp(day.index[0].date())).total_seconds() // 60 + day["Time"] = np.arange(offset, offset + len(day)) + + ax = axes[ii] + + if show_date: + ax.set_title(day.index[0].strftime('%A, %d.%m.%Y'), fontsize=12) + + ax = sns.lineplot(x="Time", y="ENMO", data=day, color="black", linewidth=.75, ax=ax) + ax.set_ylabel("ENMO", fontsize=10) + ax.set_ylim((0, ymax)) + ax.set_xlim((0, min_per_day)) + + ymin, ymax = ax.get_ylim() + height = ymax - ymin + + # Add background + for _, row in day.iterrows(): + ax.add_patch(Rectangle((row["Time"], ymin), 1, height, alpha=.1, facecolor=COLOR[row["Activity"]])) + + ticks, labels = list(zip(*[(tick, f"{tick // 60:0>2}:{tick % 60:0>2}") for tick in range(min_per_day) if (tick + 60) % 120 == 0])) + ax.set_xticks(ticks) + ax.set_xticklabels("") + ax.set_xlabel("") + + ax.set_xticklabels(labels) + ax.set_xlabel("Time", fontsize=10) + + # Add legend to last plot + #ax = axes[-1] + handles = [Patch(color=value, label=key, alpha=.1) for key, value in COLOR.items()] + ax.legend(handles=handles, loc='upper center', bbox_to_anchor=(0.5, -.9 * ymax), fancybox=False, shadow=False, ncol=5) + #ax.set_axis_off() + + if file_path: + plt.savefig(file_path, dpi=300) + else: + plt.show() diff --git a/pyproject.toml b/pyproject.toml index 77b6524..61ec8cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "paat" -version = "1.0.0b4" +version = "1.0.0b5" description = "A comprehensive toolbox to analyse and model raw physical activity data" license = "MIT" @@ -40,6 +40,8 @@ torch = "^1.10.1" agcounts = "^0.1.1" toml = "^0.10.2" tables = "^3.7.0" +matplotlib = "^3.6.1" +seaborn = "^0.13.2" [tool.poetry.extras] docs = ["sphinx", "sphinx_rtd_theme", "numpydoc", "easydev", "nbsphinx", "docutils"] @@ -53,7 +55,6 @@ pytest-cov = "^4.1.0" pycodestyle = "^2.11.1" flake8 = "^7.0.0" notebook = "^7.0.7" -seaborn = "^0.13.2" numpydoc = "^1.6.0" easydev = "^0.12.1" pylint = "^3.0.3" @@ -72,6 +73,10 @@ jinja2 = "<3.1.0" [tool.pylint.message_control] disable = ["E1101", "C0330", "C0326"] +[tool.pytest.ini_options] +markers = [ + "slow: marks tests as slow (deselect with '-m \"not slow\"')", +] [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/tests/test_debug.py b/tests/test_debug.py index de9cd13..f83d529 100644 --- a/tests/test_debug.py +++ b/tests/test_debug.py @@ -1,4 +1,5 @@ import paat + def test_sysinfo(): paat.sysinfo() diff --git a/tests/test_io.py b/tests/test_io.py index 71fb1c5..58a5446 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -11,7 +11,8 @@ @pytest.fixture def unscaled_data(file_path): - return io.read_gt3x(file_path, rescale=False, pandas=True) + acc, _ = io.read_gt3x(file_path, rescale=False, pandas=True) + return acc def test_loading_data(file_path, load_gt3x_file): @@ -27,7 +28,6 @@ def test_against_actigraph_implementation(file_path, unscaled_data): ref = reader.to_pandas() npt.assert_almost_equal(unscaled_data[["X", "Y", "Z"]].values, ref[["X", "Y", "Z"]].values) - #assert np.allclose(data[["X", "Y", "Z"]].values, ref[["X", "Y", "Z"]].values) def test_exceptions(): diff --git a/tests/test_pipeline.py b/tests/test_readme.py similarity index 64% rename from tests/test_pipeline.py rename to tests/test_readme.py index 5df2632..620782e 100644 --- a/tests/test_pipeline.py +++ b/tests/test_readme.py @@ -8,13 +8,29 @@ FILE_PATH_SIMPLE = os.path.join(TEST_ROOT, 'resources/10min_recording.gt3x') -def test_pipeline(): +def test_simple_example(): """ Test the Quickstart/Readme processing pipeline """ # Load data from file data, sample_freq = paat.read_gt3x(FILE_PATH_SIMPLE) + # Annotate the acceleration data + data = paat.annotate(data, sample_freq) + + # Get ActiLife counts + counts = paat.calculate_actigraph_counts(data, sample_freq, "10s") + data.loc[:, ["Y_counts", "X_counts", "Z_counts"]] = counts + + +def test_advanced_example(): + """ + Test the advanced example from the Readme + """ + + # Load data from file + data, sample_freq = paat.read_gt3x(FILE_PATH_SIMPLE) + # Detect non-wear time data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_syed2021(data, sample_freq) @@ -24,12 +40,9 @@ def test_pipeline(): # Classify moderate-to-vigorous and sedentary behavior data.loc[:, ["MVPA", "SB"]] = paat.calculate_pa_levels(data, sample_freq) - # Merge the activity columns into one labelled column - data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["Non Wear Time", "Sleep", "MVPA", "SB"]) + # Merge the activity columns into one labelled column. columns indicates the + # importance of the columns, later names are more important and will be kept + data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Sleep", "Non Wear Time"]) # Remove the other columns after merging - data = data[["X", "Y", "Z", "Activity"]] - - # Get ActiLife counts - counts = paat.calculate_actigraph_counts(data, sample_freq, "10s") - data.loc[:, ["Y_counts", "X_counts", "Z_counts"]] = counts + data = data[["X", "Y", "Z", "Activity"]] \ No newline at end of file