Trybnetic · Trybnetic · Oct 9, 2022 · Oct 9, 2022 · Oct 9, 2022 · Oct 9, 2022
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,5 @@ source/
 poetry.lock
 .coverage
 coverage.xml
+
+*.backup
diff --git a/README.rst b/README.rst
@@ -50,6 +50,21 @@ implemented while others are still work in progress. The following code snippet
 should give you a brief overview and idea on how to use this package. Further
 examples and more information on the functions can be found in the documentation.
 
+.. code-block:: python
+
+    # Load data from file
+    data, sample_freq = paat.read_gt3x('path/to/gt3x/file')
+
+    # Annotate the acceleration data
+    data = paat.annotate(data, sample_freq)
+
+
+The annotation function is a convenience function that applies the standard 
+workflow. However, *paat* gives you the freedom to adjust your pipeline as you 
+wish and combine it with other packages if required. Each step in *paat* is 
+defined as an own function, so you can adjust everything. For example, here is 
+a more detailed annotation pipeline:
+
 .. code-block:: python
 
     # Load data from file

diff --git a/paat/__init__.py b/paat/__init__.py
@@ -22,25 +22,26 @@
 import platform
 from importlib import metadata
 
-from pip._vendor import pkg_resources
+import pkg_resources
 import toml
 
 from . import estimates, features, io, preprocessing, sleep, wear_time
 
 # Expose API functions
 from .estimates import calculate_pa_levels, create_activity_column
-from .features import calculate_actigraph_counts, calculate_vector_magnitude, calculate_brond_counts
+from .features import calculate_actigraph_counts, calculate_vector_magnitude, calculate_brond_counts, calculate_enmo
 from .io import read_gt3x
 from .sleep import detect_sleep_weitz2022, detect_sleep_triaxial_weitz2022, detect_time_in_bed_weitz2024
 from .wear_time import detect_non_wear_time_naive, detect_non_wear_time_hees2011, detect_non_wear_time_syed2021
+from .visualizations import visualize
+from .analysis import annotate, summary
 
 try:
     __version__ = metadata.version(__package__)
 except metadata.PackageNotFoundError:
     __version__ = toml.load("pyproject.toml")["tool"]["poetry"]["version"] + "dev"
 
 
-
 def sysinfo():
     """
     Prints system the dependency information

diff --git a/paat/analysis.py b/paat/analysis.py
@@ -0,0 +1,61 @@
+import pandas as pd 
+
+from .estimates import calculate_pa_levels, create_activity_column
+from .sleep import detect_sleep_weitz2022, detect_sleep_triaxial_weitz2022
+from .wear_time import detect_non_wear_time_syed2021
+
+
+def annotate(data, sample_freq):
+    """
+    Function to annotate the raw acceleration data. Performs the standard
+    pipeline of paat's functions.
+
+    Parameters
+    ----------
+    data : DataFrame
+        a DataFrame containg the raw acceleration data
+    sample_freq : int
+        the sampling frequency in which the data was recorded
+
+    Returns
+    -------
+    data : DataFrame
+        the raw acceleration data plus a new column holding the activity labels
+    """
+
+    # Detect non-wear time
+    data.loc[:, "Non Wear Time"] = detect_non_wear_time_syed2021(data, sample_freq)
+
+    # Detect sleep episodes
+    data.loc[:, "Time in bed"] = detect_sleep_triaxial_weitz2022(data, sample_freq)[:len(data)]
+
+    # Classify moderate-to-vigorous and sedentary behavior
+    data.loc[:, ["MVPA", "SB"]] = calculate_pa_levels(data, sample_freq)
+
+    # Merge the activity columns into one labelled column. columns indicates the
+    # importance of the columns, later names are more important and will be kept
+    data.loc[:, "Activity"] = create_activity_column(data, columns=["SB", "MVPA", "Time in bed", "Non Wear Time"])
+
+    # Remove the other columns after merging
+    return data[["X", "Y", "Z", "Activity"]]
+
+
+def summary(data, sample_freq):
+    """
+    Create a daily summary of the DataFrame
+
+    Parameters
+    ----------
+    data : DataFrame
+        a DataFrame containg the raw acceleration data
+    sample_freq : int
+        the sampling frequency in which the data was recorded
+    level : str
+        a string indicating to which level the data should be aggregated
+
+    Returns
+    -------
+    agg : DataFrame
+        the aggregated data holding the minutes spend in each activity
+    """
+    return pd.get_dummies(data["Activity"]).resample("D").sum() / (sample_freq * 60)
diff --git a/paat/features.py b/paat/features.py
@@ -22,7 +22,6 @@
                           -0.046015, 0.036283, -0.012977, -0.0046262, 0.012835, -0.0093762,
                           0.0034485, -0.00080972, -0.00019623])
 
-["Y", "X", "Z"]
 
 def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=False, dtype=np.float32):
     r"""
@@ -37,7 +36,7 @@ def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=Fal
     round_negative_to_zero : Boolean (optional)
         If set to True, round negative values to zero
     dtype : np.dtype (optional)
-        set the data type of the return array. Standard float 16, but can be set to better precision
+        set the data type of the return array. Standard float 32, but can be set to better precision
 
 
     Returns
@@ -78,6 +77,30 @@ def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=Fal
     return vector_magnitude.reshape(data.shape[0], 1)
 
 
+def calculate_enmo(data, dtype=np.float32):
+    """
+    Calculate the Euclidean norm minus one from raw acceleration data.
+    This function is a wrapper of `calculate_vector_magnitude`.
+
+    Parameters
+    ----------
+    data : array_like
+        numpy array with acceleration data
+    dtype : np.dtype (optional)
+        set the data type of the return array. Standard float 32, but can be set to better precision
+
+
+    Returns
+    -------
+    vector_magnitude : np.array (acceleration values, 1)(np.float)
+       numpy array with vector magnitude of the acceleration
+    """
+    if isinstance(data, pd.DataFrame):
+        data = data[["Y", "X", "Z"]].values
+
+    return calculate_vector_magnitude(data, minus_one=True, round_negative_to_zero=True)
+
+
 def calculate_frequency_features(data, win_len=60, win_step=60, sample_rate=100, nfft=512, nfilt=40):
     """
     Calculate frequency features from raw acceleration signal.
@@ -210,14 +233,14 @@ def _calc_one_axis_brond_counts(acc, sample_freq, epoch_length, deadband=0.068,
         resampled_data = resampy.resample(acc, sample_freq, target_hz)
 
     # Step 1: Aliasing filter (0.01-7hz)
-    B2, A2 = signal.butter(4, np.array([0.01, 7])/(target_hz/2), btype='bandpass')
+    B2, A2 = signal.butter(4, np.array([0.01, 7]) / (target_hz / 2), btype='bandpass')
     dataf = signal.filtfilt(B2, A2, resampled_data)
 
     # Step 2: ActiGraph filter
     filtered = signal.lfilter(0.965 * B, A, dataf)
 
     # Step 3, 4 & 5: Downsample to 10hz, clip at peak (2.13g) and rectify
-    rectified = np.abs(np.clip(filtered[::3], a_min=-1*peak, a_max=peak))
+    rectified = np.abs(np.clip(filtered[::3], a_min=-1 * peak, a_max=peak))
 
     # Step 6 & 7: Dead-band and convert to 8bit resolution
     downsampled = np.where(rectified < deadband, 0, rectified) // adcResolution
@@ -262,7 +285,6 @@ def calculate_brond_counts(data, sample_freq, epoch_length):
     if isinstance(epoch_length, str):
         epoch_length = pd.Timedelta(epoch_length).seconds
 
-
     counts = pd.DataFrame({"Y": _calc_one_axis_brond_counts(data["Y"].values, sample_freq, epoch_length),
                            "X": _calc_one_axis_brond_counts(data["X"].values, sample_freq, epoch_length),
                            "Z": _calc_one_axis_brond_counts(data["Z"].values, sample_freq, epoch_length)},
@@ -293,6 +315,6 @@ def calculate_actigraph_counts(data, sample_freq, epoch_length):
     sec_per_epoch = pd.Timedelta(epoch_length).seconds
 
     counts = get_counts(data[["Y", "X", "Z"]].values, sample_freq, sec_per_epoch)
-    index = data.resample(epoch_length).mean().index
+    index = data.resample(epoch_length).mean(numeric_only=True).index
     counts = pd.DataFrame(counts, columns=["Y", "X", "Z"], index=index[:len(counts)])
     return counts
diff --git a/paat/visualizations.py b/paat/visualizations.py
@@ -0,0 +1,67 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.patches import Rectangle, Patch
+import seaborn as sns
+
+from .features import calculate_enmo
+
+COLOR = {'Non Wear Time': "grey", 'Time in bed': "blue", 'SB': "green", 'LPA': "yellow", 'MVPA': "red"}
+
+
+def visualize(data, show_date=False, title=None, file_path=None):
+
+    data["ENMO"] = calculate_enmo(data).copy()
+    data = data.resample("1min").apply({"X": "mean", "Y": "mean", "Z": "mean", "ENMO": "mean", "Activity": lambda x: x.value_counts().idxmax()})
+
+    n_days = len(data.groupby(data.index.day))
+    ymax = data["ENMO"].max() * 1.05
+    min_per_day = 1440
+
+    fig = plt.figure(figsize=(8.27, 11.69)) #figsize=(10, n_days * 3))
+    axes = fig.subplots(n_days, 1, sharex=True)
+    plt.subplots_adjust(hspace=.3)
+
+    if title:
+        fig.suptitle(title, fontsize=14)
+
+    for ii, (_, day) in enumerate(data.groupby(data.index.day)):
+
+        offset = (day.index[0] - pd.Timestamp(day.index[0].date())).total_seconds() // 60
+        day["Time"] = np.arange(offset, offset + len(day))
+
+        ax = axes[ii]
+
+        if show_date:
+            ax.set_title(day.index[0].strftime('%A, %d.%m.%Y'), fontsize=12)
+
+        ax = sns.lineplot(x="Time", y="ENMO", data=day, color="black", linewidth=.75, ax=ax)
+        ax.set_ylabel("ENMO", fontsize=10)
+        ax.set_ylim((0, ymax))
+        ax.set_xlim((0, min_per_day))
+
+        ymin, ymax = ax.get_ylim()
+        height = ymax - ymin
+
+        # Add background
+        for _, row in day.iterrows():
+            ax.add_patch(Rectangle((row["Time"], ymin), 1, height, alpha=.1, facecolor=COLOR[row["Activity"]]))
+
+        ticks, labels = list(zip(*[(tick, f"{tick // 60:0>2}:{tick % 60:0>2}") for tick in range(min_per_day) if (tick + 60) % 120 == 0]))
+        ax.set_xticks(ticks)
+        ax.set_xticklabels("")
+        ax.set_xlabel("")
+
+    ax.set_xticklabels(labels)
+    ax.set_xlabel("Time", fontsize=10)
+
+    # Add legend to last plot
+    #ax = axes[-1]
+    handles = [Patch(color=value, label=key, alpha=.1) for key, value in COLOR.items()]
+    ax.legend(handles=handles, loc='upper center', bbox_to_anchor=(0.5, -.9 * ymax), fancybox=False, shadow=False, ncol=5)
+    #ax.set_axis_off()
+
+    if file_path:
+        plt.savefig(file_path, dpi=300)
+    else:
+        plt.show()
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "paat"
-version = "1.0.0b4"
+version = "1.0.0b5"
 description = "A comprehensive toolbox to analyse and model raw physical activity data"
 
 license = "MIT"
@@ -40,6 +40,8 @@ torch = "^1.10.1"
 agcounts = "^0.1.1"
 toml = "^0.10.2"
 tables = "^3.7.0"
+matplotlib = "^3.6.1"
+seaborn = "^0.13.2"
 
 [tool.poetry.extras]
 docs = ["sphinx", "sphinx_rtd_theme", "numpydoc", "easydev", "nbsphinx", "docutils"]
@@ -53,7 +55,6 @@ pytest-cov = "^4.1.0"
 pycodestyle = "^2.11.1"
 flake8 = "^7.0.0"
 notebook = "^7.0.7"
-seaborn = "^0.13.2"
 numpydoc = "^1.6.0"
 easydev = "^0.12.1"
 pylint = "^3.0.3"
@@ -72,6 +73,10 @@ jinja2 = "<3.1.0"
   [tool.pylint.message_control]
   disable = ["E1101", "C0330", "C0326"]
 
+[tool.pytest.ini_options]
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+]
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]

diff --git a/tests/test_debug.py b/tests/test_debug.py
@@ -1,4 +1,5 @@
 import paat
 
+
 def test_sysinfo():
     paat.sysinfo()
diff --git a/tests/test_io.py b/tests/test_io.py
@@ -11,7 +11,8 @@
 
 @pytest.fixture
 def unscaled_data(file_path):
-    return io.read_gt3x(file_path, rescale=False, pandas=True)
+    acc, _ = io.read_gt3x(file_path, rescale=False, pandas=True)
+    return acc
 
 
 def test_loading_data(file_path, load_gt3x_file):
@@ -27,7 +28,6 @@ def test_against_actigraph_implementation(file_path, unscaled_data):
         ref = reader.to_pandas()
 
     npt.assert_almost_equal(unscaled_data[["X", "Y", "Z"]].values, ref[["X", "Y", "Z"]].values)
-    #assert np.allclose(data[["X", "Y", "Z"]].values, ref[["X", "Y", "Z"]].values)
 
 
 def test_exceptions():

diff --git a/tests/test_pipeline.py → tests/test_readme.py b/tests/test_pipeline.py → tests/test_readme.py
@@ -8,13 +8,29 @@
 FILE_PATH_SIMPLE = os.path.join(TEST_ROOT, 'resources/10min_recording.gt3x')
 
 
-def test_pipeline():
+def test_simple_example():
     """
     Test the Quickstart/Readme processing pipeline
     """
     # Load data from file
     data, sample_freq = paat.read_gt3x(FILE_PATH_SIMPLE)
 
+    # Annotate the acceleration data
+    data = paat.annotate(data, sample_freq)
+
+    # Get ActiLife counts
+    counts = paat.calculate_actigraph_counts(data, sample_freq, "10s")
+    data.loc[:, ["Y_counts", "X_counts", "Z_counts"]] = counts
+
+
+def test_advanced_example():
+    """
+    Test the advanced example from the Readme
+    """
+
+    # Load data from file
+    data, sample_freq = paat.read_gt3x(FILE_PATH_SIMPLE)
+
     # Detect non-wear time
     data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_syed2021(data, sample_freq)
 
@@ -24,12 +40,9 @@ def test_pipeline():
     # Classify moderate-to-vigorous and sedentary behavior
     data.loc[:, ["MVPA", "SB"]] = paat.calculate_pa_levels(data, sample_freq)
 
-    # Merge the activity columns into one labelled column
-    data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["Non Wear Time", "Sleep", "MVPA", "SB"])
+    # Merge the activity columns into one labelled column. columns indicates the
+    # importance of the columns, later names are more important and will be kept
+    data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Sleep", "Non Wear Time"])
 
     # Remove the other columns after merging
-    data =  data[["X", "Y", "Z", "Activity"]]
-
-    # Get ActiLife counts
-    counts = paat.calculate_actigraph_counts(data, sample_freq, "10s")
-    data.loc[:, ["Y_counts", "X_counts", "Z_counts"]] = counts
+    data =  data[["X", "Y", "Z", "Activity"]]