From 2f1670af1e9f8888dae43a794f3e934a52ce41da Mon Sep 17 00:00:00 2001
From: Ryan Harvey
Date: Wed, 16 Oct 2024 16:35:02 -0400
Subject: [PATCH] docs and typing

---
 neuro_py/ensemble/explained_variance.py |  39 +-
 neuro_py/ensemble/similarity_index.py   |  21 +-
 neuro_py/ensemble/similaritymat.py      |  53 +-
 neuro_py/io/loading.py                  | 871 +++++++++++++++++-------
 4 files changed, 678 insertions(+), 306 deletions(-)

diff --git a/neuro_py/ensemble/explained_variance.py b/neuro_py/ensemble/explained_variance.py
index 3b08de1..d79bcc7 100644
--- a/neuro_py/ensemble/explained_variance.py
+++ b/neuro_py/ensemble/explained_variance.py
@@ -86,32 +86,31 @@ class ExplainedVariance(object):
     # Most simple case, returns single explained variance value
     >>> expvar = explained_variance.ExplainedVariance(
-            st=st,
-            template=beh_epochs[1],
-            matching=beh_epochs[2],
-            control=beh_epochs[0],
-            window=None,
-        )
+    ...     st=st,
+    ...     template=beh_epochs[1],
+    ...     matching=beh_epochs[2],
+    ...     control=beh_epochs[0],
+    ...     window=None,
+    ... )
 
     # Get time resolved explained variance across entire session in 200sec bins
     >>> expvar = explained_variance.ExplainedVariance(
-            st=st,
-            template=beh_epochs[1],
-            matching=nel.EpochArray([beh_epochs.start, beh_epochs.stop]),
-            control=beh_epochs[0],
-            window=200
-        )
+    ...     st=st,
+    ...     template=beh_epochs[1],
+    ...     matching=nel.EpochArray([beh_epochs.start, beh_epochs.stop]),
+    ...     control=beh_epochs[0],
+    ...     window=200
+    ... )
 
     # Get time resolved explained variance across entire session in 200sec bins sliding by 100sec
     >>> expvar = explained_variance.ExplainedVariance(
-            st=st,
-            template=beh_epochs[1],
-            matching=nel.EpochArray([beh_epochs.start, beh_epochs.stop]),
-            control=beh_epochs[0],
-            window=200,
-            slideby=100
-        )
-
+    ...     st=st,
+    ...     template=beh_epochs[1],
+    ...     matching=nel.EpochArray([beh_epochs.start, beh_epochs.stop]),
+    ...     control=beh_epochs[0],
+    ...     window=200,
+    ...     slideby=100
+    ... )
     """
 
     def __init__(
diff --git a/neuro_py/ensemble/similarity_index.py b/neuro_py/ensemble/similarity_index.py
index 7aa38fb..98e2e1d 100644
--- a/neuro_py/ensemble/similarity_index.py
+++ b/neuro_py/ensemble/similarity_index.py
@@ -1,5 +1,6 @@
 import itertools
 import multiprocessing
+from typing import Tuple
 
 import numpy as np
 from joblib import Parallel, delayed
@@ -7,7 +8,9 @@
 from neuro_py.stats.stats import get_significant_events
 
 
-def similarity_index(patterns, n_shuffles=1000, parallel=True):
+def similarity_index(
+    patterns: np.ndarray, n_shuffles: int = 1000, parallel: bool = True
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
     Calculate the similarity index of a set of patterns.
 
@@ -20,10 +23,18 @@
     attribute large weights to the same neurons, SI will be large; if assemblies
     are orthogonal, SI will be zero.
 
-    Input:
-        patterns: list of patterns (n patterns x n neurons)
-        n_shuffles: number of shuffles to calculate the similarity index
-    Output:
+    Parameters
+    ----------
+    patterns : np.ndarray
+        List of patterns (n patterns x n neurons).
+    n_shuffles : int, optional
+        Number of shuffles to calculate the similarity index, by default 1000.
+    parallel : bool, optional
+        Whether to run in parallel, by default True.
+
+    Returns
+    -------
+    Tuple[np.ndarray, np.ndarray, np.ndarray]
         si: similarity index: float (0-1)
         combos: list of all possible combinations of patterns
         pvalues: list of p-values for each pattern combination
diff --git a/neuro_py/ensemble/similaritymat.py b/neuro_py/ensemble/similaritymat.py
index 5853119..0d24852 100644
--- a/neuro_py/ensemble/similaritymat.py
+++ b/neuro_py/ensemble/similaritymat.py
@@ -1,28 +1,39 @@
+from typing import Optional, Tuple, Union
+
 import numpy as np
+import scipy.optimize as optimize
 from sklearn.metrics.pairwise import cosine_similarity as getsim
 
 
-def similaritymat(patternsX, patternsY=None, method="cosine", findpairs=False):
+def similaritymat(
+    patternsX: np.ndarray,
+    patternsY: Optional[np.ndarray] = None,
+    method: str = "cosine",
+    findpairs: bool = False,
+) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
     """
-    INPUTS
-
-    patternsX:  co-activation patterns (assemblies)
-                - numpy array (assemblies, neurons)
-    patternsY:  co-activation patterns (assemblies)
-                - numpy array (assemblies, neurons)
-                - if None, will compute similarity
-                  of patternsX to itself
-
-    method:     defines similarity measure method
-                'cosine' - cosine similarity
-    findpairs:  maximizes main diagonal of the sim matrix to define pairs
-                from patterns X and Y
-                returns rowind,colind which can be used to reorder
-                patterns X and Y to maximize the diagonal
-    OUTPUTS
-
-    simmat:     similarity matrix
-                - array (assemblies from X, assemblies from Y)
+    Calculate the similarity matrix of co-activation patterns (assemblies).
+
+    Parameters
+    ----------
+    patternsX : np.ndarray
+        Co-activation patterns (assemblies) - numpy array (assemblies, neurons).
+    patternsY : Optional[np.ndarray], optional
+        Co-activation patterns (assemblies) - numpy array (assemblies, neurons).
+        If None, will compute similarity of patternsX to itself, by default None.
+    method : str, optional
+        Defines similarity measure method, by default 'cosine'.
+        'cosine' - cosine similarity.
+    findpairs : bool, optional
+        Maximizes main diagonal of the similarity matrix to define pairs
+        from patterns X and Y. Returns rowind, colind which can be used to reorder
+        patterns X and Y to maximize the diagonal, by default False.
+
+    Returns
+    -------
+    Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]
+        Similarity matrix (assemblies from X, assemblies from Y).
+        If findpairs is True, also returns rowind and colind.
     """
 
     if method != "cosine":
@@ -39,8 +50,6 @@ def fillmissingidxs(ind, n):
             ind = np.array(list(ind) + missing)
         return ind
 
-    import scipy.optimize as optimize
-
     rowind, colind = optimize.linear_sum_assignment(-simmat)
 
     rowind = fillmissingidxs(rowind, np.size(simmat, 0))
diff --git a/neuro_py/io/loading.py b/neuro_py/io/loading.py
index 26d0730..864f169 100644
--- a/neuro_py/io/loading.py
+++ b/neuro_py/io/loading.py
@@ -1,11 +1,12 @@
 """ Loading functions for cell explorer format"""
+
 import glob
 import multiprocessing
 import os
 import sys
 import warnings
 from itertools import chain
-from typing import List, Union
+from typing import Any, List, Tuple, Union, Dict
 from xml.dom import minidom
 
 import nelpy as nel
@@ -20,18 +21,28 @@
 from neuro_py.process.peri_event import get_participation
 
 
-def loadXML(basepath: str):
+def loadXML(basepath: str) -> Union[Tuple[int, int, int, Dict[int, list]], None]:
     """
-    path should be the folder session containing the XML file
-    Function returns :
-    1. the number of channels
-    2. the sampling frequency of the dat file or the eeg file depending of what is present in the folder
-        eeg file first if both are present or both are absent
-    3. the mappings shanks to channels as a dict
-    Args:
-        path : string
-    Returns:
-        int, int, dict
+    Load XML file and extract relevant information.
+
+    Parameters
+    ----------
+    basepath : str
+        Path to the folder session containing the XML file.
+
+    Returns
+    -------
+    Union[Tuple[int, int, int, Dict[int, list]], None]
+        A tuple containing:
+        - The number of channels (int)
+        - The sampling frequency of the dat file (int)
+        - The sampling frequency of the eeg file (int)
+        - The mapping of shanks to channels as a dict (Dict[int, list])
+
+    Notes
+    -----
+    If both dat and eeg files are present, the eeg file is prioritized.
+    If neither is present, returns None.
 
     by Guillaume Viejo
     """
@@ -81,10 +92,40 @@ def loadLFP(
     frequency: float = 1250.0,
     precision: str = "int16",
     ext: str = "lfp",
-    filename: str = None, # name of file to load, located in basepath
+    filename: str = None,  # name of file to load, located in basepath
 ):
+    """
+    Load LFP data from a specified file.
+
+    Parameters
+    ----------
+    basepath : str
+        Path to the folder containing the LFP file.
+    n_channels : int, optional
+        Number of channels, by default 90.
+    channel : Optional[Union[int, list]], optional
+        Specific channel(s) to load, by default None.
+    frequency : float, optional
+        Sampling frequency, by default 1250.0.
+    precision : str, optional
+        Data precision, by default "int16".
+    ext : str, optional
+        File extension, by default "lfp".
+    filename : Optional[str], optional
+        Name of the file to load, located in basepath, by default None.
+
+    Returns
+    -------
+    Optional[Tuple[np.ndarray, np.ndarray]]
+        Data and corresponding timestamps.
+
+    Notes
+    -----
+    If both .lfp and .eeg files are present, the .lfp file is prioritized.
+    If neither is present, returns None.
+    """
     if filename is not None:
-        path = os.path.join(basepath,filename)
+        path = os.path.join(basepath, filename)
     else:
         path = ""
         if ext == "lfp":
@@ -152,32 +193,39 @@
 class LFPLoader(object):
     """
-    Simple class to load LFP or wideband data from a recording folder
-    Args:
-        basepath : string (path to the recording folder)
-        channels : int or list of int or None (default None, load all channels memmap)
-        ext : string (lfp or dat)
-        epoch: nelpy EpochArray or ndarray (default None, load all data)
-    Returns:
-        : nelpy analogsignalarray of shape (n_channels, n_samples)
+    Simple class to load LFP or wideband data from a recording folder.
 
-    Example:
-        # load lfp file
-        >>> basepath = r"X:/data/Barrage/NN10/day10"
-        >>> lfp = loading.LFPLoader(basepath,ext="lfp")
-        >>> lfp
-        >>> for a total of 5:33:58:789 hours
+    Parameters
+    ----------
+    basepath : str
+        Path to the recording folder.
+    channels : Union[int, list, None], optional
+        Channel number or list of channel numbers, by default None (load all channels memmap).
+    ext : str, optional
+        File extension, by default "lfp".
+    epoch : Union[np.ndarray, nel.EpochArray, None], optional
+        Epoch array or ndarray, by default None (load all data).
 
-        # Loading dat file
-        >>> dat = loading.LFPLoader(basepath,ext="dat")
-        >>> dat
-        >>> for a total of 5:33:58:790 hours
-        >>> dat.lfp.data.shape
-        >>> (128, 400775808)
-        >>> type(dat.lfp.data)
-        >>> numpy.memmap
+    Returns
+    -------
+    nelpy.AnalogSignalArray
+        Analog signal array of shape (n_channels, n_samples).
-
-    Ryan Harvey 2023
+    Example:
+        # load lfp file
+        >>> basepath = r"X:/data/Barrage/NN10/day10"
+        >>> lfp = loading.LFPLoader(basepath,ext="lfp")
+        >>> lfp
+        >>> for a total of 5:33:58:789 hours
+
+        # Loading dat file
+        >>> dat = loading.LFPLoader(basepath,ext="dat")
+        >>> dat
+        >>> for a total of 5:33:58:790 hours
+        >>> dat.lfp.data.shape
+        >>> (128, 400775808)
+        >>> type(dat.lfp.data)
+        >>> numpy.memmap
     """
 
     def __init__(
@@ -202,14 +250,14 @@ def __init__(
         # load lfp
         self.load_lfp()
 
-    def get_xml_data(self):
+    def get_xml_data(self) -> None:
         nChannels, fs, fs_dat, shank_to_channel = loadXML(self.basepath)
         self.nChannels = nChannels
         self.fs = fs
         self.fs_dat = fs_dat
         self.shank_to_channel = shank_to_channel
 
-    def load_lfp(self):
+    def load_lfp(self) -> None:
         lfp, timestep = loadLFP(
             self.basepath,
             n_channels=self.nChannels,
@@ -250,24 +298,44 @@ def load_lfp(self):
     def __repr__(self) -> None:
         return self.lfp.__repr__()
 
-    def get_phase(self, band2filter: list = [6, 12], ford=3):
+    def get_phase(self, band2filter: list = [6, 12], ford: int = 3) -> np.ndarray:
+        """
+        Get the phase of the LFP signal using a bandpass filter and Hilbert transform.
+
+        Parameters
+        ----------
+        band2filter : list, optional
+            The frequency band to filter, by default [6, 12].
+        ford : int, optional
+            The order of the Butterworth filter, by default 3.
+
+        Returns
+        -------
+        np.ndarray
+            The phase of the LFP signal.
+        """
         band2filter = np.array(band2filter, dtype=float)
 
         b, a = signal.butter(ford, band2filter / (self.fs / 2), btype="bandpass")
 
         filt_sig = signal.filtfilt(b, a, self.lfp.data, padtype="odd")
 
         return np.angle(signal.hilbert(filt_sig))
 
-    def get_freq_phase_amp(self, band2filter: list = [6, 12], ford=3):
+    def get_freq_phase_amp(
+        self, band2filter: list = [6, 12], ford: int = 3
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         """
-        Uses the Hilbert transform to calculate the instantaneous phase and
-        amplitude of the time series in sig
+        Get the filtered signal, phase, amplitude, and filtered amplitude of the LFP signal.
+
         Parameters
         ----------
-        sig: np.array
-            The signal to be analysed
-        ford: int
-            The order for the Butterworth filter
-        band2filter: list
-            The two frequencies to be filtered for e.g. [6, 12]
+        band2filter : list, optional
+            The frequency band to filter, by default [6, 12].
+        ford : int, optional
+            The order of the Butterworth filter, by default 3.
+
+        Returns
+        -------
+        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
+            The filtered signal, phase, amplitude, and filtered amplitude of the LFP signal.
         """
 
         band2filter = np.array(band2filter, dtype=float)
@@ -292,7 +360,26 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
 
-def load_position(basepath, fs=39.0625):
+def load_position(basepath: str, fs: float = 39.0625) -> Tuple[pd.DataFrame, float]:
+    """
+    Load position data from a .whl file in the specified directory.
+
+    Parameters
+    ----------
+    basepath : str
+        Path to the directory containing the .whl file.
+    fs : float, optional
+        Sampling frequency, by default 39.0625.
+
+    Returns
+    -------
+    Tuple[pd.DataFrame, float]
+        DataFrame containing position data and the sampling frequency.
+
+    Notes
+    -----
+    If the directory does not exist or contains no .whl files, the function will exit.
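+
+    Examples
+    --------
+    A minimal usage sketch; the session path below is a placeholder and must
+    point to a folder containing a .whl tracking file:
+
+    >>> df, fs = load_position("/path/to/session", fs=39.0625)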
+    """
     if not os.path.exists(basepath):
         print("The path " + basepath + " doesn't exist; Exiting ...")
         sys.exit()
@@ -307,7 +394,23 @@
     return df, fs
 
 
-def writeNeuroscopeEvents(path, ep, name):
+def writeNeuroscopeEvents(path: str, ep: Any, name: str) -> None:
+    """
+    Write events to a Neuroscope-compatible file.
+
+    Parameters
+    ----------
+    path : str
+        Path to the output file.
+    ep : Any
+        Epoch data containing start and end times.
+    name : str
+        Name of the event.
+
+    Returns
+    -------
+    None
+    """
     f = open(path, "w")
     for i in range(len(ep)):
         f.writelines(
@@ -323,19 +426,25 @@
             str(ep.as_units("ms").iloc[i]["end"]) + " " + name + " end " + str(1) + "\n"
         )
     f.close()
-    return
 
 
-def load_all_cell_metrics(basepaths):
+def load_all_cell_metrics(basepaths: List[str]) -> pd.DataFrame:
     """
-    load cell metrics from multiple sessions
+    Load cell metrics from multiple sessions.
 
-    Input:
-        basepaths: list of basepths, can be pandas column
-    Output:
-        cell_metrics: concatenated pandas df with metrics
+    Parameters
+    ----------
+    basepaths : List[str]
+        List of basepaths, can be a pandas column.
 
-    Note: to get waveforms, spike times, etc. use load_cell_metrics
+    Returns
+    -------
+    pd.DataFrame
+        Concatenated pandas DataFrame with metrics.
+
+    Notes
+    -----
+    To get waveforms, spike times, etc., use load_cell_metrics.
     """
 
     # to speed up, use parallel
@@ -347,19 +456,29 @@
     return pd.concat(cell_metrics, ignore_index=True)
 
 
-def load_cell_metrics(basepath: str, only_metrics: bool = False) -> tuple:
+def load_cell_metrics(
+    basepath: str, only_metrics: bool = False
+) -> Union[pd.DataFrame, Tuple[pd.DataFrame, dict]]:
     """
-    loader of cell-explorer cell_metrics.cellinfo.mat
+    Loader of cell-explorer cell_metrics.cellinfo.mat.
 
-    Inputs: basepath: path to folder with cell_metrics.cellinfo.mat
-    outputs: df: data frame of single unit features
-        data_: dict with data that does not fit nicely into a dataframe (waveforms, acgs, epochs, etc.)
+    Parameters
+    ----------
+    basepath : str
+        Path to folder with cell_metrics.cellinfo.mat.
+    only_metrics : bool, optional
+        If True, only metrics are loaded, by default False.
 
-    See https://cellexplorer.org/datastructure/standard-cell-metrics/ for details
+    Returns
+    -------
+    Union[pd.DataFrame, Tuple[pd.DataFrame, dict]]
+        DataFrame of single unit features and a dictionary with data that does not fit nicely into a DataFrame (waveforms, acgs, epochs, etc.).
 
-    TODO: extract all fields from cell_metrics.cellinfo. There are more items that can be extracted
+    Notes
+    -----
+    See https://cellexplorer.org/datastructure/standard-cell-metrics/ for details.
 
-    - Ryan H
+    TODO: Extract all fields from cell_metrics.cellinfo. There are more items that can be extracted.
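+
+    Examples
+    --------
+    A minimal usage sketch; the basepath below is a placeholder, not a real
+    session folder:
+
+    >>> cell_metrics, data_ = load_cell_metrics("/path/to/session")
+    >>> cell_metrics = load_cell_metrics("/path/to/session", only_metrics=True)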
""" def extract_epochs(data): @@ -497,16 +616,19 @@ def un_nest_df(df): try: if (data["cell_metrics"][dn][0][0][0][0].size == 1) & ( data["cell_metrics"][dn][0][0][0].size == n_cells - ): + ): # check if nested within brackets try: - df[dn] = [value[0] if len(value)==1 else value for value in data["cell_metrics"][dn][0][0][0]] + df[dn] = [ + value[0] if len(value) == 1 else value + for value in data["cell_metrics"][dn][0][0][0] + ] except Exception: df[dn] = data["cell_metrics"][dn][0][0][0] except Exception: continue - df = pd.DataFrame(df) + df = pd.DataFrame(df) # load in tag # check if tags exist within cell_metrics @@ -562,17 +684,25 @@ def un_nest_df(df): return df, data_ -def load_SWRunitMetrics(basepath): +def load_SWRunitMetrics(basepath: str) -> pd.DataFrame: """ - load_SWRunitMetrics loads SWRunitMetrics.mat into pandas dataframe + Load SWRunitMetrics.mat into a pandas DataFrame. + + Parameters + ---------- + basepath : str + Path to the folder containing the SWRunitMetrics.mat file. - returns pandas dataframe with the following fields - particip: the probability of participation into ripples for each unit - FRall: mean firing rate during ripples - FRparticip: mean firing rate for ripples with at least 1 spike - nSpkAll: mean number of spikes in all ripples - nSpkParticip: mean number of spikes in ripples with at least 1 spike - epoch: behavioral epoch label + Returns + ------- + pd.DataFrame + DataFrame with the following fields: + - particip: the probability of participation into ripples for each unit + - FRall: mean firing rate during ripples + - FRparticip: mean firing rate for ripples with at least 1 spike + - nSpkAll: mean number of spikes in all ripples + - nSpkParticip: mean number of spikes in ripples with at least 1 spike + - epoch: behavioral epoch label """ def extract_swr_epoch_data(data, epoch): @@ -620,20 +750,24 @@ def extract_swr_epoch_data(data, epoch): return df2 -def add_manual_events(df, added_ts): +def _add_manual_events(df: pd.DataFrame, added_ts: list) -> pd.DataFrame: """ Add new rows to a dataframe representing manual events (from Neuroscope2) with durations equal to the mean duration of the existing events. - Parameters: - df (pandas DataFrame): The input dataframe, with at least two columns called - 'start' and 'stop', representing the start and stop times of the events - - added_ts (list): A list of timestamps representing the peaks of the new - events to be added to the dataframe. + Parameters + ---------- + df : pd.DataFrame + The input dataframe, with at least two columns called 'start' and 'stop', + representing the start and stop times of the events. + added_ts : list + A list of timestamps representing the peaks of the new events to be added + to the dataframe. - Returns: - pandas DataFrame: The modified dataframe with the new rows added and sorted by the 'peaks' column. + Returns + ------- + pd.DataFrame + The modified dataframe with the new rows added and sorted by the 'peaks' column. """ # Calculate the mean duration of the existing events mean_duration = (df["stop"] - df["start"]).mean() @@ -660,31 +794,38 @@ def add_manual_events(df, added_ts): def load_ripples_events( basepath: str, return_epoch_array: bool = False, manual_events: bool = True -): +) -> Union[pd.DataFrame, nel.EpochArray]: """ - load info from ripples.events.mat and store within df + Load info from ripples.events.mat and store within a DataFrame. 
-
-    args:
-        basepath: path to your session where ripples.events.mat is
-        return_epoch_array: if you want the output in an EpochArray
-        manual_events: add manually added events from Neuroscope2
-            (interval will be calculated from mean event duration)
-
-    returns pandas dataframe with the following fields
-        start: start time of ripple
-        stop: end time of ripple
-        peaks: peak time of ripple
-        amplitude: envlope value at peak time
-        duration: ripple duration
-        frequency: insta frequency at peak
-        detectorName: the name of ripple detector used
-        event_spk_thres: 1 or 0 for if a mua thres was used
-        basepath: path name
-        basename: session id
-        animal: animal id *
+    Parameters
+    ----------
+    basepath : str
+        Path to your session where ripples.events.mat is located.
+    return_epoch_array : bool, optional
+        If True, the output will be an EpochArray, by default False.
+    manual_events : bool, optional
+        If True, add manually added events from Neuroscope2 (interval will be calculated from mean event duration), by default True.
 
-    * Note that basepath/basename/animal relies on specific folder
-        structure and may be incorrect for some data structures
+    Returns
+    -------
+    Union[pd.DataFrame, nel.EpochArray]
+        DataFrame with the following fields:
+        - start: start time of ripple
+        - stop: end time of ripple
+        - peaks: peak time of ripple
+        - amplitude: envelope value at peak time
+        - duration: ripple duration
+        - frequency: instantaneous frequency at peak
+        - detectorName: the name of ripple detector used
+        - event_spk_thres: 1 or 0 for if a mua threshold was used
+        - basepath: path name
+        - basename: session id
+        - animal: animal id
+
+    Notes
+    -----
+    * Note that basepath/basename/animal relies on specific folder structure and may be incorrect for some data structures.
     """
 
     # locate .mat file
@@ -768,7 +909,7 @@
     # adding manual events
     if manual_events:
         try:
-            df = add_manual_events(df, data["ripples"]["added"][0][0].T[0])
+            df = _add_manual_events(df, data["ripples"]["added"][0][0].T[0])
         except Exception:
             pass
 
@@ -792,9 +933,29 @@
     return df
 
 
-def load_theta_cycles(basepath, return_epoch_array=False):
+def load_theta_cycles(
+    basepath: str, return_epoch_array: bool = False
+) -> Union[pd.DataFrame, nel.EpochArray]:
     """
-    load theta cycles calculated from auto_theta_cycles.m
+    Load theta cycles calculated from auto_theta_cycles.m.
+
+    Parameters
+    ----------
+    basepath : str
+        Path to your session where thetacycles.events.mat is located.
+    return_epoch_array : bool, optional
+        If True, the output will be an EpochArray, by default False.
+
+    Returns
+    -------
+    Union[pd.DataFrame, nel.EpochArray]
+        DataFrame with the following fields:
+        - start: start time of theta cycle
+        - stop: end time of theta cycle
+        - duration: theta cycle duration
+        - center: center time of theta cycle
+        - trough: trough time of theta cycle
+        - theta_channel: the theta channel used for detection
     """
     filename = os.path.join(
         basepath, os.path.basename(basepath) + ".thetacycles.events.mat"
@@ -818,7 +979,7 @@ def load_barrage_events(
     return_epoch_array: bool = False,
     restrict_to_nrem: bool = True,
     min_duration: float = 0.0,
-):
+) -> Union[pd.DataFrame, nel.EpochArray]:
     """
     Load barrage events from the .HSEn2.events.mat file.
 
@@ -835,7 +996,7 @@ def load_barrage_events(
 
     Returns
     -------
-    pd.DataFrame
+    Union[pd.DataFrame, nel.EpochArray]
         DataFrame with barrage events.
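+
+    Examples
+    --------
+    A minimal usage sketch; the basepath below is a placeholder, not a real
+    session folder:
+
+    >>> barrage_df = load_barrage_events("/path/to/session", min_duration=0.1)
+    >>> barrage_epochs = load_barrage_events(
+    ...     "/path/to/session", return_epoch_array=True
+    ... )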
""" @@ -893,20 +1054,31 @@ def load_barrage_events( def load_ied_events( basepath: str, manual_events: bool = True, return_epoch_array: bool = False -): +) -> Union[pd.DataFrame, nel.EpochArray]: """ - load info from ripples.events.mat and store within df - - args: - basepath: path to your session where ripples.events.mat is - return_epoch_array: if you want the output in an EpochArray - manual_events: add manually added events from Neuroscope2 - (interval will be calculated from mean event duration) + Load info from ripples.events.mat and store within a DataFrame. - returns pandas dataframe + Parameters + ---------- + basepath : str + Path to your session where ripples.events.mat is located. + return_epoch_array : bool, optional + If True, the output will be an EpochArray, by default False. + manual_events : bool, optional + If True, add manually added events from Neuroscope2 (interval will be calculated from mean event duration), by default True. - * Note that basepath/basename/animal relies on specific folder - structure and may be incorrect for some data structures + Returns + ------- + Union[pd.DataFrame, nel.EpochArray] + DataFrame with the following fields: + - start: start time of ripple + - stop: end time of ripple + - center: center time of ripple + - peaks: peak time of ripple + + Notes + ----- + * Note that basepath/basename/animal relies on specific folder structure and may be incorrect for some data structures. """ # locate .mat file @@ -942,7 +1114,7 @@ def load_ied_events( # adding manual events if manual_events: try: - df = add_manual_events(df, data[struct_name]["added"]) + df = _add_manual_events(df, data[struct_name]["added"]) except Exception: pass @@ -957,27 +1129,38 @@ def load_dentate_spikes( dentate_spike_type: List[str] = ["DS1", "DS2"], manual_events: bool = True, return_epoch_array: bool = False, -): +) -> Union[pd.DataFrame, nel.EpochArray]: """ - load info from DS*.events.mat and store within df - basepath: path to your session where DS*.events.mat is - dentate_spike_type: list of DS types to load - manual_events: add manually added events from Neuroscope2 - (interval will be calculated from mean event duration) - return_epoch_array: if you want the output in an EpochArray - - returns pandas dataframe with the following fields - start: start time of DS - stop: end time of DS - peaks: peak time of DS - amplitude: envlope value at peak time - duration: DS duration - detectorName: the name of DS detector used - basepath: path name - basename: session id - animal: animal id * - * Note that basepath/basename/animal relies on specific folder - structure and may be incorrect for some data structures + Load info from DS*.events.mat and store within a DataFrame. + + Parameters + ---------- + basepath : str + Path to your session where DS*.events.mat is located. + dentate_spike_type : List[str], optional + List of DS types to load, by default ["DS1", "DS2"]. + manual_events : bool, optional + If True, add manually added events from Neuroscope2 (interval will be calculated from mean event duration), by default True. + return_epoch_array : bool, optional + If True, the output will be an EpochArray, by default False. 
+
+    Returns
+    -------
+    Union[pd.DataFrame, nel.EpochArray]
+        DataFrame with the following fields:
+        - start: start time of DS
+        - stop: end time of DS
+        - peaks: peak time of DS
+        - amplitude: envelope value at peak time
+        - duration: DS duration
+        - detectorName: the name of DS detector used
+        - basepath: path name
+        - basename: session id
+        - animal: animal id
+
+    Notes
+    -----
+    * Note that basepath/basename/animal relies on specific folder structure and may be incorrect for some data structures.
     """
 
     def extract_data(s_type, data, manual_events):
@@ -1008,7 +1191,7 @@ def extract_data(s_type, data, manual_events):
         # adding manual events
         if manual_events:
             try:
-                df = add_manual_events(df, data[s_type]["added"])
+                df = _add_manual_events(df, data[s_type]["added"])
             except Exception:
                 pass
         return df
@@ -1043,9 +1226,36 @@ def extract_data(s_type, data, manual_events):
     return df
 
 
-def load_theta_rem_shift(basepath):
+def load_theta_rem_shift(basepath: str) -> Tuple[pd.DataFrame, dict]:
     """
-    load_theta_rem_shift: loads matlab structure from get_rem_shift.m
+    Load theta REM shift data from get_rem_shift.m.
+
+    Parameters
+    ----------
+    basepath : str
+        Path to your session where theta_rem_shift.mat is located.
+
+    Returns
+    -------
+    Tuple[pd.DataFrame, dict]
+        DataFrame with the following fields:
+        - UID: unique identifier for each unit
+        - circ_dist: circular distance
+        - rem_shift: REM shift
+        - non_rem_shift: non-REM shift
+        - m_rem: mean phase locking value during REM
+        - r_rem: resultant vector length during REM
+        - k_rem: concentration parameter during REM
+        - p_rem: p-value of phase locking during REM
+        - mode_rem: mode of phase locking during REM
+        - m_wake: mean phase locking value during wake
+        - r_wake: resultant vector length during wake
+        - k_wake: concentration parameter during wake
+        - p_wake: p-value of phase locking during wake
+        - mode_wake: mode of phase locking during wake
+
+        dict
+            Dictionary with phase distributions and spike phases for REM and wake states.
     """
     try:
         filename = glob.glob(basepath + os.sep + "*theta_rem_shift.mat")[0]
@@ -1117,14 +1327,23 @@ def get_spikephases(data, state):
     return df, data_dict
 
 
-def load_SleepState_states(basepath):
+def load_SleepState_states(basepath: str) -> dict:
     """
-    loader of SleepState.states.mat
+    Loader of SleepState.states.mat.
 
-    returns dict of structures contents.
+    Parameters
+    ----------
+    basepath : str
+        Path to the folder containing the SleepState.states.mat file.
 
-    TODO: extract more from file, this extracts the basics for now.
+    Returns
+    -------
+    dict
+        Dictionary containing the contents of the SleepState.states.mat file.
 
+    Notes
+    -----
+    TODO: Extract more from the file, this extracts the basics for now.
     """
     try:
         filename = glob.glob(os.path.join(basepath, "*.SleepState.states.mat"))[0]
@@ -1177,7 +1396,27 @@ def load_animal_behavior(
     load_animal_behavior loads basename.animal.behavior.mat files created by
     general_behavior_file.m The output is a pandas data frame with [time,x,y,z,linearized,speed,acceleration,trials,epochs]
 
-    Ryan H 2021
+    Parameters
+    ----------
+    basepath : str
+        Path to the session folder.
+    alternative_file : Union[str, None], optional
+        Alternative file name to load, by default None.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with the following fields:
+        - time: timestamps
+        - x: x-coordinate
+        - y: y-coordinate
+        - z: z-coordinate
+        - linearized: linearized position
+        - speed: speed of the animal
+        - acceleration: acceleration of the animal
+        - trials: trial numbers
+        - epochs: epoch names
+        - environment: environment names
     """
     df = pd.DataFrame()
@@ -1205,7 +1444,7 @@ def load_animal_behavior(
     # add all other position coordinates to df (will add everything it can within position)
     for key in data["behavior"]["position"].keys():
         values = data["behavior"]["position"][key]
-        if (isinstance(values, (list, np.ndarray)) and len(values) == 0):
+        if isinstance(values, (list, np.ndarray)) and len(values) == 0:
             continue
         df[key] = values
@@ -1243,11 +1482,30 @@ def load_animal_behavior(
 
 def load_epoch(basepath: str) -> pd.DataFrame:
     """
-    Loads epoch info from cell explorer basename.session and stores in df
+    Loads epoch info from cell explorer basename.session and stores in a DataFrame.
+
+    Parameters
+    ----------
+    basepath : str
+        Path to the session folder.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with the following fields:
+        - name: name of the epoch
+        - startTime: start time of the epoch
+        - stopTime: stop time of the epoch
+        - environment: environment during the epoch
+        - manipulation: manipulation during the epoch
+        - behavioralParadigm: behavioral paradigm during the epoch
+        - stimuli: stimuli during the epoch
+        - notes: notes about the epoch
+        - basepath: path to the session folder
     """
     filename = os.path.join(basepath, os.path.basename(basepath) + ".session.mat")
-    
+
     if not os.path.exists(filename):
         warnings.warn(f"file {filename} does not exist")
         return pd.DataFrame()
@@ -1284,9 +1542,22 @@ def add_columns(df):
     return epoch_df
 
 
-def load_trials(basepath):
+def load_trials(basepath: str) -> pd.DataFrame:
     """
-    Loads trials from cell explorer basename.session.behavioralTracking and stores in df
+    Loads trials from cell explorer basename.session.behavioralTracking and stores in a DataFrame.
+
+    Parameters
+    ----------
+    basepath : str
+        Path to the session folder.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with the following fields:
+        - startTime: start time of the trial
+        - stopTime: stop time of the trial
+        - trialsID: ID of the trial
     """
     try:
         filename = glob.glob(os.path.join(basepath, "*.animal.behavior.mat"))[0]
@@ -1309,25 +1580,36 @@ def load_trials(basepath):
     return df
 
 
-def load_brain_regions(basepath, out_format="dict"):
+def load_brain_regions(
+    basepath: str, out_format: str = "dict"
+) -> Union[dict, pd.DataFrame]:
     """
-    Loads brain region info from cell explorer basename.session and stores in dict (default) or DataFrame
+    Loads brain region info from cell explorer basename.session and stores in dict (default) or DataFrame.
 
-    Example:
-        Input:
-            brainRegions = load_epoch("Z:\\Data\\GirardeauG\\Rat09\\Rat09-20140327")
-            print(brainRegions.keys())
-            print(brainRegions['CA1'].keys())
-            print(brainRegions['CA1']['channels'])
-            print(brainRegions['CA1']['electrodeGroups'])
-        output:
-            dict_keys(['CA1', 'Unknown', 'blv', 'bmp', 'ven'])
-            dict_keys(['channels', 'electrodeGroups'])
-            [145 146 147 148 149 153 155 157 150 151 154 159 156 152 158 160 137 140
-             129 136 138 134 130 132 142 143 144 141 131 139 133 135]
-            [17 18 19 20]
-
-    region_df: pandas DataFrame with columns ['channels','brain_region','shank']
+    Parameters
+    ----------
+    basepath : str
+        Path to the session folder.
+    out_format : str, optional
+        Output format, either 'dict' or 'DataFrame', by default 'dict'.
+
+    Returns
+    -------
+    Union[dict, pd.DataFrame]
+        Dictionary or DataFrame with brain region information.
+
+    Examples
+    --------
+    >>> brainRegions = load_brain_regions("Z:\\Data\\GirardeauG\\Rat09\\Rat09-20140327")
+    >>> print(brainRegions.keys())
+    dict_keys(['CA1', 'Unknown', 'blv', 'bmp', 'ven'])
+    >>> print(brainRegions['CA1'].keys())
+    dict_keys(['channels', 'electrodeGroups'])
+    >>> print(brainRegions['CA1']['channels'])
+    [145 146 147 148 149 153 155 157 150 151 154 159 156 152 158 160 137 140
+     129 136 138 134 130 132 142 143 144 141 131 139 133 135]
+    >>> print(brainRegions['CA1']['electrodeGroups'])
+    [17 18 19 20]
     """
     filename = glob.glob(os.path.join(basepath, "*.session.mat"))[0]
     _, _, _, shank_to_channel = loadXML(basepath)
@@ -1351,26 +1633,28 @@ def load_brain_regions(
 
     if out_format == "DataFrame":
         # return as DataFrame
-        region_df = pd.DataFrame(columns = ["channels","region"])
+        region_df = pd.DataFrame(columns=["channels", "region"])
         for key in brainRegions.keys():
-            temp_df = pd.DataFrame(brainRegions[key]['channels'],columns=['channels'])
-            temp_df['region'] = key
+            temp_df = pd.DataFrame(brainRegions[key]["channels"], columns=["channels"])
+            temp_df["region"] = key
+
+            region_df = pd.concat([region_df, temp_df])
 
-            region_df = pd.concat([region_df,temp_df])
-        
-        # # sort channels by shank
-        mapped_channels= []
+        # # sort channels by shank
+        mapped_channels = []
         mapped_shanks = []
-        for key,shank_i in enumerate(shank_to_channel.keys()):
-            mapped_channels.append(shank_to_channel[key])
-            mapped_shanks.append(np.repeat(shank_i,len(shank_to_channel[key])))
+        for key, shank_i in enumerate(shank_to_channel.keys()):
+            mapped_channels.append(shank_to_channel[key])
+            mapped_shanks.append(np.repeat(shank_i, len(shank_to_channel[key])))
 
         # unpack to listssss
         idx = list(chain(*mapped_channels))
         shanks = list(chain(*mapped_shanks))
 
-        mapped_df = region_df.sort_values('channels').reset_index(drop=True).iloc[idx]
-        mapped_df['shank'] = shanks
-        mapped_df['channels'] = mapped_df['channels'] - 1 # save channel as zero-indexed
+        mapped_df = region_df.sort_values("channels").reset_index(drop=True).iloc[idx]
+        mapped_df["shank"] = shanks
+        mapped_df["channels"] = (
+            mapped_df["channels"] - 1
+        )  # save channel as zero-indexed
 
         return mapped_df.reset_index(drop=True)
 
@@ -1378,19 +1662,19 @@ def load_brain_regions(
     return brainRegions
 
 
-def get_animal_id(basepath) -> str:
+def get_animal_id(basepath: str) -> str:
     """
-    return animal ID from basepath using basename.session.mat
+    Return animal ID from basepath using basename.session.mat.
 
     Parameters
    ----------
     basepath : str
-        path to session folder
+        Path to session folder.
 
     Returns
     -------
     str
-        animal ID
+        Animal ID.
     """
     try:
         filename = glob.glob(os.path.join(basepath, "*.session.mat"))[0]
@@ -1405,17 +1689,17 @@ def get_animal_id(basepath) -> str:
 
 def add_animal_id(df: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
     """
-    Add animal_id column to a dataframe based on the basepath column
+    Add animal_id column to a dataframe based on the basepath column.
 
     Parameters
     ----------
     df : pd.core.frame.DataFrame
-        Dataframe with a basepath column
+        Dataframe with a basepath column.
 
     Returns
     -------
     pd.core.frame.DataFrame
-        Dataframe with an additional animal_id column
+        Dataframe with an additional animal_id column.
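+
+    Examples
+    --------
+    A minimal usage sketch; the basepaths below are placeholders and must point
+    to real session folders containing a basename.session.mat file:
+
+    >>> df = pd.DataFrame({"basepath": ["/data/animal1/day1", "/data/animal1/day2"]})
+    >>> df = add_animal_id(df)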
""" df["animal_id"] = df.basepath.map( dict([(basepath, get_animal_id(basepath)) for basepath in df.basepath.unique()]) @@ -1423,7 +1707,23 @@ def add_animal_id(df: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame: return df -def load_basic_data(basepath): +def load_basic_data(basepath: str) -> Tuple[pd.DataFrame, dict, pd.DataFrame, float]: + """ + Load basic data from the specified basepath. + + Parameters + ---------- + basepath : str + Path to the session folder. + + Returns + ------- + Tuple[pd.DataFrame, dict, pd.DataFrame, float] + - cell_metrics: DataFrame containing cell metrics. + - data: Dictionary containing additional data. + - ripples: DataFrame containing ripple events. + - fs_dat: Sampling rate of the data. + """ try: nChannels, fs, fs_dat, shank_to_channel = loadXML(basepath) except Exception: @@ -1436,17 +1736,41 @@ def load_basic_data(basepath): def load_spikes( - basepath, - putativeCellType=[], # restrict spikes to putativeCellType - brainRegion=[], # restrict spikes to brainRegion - remove_bad_unit=True, # true for not loading bad cells (tagged in CE) - brain_state=[], # restrict spikes to brainstate - other_metric=None, # restrict spikes to other_metric - other_metric_value=None, # restrict spikes to other_metric_value - support=None, # provide time support -): + basepath: str, + putativeCellType: List[str] = [], + brainRegion: List[str] = [], + remove_bad_unit: bool = True, + brain_state: List[str] = [], + other_metric: Union[str, None] = None, + other_metric_value: Union[str, None] = None, + support: Union[nel.EpochArray, None] = None, +) -> Tuple[Union[nel.SpikeTrainArray, None], Union[pd.DataFrame, None]]: """ - Load specific cells' spike times + Load specific cells' spike times. + + Parameters + ---------- + basepath : str + Path to the session folder. + putativeCellType : List[str], optional + List of putative cell types to restrict spikes to, by default []. + brainRegion : List[str], optional + List of brain regions to restrict spikes to, by default []. + remove_bad_unit : bool, optional + If True, do not load bad cells (tagged in CE), by default True. + brain_state : List[str], optional + List of brain states to restrict spikes to, by default []. + other_metric : Union[str, None], optional + Metric to restrict spikes to, by default None. + other_metric_value : Union[str, None], optional + Value of the metric to restrict spikes to, by default None. + support : Union[nel.EpochArray, None], optional + Time support to provide, by default None. + + Returns + ------- + Tuple[Union[nel.SpikeTrainArray, None], Union[pd.DataFrame, None]] + Spike train array and cell metrics DataFrame. """ if not isinstance(putativeCellType, list): putativeCellType = [putativeCellType] @@ -1540,10 +1864,23 @@ def load_spikes( return st, cell_metrics -def load_deepSuperficialfromRipple(basepath, bypass_mismatch_exception=False): +def load_deepSuperficialfromRipple(basepath: str, bypass_mismatch_exception: bool = False) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray]: """ - Load deepSuperficialfromRipple file created by classification_DeepSuperficial.m + Load deepSuperficialfromRipple file created by classification_DeepSuperficial.m. + + Parameters + ---------- + basepath : str + Path to the session folder. + bypass_mismatch_exception : bool, optional + If True, bypass the mismatch exception, by default False. + Returns + ------- + Tuple[pd.DataFrame, np.ndarray, np.ndarray] + - channel_df: DataFrame containing channel information. 
+        - ripple_average: Array containing average ripple traces.
+        - ripple_time_axis: Array containing ripple time axis.
+    """
     # locate .mat file
     file_type = "*.deepSuperficialfromRipple.channelinfo.mat"
@@ -1642,19 +1979,24 @@
     return channel_df, ripple_average, ripple_time_axis
 
 
-def load_mua_events(basepath):
+def load_mua_events(basepath: str) -> pd.DataFrame:
     """
     Loads the MUA data from the basepath.
-    Meant to load .mat file created by find_HSE.m
+    Meant to load .mat file created by find_HSE.m.
 
-    input:
-        basepath: str
-            The path to the folder containing the MUA data.
-    output:
-        mua_data: pandas.DataFrame
-            The pandas.DataFrame containing the MUA data
+    Parameters
+    ----------
+    basepath : str
+        The path to the folder containing the MUA data.
 
-    TODO: if none exist in basepath, create one
+    Returns
+    -------
+    pd.DataFrame
+        The pandas DataFrame containing the MUA data.
+
+    TODO
+    ----
+    If none exist in basepath, create one.
     """
 
     # locate .mat file
@@ -1689,42 +2031,45 @@ def load_mua_events(basepath):
 
 def load_manipulation(
-    basepath, struct_name=None, return_epoch_array=True, merge_gap=None
-):
+    basepath: str, struct_name: Union[str, None] = None, return_epoch_array: bool = True, merge_gap: Union[int, None] = None
+) -> Union[pd.DataFrame, nel.EpochArray]:
     """
-    Loads the data from the basename.eventName.manipulations.mat file and returns a pandas dataframe
+    Loads the data from the basename.eventName.manipulations.mat file and returns a pandas dataframe.
 
     file structure defined here: https://cellexplorer.org/datastructure/data-structure-and-format/#manipulations
 
-    inputs:
-        basepath: string, path to the basename.eventName.manipulations.mat file
-        struct_name: string, name of the structure in the mat file to load. If None, loads all the manipulation files.
-        return_epoch_array: bool, if True, returns only the epoch array
-        merge_gap: int, if not None, merges the epochs that are separated by less than merge_gap (sec). return_epoch_array must be True.
-    outputs:
-        df: pandas dataframe, with the following columns:
-            - start (float): start time of the manipulation in frames
-            - stop (float): stop time of the manipulation in frames
-            - peaks (float): list of the peak times of the manipulation in frames
-            - center (float): center time of the manipulation in frames
-            - duration (float): duration of the manipulation in frames
-            - amplitude (float): amplitude of the manipulation
-            - amplitudeUnit (string): unit of the amplitude
-    Example:
-        >> basepath = r"Z:\Data\Can\OML22\day8"
-        >> df_manipulation = load_manipulation(basepath,struct_name="optoStim",return_epoch_array=False)
-        >> df_manipulation.head(2)
+    Parameters
+    ----------
+    basepath : str
+        Path to the basename.eventName.manipulations.mat file.
+    struct_name : Union[str, None], optional
+        Name of the structure in the mat file to load. If None, loads all the manipulation files, by default None.
+    return_epoch_array : bool, optional
+        If True, returns only the epoch array, by default True.
+    merge_gap : Union[int, None], optional
+        If not None, merges the epochs that are separated by less than merge_gap (sec). return_epoch_array must be True, by default None.
+
+    Returns
+    -------
+    Union[pd.DataFrame, nel.EpochArray]
+        DataFrame or EpochArray with the manipulation data.
 
-        start stop peaks center duration amplitude amplitudeUnits
-        0 8426.83650 8426.84845 8426.842475 8426.842475 0.01195 19651 pulse_respect_baseline
-        1 8426.85245 8426.86745 8426.859950 8426.859950 0.01500 17516 pulse_respect_baseline
-
-        >> basepath = r"Z:\Data\Can\OML22\day8"
-        >> df_manipulation = load_manipulation(basepath,struct_name="optoStim",return_epoch_array=True)
-        >> df_manipulation
-
-        of length 1:25:656 minutes
+    Examples
+    --------
+    >>> basepath = r"Z:\Data\Can\OML22\day8"
+    >>> df_manipulation = load_manipulation(basepath, struct_name="optoStim", return_epoch_array=False)
+    >>> df_manipulation.head(2)
+
+          start        stop        peaks       center  duration  amplitude          amplitudeUnits
+    0  8426.83650  8426.84845  8426.842475  8426.842475   0.01195      19651  pulse_respect_baseline
+    1  8426.85245  8426.86745  8426.859950  8426.859950   0.01500      17516  pulse_respect_baseline
+
+    >>> basepath = r"Z:\Data\Can\OML22\day8"
+    >>> df_manipulation = load_manipulation(basepath, struct_name="optoStim", return_epoch_array=True)
+    >>> df_manipulation
+
+    of length 1:25:656 minutes
     """
     try:
         if struct_name is None:
@@ -1831,42 +2176,50 @@ def load_probe_layout(basepath):
     ----------
     basepath : str
         path to the session folder
-    
+
     Returns
     -------
     probe_layout : pd.DataFrame
        DataFrame with x,y coordinates and shank number
-    
+
     Laura Berkowitz 09/2024
     """
-    # load session file 
+    # load session file
     filename = glob.glob(os.path.join(basepath, "*.session.mat"))[0]
 
     # load file
     data = sio.loadmat(filename)
 
-    x = data["session"][0][0]['extracellular'][0][0]['chanCoords'][0][0][0]
-    y = data["session"][0][0]['extracellular'][0][0]['chanCoords'][0][0][1]
-    electrode_groups = data["session"][0][0]['extracellular'][0][0]['electrodeGroups'][0][0][0]
+    x = data["session"][0][0]["extracellular"][0][0]["chanCoords"][0][0][0]
+    y = data["session"][0][0]["extracellular"][0][0]["chanCoords"][0][0][1]
+    electrode_groups = data["session"][0][0]["extracellular"][0][0]["electrodeGroups"][
+        0
+    ][0][0]
 
     # for each group in electrodeGroups
     mapped_shanks = []
     mapped_channels = []
 
     for shank_i in np.arange(electrode_groups.shape[1]):
-        mapped_channels.append(electrode_groups[0][shank_i][0]-1) # -1 to make 0 indexed
-        mapped_shanks.append(np.repeat(shank_i,len(electrode_groups[0][shank_i][0])))
+        mapped_channels.append(
+            electrode_groups[0][shank_i][0] - 1
+        )  # -1 to make 0 indexed
+        mapped_shanks.append(np.repeat(shank_i, len(electrode_groups[0][shank_i][0])))
 
     # unpack to lists
     mapped_channels = list(chain(*mapped_channels))
     shanks = list(chain(*mapped_shanks))
 
     # get shank in same dimension as channels
-    shanks = np.expand_dims(shanks,axis=1)
+    shanks = np.expand_dims(shanks, axis=1)
 
-    probe_layout = pd.DataFrame({'x':x.flatten(),'y':y.flatten()}).iloc[mapped_channels].reset_index(drop=True)
-    probe_layout['shank'] = shanks
-    probe_layout['channels']= mapped_channels
+    probe_layout = (
+        pd.DataFrame({"x": x.flatten(), "y": y.flatten()})
+        .iloc[mapped_channels]
+        .reset_index(drop=True)
+    )
+    probe_layout["shank"] = shanks
+    probe_layout["channels"] = mapped_channels
 
     return probe_layout