diff --git a/aaanalysis/plotting/__init__.py b/aaanalysis/plotting/__init__.py new file mode 100644 index 00000000..dfa4cf8d --- /dev/null +++ b/aaanalysis/plotting/__init__.py @@ -0,0 +1,4 @@ +from aaanalysis.plotting.plotting_functions import plot_get_cmap, plot_get_cdict, plot_gcfs, \ + plot_settings, plot_set_legend + +__all__ = ["plot_get_cmap", "plot_get_cdict", "plot_settings", "plot_set_legend", "plot_gcfs"] diff --git a/aaanalysis/plotting/plotting_functions.py b/aaanalysis/plotting/plotting_functions.py new file mode 100644 index 00000000..e310225c --- /dev/null +++ b/aaanalysis/plotting/plotting_functions.py @@ -0,0 +1,434 @@ +#! /usr/bin/python3 +""" +Default plotting functions +""" +import seaborn as sns +import matplotlib as mpl +import matplotlib.pyplot as plt +import aaanalysis.utils as ut + + + +LIST_AA_COLOR_PALETTES = ["FEAT", "SHAP", "GGPLOT"] +LIST_AA_COLOR_DICTS = ["DICT_SCALE_CAT", "DICT_COLOR"] +LIST_AA_COLORS = LIST_AA_COLOR_PALETTES + LIST_AA_COLOR_DICTS + +LIST_FONTS = ['Arial', 'Avant Garde', 'Bitstream Vera Sans', 'Computer Modern Sans Serif', 'DejaVu Sans', + 'Geneva', 'Helvetica', 'Lucid', 'Lucida Grande', 'Verdana'] + + +# Helper functions +def check_font_style(font="Arial"): + """""" + if font not in LIST_FONTS: + error_message = f"'font' ({font}) not in recommended fonts: {LIST_FONTS}. 
Set font manually by:" \ + f"\n\tplt.rcParams['font.sans-serif'] = '{font}'" + raise ValueError(error_message) + + +def check_fig_format(fig_format="pdf"): + """""" + list_fig_formats = ['eps', 'jpg', 'jpeg', 'pdf', 'pgf', 'png', 'ps', + 'raw', 'rgba', 'svg', 'svgz', 'tif', 'tiff', 'webp'] + ut.check_str(name="fig_format", val=fig_format) + if fig_format not in list_fig_formats: + raise ValueError(f"'fig_format' should be one of following: {list_fig_formats}") + + +def check_grid_axis(grid_axis="y"): + list_grid_axis = ["y", "x", "both"] + if grid_axis not in list_grid_axis: + raise ValueError(f"'grid_axis' ({grid_axis}) should be one of following: {list_grid_axis}") + + +def check_cats(list_cat=None, dict_color=None, labels=None): + """""" + ut.check_dict(name="dict_color", val=dict_color, accept_none=False) + if labels is not None: + if list_cat is not None: + if len(list_cat) != len(labels): + raise ValueError(f"Length of 'list_cat' ({len(list_cat)}) and 'labels' ({len(labels)}) must match") + elif len(dict_color) != len(labels): + raise ValueError(f"Length of 'dict_color' ({len(dict_color)}) and 'labels' ({len(labels)}) must match") + if list_cat is None: + list_cat = list(dict_color.keys()) + else: + raise ValueError("'list_cat' and 'dict_color' should not be None") + return list_cat + + +# Get color maps +def _get_shap_cmap(n_colors=100, facecolor_dark=True): + """Generate a diverging color map for feature values.""" + n = 20 + cmap_low = sns.light_palette(ut.COLOR_SHAP_NEG, input="hex", reverse=True, n_colors=int(n_colors/2)+n) + cmap_high = sns.light_palette(ut.COLOR_SHAP_POS, input="hex", n_colors=int(n_colors/2)+n) + c_middle = [(0, 0, 0)] if facecolor_dark else [cmap_low[-1]] + cmap = cmap_low[0:-n] + c_middle + cmap_high[n:] + return cmap + + +def _get_feat_cmap(n_colors=100, facecolor_dark=False): + """Generate a diverging color map for feature values.""" + n = 5 + cmap = sns.color_palette("RdBu_r", n_colors=n_colors + n * 2) + cmap_low, cmap_high = 
cmap[0:int((n_colors + n * 2) / 2)], cmap[int((n_colors + n * 2) / 2):] + c_middle = [(0, 0, 0)] if facecolor_dark else [cmap_low[-1]] + cmap = cmap_low[0:-n] + c_middle + cmap_high[n:] + return cmap + + +def _get_ggplot_cmap(n_colors=100): + """Generate a circular GGplot color palette.""" + cmap = sns.color_palette("husl", n_colors) + return cmap + + +def _get_default_colors(name=None, n_colors=100, facecolor_dark=True): + """Retrieve default color maps based on palette name.""" + args = dict(n_colors=n_colors, facecolor_dark=facecolor_dark) + if name == "SHAP": + return _get_shap_cmap(**args) + elif name == "FEAT": + return _get_feat_cmap(**args) + elif name == "GGPLOT": + return _get_ggplot_cmap(n_colors=n_colors) + + +def _get_cmap_with_gap(n_colors=100, color_pos=None, color_neg=None, color_center=None, pct_gap=10, pct_center=None, + input="hex"): + """Generate a custom color map with a gap.""" + n_gap = int(n_colors*pct_gap/2) + cmap_pos = sns.light_palette(color_pos, input=input, n_colors=int(n_colors/2)+n_gap) + cmap_neg = sns.light_palette(color_neg, input=input, reverse=True, n_colors=int(n_colors/2)+n_gap) + color_center = [cmap_neg[-1]] if color_center is None else color_center + color_center = [color_center] if type(color_center) is str else color_center + if pct_center is None: + cmap = cmap_neg[0:-n_gap] + color_center + cmap_pos[n_gap:] + else: + n_center = int(n_colors * pct_center) + n_gap += int(n_center/2) + cmap = cmap_neg[0:-n_gap] + color_center * n_center + cmap_pos[n_gap:] + return cmap + + +# Default plotting function +def plot_get_cmap(name=None, n_colors=100, facecolor_dark=False, + color_pos=None, color_neg=None, color_center=None, + input="hex", pct_gap=10, pct_center=None): + """ + Retrieve color maps or color dictionaries specified for AAanalysis. + + Parameters + ---------- + name : str, optional + The name of the color palette to use in AAanalysis. 
Options include: + - 'SHAP', 'FEAT', 'GGPLOT': Return color maps for SHAP plots, CPP feature maps/heatmaps, + and datagrouping as in GGplot, respectively. + - 'DICT_COLOR', 'DICT_SCALE_CAT': Return default color dictionaries for plots (e.g., bars in CPPPlot.profile) + and scale categories (e.g., CPPPlot.heatmap), respectively. + n_colors : int, default=100 + Number of colors in the color map. + facecolor_dark : bool, default=False + Whether to use a dark face color for 'SHAP' and 'FEAT'. + color_pos : str, optional + Hex code for the positive color. + color_neg : str, optional + Hex code for the negative color. + color_center : str or list, optional + Hex code or list for the center color. + input : str, {'rgb', 'hls', 'husl', 'xkcd'} + Color space to interpret the input color. The first three options + apply to tuple inputs and the latter applies to string inputs. + pct_gap : int, default=10 + Percentage size of the gap between color ranges. + pct_center : float, optional + Percentage size of the center color in the map. + + Returns + ------- + cmap : list or dict + If 'name' parameter is 'SHAP', 'FEAT', or 'GGPLOT', a list of colors specified for AAanalysis will be returned. + If 'name' parameter is None, a list of colors based on provided colors + + See Also + -------- + sns.color_palette : Function to generate a color palette in seaborn. + sns.light_palette : Function to generate a lighter color palette in seaborn. + """ + # TODO check color dict name + if name in LIST_AA_COLOR_PALETTES: + cmap = _get_default_colors(name=name, n_colors=n_colors, facecolor_dark=facecolor_dark) + return cmap + cmap = _get_cmap_with_gap(n_colors=n_colors, color_pos=color_pos, color_neg=color_neg, + color_center=color_center, pct_gap=pct_gap, pct_center=pct_center, + input=input) + return cmap + + +def plot_get_cdict(name=None): + """ + Retrieve color dictionaries specified for AAanalysis. 
+ + Parameters + ---------- + name : str, {'DICT_COLOR', 'DICT_SCALE_CAT'} + The name of default color dictionaries for plots (e.g., bars in CPPPlot.profile) + and scale categories (e.g., CPPPlot.heatmap), respectively. + + Returns + ------- + cmap : dict + Specific AAanalysis color dictionary. + """ + # TODO check color dict name + color_dict = ut.DICT_COLOR if name == "DICT_COLORS" else ut.DICT_COLOR_CAT + return color_dict + + +def plot_settings(fig_format="pdf", verbose=False, grid=False, grid_axis="y", + font_scale=0.7, font="Arial", + change_size=True, weight_bold=True, adjust_elements=True, + short_ticks=False, no_ticks=False, + no_ticks_y=False, short_ticks_y=False, no_ticks_x=False, short_ticks_x=False): + """ + Configure general settings for plot visualization with various customization options. + + Parameters + ---------- + fig_format : str, default='pdf' + Specifies the file format for saving the plot. + verbose : bool, default=False + If True, enables verbose output. + grid : bool, default=False + If True, makes the grid visible. + grid_axis : str, default='y' + Choose the axis ('y', 'x', 'both') to apply the grid to. + font_scale : float, default=0.7 + Sets the scale for font sizes in the plot. + font : str, default='Arial' + Name of sans-serif font (e.g., 'Arial', 'Verdana', 'Helvetica', 'DejaVu Sans') + change_size : bool, default=True + If True, adjusts the size of plot elements. + weight_bold : bool, default=True + If True, text elements appear in bold. + adjust_elements : bool, default=True + If True, makes additional visual and layout adjustments to the plot. + short_ticks : bool, default=False + If True, uses short tick marks. + no_ticks : bool, default=False + If True, removes all tick marks. + no_ticks_y : bool, default=False + If True, removes tick marks on the y-axis. + short_ticks_y : bool, default=False + If True, uses short tick marks on the y-axis. + no_ticks_x : bool, default=False + If True, removes tick marks on the x-axis. 
+ short_ticks_x : bool, default=False + If True, uses short tick marks on the x-axis. + + Notes + ----- + This function modifies the global settings of Matplotlib and Seaborn libraries. + + Examples + -------- + >>> import aaanalysis as aa + >>> aa.plot_settings(fig_format="pdf", font_scale=1.0, weight_bold=False) + """ + # Check input + check_fig_format(fig_format=fig_format) + check_font_style(font=font) + check_grid_axis(grid_axis=grid_axis) + args_bool = {"verbose": verbose, "grid": grid, "change_size": change_size, "weight_bold": weight_bold, + "adjust_elements": adjust_elements, + "short_ticks": short_ticks, "no_ticks": no_ticks, "no_ticks_y": no_ticks_y, + "short_ticks_y": short_ticks_y, "no_ticks_x": no_ticks_x, "short_ticks_x": short_ticks_x} + for key in args_bool: + ut.check_bool(name=key, val=args_bool[key]) + ut.check_non_negative_number(name="font_scale", val=font_scale, min_val=0, just_int=False) + + # Set embedded fonts in PDF + mpl.rcParams.update(mpl.rcParamsDefault) + mpl.rcParams["pdf.fonttype"] = 42 + mpl.rcParams["pdf.fonttype"] = 42 + if verbose: + print(plt.rcParams.keys) # Print all plot settings that can be modified in general + if not change_size: + plt.rcParams["font.family"] = "sans-serif" + plt.rcParams["font.sans-serif"] = font + mpl.rc('font', **{'family': font}) + return + sns.set_context("talk", font_scale=font_scale) # Font settings https://matplotlib.org/3.1.1/tutorials/text/text_props.html + plt.rcParams["font.family"] = "sans-serif" + plt.rcParams["font.sans-serif"] = font + if weight_bold: + plt.rcParams["axes.labelweight"] = "bold" + plt.rcParams["axes.titleweight"] = "bold" + else: + plt.rcParams["axes.linewidth"] = 1 + plt.rcParams["xtick.major.width"] = 0.8 + plt.rcParams["xtick.minor.width"] = 0.6 + plt.rcParams["ytick.major.width"] = 0.8 + plt.rcParams["ytick.minor.width"] = 0.6 + if short_ticks: + plt.rcParams["xtick.major.size"] = 3.5 + plt.rcParams["xtick.minor.size"] = 2 + plt.rcParams["ytick.major.size"] = 3.5 + 
plt.rcParams["ytick.minor.size"] = 2 + if short_ticks_x: + plt.rcParams["xtick.major.size"] = 3.5 + plt.rcParams["xtick.minor.size"] = 2 + if short_ticks_y: + plt.rcParams["ytick.major.size"] = 3.5 + plt.rcParams["ytick.minor.size"] = 2 + if no_ticks: + plt.rcParams["xtick.major.size"] = 0 + plt.rcParams["xtick.minor.size"] = 0 + plt.rcParams["ytick.major.size"] = 0 + plt.rcParams["ytick.minor.size"] = 0 + if no_ticks_x: + plt.rcParams["xtick.major.size"] = 0 + plt.rcParams["xtick.minor.size"] = 0 + if no_ticks_y: + plt.rcParams["ytick.major.size"] = 0 + plt.rcParams["ytick.minor.size"] = 0 + + plt.rcParams["axes.labelsize"] = 17 #13.5 + plt.rcParams["axes.titlesize"] = 16.5 #15 + if fig_format == "pdf": + mpl.rcParams['pdf.fonttype'] = 42 + elif "svg" in fig_format: + mpl.rcParams['svg.fonttype'] = 'none' + font = {'family': font, "weight": "bold"} if weight_bold else {"family": font} + mpl.rc('font', **font) + if adjust_elements: + # Error bars + plt.rcParams["errorbar.capsize"] = 10 # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.errorbar.html + # Grid + plt.rcParams["axes.grid.axis"] = grid_axis # 'y', 'x', 'both' + plt.rcParams["axes.grid"] = grid + # Legend + plt.rcParams["legend.frameon"] = False + plt.rcParams["legend.fontsize"] = "medium" #"x-small" + plt.rcParams["legend.loc"] = 'upper right' # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.legend.html + + +def plot_gcfs(): + """Get current font size, which is set by ut.plot_settings function""" + # Get the current plotting context + current_context = sns.plotting_context() + font_size = current_context['font.size'] + return font_size + + +def plot_set_legend(ax=None, handles=None, dict_color=None, list_cat=None, labels=None, y=-0.2, x=0.5, ncol=3, + fontsize=11, weight="normal", lw=0, edgecolor=None, return_handles=False, loc="upper left", + labelspacing=0.2, columnspacing=1, title=None, fontsize_legend=None, title_align_left=True, + fontsize_weight="normal", shape=None, 
**kwargs): + """ + Set a customizable legend for a plot. + + Parameters + ---------- + ax : matplotlib.axes.Axes, default=None + The axes to attach the legend to. + handles : list, default=None + Handles for legend items. + dict_color : dict, default=None + A dictionary mapping categories to colors. + list_cat : list, default=None + List of categories to include in the legend. + labels : list, default=None + Labels for legend items. + y : float, default=-0.2 + The y-coordinate for the legend's anchor point. + x : float, default=0.5 + The x-coordinate for the legend's anchor point. + ncol : int, default=3 + Number of columns in the legend. + fontsize : int, default=11 + Font size for the legend text. + weight : str, default='normal' + Weight of the font. + lw : float, default=0 + Line width for legend items. + edgecolor : color, default=None + Edge color for legend items. + return_handles : bool, default=False + Whether to return handles and labels. + loc : str, default='upper left' + Location for the legend. + labelspacing : float, default=0.2 + Vertical spacing between legend items. + columnspacing : int, default=1 + Horizontal spacing between legend columns. + title : str, default=None + Title for the legend. + fontsize_legend : int, default=None + Font size for the legend title. + title_align_left : bool, default=True + Whether to align the title to the left. + fontsize_weight : str, default='normal' + Font weight for the legend title. + shape : str, default=None + Marker shape for legend items. + **kwargs : dict + Additional arguments passed directly to ax.legend() for finer control. + + Returns + ------- + ax : matplotlib.axes.Axes + The axes with the legend applied. + + See Also + -------- + matplotlib.pyplot.legend : For additional details on how the 'loc' parameter can be customized. + matplotlib.lines.Line2D : For additional details on the different types of marker shapes ('shape' parameter). 
+ + Examples + -------- + >>> import aaanalysis as aa + >>> aa.plot_set_legend(ax=ax, dict_color={'Cat1': 'red', 'Cat2': 'blue'}, shape='o') + """ + # Check input + if ax is None: + ax = plt.gca() + list_cat = check_cats(list_cat=list_cat, dict_color=dict_color, labels=labels) + args_float = {"y": y, "x": x, "lw": lw, "labelspacing": labelspacing, + "columnspacing": columnspacing} + for key in args_float: + ut.check_float(name=key, val=args_float[key]) + ut.check_non_negative_number(name="ncol", val=ncol, min_val=1, just_int=True, accept_none=False) + ut.check_non_negative_number(name="ncol", val=ncol, min_val=0, just_int=False, accept_none=True) + ut.check_bool(name="return_handles", val=return_handles) + ut.check_bool(name="title_align_left", val=title_align_left) + # TODO check other args + # Prepare the legend handles + dict_leg = {cat: dict_color[cat] for cat in list_cat} + # Generate function for legend markers based on provided shape + if shape is None: + if edgecolor is None: + f = lambda l, c: mpl.patches.Patch(facecolor=l, label=c, lw=lw, edgecolor=l) + else: + f = lambda l, c: mpl.patches.Patch(facecolor=l, label=c, lw=lw, edgecolor=edgecolor) + else: + f = lambda l, c: plt.Line2D([0], [0], marker=shape, color='w', markerfacecolor=l, markersize=10, label=c) + # Create handles if not provided + handles = [f(l, c) for c, l in dict_leg.items()] if handles is None else handles + # Return handles and labels if required + if return_handles: + return handles, labels + # Prepare labels and args + if labels is None: + labels = list(dict_leg.keys()) + args = dict(prop={"weight": weight, "size": fontsize}, **kwargs) + if fontsize_legend is not None: + args["title_fontproperties"] = {"weight": fontsize_weight, "size": fontsize_legend} + # Create the legend + legend = ax.legend(handles=handles, labels=labels, bbox_to_anchor=(x, y), ncol=ncol, loc=loc, + labelspacing=labelspacing, columnspacing=columnspacing, borderpad=0, **args, title=title) + # Align the title if 
required + if title_align_left: + legend._legend_box.align = "left" + return ax diff --git a/docs/source/_index/tables.rst b/docs/source/_index/tables.rst new file mode 100644 index 00000000..9e4ceec9 --- /dev/null +++ b/docs/source/_index/tables.rst @@ -0,0 +1,246 @@ +.. + Developer Notes: + This is the index file for all tables of the AAanalysis documentation. Each table should be saved the /tables + directory. This file will serve as template for tables.rst, which is automatically created on the information + provided here and in the .csv tables from the /tables directory. Add a new table as .csv in the /tables directory, + in the overview table at the beginning of this document, and a new section with a short description of it in this + document. Each column and important data types (e.g., categories) should be described. Each table should contain a + 'Reference' column. + Ignore 'tables_template.rst: WARNING: document isn't included in any toctree' warning + +Tables +====================== + +.. contents:: + :local: + :depth: 1 + +Overview Table +-------------- +All tables from the AAanalysis documentation are given here in chronological order of the project history. + +.. _0_mapper: +.. list-table:: + :header-rows: 1 + :widths: 8 8 8 + + * - Table + - Description + - See also + * - 1_overview_benchmarks + - Protein benchmark datasets + - aa.load_dataset + * - 2_overview_scales + - Amino acid scale datasets + - aa.load_scales + + +Protein benchmark datasets +-------------------------- +Three types of benchmark datasets are provided: + +- Residue prediction (AA): Datasets used to predict residue (amino acid) specific properties. +- Domain prediction (DOM): Dataset used to predict domain specific properties. +- Sequence prediction (SEQ): Datasets used to predict sequence specific properties. 
+ +The classification of each dataset is indicated as first part of their name followed by an abbreviation for the +specific dataset (e.g., 'AA_LDR', 'DOM_GSEC', 'SEQ_AMYLO'). For some datasets, an additional version of it is provided +for positive-unlabeled (PU) learning containing only positive (1) and unlabeled (2) data samples, as indicated by +*dataset_name_PU* (e.g., 'DOM_GSEC_PU'). + +.. _1_overview_benchmarks: +.. list-table:: + :header-rows: 1 + :widths: 8 8 8 8 8 8 8 8 8 8 + + * - Level + - Dataset + - # Sequences + - # Amino acids + - # Positives + - # Negatives + - Predictor + - Description + - Reference + - Label + * - Amino acid + - AA_CASPASE3 + - 233 + - 185605 + - 705 + - 184900 + - PROSPERous + - Prediction of caspase-3 cleavage site + - :ref:`Song18 ` + - 1 (adjacent to cleavage site), 0 (not adjacent to cleavage site) + * - Amino acid + - AA_FURIN + - 71 + - 59003 + - 163 + - 58840 + - PROSPERous + - Prediction of furin cleavage site + - :ref:`Song18 ` + - 1 (adjacent to cleavage site), 0 (not adjacent to cleavage site) + * - Amino acid + - AA_LDR + - 342 + - 118248 + - 35469 + - 82779 + - IDP-Seq2Seq + - Prediction of long intrinsically disordered regions (LDR) + - :ref:`Tang20 ` + - 1 (disordered), 0 (ordered) + * - Amino acid + - AA_MMP2 + - 573 + - 312976 + - 2416 + - 310560 + - PROSPERous + - Prediction of Matrix metallopeptidase-2 (MMP2) cleavage site + - :ref:`Song18 ` + - 1 (adjacent to cleavage site), 0 (not adjacent to cleavage site) + * - Amino acid + - AA_RNABIND + - 221 + - 55001 + - 6492 + - 48509 + - GMKSVM-RU + - Prediction of RNA-binding protein residues (RBP60 dataset) + - :ref:`Yang21 ` + - 1 (binding), 0 (non-binding) + * - Amino acid + - AA_SA + - 233 + - 185605 + - 101082 + - 84523 + - PROSPERous + - Prediction of solvent accessibility (SA) of residue (AA_CASPASE3 data set) + - :ref:`Song18 ` + - 1 (exposed/accessible), 0 (buried/non-accessible) + * - Sequence + - SEQ_AMYLO + - 1414 + - 8484 + - 511 + - 903 + - ReRF-Pred + 
- Prediction of amyloidogenic regions
+     - :ref:`Teng21 `
+     - 1 (amyloidogenic), 0 (non-amyloidogenic)
+   * - Sequence
+     - SEQ_CAPSID
+     - 7935
+     - 3364680
+     - 3864
+     - 4071
+     - VIRALpro
+     - Prediction of capsid proteins
+     - :ref:`Galiez16 `
+     - 1 (capsid protein), 0 (non-capsid protein)
+   * - Sequence
+     - SEQ_DISULFIDE
+     - 2547
+     - 614470
+     - 897
+     - 1650
+     - Dipro
+     - Prediction of disulfide bridges in sequences
+     - :ref:`Cheng06 `
+     - 1 (sequence with SS bond), 0 (sequence without SS bond)
+   * - Sequence
+     - SEQ_LOCATION
+     - 1835
+     - 732398
+     - 1045
+     - 790
+     - nan
+     - Prediction of subcellular location of protein (cytoplasm vs plasma membrane)
+     - :ref:`Shen19 `
+     - 1 (protein in cytoplasm), 0 (protein in plasma membrane)
+   * - Sequence
+     - SEQ_SOLUBLE
+     - 17408
+     - 4432269
+     - 8704
+     - 8704
+     - SOLpro
+     - Prediction of soluble and insoluble proteins
+     - :ref:`Magnan09 `
+     - 1 (soluble), 0 (insoluble)
+   * - Sequence
+     - SEQ_TAIL
+     - 6668
+     - 2671690
+     - 2574
+     - 4094
+     - VIRALpro
+     - Prediction of tail proteins
+     - :ref:`Galiez16 `
+     - 1 (tail protein), 0 (non-tail protein)
+   * - Domain
+     - DOM_GSEC
+     - 126
+     - 92964
+     - 63
+     - 63
+     - nan
+     - Prediction of gamma-secretase substrates
+     - :ref:`Breimann23c `
+     - 1 (substrate), 0 (non-substrate)
+   * - Domain
+     - DOM_GSEC_PU
+     - 694
+     - 494524
+     - 63
+     - 0
+     - nan
+     - Prediction of gamma-secretase substrates (PU dataset)
+     - :ref:`Breimann23c `
+     - 1 (substrate), 2 (unknown substrate status)
+
+
+Amino acid scale datasets
+-------------------------
+Different amino acid scale datasets are provided
+
+.. _2_overview_scales:
+..
list-table:: + :header-rows: 1 + :widths: 8 8 8 8 + + * - Dataset + - Description + - # Scales + - Reference + * - scales + - Amino acid scales (min-max normalized) + - 586 + - :ref:`Breimann23b ` + * - scales_raw + - Amino acid scales (raw values) + - 586 + - :ref:`Kawashima08 ` + * - scales_classification + - Classification of scales (Aaontology) + - 586 + - :ref:`Breimann23b ` + * - scales_pc + - Principal component (PC) compressed scales + - 20 + - :ref:`Breimann23a ` + * - top60 + - Top 60 scale subsets + - 60 + - :ref:`Breimann23a ` + * - top60_eval + - Evaluation of top 60 scale subsets + - 60 + - :ref:`Breimann23a ` + + diff --git a/docs/source/_index/tables/0_mapper.xlsx b/docs/source/_index/tables/0_mapper.xlsx new file mode 100644 index 00000000..7ff00447 Binary files /dev/null and b/docs/source/_index/tables/0_mapper.xlsx differ diff --git a/docs/source/_index/tables/1_overview_benchmarks.xlsx b/docs/source/_index/tables/1_overview_benchmarks.xlsx new file mode 100644 index 00000000..232d82b8 Binary files /dev/null and b/docs/source/_index/tables/1_overview_benchmarks.xlsx differ diff --git a/docs/source/_index/tables/2_overview_scales.xlsx b/docs/source/_index/tables/2_overview_scales.xlsx new file mode 100644 index 00000000..4565face Binary files /dev/null and b/docs/source/_index/tables/2_overview_scales.xlsx differ diff --git a/docs/source/conf.py b/docs/source/conf.py index 347a8c15..1485d697 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -9,7 +9,7 @@ sys.path.append(os.path.abspath('.')) -#from create_tables_doc import generate_table_rst +from create_tables_doc import generate_table_rst # -- Path and Platform setup -------------------------------------------------- SEP = "\\" if platform.system() == "Windows" else "/" @@ -172,7 +172,7 @@ ] # Create table.rst -#generate_table_rst() +generate_table_rst() # -- Linkcode configuration --------------------------------------------------- _module_path = 
os.path.dirname(importlib.util.find_spec("aaanalysis").origin) # type: ignore diff --git a/docs/source/index.rst b/docs/source/index.rst index 2206616b..57d04826 100755 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,6 +5,7 @@ Welcome to the AAanalysis documentation ======================================= +.. include:: index/badges.rst .. include:: index/overview.rst Install @@ -24,12 +25,14 @@ Install :caption: OVERVIEW index/introduction.rst + index/usage_principles.rst index/CONTRIBUTING_COPY.rst .. toctree:: :maxdepth: 1 :caption: EXAMPLES + tutorials.rst .. toctree:: :maxdepth: 2 @@ -40,6 +43,7 @@ Install .. toctree:: :maxdepth: 1 + _index/tables.rst index/references.rst Indices and tables diff --git a/docs/source/index/tables_template.rst b/docs/source/index/tables_template.rst new file mode 100755 index 00000000..13007991 --- /dev/null +++ b/docs/source/index/tables_template.rst @@ -0,0 +1,44 @@ +.. + Developer Notes: + This is the index file for all tables of the AAanalysis documentation. Each table should be saved the /tables + directory. This file will serve as template for tables.rst, which is automatically created on the information + provided here and in the .csv tables from the /tables directory. Add a new table as .csv in the /tables directory, + in the overview table at the beginning of this document, and a new section with a short description of it in this + document. Each column and important data types (e.g., categories) should be described. Each table should contain a + 'Reference' column. + Ignore 'tables_template.rst: WARNING: document isn't included in any toctree' warning + +Tables +====================== + +.. contents:: + :local: + :depth: 1 + +Overview Table +-------------- +All tables from the AAanalysis documentation are given here in chronological order of the project history. + +.. 
_0_mapper: + +Protein benchmark datasets +-------------------------- +Three types of benchmark datasets are provided: + +- Residue prediction (AA): Datasets used to predict residue (amino acid) specific properties. +- Domain prediction (DOM): Dataset used to predict domain specific properties. +- Sequence prediction (SEQ): Datasets used to predict sequence specific properties. + +The classification of each dataset is indicated as first part of their name followed by an abbreviation for the +specific dataset (e.g., 'AA_LDR', 'DOM_GSEC', 'SEQ_AMYLO'). For some datasets, an additional version of it is provided +for positive-unlabeled (PU) learning containing only positive (1) and unlabeled (2) data samples, as indicated by +*dataset_name_PU* (e.g., 'DOM_GSEC_PU'). + +.. _1_overview_benchmarks: + +Amino acid scale datasets +------------------------- +Different amino acid scale datasets are provided + +.. _2_overview_scales: + diff --git a/docs/source/index/usage_principles.rst b/docs/source/index/usage_principles.rst new file mode 100755 index 00000000..c86f9432 --- /dev/null +++ b/docs/source/index/usage_principles.rst @@ -0,0 +1,22 @@ +.. Developer Notes: + This is the index file for usage principles. Files for each part are saved in the /usage_principles directory + and the overview the AAanalysis package is given as component diagram (internal dependencies) and context diagram + (external dependencies). Always give the concise code examples reflecting the usage examples. Instead of including + comprehensive tables here, add them in tables.rst and refer to them with a short explanation + +Usage Principles +================ +Import AAanalysis as: + +.. code-block:: python + + import aaanalysis as aa + +.. 
toctree::
+   :maxdepth: 1
+
+   usage_principles/data_flow_entry_points
+   usage_principles/aaontology
+   usage_principles/feature_identification
+   usage_principles/pu_learning
+   usage_principles/xai
diff --git a/docs/source/index/usage_principles/aaontology.rst b/docs/source/index/usage_principles/aaontology.rst
new file mode 100755
index 00000000..90620117
--- /dev/null
+++ b/docs/source/index/usage_principles/aaontology.rst
@@ -0,0 +1,5 @@
+AAontology: Classification of amino acid scales
+===============================================
+
+AAontology is a two-level classification of amino acid scales, introduced in.
+
diff --git a/docs/source/index/usage_principles/data_flow_entry_points.rst b/docs/source/index/usage_principles/data_flow_entry_points.rst
new file mode 100755
index 00000000..8e8af181
--- /dev/null
+++ b/docs/source/index/usage_principles/data_flow_entry_points.rst
@@ -0,0 +1,8 @@
+Data Flow and Entry Points
+==========================
+
+The AAanalysis toolkit uses different DataFrames starting from DataFrames containing amino acid scales information
+(df_scales, df_cat) or sequence information (df_seq), which can be modified to obtain specific sequence parts (df_parts).
+Amino acid scales and sequence parts together with split settings are the input for the CPP algorithm, creating
+various physicochemical features (df_feat) by comparing two sets of protein sequences.
+
diff --git a/docs/source/index/usage_principles/feature_identification.rst b/docs/source/index/usage_principles/feature_identification.rst
new file mode 100755
index 00000000..27b8acf1
--- /dev/null
+++ b/docs/source/index/usage_principles/feature_identification.rst
@@ -0,0 +1,7 @@
+Identifying Physicochemical Signatures using CPP
+================================================
+
+The central algorithm of the AAanalysis platform is Comparative Physicochemical Profiling (CPP), a novel sequence-based
+feature engineering algorithm, designed to enable interpretable protein prediction.
+
+
diff --git a/docs/source/index/usage_principles/pu_learning.rst b/docs/source/index/usage_principles/pu_learning.rst
new file mode 100755
index 00000000..5020f5a7
--- /dev/null
+++ b/docs/source/index/usage_principles/pu_learning.rst
@@ -0,0 +1,17 @@
+Learning from unbalanced and small data
+=======================================
+
+Unbalanced and small datasets are everywhere in life science ....
+
+In a standard binary classification setup, data with positive (1) and negative (0) labels are provided, which can be
+used for training by machine learning models. If only a few samples of the negative class exist, data augmentation
+techniques (e.g., SMOTE) can be used to extend the negative dataset by artificially generated sequences. Such approaches
+are very popular for deep learning-based image recognition, but not feasible for protein sequence prediction tasks
+because slight amino acid mutations (sequence alterations or perturbations) can already have dramatic biological effects.
+Alternatively, negative samples can be identified from unlabeled samples (2), which often exist in great quantities.
+These unlabeled samples should be biologically as similar as possible to the positive class, besides not containing
+the features distinguishing the positive from the negative class. For example, .
+
+What is PU learning?
+--------------------
+Positive Unlabeled (PU) learning is a subfield of machine learning ...
\ No newline at end of file
diff --git a/docs/source/index/usage_principles/xai.rst b/docs/source/index/usage_principles/xai.rst
new file mode 100755
index 00000000..8357963d
--- /dev/null
+++ b/docs/source/index/usage_principles/xai.rst
@@ -0,0 +1,8 @@
+Explainable AI at Sequence Level
+================================
+
+Unbalanced and small datasets are everywhere in life science ...
+
+What is explainable AI?
+-----------------------
+
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..78d9b423
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,47 @@
+"""
+A summary of different testing strategies is provided as general background:
+
+1. Testing pyramid: Unit test (unit) >> Integration test (integration) >> System/End-to-End test (e2e)
+    a) Unit test: Check small bit of code (e.g., function) in isolation
+    b) Integration/Regression test: Check a larger bit of code (e.g., several classes)
+        Integration with external components/Sequence regression of internal calls
+    c) System test: Check whole system in different environments
+
+2. Positive vs negative testing
+    a) Positive unit testing: Check if code runs with valid input
+    b) Negative testing: Check if code throws an error with invalid input
+
+3. Additional test strategies
+    a) Property-Based Testing: Validate assumptions (hypothesis) of code using automatically generated data
+        "Complementary to unit testing" (p. 224-230, The Pragmatic Programmer)
+    b) Functional test: Check single bit of functionality in a system (similar to regression test?)
+        Unit test vs. functional test (Code is doing things right vs. Code is doing right things)
+
+Notes
+-----
+Recommended testing commands:
+    a) General: pytest -v -p no:warnings --tb=no test_cpp.py {line, short}
+    b) Function: pytest -v -p no:warnings --tb=no test_cpp.py::TestCPP:test_add_stat
+    c) Doctest: pytest -v --doctest-modules -p no:warnings cpp_tools/feature.py
+    d) Last failed: pytest --lf
+
+Recommended testing pattern: GIVEN, WHEN, THEN
+
+Recommended testing tools for pytest (given page from Brian, 2017):
+    a) Fixtures in conftest file (p. 50)
+    b) Parametrized Fixtures (p. 64)
+    c) Testing doctest namespace (p. 89)
+
+Following other testing tools are used:
+    a) Coverage.py: Determine how much code is tested (via pytest --cov=cpp_tools) (p.
126, Brian, 2017) + b) tox: Testing multiple configuration + c) hypothesis: Testing tool for property-based testing + +References +---------- +Brian Okken, Python Testing with pytest, The Pragmatic Programmers (2017) +David Thomas & Andrew Hunt, The Pragmatic Programmer, 20th Anniversary Edition (2019) + pp. 224-231 +David R. Maclver, Zac Hatfield-Dodds, ..., Hypothesis: A new approach to property-based testing (2019) +Harry Percival & Bob Gergory, Architecture Patterns with Python (2020) +""" diff --git a/tests/_data/cpp_features.xlsx b/tests/_data/cpp_features.xlsx new file mode 100644 index 00000000..576c5a19 Binary files /dev/null and b/tests/_data/cpp_features.xlsx differ diff --git a/tests/_utils.py b/tests/_utils.py new file mode 100644 index 00000000..205b41f5 --- /dev/null +++ b/tests/_utils.py @@ -0,0 +1,35 @@ +""" +File for testing utility functions and constants. +""" + +import os +import platform +from pathlib import Path + + +# Helper Function +def _folder_path(super_folder, folder_name): + """Modification of separator (OS depending)""" + path = os.path.join(super_folder, folder_name + SEP) + return path + + +# Folder +SEP = "\\" if platform.system() == "Windows" else "/" +FOLDER_PROJECT = str(Path(__file__).parent.parent).replace('/', SEP) + SEP +FOLDER_PROJECT += "tests" + SEP +FOLDER_RESULTS = _folder_path(FOLDER_PROJECT, 'results') +FOLDER_DATA = _folder_path(FOLDER_PROJECT, '_data') +FILE_FEAT = "cpp_features.xlsx" + + +# General Columns and strings +COL_SCALE_ID = "scale_id" +COL_CAT = "category" +COL_SUBCAT = "subcategory" +COL_NAME = "scale_name" +COL_SCALE_DESCRIPTION = "scale_description" +COL_SUBCAT_DESCRIPTION = "subcategory_description" +COL_COUNT = "n_scales" +COL_PROPERTY = "property" +COLS_SCALE_INFOS = [COL_SCALE_ID, COL_CAT, COL_SUBCAT, COL_NAME, COL_SCALE_DESCRIPTION] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..fc68cdd8 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,209 @@ +""" +This 
file contains shared fixtures (preloaded data) that can be used by tests. +""" +import pandas as pd +import numpy as np +import pytest + +import aaanalysis as aa +from aaanalysis import SequenceFeature +import tests._utils as ut + + +# Valid functions +@pytest.fixture(scope="module") +def df_seq(): + return aa.load_dataset(name="DOM_GSEC_PU", n=10) + + +@pytest.fixture(scope="module") +def labels(df_seq): + labels = [1 if x == "SUBEXPERT" else 0 for x in df_seq["label"]] + return labels + + +@pytest.fixture(scope="module") +def df_cat(): + df_cat = aa.load_scales(name="scales_cat").head(100) + return df_cat + + +@pytest.fixture(scope="module") +def df_scales(): + df_scales = aa.load_scales() + return df_scales + + +@pytest.fixture(scope="module") +def df_parts(df_seq): + sf = SequenceFeature() + return sf.get_df_parts(df_seq=df_seq) + + +@pytest.fixture(scope="module") +def split_kws(): + sf = SequenceFeature() + return sf.get_split_kws() + + +@pytest.fixture(scope="function") +def df_feat(): + return pd.read_excel(ut.FOLDER_DATA + ut.FILE_FEAT) + + +@pytest.fixture(scope="module") +def df_feat_module_scope(): + return pd.read_excel(ut.FOLDER_DATA + ut.FILE_FEAT) + + +@pytest.fixture(scope="module") +def list_parts(): + list_parts = [["tmd_jmd"], ["tmd"], ["tmd_e"], ["tmd_e", "tmd_c_jmd_c", "jmd_n_tmd_n"], + ["tmd", "tmd_e", "tmd_c_jmd_c", "jmd_n_tmd_n"]] + return list_parts + + +@pytest.fixture(scope="module") +def list_splits(): + list_splits = ["Segment(5,7)", "Segment(1,1)", "Pattern(C,1,2)", "Pattern(N,1)", "Pattern(N,1,4,10)", + "PeriodicPattern(N,i+2/3,1)", "PeriodicPattern(N,i+4/2,5)", "PeriodicPattern(C,i+1/5,1)"] + return list_splits + + +# Wrong +@pytest.fixture(params=[pd.DataFrame(), 2, "s", dict]) +def wrong_df(request): + return request.param + + +# Corrupted input using parametrized fixtures +def _corrupted_list_parts(): + list_parts = [["tmd_md"], ["TMD"], ["tmd_E"], ["md_e", "tmd_c_jmd_n", "jmd_n_tmd_a"], + ["tmd", "tmd_e", "tmd_c_jmd_c", 
"jmd_c_tmd_n"]] + return list_parts + + +@pytest.fixture(params=_corrupted_list_parts()) +def corrupted_list_parts(request): + return request.param + + +def _corrupted_list_splits(): + list_splits = ["Segment(5,2)", "segment(1,1)", "Pttern(C,1,2)", "Pattern(A,1)", "Pattern(N,25,4,10)", + "PeriodicPattern(N,i2/3,1)", "PeriodicPattern(N,i+4/2)", "Periodicattern(C,i+1/5,1)"] + return list_splits + + +@pytest.fixture(params=_corrupted_list_splits()) +def corrupted_list_splits(request): + return request.param + + +def _corrupted_df_seq(): + df_seq = aa.load_dataset(name="DOM_GSEC_PU", n=10) + dfa = df_seq.drop(["sequence"], axis=1) + df1 = dfa.drop(["tmd"], axis=1) + df2 = dfa.copy() + df2.iloc[:1, df2.columns.get_loc("tmd")] = np.nan + df3 = df2.copy() + df3["tmd"] = 4 + df4 = dfa.copy() + df4["tmd"] = np.nan + dfb = df_seq.drop(["tmd"], axis=1) + df5 = dfb.copy() + df5["sequence"] = 4 + df6 = dfb.copy() + df6["sequence"] = np.nan + return [df1, df2, df3, df4, df5, df6] + + +@pytest.fixture(params=_corrupted_df_seq()) +def corrupted_df_seq(request): + return request.param + + +def _corrupted_df_scales(): + df_scales = aa.load_scales() + scales = list(df_scales) + df1 = df_scales.copy() + df1[scales[0]] = "a" + df2 = pd.concat([df_scales, df_scales], axis=0) + df3 = pd.concat([df_scales, df_scales], axis=1) + df4 = df_scales.copy() + df4[scales[1]] = [np.NaN] + [0.5] * 19 + df5 = df_scales.copy() + df5.reset_index(inplace=True) + df6 = df_scales.copy() + df6.index = ["A"] * 20 + return [df1, df2, df3, df4, df5, df6] + + +@pytest.fixture(params=_corrupted_df_scales()) +def corrupted_df_scales(request): + return request.param + + +def _corrupted_split_kws(): + sf = SequenceFeature() + split_kws = sf.get_split_kws() + kws1 = split_kws.copy() + kws1["test"] = 1 + kws2 = split_kws.copy() + kws2["segment"] = kws2["Segment"] + kws2.pop("Segment") + kws3 = split_kws.copy() + kws3["Pattern"]["steps"] = [-1, 3] + kws4 = split_kws.copy() + kws4["PeriodicPattern"]["steps"] = [0, 0, 
None] + kws5 = split_kws.copy() + kws5["Segment"]["n_split_min"] = 10 + kws5["Segment"]["n_split_max"] = 5 + return [kws1, kws2, kws3, kws4, kws5] + + +@pytest.fixture(params=_corrupted_split_kws()) +def corrupted_split_kws(request): + return request.param + + +def _corrupted_df_parts(): + df_seq = aa.load_dataset(name="DOM_GSEC") + sf = SequenceFeature() + df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True) + df1 = pd.concat([df_parts, df_parts], axis=0) + df2 = pd.concat([df_parts, df_parts], axis=1) + df3 = df_parts.copy() + df3["test"] = "AAAAAAAAAAAAAAAAAAAA" + df4 = df_parts.copy() + df4["tmd"] = "AAAAAAAAAAAAAAAAa" + df5 = df_parts.copy() + df5.columns = [x.upper() for x in list(df_parts)] + return [df1, df2, df3, df4, df5, df5] + + +@pytest.fixture(params=_corrupted_df_parts()) +def corrupted_df_parts(request): + return request.param + + +def _corrupted_labels(): + df_seq = aa.load_dataset(name="DOM_GSEC", n=10) + labels = df_seq["label"].to_list() + labels_a = [str(x) for x in labels] + labels_b = [x + 1 for x in labels] + labels_c = labels.copy() + labels_c[0] = np.NaN + labels_d = labels.copy() + labels_d[5] = "a" + labels_e = labels.copy() + labels_e.extend([0, 1, 0]) + labels_f = labels.copy() + labels_f.remove(1) + labels_g = [0] * len(labels) + labels_h = [1] * len(labels) + return [labels_a, labels_b, labels_c, labels_d, labels_e, labels_f, labels_g, labels_h] + + +@pytest.fixture(params=_corrupted_labels()) +def corrupted_labels(request): + return request.param diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/pytest.ini b/tests/pytest.ini new file mode 100644 index 00000000..b4441f07 --- /dev/null +++ b/tests/pytest.ini @@ -0,0 +1,6 @@ +# pytest.ini + +[pytest] +filterwarnings = + ignore::DeprecationWarning +minversion = 6.0 \ No newline at end of 
file diff --git a/tests/unit/.hypothesis/examples/0338a9f663ab7546/cd6bd1dcfebeffe9 b/tests/unit/.hypothesis/examples/0338a9f663ab7546/cd6bd1dcfebeffe9 new file mode 100644 index 00000000..4227ca4e Binary files /dev/null and b/tests/unit/.hypothesis/examples/0338a9f663ab7546/cd6bd1dcfebeffe9 differ diff --git a/tests/unit/.hypothesis/examples/0b9fbbc4b67e1594/bec021b4f368e306 b/tests/unit/.hypothesis/examples/0b9fbbc4b67e1594/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/0b9fbbc4b67e1594/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/0d49c621ea836a14/bec021b4f368e306 b/tests/unit/.hypothesis/examples/0d49c621ea836a14/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/0d49c621ea836a14/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/2051beb3e8b6fe40/7210af19145ec2a8 b/tests/unit/.hypothesis/examples/2051beb3e8b6fe40/7210af19145ec2a8 new file mode 100644 index 00000000..f66c9cf4 Binary files /dev/null and b/tests/unit/.hypothesis/examples/2051beb3e8b6fe40/7210af19145ec2a8 differ diff --git a/tests/unit/.hypothesis/examples/2051beb3e8b6fe40/bec021b4f368e306 b/tests/unit/.hypothesis/examples/2051beb3e8b6fe40/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/2051beb3e8b6fe40/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/31734d851f1a4e2b/bec021b4f368e306 b/tests/unit/.hypothesis/examples/31734d851f1a4e2b/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/31734d851f1a4e2b/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/3693a99b3ae6f7c9/ad0afad02fa57cb9 b/tests/unit/.hypothesis/examples/3693a99b3ae6f7c9/ad0afad02fa57cb9 new file mode 100644 index 00000000..f8fa5a23 --- /dev/null +++ 
b/tests/unit/.hypothesis/examples/3693a99b3ae6f7c9/ad0afad02fa57cb9 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/unit/.hypothesis/examples/3693a99b3ae6f7c9/bec021b4f368e306 b/tests/unit/.hypothesis/examples/3693a99b3ae6f7c9/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/3693a99b3ae6f7c9/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/3927be5e3c3f64a6/bec021b4f368e306 b/tests/unit/.hypothesis/examples/3927be5e3c3f64a6/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/3927be5e3c3f64a6/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/3be25a19924ed0e2/cd6bd1dcfebeffe9 b/tests/unit/.hypothesis/examples/3be25a19924ed0e2/cd6bd1dcfebeffe9 new file mode 100644 index 00000000..4227ca4e Binary files /dev/null and b/tests/unit/.hypothesis/examples/3be25a19924ed0e2/cd6bd1dcfebeffe9 differ diff --git a/tests/unit/.hypothesis/examples/473a3d8204356f77/bec021b4f368e306 b/tests/unit/.hypothesis/examples/473a3d8204356f77/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/473a3d8204356f77/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/522e9e2445800a97/8a65d0e6b9d94717 b/tests/unit/.hypothesis/examples/522e9e2445800a97/8a65d0e6b9d94717 new file mode 100644 index 00000000..1de00ecd --- /dev/null +++ b/tests/unit/.hypothesis/examples/522e9e2445800a97/8a65d0e6b9d94717 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/unit/.hypothesis/examples/522e9e2445800a97/bec021b4f368e306 b/tests/unit/.hypothesis/examples/522e9e2445800a97/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/522e9e2445800a97/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/5b65fce0c423c045/bec021b4f368e306 
b/tests/unit/.hypothesis/examples/5b65fce0c423c045/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/5b65fce0c423c045/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/5ff96874b65ba13a/cd6bd1dcfebeffe9 b/tests/unit/.hypothesis/examples/5ff96874b65ba13a/cd6bd1dcfebeffe9 new file mode 100644 index 00000000..4227ca4e Binary files /dev/null and b/tests/unit/.hypothesis/examples/5ff96874b65ba13a/cd6bd1dcfebeffe9 differ diff --git a/tests/unit/.hypothesis/examples/62540fab9eff6bc0/bec021b4f368e306 b/tests/unit/.hypothesis/examples/62540fab9eff6bc0/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/62540fab9eff6bc0/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/6bbd04bcbcf9b4b9/bec021b4f368e306 b/tests/unit/.hypothesis/examples/6bbd04bcbcf9b4b9/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/6bbd04bcbcf9b4b9/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/711334a0a54002bb/cd6bd1dcfebeffe9 b/tests/unit/.hypothesis/examples/711334a0a54002bb/cd6bd1dcfebeffe9 new file mode 100644 index 00000000..4227ca4e Binary files /dev/null and b/tests/unit/.hypothesis/examples/711334a0a54002bb/cd6bd1dcfebeffe9 differ diff --git a/tests/unit/.hypothesis/examples/7124a863f1536def/bec021b4f368e306 b/tests/unit/.hypothesis/examples/7124a863f1536def/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/7124a863f1536def/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/7ac0e969251cf182/bec021b4f368e306 b/tests/unit/.hypothesis/examples/7ac0e969251cf182/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/7ac0e969251cf182/bec021b4f368e306 differ diff --git 
a/tests/unit/.hypothesis/examples/81b963090f45ad74/bec021b4f368e306 b/tests/unit/.hypothesis/examples/81b963090f45ad74/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/81b963090f45ad74/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/8301a69e314dc668/0bd1deb1f5e4e394 b/tests/unit/.hypothesis/examples/8301a69e314dc668/0bd1deb1f5e4e394 new file mode 100644 index 00000000..8214d0ee --- /dev/null +++ b/tests/unit/.hypothesis/examples/8301a69e314dc668/0bd1deb1f5e4e394 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/unit/.hypothesis/examples/8301a69e314dc668/b16ae799954fe41e b/tests/unit/.hypothesis/examples/8301a69e314dc668/b16ae799954fe41e new file mode 100644 index 00000000..147efaa6 --- /dev/null +++ b/tests/unit/.hypothesis/examples/8301a69e314dc668/b16ae799954fe41e @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/unit/.hypothesis/examples/8301a69e314dc668/e9d93fbaf0a1b2a4 b/tests/unit/.hypothesis/examples/8301a69e314dc668/e9d93fbaf0a1b2a4 new file mode 100644 index 00000000..c96ab3cc --- /dev/null +++ b/tests/unit/.hypothesis/examples/8301a69e314dc668/e9d93fbaf0a1b2a4 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/unit/.hypothesis/examples/8822ba4b2c91a43c/cd6bd1dcfebeffe9 b/tests/unit/.hypothesis/examples/8822ba4b2c91a43c/cd6bd1dcfebeffe9 new file mode 100644 index 00000000..4227ca4e Binary files /dev/null and b/tests/unit/.hypothesis/examples/8822ba4b2c91a43c/cd6bd1dcfebeffe9 differ diff --git a/tests/unit/.hypothesis/examples/8c42ed866132948e/bec021b4f368e306 b/tests/unit/.hypothesis/examples/8c42ed866132948e/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/8c42ed866132948e/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/92f6d0b72a3b7ae6/bec021b4f368e306 b/tests/unit/.hypothesis/examples/92f6d0b72a3b7ae6/bec021b4f368e306 new file 
mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/92f6d0b72a3b7ae6/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/a56b7aa70695021d/bec021b4f368e306 b/tests/unit/.hypothesis/examples/a56b7aa70695021d/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/a56b7aa70695021d/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/a60ad3cb3c7faff3/bec021b4f368e306 b/tests/unit/.hypothesis/examples/a60ad3cb3c7faff3/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/a60ad3cb3c7faff3/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/a60ad3cb3c7faff3/dbadd266fff9f046 b/tests/unit/.hypothesis/examples/a60ad3cb3c7faff3/dbadd266fff9f046 new file mode 100644 index 00000000..9280c0d3 --- /dev/null +++ b/tests/unit/.hypothesis/examples/a60ad3cb3c7faff3/dbadd266fff9f046 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/unit/.hypothesis/examples/b052e35ac20e0ec5/bec021b4f368e306 b/tests/unit/.hypothesis/examples/b052e35ac20e0ec5/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/b052e35ac20e0ec5/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/b49dc780abd56132/cd6bd1dcfebeffe9 b/tests/unit/.hypothesis/examples/b49dc780abd56132/cd6bd1dcfebeffe9 new file mode 100644 index 00000000..4227ca4e Binary files /dev/null and b/tests/unit/.hypothesis/examples/b49dc780abd56132/cd6bd1dcfebeffe9 differ diff --git a/tests/unit/.hypothesis/examples/c4e8db3eac978a66/7210af19145ec2a8 b/tests/unit/.hypothesis/examples/c4e8db3eac978a66/7210af19145ec2a8 new file mode 100644 index 00000000..f66c9cf4 Binary files /dev/null and b/tests/unit/.hypothesis/examples/c4e8db3eac978a66/7210af19145ec2a8 differ diff --git 
a/tests/unit/.hypothesis/examples/c4e8db3eac978a66/bec021b4f368e306 b/tests/unit/.hypothesis/examples/c4e8db3eac978a66/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/c4e8db3eac978a66/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/c862f96f1db540a5/bec021b4f368e306 b/tests/unit/.hypothesis/examples/c862f96f1db540a5/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/c862f96f1db540a5/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/ca3c5f0a6e532da0/ad0afad02fa57cb9 b/tests/unit/.hypothesis/examples/ca3c5f0a6e532da0/ad0afad02fa57cb9 new file mode 100644 index 00000000..f8fa5a23 --- /dev/null +++ b/tests/unit/.hypothesis/examples/ca3c5f0a6e532da0/ad0afad02fa57cb9 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/unit/.hypothesis/examples/cd014e4eb63571d0/bec021b4f368e306 b/tests/unit/.hypothesis/examples/cd014e4eb63571d0/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/cd014e4eb63571d0/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/e7884a84aa0309aa/bec021b4f368e306 b/tests/unit/.hypothesis/examples/e7884a84aa0309aa/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/e7884a84aa0309aa/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/ee49e7dd0cc2ebc2/bec021b4f368e306 b/tests/unit/.hypothesis/examples/ee49e7dd0cc2ebc2/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/ee49e7dd0cc2ebc2/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/f29ca4420287aed1/bec021b4f368e306 b/tests/unit/.hypothesis/examples/f29ca4420287aed1/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files 
/dev/null and b/tests/unit/.hypothesis/examples/f29ca4420287aed1/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/examples/f534cb803dee1cc6/cd6bd1dcfebeffe9 b/tests/unit/.hypothesis/examples/f534cb803dee1cc6/cd6bd1dcfebeffe9 new file mode 100644 index 00000000..4227ca4e Binary files /dev/null and b/tests/unit/.hypothesis/examples/f534cb803dee1cc6/cd6bd1dcfebeffe9 differ diff --git a/tests/unit/.hypothesis/examples/f58941429ec6524d/bec021b4f368e306 b/tests/unit/.hypothesis/examples/f58941429ec6524d/bec021b4f368e306 new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/tests/unit/.hypothesis/examples/f58941429ec6524d/bec021b4f368e306 differ diff --git a/tests/unit/.hypothesis/unicode_data/13.0.0/charmap.json.gz b/tests/unit/.hypothesis/unicode_data/13.0.0/charmap.json.gz new file mode 100644 index 00000000..3cb2f83e Binary files /dev/null and b/tests/unit/.hypothesis/unicode_data/13.0.0/charmap.json.gz differ diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/test_aaclust.py b/tests/unit/test_aaclust.py new file mode 100644 index 00000000..ab657cb7 --- /dev/null +++ b/tests/unit/test_aaclust.py @@ -0,0 +1,90 @@ +""" +This is a script for ... 
+""" +import time +import pandas as pd +import numpy as np +from sklearn.cluster import AgglomerativeClustering, KMeans + +import aaanalysis as aa +from aaanalysis.aaclust.aaclust import get_min_cor, estimate_lower_bound_n_clusters, \ + optimize_n_clusters, merge_clusters, AAclust + +import tests._utils as ut + +# Settings +pd.set_option('expand_frame_repr', False) # Single line print for pd.Dataframe + +# TODO change to proper test (CPP) + + +# I Helper Functions +def get_feat_matrix(df_cat=None, df_scales=None, unclassified_in=True, return_col=ut.COL_SCALE_ID, cat=None): + """""" + if cat is not None: + df_cat = df_cat[df_cat[ut.COL_CAT] == cat] + if unclassified_in: + scales = df_cat[return_col].to_list() + else: + mask = (~df_cat[ut.COL_SUBCAT].str.contains("Unclassified")) & (df_cat[ut.COL_CAT] != "Others") + df_cat = df_cat[mask] + scales = df_cat[ut.COL_SCALE_ID].to_list() + X = np.array(df_scales[scales]).T + labels = list(df_cat[return_col]) + return X, labels + + +def get_data(): + """""" + df_cat = aa.load_scales(name="scales_cat") + df_scales = aa.load_scales(name="scales") + X, scales = get_feat_matrix(df_cat=df_cat.copy(), + df_scales=df_scales.copy(), + unclassified_in=True) + return X + + +def get_model(): + """""" + model_kwargs=dict() + model = AgglomerativeClustering + return model, model_kwargs + + +# II Main Functions +def test_steps(): + """""" + X = get_data() + model, model_kwargs = get_model() + args = dict(X=X, model=model, model_kwargs=model_kwargs, min_th=0.3, on_center=False) + k = estimate_lower_bound_n_clusters(**args) + k = optimize_n_clusters(**args, n_clusters=k) + labels = model(n_clusters=k, **model_kwargs).fit(X).labels_.tolist() + print(len(set(labels))) + labels_ = merge_clusters(X, labels=labels, min_th=0.3, on_center=False) + print(len(set(labels_))) + print(get_min_cor(X, labels=labels_, on_center=False)) + + +def test_aaclust(): + """""" + X = get_data() + model, model_kwargs = get_model() + aac = AAclust(model=model, 
model_kwargs=model_kwargs) + args = dict(on_center=False, min_th=0.3, merge=True, merge_metric="euclidean") + aac.fit(X, **args) + + +# III Test/Caller Functions + + +# IV Main +def main(): + t0 = time.time() + test_steps() + t1 = time.time() + print("Time:", t1 - t0) + + +if __name__ == "__main__": + main() diff --git a/tests/unit/test_cpp.py b/tests/unit/test_cpp.py new file mode 100644 index 00000000..d0c71853 --- /dev/null +++ b/tests/unit/test_cpp.py @@ -0,0 +1,625 @@ +""" +This is a script for Unit tests of the CPP class +""" +import pandas as pd +import numpy as np +import pytest +from hypothesis import given, settings +import hypothesis.strategies as some +import matplotlib as mpl +import matplotlib.pyplot as plt + +import tests._utils as ut +import aaanalysis as aa + + +@pytest.fixture(params=["a", 3, dict(), list(), pd.DataFrame(), -0]) +def wrong_input_cpp(request): + return request.param + + +@pytest.fixture(params=["a", dict(), list(), pd.DataFrame()]) +def wrong_input(request): + return request.param + + +@pytest.fixture +def cpp(df_scales, df_cat, df_parts, split_kws): + return aa.CPP(df_scales=df_scales, df_cat=df_cat, df_parts=df_parts, split_kws=split_kws) + + +# I Unit Tests +class TestCPP: + """Test CPP class interface""" + + # Positive unit test + def test_cpp_call(self, df_scales, df_cat, df_parts, split_kws): + cpp = aa.CPP(df_scales=df_scales, df_cat=df_cat, + df_parts=df_parts, split_kws=split_kws) + assert isinstance(cpp, object) + cpp = aa.CPP(df_parts=df_parts, ) + assert isinstance(cpp, object) + + # Negative unit test + def test_missing_input(self, df_scales, df_cat, df_parts, split_kws): + with pytest.raises(ValueError): + aa.CPP() + + def test_wrong_df_scales(self, wrong_input_cpp, df_cat, df_parts, split_kws): + with pytest.raises(ValueError): + aa.CPP(df_scales=wrong_input_cpp, df_cat=df_cat, df_parts=df_parts, split_kws=split_kws) + + def test_wrong_df_cat(self, df_scales, wrong_input_cpp, df_parts, split_kws): + with 
pytest.raises(ValueError): + aa.CPP(df_scales=df_scales, df_cat=wrong_input_cpp, df_parts=df_parts, split_kws=split_kws) + + def test_wrong_df_parts(self, df_scales, df_cat, wrong_input_cpp, split_kws): + with pytest.raises(ValueError): + aa.CPP(df_scales=df_scales, df_cat=df_cat, df_parts=wrong_input_cpp, split_kws=split_kws) + + def test_wrong_split_kws(self, df_scales, df_cat, df_parts, wrong_input_cpp): + with pytest.raises(ValueError): + aa.CPP(df_scales=df_scales, df_cat=df_cat, df_parts=df_parts, split_kws=wrong_input_cpp) + + +class TestAddStat: + """Test adding statistics of features to DataFrame""" + + # Positive unit tests + def test_add_stat(self, cpp, df_feat, labels): + assert isinstance(cpp.add_stat(df_feat=df_feat, labels=labels, parametric=True), pd.DataFrame) + assert isinstance(cpp.add_stat(df_feat=df_feat, labels=labels, parametric=False), pd.DataFrame) + df_feat = df_feat[["feature"]] + assert isinstance(cpp.add_stat(df_feat=df_feat, labels=labels, parametric=True), pd.DataFrame) + assert isinstance(cpp.add_stat(df_feat=df_feat, labels=labels, parametric=False), pd.DataFrame) + + # Negative unit tests + def test_wrong_df_feat(self, cpp, labels, wrong_df): + with pytest.raises(ValueError): + cpp.add_stat(df_feat=wrong_df, labels=labels) + + def test_corrupted_labels(self, cpp, corrupted_labels, df_feat): + with pytest.raises(ValueError): + cpp.add_stat(df_feat=df_feat, labels=corrupted_labels) + + +class TestAddPositions: + """Test add_positions method""" + + # Positive unit tests + def test_add_positions(self, df_feat, cpp): + df_feat = cpp._add_positions(df_feat=df_feat, tmd_len=30) + assert isinstance(df_feat, pd.DataFrame) + assert "positions" in list(df_feat) + + # Property based testing + @given(tmd_len=some.integers(min_value=15, max_value=100), + jmd_n_len=some.integers(min_value=5, max_value=20), + jmd_c_len=some.integers(min_value=5, max_value=20), + ext_len=some.integers(min_value=1, max_value=4), + start=some.integers(min_value=0, 
max_value=50)) + @settings(max_examples=10, deadline=None) + def test_add_position_tmd_len(self, df_feat_module_scope, df_parts, tmd_len, jmd_n_len, jmd_c_len, ext_len, start): + cpp = aa.CPP(df_parts=df_parts) + df_feat = cpp._add_positions(df_feat=df_feat_module_scope, tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, + ext_len=ext_len, start=start) + assert isinstance(df_feat, pd.DataFrame) + + # Negative unit tests + def test_wrong_tmd_len(self, df_feat, cpp, wrong_input): + with pytest.raises(ValueError): + cpp._add_positions(df_feat=df_feat, tmd_len=wrong_input) + + def test_wrong_jmd_len(self, df_feat, cpp, wrong_input): + with pytest.raises(ValueError): + cpp._add_positions(df_feat=df_feat, jmd_n_len=wrong_input) + with pytest.raises(ValueError): + cpp._add_positions(df_feat=df_feat, jmd_c_len=wrong_input) + + def test_wrong_ext_len(self, df_feat, cpp, wrong_input): + with pytest.raises(ValueError): + cpp._add_positions(df_feat=df_feat, ext_len=wrong_input) + # ext_len >= jmd_n_len or jmd_c_len + with pytest.raises(ValueError): + cpp._add_positions(df_feat=df_feat, ext_len=5, jmd_n_len=3) + with pytest.raises(ValueError): + cpp._add_positions(df_feat=df_feat, ext_len=5, jmd_c_len=3) + + def test_wrong_start(self, df_feat, cpp, wrong_input): + with pytest.raises(ValueError): + cpp._add_positions(df_feat=df_feat, start=wrong_input) + with pytest.raises(ValueError): + cpp._add_positions(df_feat=df_feat, start=-4) + + +class TestAddScaleCategory: + """Test add_positions method""" + + # Positive unit tests + def test_add_scale_category(self, df_feat, cpp): + assert df_feat.equals(cpp.add_scale_info(df_feat=df_feat)) + df_no_cat = df_feat.drop([ut.COL_CAT, ut.COL_SUBCAT], axis=1) + df_with_cat = cpp.add_scale_info(df_feat=df_no_cat) + assert df_feat.equals(df_with_cat) + + # Negative unit tests + def test_wrong_input(self, cpp, wrong_input): + with pytest.raises(ValueError): + cpp.add_scale_info(df_feat=wrong_input) + + def test_missing_feature(self, 
# NOTE(review): a fragment of a negative add_scale_info test (its `def` line lies before
# this chunk) was cut at the chunk boundary and is not reproduced here — restore from VCS.


class TestAddFeatureImpact:
    """Test adding feature impact (SHAP values) to feature DataFrame."""

    # Positive unit tests
    def test_add_feat_impact(self, cpp, df_feat, df_parts, df_scales, labels):
        from sklearn.ensemble import RandomForestClassifier
        import shap
        sf = aa.SequenceFeature()
        X = sf.feat_matrix(features=list(df_feat["feature"]), df_parts=df_parts, df_scales=df_scales)
        assert isinstance(X, np.ndarray)
        model = RandomForestClassifier().fit(X=X, y=labels)
        # Compute SHAP values for the positive class and attach them to the feature table
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X, y=labels)
        df_feat["shap_value"] = shap_values[1][0]
        df_feat = cpp.add_shap(df_feat=df_feat)
        assert isinstance(df_feat, pd.DataFrame)
        df_feat = cpp.add_shap(df_feat=df_feat, name_feat_impact="Test")
        assert isinstance(df_feat, pd.DataFrame)
        # A single missing SHAP value should still be accepted
        df_feat["shap_value"] = [0.4] * (len(df_feat) - 1) + [np.nan]
        df_feat = cpp.add_shap(df_feat=df_feat, name_feat_impact="test")
        assert isinstance(df_feat, pd.DataFrame)

    # Negative unit tests
    def test_wrong_shap_value(self, cpp, df_feat):
        # Missing 'shap_value' column
        with pytest.raises(ValueError):
            cpp.add_shap(df_feat=df_feat)
        # Non-numerical 'shap_value' column
        df_feat["shap_value"] = "wrong"
        with pytest.raises(ValueError):
            cpp.add_shap(df_feat=df_feat)


class TestAddSampleDif:
    """Test adding differences of sample and reference mean to feature DataFrame."""

    # Positive unit tests
    def test_add_sample(self, df_feat, df_seq, labels, cpp):
        list_names = list(df_seq[ut.COL_NAME])[0:2]
        ref_group = 0
        # Test all names
        for prot_name in list_names:
            df_feat = cpp.add_sample_dif(df_feat=df_feat, df_seq=df_seq, labels=labels,
                                         sample_name=prot_name, ref_group=ref_group)
            assert isinstance(df_feat, pd.DataFrame)

    # Negative unit tests
    def test_wrong_input(self, df_feat, df_seq, labels, cpp):
        args = dict(df_feat=df_feat, df_seq=df_seq, labels=labels)
        name = "A4_HUMAN"
        ref_group = 0
        with pytest.raises(ValueError):
            cpp.add_sample_dif(**args, sample_name=name.lower(), ref_group=ref_group)
        with pytest.raises(ValueError):
            cpp.add_sample_dif(**args, sample_name=1, ref_group=ref_group)
        with pytest.raises(ValueError):
            cpp.add_sample_dif(**args, sample_name=name, ref_group=5)
        with pytest.raises(ValueError):
            cpp.add_sample_dif(**args, sample_name=name, ref_group=[0, 1])

    def test_corrupted_df_seq(self, df_feat, wrong_df, labels, cpp):
        name = "A4_HUMAN"
        ref_group = 0
        with pytest.raises(ValueError):
            cpp.add_sample_dif(df_feat=df_feat, df_seq=wrong_df,
                               labels=labels, sample_name=name, ref_group=ref_group)

    def test_corrupted_labels(self, df_feat, df_seq, corrupted_labels, cpp):
        name = "A4_HUMAN"
        ref_group = 0
        with pytest.raises(ValueError):
            cpp.add_sample_dif(df_feat=df_feat, df_seq=df_seq,
                               labels=corrupted_labels, sample_name=name, ref_group=ref_group)


class TestRun:
    """Test CPP.run method."""  # Fixed: docstring previously said 'add_positions'

    # Positive unit tests
    def test_cpp_run(self):
        sf = aa.SequenceFeature()
        df_seq = sf.load_sequences(n_in_class=2)
        labels = [1 if x == "SUBEXPERT" else 0 for x in df_seq["class"]]
        df_parts = sf.get_df_parts(df_seq=df_seq)
        df_cat = sf.load_categories()
        df_scales = sf.load_scales()
        # Restrict to two scales to keep the run fast
        list_scales = list(df_scales)[0:2]
        df_scales = df_scales[list_scales]
        cpp = aa.CPP(df_parts=df_parts, df_scales=df_scales, df_cat=df_cat)
        args = dict(verbose=False, labels=labels)
        assert isinstance(cpp.run(**args), pd.DataFrame)
        # Disabled (slow) parameter variations, kept for reference:
        """
        assert isinstance(cpp.run(parametric=True, **args), pd.DataFrame)
        assert isinstance(cpp.run(n_filter=1000, **args), pd.DataFrame)
        assert isinstance(cpp.run(n_pre_filter=1000, **args), pd.DataFrame)
        assert isinstance(cpp.run(accept_gaps=True, **args), pd.DataFrame)
        assert isinstance(cpp.run(pct_pre_filter=20, **args), pd.DataFrame)
        """

    # Negative unit tests
    def test_corrupted_labels(self, cpp, corrupted_labels):
        with pytest.raises(ValueError):
            cpp.run(verbose=False, labels=corrupted_labels)

    def test_wrong_n_filter(self, cpp, labels):
        for n in ["a", -3, list(), np.nan]:
            with pytest.raises(ValueError):
                cpp.run(verbose=False, labels=labels, n_filter=n)
        # Should be non-negative int > 1 and not None
        for n in [-1, 0, -100, 0.5, None]:
            with pytest.raises(ValueError):
                cpp.run(verbose=False, labels=labels, n_filter=n)

    def test_wrong_n_pre_filter(self, cpp, labels):
        for n in ["a", -3, list(), np.nan]:
            with pytest.raises(ValueError):
                cpp.run(verbose=False, labels=labels, n_pre_filter=n)
        # Should be non-negative int > 1 (None accepted)
        for n in [-1, 0, -100, 0.5]:
            with pytest.raises(ValueError):
                cpp.run(verbose=False, labels=labels, n_pre_filter=n)

    def test_wrong_pct_pre_filter(self, cpp, labels):
        for n in ["a", -3, list(), np.nan]:
            with pytest.raises(ValueError):
                cpp.run(verbose=False, labels=labels, pct_pre_filter=n)
        # Should be non-negative int >= 5 and not None
        for n in [-1, 0, -100, 0.5, 4, 3, None]:
            with pytest.raises(ValueError):
                cpp.run(verbose=False, labels=labels, pct_pre_filter=n)

    def test_wrong_max_std(self, cpp, labels):
        for n in ["a", -3, list(), np.nan]:
            with pytest.raises(ValueError):
                cpp.run(verbose=False, labels=labels, max_std_test=n)
        # Should be within the accepted range and not None
        for n in [-1, 100, -100, 4, 3, None]:
            with pytest.raises(ValueError):
                cpp.run(verbose=False, labels=labels, max_std_test=n)


# Disabled test class kept as an inert module-level string (tests for the private
# _get_df_pos helper; re-enable once the helper is importable here).
"""
class TestGetDfPos:
    # Positive unit tests
    def test_get_df_pos(self, df_feat, df_cat):
        df_pos = _get_df_pos(df_feat=df_feat, df_cat=df_cat)
        assert isinstance(df_pos, pd.DataFrame)
        for i in ["count", "mean", "sum", "std"]:
            assert isinstance(_get_df_pos(df_feat=df_feat, df_cat=df_cat, value_type=i), pd.DataFrame)

    # Property based testing
    @given(tmd_len=some.integers(min_value=15, max_value=100),
           jmd_n_len=some.integers(min_value=5, max_value=20),
           jmd_c_len=some.integers(min_value=5, max_value=20),
           start=some.integers(min_value=0, max_value=50))
    @settings(max_examples=10, deadline=None)
    def test_get_df_pos_len(self, df_feat_module_scope, df_cat, tmd_len, jmd_n_len, jmd_c_len, start):
        df_pos = _get_df_pos(df_feat=df_feat_module_scope, df_cat=df_cat,
                             tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len,
                             start=start)
        assert isinstance(df_pos, pd.DataFrame)

    # Negative unit tests
    def test_wrong_value_type(self, df_feat, df_cat):
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, df_cat=df_cat, tmd_len=wrong_input)

    def test_wrong_tmd_len(self, df_feat, df_cat, wrong_input):
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, df_cat=df_cat, tmd_len=wrong_input)

    def test_wrong_jmd_len(self, df_feat, df_cat, wrong_input):
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, df_cat=df_cat, jmd_n_len=wrong_input)
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, df_cat=df_cat, jmd_c_len=wrong_input)

    def test_wrong_start(self, df_feat, df_cat, wrong_input):
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, start=wrong_input)
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, df_cat=df_cat, start=-4)

    def test_wrong_normalize(self, df_feat, df_cat, wrong_input):
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, df_cat=df_cat, normalize=wrong_input)

    def test_wrong_value_type(self, df_feat, df_cat, wrong_input):
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, df_cat=df_cat, value_type=wrong_input)

    def test_wrong_value_col(self, df_feat, df_cat, wrong_input):
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, df_cat=df_cat, val_col=wrong_input)

    def test_wrong_y(self, df_feat, df_cat, wrong_input):
        with pytest.raises(ValueError):
            _get_df_pos(df_feat=df_feat, df_cat=df_cat, y=wrong_input)
"""


class TestPlotMethods:
    """General test for plotting methods (using heatmap)."""

    # Positive & Negative unit tests
    def test_df_feat(self, df_feat, cpp):
        """Positive unit test of main arguments: df_feat, y, val_col, value_type, normalize."""
        assert isinstance(cpp.plot_heatmap(df_feat=df_feat), mpl.axes.Axes)
        for y in ["category", "subcategory", "scale_name"]:
            assert isinstance(cpp.plot_heatmap(df_feat=df_feat, y=y), mpl.axes.Axes)
        for val_col in ["abs_auc", "abs_mean_dif", "mean_dif", "std_test", "p_val_fdr_bh"]:
            assert isinstance(cpp.plot_heatmap(df_feat=df_feat, val_col=val_col), mpl.axes.Axes)
        for val_type in ["sum", "mean", "std"]:
            for normalize in [True, False, "positions"]:
                assert isinstance(cpp.plot_heatmap(df_feat=df_feat, val_type=val_type, normalize=normalize),
                                  mpl.axes.Axes)

    def test_wrong_df_feat(self, df_feat, cpp):
        for y in ["categorY", "sub__category", "Scale", "feature", 1, list, "abs_mean_dif"]:
            with pytest.raises(ValueError):
                cpp.plot_heatmap(df_feat=df_feat, y=y)
        for val_col in ["subcategory", "Abs_mean_dif", "p_val", 1, list]:
            with pytest.raises(ValueError):
                cpp.plot_heatmap(df_feat=df_feat, val_col=val_col)
        for val_type in ["SUM", "man", 1, 2]:
            for normalize in ["positions", True]:
                with pytest.raises(ValueError):
                    cpp.plot_heatmap(df_feat=df_feat, val_type=val_type, normalize=normalize)

    def test_plotting(self, df_feat, df_parts):
        """Test main plotting arguments: figsize, title, title_kws."""
        cpp = aa.CPP(df_parts=df_parts)
        # Figsize and title checked by matplotlib
        title_kws = {'fontsize': 11,
                     'fontweight': "bold",
                     'verticalalignment': 'baseline',
                     'horizontalalignment': "center"}
        assert isinstance(cpp.plot_heatmap(df_feat=df_feat, title="Test", title_kws=title_kws),
                          mpl.axes.Axes)

    def test_figsize(self, df_feat, df_parts):
        """Test figsize."""
        cpp = aa.CPP(df_parts=df_parts)
        args = dict(df_feat=df_feat, figsize=(10, 5))
        assert isinstance(cpp.plot_heatmap(**args), mpl.axes.Axes)
        assert isinstance(cpp.plot_bargraph(**args), mpl.axes.Axes)
        assert isinstance(cpp.plot_profile(**args), mpl.axes.Axes)

    def test_wrong_figsize(self, df_feat, df_parts):
        """Test wrong figsize."""
        cpp = aa.CPP(df_parts=df_parts)
        for figsize in [(0, 10), "a", [1, 2], (10, "a")]:
            args = dict(df_feat=df_feat, figsize=figsize)
            with pytest.raises(ValueError):
                cpp.plot_heatmap(**args)
            with pytest.raises(ValueError):
                cpp.plot_bargraph(**args)
            with pytest.raises(ValueError):
                cpp.plot_profile(**args)

    def test_dict_color(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        sf = aa.SequenceFeature()
        dict_color = sf.load_colors()
        assert isinstance(cpp.plot_heatmap(df_feat=df_feat, dict_color=dict_color),
                          mpl.axes.Axes)

    def test_wrong_dict_color(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        sf = aa.SequenceFeature()
        dict_color = sf.load_colors()
        for i in [1, dict(), "asdf", 0.1]:
            with pytest.raises(ValueError):
                cpp.plot_heatmap(df_feat=df_feat, dict_color=i)
        # Non-color value for a category
        dict_color["Composition"] = 1
        with pytest.raises(ValueError):
            cpp.plot_heatmap(df_feat=df_feat, dict_color=dict_color)
        # Missing categories
        dict_color = {"Composition": "blue"}
        with pytest.raises(ValueError):
            cpp.plot_heatmap(df_feat=df_feat, dict_color=dict_color)

    def test_sequences(self, df_feat, df_parts):
        """Test sequence input: tmd_seq, jmd_n_seq, jmd_c_seq."""
        # Length input tested in TestGetDfPos
        jmd_c_seq = "AAAAAAAAAAa"
        jmd_n_seq = "aa" * 10
        cpp = aa.CPP(df_parts=df_parts, jmd_n_len=len(jmd_n_seq), jmd_c_len=len(jmd_c_seq))
        assert isinstance(cpp.plot_heatmap(df_feat=df_feat,
                                           tmd_seq="AA" * 16, jmd_c_seq=jmd_c_seq, jmd_n_seq=jmd_n_seq),
                          mpl.axes.Axes)

    def test_wrong_sequences(self, df_feat, df_parts):
        """Test wrong sequence input: tmd_seq, jmd_n_seq, jmd_c_seq."""
        # Length input tested in TestGetDfPos
        cpp = aa.CPP(df_parts=df_parts)
        wrong_seq = [1, None, list, dict]
        tmd_seq = "A" * 20
        jmd_c_seq = "B" * 10
        jmd_n_seq = "C" * 10
        for w in wrong_seq:
            with pytest.raises(ValueError):
                cpp.plot_heatmap(df_feat=df_feat, tmd_seq=w, jmd_c_seq=jmd_c_seq, jmd_n_seq=jmd_n_seq)
            with pytest.raises(ValueError):
                cpp.plot_heatmap(df_feat=df_feat, tmd_seq=tmd_seq, jmd_c_seq=w, jmd_n_seq=jmd_n_seq)
            with pytest.raises(ValueError):
                cpp.plot_heatmap(df_feat=df_feat, tmd_seq=tmd_seq, jmd_c_seq=jmd_c_seq, jmd_n_seq=w)
        # Incomplete sequence combinations
        with pytest.raises(ValueError):
            cpp.plot_heatmap(df_feat=df_feat, tmd_seq=tmd_seq, jmd_c_seq=jmd_c_seq)
        with pytest.raises(ValueError):
            cpp.plot_heatmap(df_feat=df_feat, tmd_seq=tmd_seq, jmd_n_seq=jmd_n_seq)
        with pytest.raises(ValueError):
            cpp.plot_heatmap(df_feat=df_feat, jmd_c_seq=jmd_c_seq, jmd_n_seq=jmd_n_seq)
        jmd_c_seq = "AAAAAAAAAAa"
        jmd_n_seq = "aa" * 10
        with pytest.raises(ValueError):
            cpp.plot_heatmap(df_feat=df_feat, jmd_c_seq=jmd_c_seq, jmd_n_seq=jmd_n_seq)

    def test_size(self, df_feat, df_parts):
        """Test size input: seq_size, tmd_fontsize, jmd_fontsize."""
        cpp = aa.CPP(df_parts=df_parts)
        # NOTE(review): 'tmd_seq=11' looks like it was meant to be a size argument
        # (docstring mentions seq_size) — confirm against plot_heatmap's signature.
        assert isinstance(cpp.plot_heatmap(df_feat=df_feat,
                                           tmd_seq=11, jmd_fontsize=12, tmd_fontsize=11),
                          mpl.axes.Axes)
        # Simple check function -> No negative test

    def test_color(self, df_feat, df_parts):
        """Test color input: tmd_color, jmd_color, tmd_seq_color, jmd_seq_color."""
        cpp = aa.CPP(df_parts=df_parts)
        args = dict(df_feat=df_feat, tmd_color="b")
        assert isinstance(cpp.plot_heatmap(**args), mpl.axes.Axes)
        args = dict(df_feat=df_feat, jmd_seq_color="b")
        assert isinstance(cpp.plot_heatmap(**args), mpl.axes.Axes)
        # Simple check function -> No negative test

    def test_ticks(self, df_feat, df_parts):
        """Test xtick input: xtick_size, xtick_width, xtick_length."""
        cpp = aa.CPP(df_parts=df_parts)
        assert isinstance(cpp.plot_heatmap(df_feat=df_feat, xtick_size=11, xtick_width=2,
                                           xtick_length=5, ytick_size=11),
                          mpl.axes.Axes)
        # Simple check function -> No negative test

    def test_legend(self, df_feat, df_parts):
        """Test legend args for heatmap and profile: add_legend_cat, legend_kws."""
        cpp = aa.CPP(df_parts=df_parts)
        assert isinstance(cpp.plot_heatmap(df_feat=df_feat, legend_kws=dict(fontsize=11)),
                          mpl.axes.Axes)
        # Simple check function -> No negative test


class TestPlotHeatmap:
    """Test additional interface of heatmap."""

    # Positive and negative unit tests
    def test_vmin_vmax(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        for vmin, vmax in zip([-10, -5, 1, 2, 0.1], [0, 10, 2, 3, 0.2]):
            args = dict(df_feat=df_feat, vmin=vmin, vmax=vmax)
            assert isinstance(cpp.plot_heatmap(**args), mpl.axes.Axes)

    def test_wrong_vmin_vmax(self, df_feat, cpp):
        for vmin, vmax in zip([1, "a", -10, 2], [0, 1, -100, "2"]):
            with pytest.raises(ValueError):
                cpp.plot_heatmap(df_feat=df_feat, vmin=vmin, vmax=vmax)


class TestPlotGraphProfile:
    """Test additional interface of bargraph and profile plots."""

    # Positive and negative unit tests
    def test_color(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        args = dict(df_feat=df_feat, bar_color="r", edge_color="b")
        assert isinstance(cpp.plot_bargraph(**args), mpl.axes.Axes)
        args = dict(df_feat=df_feat, edge_color="b")
        assert isinstance(cpp.plot_profile(**args), mpl.axes.Axes)

    def test_wrong_color(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        args = dict(df_feat=df_feat, bar_color="a", edge_color=1)
        with pytest.raises(ValueError):
            cpp.plot_bargraph(**args)
        args = dict(df_feat=df_feat, edge_color=1)
        with pytest.raises(ValueError):
            cpp.plot_profile(**args)

    def test_ylim(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        args = dict(df_feat=df_feat, ylim=(0, 100))
        assert isinstance(cpp.plot_bargraph(**args), mpl.axes.Axes)
        assert isinstance(cpp.plot_profile(**args), mpl.axes.Axes)

    def test_wrong_ylim(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        # NOTE(review): '(0)' is just the int 0 (not a tuple) — confirm '(0,)' was not intended.
        for ylim in [1, "a", [1, 40], (0), (0, 2), (-10, "a")]:
            args = dict(df_feat=df_feat, ylim=ylim)
            with pytest.raises(ValueError):
                cpp.plot_bargraph(**args)
            with pytest.raises(ValueError):
                cpp.plot_profile(**args)

    def test_highlight_alpha(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        args = dict(df_feat=df_feat, highlight_alpha=0.5)
        assert isinstance(cpp.plot_bargraph(**args), mpl.axes.Axes)
        assert isinstance(cpp.plot_profile(**args), mpl.axes.Axes)

    def test_wrong_highlight_alpha(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        for i in ["a", 10, list]:
            args = dict(df_feat=df_feat, highlight_alpha=i)
            with pytest.raises(ValueError):
                cpp.plot_bargraph(**args)
            with pytest.raises(ValueError):
                cpp.plot_profile(**args)

    def test_grid_axis(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        for grid_axis in ["x", "y", "both"]:
            args = dict(df_feat=df_feat, grid_axis=grid_axis)
            assert isinstance(cpp.plot_bargraph(**args), mpl.axes.Axes)
            assert isinstance(cpp.plot_profile(**args), mpl.axes.Axes)

    def test_wrong_grid_axis(self, df_feat, df_parts):
        cpp = aa.CPP(df_parts=df_parts)
        for grid_axis in ["X", 1, None, list, "XY"]:
            args = dict(df_feat=df_feat, grid_axis=grid_axis)
            with pytest.raises(ValueError):
                cpp.plot_bargraph(**args)
            with pytest.raises(ValueError):
                cpp.plot_profile(**args)


class TestPlotStat:
    """Test additional interface of stat plot."""

    # Positive unit tests

    # Negative unit tests


# II Regression/Functional test
def test_add_pipeline(df_feat):
    """Check that the add_* methods leave the non-p-value columns unchanged."""
    # TODO check
    sf = aa.SequenceFeature()
    df_seq = sf.load_sequences(n_in_class=50)
    labels = [1 if x == "SUBEXPERT" else 0 for x in df_seq["class"]]
    df_parts = sf.get_df_parts(df_seq=df_seq)
    cpp = aa.CPP(df_parts=df_parts)
    df = df_feat.copy()
    cols = [x for x in list(df) if "p_val" not in x]
    df = cpp.add_scale_info(df_feat=df)
    assert df_feat[cols].equals(df[cols])
    df = cpp.add_stat(df_feat=df, labels=labels)
    df = cpp._add_positions(df_feat=df)
    assert df_feat[cols].equals(df[cols])
    df = cpp.add_scale_info(df_feat=df)
    assert df_feat[cols].equals(df[cols])
    df = cpp._add_positions(df_feat=df)
    df = cpp.add_stat(df_feat=df, labels=labels)
    assert df_feat[cols].equals(df[cols])
    df = cpp._add_positions(df_feat=df)
    assert df_feat[cols].equals(df[cols])


def test_cpp_pipeline():
    pass


def test_cpp_with_shap():
    pass


# ===========================================================================
# File: tests/unit/test_cpp_feature.py (next file in the original patch)
# ===========================================================================
"""
This is a script testing methods of SequenceFeature object
"""
import pandas as pd
import numpy as np
import pytest
from hypothesis import given, settings
import hypothesis.strategies as some

import aaanalysis as aa


# I Unit Tests
class TestLoadScales:
    """Unit test for loading scales."""

    # Positive unit test
    def test_load_data(self):
        """Unit test for aa.SequenceFeature().load_scales() method."""
        sf = aa.SequenceFeature()
        assert isinstance(sf.load_scales(clust_th=0.5), pd.DataFrame)

    # Negative test
    def test_wrong_clustered_values(self):
        sf = aa.SequenceFeature()
        for i in [0.1, -0.2, "a", None]:
            with pytest.raises(ValueError):
                sf.load_scales(clust_th=i)

    # Property-based testing
    @given(clustered=some.floats(min_value=-10, max_value=10))
    def test_clustered_integer(self, clustered):
        sf = aa.SequenceFeature()
        if clustered not in [0.5, 0.7]:
            with pytest.raises(ValueError):
                sf.load_scales(clust_th=clustered)


class TestLoadCategories:
    """Unit test for loading DataFrame with sequence categories."""
    # NOTE(review): these tests call aa.load_scales rather than a category loader —
    # looks like a copy-paste from TestLoadScales; confirm intent.

    # Positive unit test
    def test_load_categories(self):
        assert isinstance(aa.load_scales(clust_th=0.5), pd.DataFrame)

    # Negative test
    def test_wrong_clustered_values(self):
        for i in [0.1, -0.2, "a", None]:
            with pytest.raises(ValueError):
                aa.load_scales(clust_th=i)

    # Property-based testing
    @given(clustered=some.floats(min_value=-10, max_value=10))
    def test_clustered_integer(self, clustered):
        if clustered not in [0.5, 0.7]:
            with pytest.raises(ValueError):
                aa.load_scales(clust_th=clustered)


class TestGetDfParts:
    """Unit test for loading DataFrame with sequence parts."""

    # Positive unit test
    def test_getting_df_parts_based_on_parts(self, df_seq):
        sf = aa.SequenceFeature()
        assert isinstance(sf.get_df_parts(df_seq=df_seq), pd.DataFrame)
        df = df_seq.drop(["sequence"], axis=1)
        assert isinstance(sf.get_df_parts(df_seq=df, list_parts=["tmd"]), pd.DataFrame)

    def test_getting_df_parts_based_on_seq_info(self, df_seq):
        sf = aa.SequenceFeature()
        assert isinstance(sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10), pd.DataFrame)
        df = df_seq.drop(["tmd"], axis=1)
        assert isinstance(sf.get_df_parts(df_seq=df, jmd_n_len=10, jmd_c_len=10), pd.DataFrame)

    def test_getting_df_parts_based_on_sequence(self, df_seq):
        sf = aa.SequenceFeature()
        assert isinstance(sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10), pd.DataFrame)
        df = df_seq.drop(["tmd", "tmd_start", "tmd_stop", "jmd_c"], axis=1)
        assert isinstance(sf.get_df_parts(df_seq=df, jmd_n_len=10, jmd_c_len=10), pd.DataFrame)
        assert isinstance(sf.get_df_parts(df_seq=df, jmd_n_len=0, jmd_c_len=0, ext_len=0), pd.DataFrame)

    # Negative unit tests
    def test_wrong_inputs(self, df_seq, df_cat, df_scales):
        sf = aa.SequenceFeature()
        for i in [None, "a", df_cat, df_scales, 1.1, -1]:
            with pytest.raises(ValueError):
                sf.get_df_parts(df_seq=i)
            with pytest.raises(ValueError):
                sf.get_df_parts(df_seq=df_seq, ext_len=i)
        for i in ["a", df_cat, df_scales, 1.1, -1]:
            with pytest.raises(ValueError):
                sf.get_df_parts(df_seq=df_seq, jmd_n_len=i, jmd_c_len=i)
            with pytest.raises(ValueError):
                sf.get_df_parts(df_seq=df_seq, jmd_n_len=i, jmd_c_len=10)
            with pytest.raises(ValueError):
                sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=i)

    def test_corrupted_df_seq(self, corrupted_df_seq):
        sf = aa.SequenceFeature()
        with pytest.raises(ValueError):
            sf.get_df_parts(df_seq=corrupted_df_seq)  # Via parametrized fixtures

    def test_wrong_parameter_combinations(self, df_seq, df_scales):
        sf = aa.SequenceFeature()
        df = df_seq.drop(["sequence"], axis=1)
        with pytest.raises(ValueError):
            sf.get_df_parts(df_seq=df, jmd_n_len=10, jmd_c_len=10)
        df = df_seq.drop(["tmd"], axis=1)
        with pytest.raises(ValueError):
            sf.get_df_parts(df_seq=df)


class TestGetSplitKws:
    """Unit tests for getting split arguments."""

    # Positive unit test
    def test_get_split_kws(self, df_cat):
        sf = aa.SequenceFeature()
        for i in ["Segment", "Pattern", "PeriodicPattern"]:
            assert isinstance(sf.get_split_kws(n_split_min=2, steps_pattern=[1, 3, 4], split_types=i), dict)

    # Negative unit tests
    def test_wrong_integer_input(self, df_cat):
        sf = aa.SequenceFeature()
        list_int_args = ["n_split_min", "n_split_max", "n_min", "n_max", "len_max"]
        for i in ["a", 1.1, -1, df_cat, dict, None]:
            for arg_names in list_int_args:
                arg = {arg_names: i}
                with pytest.raises(ValueError):
                    sf.get_split_kws(**arg)

    def test_wrong_ordered_list_input(self, df_cat):
        sf = aa.SequenceFeature()
        list_args = [[1, None, df_cat], [2, 1], [-1, 9], [0.1, 0.2], ["a", 4]]
        for list_arg in list_args:
            with pytest.raises(ValueError):
                sf.get_split_kws(steps_pattern=list_arg)
            with pytest.raises(ValueError):
                sf.get_split_kws(steps_periodicpattern=list_arg)

    def test_wrong_combination_of_input(self, df_cat):
        sf = aa.SequenceFeature()
        with pytest.raises(ValueError):
            sf.get_split_kws(n_split_max=4, n_split_min=6)
        with pytest.raises(ValueError):
            sf.get_split_kws(n_max=4, n_min=6)
        with pytest.raises(ValueError):
            sf.get_split_kws(n_max=3, n_min=6, len_max=1)


class TestFeatures:
    """Unit test for creating feature ids."""

    # Positive unit test
    def test_features(self, df_scales, list_parts):
        sf = aa.SequenceFeature()
        split_kws = sf.get_split_kws()
        assert isinstance(sf.get_features(), list)
        for parts in list_parts:
            assert isinstance(sf.get_features(list_parts=parts), list)
            for split_type in split_kws:
                args = dict(list_parts=parts, df_scales=df_scales, split_kws={split_type: split_kws[split_type]})
                assert isinstance(sf.get_features(**args), list)

    # Negative unit tests
    def test_wrong_input(self, df_cat, df_seq):
        sf = aa.SequenceFeature()
        for wrong_input in [1, -1, "TMD", ["TMD"], [1, 2], ["aa", "a"], [["tmd", "tmd_e"]], df_cat, [df_cat, df_seq]]:
            with pytest.raises(ValueError):
                sf.get_features(list_parts=wrong_input)
            with pytest.raises(ValueError):
                sf.get_features(list_parts=["tmd"], df_scales=wrong_input)
            with pytest.raises(ValueError):
                sf.get_features(list_parts=["tmd"], split_kws=wrong_input)

    def test_corrupted_list_parts(self, corrupted_list_parts):
        sf = aa.SequenceFeature()
        with pytest.raises(ValueError):
            sf.get_features(list_parts=corrupted_list_parts)  # Via parametrized fixtures

    def test_corrupted_df_scales(self, corrupted_df_scales):
        sf = aa.SequenceFeature()
        with pytest.raises(ValueError):
            sf.get_features(list_parts=["tmd"], df_scales=corrupted_df_scales)  # Via parametrized fixtures

    def test_corrupted_split_kws(self, corrupted_split_kws):
        sf = aa.SequenceFeature()
        with pytest.raises(ValueError):
            sf.get_features(list_parts=["tmd"], split_kws=corrupted_split_kws)  # Via parametrized fixtures


class TestFeatureName:
    """Unit tests for getting feature names."""

    # Positive unit test
    def test_feat_name(self, df_feat, df_cat):
        sf = aa.SequenceFeature()
        assert isinstance(sf.feat_names(features=df_feat["feature"]), list)
        assert isinstance(sf.feat_names(features=list(df_feat["feature"])), list)
        assert isinstance(sf.feat_names(features=list(df_feat["feature"])[0]), list)
        assert isinstance(sf.feat_names(features=df_feat["feature"], df_cat=df_cat), list)

    # Property based testing
    @given(tmd_len=some.integers(min_value=15, max_value=100),
           jmd_n_len=some.integers(min_value=5, max_value=20),
           jmd_c_len=some.integers(min_value=5, max_value=20),
           ext_len=some.integers(min_value=1, max_value=4),
           start=some.integers(min_value=0, max_value=50))
    @settings(max_examples=10, deadline=None)
    def test_feat_name_tmd_len(self, df_feat_module_scope, tmd_len, jmd_n_len, jmd_c_len, ext_len, start):
        sf = aa.SequenceFeature()
        feat_names = sf.feat_names(features=df_feat_module_scope["feature"],
                                   tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len,
                                   ext_len=ext_len, start=start)
        assert isinstance(feat_names, list)

    # Negative unit test
    def test_wrong_features(self, wrong_df):
        sf = aa.SequenceFeature()
        with pytest.raises(ValueError):
            sf.feat_names(features=wrong_df)

    def test_corrupted_feature(self, df_feat):
        sf = aa.SequenceFeature()
        for col in df_feat:
            if col != "feature":
                with pytest.raises(ValueError):
                    sf.feat_names(features=df_feat[col])
        # Feature id with an invalid part name
        wrong_feat = list(df_feat["feature"])[0]
        wrong_feat = "WRONG" + "-" + wrong_feat.split("-")[1] + "-" + wrong_feat.split("-")[2]
        with pytest.raises(ValueError):
            sf.feat_names(features=wrong_feat)

    def test_wrong_df_cat(self, df_feat, wrong_df):
        sf = aa.SequenceFeature()
        with pytest.raises(ValueError):
            sf.feat_names(features=df_feat["feature"], df_cat=wrong_df)

    def test_corrupted_df_cat(self, df_cat, df_feat):
        sf = aa.SequenceFeature()
        df_cat = df_cat[list(df_cat)[0:1]]
        with pytest.raises(ValueError):
            sf.feat_names(features=df_feat["feature"], df_cat=df_cat)


class TestFeatureValue:
    """Unit tests for getting feature values."""

    # Positive unit test
    def test_feature_value(self, df_seq, df_scales, list_parts, list_splits):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        for parts in list_parts:
            for split in list_splits:
                # NOTE(review): len(df_scales) counts rows but iloc[:, i] indexes columns —
                # confirm whether iterating over df_scales.shape[1] was intended.
                for i in range(0, len(df_scales)):
                    dict_scale = df_scales.iloc[:, i].to_dict()
                    x = sf.add_feat_value(split=split, dict_scale=dict_scale, df_parts=df_parts[parts])
                    assert isinstance(x, np.ndarray)

    def test_accept_gaps(self, df_seq, list_parts, list_splits, df_scales):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        parts, split, dict_scale = list_parts[0], list_splits[0], df_scales.iloc[:, 0].to_dict()
        df = df_parts.copy()
        args = dict(split=split, dict_scale=dict_scale)
        # Sequence with a gap character is only valid when gaps are accepted
        df[parts] = "AAA-CCC"
        assert isinstance(sf.add_feat_value(**args, df_parts=df[parts], accept_gaps=True), np.ndarray)
        with pytest.raises(ValueError):
            sf.add_feat_value(**args, df_parts=df[parts], accept_gaps=False)
        # All-gap sequence is never valid
        df[parts] = "------"
        with pytest.raises(ValueError):
            sf.add_feat_value(**args, df_parts=df[parts], accept_gaps=True)
        # Missing scale value behaves like a gap
        args = dict(split=split, df_parts=df_parts[parts])
        dict_scale_na = dict_scale.copy()
        dict_scale_na["A"] = np.nan
        assert isinstance(sf.add_feat_value(**args, dict_scale=dict_scale_na, accept_gaps=True), np.ndarray)
        with pytest.raises(ValueError):
            sf.add_feat_value(**args, dict_scale=dict_scale_na, accept_gaps=False)

    # Negative test
    def test_wrong_input(self, df_cat, df_seq, list_parts, list_splits, df_scales):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        parts, split, dict_scale = list_parts[0], list_splits[0], df_scales.iloc[:, 0].to_dict()
        list_wrong_input = [1, -1, "TMD", ["TMD"], None, [1, 2], ["aa", "a"], [["tmd", "tmd_e"]],
                            df_cat, [df_cat, df_seq], dict(a=1)]
        for wrong_input in list_wrong_input:
            with pytest.raises(ValueError):
                sf.add_feat_value(split=wrong_input, dict_scale=dict_scale, df_parts=df_parts[parts])
            with pytest.raises(ValueError):
                sf.add_feat_value(split=split, dict_scale=wrong_input, df_parts=df_parts[parts])
            with pytest.raises(ValueError):
                sf.add_feat_value(split=split, dict_scale=dict_scale, df_parts=wrong_input)

    def test_corrupted_split(self, df_seq, list_parts, df_scales, corrupted_list_splits):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        parts, dict_scale = list_parts[0], df_scales.iloc[:, 0].to_dict()
        with pytest.raises(ValueError):
            # Via parametrized fixtures
            sf.add_feat_value(split=corrupted_list_splits, dict_scale=dict_scale, df_parts=df_parts[parts])

    def test_corrupted_dict_scale(self, df_seq, list_parts, list_splits, df_scales):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        parts, split, dict_scale = list_parts[0], list_splits[0], df_scales.iloc[:, 0].to_dict()
        dict_scale1 = dict_scale.copy()
        dict_scale1["A"] = "A"          # non-numeric value
        dict_scale2 = dict_scale.copy()
        dict_scale2.pop("A")            # missing amino acid
        dict_scale3 = dict_scale.copy()
        dict_scale3["A"] = dict         # non-scalar value
        wrong_dict_scales = [dict(A=1, B=np.nan), dict(a=0), dict_scale1, dict_scale2, dict_scale3]
        for d in wrong_dict_scales:
            with pytest.raises(ValueError):
                sf.add_feat_value(split=split, dict_scale=d, df_parts=df_parts[parts])

    def test_corrupted_df_parts(self, list_splits, df_scales, corrupted_df_parts):
        sf = aa.SequenceFeature()
        split, dict_scale = list_splits[0], df_scales.iloc[:, 0].to_dict()
        with pytest.raises(ValueError):
            # Via parametrized fixtures
            sf.add_feat_value(split=split, dict_scale=dict_scale, df_parts=corrupted_df_parts)


class TestFeatureMatrix:
    """Unit tests for getting feature matrix."""

    # Positive unit test
    def test_feature_matrix(self, df_seq, df_scales):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        features = sf.get_features()[0:100]
        feat_matrix = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=features)
        assert isinstance(feat_matrix, np.ndarray)
        assert feat_matrix.shape == (len(df_seq), len(features))
        feat_matrix = sf.feat_matrix(df_parts=df_parts, features=features)
        assert isinstance(feat_matrix, np.ndarray)

    # Negative test
    def test_missing_parameters(self, df_scales, df_seq):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        features = sf.get_features()[0:100]
        with pytest.raises(ValueError):
            sf.feat_matrix(df_parts=df_parts)
        with pytest.raises(ValueError):
            sf.feat_matrix(features=features)
        with pytest.raises(ValueError):
            sf.feat_matrix(df_scales=df_scales)
        with pytest.raises(ValueError):
            sf.feat_matrix(df_parts=df_parts, df_scales=df_scales)
        with pytest.raises(ValueError):
            sf.feat_matrix(df_scales=df_scales, features=features)

    def test_wrong_input(self, df_cat, df_seq, df_scales):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        features = sf.get_features()[0:100]
        list_wrong_input = [1, -1, "TMD", ["TMD"], None, [1, 2], ["aa", "a"],
                            [["tmd", "tmd_e"]], df_cat, [df_cat, df_seq], dict(a=1)]
        for wrong_input in list_wrong_input:
            with pytest.raises(ValueError):
                sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=wrong_input)
            if wrong_input is not None:  # None df_scales falls back to default scales
                with pytest.raises(ValueError):
                    sf.feat_matrix(df_parts=df_parts, df_scales=wrong_input, features=features)
                with pytest.raises(ValueError):
                    sf.feat_matrix(df_parts=wrong_input, df_scales=df_scales, features=features)

    def test_corrupted_df_parts(self, corrupted_df_parts, df_scales):
        sf = aa.SequenceFeature()
        features = sf.get_features()[0:100]
        with pytest.raises(ValueError):
            # Via parametrized fixtures
            sf.feat_matrix(df_parts=corrupted_df_parts, df_scales=df_scales, features=features)

    def test_corrupted_df_scales(self, corrupted_df_scales, df_seq):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        features = sf.get_features()[0:100]
        with pytest.raises(ValueError):
            # Via parametrized fixtures
            sf.feat_matrix(df_parts=df_parts, df_scales=corrupted_df_scales, features=features)

    def test_corrupted_features(self, df_scales, df_seq):
        sf = aa.SequenceFeature()
        df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True)
        features = sf.get_features()[0:100]
        corrupted_features = [features[0:5] + [np.nan], features[0:3] + ["Test"],
                              "a",
                              [[features[0:4]]],
                              [x.upper() for x in features[0:5]],
                              [x[0:5] for x in features[0:5]],
                              ["a".join(x.split("-")) for x in features[0:6]]]
        for bad_features in corrupted_features:  # renamed to avoid shadowing 'features'
            with pytest.raises(ValueError):
                sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=bad_features)


# II Regression test (Functional test)
def test_sequence_feature(list_splits):
    """Positive regression/functional test of all aa.SequenceFeature() methods."""
    sf = aa.SequenceFeature()
    # Get test set of sequences
    df_seq = sf.load_sequences()
    # Get feature components
    df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=False)
    df_scales = sf.load_scales()
    split_kws = sf.get_split_kws()
    # Get features (names, values, matrix)
    features = sf.get_features()[0:100]
    feat_matrix = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=features)
    assert isinstance(feat_matrix, np.ndarray)
    assert feat_matrix.shape == (len(df_seq), len(features))


# ===========================================================================
# File: tests/unit/test_data_loader.py (next file in the original patch)
# ===========================================================================
"""
This is a script for ...
"""
from hypothesis import given, settings, example
import hypothesis.strategies as some
import aaanalysis.utils as ut
import aaanalysis as aa
from pandas import DataFrame
import pytest


class TestLoadDataset:
    """Test load_dataset function."""

    # Property-based testing for positive cases
    def test_df_seq_output_columns(self):
        """Every dataset should contain the key sequence columns."""
        all_data_set_names = aa.load_dataset()["Dataset"].to_list()
        for name in all_data_set_names:
            df = aa.load_dataset(name=name)
            assert set(ut.COLS_SEQ_KEY).issubset(set(df))

    @given(n=some.integers(min_value=1, max_value=100))
    def test_load_dataset_n_value(self, n):
        """Test the 'n' parameter for limiting rows."""
        max_n = aa.load_dataset(name="SEQ_LOCATION")["label"].value_counts().min()
        if max_n > n:
            df = aa.load_dataset(name="SEQ_LOCATION", n=n)
            assert len(df) == (n * 2)

    @given(min_len=some.integers(min_value=400, max_value=1000))
    def test_load_dataset_min_len(self, min_len):
        """Test the 'min_len' parameter for filtering sequences."""
        df = aa.load_dataset(name="SEQ_LOCATION", min_len=min_len)
        assert all(len(seq) >= min_len for seq in df[ut.COL_SEQ])

    @given(max_len=some.integers(min_value=50, max_value=100))
    def test_load_dataset_max_len(self, max_len):
        """Test the 'max_len' parameter for filtering sequences."""
        df = aa.load_dataset(name="SEQ_LOCATION", max_len=max_len)
        assert all(len(seq) <= max_len for seq in df[ut.COL_SEQ])

    # Property-based testing for negative cases
    @given(n=some.integers(max_value=0))
    def test_load_dataset_invalid_n(self, n):
        """Test with an invalid 'n' value."""
        with pytest.raises(ValueError):
            aa.load_dataset(name="SEQ_LOCATION", n=n)

    @given(min_len=some.integers(max_value=0))
    def test_load_dataset_invalid_min_len(self, min_len):
        """Test with an invalid 'min_len' value."""
        with pytest.raises(ValueError):
            aa.load_dataset(name="SEQ_LOCATION", min_len=min_len)
        with pytest.raises(ValueError):
            aa.load_dataset(name="SEQ_AMYLO", min_len=10)

    @given(max_len=some.integers(max_value=0))
    def test_load_dataset_invalid_max_len(self, max_len):
        """Test with an invalid 'max_len' value."""
        with pytest.raises(ValueError):
            aa.load_dataset(name="SEQ_LOCATION", max_len=max_len)

    # Additional Negative Tests
    @given(n=some.integers(min_value=1000, max_value=1050))
    def test_load_dataset_n_value_too_high(self, n):
        """Test 'n' values larger than the smallest class size."""
        max_n = aa.load_dataset(name="SEQ_LOCATION")["label"].value_counts().min()
        if max_n < n:
            with pytest.raises(ValueError):
                aa.load_dataset(name="SEQ_LOCATION", n=n)

    @given(negative_n=some.integers(min_value=-100, max_value=-1))
    def test_load_dataset_negative_n(self, negative_n):
        """Test with a negative 'n' value."""
        with pytest.raises(ValueError):
            aa.load_dataset(name="SEQ_LOCATION", n=negative_n)

    @given(non_canonical_aa=some.text())
    @example(non_canonical_aa="invalid_option")
    def test_load_dataset_invalid_non_canonical_aa(self, non_canonical_aa):
        """Test with an invalid 'non_canonical_aa' value."""
        if non_canonical_aa not in ["remove", "keep", "gap"]:
            with pytest.raises(ValueError):
                aa.load_dataset(name="SEQ_LOCATION", non_canonical_aa=non_canonical_aa)


class TestLoadDatasetComplex:
    """Test load_dataset function with complex scenarios."""
    # NOTE(review): a third method (test_load_dataset_min_max_len, combining
    # 'min_len' and 'max_len') was cut at the chunk boundary — restore from VCS.

    def test_load_dataset_n_and_min_len(self):
        """Test the 'n' and 'min_len' parameters together."""
        df = aa.load_dataset(name="SEQ_LOCATION", n=10, min_len=5)
        assert len(df) == 10 * 2
        assert all(len(seq) >= 5 for seq in df[ut.COL_SEQ])

    def test_load_dataset_n_and_max_len(self):
        """Test the 'n' and 'max_len' parameters together."""
        df = aa.load_dataset(name="SEQ_LOCATION", n=10, max_len=200)
        assert len(df) == 10 * 2
        assert all(len(seq) <= 200 for seq in df[ut.COL_SEQ])
assert all(5 <= len(seq) <= 200 for seq in df[ut.COL_SEQ]) + + def test_load_dataset_min_max_len_and_n(self): + """Test 'min_len', 'max_len', and 'n' together.""" + df = aa.load_dataset(name="SEQ_LOCATION", min_len=5, max_len=200, n=10) + assert len(df) == 10 * 2 + assert all(5 <= len(seq) <= 200 for seq in df[ut.COL_SEQ]) + + def test_load_dataset_all_filters(self): + """Test all filters together ('n', 'min_len', 'max_len', 'non_canonical_aa').""" + df = aa.load_dataset(name="SEQ_LOCATION", n=10, min_len=5, max_len=200, non_canonical_aa="remove") + assert len(df) == 10 * 2 + assert all(5 <= len(seq) <= 200 for seq in df[ut.COL_SEQ]) + # Add your assertion to check if non-canonical amino acids are removed + + def test_load_dataset_invalid_min_max_len(self): + """Test with 'min_len' greater than 'max_len'.""" + with pytest.raises(ValueError): + aa.load_dataset(name="SEQ_LOCATION", min_len=10, max_len=5) + + def test_load_dataset_invalid_min_max_len_and_n(self): + """Test with 'min_len' greater than 'max_len' and a valid 'n'.""" + with pytest.raises(ValueError): + aa.load_dataset(name="SEQ_LOCATION", min_len=10, max_len=5, n=10) + + def test_load_dataset_invalid_all_filters(self): + """Test with all invalid filters ('n', 'min_len', 'max_len', 'non_canonical_aa').""" + with pytest.raises(ValueError): + aa.load_dataset(name="SEQ_LOCATION", n=-1, min_len=10, max_len=5, non_canonical_aa="invalid_option") + diff --git a/tests/unit/test_dpulearn.py b/tests/unit/test_dpulearn.py new file mode 100644 index 00000000..2463d63c --- /dev/null +++ b/tests/unit/test_dpulearn.py @@ -0,0 +1,31 @@ +""" +This is a script for ... 
+""" +import time +import pandas as pd +import numpy as np + + +# Settings +pd.set_option('expand_frame_repr', False) # Single line print for pd.Dataframe + + +# I Helper Functions + + +# II Main Functions + + +# III Test/Caller Functions + + +# IV Main +def main(): + t0 = time.time() + + t1 = time.time() + print("Time:", t1 - t0) + + +if __name__ == "__main__": + main() diff --git a/tests/unit/test_plotting.py b/tests/unit/test_plotting.py new file mode 100644 index 00000000..2463d63c --- /dev/null +++ b/tests/unit/test_plotting.py @@ -0,0 +1,31 @@ +""" +This is a script for ... +""" +import time +import pandas as pd +import numpy as np + + +# Settings +pd.set_option('expand_frame_repr', False) # Single line print for pd.Dataframe + + +# I Helper Functions + + +# II Main Functions + + +# III Test/Caller Functions + + +# IV Main +def main(): + t0 = time.time() + + t1 = time.time() + print("Time:", t1 - t0) + + +if __name__ == "__main__": + main()