diff --git a/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc b/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc
index 9afbe317..77532fa5 100644
Binary files a/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc and b/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc differ
diff --git a/aaanalysis/data_loader/data_loader.py b/aaanalysis/data_loader/data_loader.py
index 0bfee94f..3cd41dd2 100644
--- a/aaanalysis/data_loader/data_loader.py
+++ b/aaanalysis/data_loader/data_loader.py
@@ -186,7 +186,7 @@ def load_dataset(name: str = "INFO",
 
     See Also
     --------
-    * Overview of all benchmarks in :ref:`1_overview_benchmarks`.
+    * Overview of all benchmarks in :ref:`t1_overview_benchmarks`.
     * Step-by-step guide in the `data loading tutorial <tutorial2_data_loader.html>`_.
 
     Examples
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
index 4ff85bb4..b67e3ce5 100644
Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ
diff --git a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree
index 16bcf26c..e0caca53 100644
Binary files a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree and b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree differ
diff --git a/docs/build/doctrees/index/tables.doctree b/docs/build/doctrees/index/tables.doctree
index a183fda9..e1e172ec 100644
Binary files a/docs/build/doctrees/index/tables.doctree and b/docs/build/doctrees/index/tables.doctree differ
diff --git a/docs/build/html/_sources/index/tables.rst.txt b/docs/build/html/_sources/index/tables.rst.txt
index 89b21fb5..2d37e8bb 100644
--- a/docs/build/html/_sources/index/tables.rst.txt
+++ b/docs/build/html/_sources/index/tables.rst.txt
@@ -5,28 +5,35 @@
    for tables.rst, which is automatically generated based on the information here and
    in the .csv tables from the /tables directory.
 
-   To add a new table:
-   1. Save it as a .csv file in the /tables directory.
-   2. Add an entry for it in the "Overview Table" section below.
-   3. Add a new section describing it, including each column and any important data types (e.g., categories).
+   Instructions for adding a new table:
+   1. Store the table as a .csv file in the index/tables directory. Name it with the prefix tX
+      (e.g., t1_overview_benchmarks), where X is one higher than the number of the last entry.
+   2. Add a corresponding entry for the new table to t0_mapper.xlsx.
+   3. Create a new section here that describes the table's columns and any
+      essential data types, such as categories.
 
    Note: Each table should include a 'Reference' column.
 
-   Ignore the warning: 'tables_template.rst: WARNING: document isn't included in any toctree.'
+   # Key Annotations for Automated Table Generation via create_tables_doc.py:
+   _XXX: A string to be stripped from the references. This prevents redundancies that may result
+         in broken links.
+   ADD-TABLE: Placeholder indicating where tables for the corresponding section should be inserted.
 ..
 
 Tables
-======================
+======
 
 .. contents::
     :local:
     :depth: 1
 
+.. _t0_mapper:
+
 Overview Table
 --------------
 All tables from the AAanalysis documentation are listed here, in chronological order based on the project history.
 
-.. _0_mapper:
+ADD-TABLE
 
 .. list-table::
    :header-rows: 1
@@ -35,14 +42,16 @@ All tables from the AAanalysis documentation are listed here, in chronological o
    * - Table
      - Description
      - See Also
-   * - 1_overview_benchmarks
+   * - t1_overview_benchmarks
      - Protein benchmark datasets
      - aa.load_dataset
-   * - 2_overview_scales
+   * - t2_overview_scales
      - Amino acid scale datasets
      - aa.load_scales
 
 
+.. _t1_overview_benchmarks:
+
 Protein Benchmark Datasets
 --------------------------
 Three types of benchmark datasets are provided:
@@ -55,7 +64,7 @@ Datasets are named beginning with a classification (e.g., 'AA_LDR', 'DOM_GSEC',
 Some datasets have an additional version for positive-unlabeled (PU) learning containing only positive (1)
 and unlabeled (2) data samples, as indicated by appending '_PU' to the dataset name (e.g., 'DOM_GSEC_PU').
 
-.. _1_overview_benchmarks:
+ADD-TABLE
 
 .. list-table::
    :header-rows: 1
@@ -213,11 +222,13 @@ and unlabeled (2) data samples, as indicated by appending '_PU' to the dataset n
      - 1 (substrate), 2 (unknown substrate status)
 
 
+.. _t2_overview_scales:
+
 Amino Acid Scale Datasets
 -------------------------
 Various amino acid scale datasets are provided.
 
-.. _2_overview_scales:
+ADD-TABLE
 
 .. list-table::
    :header-rows: 1
@@ -252,3 +263,4 @@ Various amino acid scale datasets are provided.
      - 60
      - :ref:`Breimann23a <Breimann23a>`
 
+
diff --git a/docs/build/html/generated/aaanalysis.load_dataset.html b/docs/build/html/generated/aaanalysis.load_dataset.html
index adf70680..56f73bac 100644
--- a/docs/build/html/generated/aaanalysis.load_dataset.html
+++ b/docs/build/html/generated/aaanalysis.load_dataset.html
@@ -149,7 +149,7 @@ <h1>aaanalysis.load_dataset<a class="headerlink" href="#aaanalysis-load-dataset"
 </ul>
 </dd>
 <dt class="field-even">Returns<span class="colon">:</span></dt>
-<dd class="field-even"><p>Dataframe (df_seq) containing the selected sequence dataset.</p>
+<dd class="field-even"><p>Dataframe (df_seq) with the selected sequence dataset.</p>
 </dd>
 <dt class="field-odd">Return type<span class="colon">:</span></dt>
 <dd class="field-odd"><p>DataFrame</p>
@@ -158,7 +158,7 @@ <h1>aaanalysis.load_dataset<a class="headerlink" href="#aaanalysis-load-dataset"
 <div class="admonition seealso">
 <p class="admonition-title">See also</p>
 <ul class="simple">
-<li><p>Overview of all benchmarks in <span class="xref std std-ref">1_overview_benchmarks</span>.</p></li>
+<li><p>Overview of all benchmarks in <a class="reference internal" href="../index/tables.html#t1-overview-benchmarks"><span class="std std-ref">Protein Benchmark Datasets</span></a>.</p></li>
 <li><p>Step-by-step guide in the <a class="reference external" href="tutorial2_data_loader.html">data loading tutorial</a>.</p></li>
 </ul>
 </div>
diff --git a/docs/build/html/index/tables.html b/docs/build/html/index/tables.html
index 0cfc4b0a..db83a8b5 100644
--- a/docs/build/html/index/tables.html
+++ b/docs/build/html/index/tables.html
@@ -126,9 +126,10 @@ <h1>Tables<a class="headerlink" href="#tables" title="Permalink to this heading"
 </ul>
 </nav>
 <section id="overview-table">
-<h2><a class="toc-backref" href="#id1" role="doc-backlink">Overview Table</a><a class="headerlink" href="#overview-table" title="Permalink to this heading"></a></h2>
+<span id="t0-mapper"></span><h2><a class="toc-backref" href="#id1" role="doc-backlink">Overview Table</a><a class="headerlink" href="#overview-table" title="Permalink to this heading"></a></h2>
 <p>All tables from the AAanalysis documentation are listed here, in chronological order based on the project history.</p>
-<table class="docutils align-default" id="mapper">
+<p>ADD-TABLE</p>
+<table class="docutils align-default">
 <colgroup>
 <col style="width: 33%" />
 <col style="width: 33%" />
@@ -141,11 +142,11 @@ <h2><a class="toc-backref" href="#id1" role="doc-backlink">Overview Table</a><a
 </tr>
 </thead>
 <tbody>
-<tr class="row-even"><td><p>1_overview_benchmarks</p></td>
+<tr class="row-even"><td><p>t1_overview_benchmarks</p></td>
 <td><p>Protein benchmark datasets</p></td>
 <td><p>aa.load_dataset</p></td>
 </tr>
-<tr class="row-odd"><td><p>2_overview_scales</p></td>
+<tr class="row-odd"><td><p>t2_overview_scales</p></td>
 <td><p>Amino acid scale datasets</p></td>
 <td><p>aa.load_scales</p></td>
 </tr>
@@ -153,7 +154,7 @@ <h2><a class="toc-backref" href="#id1" role="doc-backlink">Overview Table</a><a
 </table>
 </section>
 <section id="protein-benchmark-datasets">
-<h2><a class="toc-backref" href="#id2" role="doc-backlink">Protein Benchmark Datasets</a><a class="headerlink" href="#protein-benchmark-datasets" title="Permalink to this heading"></a></h2>
+<span id="t1-overview-benchmarks"></span><h2><a class="toc-backref" href="#id2" role="doc-backlink">Protein Benchmark Datasets</a><a class="headerlink" href="#protein-benchmark-datasets" title="Permalink to this heading"></a></h2>
 <p>Three types of benchmark datasets are provided:</p>
 <ul class="simple">
 <li><p>Residue prediction (AA): Datasets used to predict specific properties of amino acid residues.</p></li>
@@ -163,7 +164,8 @@ <h2><a class="toc-backref" href="#id2" role="doc-backlink">Protein Benchmark Dat
 <p>Datasets are named beginning with a classification (e.g., ‘AA_LDR’, ‘DOM_GSEC’, ‘SEQ_AMYLO’).
 Some datasets have an additional version for positive-unlabeled (PU) learning containing only positive (1)
 and unlabeled (2) data samples, as indicated by appending ‘_PU’ to the dataset name (e.g., ‘DOM_GSEC_PU’).</p>
-<table class="docutils align-default" id="overview-benchmarks">
+<p>ADD-TABLE</p>
+<table class="docutils align-default">
 <colgroup>
 <col style="width: 10%" />
 <col style="width: 10%" />
@@ -348,9 +350,10 @@ <h2><a class="toc-backref" href="#id2" role="doc-backlink">Protein Benchmark Dat
 </table>
 </section>
 <section id="amino-acid-scale-datasets">
-<h2><a class="toc-backref" href="#id3" role="doc-backlink">Amino Acid Scale Datasets</a><a class="headerlink" href="#amino-acid-scale-datasets" title="Permalink to this heading"></a></h2>
+<span id="t2-overview-scales"></span><h2><a class="toc-backref" href="#id3" role="doc-backlink">Amino Acid Scale Datasets</a><a class="headerlink" href="#amino-acid-scale-datasets" title="Permalink to this heading"></a></h2>
 <p>Various amino acid scale datasets are provided.</p>
-<table class="docutils align-default" id="overview-scales">
+<p>ADD-TABLE</p>
+<table class="docutils align-default">
 <colgroup>
 <col style="width: 25%" />
 <col style="width: 25%" />
diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv
index d6b6af68..cdf88987 100644
Binary files a/docs/build/html/objects.inv and b/docs/build/html/objects.inv differ
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index e6e4db13..a096414c 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_set_legend", "generated/aaanalysis.plot_settings", "generated/tutorial2_data_loader", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tutorials"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_set_legend.rst", "generated/aaanalysis.plot_settings.rst", "generated/tutorial2_data_loader.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles.rst", "index/usage_principles/aaontology.rst", "index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tutorials.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", 
"aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_cmap", "aaanalysis.plot_set_legend", "aaanalysis.plot_settings", "Data loading", "Welcome to the AAanalysis documentation!", "Contributing", "&lt;no title&gt;", "&lt;no title&gt;", "Introduction", "&lt;no title&gt;", "References", "Tables", "Usage Principles", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tutorials"], "terms": {"thi": [0, 1, 3, 12, 15], "page": [0, 14], "contain": [0, 2, 3, 5, 6, 13, 15, 21, 24, 26], "refer": [0, 1, 2, 4, 15, 21], "public": [0, 14, 15, 17], "object": [0, 1, 3, 4, 5], "function": [0, 3, 8, 10, 12, 14, 19], "aaanalysi": [0, 13, 15, 17, 18, 19, 21, 22, 24, 25], "see": [0, 3, 15, 21], "more": [0, 3, 15], "exampl": [0, 15, 26], "practic": 0, "usag": [0, 14, 15], "our": [0, 15], "tutori": [0, 6, 14, 15], "For": [0, 1, 4, 11, 13, 15, 26], "conveni": 0, "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 13, 14, 15, 18, 19, 23, 25], "common": [0, 15], "import": [0, 4, 5, 6, 11, 12, 13, 15, 22], "modul": [0, 1, 14], "follow": [0, 1, 2, 4, 5, 14, 15, 17, 18, 19, 22], "aa": [0, 2, 4, 5, 6, 11, 12, 13, 21, 22], "Then": 0, "you": [0, 14, 15, 17], "can": [0, 1, 4, 5, 7, 11, 14, 15, 18, 24, 26], "access": [0, 21], "all": [0, 1, 2, 3, 4, 6, 12, 15, 21], "method": [0, 1, 2, 3, 4, 5, 20], "via": [0, 15, 20], "alia": [0, 4], "load_dataset": [0, 4, 13, 21], "class": [1, 2, 3, 4, 5, 6, 26], "model": [1, 5, 15, 26], "none": [1, 2, 3, 4, 5, 6, 9, 10, 11], "model_kwarg": 1, "verbos": [1, 2, 3, 4, 5, 12], "fals": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15], "base": [1, 2, 3, 4, 5, 10, 14, 15, 18, 19, 20, 21, 25, 26], "A": [1, 4, 7, 11, 15, 18, 20], "k": [1, 14, 18, 19, 20], "optim": [1, 2, 3, 14, 18, 19, 20], 
"cluster": [1, 14, 18, 19, 20], "framework": [1, 14, 18, 19], "select": [1, 2, 3, 6, 7, 14, 15, 18, 19, 20], "redund": [1, 2, 14, 18, 19, 20], "reduc": [1, 5, 14, 18, 19, 20], "set": [1, 2, 3, 4, 5, 6, 8, 11, 12, 14, 15, 18, 19, 20, 21, 24], "numer": [1, 3, 4, 14, 18, 19], "scale": [1, 2, 3, 4, 7, 9, 10, 12, 14, 17, 18, 19, 20, 22, 24], "design": [1, 3, 15, 25], "primarili": [1, 5, 15], "amino": [1, 2, 3, 4, 6, 7, 13, 14, 17, 18, 19, 20, 22, 24, 26], "acid": [1, 2, 3, 4, 6, 7, 13, 14, 17, 18, 19, 20, 22, 24, 26], "versatil": 1, "enough": 1, "ani": [1, 15, 18], "indic": [1, 3, 4, 5, 13, 21], "It": [1, 18], "take": 1, "requir": 1, "pre": [1, 2, 15], "defin": [1, 4, 15], "number": [1, 2, 3, 4, 5, 6, 10, 11], "from": [1, 2, 3, 4, 5, 6, 7, 14, 15, 21, 22, 24], "scikit": [1, 15], "learn": [1, 5, 13, 14, 15, 17, 18, 19, 20, 21, 22], "http": [1, 15], "org": [1, 15], "stabl": 1, "html": [1, 15], "By": 1, "leverag": 1, "pearson": [1, 2], "correl": [1, 2], "similar": [1, 26], "measur": [1, 15], "valu": [1, 2, 3, 4, 15, 18, 21], "one": [1, 3], "repres": [1, 3, 18], "sampl": [1, 2, 3, 4, 5, 13, 21, 26], "term": 1, "medoid": 1, "each": [1, 2, 3, 4, 5, 15], "which": [1, 3, 4, 8, 18, 24, 26], "closest": 1, "": [1, 11, 15, 20], "center": [1, 10], "yield": 1, "paramet": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12], "callabl": 1, "option": [1, 2, 3, 4, 5, 6, 7, 10, 12], "default": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13], "sklearn": 1, "kmean": 1, "The": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, 24, 25], "emploi": [1, 5], "given": [1, 3, 4, 6, 7], "n_cluster": 1, "dict": [1, 2, 3, 4, 5, 9, 10, 11], "dictionari": [1, 2, 3, 4, 9, 10, 11], "keyword": [1, 3, 5], "argument": [1, 3, 4, 5, 11], "pass": [1, 3, 5, 11, 15], "bool": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "flag": 1, "enabl": [1, 2, 3, 4, 5, 12, 14, 15, 18, 19, 25], "disabl": [1, 6], "output": [1, 4, 5, 12], "obtain": [1, 4, 24], "type": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, 21], "int": [1, 2, 3, 4, 5, 6, 10, 11], "labels_": [1, 5], "label": [1, 
2, 3, 4, 5, 11, 13, 15, 21, 26], "order": [1, 21], "featur": [1, 2, 3, 4, 5, 10, 14, 15, 18, 19, 24, 25, 26], "matrix": [1, 4, 5, 21], "arrai": [1, 2, 4, 5], "like": [1, 2, 4, 5, 15], "centers_": 1, "averag": [1, 4], "correspond": [1, 15], "center_labels_": 1, "medoids_": 1, "medoid_labels_": 1, "medoid_ind_": 1, "chosen": [1, 2, 4, 6], "within": [1, 2, 4], "origin": 1, "dataset": [1, 2, 6, 7, 13, 14, 15, 18, 19, 26, 27], "__init__": [1, 2, 3, 4, 5], "fit": [1, 5, 15], "x": [1, 3, 5, 6, 11, 12], "name": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 21], "on_cent": 1, "true": [1, 2, 3, 4, 6, 7, 11, 12], "min_th": 1, "0": [1, 2, 3, 4, 5, 11, 12, 13, 21, 26], "merge_metr": 1, "euclidean": [1, 5], "data": [1, 3, 5, 6, 14, 15, 21, 22, 28], "format": [1, 12], "us": [1, 2, 3, 5, 6, 10, 12, 14, 15, 17, 18, 21, 22, 24, 26], "determin": 1, "without": [1, 3, 15, 21], "specif": [1, 9, 15, 21, 24], "partit": 1, "maxim": 1, "beyond": 1, "threshold": [1, 2], "qualiti": 1, "either": [1, 4, 14], "minimum": [1, 4, 6], "member": 1, "min_cor": 1, "between": [1, 2, 3, 4, 10, 11], "its": [1, 15], "govern": 1, "undergo": 1, "three": [1, 4, 10, 21], "stage": 1, "1": [1, 2, 3, 4, 5, 11, 12, 13, 21, 26], "estim": 1, "lower": 1, "bound": 1, "2": [1, 2, 3, 4, 5, 11, 13, 21, 26], "refin": 1, "metric": [1, 5, 15], "3": [1, 4, 5, 11, 13, 15, 21], "merg": 1, "smaller": 1, "direct": 1, "final": 1, "reduct": 1, "shape": [1, 2, 3, 4, 5, 11], "n_sampl": [1, 2, 4, 5], "n_featur": [1, 2, 3, 4, 5], "where": [1, 4, 5], "list": [1, 3, 4, 10, 11, 21], "str": [1, 3, 4, 5, 6, 7, 9, 10, 11, 12], "If": [1, 2, 3, 4, 5, 6, 10, 12, 14, 15, 17, 26], "provid": [1, 2, 3, 5, 7, 10, 13, 14, 15, 19, 21, 26], "return": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11], "appli": [1, 5, 10, 11, 12], "otherwis": [1, 3, 4, 5], "float": [1, 2, 3, 5, 10, 11, 12], "instead": 1, "names_medoid": 1, "attribut": 1, "attr": 1, "further": [1, 3, 15], "inform": [1, 2, 3, 4, 5, 24], "paper": 1, "todo": [1, 2], "add": [1, 2, 3, 4], "link": [1, 2, 14, 15, 17, 
20], "cluster_nam": 1, "name_unclassifi": 1, "unclassifi": [1, 7], "assign": [1, 3, 4, 5], "frequenc": 1, "renam": 1, "prioriti": 1, "most": [1, 2, 3, 5, 14, 18, 19], "frequent": 1, "alreadi": [1, 26], "doe": 1, "exist": [1, 15, 26], "cannot": 1, "classifi": [1, 3], "static": [1, 2, 4], "get_cluster_cent": 1, "comput": [1, 2, 3, 4, 15, 20], "center_label": 1, "associ": 1, "get_cluster_medoid": 1, "medoid_label": 1, "medoid_ind": 1, "index": [1, 6, 14, 15, 20], "x_test": 1, "x_ref": 1, "labels_test": 1, "labels_ref": 1, "n": [1, 2, 3, 4, 6, 15, 20], "posit": [1, 2, 3, 4, 5, 10, 13, 14, 18, 19, 21, 26], "except_unclassifi": 1, "test": [1, 2], "top": [1, 21], "consid": [1, 15], "strength": 1, "els": 1, "neg": [1, 4, 5, 10, 13, 21, 26], "exclud": 1, "list_top_center_name_corr": 1, "have": [1, 15, 21, 26], "strongest": 1, "eval": [1, 2, 5, 15], "df_scale": [2, 4, 24], "df_cat": [2, 3, 4, 24], "df_part": [2, 4, 24], "split_kw": [2, 4], "accept_gap": [2, 3, 4], "tool": [2, 15, 20], "creat": [2, 3, 4, 5, 15, 24], "filter": [2, 3, 6], "ar": [2, 3, 4, 5, 6, 7, 13, 15, 21, 24, 26, 27], "discrimin": [2, 3], "two": [2, 3, 14, 15, 18, 19, 20, 23, 24], "sequenc": [2, 3, 4, 5, 6, 7, 13, 14, 15, 18, 19, 20, 21, 22, 24, 25, 26], "panda": [2, 3, 4, 5, 15], "datafram": [2, 3, 4, 5, 6, 7, 15, 24], "load_categori": [2, 4], "categori": [2, 3, 4, 7, 9, 10, 11], "physicochem": [2, 4, 14, 18, 19, 20, 22, 24], "part": [2, 3, 4, 15, 24], "sequencefeatur": 2, "get_split_kw": [2, 4], "nest": [2, 4], "split_typ": [2, 4], "whether": [2, 3, 4, 7, 10, 11], "accept": [2, 3, 4], "miss": [2, 3, 4], "omit": [2, 3, 4], "print": [2, 3, 4, 13], "progress": [2, 3, 20], "about": [2, 3], "algorithm": [2, 3, 14, 15, 18, 19, 24, 25], "run": [2, 4], "perform": [2, 5], "step": [2, 3, 4, 6, 15, 18], "parametr": 2, "n_filter": 2, "100": [2, 6, 10], "tmd_len": [2, 3, 4], "20": [2, 3, 4, 15, 21], "jmd_n_len": [2, 3, 4], "10": [2, 3, 4, 10, 13], "jmd_c_len": [2, 3, 4], "ext_len": [2, 3, 4], "4": [2, 3, 4, 13], 
"start": [2, 3, 4, 15, 22, 24], "check_cat": 2, "n_pre_filt": 2, "pct_pre_filt": 2, "5": [2, 3, 4, 5, 11, 13], "max_std_test": 2, "max_overlap": 2, "max_cor": 2, "n_process": 2, "pipelin": [2, 15], "creation": 2, "aim": [2, 3, 15], "identifi": [2, 3, 5, 14, 18, 19, 20, 22, 26], "collect": 2, "non": [2, 4, 6, 21], "group": [2, 3, 4], "t": [2, 6], "u": [2, 14, 15], "p": [2, 20], "percentag": [2, 5, 10], "length": [2, 3, 4, 6], "tmd": [2, 3, 4, 13], "explan": [2, 3, 15], "first": [2, 3, 4, 10, 15], "terminu": [2, 3, 4], "jmd": [2, 3, 4], "c": [2, 3, 4, 14, 20], "extend": [2, 3, 4, 15, 26], "termin": [2, 3, 4], "should": [2, 3, 4, 5, 7, 15, 26], "longer": 2, "than": 2, "check": [2, 15], "remain": [2, 15], "after": 2, "maximum": [2, 4, 5, 6], "standard": [2, 26], "deviat": 2, "overlap": 2, "cpu": 2, "multiprocess": 2, "automat": [2, 3, 5, 15], "df_feat": [2, 3, 4, 24], "uniqu": [2, 3], "statist": [2, 3], "n_feature_inform": [2, 3], "eleven": 2, "column": [2, 3, 4, 5, 6, 11, 15], "includ": [2, 4, 7, 10, 11, 15], "id": [2, 4], "result": 2, "rank": 2, "11": [2, 3, 11, 13], "split": [2, 4, 24], "subcategori": [2, 3], "sub": 2, "scale_nam": [2, 3], "abs_auc": [2, 3], "absolut": 2, "adjust": [2, 3, 12], "auc": 2, "abs_mean_dif": 2, "mean": [2, 3], "differ": [2, 3, 4, 11, 24], "std_test": [2, 3], "std_ref": 2, "p_val": 2, "mann_whitnei": 2, "ttest_indep": 2, "p_val_fdr_bh": 2, "benjamini": 2, "hochberg": 2, "fdr": 2, "correct": 2, "get": [2, 4, 8, 22], "evalu": [2, 7, 15, 21], "condit": [3, 4], "jmd_m_len": [3, 4], "profil": [3, 9, 10, 14, 18, 19, 25], "y": [3, 11, 12], "val_col": 3, "mean_dif": 3, "val_typ": 3, "count": [3, 7], "normal": [3, 11, 21], "figsiz": 3, "7": [3, 4, 5, 12, 13], "titl": [3, 11], "title_kw": 3, "dict_color": [3, 9, 10, 11], "edge_color": 3, "bar_width": 3, "75": 3, "add_jmd_tmd": 3, "jmd_n_seq": 3, "tmd_seq": 3, "jmd_c_seq": 3, "tmd_color": 3, "mediumspringgreen": 3, "jmd_color": 3, "blue": [3, 11], "tmd_seq_color": 3, "black": [3, 15], 
"jmd_seq_color": 3, "white": 3, "seq_siz": 3, "tmd_jmd_fontsiz": 3, "xtick_siz": 3, "xtick_width": 3, "xtick_length": 3, "xticks_po": 3, "ytick_siz": 3, "ytick_width": 3, "ytick_length": 3, "ylim": 3, "highlight_tmd_area": 3, "highlight_alpha": 3, "15": [3, 4], "grid": [3, 12], "grid_axi": [3, 12], "both": [3, 12], "add_legend_cat": 3, "legend_kw": 3, "shap_plot": 3, "kwarg": [3, 4, 11], "plot": [3, 9, 10, 11, 12, 14, 15], "instanc": 3, "avail": [3, 14, 17, 20], "specifi": [3, 4, 5, 9, 10, 12, 15], "check_value_typ": 3, "tupl": [3, 10], "size": [3, 4, 8, 10, 11, 12], "custom": [3, 11, 12], "appear": [3, 12], "map": [3, 4, 10, 11], "color": [3, 9, 10, 11], "edg": [3, 11, 15], "bar": [3, 9, 10], "width": [3, 11], "line": [3, 11], "annot": 3, "font": [3, 8, 11, 12], "tick": [3, 12], "axi": [3, 12], "limit": 3, "highlight": 3, "area": 3, "alpha": 3, "ad": 3, "drawn": 3, "legend": [3, 11], "shap": [3, 10, 15], "shaplei": 3, "addit": [3, 4, 5, 11, 12, 13, 21], "gener": [3, 4, 10, 12, 15, 18, 20, 26], "other": [3, 7, 15], "intern": 3, "librari": [3, 12, 15], "ax": [3, 11], "matplotlib": [3, 11, 12, 15], "heatmap": [3, 9, 10], "8": [3, 4, 5, 13, 15], "vmin": 3, "vmax": 3, "grid_on": 3, "cmap": [3, 9, 10], "rdbu_r": 3, "cmap_n_color": 3, "cbar_kw": 3, "facecolor_dark": [3, 10], "add_importance_map": 3, "cbar_pct": 3, "featuremap": 3, "versu": 3, "wrapper": [3, 14, 15, 18, 19], "seaborn": [3, 10, 12, 15], "level": [3, 6, 13, 14, 15, 19, 21, 22, 23], "e": [3, 4, 9, 10, 12, 13, 14, 15, 18, 19, 21, 26], "g": [3, 4, 9, 10, 12, 13, 14, 15, 18, 19, 21, 26], "protein": [3, 4, 6, 14, 15, 18, 19, 20, 24, 25, 26], "shown": 3, "feat_impact": 3, "displai": 3, "sum": 3, "std": 3, "aggreg": 3, "positions_onli": 3, "across": [3, 15], "recommend": [3, 5, 15], "when": [3, 5], "emphas": [3, 15], "fewer": 3, "value_typ": 3, "height": 3, "figur": 3, "inch": 3, "pyplot": [3, 11], "anchor": [3, 11], "colormap": 3, "infer": [3, 15], "seismic": 3, "space": [3, 5, 10, 11], "impact": 3, "discret": 3, 
"diverg": 3, "sequenti": 3, "kei": [3, 15], "colorbar": 3, "under": [3, 15], "depicet": 3, "depict": 3, "jmd_n": [3, 4, 13], "jmd_c": [3, 4, 13], "point": [3, 11, 22], "set_xticklabel": 3, "widht": 3, "tick_param": 3, "classif": [3, 7, 14, 19, 21, 22, 26], "pcolormesh": 3, "effect": [3, 15, 26], "onli": [3, 6, 7, 13, 15, 21, 26], "align": [3, 11], "applic": 3, "document": [3, 21], "detail": [3, 6, 11, 14, 15, 17], "cpp": [3, 4, 10, 14, 17, 18, 19, 22, 24], "code": [3, 10], "update_seq_s": 3, "retriev": [4, 9, 10], "compon": [4, 5, 21], "continu": 4, "subset": [4, 21], "domain": [4, 6, 13, 21], "transmembran": 4, "membran": [4, 21], "principl": [4, 14], "distinct": [4, 14, 15, 18, 19], "segment": 4, "pattern": 4, "properti": [4, 21], "express": 4, "present": 4, "realiz": 4, "over": 4, "valid": [4, 15], "tmd_e": 4, "tmd_n": 4, "tmd_c": 4, "ext_c": 4, "ext_n": 4, "tmd_jmd": 4, "jmd_n_tmd_n": 4, "tmd_c_jmd_c": 4, "ext_n_tmd_n": 4, "tmd_c_ext_c": 4, "get_df_part": 4, "df_seq": [4, 5, 6, 24], "list_part": 4, "all_part": 4, "datafran": 4, "compris": 4, "tmd_start": [4, 13], "tmd_stop": [4, 13], "string": [4, 10], "len": 4, "must": 4, "lenght": 4, "resp": 4, "extra": 4, "possibl": [4, 26], "found": [4, 7, 15], "sf": 4, "dom_gsec": [4, 13, 21], "n_split_min": 4, "n_split_max": 4, "steps_pattern": 4, "n_min": 4, "n_max": 4, "len_max": 4, "steps_periodicpattern": 4, "periodicpattern": 4, "greater": 4, "greatest": 4, "whole": [4, 6], "specfii": 4, "smallest": 4, "integ": 4, "6": [4, 13], "vari": 4, "paramt": 4, "argumetn": 4, "get_featur": 4, "load_scal": [4, 14, 19, 21], "combin": [4, 15], "form": 4, "feat_matrix": 4, "n_job": 4, "return_label": 4, "pd": [4, 5, 15], "seri": 4, "job": 4, "parallel": 4, "spars": 4, "feat_nam": 4, "convert": 4, "depend": 4, "last": 4, "step1": 4, "step2": 4, "add_feat_valu": 4, "dict_scal": 4, "convent": 4, "letter": 4, "feature_valu": 4, "n_part": 4, "ha": [4, 15], "structur": [4, 20], "th": 4, "n_split": 4, "p1": 4, "p2": 4, "pn": 4, "end": 4, 
"odd": 4, "even": 4, "give": 4, "add_dif": 4, "sample_nam": 4, "ref_group": 4, "add_posit": 4, "part_split": 4, "feat_posit": 4, "total": [4, 5], "n_compon": 5, "pca_kwarg": 5, "determinist": [5, 14, 18, 19], "unlabel": [5, 13, 14, 18, 19, 21, 26], "offer": [5, 15], "approach": [5, 26], "pu": [5, 13, 14, 18, 19, 21], "princip": [5, 21], "analysi": [5, 6, 7, 14, 15, 18, 19], "pca": 5, "dimension": [5, 20], "pc": [5, 21], "iter": 5, "reliabl": [5, 15], "These": [5, 15, 26], "those": 5, "distant": 5, "altern": [5, 26], "also": [5, 15, 21], "distanc": 5, "manhattan": 5, "cosin": 5, "80": 5, "cover": 5, "varianc": 5, "identif": [5, 20], "datapoint": 5, "inspir": [5, 15], "techniqu": [5, 26], "an": [5, 6, 13, 14, 15, 17, 20, 21], "theoret": 5, "high": [5, 20], "n_neg": 5, "label_po": 5, "name_neg": 5, "rel_neg": 5, "col_class": 5, "newli": 5, "updat": [5, 15], "new": [5, 15], "store": 5, "Will": 5, "dure": 5, "initi": 5, "small": [5, 14, 15, 18, 19, 22, 27], "datafor": 5, "conta": 5, "po": 5, "unl": 5, "numpi": [5, 15], "np": 5, "atgc": 5, "gcta": 5, "actg": 5, "tacg": 5, "mode": 5, "modifi": [5, 6, 12, 24], "dpul": 5, "info": 6, "random": 6, "non_canonical_aa": 6, "remov": [6, 12], "min_len": 6, "max_len": 6, "aa_window_s": 6, "9": [6, 13, 15], "load": [6, 7, 14, 15, 19, 28], "benchmark": [6, 13, 14, 19], "categor": [6, 13], "dom": [6, 13, 21], "seq": [6, 13, 21], "overview": [6, 13, 15], "tabl": [6, 15], "breimann23a": [6, 20, 21], "per": 6, "liter": 6, "keep": 6, "gap": [6, 10], "canon": 6, "don": 6, "replac": 6, "symbol": 6, "window": 6, "aa_": 6, "1_overview_benchmark": [6, 21], "guid": [6, 15], "seq_amylo": [6, 13, 21], "just_aaindex": 7, "unclassified_in": 7, "aaontologi": [7, 14, 17, 19, 20, 21, 22], "thorough": 7, "residu": [7, 20, 21], "scales_raw": [7, 21], "scales_cat": 7, "scales_pc": [7, 21], "top60": [7, 21], "top60_ev": [7, 21], "relev": 7, "aaindex": [7, 20], "current": 8, "ut": 8, "plot_set": 8, "dict_scale_cat": [9, 10], "cppplot": [9, 10, 15], 
"respect": [9, 10, 14, 15, 17], "n_color": 10, "color_po": 10, "color_neg": 10, "color_cent": 10, "input": [10, 15, 24], "hex": 10, "pct_gap": 10, "pct_center": 10, "palett": 10, "feat": 10, "ggplot": 10, "datagroup": 10, "dark": 10, "face": 10, "rgb": 10, "hl": 10, "husl": 10, "xkcd": 10, "interpret": [10, 14, 15, 17, 18, 19, 20, 25], "latter": 10, "rang": 10, "sn": 10, "color_palett": 10, "light_palett": 10, "lighter": 10, "handl": 11, "list_cat": 11, "ncol": 11, "fontsiz": 11, "weight": [11, 20], "lw": 11, "edgecolor": 11, "return_handl": 11, "loc": 11, "upper": 11, "left": 11, "labelspac": 11, "columnspac": 11, "fontsize_legend": 11, "title_align_left": 11, "fontsize_weight": 11, "customiz": 11, "attach": 11, "item": 11, "coordin": 11, "text": [11, 12], "locat": [11, 21], "vertic": 11, "horizont": 11, "marker": 11, "directli": [11, 15], "finer": 11, "control": 11, "how": 11, "line2d": 11, "cat1": 11, "red": 11, "cat2": 11, "o": 11, "fig_format": 12, "pdf": 12, "font_scal": 12, "arial": 12, "change_s": 12, "weight_bold": 12, "adjust_el": 12, "short_tick": 12, "no_tick": 12, "no_ticks_i": 12, "short_ticks_i": 12, "no_ticks_x": 12, "short_ticks_x": 12, "configur": 12, "visual": [12, 15], "variou": [12, 15, 21, 24], "file": [12, 15], "save": 12, "make": [12, 15], "visibl": 12, "choos": 12, "san": 12, "serif": 12, "verdana": 12, "helvetica": 12, "dejavu": 12, "element": 12, "bold": 12, "layout": 12, "short": 12, "mark": 12, "global": 12, "df_info": 13, "iloc": 13, "predictor": [13, 21], "aa_caspase3": [13, 21], "233": [13, 21], "185605": [13, 21], "705": [13, 21], "184900": [13, 21], "prosper": [13, 20, 21], "aa_furin": [13, 21], "71": [13, 21], "59003": [13, 21], "163": [13, 21], "58840": [13, 21], "aa_ldr": [13, 21], "342": [13, 21], "118248": [13, 21], "35469": [13, 21], "82779": [13, 21], "idp": [13, 20, 21], "seq2seq": [13, 20, 21], "aa_mmp2": [13, 21], "573": [13, 21], "312976": [13, 21], "2416": [13, 21], "310560": [13, 21], "aa_rnabind": [13, 21], "221": 
[13, 21], "55001": [13, 21], "6492": [13, 21], "48509": [13, 21], "gmksvm": [13, 21], "ru": [13, 21], "aa_sa": [13, 21], "101082": [13, 21], "84523": [13, 21], "1414": [13, 21], "8484": [13, 21], "511": [13, 21], "903": [13, 21], "rerf": [13, 20, 21], "pred": [13, 20, 21], "seq_capsid": [13, 21], "7935": [13, 21], "3364680": [13, 21], "3864": [13, 21], "4071": [13, 21], "viralpro": [13, 20, 21], "seq_disulfid": [13, 21], "2547": [13, 21], "614470": [13, 21], "897": [13, 21], "1650": [13, 21], "dipro": [13, 21], "seq_loc": [13, 21], "1835": [13, 21], "732398": [13, 21], "1045": [13, 21], "790": [13, 21], "nan": [13, 21], "seq_solubl": [13, 21], "17408": [13, 21], "4432269": [13, 21], "8704": [13, 21], "solpro": [13, 20, 21], "seq_tail": [13, 21], "6668": [13, 21], "2671690": [13, 21], "2574": [13, 21], "4094": [13, 21], "12": 13, "126": [13, 21], "92964": [13, 21], "63": [13, 21], "13": 13, "dom_gsec_pu": [13, 21], "694": [13, 21], "494524": [13, 21], "suffix": [13, 15], "df_seq1": 13, "df_seq2": 13, "df_seq3": 13, "head": 13, "entri": 13, "p05067": 13, "mlpglallllaawtaralevptdgnagllaepqiamfcgrlnmhmn": 13, "701": 13, "723": 13, "faedvgsnkg": 13, "aiiglmvggvviatvivitlvml": 13, "kkkqytsihh": 13, "p14925": 13, "magrarsgllllllgllalqssclafrsplsvfkrfkettrsfsn": 13, "868": 13, "890": 13, "klstepgsgv": 13, "svvlittllvipvlvllaivmfi": 13, "rwkksrafgd": 13, "p70180": 13, "mrslllftfsacvllarvllaggassgagdtrpgsrrrarealaaq": 13, "477": 13, "499": 13, "pckssgglee": 13, "savtgivvgallgagllmafyff": 13, "rkkyriti": 13, "q03157": 13, "mgptspaargqgrrwrppplplllplsllllraqlavgnlavgsp": 13, "585": 13, "607": 13, "apsgtgvsr": 13, "alsgllimgagggslivlslll": 13, "rkkkpygti": 13, "q06481": 13, "maatgtaaaaatgrllllllvgltapalalagyiealaanagtgfa": 13, "716": 13, "lredfslsss": 13, "aligllviavaiatvivislvml": 13, "rkrqygtish": 13, "some": [13, 21], "version": [13, 21], "dataset_name_pu": 13, "python": [14, 15, 18, 19], "predict": [14, 15, 18, 19, 20, 21, 25, 26], "aaclust": [14, 17, 18, 19, 20], "compar": 
[14, 18, 19, 24, 25], "engin": [14, 15, 18, 19, 25], "dpulearn": [14, 17, 18, 19], "train": [14, 15, 18, 19, 26], "unbalanc": [14, 15, 18, 19, 22, 27], "moreov": [14, 19], "load_data": [14, 19], "depth": [14, 19], "pypi": 14, "conda": [14, 15], "forg": 14, "pip": [14, 15], "introduct": 14, "contribut": 14, "api": 14, "explain": [14, 15, 20, 22], "ai": [14, 15, 20, 22], "perturb": [14, 26], "util": [14, 15], "search": 14, "your": [14, 15, 17], "work": [14, 17], "pleas": [14, 15, 17], "cite": [14, 17], "breimann23b": [14, 17, 20, 21], "_": [14, 17], "breimann": [14, 17, 20], "kamp": [14, 17], "steiner": [14, 17], "frishman": [14, 17], "2023": [14, 17], "ontologi": [14, 17, 20], "machin": [14, 15, 17, 20, 26], "biorxiv": [14, 17, 20], "welcom": 15, "thank": 15, "we": 15, "open": 15, "project": [15, 21], "focus": 15, "involv": 15, "invalu": 15, "made": 15, "wai": 15, "suggest": 15, "github": 15, "issu": 15, "tracker": 15, "submit": 15, "improv": [15, 20], "particip": 15, "discuss": 15, "newcom": 15, "tackl": 15, "good": 15, "email": 15, "stephanbreimann": 15, "gmail": 15, "com": 15, "question": 15, "establish": 15, "comprehens": 15, "toolkit": [15, 24], "robust": 15, "life": [15, 26, 27], "scienc": [15, 26, 27], "integr": [15, 20], "seamlessli": 15, "flexibl": 15, "interoper": 15, "packag": 15, "biopython": 15, "reimplement": 15, "solut": 15, "ignor": 15, "biolog": [15, 18, 26], "context": 15, "relianc": 15, "opaqu": 15, "box": 15, "empir": 15, "insight": 15, "cut": 15, "fair": 15, "account": 15, "transpar": 15, "re": [15, 20], "commit": 15, "divers": 15, "aspect": 15, "causal": 15, "minim": 15, "reproduc": 15, "mre": 15, "least": 15, "amount": 15, "demonstr": 15, "self": 15, "ensur": 15, "necessari": 15, "confirm": 15, "replic": 15, "guidelin": 15, "here": [15, 21], "To": [15, 22], "git": 15, "breimanntool": 15, "master": 15, "repositori": 15, "your_usernam": 15, "navig": 15, "folder": 15, "up": 15, "cd": 15, "isol": 15, "aanalysi": 15, "activ": 15, "poetri": 15, 
"pytest": 15, "hypothesi": 15, "execut": 15, "case": 15, "directori": 15, "substanti": 15, "minor": 15, "typo": 15, "concis": 15, "descript": [15, 21], "clear": 15, "branch": 15, "fix": 15, "readm": 15, "date": 15, "readthedoc": 15, "crucial": 15, "modif": 15, "thei": 15, "render": 15, "correctli": 15, "strive": 15, "consist": [15, 18], "interfac": 15, "well": 15, "organ": 15, "codebas": 15, "standalon": 15, "focu": 15, "special": 15, "task": [15, 26], "carri": 15, "out": 15, "complet": 15, "process": 15, "fulfil": 15, "purpos": 15, "being": 15, "implement": 15, "inherit": 15, "supplementari": 15, "accordingli": 15, "support": 15, "semi": 15, "strictli": 15, "adher": 15, "aforement": 15, "primari": 15, "_util": 15, "_utils_const": 15, "py": 15, "modular": 15, "easili": 15, "therefor": 15, "flat": 15, "hierarchi": 15, "program": 15, "outlin": 15, "softwar": 15, "user": 15, "friendli": 15, "hint": 15, "enhanc": 15, "propos": 15, "pep": 15, "484": 15, "book": 15, "error": 15, "messag": 15, "docstr": 15, "257": 15, "markup": 15, "languag": 15, "restructuredtext": 15, "rst": 15, "primer": 15, "cheat": 15, "sheet": 15, "restructuretext": 15, "cheatsheet": 15, "sphinx": 15, "autodoc": 15, "inclus": 15, "napoleon": 15, "extens": 15, "conf": 15, "four": 15, "bird": 15, "ey": 15, "view": [15, 26], "background": 15, "reflect": 15, "close": 15, "essenti": 15, "medium": 15, "tabular": 15, "critic": 15, "go": 15, "_build": 15, "browser": 15, "citat": 17, "wa": 18, "develop": 18, "typic": 18, "et": 20, "al": 20, "2023a": 20, "2023b": 20, "breimann23c": [20, 21], "2023c": 20, "chart": 20, "\u03b3": 20, "secretas": [20, 21], "substrat": [20, 21], "cheng06": [20, 21], "cheng": 20, "2006": 20, "larg": 20, "disulphid": 20, "bridg": [20, 21], "kernel": 20, "recurs": 20, "neural": 20, "network": 20, "graph": 20, "match": 20, "struct": 20, "funct": 20, "kawashima08": [20, 21], "kawashima": 20, "2008": 20, "aid": 20, "databas": 20, "report": 20, "nucleic": 20, "magnan09": [20, 21], 
"magnan": 20, "randal": 20, "baldi": 20, "2009": 20, "accur": 20, "solubl": [20, 21], "bioinformat": 20, "galiez16": [20, 21], "galiez": 20, "2016": 20, "viral": 20, "capsid": [20, 21], "tail": [20, 21], "song18": [20, 21], "song": 20, "2018": 20, "throughput": 20, "cleavag": [20, 21], "site": [20, 21], "90": 20, "proteas": 20, "accuraci": 20, "shen19": [20, 21], "shen": 20, "2019": 20, "subcellular": [20, 21], "local": 20, "evolutionari": 20, "chou": 20, "pseaac": 20, "j": 20, "theor": 20, "biol": 20, "tang20": [20, 21], "tang": 20, "2020": 20, "intrins": [20, 21], "disord": [20, 21], "region": [20, 21], "teng21": [20, 21], "teng": 20, "2021": 20, "amyloidogen": [20, 21], "pseudo": 20, "composit": 20, "tripeptid": 20, "bmc": 20, "yang21": [20, 21], "yang": 20, "granular": 20, "multipl": 20, "rna": [20, 21], "bind": [20, 21], "appl": 20, "chronolog": 21, "histori": 21, "2_overview_scal": 21, "begin": 21, "append": 21, "_pu": 21, "caspas": 21, "adjac": 21, "furin": 21, "long": 21, "ldr": 21, "metallopeptidas": 21, "mmp2": 21, "rbp60": 21, "solvent": 21, "sa": 21, "expos": 21, "buri": 21, "amyloidognen": 21, "capdsid": 21, "disulfid": 21, "ss": 21, "bond": 21, "cytoplasm": 21, "v": 21, "plasma": 21, "insolubl": 21, "gamma": 21, "unknown": 21, "statu": 21, "min": 21, "max": 21, "586": 21, "raw": 21, "scales_classif": 21, "compress": 21, "60": 21, "flow": 22, "enri": 22, "signatur": 22, "introduc": 23, "togeth": 24, "central": 25, "platform": 25, "novel": 25, "everywher": [26, 27], "In": 26, "binari": 26, "setup": 26, "augment": 26, "smote": 26, "artifici": 26, "Such": 26, "veri": 26, "popular": 26, "deep": 26, "imag": 26, "recognit": 26, "feasibl": 26, "becaus": 26, "slight": 26, "mutat": 26, "alter": 26, "dramat": 26, "often": 26, "great": 26, "quantiti": 26, "besid": 26, "distinguish": 26, "subfield": 26}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "CPP"], [3, 0, 1, "", "CPPPlot"], [4, 0, 1, "", "SequenceFeature"], [5, 0, 1, "", "dPULearn"], 
[6, 3, 1, "", "load_dataset"], [7, 3, 1, "", "load_scales"], [8, 3, 1, "", "plot_gcfs"], [9, 3, 1, "", "plot_get_cdict"], [10, 3, 1, "", "plot_get_cmap"], [11, 3, 1, "", "plot_set_legend"], [12, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "center_labels_"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "cluster_naming"], [1, 1, 1, "", "correlation"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 1, 1, "", "get_cluster_centers"], [1, 1, 1, "", "get_cluster_medoids"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "medoid_ind_"], [1, 2, 1, "", "medoid_labels_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "n_clusters"]], "aaanalysis.CPP": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "heatmap"], [3, 1, 1, "", "profile"], [3, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "add_dif"], [4, 1, 1, "", "add_feat_value"], [4, 1, 1, "", "add_position"], [4, 1, 1, "", "feat_matrix"], [4, 1, 1, "", "feat_names"], [4, 1, 1, "", "get_df_parts"], [4, 1, 1, "", "get_features"], [4, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "eval"], [5, 1, 1, "", "fit"], [5, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 13, 24, 26], "featur": 0, "engin": 0, "pu": [0, 26], "learn": [0, 26], "explain": [0, 27], "ai": [0, 27], "perturb": 0, "plot": 0, "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14], "aaclust": 1, "note": [1, 2, 4, 5, 12], "cpp": [2, 25], "cppplot": 3, "exampl": [3, 4, 5, 6, 11, 12, 14], "sequencefeatur": 4, "dpulearn": 5, "load_dataset": 6, 
"load_scal": 7, "plot_gcf": 8, "plot_get_cdict": 9, "plot_get_cmap": 10, "plot_set_legend": 11, "plot_set": 12, "load": 13, "welcom": 14, "document": [14, 15], "instal": [14, 15], "overview": [14, 21], "refer": [14, 20], "indic": 14, "tabl": [14, 21], "citat": 14, "contribut": 15, "introduct": [15, 18], "vision": 15, "object": 15, "non": 15, "goal": 15, "principl": [15, 22], "bug": 15, "report": 15, "latest": 15, "version": 15, "local": 15, "develop": 15, "environ": 15, "fork": 15, "clone": 15, "depend": 15, "run": 15, "unit": 15, "test": 15, "pull": 15, "request": 15, "preview": 15, "chang": 15, "name": 15, "convent": 15, "class": 15, "templat": 15, "function": 15, "method": 15, "code": 15, "philosophi": 15, "style": 15, "layer": 15, "build": 15, "doc": 15, "workflow": 18, "algorithm": 20, "dataset": [20, 21], "benchmark": [20, 21], "us": [20, 25], "case": 20, "further": [20, 28], "inform": 20, "protein": 21, "amino": [21, 23], "acid": [21, 23], "scale": [21, 23], "usag": 22, "aaontologi": 23, "classif": 23, "flow": 24, "enri": 24, "point": 24, "identifi": 25, "physicochem": 25, "signatur": 25, "from": 26, "unbalanc": 26, "small": 26, "what": [26, 27], "i": [26, 27], "sequenc": 27, "level": 27, "tutori": 28, "quick": 28, "start": 28}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data": [[0, "data"]], "Feature Engineering": [[0, "feature-engineering"]], "PU Learning": [[0, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], "aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [2, null], [2, null], [4, null], [4, null], [4, 
null], [4, null], [4, null], [5, null], [5, null], [12, null]], "aaanalysis.CPP": [[2, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[3, "aaanalysis-cppplot"]], "Examples": [[3, null], [4, null], [4, null], [5, null], [6, null], [11, null], [12, null]], "aaanalysis.SequenceFeature": [[4, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[5, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[6, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[7, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[8, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[9, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_cmap": [[10, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_set_legend": [[11, "aaanalysis-plot-set-legend"]], "aaanalysis.plot_settings": [[12, "aaanalysis-plot-settings"]], "Data loading": [[13, "data-loading"]], "Welcome to the AAanalysis documentation!": [[14, "welcome-to-the-aaanalysis-documentation"]], "Install": [[14, "install"]], "OVERVIEW": [[14, null]], "EXAMPLES": [[14, null]], "REFERENCES": [[14, null]], "Indices and tables": [[14, "indices-and-tables"]], "Citation": [[14, "citation"]], "Contributing": [[15, "contributing"]], "Introduction": [[15, "introduction"], [18, "introduction"]], "Vision": [[15, "vision"]], "Objectives": [[15, "objectives"]], "Non-goals": [[15, "non-goals"]], "Principles": [[15, "principles"]], "Bug Reports": [[15, "bug-reports"]], "Installation": [[15, "installation"]], "Latest Version": [[15, "latest-version"]], "Local Development Environment": [[15, "local-development-environment"]], "Fork and Clone": [[15, "fork-and-clone"]], "Install Dependencies": [[15, "install-dependencies"]], "Run Unit Tests": [[15, "run-unit-tests"]], "Pull Requests": [[15, "pull-requests"]], "Preview Changes": [[15, "preview-changes"]], "Documentation": [[15, "documentation"]], "Naming Conventions": [[15, "naming-conventions"]], "Class Templates": [[15, "class-templates"]], "Function and Method Naming": [[15, 
"function-and-method-naming"]], "Code Philosophy": [[15, "code-philosophy"]], "Documentation Style": [[15, "documentation-style"]], "Documentation Layers": [[15, "documentation-layers"]], "Building the Docs": [[15, "building-the-docs"]], "Workflow": [[18, "workflow"]], "References": [[20, "references"]], "Algorithms": [[20, "algorithms"]], "Datasets and Benchmarks": [[20, "datasets-and-benchmarks"]], "Use Cases": [[20, "use-cases"]], "Further Information": [[20, "further-information"]], "Tables": [[21, "tables"]], "Overview Table": [[21, "overview-table"]], "Protein Benchmark Datasets": [[21, "protein-benchmark-datasets"]], "Amino Acid Scale Datasets": [[21, "amino-acid-scale-datasets"]], "Usage Principles": [[22, "usage-principles"]], "AAontology: Classification of amino acid scales": [[23, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[24, "data-flow-and-enry-points"]], "Identifying Physicochemical Signatures using CPP": [[25, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[26, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[26, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[27, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[27, "what-is-explainable-ai"]], "Tutorials": [[28, "tutorials"]], "Quick start": [[28, "quick-start"]], "Further Tutorials": [[28, "further-tutorials"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "center_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.center_labels_"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "cluster_naming() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.cluster_naming"]], "correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.correlation"]], "eval() (aaanalysis.aaclust 
method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "get_cluster_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_centers"]], "get_cluster_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_medoids"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "medoid_ind_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_ind_"]], "medoid_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_labels_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "cpp (class in aaanalysis)": [[2, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp static method)": [[2, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[3, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[4, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[4, 
"aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[5, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[5, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[6, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[7, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[8, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[9, "aaanalysis.plot_get_cdict"]], "plot_get_cmap() (in module aaanalysis)": [[10, "aaanalysis.plot_get_cmap"]], "plot_set_legend() (in module aaanalysis)": [[11, "aaanalysis.plot_set_legend"]], "plot_settings() (in module aaanalysis)": [[12, "aaanalysis.plot_settings"]]}})
\ No newline at end of file
+Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_set_legend", "generated/aaanalysis.plot_settings", "generated/tutorial2_data_loader", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tutorials"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_set_legend.rst", "generated/aaanalysis.plot_settings.rst", "generated/tutorial2_data_loader.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles.rst", "index/usage_principles/aaontology.rst", "index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tutorials.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", 
"aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_cmap", "aaanalysis.plot_set_legend", "aaanalysis.plot_settings", "Data loading", "Welcome to the AAanalysis documentation!", "Contributing", "&lt;no title&gt;", "&lt;no title&gt;", "Introduction", "&lt;no title&gt;", "References", "Tables", "Usage Principles", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tutorials"], "terms": {"thi": [0, 1, 3, 12, 15], "page": [0, 14], "contain": [0, 2, 3, 5, 6, 13, 15, 21, 24, 26], "refer": [0, 1, 2, 4, 15, 21], "public": [0, 14, 15, 17], "object": [0, 1, 3, 4, 5], "function": [0, 3, 8, 10, 12, 14, 19], "aaanalysi": [0, 13, 15, 17, 18, 19, 21, 22, 24, 25], "see": [0, 3, 15, 21], "more": [0, 3, 15], "exampl": [0, 15, 26], "practic": 0, "usag": [0, 14, 15], "our": [0, 15], "tutori": [0, 6, 14, 15], "For": [0, 1, 4, 11, 13, 15, 26], "conveni": 0, "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 13, 14, 15, 18, 19, 23, 25], "common": [0, 15], "import": [0, 4, 5, 6, 11, 12, 13, 15, 22], "modul": [0, 1, 14], "follow": [0, 1, 2, 4, 5, 14, 15, 17, 18, 19, 22], "aa": [0, 2, 4, 5, 6, 11, 12, 13, 21, 22], "Then": 0, "you": [0, 14, 15, 17], "can": [0, 1, 4, 5, 7, 11, 14, 15, 18, 24, 26], "access": [0, 21], "all": [0, 1, 2, 3, 4, 6, 12, 15, 21], "method": [0, 1, 2, 3, 4, 5, 20], "via": [0, 15, 20], "alia": [0, 4], "load_dataset": [0, 4, 13, 21], "class": [1, 2, 3, 4, 5, 6, 26], "model": [1, 5, 15, 26], "none": [1, 2, 3, 4, 5, 6, 9, 10, 11], "model_kwarg": 1, "verbos": [1, 2, 3, 4, 5, 12], "fals": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15], "base": [1, 2, 3, 4, 5, 10, 14, 15, 18, 19, 20, 21, 25, 26], "A": [1, 4, 7, 11, 15, 18, 20], "k": [1, 14, 18, 19, 20], "optim": [1, 2, 3, 14, 18, 19, 20], 
"cluster": [1, 14, 18, 19, 20], "framework": [1, 14, 18, 19], "select": [1, 2, 3, 6, 7, 14, 15, 18, 19, 20], "redund": [1, 2, 14, 18, 19, 20], "reduc": [1, 5, 14, 18, 19, 20], "set": [1, 2, 3, 4, 5, 6, 8, 11, 12, 14, 15, 18, 19, 20, 21, 24], "numer": [1, 3, 4, 14, 18, 19], "scale": [1, 2, 3, 4, 7, 9, 10, 12, 14, 17, 18, 19, 20, 22, 24], "design": [1, 3, 15, 25], "primarili": [1, 5, 15], "amino": [1, 2, 3, 4, 6, 7, 13, 14, 17, 18, 19, 20, 22, 24, 26], "acid": [1, 2, 3, 4, 6, 7, 13, 14, 17, 18, 19, 20, 22, 24, 26], "versatil": 1, "enough": 1, "ani": [1, 15, 18], "indic": [1, 3, 4, 5, 13, 21], "It": [1, 18], "take": 1, "requir": 1, "pre": [1, 2, 15], "defin": [1, 4, 15], "number": [1, 2, 3, 4, 5, 6, 10, 11], "from": [1, 2, 3, 4, 5, 6, 7, 14, 15, 21, 22, 24], "scikit": [1, 15], "learn": [1, 5, 13, 14, 15, 17, 18, 19, 20, 21, 22], "http": [1, 15], "org": [1, 15], "stabl": 1, "html": [1, 15], "By": 1, "leverag": 1, "pearson": [1, 2], "correl": [1, 2], "similar": [1, 26], "measur": [1, 15], "valu": [1, 2, 3, 4, 15, 18, 21], "one": [1, 3], "repres": [1, 3, 18], "sampl": [1, 2, 3, 4, 5, 13, 21, 26], "term": 1, "medoid": 1, "each": [1, 2, 3, 4, 5, 15], "which": [1, 3, 4, 8, 18, 24, 26], "closest": 1, "": [1, 11, 15, 20], "center": [1, 10], "yield": 1, "paramet": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12], "callabl": 1, "option": [1, 2, 3, 4, 5, 6, 7, 10, 12], "default": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13], "sklearn": 1, "kmean": 1, "The": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, 24, 25], "emploi": [1, 5], "given": [1, 3, 4, 6, 7], "n_cluster": 1, "dict": [1, 2, 3, 4, 5, 9, 10, 11], "dictionari": [1, 2, 3, 4, 9, 10, 11], "keyword": [1, 3, 5], "argument": [1, 3, 4, 5, 11], "pass": [1, 3, 5, 11, 15], "bool": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "flag": 1, "enabl": [1, 2, 3, 4, 5, 12, 14, 15, 18, 19, 25], "disabl": [1, 6], "output": [1, 4, 5, 12], "obtain": [1, 4, 24], "type": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, 21], "int": [1, 2, 3, 4, 5, 6, 10, 11], "labels_": [1, 5], "label": [1, 
2, 3, 4, 5, 11, 13, 15, 21, 26], "order": [1, 21], "featur": [1, 2, 3, 4, 5, 10, 14, 15, 18, 19, 24, 25, 26], "matrix": [1, 4, 5, 21], "arrai": [1, 2, 4, 5], "like": [1, 2, 4, 5, 15], "centers_": 1, "averag": [1, 4], "correspond": [1, 15], "center_labels_": 1, "medoids_": 1, "medoid_labels_": 1, "medoid_ind_": 1, "chosen": [1, 2, 4, 6], "within": [1, 2, 4], "origin": 1, "dataset": [1, 2, 6, 7, 13, 14, 15, 18, 19, 26, 27], "__init__": [1, 2, 3, 4, 5], "fit": [1, 5, 15], "x": [1, 3, 5, 6, 11, 12], "name": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 21], "on_cent": 1, "true": [1, 2, 3, 4, 6, 7, 11, 12], "min_th": 1, "0": [1, 2, 3, 4, 5, 11, 12, 13, 21, 26], "merge_metr": 1, "euclidean": [1, 5], "data": [1, 3, 5, 6, 14, 15, 21, 22, 28], "format": [1, 12], "us": [1, 2, 3, 5, 6, 10, 12, 14, 15, 17, 18, 21, 22, 24, 26], "determin": 1, "without": [1, 3, 15, 21], "specif": [1, 9, 15, 21, 24], "partit": 1, "maxim": 1, "beyond": 1, "threshold": [1, 2], "qualiti": 1, "either": [1, 4, 14], "minimum": [1, 4, 6], "member": 1, "min_cor": 1, "between": [1, 2, 3, 4, 10, 11], "its": [1, 15], "govern": 1, "undergo": 1, "three": [1, 4, 10, 21], "stage": 1, "1": [1, 2, 3, 4, 5, 11, 12, 13, 21, 26], "estim": 1, "lower": 1, "bound": 1, "2": [1, 2, 3, 4, 5, 11, 13, 21, 26], "refin": 1, "metric": [1, 5, 15], "3": [1, 4, 5, 11, 13, 15, 21], "merg": 1, "smaller": 1, "direct": 1, "final": 1, "reduct": 1, "shape": [1, 2, 3, 4, 5, 11], "n_sampl": [1, 2, 4, 5], "n_featur": [1, 2, 3, 4, 5], "where": [1, 4, 5], "list": [1, 3, 4, 10, 11, 21], "str": [1, 3, 4, 5, 6, 7, 9, 10, 11, 12], "If": [1, 2, 3, 4, 5, 6, 10, 12, 14, 15, 17, 26], "provid": [1, 2, 3, 5, 7, 10, 13, 14, 15, 19, 21, 26], "return": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11], "appli": [1, 5, 10, 11, 12], "otherwis": [1, 3, 4, 5], "float": [1, 2, 3, 5, 10, 11, 12], "instead": 1, "names_medoid": 1, "attribut": 1, "attr": 1, "further": [1, 3, 15], "inform": [1, 2, 3, 4, 5, 24], "paper": 1, "todo": [1, 2], "add": [1, 2, 3, 4, 21], "link": [1, 2, 14, 15, 
17, 20], "cluster_nam": 1, "name_unclassifi": 1, "unclassifi": [1, 7], "assign": [1, 3, 4, 5], "frequenc": 1, "renam": 1, "prioriti": 1, "most": [1, 2, 3, 5, 14, 18, 19], "frequent": 1, "alreadi": [1, 26], "doe": 1, "exist": [1, 15, 26], "cannot": 1, "classifi": [1, 3], "static": [1, 2, 4], "get_cluster_cent": 1, "comput": [1, 2, 3, 4, 15, 20], "center_label": 1, "associ": 1, "get_cluster_medoid": 1, "medoid_label": 1, "medoid_ind": 1, "index": [1, 6, 14, 15, 20], "x_test": 1, "x_ref": 1, "labels_test": 1, "labels_ref": 1, "n": [1, 2, 3, 4, 6, 15, 20], "posit": [1, 2, 3, 4, 5, 10, 13, 14, 18, 19, 21, 26], "except_unclassifi": 1, "test": [1, 2], "top": [1, 21], "consid": [1, 15], "strength": 1, "els": 1, "neg": [1, 4, 5, 10, 13, 21, 26], "exclud": 1, "list_top_center_name_corr": 1, "have": [1, 15, 21, 26], "strongest": 1, "eval": [1, 2, 5, 15], "df_scale": [2, 4, 24], "df_cat": [2, 3, 4, 24], "df_part": [2, 4, 24], "split_kw": [2, 4], "accept_gap": [2, 3, 4], "tool": [2, 15, 20], "creat": [2, 3, 4, 5, 15, 24], "filter": [2, 3, 6], "ar": [2, 3, 4, 5, 6, 7, 13, 15, 21, 24, 26, 27], "discrimin": [2, 3], "two": [2, 3, 14, 15, 18, 19, 20, 23, 24], "sequenc": [2, 3, 4, 5, 6, 7, 13, 14, 15, 18, 19, 20, 21, 22, 24, 25, 26], "panda": [2, 3, 4, 5, 15], "datafram": [2, 3, 4, 5, 6, 7, 15, 24], "load_categori": [2, 4], "categori": [2, 3, 4, 7, 9, 10, 11], "physicochem": [2, 4, 14, 18, 19, 20, 22, 24], "part": [2, 3, 4, 15, 24], "sequencefeatur": 2, "get_split_kw": [2, 4], "nest": [2, 4], "split_typ": [2, 4], "whether": [2, 3, 4, 7, 10, 11], "accept": [2, 3, 4], "miss": [2, 3, 4], "omit": [2, 3, 4], "print": [2, 3, 4, 13], "progress": [2, 3, 20], "about": [2, 3], "algorithm": [2, 3, 14, 15, 18, 19, 24, 25], "run": [2, 4], "perform": [2, 5], "step": [2, 3, 4, 6, 15, 18], "parametr": 2, "n_filter": 2, "100": [2, 6, 10], "tmd_len": [2, 3, 4], "20": [2, 3, 4, 15, 21], "jmd_n_len": [2, 3, 4], "10": [2, 3, 4, 10, 13], "jmd_c_len": [2, 3, 4], "ext_len": [2, 3, 4], "4": [2, 3, 4, 13], 
"start": [2, 3, 4, 15, 22, 24], "check_cat": 2, "n_pre_filt": 2, "pct_pre_filt": 2, "5": [2, 3, 4, 5, 11, 13], "max_std_test": 2, "max_overlap": 2, "max_cor": 2, "n_process": 2, "pipelin": [2, 15], "creation": 2, "aim": [2, 3, 15], "identifi": [2, 3, 5, 14, 18, 19, 20, 22, 26], "collect": 2, "non": [2, 4, 6, 21], "group": [2, 3, 4], "t": [2, 6], "u": [2, 14, 15], "p": [2, 20], "percentag": [2, 5, 10], "length": [2, 3, 4, 6], "tmd": [2, 3, 4, 13], "explan": [2, 3, 15], "first": [2, 3, 4, 10, 15], "terminu": [2, 3, 4], "jmd": [2, 3, 4], "c": [2, 3, 4, 14, 20], "extend": [2, 3, 4, 15, 26], "termin": [2, 3, 4], "should": [2, 3, 4, 5, 7, 15, 26], "longer": 2, "than": 2, "check": [2, 15], "remain": [2, 15], "after": 2, "maximum": [2, 4, 5, 6], "standard": [2, 26], "deviat": 2, "overlap": 2, "cpu": 2, "multiprocess": 2, "automat": [2, 3, 5, 15], "df_feat": [2, 3, 4, 24], "uniqu": [2, 3], "statist": [2, 3], "n_feature_inform": [2, 3], "eleven": 2, "column": [2, 3, 4, 5, 6, 11, 15], "includ": [2, 4, 7, 10, 11, 15], "id": [2, 4], "result": 2, "rank": 2, "11": [2, 3, 11, 13], "split": [2, 4, 24], "subcategori": [2, 3], "sub": 2, "scale_nam": [2, 3], "abs_auc": [2, 3], "absolut": 2, "adjust": [2, 3, 12], "auc": 2, "abs_mean_dif": 2, "mean": [2, 3], "differ": [2, 3, 4, 11, 24], "std_test": [2, 3], "std_ref": 2, "p_val": 2, "mann_whitnei": 2, "ttest_indep": 2, "p_val_fdr_bh": 2, "benjamini": 2, "hochberg": 2, "fdr": 2, "correct": 2, "get": [2, 4, 8, 22], "evalu": [2, 7, 15, 21], "condit": [3, 4], "jmd_m_len": [3, 4], "profil": [3, 9, 10, 14, 18, 19, 25], "y": [3, 11, 12], "val_col": 3, "mean_dif": 3, "val_typ": 3, "count": [3, 7], "normal": [3, 11, 21], "figsiz": 3, "7": [3, 4, 5, 12, 13], "titl": [3, 11], "title_kw": 3, "dict_color": [3, 9, 10, 11], "edge_color": 3, "bar_width": 3, "75": 3, "add_jmd_tmd": 3, "jmd_n_seq": 3, "tmd_seq": 3, "jmd_c_seq": 3, "tmd_color": 3, "mediumspringgreen": 3, "jmd_color": 3, "blue": [3, 11], "tmd_seq_color": 3, "black": [3, 15], 
"jmd_seq_color": 3, "white": 3, "seq_siz": 3, "tmd_jmd_fontsiz": 3, "xtick_siz": 3, "xtick_width": 3, "xtick_length": 3, "xticks_po": 3, "ytick_siz": 3, "ytick_width": 3, "ytick_length": 3, "ylim": 3, "highlight_tmd_area": 3, "highlight_alpha": 3, "15": [3, 4], "grid": [3, 12], "grid_axi": [3, 12], "both": [3, 12], "add_legend_cat": 3, "legend_kw": 3, "shap_plot": 3, "kwarg": [3, 4, 11], "plot": [3, 9, 10, 11, 12, 14, 15], "instanc": 3, "avail": [3, 14, 17, 20], "specifi": [3, 4, 5, 9, 10, 12, 15], "check_value_typ": 3, "tupl": [3, 10], "size": [3, 4, 8, 10, 11, 12], "custom": [3, 11, 12], "appear": [3, 12], "map": [3, 4, 10, 11], "color": [3, 9, 10, 11], "edg": [3, 11, 15], "bar": [3, 9, 10], "width": [3, 11], "line": [3, 11], "annot": 3, "font": [3, 8, 11, 12], "tick": [3, 12], "axi": [3, 12], "limit": 3, "highlight": 3, "area": 3, "alpha": 3, "ad": 3, "drawn": 3, "legend": [3, 11], "shap": [3, 10, 15], "shaplei": 3, "addit": [3, 4, 5, 11, 12, 13, 21], "gener": [3, 4, 10, 12, 15, 18, 20, 26], "other": [3, 7, 15], "intern": 3, "librari": [3, 12, 15], "ax": [3, 11], "matplotlib": [3, 11, 12, 15], "heatmap": [3, 9, 10], "8": [3, 4, 5, 13, 15], "vmin": 3, "vmax": 3, "grid_on": 3, "cmap": [3, 9, 10], "rdbu_r": 3, "cmap_n_color": 3, "cbar_kw": 3, "facecolor_dark": [3, 10], "add_importance_map": 3, "cbar_pct": 3, "featuremap": 3, "versu": 3, "wrapper": [3, 14, 15, 18, 19], "seaborn": [3, 10, 12, 15], "level": [3, 6, 13, 14, 15, 19, 21, 22, 23], "e": [3, 4, 9, 10, 12, 13, 14, 15, 18, 19, 21, 26], "g": [3, 4, 9, 10, 12, 13, 14, 15, 18, 19, 21, 26], "protein": [3, 4, 6, 14, 15, 18, 19, 20, 24, 25, 26], "shown": 3, "feat_impact": 3, "displai": 3, "sum": 3, "std": 3, "aggreg": 3, "positions_onli": 3, "across": [3, 15], "recommend": [3, 5, 15], "when": [3, 5], "emphas": [3, 15], "fewer": 3, "value_typ": 3, "height": 3, "figur": 3, "inch": 3, "pyplot": [3, 11], "anchor": [3, 11], "colormap": 3, "infer": [3, 15], "seismic": 3, "space": [3, 5, 10, 11], "impact": 3, "discret": 3, 
"diverg": 3, "sequenti": 3, "kei": [3, 15], "colorbar": 3, "under": [3, 15], "depicet": 3, "depict": 3, "jmd_n": [3, 4, 13], "jmd_c": [3, 4, 13], "point": [3, 11, 22], "set_xticklabel": 3, "widht": 3, "tick_param": 3, "classif": [3, 7, 14, 19, 21, 22, 26], "pcolormesh": 3, "effect": [3, 15, 26], "onli": [3, 6, 7, 13, 15, 21, 26], "align": [3, 11], "applic": 3, "document": [3, 21], "detail": [3, 6, 11, 14, 15, 17], "cpp": [3, 4, 10, 14, 17, 18, 19, 22, 24], "code": [3, 10], "update_seq_s": 3, "retriev": [4, 9, 10], "compon": [4, 5, 21], "continu": 4, "subset": [4, 21], "domain": [4, 6, 13, 21], "transmembran": 4, "membran": [4, 21], "principl": [4, 14], "distinct": [4, 14, 15, 18, 19], "segment": 4, "pattern": 4, "properti": [4, 21], "express": 4, "present": 4, "realiz": 4, "over": 4, "valid": [4, 15], "tmd_e": 4, "tmd_n": 4, "tmd_c": 4, "ext_c": 4, "ext_n": 4, "tmd_jmd": 4, "jmd_n_tmd_n": 4, "tmd_c_jmd_c": 4, "ext_n_tmd_n": 4, "tmd_c_ext_c": 4, "get_df_part": 4, "df_seq": [4, 5, 6, 24], "list_part": 4, "all_part": 4, "datafran": 4, "compris": 4, "tmd_start": [4, 13], "tmd_stop": [4, 13], "string": [4, 10], "len": 4, "must": 4, "lenght": 4, "resp": 4, "extra": 4, "possibl": [4, 26], "found": [4, 7, 15], "sf": 4, "dom_gsec": [4, 13, 21], "n_split_min": 4, "n_split_max": 4, "steps_pattern": 4, "n_min": 4, "n_max": 4, "len_max": 4, "steps_periodicpattern": 4, "periodicpattern": 4, "greater": 4, "greatest": 4, "whole": [4, 6], "specfii": 4, "smallest": 4, "integ": 4, "6": [4, 13], "vari": 4, "paramt": 4, "argumetn": 4, "get_featur": 4, "load_scal": [4, 14, 19, 21], "combin": [4, 15], "form": 4, "feat_matrix": 4, "n_job": 4, "return_label": 4, "pd": [4, 5, 15], "seri": 4, "job": 4, "parallel": 4, "spars": 4, "feat_nam": 4, "convert": 4, "depend": 4, "last": 4, "step1": 4, "step2": 4, "add_feat_valu": 4, "dict_scal": 4, "convent": 4, "letter": 4, "feature_valu": 4, "n_part": 4, "ha": [4, 15], "structur": [4, 20], "th": 4, "n_split": 4, "p1": 4, "p2": 4, "pn": 4, "end": 4, 
"odd": 4, "even": 4, "give": 4, "add_dif": 4, "sample_nam": 4, "ref_group": 4, "add_posit": 4, "part_split": 4, "feat_posit": 4, "total": [4, 5], "n_compon": 5, "pca_kwarg": 5, "determinist": [5, 14, 18, 19], "unlabel": [5, 13, 14, 18, 19, 21, 26], "offer": [5, 15], "approach": [5, 26], "pu": [5, 13, 14, 18, 19, 21], "princip": [5, 21], "analysi": [5, 6, 7, 14, 15, 18, 19], "pca": 5, "dimension": [5, 20], "pc": [5, 21], "iter": 5, "reliabl": [5, 15], "These": [5, 15, 26], "those": 5, "distant": 5, "altern": [5, 26], "also": [5, 15, 21], "distanc": 5, "manhattan": 5, "cosin": 5, "80": 5, "cover": 5, "varianc": 5, "identif": [5, 20], "datapoint": 5, "inspir": [5, 15], "techniqu": [5, 26], "an": [5, 6, 13, 14, 15, 17, 20, 21], "theoret": 5, "high": [5, 20], "n_neg": 5, "label_po": 5, "name_neg": 5, "rel_neg": 5, "col_class": 5, "newli": 5, "updat": [5, 15], "new": [5, 15], "store": 5, "Will": 5, "dure": 5, "initi": 5, "small": [5, 14, 15, 18, 19, 22, 27], "datafor": 5, "conta": 5, "po": 5, "unl": 5, "numpi": [5, 15], "np": 5, "atgc": 5, "gcta": 5, "actg": 5, "tacg": 5, "mode": 5, "modifi": [5, 6, 12, 24], "dpul": 5, "info": 6, "random": 6, "non_canonical_aa": 6, "remov": [6, 12], "min_len": 6, "max_len": 6, "aa_window_s": 6, "9": [6, 13, 15], "load": [6, 7, 14, 15, 19, 28], "benchmark": [6, 13, 14, 19], "categor": [6, 13], "dom": [6, 13, 21], "seq": [6, 13, 21], "overview": [6, 13, 15], "tabl": [6, 15], "breimann23a": [6, 20, 21], "per": 6, "liter": 6, "keep": 6, "gap": [6, 10], "canon": 6, "don": 6, "replac": 6, "symbol": 6, "window": 6, "aa_": 6, "guid": [6, 15], "seq_amylo": [6, 13, 21], "just_aaindex": 7, "unclassified_in": 7, "aaontologi": [7, 14, 17, 19, 20, 21, 22], "thorough": 7, "residu": [7, 20, 21], "scales_raw": [7, 21], "scales_cat": 7, "scales_pc": [7, 21], "top60": [7, 21], "top60_ev": [7, 21], "relev": 7, "aaindex": [7, 20], "current": 8, "ut": 8, "plot_set": 8, "dict_scale_cat": [9, 10], "cppplot": [9, 10, 15], "respect": [9, 10, 14, 15, 17], 
"n_color": 10, "color_po": 10, "color_neg": 10, "color_cent": 10, "input": [10, 15, 24], "hex": 10, "pct_gap": 10, "pct_center": 10, "palett": 10, "feat": 10, "ggplot": 10, "datagroup": 10, "dark": 10, "face": 10, "rgb": 10, "hl": 10, "husl": 10, "xkcd": 10, "interpret": [10, 14, 15, 17, 18, 19, 20, 25], "latter": 10, "rang": 10, "sn": 10, "color_palett": 10, "light_palett": 10, "lighter": 10, "handl": 11, "list_cat": 11, "ncol": 11, "fontsiz": 11, "weight": [11, 20], "lw": 11, "edgecolor": 11, "return_handl": 11, "loc": 11, "upper": 11, "left": 11, "labelspac": 11, "columnspac": 11, "fontsize_legend": 11, "title_align_left": 11, "fontsize_weight": 11, "customiz": 11, "attach": 11, "item": 11, "coordin": 11, "text": [11, 12], "locat": [11, 21], "vertic": 11, "horizont": 11, "marker": 11, "directli": [11, 15], "finer": 11, "control": 11, "how": 11, "line2d": 11, "cat1": 11, "red": 11, "cat2": 11, "o": 11, "fig_format": 12, "pdf": 12, "font_scal": 12, "arial": 12, "change_s": 12, "weight_bold": 12, "adjust_el": 12, "short_tick": 12, "no_tick": 12, "no_ticks_i": 12, "short_ticks_i": 12, "no_ticks_x": 12, "short_ticks_x": 12, "configur": 12, "visual": [12, 15], "variou": [12, 15, 21, 24], "file": [12, 15], "save": 12, "make": [12, 15], "visibl": 12, "choos": 12, "san": 12, "serif": 12, "verdana": 12, "helvetica": 12, "dejavu": 12, "element": 12, "bold": 12, "layout": 12, "short": 12, "mark": 12, "global": 12, "df_info": 13, "iloc": 13, "predictor": [13, 21], "aa_caspase3": [13, 21], "233": [13, 21], "185605": [13, 21], "705": [13, 21], "184900": [13, 21], "prosper": [13, 20, 21], "aa_furin": [13, 21], "71": [13, 21], "59003": [13, 21], "163": [13, 21], "58840": [13, 21], "aa_ldr": [13, 21], "342": [13, 21], "118248": [13, 21], "35469": [13, 21], "82779": [13, 21], "idp": [13, 20, 21], "seq2seq": [13, 20, 21], "aa_mmp2": [13, 21], "573": [13, 21], "312976": [13, 21], "2416": [13, 21], "310560": [13, 21], "aa_rnabind": [13, 21], "221": [13, 21], "55001": [13, 21], 
"6492": [13, 21], "48509": [13, 21], "gmksvm": [13, 21], "ru": [13, 21], "aa_sa": [13, 21], "101082": [13, 21], "84523": [13, 21], "1414": [13, 21], "8484": [13, 21], "511": [13, 21], "903": [13, 21], "rerf": [13, 20, 21], "pred": [13, 20, 21], "seq_capsid": [13, 21], "7935": [13, 21], "3364680": [13, 21], "3864": [13, 21], "4071": [13, 21], "viralpro": [13, 20, 21], "seq_disulfid": [13, 21], "2547": [13, 21], "614470": [13, 21], "897": [13, 21], "1650": [13, 21], "dipro": [13, 21], "seq_loc": [13, 21], "1835": [13, 21], "732398": [13, 21], "1045": [13, 21], "790": [13, 21], "nan": [13, 21], "seq_solubl": [13, 21], "17408": [13, 21], "4432269": [13, 21], "8704": [13, 21], "solpro": [13, 20, 21], "seq_tail": [13, 21], "6668": [13, 21], "2671690": [13, 21], "2574": [13, 21], "4094": [13, 21], "12": 13, "126": [13, 21], "92964": [13, 21], "63": [13, 21], "13": 13, "dom_gsec_pu": [13, 21], "694": [13, 21], "494524": [13, 21], "suffix": [13, 15], "df_seq1": 13, "df_seq2": 13, "df_seq3": 13, "head": 13, "entri": 13, "p05067": 13, "mlpglallllaawtaralevptdgnagllaepqiamfcgrlnmhmn": 13, "701": 13, "723": 13, "faedvgsnkg": 13, "aiiglmvggvviatvivitlvml": 13, "kkkqytsihh": 13, "p14925": 13, "magrarsgllllllgllalqssclafrsplsvfkrfkettrsfsn": 13, "868": 13, "890": 13, "klstepgsgv": 13, "svvlittllvipvlvllaivmfi": 13, "rwkksrafgd": 13, "p70180": 13, "mrslllftfsacvllarvllaggassgagdtrpgsrrrarealaaq": 13, "477": 13, "499": 13, "pckssgglee": 13, "savtgivvgallgagllmafyff": 13, "rkkyriti": 13, "q03157": 13, "mgptspaargqgrrwrppplplllplsllllraqlavgnlavgsp": 13, "585": 13, "607": 13, "apsgtgvsr": 13, "alsgllimgagggslivlslll": 13, "rkkkpygti": 13, "q06481": 13, "maatgtaaaaatgrllllllvgltapalalagyiealaanagtgfa": 13, "716": 13, "lredfslsss": 13, "aligllviavaiatvivislvml": 13, "rkrqygtish": 13, "some": [13, 21], "version": [13, 21], "dataset_name_pu": 13, "python": [14, 15, 18, 19], "predict": [14, 15, 18, 19, 20, 21, 25, 26], "aaclust": [14, 17, 18, 19, 20], "compar": [14, 18, 19, 24, 25], 
"engin": [14, 15, 18, 19, 25], "dpulearn": [14, 17, 18, 19], "train": [14, 15, 18, 19, 26], "unbalanc": [14, 15, 18, 19, 22, 27], "moreov": [14, 19], "load_data": [14, 19], "depth": [14, 19], "pypi": 14, "conda": [14, 15], "forg": 14, "pip": [14, 15], "introduct": 14, "contribut": 14, "api": 14, "explain": [14, 15, 20, 22], "ai": [14, 15, 20, 22], "perturb": [14, 26], "util": [14, 15], "search": 14, "your": [14, 15, 17], "work": [14, 17], "pleas": [14, 15, 17], "cite": [14, 17], "breimann23b": [14, 17, 20, 21], "_": [14, 17], "breimann": [14, 17, 20], "kamp": [14, 17], "steiner": [14, 17], "frishman": [14, 17], "2023": [14, 17], "ontologi": [14, 17, 20], "machin": [14, 15, 17, 20, 26], "biorxiv": [14, 17, 20], "welcom": 15, "thank": 15, "we": 15, "open": 15, "project": [15, 21], "focus": 15, "involv": 15, "invalu": 15, "made": 15, "wai": 15, "suggest": 15, "github": 15, "issu": 15, "tracker": 15, "submit": 15, "improv": [15, 20], "particip": 15, "discuss": 15, "newcom": 15, "tackl": 15, "good": 15, "email": 15, "stephanbreimann": 15, "gmail": 15, "com": 15, "question": 15, "establish": 15, "comprehens": 15, "toolkit": [15, 24], "robust": 15, "life": [15, 26, 27], "scienc": [15, 26, 27], "integr": [15, 20], "seamlessli": 15, "flexibl": 15, "interoper": 15, "packag": 15, "biopython": 15, "reimplement": 15, "solut": 15, "ignor": 15, "biolog": [15, 18, 26], "context": 15, "relianc": 15, "opaqu": 15, "box": 15, "empir": 15, "insight": 15, "cut": 15, "fair": 15, "account": 15, "transpar": 15, "re": [15, 20], "commit": 15, "divers": 15, "aspect": 15, "causal": 15, "minim": 15, "reproduc": 15, "mre": 15, "least": 15, "amount": 15, "demonstr": 15, "self": 15, "ensur": 15, "necessari": 15, "confirm": 15, "replic": 15, "guidelin": 15, "here": [15, 21], "To": [15, 22], "git": 15, "breimanntool": 15, "master": 15, "repositori": 15, "your_usernam": 15, "navig": 15, "folder": 15, "up": 15, "cd": 15, "isol": 15, "aanalysi": 15, "activ": 15, "poetri": 15, "pytest": 15, "hypothesi": 
15, "execut": 15, "case": 15, "directori": 15, "substanti": 15, "minor": 15, "typo": 15, "concis": 15, "descript": [15, 21], "clear": 15, "branch": 15, "fix": 15, "readm": 15, "date": 15, "readthedoc": 15, "crucial": 15, "modif": 15, "thei": 15, "render": 15, "correctli": 15, "strive": 15, "consist": [15, 18], "interfac": 15, "well": 15, "organ": 15, "codebas": 15, "standalon": 15, "focu": 15, "special": 15, "task": [15, 26], "carri": 15, "out": 15, "complet": 15, "process": 15, "fulfil": 15, "purpos": 15, "being": 15, "implement": 15, "inherit": 15, "supplementari": 15, "accordingli": 15, "support": 15, "semi": 15, "strictli": 15, "adher": 15, "aforement": 15, "primari": 15, "_util": 15, "_utils_const": 15, "py": 15, "modular": 15, "easili": 15, "therefor": 15, "flat": 15, "hierarchi": 15, "program": 15, "outlin": 15, "softwar": 15, "user": 15, "friendli": 15, "hint": 15, "enhanc": 15, "propos": 15, "pep": 15, "484": 15, "book": 15, "error": 15, "messag": 15, "docstr": 15, "257": 15, "markup": 15, "languag": 15, "restructuredtext": 15, "rst": 15, "primer": 15, "cheat": 15, "sheet": 15, "restructuretext": 15, "cheatsheet": 15, "sphinx": 15, "autodoc": 15, "inclus": 15, "napoleon": 15, "extens": 15, "conf": 15, "four": 15, "bird": 15, "ey": 15, "view": [15, 26], "background": 15, "reflect": 15, "close": 15, "essenti": 15, "medium": 15, "tabular": 15, "critic": 15, "go": 15, "_build": 15, "browser": 15, "citat": 17, "wa": 18, "develop": 18, "typic": 18, "et": 20, "al": 20, "2023a": 20, "2023b": 20, "breimann23c": [20, 21], "2023c": 20, "chart": 20, "\u03b3": 20, "secretas": [20, 21], "substrat": [20, 21], "cheng06": [20, 21], "cheng": 20, "2006": 20, "larg": 20, "disulphid": 20, "bridg": [20, 21], "kernel": 20, "recurs": 20, "neural": 20, "network": 20, "graph": 20, "match": 20, "struct": 20, "funct": 20, "kawashima08": [20, 21], "kawashima": 20, "2008": 20, "aid": 20, "databas": 20, "report": 20, "nucleic": 20, "magnan09": [20, 21], "magnan": 20, "randal": 20, 
"baldi": 20, "2009": 20, "accur": 20, "solubl": [20, 21], "bioinformat": 20, "galiez16": [20, 21], "galiez": 20, "2016": 20, "viral": 20, "capsid": [20, 21], "tail": [20, 21], "song18": [20, 21], "song": 20, "2018": 20, "throughput": 20, "cleavag": [20, 21], "site": [20, 21], "90": 20, "proteas": 20, "accuraci": 20, "shen19": [20, 21], "shen": 20, "2019": 20, "subcellular": [20, 21], "local": 20, "evolutionari": 20, "chou": 20, "pseaac": 20, "j": 20, "theor": 20, "biol": 20, "tang20": [20, 21], "tang": 20, "2020": 20, "intrins": [20, 21], "disord": [20, 21], "region": [20, 21], "teng21": [20, 21], "teng": 20, "2021": 20, "amyloidogen": [20, 21], "pseudo": 20, "composit": 20, "tripeptid": 20, "bmc": 20, "yang21": [20, 21], "yang": 20, "granular": 20, "multipl": 20, "rna": [20, 21], "bind": [20, 21], "appl": 20, "chronolog": 21, "histori": 21, "t1_overview_benchmark": 21, "t2_overview_scal": 21, "begin": 21, "append": 21, "_pu": 21, "caspas": 21, "adjac": 21, "furin": 21, "long": 21, "ldr": 21, "metallopeptidas": 21, "mmp2": 21, "rbp60": 21, "solvent": 21, "sa": 21, "expos": 21, "buri": 21, "amyloidognen": 21, "capdsid": 21, "disulfid": 21, "ss": 21, "bond": 21, "cytoplasm": 21, "v": 21, "plasma": 21, "insolubl": 21, "gamma": 21, "unknown": 21, "statu": 21, "min": 21, "max": 21, "586": 21, "raw": 21, "scales_classif": 21, "compress": 21, "60": 21, "flow": 22, "enri": 22, "signatur": 22, "introduc": 23, "togeth": 24, "central": 25, "platform": 25, "novel": 25, "everywher": [26, 27], "In": 26, "binari": 26, "setup": 26, "augment": 26, "smote": 26, "artifici": 26, "Such": 26, "veri": 26, "popular": 26, "deep": 26, "imag": 26, "recognit": 26, "feasibl": 26, "becaus": 26, "slight": 26, "mutat": 26, "alter": 26, "dramat": 26, "often": 26, "great": 26, "quantiti": 26, "besid": 26, "distinguish": 26, "subfield": 26}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "CPP"], [3, 0, 1, "", "CPPPlot"], [4, 0, 1, "", "SequenceFeature"], [5, 0, 1, "", 
"dPULearn"], [6, 3, 1, "", "load_dataset"], [7, 3, 1, "", "load_scales"], [8, 3, 1, "", "plot_gcfs"], [9, 3, 1, "", "plot_get_cdict"], [10, 3, 1, "", "plot_get_cmap"], [11, 3, 1, "", "plot_set_legend"], [12, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "center_labels_"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "cluster_naming"], [1, 1, 1, "", "correlation"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 1, 1, "", "get_cluster_centers"], [1, 1, 1, "", "get_cluster_medoids"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "medoid_ind_"], [1, 2, 1, "", "medoid_labels_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "n_clusters"]], "aaanalysis.CPP": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "heatmap"], [3, 1, 1, "", "profile"], [3, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "add_dif"], [4, 1, 1, "", "add_feat_value"], [4, 1, 1, "", "add_position"], [4, 1, 1, "", "feat_matrix"], [4, 1, 1, "", "feat_names"], [4, 1, 1, "", "get_df_parts"], [4, 1, 1, "", "get_features"], [4, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "eval"], [5, 1, 1, "", "fit"], [5, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 13, 24, 26], "featur": 0, "engin": 0, "pu": [0, 26], "learn": [0, 26], "explain": [0, 27], "ai": [0, 27], "perturb": 0, "plot": 0, "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14], "aaclust": 1, "note": [1, 2, 4, 5, 12], "cpp": [2, 25], "cppplot": 3, "exampl": [3, 4, 5, 6, 11, 12, 14], "sequencefeatur": 4, "dpulearn": 5, "load_dataset": 
6, "load_scal": 7, "plot_gcf": 8, "plot_get_cdict": 9, "plot_get_cmap": 10, "plot_set_legend": 11, "plot_set": 12, "load": 13, "welcom": 14, "document": [14, 15], "instal": [14, 15], "overview": [14, 21], "refer": [14, 20], "indic": 14, "tabl": [14, 21], "citat": 14, "contribut": 15, "introduct": [15, 18], "vision": 15, "object": 15, "non": 15, "goal": 15, "principl": [15, 22], "bug": 15, "report": 15, "latest": 15, "version": 15, "local": 15, "develop": 15, "environ": 15, "fork": 15, "clone": 15, "depend": 15, "run": 15, "unit": 15, "test": 15, "pull": 15, "request": 15, "preview": 15, "chang": 15, "name": 15, "convent": 15, "class": 15, "templat": 15, "function": 15, "method": 15, "code": 15, "philosophi": 15, "style": 15, "layer": 15, "build": 15, "doc": 15, "workflow": 18, "algorithm": 20, "dataset": [20, 21], "benchmark": [20, 21], "us": [20, 25], "case": 20, "further": [20, 28], "inform": 20, "protein": 21, "amino": [21, 23], "acid": [21, 23], "scale": [21, 23], "usag": 22, "aaontologi": 23, "classif": 23, "flow": 24, "enri": 24, "point": 24, "identifi": 25, "physicochem": 25, "signatur": 25, "from": 26, "unbalanc": 26, "small": 26, "what": [26, 27], "i": [26, 27], "sequenc": 27, "level": 27, "tutori": 28, "quick": 28, "start": 28}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data": [[0, "data"]], "Feature Engineering": [[0, "feature-engineering"]], "PU Learning": [[0, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], "aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [2, null], [2, null], [4, null], [4, null], [4, 
null], [4, null], [4, null], [5, null], [5, null], [12, null]], "aaanalysis.CPP": [[2, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[3, "aaanalysis-cppplot"]], "Examples": [[3, null], [4, null], [4, null], [5, null], [6, null], [11, null], [12, null]], "aaanalysis.SequenceFeature": [[4, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[5, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[6, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[7, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[8, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[9, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_cmap": [[10, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_set_legend": [[11, "aaanalysis-plot-set-legend"]], "aaanalysis.plot_settings": [[12, "aaanalysis-plot-settings"]], "Data loading": [[13, "data-loading"]], "Welcome to the AAanalysis documentation!": [[14, "welcome-to-the-aaanalysis-documentation"]], "Install": [[14, "install"]], "OVERVIEW": [[14, null]], "EXAMPLES": [[14, null]], "REFERENCES": [[14, null]], "Indices and tables": [[14, "indices-and-tables"]], "Citation": [[14, "citation"]], "Contributing": [[15, "contributing"]], "Introduction": [[15, "introduction"], [18, "introduction"]], "Vision": [[15, "vision"]], "Objectives": [[15, "objectives"]], "Non-goals": [[15, "non-goals"]], "Principles": [[15, "principles"]], "Bug Reports": [[15, "bug-reports"]], "Installation": [[15, "installation"]], "Latest Version": [[15, "latest-version"]], "Local Development Environment": [[15, "local-development-environment"]], "Fork and Clone": [[15, "fork-and-clone"]], "Install Dependencies": [[15, "install-dependencies"]], "Run Unit Tests": [[15, "run-unit-tests"]], "Pull Requests": [[15, "pull-requests"]], "Preview Changes": [[15, "preview-changes"]], "Documentation": [[15, "documentation"]], "Naming Conventions": [[15, "naming-conventions"]], "Class Templates": [[15, "class-templates"]], "Function and Method Naming": [[15, 
"function-and-method-naming"]], "Code Philosophy": [[15, "code-philosophy"]], "Documentation Style": [[15, "documentation-style"]], "Documentation Layers": [[15, "documentation-layers"]], "Building the Docs": [[15, "building-the-docs"]], "Workflow": [[18, "workflow"]], "References": [[20, "references"]], "Algorithms": [[20, "algorithms"]], "Datasets and Benchmarks": [[20, "datasets-and-benchmarks"]], "Use Cases": [[20, "use-cases"]], "Further Information": [[20, "further-information"]], "Tables": [[21, "tables"]], "Overview Table": [[21, "overview-table"]], "Protein Benchmark Datasets": [[21, "protein-benchmark-datasets"]], "Amino Acid Scale Datasets": [[21, "amino-acid-scale-datasets"]], "Usage Principles": [[22, "usage-principles"]], "AAontology: Classification of amino acid scales": [[23, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[24, "data-flow-and-enry-points"]], "Identifying Physicochemical Signatures using CPP": [[25, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[26, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[26, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[27, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[27, "what-is-explainable-ai"]], "Tutorials": [[28, "tutorials"]], "Quick start": [[28, "quick-start"]], "Further Tutorials": [[28, "further-tutorials"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "center_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.center_labels_"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "cluster_naming() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.cluster_naming"]], "correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.correlation"]], "eval() (aaanalysis.aaclust 
method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "get_cluster_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_centers"]], "get_cluster_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_medoids"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "medoid_ind_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_ind_"]], "medoid_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_labels_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "cpp (class in aaanalysis)": [[2, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp static method)": [[2, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[3, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[4, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[4, 
"aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[5, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[5, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[6, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[7, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[8, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[9, "aaanalysis.plot_get_cdict"]], "plot_get_cmap() (in module aaanalysis)": [[10, "aaanalysis.plot_get_cmap"]], "plot_set_legend() (in module aaanalysis)": [[11, "aaanalysis.plot_set_legend"]], "plot_settings() (in module aaanalysis)": [[12, "aaanalysis.plot_settings"]]}})
\ No newline at end of file
diff --git a/docs/source/__pycache__/create_notebooks_docs.cpython-39.pyc b/docs/source/__pycache__/create_notebooks_docs.cpython-39.pyc
index d2868c10..6c116c62 100644
Binary files a/docs/source/__pycache__/create_notebooks_docs.cpython-39.pyc and b/docs/source/__pycache__/create_notebooks_docs.cpython-39.pyc differ
diff --git a/docs/source/__pycache__/create_tables_doc.cpython-39.pyc b/docs/source/__pycache__/create_tables_doc.cpython-39.pyc
index 6e8da10e..0f698b86 100644
Binary files a/docs/source/__pycache__/create_tables_doc.cpython-39.pyc and b/docs/source/__pycache__/create_tables_doc.cpython-39.pyc differ
diff --git a/docs/source/conf.py b/docs/source/conf.py
index e7ee5a66..cae920f7 100755
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -9,8 +9,12 @@
 
 sys.path.append(os.path.abspath('.'))
 
+
+# Create notebooks rst and table rst first
 from create_tables_doc import generate_table_rst
 from create_notebooks_docs import export_notebooks_to_rst
+export_notebooks_to_rst()
+generate_table_rst()
 
 # -- Path and Platform setup --------------------------------------------------
 path_source = os.path.join(os.path.dirname(__file__))
@@ -58,6 +62,7 @@
     # 'pydata_sphinx_theme',  # Theme with a focus on long-form content and optimized for _data-focused libraries
 ]
 
+
 # -- Autodoc & Numpydoc settings ----------------------------------------------
 # Autodoc settings
 # See https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#configuration
@@ -110,9 +115,9 @@
 napoleon_custom_sections = None
 
 
-# -- Juypter tutorials integration -------------------------------------------
+# -- Jupyter tutorials integration --------------------------------------------
 nbsphinx_execute = 'never'
-export_notebooks_to_rst()
+
 
 # -- Intersphinx mapping -----------------------------------------------------
 intersphinx_mapping = {
@@ -172,8 +177,7 @@
      "Advanced analysis tools for researchers.", "Miscellaneous"),
 ]
 
-# Create table.rst
-generate_table_rst()
+
 
 # -- Linkcode configuration ---------------------------------------------------
 _module_path = os.path.dirname(importlib.util.find_spec("aaanalysis").origin)  # type: ignore
diff --git a/docs/source/create_notebooks_docs.py b/docs/source/create_notebooks_docs.py
index a77d9642..95f0ebba 100644
--- a/docs/source/create_notebooks_docs.py
+++ b/docs/source/create_notebooks_docs.py
@@ -37,11 +37,9 @@ def export_notebooks_to_rst():
     for filename in os.listdir(FOLDER_NOTEBOOKS):
         if filename.endswith('.ipynb') and filename not in LIST_EXCLUDE:
             full_path = os.path.join(FOLDER_NOTEBOOKS, filename)
-
             # Load the notebook
             with open(full_path, 'r') as f:
                 notebook = nbformat.read(f, as_version=4)
-
             # Export to RST
             rst_exporter = nbconvert.RSTExporter()
             rst_data, _ = rst_exporter.from_notebook_node(notebook)
diff --git a/docs/source/create_tables_doc.py b/docs/source/create_tables_doc.py
index 1c4d825e..018891d6 100644
--- a/docs/source/create_tables_doc.py
+++ b/docs/source/create_tables_doc.py
@@ -12,7 +12,7 @@
 FILE_REF = FOLDER_IND + "references.rst"
 FILE_TABLE_TEMPLATE = FOLDER_IND + "tables_template.rst"
 FILE_TABLE_SAVED = FOLDER_IND + "tables.rst"
-FILE_MAPPER = FOLDER_TABLES + "0_mapper.xlsx"
+FILE_MAPPER = FOLDER_TABLES + "t0_mapper.xlsx"
-LIST_TABLES = list(sorted([x for x in os.listdir(FOLDER_TABLES) if x != "0_mapper.xlsx"]))
+LIST_TABLES = list(sorted([x for x in os.listdir(FOLDER_TABLES) if x != "t0_mapper.xlsx"]))
 
 COL_MAP_TABLE = "Table"
@@ -20,6 +20,8 @@
 COL_REF = "Reference"
 
 COLUMN_WIDTH = 8
+STR_REMOVE = "_XXX"  # Suffix stripped from the anchor markers; must match the one used in tables_template.rst
+STR_ADD_TABLE = "ADD-TABLE"
 
 
 # Helper Functions
@@ -89,7 +91,7 @@ def generate_table_rst():
     overview_table_rst = _convert_excel_to_rst(df_mapper)
 
     # Generate the tables and store them in a dictionary
-    tables_dict = {"0_mapper": overview_table_rst}
+    tables_dict = {"t0_mapper": overview_table_rst}
     for index, row in df_mapper.iterrows():
         table_name = row[COL_MAP_TABLE]
         df = pd.read_excel(FOLDER_TABLES + _f_xlsx(on=True, file=table_name))
@@ -101,16 +103,19 @@ def generate_table_rst():
 
     # Initialize variables
     rst_content = ""
-
+    table_name = ""
     # Loop through the lines of the template
     for line in template_lines:
-        rst_content += line
-        # Check for hooks like ".. _1_overview_benchmarks:"
+        # Check for hooks like ".. _t1_overview_benchmarks_XXX:"
         match = re.search(r'\.\. _(\w+):', line)
-        if match:
-            table_marker = match.group(1)
-            if table_marker in tables_dict:
-                rst_content += "\n" + tables_dict[table_marker] + "\n"
+        if not match:
+            rst_content += line
+        else:
+            line_with_new_marker = line.replace(STR_REMOVE, "")
+            rst_content += line_with_new_marker
+            table_name = match.group(1).replace(STR_REMOVE, "")
+        if STR_ADD_TABLE in line and table_name in tables_dict:
+            rst_content += "\n" + tables_dict[table_name] + "\n"
 
     # Write the new content to the output .rst file
     with open(FILE_TABLE_SAVED, 'w') as f:
diff --git a/docs/source/index/tables.rst b/docs/source/index/tables.rst
index 89b21fb5..2d37e8bb 100644
--- a/docs/source/index/tables.rst
+++ b/docs/source/index/tables.rst
@@ -5,28 +5,35 @@
    for tables.rst, which is automatically generated based on the information here and
    in the .csv tables from the /tables directory.
 
-   To add a new table:
-   1. Save it as a .csv file in the /tables directory.
-   2. Add an entry for it in the "Overview Table" section below.
-   3. Add a new section describing it, including each column and any important data types (e.g., categories).
+   Instructions for Adding a New Table:
+   1. Store the table as a .xlsx file in the index/tables directory. Name it using the format tX,
+      where X is incremented based on the last entry's number.
+   2. Update the t0_mapper.xlsx with a corresponding entry for the new table.
+   3. Create a new descriptive section here that elucidates the table's columns and any
+      essential data types, such as categories.
 
    Note: Each table should include a 'Reference' column.
 
-   Ignore the warning: 'tables_template.rst: WARNING: document isn't included in any toctree.'
+   # Key Annotations for Automated Table Generation via create_tables_doc.py:
+   _XXX: A string to be stripped from the references. This prevents redundancies that may result
+         in broken links.
+   ADD-TABLE: Placeholder indicating where tables for the corresponding section should be inserted.
 ..
 
 Tables
-======================
+======
 
 .. contents::
     :local:
     :depth: 1
 
+.. _t0_mapper:
+
 Overview Table
 --------------
 All tables from the AAanalysis documentation are listed here, in chronological order based on the project history.
 
-.. _0_mapper:
+ADD-TABLE
 
 .. list-table::
    :header-rows: 1
@@ -35,14 +42,16 @@ All tables from the AAanalysis documentation are listed here, in chronological o
    * - Table
      - Description
      - See Also
-   * - 1_overview_benchmarks
+   * - t1_overview_benchmarks
      - Protein benchmark datasets
      - aa.load_dataset
-   * - 2_overview_scales
+   * - t2_overview_scales
      - Amino acid scale datasets
      - aa.load_scales
 
 
+.. _t1_overview_benchmarks:
+
 Protein Benchmark Datasets
 --------------------------
 Three types of benchmark datasets are provided:
@@ -55,7 +64,7 @@ Datasets are named beginning with a classification (e.g., 'AA_LDR', 'DOM_GSEC',
 Some datasets have an additional version for positive-unlabeled (PU) learning containing only positive (1)
 and unlabeled (2) data samples, as indicated by appending '_PU' to the dataset name (e.g., 'DOM_GSEC_PU').
 
-.. _1_overview_benchmarks:
+ADD-TABLE
 
 .. list-table::
    :header-rows: 1
@@ -213,11 +222,13 @@ and unlabeled (2) data samples, as indicated by appending '_PU' to the dataset n
      - 1 (substrate), 2 (unknown substrate status)
 
 
+.. _t2_overview_scales:
+
 Amino Acid Scale Datasets
 -------------------------
 Various amino acid scale datasets are provided.
 
-.. _2_overview_scales:
+ADD-TABLE
 
 .. list-table::
    :header-rows: 1
@@ -252,3 +263,4 @@ Various amino acid scale datasets are provided.
      - 60
      - :ref:`Breimann23a <Breimann23a>`
 
+
diff --git a/docs/source/index/tables/0_mapper.xlsx b/docs/source/index/tables/0_mapper.xlsx
deleted file mode 100644
index c8ae9594..00000000
Binary files a/docs/source/index/tables/0_mapper.xlsx and /dev/null differ
diff --git a/docs/source/index/tables/t0_mapper.xlsx b/docs/source/index/tables/t0_mapper.xlsx
new file mode 100644
index 00000000..061537ea
Binary files /dev/null and b/docs/source/index/tables/t0_mapper.xlsx differ
diff --git a/docs/source/index/tables/1_overview_benchmarks.xlsx b/docs/source/index/tables/t1_overview_benchmarks.xlsx
similarity index 100%
rename from docs/source/index/tables/1_overview_benchmarks.xlsx
rename to docs/source/index/tables/t1_overview_benchmarks.xlsx
diff --git a/docs/source/index/tables/2_overview_scales.xlsx b/docs/source/index/tables/t2_overview_scales.xlsx
similarity index 100%
rename from docs/source/index/tables/2_overview_scales.xlsx
rename to docs/source/index/tables/t2_overview_scales.xlsx
diff --git a/docs/source/index/tables_template.rst b/docs/source/index/tables_template.rst
index 794520b0..785ee727 100755
--- a/docs/source/index/tables_template.rst
+++ b/docs/source/index/tables_template.rst
@@ -5,28 +5,37 @@
    for tables.rst, which is automatically generated based on the information here and
    in the .csv tables from the /tables directory.
 
-   To add a new table:
-   1. Save it as a .csv file in the /tables directory.
-   2. Add an entry for it in the "Overview Table" section below.
-   3. Add a new section describing it, including each column and any important data types (e.g., categories).
+   Instructions for Adding a New Table:
+   1. Store the table as a .xlsx file in the index/tables directory. Name it using the format tX,
+      where X is incremented based on the last entry's number.
+   2. Update the t0_mapper.xlsx with a corresponding entry for the new table.
+   3. Create a new descriptive section here that elucidates the table's columns and any
+      essential data types, such as categories.
 
    Note: Each table should include a 'Reference' column.
 
-   Ignore the warning: 'tables_template.rst: WARNING: document isn't included in any toctree.'
+   # Key Annotations for Automated Table Generation via create_tables_doc.py:
+   _XXX: A string to be stripped from the references. This prevents redundancies that may result
+         in broken links.
+   ADD-TABLE: Placeholder indicating where tables for the corresponding section should be inserted.
 ..
 
 Tables
-======================
+======
 
 .. contents::
     :local:
     :depth: 1
 
+.. _t0_mapper_XXX:
+
 Overview Table
 --------------
 All tables from the AAanalysis documentation are listed here, in chronological order based on the project history.
 
-.. _0_mapper:
+ADD-TABLE
+
+.. _t1_overview_benchmarks_XXX:
 
 Protein Benchmark Datasets
 --------------------------
@@ -40,10 +49,13 @@ Datasets are named beginning with a classification (e.g., 'AA_LDR', 'DOM_GSEC',
 Some datasets have an additional version for positive-unlabeled (PU) learning containing only positive (1)
 and unlabeled (2) data samples, as indicated by appending '_PU' to the dataset name (e.g., 'DOM_GSEC_PU').
 
-.. _1_overview_benchmarks:
+ADD-TABLE
+
+.. _t2_overview_scales_XXX:
 
 Amino Acid Scale Datasets
 -------------------------
 Various amino acid scale datasets are provided.
 
-.. _2_overview_scales:
+ADD-TABLE
+