Skip to content

Commit

Permalink
Remove load_dataset docstring for troubleshooting
Browse files Browse the repository at this point in the history
  • Loading branch information
breimanntools committed Sep 20, 2023
1 parent 00e2f98 commit db97251
Show file tree
Hide file tree
Showing 10 changed files with 12 additions and 94 deletions.
Binary file modified aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc
Binary file not shown.
57 changes: 7 additions & 50 deletions aaanalysis/data_loader/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from typing import Optional, Literal
import aaanalysis.utils as ut

# TODO add amino acid window selection
# Constants
STR_AA_GAP = "-"
LIST_CANONICAL_AA = ['N', 'A', 'I', 'V', 'K', 'Q', 'R', 'M', 'H', 'F', 'E', 'D', 'C', 'G', 'L', 'T', 'S', 'Y', 'W', 'P']
Expand Down Expand Up @@ -142,58 +141,16 @@ def _filter_scales(df_cat=None, unclassified_in=False, just_aaindex=False):


# II Main Functions
def load_dataset(name: str = "INFO",
n: Optional[int] = None,
random: bool = False,
non_canonical_aa: Literal["remove", "keep", "gap"] = "remove",
min_len: Optional[int] = None,
max_len: Optional[int] = None,
aa_window_size: Optional[int] = 9,
) -> DataFrame:
def load_dataset(name="INFO",
n=None,
random=False,
non_canonical_aa="remove",
min_len=None,
max_len=None,
aa_window_size=9):
"""
Load protein benchmarking datasets.
The benchmarks are categorized into amino acid ('AA'), domain ('DOM'), and sequence ('SEQ') level
datasets. Use default settings (``name='INFO'``) for an overview table. Detailed analysis is in [Breimann23a]_.
Parameters
----------
name
Name of the dataset. See 'Dataset' column in overview table.
n
Number of proteins per class, selected by index. If None, the whole dataset will be returned.
random
If True, ``n`` randomly selected proteins per class will be chosen.
non_canonical_aa
Options for modifying non-canonical amino acids:
- 'remove': Remove sequences containing non-canonical amino acids.
- 'keep': Do not remove sequences containing non-canonical amino acids.
- 'gap': Non-canonical amino acids are replaced by gap symbol ('-').
min_len
Minimum length of sequences for filtering, disabled by default.
max_len
Maximum length of sequences for filtering, disabled by default.
aa_window_size
Length of amino acid window, only used for amino acid dataset level (``name='AA_'``) and if ``n`` is given.
Returns
-------
DataFrame
Dataframe (df_seq) containing the selected sequence dataset.
See also
--------
See an overview of all benchmarks in :ref:`1_overview_benchmarks` and a detailed usage tutorial in the
`data loader tutorial <tutorials/tutorial2_data_loader.ipynb>`_.
Examples
--------
>>> import aaanalysis as aa
>>> df_seq = aa.load_dataset(name="SEQ_AMYLO", n=100)
"""
check_name_of_dataset(name=name, folder_in=FOLDER_BENCHMARKS)
Expand Down
Binary file modified docs/build/doctrees/environment.pickle
Binary file not shown.
Binary file not shown.
Binary file modified docs/build/doctrees/generated/aaanalysis.load_scales.doctree
Binary file not shown.
41 changes: 1 addition & 40 deletions docs/build/html/generated/aaanalysis.load_dataset.html
Original file line number Diff line number Diff line change
Expand Up @@ -125,47 +125,8 @@
<h1>aaanalysis.load_dataset<a class="headerlink" href="#aaanalysis-load-dataset" title="Permalink to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="aaanalysis.load_dataset">
<span class="sig-prename descclassname"><span class="pre">aaanalysis.</span></span><span class="sig-name descname"><span class="pre">load_dataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'INFO'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">non_canonical_aa</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'remove'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_len</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_len</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">aa_window_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">9</span></span></em><span class="sig-paren">)</span><a class="reference external" href="https://github.com/breimanntools/aaanalysis/tree/master/aaanalysis/data_loader/data_loader.py#L145-L238"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#aaanalysis.load_dataset" title="Permalink to this definition"></a></dt>
<span class="sig-prename descclassname"><span class="pre">aaanalysis.</span></span><span class="sig-name descname"><span class="pre">load_dataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'INFO'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">non_canonical_aa</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'remove'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_len</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_len</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">aa_window_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">9</span></span></em><span class="sig-paren">)</span><a class="reference external" href="https://github.com/breimanntools/aaanalysis/tree/master/aaanalysis/data_loader/data_loader.py#L144-L195"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#aaanalysis.load_dataset" title="Permalink to this definition"></a></dt>
<dd><p>Load protein benchmarking datasets.</p>
<p>The benchmarks are categorized into amino acid (‘AA’), domain (‘DOM’), and sequence (‘SEQ’) level
datasets. Use default settings (<code class="docutils literal notranslate"><span class="pre">name='INFO'</span></code>) for an overview table. Detailed analysis is in <a class="reference internal" href="../index/references.html#breimann23a" id="id1"><span>[Breimann23a]</span></a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.11)"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a>) – Name of the dataset. See ‘Dataset’ column in overview table.</p></li>
<li><p><strong>n</strong> (<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Optional" title="(in Python v3.11)"><code class="xref py py-data docutils literal notranslate"><span class="pre">Optional</span></code></a>[<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.11)"><code class="xref py py-class docutils literal notranslate"><span class="pre">int</span></code></a>]) – Number of proteins per class, selected by index. If None, the whole dataset will be returned.</p></li>
<li><p><strong>random</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.11)"><code class="xref py py-class docutils literal notranslate"><span class="pre">bool</span></code></a>) – If True, <code class="docutils literal notranslate"><span class="pre">n</span></code> random selected proteins per class will be chosen.</p></li>
<li><p><strong>non_canonical_aa</strong> (<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Literal" title="(in Python v3.11)"><code class="xref py py-data docutils literal notranslate"><span class="pre">Literal</span></code></a>[‘remove’, ‘keep’, ‘gap’]) – <p>Options for modifying non-canonical amino acids:</p>
<ul>
<li><p>’remove’: Remove sequences containing non-canonical amino acids.</p></li>
<li><p>‘keep’: Do not remove sequences containing non-canonical amino acids.</p></li>
<li><p>’gap’: Non-canonical amino acids are replaced by gap symbol (‘X’).</p></li>
</ul>
</p></li>
<li><p><strong>min_len</strong> (<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Optional" title="(in Python v3.11)"><code class="xref py py-data docutils literal notranslate"><span class="pre">Optional</span></code></a>[<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.11)"><code class="xref py py-class docutils literal notranslate"><span class="pre">int</span></code></a>]) – Minimum length of sequences for filtering, disabled by default.</p></li>
<li><p><strong>max_len</strong> (<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Optional" title="(in Python v3.11)"><code class="xref py py-data docutils literal notranslate"><span class="pre">Optional</span></code></a>[<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.11)"><code class="xref py py-class docutils literal notranslate"><span class="pre">int</span></code></a>]) – Maximum length of sequences for filtering, disabled by default.</p></li>
<li><p><strong>aa_window_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Optional" title="(in Python v3.11)"><code class="xref py py-data docutils literal notranslate"><span class="pre">Optional</span></code></a>[<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.11)"><code class="xref py py-class docutils literal notranslate"><span class="pre">int</span></code></a>]) – Length of amino acid window, only used for amino acid dataset level (<code class="docutils literal notranslate"><span class="pre">name='AA_'</span></code>) and if <code class="docutils literal notranslate"><span class="pre">n</span></code> given.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Dataframe (df_seq) containing the selected sequence dataset.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p>DataFrame</p>
</dd>
</dl>
<div class="admonition seealso">
<p class="admonition-title">See also</p>
<p>See an overview of all benchmarks in <a class="reference internal" href="../tables_template.html#overview-benchmarks"><span class="std std-ref">Amino acid scale datasets</span></a> and a detailed usage tutorial in the
<a class="reference external" href="tutorials/tutorial2_data_loader.ipynb">data loader tutorial</a>.</p>
</div>
<div class="admonition-examples admonition">
<p class="admonition-title">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">aaanalysis</span> <span class="k">as</span> <span class="nn">aa</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_seq</span> <span class="o">=</span> <span class="n">aa</span><span class="o">.</span><span class="n">load_dataset</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;SEQ_AMYLO&quot;</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="mi">100</span><span class="p">)</span>
</pre></div>
</div>
</div>
</dd></dl>

</section>
Expand Down
2 changes: 1 addition & 1 deletion docs/build/html/generated/aaanalysis.load_scales.html
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@
<h1>aaanalysis.load_scales<a class="headerlink" href="#aaanalysis-load-scales" title="Permalink to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="aaanalysis.load_scales">
<span class="sig-prename descclassname"><span class="pre">aaanalysis.</span></span><span class="sig-name descname"><span class="pre">load_scales</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'scales'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">just_aaindex</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">unclassified_in</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference external" href="https://github.com/breimanntools/aaanalysis/tree/master/aaanalysis/data_loader/data_loader.py#L242-L276"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#aaanalysis.load_scales" title="Permalink to this definition"></a></dt>
<span class="sig-prename descclassname"><span class="pre">aaanalysis.</span></span><span class="sig-name descname"><span class="pre">load_scales</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'scales'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">just_aaindex</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">unclassified_in</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference external" href="https://github.com/breimanntools/aaanalysis/tree/master/aaanalysis/data_loader/data_loader.py#L199-L233"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#aaanalysis.load_scales" title="Permalink to this definition"></a></dt>
<dd><p>Load amino acid scales, scale classification (AAontology), or scale evaluation.</p>
<p>A thorough analysis of the residue and sequence datasets can be found in <a class="reference internal" href="../index/references.html#breimann23a" id="id1"><span>[Breimann23a]</span></a>.</p>
<dl class="field-list simple">
Expand Down
2 changes: 1 addition & 1 deletion docs/build/html/searchindex.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/requirements_docs.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Adjust these changes to the pyproject.toml
sphinx>=5.3.0 # Newest version not compatible with myst_nb
sphinx>=5.0,<7.0 # Newest version not compatible with myst_nb
sphinx_rtd_theme>=1.3.0 # 1.1.1 # 0.5.2
#sphinx_book_theme>=1.0.1
#pydata_sphinx_theme>=0.13.3
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ threadpoolctl = "^3.1.0"

#... (keep the other dependencies as they are)
[tool.poetry.dev-dependencies]
sphinx = "^5.3"
sphinx = ">=5.0,<7.0"
sphinx_rtd_theme = "^1.3.0"
#sphinx_book_theme = "^1.0.1"
#pydata_sphinx_theme = "^0.13.3"
Expand Down

0 comments on commit db97251

Please sign in to comment.