Skip to content

Commit

Permalink
Change INFO to Overview in load_dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
breimanntools committed Oct 10, 2023
1 parent 4f7a589 commit 05d4ae3
Show file tree
Hide file tree
Showing 25 changed files with 20 additions and 19 deletions.
File renamed without changes.
Binary file modified aaanalysis/data_handling/__pycache__/_load_dataset.cpython-39.pyc
Binary file not shown.
19 changes: 10 additions & 9 deletions aaanalysis/data_handling/_load_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,17 @@

# I Helper Functions
# Check functions
def check_name_of_dataset(name="INFO", folder_in=None):
def check_name_of_dataset(name="Overview", folder_in=None):
""""""
if name == "INFO":
if name == "Overview":
return
list_datasets = [x.split(".")[0] for x in os.listdir(folder_in) if "." in x]
list_datasets = [x.split(".")[0] for x in os.listdir(folder_in)
if "." in x and not x.startswith(".")]
if name not in list_datasets:
list_aa = [x for x in list_datasets if 'AA' in x]
list_seq = [x for x in list_datasets if 'SEQ' in x]
list_dom = [x for x in list_datasets if 'DOM' in x]
raise ValueError(f"'name' ({name}) is not valid."
raise ValueError(f"'name' ({name}) is not valid. Chose one of the following:"
f"\n Amino acid datasets: {list_aa}"
f"\n Sequence datasets: {list_seq}"
f"\n Domain datasets: {list_dom}")
Expand Down Expand Up @@ -119,7 +120,7 @@ def _get_aa_window(df_seq=None, aa_window_size=9):


# II Main Functions
def load_dataset(name: str = "INFO",
def load_dataset(name: str = "Overview",
n: Optional[int] = None,
random: bool = False,
non_canonical_aa: Literal["remove", "keep", "gap"] = "remove",
Expand All @@ -131,7 +132,7 @@ def load_dataset(name: str = "INFO",
Loads protein benchmarking datasets.
The benchmarks are categorized into amino acid ('AA'), domain ('DOM'), and sequence ('SEQ') level datasets.
By default, an overview table is provided (``name='INFO'``). For in-depth details, refer to [Breimann23a]_.
By default, an overview table is provided (``name='Overview'``). For in-depth details, refer to [Breimann23a]_.
Parameters
----------
Expand Down Expand Up @@ -159,7 +160,7 @@ def load_dataset(name: str = "INFO",
-------
pandas.DataFrame
A DataFrame of either the selected sequence dataset (``df_seq``) or
general info on all benchmark datasets (``df_info``).
overview on all benchmark datasets (``df_overview``).
Notes
-----
Expand Down Expand Up @@ -188,8 +189,8 @@ def load_dataset(name: str = "INFO",
check_min_max_val(min_len=min_len, max_len=max_len)
check_aa_window_size(aa_window_size=aa_window_size)
# Load overview table
if name == "INFO":
return ut.read_excel_cached(FOLDER_BENCHMARKS + "INFO_benchmarks.xlsx")
if name == "Overview":
return ut.read_excel_cached(FOLDER_BENCHMARKS + "Overview.xlsx")
df = ut.read_csv_cached(FOLDER_BENCHMARKS + name + ".tsv", sep="\t")
# Filter data
if min_len is not None:
Expand Down
Binary file not shown.
Binary file modified docs/build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/build/doctrees/generated/aaanalysis.AAclustPlot.doctree
Binary file not shown.
Binary file modified docs/build/doctrees/generated/aaanalysis.load_dataset.doctree
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
12 changes: 6 additions & 6 deletions docs/build/html/generated/aaanalysis.AAclustPlot.html

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions docs/build/html/generated/aaanalysis.load_dataset.html
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@
<h1>aaanalysis.load_dataset<a class="headerlink" href="#aaanalysis-load-dataset" title="Permalink to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="aaanalysis.load_dataset">
<span class="sig-prename descclassname"><span class="pre">aaanalysis.</span></span><span class="sig-name descname"><span class="pre">load_dataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'INFO'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">non_canonical_aa</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'remove'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_len</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_len</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">aa_window_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">9</span></span></em><span class="sig-paren">)</span><a class="reference external" href="https://github.com/breimanntools/aaanalysis/tree/master/aaanalysis/data_handling/_load_dataset.py#L122-L224"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#aaanalysis.load_dataset" title="Permalink to this definition"></a></dt>
<span class="sig-prename descclassname"><span class="pre">aaanalysis.</span></span><span class="sig-name descname"><span class="pre">load_dataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'Overview'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">non_canonical_aa</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'remove'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">min_len</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_len</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">aa_window_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">9</span></span></em><span class="sig-paren">)</span><a class="reference external" href="https://github.com/breimanntools/aaanalysis/tree/master/aaanalysis/data_handling/_load_dataset.py#L123-L225"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#aaanalysis.load_dataset" title="Permalink to this definition"></a></dt>
<dd><p>Loads protein benchmarking datasets.</p>
<p>The benchmarks are categorized into amino acid (‘AA’), domain (‘DOM’), and sequence (‘SEQ’) level datasets.
By default, an overview table is provided (<code class="docutils literal notranslate"><span class="pre">name='INFO'</span></code>). For in-depth details, refer to <a class="reference internal" href="../index/references.html#breimann23a" id="id1"><span>[Breimann23a]</span></a>.</p>
By default, an overview table is provided (<code class="docutils literal notranslate"><span class="pre">name='Overview'</span></code>). For in-depth details, refer to <a class="reference internal" href="../index/references.html#breimann23a" id="id1"><span>[Breimann23a]</span></a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
Expand All @@ -150,7 +150,7 @@ <h1>aaanalysis.load_dataset<a class="headerlink" href="#aaanalysis-load-dataset"
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>A DataFrame of either the selected sequence dataset (<code class="docutils literal notranslate"><span class="pre">df_seq</span></code>) or
general info on all benchmark datasets (<code class="docutils literal notranslate"><span class="pre">df_info</span></code>).</p>
overview on all benchmark datasets (<code class="docutils literal notranslate"><span class="pre">df_overview</span></code>).</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html#pandas.DataFrame" title="(in pandas v2.1.1)">pandas.DataFrame</a></p>
Expand Down
2 changes: 1 addition & 1 deletion docs/build/html/searchindex.js

Large diffs are not rendered by default.

Binary file modified docs/build/plot_directive/generated/aaanalysis-plot_gcfs-1.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified docs/build/plot_directive/generated/aaanalysis-plot_legend-1.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified tests/unit/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz
Binary file not shown.

0 comments on commit 05d4ae3

Please sign in to comment.