Skip to content

Commit

Permalink
Update quick_start tutorial
Browse files Browse the repository at this point in the history
  • Loading branch information
breimanntools committed Oct 2, 2023
1 parent a5bb440 commit 7179f13
Show file tree
Hide file tree
Showing 44 changed files with 80 additions and 82 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
/dev_scripts/
/recipe/
/tutorials/.ipynb_checkpoints/
/aaanalysis/_archive/
Binary file modified aaanalysis/__pycache__/utils.cpython-39.pyc
Binary file not shown.
Binary file modified aaanalysis/_utils/__pycache__/_check_type.cpython-39.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion aaanalysis/_utils/_check_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def check_tuple(name=None, val=None, n=None, check_n=True, accept_none=False):
raise ValueError(f"'{name}' ({val}) should be a tuple with {n} elements.")


def check_list(name=None, val=None, accept_none=False, convert=True):
def check_list_like(name=None, val=None, accept_none=False, convert=True):
""""""
if accept_none and val is None:
return None
Expand Down
Binary file modified aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc
Binary file not shown.
14 changes: 7 additions & 7 deletions aaanalysis/aaclust/aaclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def fit(self,
# Check input
X = ut.check_X(X=X)
ut.check_X_unique_samples(X=X)
names = ut.check_list(name="names", val=names, accept_none=True)
names = ut.check_list_like(name="names", val=names, accept_none=True)
ut.check_number_range(name="mint_th", val=min_th, min_val=0, max_val=1, just_int=False, accept_none=False)
ut.check_number_range(name="n_clusters", val=n_clusters, min_val=1, just_int=True, accept_none=True)
check_merge_metric(merge_metric=merge_metric)
Expand Down Expand Up @@ -391,7 +391,7 @@ def name_clusters(X: ut.ArrayLike2D,
X = ut.check_X(X=X)
ut.check_X_unique_samples(X=X)
labels = ut.check_labels(labels=labels)
names = ut.check_list(name="names", val=names, accept_none=False)
names = ut.check_list_like(name="names", val=names, accept_none=False)
ut.check_bool(name="shorten_names", val=shorten_names)
ut.check_match_X_labels(X=X, labels=labels)
check_match_X_names(X=X, names=names, accept_none=False)
Expand Down Expand Up @@ -487,7 +487,7 @@ def comp_correlation(X: ut.ArrayLike2D,
Returns
-------
df_corr
df_corr : pd.DataFrame
DataFrame with correlation either for each pair in ``X`` of shape (n_samples, n_samples) or
for each pair between ``X`` and ``X_ref`` of shape (n_samples, n_samples_ref).
Expand Down Expand Up @@ -535,15 +535,15 @@ def comp_coverage(names : [List[str]] =None,
names
List of sample names. Should be subset of ``names_ref``.
names_ref
List of reference sample names. Should superset of ``names``.
List of reference sample names. Should be superset of ``names``.
Returns
-------
coverage
coverage : float
Percentage of unique names from ``names`` that are found in ``names_ref``.
"""
names = ut.check_list(name="names", val=names, accept_none=False)
names_ref = ut.check_list(name="names_ref", val=names_ref, accept_none=False)
names = ut.check_list_like(name="names", val=names, accept_none=False)
names_ref = ut.check_list_like(name="names_ref", val=names_ref, accept_none=False)
ut.check_superset_subset(subset=names, name_subset="names",
superset=names_ref, name_superset="names_ref")
# Compute coverage
Expand Down
Binary file modified aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc
Binary file not shown.
Binary file modified aaanalysis/cpp/__pycache__/cpp_plot.cpython-39.pyc
Binary file not shown.
Binary file modified aaanalysis/cpp/__pycache__/feature.cpython-39.pyc
Binary file not shown.
5 changes: 2 additions & 3 deletions aaanalysis/cpp/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""
import pandas as pd

import aaanalysis.data_handling.load_scales_
from aaanalysis.cpp.feature import SequenceFeature
from aaanalysis.cpp._feature_stat import SequenceFeatureStatistics

Expand Down Expand Up @@ -60,9 +59,9 @@ def __init__(self, df_scales=None, df_cat=None, df_parts=None, split_kws=None,
# Load default scales if not specified
sf = SequenceFeature()
if df_cat is None:
df_cat = aaanalysis.data_loader.load_scales_.load_scales(name=ut.STR_SCALE_CAT)
df_cat = aa.load_scales(name=ut.STR_SCALE_CAT)
if df_scales is None:
df_scales = aaanalysis.data_loader.load_scales_.load_scales()
df_scales = aa.load_scales()
if split_kws is None:
split_kws = sf.get_split_kws()
ut.check_bool(name="verbose", val=verbose)
Expand Down
5 changes: 2 additions & 3 deletions aaanalysis/cpp/cpp_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
import seaborn as sns
import inspect

import aaanalysis
import aaanalysis.data_handling.load_scales_
from aaanalysis.cpp._cpp import CPPPlots, get_optimal_fontsize
import aaanalysis as aa

import aaanalysis.utils as ut

Expand Down Expand Up @@ -230,7 +229,7 @@ def __init__(self, df_cat=None, accept_gaps=False, jmd_n_len=10, jmd_c_len=10, e

ut.check_bool(name="verbose", val=verbose)
if df_cat is None:
df_cat = aaanalysis.data_loader.load_scales_.load_scales(name=ut.COL_SCALE_ID)
df_cat = aa.load_scales(name=ut.COL_SCALE_ID)
self.df_cat = df_cat
self._verbose = verbose
self._accept_gaps = accept_gaps
Expand Down
7 changes: 3 additions & 4 deletions aaanalysis/cpp/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import multiprocessing as mp
import warnings

import aaanalysis.data_handling.load_scales_
from aaanalysis.cpp._feature_pos import SequenceFeaturePositions
from aaanalysis.cpp._split import Split, SplitRange
from aaanalysis.cpp._part import Parts
Expand Down Expand Up @@ -343,7 +342,7 @@ def get_features(self, list_parts=None, split_kws=None, df_scales=None, all_part
ut.check_split_kws(split_kws=split_kws)
ut.check_df_scales(df_scales=df_scales, accept_none=True)
if df_scales is None:
df_scales = aaanalysis.data_loader.load_scales_.load_scales()
df_scales = aa.load_scales()
if split_kws is None:
split_kws = self.get_split_kws()
scales = list(df_scales)
Expand Down Expand Up @@ -387,7 +386,7 @@ def feat_matrix(features=None, df_parts=None, df_scales=None, accept_gaps=False,
"""
ut.check_number_range(name="j_jobs", val=n_jobs, accept_none=True, min_val=1, just_int=True)
if df_scales is None:
df_scales = aaanalysis.data_loader.load_scales_.load_scales()
df_scales = aa.load_scales()
ut.check_df_scales(df_scales=df_scales)
ut.check_df_parts(df_parts=df_parts)
features = ut.check_features(features=features, parts=df_parts, df_scales=df_scales)
Expand Down Expand Up @@ -459,7 +458,7 @@ def feat_names(features=None, df_cat=None, tmd_len=20, jmd_c_len=10, jmd_n_len=1
features = ut.check_features(features=features)
ut.check_df_cat(df_cat=df_cat)
if df_cat is None:
df_cat = aaanalysis.data_loader.load_scales_.load_scales(name=ut.STR_SCALE_CAT)
df_cat = aa.load_scales(name=ut.STR_SCALE_CAT)
# Get feature names
sfp = SequenceFeaturePositions()
dict_part_pos = sfp.get_dict_part_pos(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len,
Expand Down
2 changes: 1 addition & 1 deletion aaanalysis/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

# Import utility functions explicitly
from aaanalysis._utils._check_type import (check_number_range, check_number_val, check_str, check_bool,
check_dict, check_tuple, check_list,
check_dict, check_tuple, check_list_like,
check_ax)
from aaanalysis._utils._check_data import (check_X, check_X_unique_samples, check_labels, check_match_X_labels,
check_superset_subset,
Expand Down
Binary file modified docs/build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/build/doctrees/generated/aaanalysis.AAclust.doctree
Binary file not shown.
Binary file modified docs/build/doctrees/generated/aaanalysis.CPP.doctree
Binary file not shown.
Binary file modified docs/build/doctrees/generated/aaanalysis.CPPPlot.doctree
Binary file not shown.
Binary file modified docs/build/doctrees/generated/aaanalysis.SequenceFeature.doctree
Binary file not shown.
Binary file modified docs/build/doctrees/generated/tutorial1_quick_start.doctree
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified docs/build/html/_images/output_13_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 6 additions & 6 deletions docs/build/html/_sources/generated/tutorial1_quick_start.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ set of 100 scales, as defined by the ``n_clusters`` parameters:
from sklearn.cluster import AgglomerativeClustering
aac = aa.AAclust(model=AgglomerativeClustering)
X = np.array(df_scales)
scales = aac.fit(X, names=list(df_scales), n_clusters=100)
aac = aa.AAclust(model_class=AgglomerativeClustering)
X = np.array(df_scales).T
scales = aac.fit(X, names=list(df_scales), n_clusters=100).medoid_names_
df_scales = df_scales[scales]
Comparative Physicochemical Profiling (CPP)
Expand Down Expand Up @@ -131,10 +131,10 @@ A feature matrix from a given set of CPP features can be created using
.. parsed-literal::
Mean accuracy of 0.6
Mean accuracy of 0.58
Creating more features with CPP will take some more time, but improve
Creating more features with CPP will take a little time, but will improve
prediction performance:

.. code:: ipython3
Expand All @@ -153,7 +153,7 @@ prediction performance:
sns.barplot(pd.DataFrame({"Baseline": cv_base, "CPP": cv}), palette=["tab:blue", "tab:red"])
plt.ylabel("Mean accuracy", size=aa.plot_gcfs()+1)
plt.ylim(0, 1)
plt.title("Comparison of Feature Engineering Methods")
plt.title("Comparison of Feature Engineering Methods", size=aa.plot_gcfs()-1)
sns.despine()
plt.show()
Expand Down
10 changes: 5 additions & 5 deletions docs/build/html/generated/aaanalysis.AAclust.html
Original file line number Diff line number Diff line change
Expand Up @@ -498,11 +498,11 @@ <h1>aaanalysis.AAclust<a class="headerlink" href="#aaanalysis-aaclust" title="Pe
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>DataFrame with correlation either for each pair in <code class="docutils literal notranslate"><span class="pre">X</span></code> of shape (n_samples, n_samples) or
<dd class="field-even"><p><strong>df_corr</strong> – DataFrame with correlation either for each pair in <code class="docutils literal notranslate"><span class="pre">X</span></code> of shape (n_samples, n_samples) or
for each pair between <code class="docutils literal notranslate"><span class="pre">X</span></code> and <code class="docutils literal notranslate"><span class="pre">X_ref</span></code> of shape (n_samples, n_samples_ref).</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p>df_corr</p>
<dd class="field-odd"><p>pd.DataFrame</p>
</dd>
</dl>
<div class="admonition-notes admonition">
Expand Down Expand Up @@ -532,14 +532,14 @@ <h1>aaanalysis.AAclust<a class="headerlink" href="#aaanalysis-aaclust" title="Pe
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>names</strong> – List of sample names. Should be subset of <code class="docutils literal notranslate"><span class="pre">names_ref</span></code>.</p></li>
<li><p><strong>names_ref</strong> – List of reference sample names. Should superset of <code class="docutils literal notranslate"><span class="pre">names</span></code>.</p></li>
<li><p><strong>names_ref</strong> – List of reference sample names. Should be superset of <code class="docutils literal notranslate"><span class="pre">names</span></code>.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Percentage of unique names from <code class="docutils literal notranslate"><span class="pre">names</span></code> that are found in <code class="docutils literal notranslate"><span class="pre">names_ref</span></code>.</p>
<dd class="field-even"><p><strong>coverage</strong> – Percentage of unique names from <code class="docutils literal notranslate"><span class="pre">names</span></code> that are found in <code class="docutils literal notranslate"><span class="pre">names_ref</span></code>.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p>coverage</p>
<dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.11)">float</a></p>
</dd>
</dl>
</dd></dl>
Expand Down
Loading

0 comments on commit 7179f13

Please sign in to comment.