diff --git a/.gitignore b/.gitignore index 988b9c61..c9ce5eb0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ /dev_scripts/ /recipe/ /tutorials/.ipynb_checkpoints/ +/aaanalysis/_archive/ diff --git a/aaanalysis/__pycache__/utils.cpython-39.pyc b/aaanalysis/__pycache__/utils.cpython-39.pyc index 8e27d44b..c101441e 100644 Binary files a/aaanalysis/__pycache__/utils.cpython-39.pyc and b/aaanalysis/__pycache__/utils.cpython-39.pyc differ diff --git a/aaanalysis/_utils/__pycache__/_check_type.cpython-39.pyc b/aaanalysis/_utils/__pycache__/_check_type.cpython-39.pyc index 71841fa1..eeabce26 100644 Binary files a/aaanalysis/_utils/__pycache__/_check_type.cpython-39.pyc and b/aaanalysis/_utils/__pycache__/_check_type.cpython-39.pyc differ diff --git a/aaanalysis/_utils/_check_type.py b/aaanalysis/_utils/_check_type.py index 5cd73ff0..6ff80bbc 100644 --- a/aaanalysis/_utils/_check_type.py +++ b/aaanalysis/_utils/_check_type.py @@ -70,7 +70,7 @@ def check_tuple(name=None, val=None, n=None, check_n=True, accept_none=False): raise ValueError(f"'{name}' ({val}) should be a tuple with {n} elements.") -def check_list(name=None, val=None, accept_none=False, convert=True): +def check_list_like(name=None, val=None, accept_none=False, convert=True): """""" if accept_none and val is None: return None diff --git a/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc b/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc index c0a20c47..1b275e4a 100644 Binary files a/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc and b/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc differ diff --git a/aaanalysis/aaclust/aaclust.py b/aaanalysis/aaclust/aaclust.py index d40b1933..dfb550cf 100644 --- a/aaanalysis/aaclust/aaclust.py +++ b/aaanalysis/aaclust/aaclust.py @@ -248,7 +248,7 @@ def fit(self, # Check input X = ut.check_X(X=X) ut.check_X_unique_samples(X=X) - names = ut.check_list(name="names", val=names, accept_none=True) + names = ut.check_list_like(name="names", 
val=names, accept_none=True) ut.check_number_range(name="mint_th", val=min_th, min_val=0, max_val=1, just_int=False, accept_none=False) ut.check_number_range(name="n_clusters", val=n_clusters, min_val=1, just_int=True, accept_none=True) check_merge_metric(merge_metric=merge_metric) @@ -391,7 +391,7 @@ def name_clusters(X: ut.ArrayLike2D, X = ut.check_X(X=X) ut.check_X_unique_samples(X=X) labels = ut.check_labels(labels=labels) - names = ut.check_list(name="names", val=names, accept_none=False) + names = ut.check_list_like(name="names", val=names, accept_none=False) ut.check_bool(name="shorten_names", val=shorten_names) ut.check_match_X_labels(X=X, labels=labels) check_match_X_names(X=X, names=names, accept_none=False) @@ -487,7 +487,7 @@ def comp_correlation(X: ut.ArrayLike2D, Returns ------- - df_corr + df_corr : pd.DataFrame DataFrame with correlation either for each pair in ``X`` of shape (n_samples, n_samples) or for each pair between ``X`` and ``X_ref`` of shape (n_samples, n_samples_ref). @@ -535,15 +535,15 @@ def comp_coverage(names : [List[str]] =None, names List of sample names. Should be subset of ``names_ref``. names_ref - List of reference sample names. Should superset of ``names``. + List of reference sample names. Should be superset of ``names``. Returns ------- - coverage + coverage : float Percentage of unique names from ``names`` that are found in ``names_ref``. 
""" - names = ut.check_list(name="names", val=names, accept_none=False) - names_ref = ut.check_list(name="names_ref", val=names_ref, accept_none=False) + names = ut.check_list_like(name="names", val=names, accept_none=False) + names_ref = ut.check_list_like(name="names_ref", val=names_ref, accept_none=False) ut.check_superset_subset(subset=names, name_subset="names", superset=names_ref, name_superset="names_ref") # Compute coverage diff --git a/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc b/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc index 25752c1e..2d02ee5c 100644 Binary files a/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/cpp_plot.cpython-39.pyc b/aaanalysis/cpp/__pycache__/cpp_plot.cpython-39.pyc index 2239c8b6..77973308 100644 Binary files a/aaanalysis/cpp/__pycache__/cpp_plot.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/cpp_plot.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/feature.cpython-39.pyc b/aaanalysis/cpp/__pycache__/feature.cpython-39.pyc index 80662d8d..ce10b1ba 100644 Binary files a/aaanalysis/cpp/__pycache__/feature.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/feature.cpython-39.pyc differ diff --git a/aaanalysis/cpp/cpp.py b/aaanalysis/cpp/cpp.py index 747d11dd..6635d302 100644 --- a/aaanalysis/cpp/cpp.py +++ b/aaanalysis/cpp/cpp.py @@ -3,7 +3,6 @@ """ import pandas as pd -import aaanalysis.data_handling.load_scales_ from aaanalysis.cpp.feature import SequenceFeature from aaanalysis.cpp._feature_stat import SequenceFeatureStatistics @@ -60,9 +59,9 @@ def __init__(self, df_scales=None, df_cat=None, df_parts=None, split_kws=None, # Load default scales if not specified sf = SequenceFeature() if df_cat is None: - df_cat = aaanalysis.data_loader.load_scales_.load_scales(name=ut.STR_SCALE_CAT) + df_cat = aa.load_scales(name=ut.STR_SCALE_CAT) if df_scales is None: - df_scales = aaanalysis.data_loader.load_scales_.load_scales() + 
df_scales = aa.load_scales() if split_kws is None: split_kws = sf.get_split_kws() ut.check_bool(name="verbose", val=verbose) diff --git a/aaanalysis/cpp/cpp_plot.py b/aaanalysis/cpp/cpp_plot.py index cbc9ffdc..e3e2f288 100644 --- a/aaanalysis/cpp/cpp_plot.py +++ b/aaanalysis/cpp/cpp_plot.py @@ -6,9 +6,8 @@ import seaborn as sns import inspect -import aaanalysis -import aaanalysis.data_handling.load_scales_ from aaanalysis.cpp._cpp import CPPPlots, get_optimal_fontsize +import aaanalysis as aa import aaanalysis.utils as ut @@ -230,7 +229,7 @@ def __init__(self, df_cat=None, accept_gaps=False, jmd_n_len=10, jmd_c_len=10, e ut.check_bool(name="verbose", val=verbose) if df_cat is None: - df_cat = aaanalysis.data_loader.load_scales_.load_scales(name=ut.COL_SCALE_ID) + df_cat = aa.load_scales(name=ut.COL_SCALE_ID) self.df_cat = df_cat self._verbose = verbose self._accept_gaps = accept_gaps diff --git a/aaanalysis/cpp/feature.py b/aaanalysis/cpp/feature.py index 07ab5e8b..fbd41a50 100644 --- a/aaanalysis/cpp/feature.py +++ b/aaanalysis/cpp/feature.py @@ -10,7 +10,6 @@ import multiprocessing as mp import warnings -import aaanalysis.data_handling.load_scales_ from aaanalysis.cpp._feature_pos import SequenceFeaturePositions from aaanalysis.cpp._split import Split, SplitRange from aaanalysis.cpp._part import Parts @@ -343,7 +342,7 @@ def get_features(self, list_parts=None, split_kws=None, df_scales=None, all_part ut.check_split_kws(split_kws=split_kws) ut.check_df_scales(df_scales=df_scales, accept_none=True) if df_scales is None: - df_scales = aaanalysis.data_loader.load_scales_.load_scales() + df_scales = aa.load_scales() if split_kws is None: split_kws = self.get_split_kws() scales = list(df_scales) @@ -387,7 +386,7 @@ def feat_matrix(features=None, df_parts=None, df_scales=None, accept_gaps=False, """ ut.check_number_range(name="j_jobs", val=n_jobs, accept_none=True, min_val=1, just_int=True) if df_scales is None: - df_scales = 
aaanalysis.data_loader.load_scales_.load_scales() + df_scales = aa.load_scales() ut.check_df_scales(df_scales=df_scales) ut.check_df_parts(df_parts=df_parts) features = ut.check_features(features=features, parts=df_parts, df_scales=df_scales) @@ -459,7 +458,7 @@ def feat_names(features=None, df_cat=None, tmd_len=20, jmd_c_len=10, jmd_n_len=1 features = ut.check_features(features=features) ut.check_df_cat(df_cat=df_cat) if df_cat is None: - df_cat = aaanalysis.data_loader.load_scales_.load_scales(name=ut.STR_SCALE_CAT) + df_cat = aa.load_scales(name=ut.STR_SCALE_CAT) # Get feature names sfp = SequenceFeaturePositions() dict_part_pos = sfp.get_dict_part_pos(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, diff --git a/aaanalysis/utils.py b/aaanalysis/utils.py index f458a729..4b9e886e 100644 --- a/aaanalysis/utils.py +++ b/aaanalysis/utils.py @@ -11,7 +11,7 @@ # Import utility functions explicitly from aaanalysis._utils._check_type import (check_number_range, check_number_val, check_str, check_bool, - check_dict, check_tuple, check_list, + check_dict, check_tuple, check_list_like, check_ax) from aaanalysis._utils._check_data import (check_X, check_X_unique_samples, check_labels, check_match_X_labels, check_superset_subset, diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index e8c9925f..b2a85a51 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/generated/aaanalysis.AAclust.doctree b/docs/build/doctrees/generated/aaanalysis.AAclust.doctree index a00769c9..1a0be0de 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.AAclust.doctree and b/docs/build/doctrees/generated/aaanalysis.AAclust.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.CPP.doctree b/docs/build/doctrees/generated/aaanalysis.CPP.doctree index fa5c083d..6997db1f 100644 Binary files 
a/docs/build/doctrees/generated/aaanalysis.CPP.doctree and b/docs/build/doctrees/generated/aaanalysis.CPP.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.CPPPlot.doctree b/docs/build/doctrees/generated/aaanalysis.CPPPlot.doctree index b6a6f4f6..b3bca136 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.CPPPlot.doctree and b/docs/build/doctrees/generated/aaanalysis.CPPPlot.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.SequenceFeature.doctree b/docs/build/doctrees/generated/aaanalysis.SequenceFeature.doctree index 8333577d..2936b72e 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.SequenceFeature.doctree and b/docs/build/doctrees/generated/aaanalysis.SequenceFeature.doctree differ diff --git a/docs/build/doctrees/generated/tutorial1_quick_start.doctree b/docs/build/doctrees/generated/tutorial1_quick_start.doctree index 4611be63..65af52ea 100644 Binary files a/docs/build/doctrees/generated/tutorial1_quick_start.doctree and b/docs/build/doctrees/generated/tutorial1_quick_start.doctree differ diff --git a/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf b/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf index b5ebd506..034540bd 100644 Binary files a/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf and b/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf differ diff --git a/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf b/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf index 52458eb0..1771af1b 100644 Binary files a/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf and b/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf differ diff --git 
a/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf b/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf index 29fca4d7..af43810e 100644 Binary files a/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf and b/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf differ diff --git a/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf b/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf index 63459a8c..7064ab06 100644 Binary files a/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf and b/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf differ diff --git a/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf b/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf index e38c7d65..bb323fe1 100644 Binary files a/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf and b/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf differ diff --git a/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf b/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf index 50bf2f30..f0b11b7a 100644 Binary files a/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf and b/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf differ diff --git a/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf b/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf index e6ddea7f..c2c1a032 100644 Binary files 
a/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf and b/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf differ diff --git a/docs/build/html/_images/output_13_1.png b/docs/build/html/_images/output_13_1.png index 815e3a39..6e2513d5 100644 Binary files a/docs/build/html/_images/output_13_1.png and b/docs/build/html/_images/output_13_1.png differ diff --git a/docs/build/html/_sources/generated/tutorial1_quick_start.rst.txt b/docs/build/html/_sources/generated/tutorial1_quick_start.rst.txt index 75442ba7..c9cc5aa9 100644 --- a/docs/build/html/_sources/generated/tutorial1_quick_start.rst.txt +++ b/docs/build/html/_sources/generated/tutorial1_quick_start.rst.txt @@ -69,9 +69,9 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: from sklearn.cluster import AgglomerativeClustering - aac = aa.AAclust(model=AgglomerativeClustering) - X = np.array(df_scales) - scales = aac.fit(X, names=list(df_scales), n_clusters=100) + aac = aa.AAclust(model_class=AgglomerativeClustering) + X = np.array(df_scales).T + scales = aac.fit(X, names=list(df_scales), n_clusters=100).medoid_names_ df_scales = df_scales[scales] Comparative Physicochemical Profiling (CPP) @@ -131,10 +131,10 @@ A feature matrix from a given set of CPP features can be created using .. parsed-literal:: - Mean accuracy of 0.6 + Mean accuracy of 0.58 -Creating more features with CPP will take some more time, but improve +Creating more features with CPP will take a little time, but improve prediction performance: .. 
code:: ipython3 @@ -153,7 +153,7 @@ prediction performance: sns.barplot(pd.DataFrame({"Baseline": cv_base, "CPP": cv}), palette=["tab:blue", "tab:red"]) plt.ylabel("Mean accuracy", size=aa.plot_gcfs()+1) plt.ylim(0, 1) - plt.title("Comparison of Feature Engineering Methods") + plt.title("Comparison of Feature Engineering Methods", size=aa.plot_gcfs()-1) sns.despine() plt.show() diff --git a/docs/build/html/generated/aaanalysis.AAclust.html b/docs/build/html/generated/aaanalysis.AAclust.html index 48d50661..3e1e9520 100644 --- a/docs/build/html/generated/aaanalysis.AAclust.html +++ b/docs/build/html/generated/aaanalysis.AAclust.html @@ -498,11 +498,11 @@
DataFrame with correlation either for each pair in X
of shape (n_samples, n_samples) or
+
df_corr – DataFrame with correlation either for each pair in X
of shape (n_samples, n_samples) or
for each pair between X
and X_ref
of shape (n_samples, n_samples_ref).
df_corr
+pd.DataFrame
names – List of sample names. Should be subset of names_ref
.
names_ref – List of reference sample names. Should superset of names
.
names_ref – List of reference sample names. Should be superset of names
.
Percentage of unique names from names
that are found in names_ref
.
coverage – Percentage of unique names from names
that are found in names_ref
.
coverage
+Bases: Tool
Create and filter features that are most discriminant between two sets of sequences.
Methods
@@ -168,7 +168,7 @@Perform CPP pipeline by creation and two-step filtering of features. CPP aims to identify a collection of non-redundant features that are most discriminant between a test and a reference group of sequences.
@@ -223,7 +223,7 @@Evaluate the output generated by the tool.
Bases: object
Create and filter features that are most discriminant between two sets of sequences.
Methods
@@ -167,7 +167,7 @@Plot feature profile for given features from ‘df_feat’.
Plot a featuremap of the selected value column with scale information (y-axis) versus sequence position (x-axis).
This is a wrapper function for seaborn.heatmap()
, designed to highlight differences between two sets
of sequences at the positional level (e.g., amino acid level for protein sequences).
Bases: object
Retrieve and create sequence feature components (Part, Split, and Scale).
Create DataFrame with sequence parts.
Create dictionary with kwargs for three split types: Segment, Pattern, PeriodicPattern
Create list of all feature ids for given Parts, Splits, and Scales
Create feature matrix for given feature ids and sequence parts.
Convert feature ids (PART-SPLIT-SCALE) into feature names (scale name [positions]).
Create feature values for all sequence parts by combining Part, Split, and Scale.
Add feature value difference between sample and reference group to DataFrame.
Create list with positions for given feature names
from sklearn.cluster import AgglomerativeClustering
-aac = aa.AAclust(model=AgglomerativeClustering)
-X = np.array(df_scales)
-scales = aac.fit(X, names=list(df_scales), n_clusters=100)
+aac = aa.AAclust(model_class=AgglomerativeClustering)
+X = np.array(df_scales).T
+scales = aac.fit(X, names=list(df_scales), n_clusters=100).medoid_names_
df_scales = df_scales[scales]
Mean accuracy of 0.6
+Mean accuracy of 0.58
-Creating more features with CPP will take some more time, but improve
+
Creating more features with CPP will take a little time, but improve
prediction performance:
# CPP features with default splits (around 100.000 features)
df_parts = sf.get_df_parts(df_seq=df_seq)
@@ -254,7 +254,7 @@ 3. Protein Predictionsns.barplot(pd.DataFrame({"Baseline": cv_base, "CPP": cv}), palette=["tab:blue", "tab:red"])
plt.ylabel("Mean accuracy", size=aa.plot_gcfs()+1)
plt.ylim(0, 1)
-plt.title("Comparison of Feature Engineering Methods")
+plt.title("Comparison of Feature Engineering Methods", size=aa.plot_gcfs()-1)
sns.despine()
plt.show()
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index 94d0e30a..168405de 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_clist", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_legend", "generated/aaanalysis.plot_settings", "generated/plotting_prelude", "generated/tutorial1_quick_start", "generated/tutorial2a_data_loader", "generated/tutorial2b_scales_loader", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tutorials"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_clist.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_legend.rst", "generated/aaanalysis.plot_settings.rst", "generated/plotting_prelude.rst", "generated/tutorial1_quick_start.rst", "generated/tutorial2a_data_loader.rst", "generated/tutorial2b_scales_loader.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles.rst", "index/usage_principles/aaontology.rst", 
"index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tutorials.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_clist", "aaanalysis.plot_get_cmap", "aaanalysis.plot_legend", "aaanalysis.plot_settings", "Plotting Prelude", "Quick Start with AAanalysis", "Data Loading Tutorial", "Scale Loading Tutorial", "Welcome to the AAanalysis documentation!", "Contributing", "<no title>", "<no title>", "Introduction", "<no title>", "References", "Tables", "Usage Principles", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tutorials"], "terms": {"thi": [0, 1, 3, 7, 8, 10, 12, 13, 14, 15, 16, 17, 19, 28], "applic": [0, 3, 12], "program": [0, 19], "interfac": [0, 19, 25], "i": [0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 15, 16, 17, 18, 19, 22, 23, 25, 27, 29], "public": [0, 14, 16, 18, 19, 21], "object": [0, 1, 3, 4, 5, 12, 15], "function": [0, 1, 3, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 23], "our": [0, 8, 9, 11, 14, 15, 17, 19, 22], "aaanalysi": [0, 14, 16, 17, 19, 21, 22, 23, 25, 26, 29, 32], "python": [0, 15, 18, 19, 22, 23], "toolkit": [0, 19, 28], "which": [0, 3, 4, 12, 13, 15, 16, 17, 19, 22, 25, 28, 30], "can": [0, 1, 4, 5, 8, 12, 14, 15, 16, 17, 18, 19, 22, 25, 28, 30], "import": [0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 26], "aa": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 25, 26], "you": [0, 14, 17, 18, 19, 21], "access": [0, 1, 6, 15, 17, 25], "all": [0, 1, 2, 3, 4, 6, 7, 13, 14, 15, 17, 19, 25], "method": [0, 1, 2, 3, 4, 5, 15, 24], "via": [0, 
14, 19, 24], "alia": [0, 4], "load_dataset": [0, 4, 15, 16, 17, 25], "class": [1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 16, 30], "model_class": 1, "sklearn": [1, 15], "cluster": [1, 15, 18, 22, 23, 24, 25], "_kmean": 1, "kmean": 1, "model_kwarg": 1, "none": [1, 2, 3, 4, 5, 6, 7, 12, 16], "verbos": [1, 2, 3, 4, 5, 15], "fals": [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 15, 17], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19], "base": [1, 2, 3, 4, 5, 6, 12, 15, 18, 19, 22, 23, 24, 25, 29, 30], "wrapper": [1, 3, 15, 18, 19, 22, 23], "A": [1, 4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 19, 22, 24], "k": [1, 17, 18, 22, 23, 24], "optim": [1, 2, 3, 10, 14, 18, 19, 22, 23, 24], "select": [1, 2, 3, 6, 7, 15, 16, 17, 18, 19, 22, 23, 24], "redund": [1, 2, 7, 15, 18, 19, 22, 23, 24], "reduc": [1, 5, 7, 18, 22, 23, 24, 25], "set": [1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 16, 18, 19, 22, 23, 24, 25, 28], "numer": [1, 3, 4, 15, 18, 22, 23], "scale": [1, 2, 3, 4, 7, 9, 13, 18, 21, 22, 23, 24, 26, 28, 32], "us": [1, 2, 3, 5, 6, 7, 8, 12, 14, 15, 16, 17, 18, 19, 21, 22, 25, 26, 28, 30], "model": [1, 5, 15, 19, 30], "requir": [1, 19], "pre": [1, 2, 15, 16, 19], "defin": [1, 4, 7, 15, 16, 19, 25, 28], "number": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 16, 17, 25], "n_cluster": [1, 15], "mean": [1, 2, 3, 15, 17, 25], "other": [1, 3, 7, 13, 14, 17, 19, 25], "scikit": [1, 19], "learn": [1, 5, 16, 18, 19, 21, 22, 23, 24, 25, 26], "valu": [1, 2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 22, 25], "util": [1, 13, 14, 16, 18, 19], "pearson": [1, 2], "correl": [1, 2, 25], "repres": [1, 3, 15, 16, 22, 25], "sampl": [1, 2, 3, 4, 5, 16, 25, 30], "medoid": 1, "each": [1, 2, 3, 4, 5, 15, 16, 17, 19], "closest": 1, "center": [1, 15, 25], "result": [1, 2, 19], "see": [1, 3, 19, 22, 25, 28], "breimann23a": [1, 6, 7, 24, 25], "paramet": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 19, 25], "type": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 16, 19, 25], "clustermixin": 1, "instanti": 1, "dure": [1, 5], 
"fit": [1, 5, 15, 19], "option": [1, 2, 3, 4, 5, 6, 7, 12, 13], "dict": [1, 2, 3, 4, 5, 9, 12], "keyword": [1, 3, 5], "argument": [1, 3, 4, 5, 12], "pass": [1, 3, 5, 19], "bool": [1, 2, 3, 4, 5, 6, 7, 11, 12, 13], "If": [1, 2, 3, 4, 5, 6, 7, 12, 13, 17, 18, 19, 21, 30], "true": [1, 2, 3, 4, 6, 7, 11, 12, 13, 14, 16, 17], "output": [1, 2, 4, 5, 14, 19], "ar": [1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 19, 25, 28, 30, 31], "enabl": [1, 2, 3, 4, 5, 18, 19, 22, 23, 29], "The": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 17, 19, 25, 28, 29], "after": [1, 2, 25], "call": [1, 7, 14, 25], "obtain": [1, 4, 7, 15, 25], "int": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "labels_": [1, 5], "label": [1, 2, 3, 4, 5, 6, 12, 14, 15, 16, 19, 25, 30], "order": [1, 19, 25], "x": [1, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15], "arrai": [1, 2, 4, 5, 15], "like": [1, 2, 4, 5, 13, 14, 19, 25], "shape": [1, 2, 3, 4, 5, 12, 25], "n_sampl": [1, 2, 4, 5], "centers_": 1, "averag": [1, 4, 15, 17, 25], "correspond": [1, 12, 16, 19, 25], "n_featur": [1, 2, 3, 4, 5], "center_labels_": 1, "medoids_": 1, "one": [1, 3, 10, 12, 19], "medoid_labels_": 1, "is_medoid_": 1, "indic": [1, 3, 4, 5, 16, 17, 19, 25], "being": [1, 16, 19, 25], "1": [1, 2, 3, 4, 5, 6, 7, 9, 12, 13, 14, 16, 17, 19, 25, 30], "0": [1, 2, 3, 4, 5, 6, 12, 14, 15, 16, 17, 25, 30], "same": [1, 7, 17], "medoid_names_": 1, "name": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 13, 15, 16, 17, 25], "provid": [1, 3, 5, 6, 7, 12, 15, 16, 17, 18, 19, 23, 25, 30], "list": [1, 3, 4, 9, 10, 11, 12, 15, 25], "attribut": 1, "directli": [1, 19], "design": [1, 3, 19, 25, 29], "primarili": [1, 5, 19], "amino": [1, 2, 3, 4, 6, 7, 15, 18, 21, 22, 23, 24, 26, 28, 30], "acid": [1, 2, 3, 4, 6, 7, 15, 18, 21, 22, 23, 24, 26, 28, 30], "ani": [1, 17, 19, 22, 25], "__init__": [1, 2, 3, 4, 5], "on_cent": 1, "min_th": 1, "merge_metr": 1, "euclidean": [1, 5], "appli": [1, 5, 12, 13, 16], "algorithm": [1, 2, 3, 15, 18, 19, 22, 23, 28, 29], "featur": [1, 2, 3, 4, 5, 18, 19, 22, 
23, 28, 29, 30], "matrix": [1, 4, 5, 15, 25], "determin": [1, 7], "without": [1, 3, 19, 25], "specif": [1, 16, 19, 25], "It": [1, 13, 15, 16, 22, 25, 28], "partit": [1, 17, 25], "data": [1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 19, 25, 26, 32], "maxim": 1, "within": [1, 2, 4, 19, 25, 28], "beyond": 1, "threshold": [1, 2], "qualiti": 1, "either": [1, 4, 6, 7, 17, 18], "minimum": [1, 4, 6], "member": 1, "between": [1, 2, 3, 4, 7, 10, 12, 15, 16, 19, 25], "its": [1, 16, 19, 25], "min_cor_al": 1, "min_cor_cent": 1, "respect": [1, 6, 15, 18, 19, 21, 25], "describ": [1, 25], "row": [1, 16, 17], "column": [1, 2, 3, 4, 5, 6, 7, 12, 16, 17, 19], "must": [1, 4, 10, 11, 19], "float": [1, 2, 3, 5, 12, 13], "otherwis": [1, 3, 4, 5, 25], "str": [1, 3, 4, 5, 6, 7, 9, 11, 12, 13], "metric": [1, 5, 19], "similar": [1, 19, 25, 30], "measur": [1, 19, 25], "merg": 1, "No": 1, "perform": [1, 2, 5, 7, 15, 17, 25], "distanc": [1, 5, 25], "manhattan": [1, 5], "cosin": [1, 5], "return": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 16], "instanc": [1, 3, 12], "allow": 1, "direct": [1, 19], "aanalysi": [1, 19], "consist": [1, 13, 19, 22, 25], "three": [1, 4, 16, 25], "main": [1, 25], "step": [1, 2, 3, 4, 6, 7, 19, 22], "estim": 1, "lower": [1, 25], "bound": 1, "refin": [1, 19], "recurs": [1, 24], "chosen": [1, 2, 4, 6, 7, 16], "smaller": [1, 14], "reduct": 1, "pairwise_dist": 1, "were": [1, 7, 17, 25], "runtimewarn": 1, "caught": 1, "bundl": 1, "static": [1, 4], "eval": [1, 2, 5, 19], "evalu": [1, 2, 7, 17, 19, 25], "establish": [1, 19], "quantifi": 1, "bic": 1, "bayesian": 1, "inform": [1, 2, 3, 4, 5, 17, 28], "criterion": 1, "reflect": [1, 19, 25], "good": [1, 19], "while": [1, 16], "account": [1, 19, 25], "rang": 1, "from": [1, 2, 3, 4, 5, 6, 7, 15, 16, 17, 18, 19, 25, 26], "neg": [1, 4, 5, 6, 12, 16, 19, 25, 30], "infin": 1, "posit": [1, 2, 3, 4, 5, 6, 18, 19, 22, 23, 25, 30], "higher": [1, 25], "superior": 1, "ch": [1, 17, 25], "calinski": 1, "harabasz": 1, "index": [1, 6, 17, 18, 19, 
24], "ratio": 1, "dispers": 1, "score": [1, 15], "suggest": [1, 19], "better": 1, "sc": 1, "silhouett": 1, "coeffici": 1, "proxim": 1, "point": [1, 3, 8, 12, 25, 26], "neighbor": [1, 25], "li": 1, "closer": 1, "impli": 1, "wa": [1, 22], "modifi": [1, 5, 13], "align": [1, 3, 12, 15, 17, 19], "so": 1, "signifi": 1, "contrari": 1, "convent": [1, 4, 7], "implement": [1, 19], "favor": 1, "calinski_harabasz_scor": 1, "silhouette_scor": 1, "name_clust": 1, "shorten_nam": 1, "assign": [1, 3, 4, 5, 17, 25], "frequenc": [1, 25], "priorit": 1, "alreadi": [1, 30], "contain": [1, 2, 3, 5, 6, 7, 17, 19, 25, 28, 30], "unclassifi": [1, 7, 17, 25], "shorten": 1, "version": [1, 17, 25], "cluster_nam": 1, "renam": 1, "comp_cent": 1, "comput": [1, 2, 3, 4, 15, 19, 24, 25], "given": [1, 3, 4, 6, 10, 11, 12, 15, 17, 19, 25], "center_label": 1, "associ": [1, 25], "comp_medoid": 1, "medoid_label": 1, "comp_correl": 1, "x_ref": 1, "labels_ref": 1, "names_ref": 1, "refer": [1, 2, 4, 6, 15, 19, 25], "compar": [1, 16, 18, 22, 23, 25, 28, 29], "n_samples_ref": 1, "datafram": [1, 2, 3, 4, 5, 6, 7, 15, 19, 28], "pair": 1, "df_corr": 1, "sort": 1, "ascend": 1, "replac": [1, 6], "panda": [1, 2, 3, 4, 5, 6, 7, 15, 19], "corr": 1, "comp_coverag": 1, "percentag": [1, 2, 5, 17], "uniqu": [1, 2, 3, 17, 19], "present": [1, 4, 6], "help": 1, "understand": 1, "coverag": [1, 19], "particular": 1, "subset": [1, 4, 7, 25], "univers": 1, "both": [1, 3, 13, 16], "consid": [1, 7, 19], "onli": [1, 3, 6, 7, 12, 13, 16, 19, 25, 30], "onc": [1, 19], "regardless": 1, "repetit": 1, "should": [1, 2, 3, 4, 5, 19, 30], "superset": 1, "found": [1, 4, 19], "df_scale": [2, 4, 7, 15, 17, 28], "df_cat": [2, 3, 4, 7, 17, 28], "df_part": [2, 4, 15, 28], "split_kw": [2, 4, 15, 28], "accept_gap": [2, 3, 4], "tool": [2, 19, 24], "creat": [2, 3, 4, 5, 13, 14, 15, 19, 28], "filter": [2, 3, 6, 15, 16], "most": [2, 3, 5, 12, 15, 18, 22, 23], "discrimin": [2, 3, 15], "two": [2, 3, 7, 8, 15, 17, 18, 19, 22, 23, 24, 25, 27, 28], 
"sequenc": [2, 3, 4, 5, 6, 16, 18, 19, 22, 23, 24, 25, 26, 28, 29, 30], "default": [2, 3, 4, 5, 6, 8, 9, 12, 13, 14, 15, 16, 17], "load_categori": [2, 4], "categori": [2, 3, 4, 7, 9, 10, 12, 16, 17], "physicochem": [2, 4, 18, 22, 23, 24, 25, 26, 28], "part": [2, 3, 4, 15, 19, 28], "sequencefeatur": [2, 15], "get_split_kw": [2, 4, 15], "nest": [2, 4], "dictionari": [2, 3, 4, 9, 12], "split_typ": [2, 4, 15], "whether": [2, 3, 4, 11, 12], "accept": [2, 3, 4], "miss": [2, 3, 4], "omit": [2, 3, 4], "print": [2, 3, 4, 15], "progress": [2, 3, 24], "about": [2, 3], "run": [2, 4, 15], "parametr": 2, "n_filter": 2, "100": [2, 6, 15, 16], "tmd_len": [2, 3, 4], "20": [2, 3, 4, 7, 16, 17, 19, 25], "jmd_n_len": [2, 3, 4], "10": [2, 3, 4, 10, 12, 16, 17, 19, 25], "jmd_c_len": [2, 3, 4], "ext_len": [2, 3, 4], "4": [2, 3, 4, 16, 17, 25], "start": [2, 3, 4, 6, 19, 25, 26, 28], "check_cat": 2, "n_pre_filt": 2, "pct_pre_filt": 2, "5": [2, 3, 4, 5, 14, 15, 16, 17, 19, 25], "max_std_test": 2, "2": [2, 3, 4, 5, 8, 10, 12, 14, 16, 17, 19, 25, 30], "max_overlap": 2, "max_cor": 2, "n_process": 2, "pipelin": [2, 19], "creation": 2, "aim": [2, 3, 15, 19], "identifi": [2, 3, 5, 6, 15, 16, 18, 22, 23, 24, 26, 30], "collect": [2, 7], "non": [2, 4, 6, 15, 25], "test": [2, 15, 17], "group": [2, 3, 4, 12, 14, 25], "t": [2, 6, 17, 25], "u": [2, 14, 18, 19], "p": [2, 17, 24], "length": [2, 3, 4, 6, 12, 16, 25], "tmd": [2, 3, 4, 6, 15, 16], "todo": [2, 19], "add": [2, 3, 4, 19], "link": [2, 18, 19, 21, 24], "explan": [2, 3, 19], "first": [2, 3, 4, 7, 14, 19], "n": [2, 3, 4, 6, 7, 15, 16, 17, 19, 24, 25], "terminu": [2, 3, 4, 25], "jmd": [2, 3, 4, 15], "c": [2, 3, 4, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 24, 25], "extend": [2, 3, 4, 19, 25, 30], "termin": [2, 3, 4, 15, 16, 25], "longer": 2, "than": [2, 25], "check": [2, 19], "remain": [2, 17, 19], "maximum": [2, 4, 5, 6, 15], "standard": [2, 30], "deviat": 2, "overlap": 2, "cpu": 2, "multiprocess": 2, "automat": [2, 3, 5, 12, 19], "df_feat": [2, 3, 4, 
15, 28], "statist": [2, 3], "n_feature_inform": [2, 3], "follow": [2, 4, 5, 7, 18, 19, 21, 22, 23, 26], "eleven": 2, "includ": [2, 4, 6, 7, 12, 19], "id": [2, 4, 6, 7, 17], "rank": [2, 17], "11": [2, 3, 16, 25], "split": [2, 4, 15, 28], "subcategori": [2, 3, 7, 17], "sub": 2, "scale_nam": [2, 3, 7, 17], "abs_auc": [2, 3], "absolut": [2, 19], "adjust": [2, 3, 12, 13, 14], "auc": 2, "abs_mean_dif": 2, "differ": [2, 3, 4, 10, 16, 17, 28], "std_test": [2, 3], "std_ref": 2, "p_val": 2, "mann_whitnei": 2, "ttest_indep": 2, "p_val_fdr_bh": 2, "benjamini": 2, "hochberg": 2, "fdr": 2, "correct": 2, "gener": [2, 3, 4, 6, 10, 11, 13, 19, 22, 24, 25, 30], "condit": [3, 4], "jmd_m_len": [3, 4], "profil": [3, 18, 22, 23, 29], "y": [3, 8, 9, 10, 11, 12, 13, 14, 15, 17], "val_col": 3, "mean_dif": 3, "val_typ": 3, "count": [3, 16], "normal": [3, 7, 12, 17, 19, 25], "figsiz": 3, "7": [3, 4, 5, 14, 16, 17, 25], "titl": [3, 8, 12, 13, 14, 15], "title_kw": 3, "dict_color": [3, 9, 12, 14], "edge_color": 3, "bar_width": 3, "75": 3, "add_jmd_tmd": 3, "jmd_n_seq": 3, "tmd_seq": 3, "jmd_c_seq": 3, "tmd_color": 3, "mediumspringgreen": 3, "jmd_color": 3, "blue": [3, 15], "tmd_seq_color": 3, "black": [3, 11, 12, 14, 19], "jmd_seq_color": 3, "white": [3, 11, 12], "seq_siz": 3, "tmd_jmd_fontsiz": 3, "xtick_siz": 3, "xtick_width": 3, "xtick_length": 3, "xticks_po": 3, "ytick_siz": 3, "ytick_width": 3, "ytick_length": 3, "ylim": [3, 15], "highlight_tmd_area": 3, "highlight_alpha": 3, "15": [3, 4, 16, 25], "grid": [3, 13, 14], "grid_axi": [3, 13, 14], "add_legend_cat": 3, "legend_kw": 3, "shap_plot": 3, "kwarg": [3, 4, 12], "plot": [3, 8, 9, 10, 11, 12, 13, 16, 18, 19, 25, 32], "avail": [3, 7, 12, 15, 17, 18, 21, 24], "specifi": [3, 4, 5, 9, 11, 15, 19], "check_value_typ": 3, "tupl": [3, 11], "size": [3, 4, 8, 12, 13, 14, 15, 25], "custom": [3, 7, 14, 19], "appear": [3, 25], "map": [3, 4, 11, 12], "color": [3, 8, 9, 10, 11, 12, 13, 14], "edg": [3, 12, 19, 25], "bar": 3, "width": [3, 12], "line": 
[3, 12, 13, 14, 19], "annot": 3, "font": [3, 8, 12, 13], "tick": [3, 13, 14], "axi": [3, 13, 17], "limit": [3, 19], "highlight": 3, "area": [3, 17, 25], "alpha": 3, "ad": 3, "drawn": 3, "legend": [3, 12, 13, 14], "shap": [3, 8, 11, 15, 19], "shaplei": 3, "addit": [3, 4, 5, 7, 13, 17, 19, 25], "intern": [3, 19, 25], "librari": [3, 13, 19], "ax": [3, 9, 12, 13], "matplotlib": [3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19], "heatmap": 3, "8": [3, 4, 5, 12, 16, 17, 19, 25], "vmin": 3, "vmax": 3, "grid_on": 3, "cmap": 3, "rdbu_r": 3, "cmap_n_color": 3, "cbar_kw": 3, "facecolor_dark": [3, 11], "add_importance_map": 3, "cbar_pct": 3, "featuremap": 3, "versu": 3, "seaborn": [3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19], "level": [3, 6, 7, 16, 17, 18, 19, 23, 25, 26, 27], "e": [3, 4, 14, 15, 17, 18, 19, 22, 23, 25, 30], "g": [3, 4, 17, 18, 19, 22, 23, 25, 30], "protein": [3, 4, 6, 17, 18, 19, 22, 23, 24, 28, 29, 30], "shown": 3, "feat_impact": 3, "displai": [3, 13], "sum": [3, 17, 25], "std": 3, "aggreg": 3, "positions_onli": 3, "further": [3, 17, 19, 25], "across": [3, 13, 17, 19], "recommend": [3, 5, 7, 19], "when": [3, 5, 12, 19, 25], "emphas": [3, 19], "fewer": 3, "value_typ": 3, "height": 3, "figur": 3, "inch": 3, "pyplot": [3, 8, 9, 10, 11, 12, 13, 14, 15, 16], "anchor": [3, 12, 25], "colormap": 3, "infer": [3, 19], "seismic": 3, "space": [3, 5, 12, 19], "impact": 3, "discret": 3, "diverg": 3, "sequenti": 3, "classifi": 3, "kei": [3, 9, 12, 19, 25], "colorbar": 3, "under": [3, 7, 19], "depicet": 3, "depict": 3, "jmd_n": [3, 4, 6, 16], "jmd_c": [3, 4, 6, 16], "set_xticklabel": 3, "widht": 3, "tick_param": 3, "classif": [3, 6, 7, 15, 16, 17, 18, 23, 25, 26, 30], "pcolormesh": 3, "effect": [3, 19, 25, 30], "document": [3, 25], "more": [3, 12, 13, 15, 19], "detail": [3, 6, 7, 17, 18, 19, 21], "cpp": [3, 4, 8, 11, 18, 21, 22, 23, 26, 28], "code": [3, 8, 9, 10, 11, 12, 13, 14], "update_seq_s": 3, "retriev": [4, 15], "compon": [4, 5, 7, 17, 25], "continu": [4, 11, 15, 19], "domain": 
[4, 6, 15, 16, 25], "transmembran": [4, 25], "membran": [4, 25], "principl": [4, 18], "distinct": [4, 18, 19, 22, 23, 25], "segment": [4, 15, 28], "pattern": [4, 12, 15], "properti": [4, 12, 19, 25], "express": 4, "realiz": 4, "For": [4, 6, 16, 19, 30], "over": [4, 15], "valid": [4, 19], "tmd_e": 4, "tmd_n": 4, "tmd_c": 4, "ext_c": 4, "ext_n": 4, "tmd_jmd": [4, 15], "jmd_n_tmd_n": 4, "tmd_c_jmd_c": 4, "ext_n_tmd_n": 4, "tmd_c_ext_c": 4, "get_df_part": [4, 15], "df_seq": [4, 5, 6, 15, 16, 28], "list_part": [4, 15], "all_part": 4, "datafran": 4, "compris": [4, 12, 17], "tmd_start": [4, 6, 16], "tmd_stop": [4, 6, 16], "string": 4, "len": [4, 9, 16], "lenght": 4, "resp": [4, 25], "extra": [4, 14, 25], "possibl": [4, 16, 25, 30], "get": [4, 8, 12, 14, 26], "sf": [4, 15], "dom_gsec": [4, 15, 16, 25], "n_split_min": 4, "n_split_max": [4, 15], "steps_pattern": 4, "n_min": 4, "n_max": 4, "len_max": 4, "steps_periodicpattern": 4, "periodicpattern": 4, "greater": 4, "greatest": 4, "whole": [4, 6, 17], "specfii": 4, "smallest": [4, 25], "integ": 4, "3": [4, 5, 10, 11, 12, 16, 17, 19, 25], "6": [4, 15, 16, 17, 25], "vari": [4, 16], "paramt": 4, "argumetn": 4, "get_featur": 4, "load_scal": [4, 15, 17, 18, 23, 25], "combin": [4, 15, 19, 25], "form": [4, 25], "feat_matrix": [4, 15], "n_job": [4, 15], "return_label": 4, "pd": [4, 5, 15, 19], "seri": 4, "job": 4, "parallel": [4, 25], "spars": 4, "feat_nam": 4, "convert": 4, "depend": [4, 25], "last": 4, "step1": 4, "step2": 4, "add_feat_valu": 4, "dict_scal": 4, "letter": 4, "feature_valu": 4, "n_part": 4, "ha": [4, 19, 25], "where": [4, 5, 13, 25], "structur": [4, 24, 25], "th": [4, 7, 17], "n_split": 4, "p1": 4, "p2": 4, "pn": 4, "end": [4, 19, 25], "odd": [4, 16], "even": 4, "give": 4, "add_dif": 4, "sample_nam": 4, "ref_group": 4, "add_posit": 4, "part_split": 4, "feat_posit": 4, "total": [4, 5, 17, 19, 25], "n_compon": 5, "pca_kwarg": 5, "determinist": [5, 18, 22, 23], "unlabel": [5, 18, 22, 23, 25, 30], "offer": [5, 16, 19], 
"approach": [5, 15, 16, 19, 30], "pu": [5, 18, 22, 23, 25], "emploi": 5, "princip": [5, 7, 17, 25], "analysi": [5, 7, 15, 17, 18, 19, 22, 23, 25], "pca": [5, 17], "dimension": [5, 24], "pc": [5, 7, 25], "iter": 5, "reliabl": [5, 16, 19], "These": [5, 7, 14, 15, 17, 19, 30], "those": [5, 25], "distant": 5, "altern": [5, 30], "also": [5, 16, 17, 19, 25], "80": 5, "cover": 5, "varianc": 5, "identif": [5, 24], "datapoint": 5, "inspir": [5, 19], "techniqu": [5, 30], "an": [5, 6, 7, 12, 14, 15, 16, 17, 18, 19, 21, 24, 25], "theoret": [5, 25], "high": [5, 24, 25], "n_neg": 5, "label_po": 5, "name_neg": 5, "rel_neg": 5, "col_class": 5, "newli": 5, "updat": [5, 19], "new": [5, 19], "store": 5, "Will": 5, "initi": [5, 25], "small": [5, 15, 16, 18, 19, 22, 23, 26, 31], "datafor": 5, "conta": 5, "po": 5, "unl": 5, "numpi": [5, 15, 19], "np": [5, 15], "atgc": 5, "gcta": 5, "actg": 5, "tacg": 5, "mode": 5, "dpul": 5, "info": 6, "random": [6, 16, 25], "non_canonical_aa": 6, "remov": [6, 13, 14], "min_len": [6, 16], "max_len": [6, 16], "aa_window_s": [6, 16], "9": [6, 10, 14, 15, 16, 17, 19, 25], "load": [6, 7, 18, 19, 23, 32], "benchmark": [6, 15, 17, 18, 23], "dataset": [6, 7, 15, 17, 18, 19, 22, 23, 30, 31], "categor": [6, 14, 16], "dom": [6, 16, 25], "seq": [6, 16, 25], "By": 6, "overview": [6, 7, 16, 19], "tabl": [6, 7, 16, 19], "depth": [6, 7, 17, 18, 23], "per": [6, 16, 25], "randomli": [6, 16], "liter": 6, "keep": 6, "gap": 6, "handl": [6, 12], "canon": [6, 17], "don": 6, "symbol": 6, "disabl": [6, 17], "window": [6, 25], "aa_": 6, "df_info": [6, 16], "entri": [6, 16, 17], "uniprot": 6, "binari": [6, 15, 16, 30], "stop": 6, "seq_amylo": [6, 16, 17, 25], "guid": [6, 7], "tutori": [6, 7, 15, 18, 19, 22], "just_aaindex": [7, 17], "unclassified_out": 7, "top60_n": [7, 17], "aaontologi": [7, 15, 18, 21, 23, 24, 26], "scales_raw": [7, 17, 25], "encompass": [7, 25], "aaindex": [7, 15, 17, 24], "kawashima08": [7, 24, 25], "along": [7, 15], "min": [7, 17, 25], "max": [7, 17, 25], 
"organ": [7, 19], "scales_cat": [7, 17, 25], "breimann23b": [7, 18, 21, 24, 25], "compress": [7, 17, 25], "scales_pc": [7, 17, 25], "aaclust": [7, 17, 18, 21, 22, 23, 24, 25], "top": [7, 14, 25], "60": [7, 17, 25], "top60": [7, 17, 25], "individu": [7, 19], "accompani": 7, "top60_ev": [7, 17, 25], "normliz": 7, "raw": [7, 17, 25], "best": [7, 17], "Or": [7, 16], "relev": 7, "exclus": 7, "suffix": [7, 16, 19], "scale_id": [7, 17], "deriv": 7, "descript": [7, 17, 19, 25], "scale_descript": [7, 17], "current": [8, 12], "plot_set": [8, 9, 10, 11, 12, 14, 15, 16], "here": [8, 16, 19, 25], "plt": [8, 9, 10, 11, 12, 13, 14, 15, 16], "sn": [8, 9, 10, 11, 12, 13, 14, 15, 16], "b": [8, 10, 11, 12, 13, 14, 25], "23": [8, 10, 11, 12, 13, 14, 25], "27": [8, 12, 13, 14], "43": [8, 12, 13, 14], "plot_get_clist": [8, 12, 13, 14], "barplot": [8, 9, 10, 11, 12, 13, 14, 15], "palett": [8, 9, 10, 11, 12, 13, 14, 15], "despin": [8, 9, 12, 13, 14, 15, 16], "bigger": 8, "tight_layout": [8, 9, 12, 13, 14], "show": [8, 9, 10, 11, 12, 13, 14, 15, 16, 17], "png": [8, 9, 10, 11, 12, 13], "hire": [8, 9, 10, 11, 12, 13], "pdf": [8, 9, 10, 11, 12, 13], "prelud": [8, 9, 10, 11, 12, 13, 32], "dict_cat": 9, "weight_bold": [9, 13], "xaxi": 9, "set_vis": 9, "n_color": [10, 11, 14], "fuction": 10, "eight": 10, "colorl": 10, "appeal": [10, 14], "visual": [10, 13, 14, 19], "33": [10, 11], "notebook": 10, "color_palett": [10, 11], "101": 11, "shp": 11, "least": [11, 12, 19], "central": [11, 29], "rgb": 11, "14": [11, 14, 25], "light_palett": 11, "lighter": 11, "packag": [11, 19], "list_cat": 12, "loc": [12, 17], "upper": 12, "left": [12, 25], "loc_out": 12, "ncol": [12, 14], "labelspac": 12, "columnspac": 12, "handletextpad": 12, "handlelength": 12, "fontsiz": [12, 14], "fontsize_titl": 12, "weight": [12, 24, 25], "fontsize_weight": 12, "marker": 12, "marker_s": 12, "lw": 12, "linestyl": 12, "edgecolor": 12, "hatch": [12, 14], "hatchcolor": 12, "title_align_left": 12, "independntli": 12, "customiz": 12, 
"flexbili": 12, "convini": 12, "func": 12, "attach": 12, "item": 12, "locat": [12, 25], "25": 12, "thei": [12, 15, 16, 19], "union": 12, "coordin": 12, "": [12, 16, 17, 19, 24, 25], "vertic": 12, "horizont": 12, "bewtween": 12, "text": [12, 13], "visiabl": 12, "corner": 12, "round": [12, 15], "style": [12, 13], "Not": 12, "fill": [12, 19], "furhter": 12, "word": 12, "line2d": 12, "core": 12, "gca": 12, "font_scal": [13, 16], "arial": 13, "adjust_only_font": 13, "adjust_further_el": 13, "no_tick": 13, "short_tick": 13, "no_ticks_x": [13, 14], "short_ticks_x": 13, "no_ticks_i": 13, "short_ticks_i": [13, 14], "show_opt": 13, "configur": 13, "global": 13, "embed": 13, "vector": [13, 25], "format": [13, 25], "svg": 13, "ensur": [13, 16, 19], "compat": 13, "edit": 13, "variou": [13, 15, 19, 25, 28], "viewer": 13, "softwar": [13, 19], "factor": [13, 25], "element": [13, 14], "set_context": 13, "common": [13, 19], "verdana": 13, "helvetica": 13, "dejavu": 13, "san": 13, "bold": 13, "leav": [13, 19], "unchang": 13, "make": [13, 14, 15, 16, 19], "layout": 13, "errorbar": 13, "choos": 13, "mark": 13, "short": 13, "ignor": [13, 16, 19], "runtim": 13, "polt": 13, "rcparam": 13, "manag": 13, "some": [14, 15, 25], "readi": [14, 16], "view": [14, 19, 30], "let": 14, "right": [14, 25], "spine": 14, "look": 14, "just": 14, "easili": [14, 15, 16, 19], "comparison": [14, 15], "d": [14, 17], "increas": [14, 25], "match": [14, 24], "independ": 14, "plot_gcf": [14, 15], "plot_set_legend": 14, "dive": 15, "power": 15, "capabl": [15, 25], "framework": [15, 18, 22, 23], "dedic": 15, "free": [15, 17, 25], "In": [15, 16, 19, 30], "gamma": [15, 25], "secretas": [15, 24, 25], "substrat": [15, 24, 25], "exampl": [15, 16, 19, 22, 30], "we": [15, 16, 19], "ll": 15, "focu": [15, 19], "extract": 15, "interpret": [15, 18, 19, 21, 22, 23, 24, 25, 29], "how": 15, "har": 15, "task": [15, 19, 30], "essenti": [15, 16, 19], "randomforest": 15, "With": 15, "have": [15, 16, 17, 19, 25, 30], "\u03b3": [15, 
24], "hand": [15, 25], "effortlessli": 15, "furthermor": 15, "predominantli": 15, "hierarch": 15, "known": 15, "your": [15, 18, 19, 21], "fingertip": 15, "now": 15, "50": [15, 16], "centerpiec": 15, "support": [15, 19, 25], "sinc": 15, "problem": 15, "machin": [15, 18, 19, 21, 24, 30], "lightweight": 15, "agglom": 15, "close": [15, 19], "agglomerativeclust": 15, "aac": 15, "integr": [15, 19, 24], "target": [15, 19], "middl": [15, 25], "adjac": [15, 25], "region": [15, 24, 25], "discontinu": 15, "togeth": [15, 28], "input": [15, 19, 28], "characterist": [15, 25], "As": 15, "baselin": 15, "entir": [15, 19], "ensembl": 15, "randomforestclassifi": 15, "model_select": 15, "cross_val_scor": 15, "rf": 15, "cv_base": 15, "accuraci": [15, 17, 24], "f": [15, 17], "take": 15, "time": 15, "improv": [15, 19, 24], "around": 15, "000": [15, 17], "cv": 15, "tab": 15, "red": 15, "ylabel": 15, "iloc": 16, "head": [16, 17], "13": [16, 25], "predictor": [16, 25], "aa_caspase3": [16, 25], "233": [16, 17, 25], "185605": [16, 25], "705": [16, 17, 25], "184900": [16, 25], "prosper": [16, 24, 25], "aa_furin": [16, 25], "71": [16, 25], "59003": [16, 25], "163": [16, 17, 25], "58840": [16, 25], "aa_ldr": [16, 25], "342": [16, 25], "118248": [16, 25], "35469": [16, 25], "82779": [16, 25], "idp": [16, 24, 25], "seq2seq": [16, 24, 25], "aa_mmp2": [16, 25], "573": [16, 25], "312976": [16, 25], "2416": [16, 25], "310560": [16, 25], "aa_rnabind": [16, 25], "221": [16, 17, 25], "55001": [16, 25], "6492": [16, 25], "48509": [16, 25], "gmksvm": [16, 25], "ru": [16, 25], "aa_sa": [16, 25], "101082": [16, 25], "84523": [16, 25], "1414": [16, 25], "8484": [16, 25], "511": [16, 25], "903": [16, 17, 25], "rerf": [16, 24, 25], "pred": [16, 24, 25], "seq_capsid": [16, 17, 25], "7935": [16, 25], "3364680": [16, 25], "3864": [16, 25], "4071": [16, 25], "viralpro": [16, 24, 25], "seq_disulfid": [16, 17, 25], "2547": [16, 25], "614470": [16, 25], "897": [16, 25], "1650": [16, 25], "dipro": [16, 25], "seq_loc": 
[16, 17, 25], "1835": [16, 25], "732398": [16, 25], "1045": [16, 25], "790": [16, 17, 25], "nan": [16, 25], "seq_solubl": [16, 17, 25], "17408": [16, 25], "4432269": [16, 25], "8704": [16, 25], "solpro": [16, 24, 25], "seq_tail": [16, 17, 25], "6668": [16, 25], "2671690": [16, 25], "2574": [16, 25], "4094": [16, 25], "12": [16, 25], "126": [16, 25], "92964": [16, 25], "63": [16, 25], "prefix": 16, "exemplifi": 16, "df_seq1": 16, "df_seq2": 16, "df_seq3": 16, "capsid_1": 16, "mvthnvkinkhvtrrsyssakevleippltevqtasykwfmdkgik": 16, "capsid_2": 16, "mkkrqkkmtlsnftdtsfqdfvsaeqvddksamalinraedfkagq": 16, "balanc": 16, "200": [16, 17], "value_count": 16, "dtype": 16, "int64": 16, "distribut": 16, "warn": 16, "simplefilt": 16, "action": 16, "futurewarn": 16, "list_seq_len": 16, "histplot": 16, "binwidth": 16, "xlim": 16, "1500": 16, "800": [16, 17], "residu": [16, 17, 24, 25], "seen": 16, "caspase3_1": 16, "mslfdlfrgffgfpgprshrdpffggmtrdedddeeeeeeggswgr": 16, "caspase3_2": 16, "mevtgdagvpesgeirtlkpcllrrnysreqhgvaascledlrska": 16, "caspase3_3": 16, "mrarsgargalllalllcwdptpslagidsggqalpdsfpsapaeq": 16, "caspase3_4": 16, "mdakarncllqhrealekdiktsyimdhmisdgfltiseeekvrn": 16, "conveni": 16, "flank": 16, "side": [16, 17, 25], "equal": 16, "popular": [16, 30], "caspase3_1_pos4": 16, "mslfdlfrg": 16, "caspase3_1_pos5": 16, "slfdlfrgf": 16, "caspase3_1_pos6": 16, "lfdlfrgff": 16, "caspase3_1_pos7": 16, "fdlfrgffg": 16, "21": [16, 25], "caspase3_55_pos170": 16, "kkrkleeeedgklkkpknkdk": 16, "caspase3_29_pos185": 16, "cphhercsdsdglappqhlir": 16, "caspase3_64_pos431": 16, "dnplnwpdekdssfyrnfgst": 16, "caspase3_93_pos455": 16, "fvknmnrdstfivnktitaev": 16, "caspase3_38_pos129": 16, "ssfdldydfqrdyydrmysyp": 16, "caspase3_8_pos33": 16, "rppqlrpgaptslqtepqgnp": 16, "typic": [16, 22, 25], "But": 16, "mani": 16, "face": 16, "challeng": [16, 19], "might": [16, 25], "unbalanc": [16, 18, 19, 22, 23, 26, 31], "lack": 16, "clear": [16, 19], "scenario": 16, "denot": [16, 25], "_pu": [16, 25], 
"dom_gsec_pu": [16, 25], "p05067": 16, "mlpglallllaawtaralevptdgnagllaepqiamfcgrlnmhmn": 16, "701": [16, 17], "723": [16, 17], "faedvgsnkg": 16, "aiiglmvggvviatvivitlvml": 16, "kkkqytsihh": 16, "p14925": 16, "magrarsgllllllgllalqssclafrsplsvfkrfkettrsfsn": 16, "868": [16, 17], "890": 16, "klstepgsgv": 16, "svvlittllvipvlvllaivmfi": 16, "rwkksrafgd": 16, "p70180": 16, "mrslllftfsacvllarvllaggassgagdtrpgsrrrarealaaq": 16, "477": 16, "499": 16, "pckssgglee": 16, "savtgivvgallgagllmafyff": 16, "rkkyriti": 16, "q03157": 16, "mgptspaargqgrrwrppplplllplsllllraqlavgnlavgsp": 16, "585": [16, 17], "607": [16, 17], "apsgtgvsr": 16, "alsgllimgagggslivlslll": 16, "rkkkpygti": 16, "q06481": 16, "maatgtaaaaatgrllllllvgltapalalagyiealaanagtgfa": 16, "694": [16, 17, 25], "716": [16, 17], "lredfslsss": 16, "aligllviavaiatvivislvml": 16, "rkrqygtish": 16, "121": 16, "p36941": 16, "mllpwatsapglawgplvlglfgllaasqpqavppyasenqtcrdq": 16, "226": [16, 17], "248": [16, 17], "plppemsgtm": 16, "lmlavllplafflllatvfsciw": 16, "kshpslcrkl": 16, "122": 16, "p25446": 16, "mlwiwavlplvlagsqlrvhtqgtnsiseslklrrrvretdkncs": 16, "170": [16, 17], "187": 16, "ncrkqsprnr": 16, "lwlltilvlliplvfiyr": 16, "kyrkrkcwkr": 16, "123": 16, "q9p2j2": 16, "mvwclglavlslvisqgadgrgkpevvsvvgragesvvlgcdllpp": 16, "738": [16, 17], "760": [16, 17], "pgllpqpvla": 16, "gvvggvcflgvavlvsilagcl": 16, "nrrraarrrr": 16, "124": 16, "q96j42": 16, "mvpaagrrpprvmrllgwwqvllwvlglpvrgvevaeesgrlwse": 16, "324": [16, 17], "lpstliksvd": 16, "wllvfslfflisfimyati": 16, "rtesirwlip": 16, "125": 16, "p0dpa2": 16, "mrvggafhlllvclspallsavringdgqevlylaegdnvrlgcpi": 16, "265": 16, "287": 16, "kvsdsrrigv": 16, "iigivlgsllalgclavgiwglv": 16, "ccccggsgag": 16, "df_seq_pu": 16, "689": [16, 17], "p60852": 16, "maggsattwgypvallllvatlglgrwlqpdpglpglrhsydcgik": 16, "602": [16, 17], "624": [16, 17], "dsngnsslrp": 16, "llwavlllpavalvlgfgvfvgl": 16, "sqtwaqklw": 16, "690": [16, 17], "p20239": 16, "marwqrkasvsspcgrsiyrflsllftlvtsvnsvslpqsenpafp": 16, "684": 
[16, 17], "703": [16, 17], "iiakdiaskt": 16, "lgavaalvgsavilgficyl": 16, "ykkrtirfnh": 16, "691": [16, 17], "p21754": 16, "melsyrlficlllwgstelcypqplwllqggashpetsvqpvlvec": 16, "387": [16, 17], "409": 16, "eqwalpsdt": 16, "vvllgvglavvvsltltavilvl": 16, "trrcrtashp": 16, "692": [16, 17], "q12836": 16, "mwllrcvllcvslslavsgqhkpeapdyssvlhcgpwsfqfavnln": 16, "506": [16, 17], "528": 16, "eklrvpvdsk": 16, "vlwvaglsgtlilgallvsylav": 16, "kkqkscpdqm": 16, "693": [16, 17], "q8tcw7": 16, "meqiwllllltirvlpgsaqfngyncdanlhsrfpaerdisvycgv": 16, "374": 16, "396": [16, 17], "pfqlnaitsa": 16, "lisgmvilgvtsfslllcslal": 16, "hrkgptslvl": 16, "six": 17, "origin": 17, "df_raw": 17, "df_pc": 17, "andn920101": 17, "argp820101": 17, "argp820102": 17, "argp820103": 17, "begf750101": 17, "begf750102": 17, "begf750103": 17, "bhar880101": 17, "bigc670101": 17, "biov880101": 17, "koeh090103": 17, "koeh090104": 17, "koeh090105": 17, "koeh090106": 17, "koeh090107": 17, "koeh090108": 17, "koeh090109": 17, "koeh090110": 17, "koeh090111": 17, "koeh090112": 17, "494": 17, "230": 17, "355": 17, "504": 17, "512": 17, "249": 17, "164": 17, "476": 17, "194": 17, "300": 17, "551": 17, "222": 17, "308": 17, "273": 17, "140": 17, "522": 17, "345": 17, "864": 17, "404": 17, "579": 17, "783": 17, "205": 17, "323": 17, "936": 17, "279": 17, "174": 17, "449": 17, "346": 17, "285": 17, "416": 17, "867": 17, "191": 17, "583": 17, "889": 17, "720": 17, "556": 17, "875": 17, "919": 17, "796": 17, "440": 17, "420": 17, "177": 17, "019": 17, "032": 17, "713": 17, "267": 17, "811": 17, "488": 17, "106": 17, "542": 17, "732": 17, "593": 17, "718": 17, "857": 17, "853": 17, "913": 17, "681": 17, "877": 17, "762": 17, "601": 17, "670": 17, "574": 17, "076": 17, "049": 17, "189": 17, "148": 17, "182": 17, "029": 17, "186": 17, "017": 17, "025": 17, "026": 17, "138": 17, "309": 17, "388": 17, "544": 17, "608": 17, "538": 17, "571": 17, "481": 17, "112": 17, "h": 17, "840": 17, "082": 17, "053": 17, "651": 17, "633": 17, 
"561": 17, "455": 17, "856": 17, "402": 17, "370": 17, "500": 17, "545": 17, "618": 17, "726": 17, "838": 17, "543": 17, "671": 17, "663": 17, "885": 17, "246": 17, "074": 17, "167": 17, "091": 17, "051": 17, "398": 17, "276": 17, "434": 17, "003": 17, "004": 17, "687": 17, "737": 17, "933": 17, "873": 17, "779": 17, "734": 17, "405": 17, "l": 17, "272": 17, "577": 17, "989": 17, "281": 17, "078": 17, "118": 17, "333": 17, "259": 17, "m": 17, "704": 17, "445": 17, "824": 17, "450": 17, "620": 17, "803": 17, "289": 17, "132": 17, "185": 17, "192": 17, "180": [17, 25], "419": 17, "224": [17, 25], "988": 17, "023": 17, "057": 17, "046": 17, "675": 17, "203": 17, "552": 17, "645": 17, "519": 17, "756": 17, "753": 17, "706": 17, "599": 17, "587": 17, "293": 17, "605": 17, "736": 17, "223": 17, "220": 17, "859": 17, "376": 17, "367": 17, "322": 17, "678": 17, "707": 17, "444": 17, "662": 17, "570": 17, "594": 17, "q": 17, "211": 17, "131": 17, "395": 17, "795": 17, "539": 17, "206": 17, "676": 17, "733": 17, "628": 17, "483": 17, "r": [17, 25], "531": 17, "047": 17, "110": 17, "489": 17, "940": 17, "735": 17, "215": 17, "852": 17, "883": 17, "743": 17, "362": 17, "679": 17, "238": 17, "851": 17, "188": 17, "399": 17, "589": 17, "655": 17, "590": 17, "382": 17, "384": 17, "379": 17, "598": 17, "352": 17, "312": 17, "366": 17, "578": 17, "407": 17, "364": 17, "331": 17, "250": 17, "514": 17, "v": [17, 25], "498": 17, "809": 17, "365": 17, "492": 17, "077": 17, "033": 17, "111": [17, 25], "156": 17, "154": 17, "496": 17, "w": 17, "926": 17, "040": 17, "146": 17, "600": 17, "400": 17, "104": 17, "316": 17, "244": 17, "802": 17, "709": 17, "107": 17, "502": 17, "806": 17, "588": 17, "286": 17, "644": 17, "474": 17, "410": 17, "429": 17, "413": 17, "235": 17, "336": 17, "586": [17, 25], "term": [17, 25], "lins030110": 17, "asa": [17, 25], "volum": [17, 25], "surfac": [17, 25], "fold": [17, 25], "coil": [17, 25], "turn": [17, 25], "median": 17, "resi": 17, "lins030113": 17, 
"janj780101": 17, "janin": [17, 25], "et": [17, 24, 25], "al": [17, 24, 25], "janj780103": 17, "expos": [17, 19, 25], "lins030104": 17, "lins030107": 17, "win3": 17, "choc760102": 17, "prot": 17, "lins030116": 17, "\u03b2": [17, 25], "strand": [17, 25], "lins030119": 17, "lins030103": 17, "hydrophil": [17, 25], "resid": 17, "stem": 17, "top60_id": 17, "acc": 17, "presenc": [17, 25], "absenc": [17, 25], "df_top60": 17, "aac01": 17, "aac02": 17, "aac03": 17, "aac04": 17, "aac05": 17, "aac06": 17, "aac07": 17, "aac08": 17, "aac09": 17, "aac10": 17, "df_eval": 17, "overal": 17, "aa5_caspase3": 17, "aa5_furin": 17, "aa5_ldr": 17, "aa5_mmp2": 17, "aa9_ldr": 17, "aa9_mmp2": 17, "aa9_rnabind": 17, "aa9_sa": 17, "aa13_caspase3": 17, "aa13_furin": 17, "aa13_ldr": 17, "aa13_mmp2": 17, "aa13_rnabind": 17, "aa13_sa": 17, "761": 17, "827": 17, "746": 17, "646": 17, "884": 17, "862": 17, "901": 17, "612": 17, "680": 17, "659": 17, "664": 17, "918": 17, "652": 17, "615": 17, "747": 17, "830": 17, "742": 17, "653": 17, "886": 17, "855": 17, "907": 17, "688": 17, "642": 17, "657": 17, "792": 17, "916": 17, "656": 17, "741": 17, "829": 17, "648": 17, "904": 17, "685": 17, "636": 17, "710": 17, "791": 17, "914": 17, "695": 17, "613": 17, "828": 17, "731": 17, "654": 17, "906": 17, "686": 17, "640": 17, "714": 17, "915": 17, "610": 17, "739": 17, "752": 17, "888": 17, "658": 17, "682": 17, "649": 17, "665": 17, "789": 17, "611": 17, "833": 17, "650": 17, "882": 17, "858": 17, "606": 17, "638": 17, "711": 17, "661": 17, "831": 17, "603": 17, "669": 17, "787": 17, "826": 17, "647": 17, "905": 17, "614": 17, "750": 17, "748": 17, "860": 17, "908": 17, "632": 17, "aac11": 17, "749": 17, "832": 17, "751": 17, "781": 17, "683": 17, "aac12": 17, "708": 17, "666": 17, "785": 17, "917": 17, "aac13": 17, "744": 17, "634": 17, "aac14": 17, "902": 17, "673": 17, "794": 17, "604": 17, "aac15": 17, "617": 17, "660": 17, "aac16": 17, "755": 17, "635": 17, "702": 17, "aac17": 17, "740": 17, "835": 17, 
"793": 17, "609": 17, "aac18": 17, "757": 17, "730": 17, "643": 17, "881": 17, "899": 17, "912": 17, "aac19": 17, "764": 17, "745": 17, "887": 17, "909": 17, "aac20": 17, "677": 17, "aac21": 17, "637": 17, "aac22": 17, "823": 17, "880": 17, "700": 17, "788": 17, "aac23": 17, "629": 17, "aac24": 17, "641": 17, "aac25": 17, "639": 17, "879": 17, "aac26": 17, "698": 17, "aac27": 17, "854": 17, "aac28": 17, "821": 17, "898": 17, "aac29": 17, "763": 17, "900": 17, "aac30": 17, "911": 17, "616": 17, "aac31": 17, "727": 17, "631": 17, "784": 17, "aac32": 17, "aac33": 17, "817": 17, "922": 17, "aac34": 17, "729": 17, "aac35": 17, "758": 17, "822": 17, "aac36": 17, "759": 17, "874": 17, "aac37": 17, "596": 17, "aac38": 17, "766": 17, "921": 17, "aac39": 17, "786": 17, "aac40": 17, "819": 17, "870": 17, "775": 17, "910": 17, "aac41": 17, "896": 17, "aac42": 17, "861": 17, "895": 17, "799": 17, "674": 17, "aac43": 17, "767": 17, "815": 17, "871": 17, "848": 17, "782": 17, "625": 17, "aac44": 17, "825": 17, "621": 17, "696": 17, "780": 17, "923": 17, "aac45": 17, "844": 17, "893": 17, "672": 17, "774": 17, "aac46": 17, "812": 17, "626": 17, "872": 17, "843": 17, "667": 17, "623": 17, "aac47": 17, "717": 17, "aac48": 17, "771": 17, "891": 17, "776": 17, "619": 17, "aac49": 17, "807": 17, "630": 17, "850": 17, "892": 17, "aac50": 17, "728": 17, "773": 17, "aac51": 17, "768": 17, "865": 17, "836": 17, "894": 17, "668": 17, "697": 17, "aac52": 17, "814": 17, "aac53": 17, "765": 17, "798": 17, "aac54": 17, "699": 17, "770": 17, "aac55": 17, "769": 17, "580": 17, "595": 17, "aac56": 17, "aac57": 17, "aac58": 17, "715": 17, "568": 17, "aac59": 17, "725": 17, "797": 17, "592": 17, "562": 17, "aac60": 17, "563": 17, "772": 17, "529": 17, "813": 17, "546": 17, "24": [17, 25], "df_cat_1": 17, "df_raw_1": 17, "df_scales_1": 17, "selected_scal": 17, "tolist": 17, "df_aac1": 17, "buna790103": 17, "bura740102": 17, "cham820102": 17, "cham830102": 17, "cham830103": 17, "cham830105": 17, 
"chop780101": 17, "chop780204": 17, "chop780206": 17, "kars160110": 17, "kars160112": 17, "kars160118": 17, "kars160119": 17, "kars160120": 17, "kars160122": 17, "lins030105": 17, "lins030109": 17, "264": 17, "262": 17, "425": 17, "298": 17, "863": 17, "952": 17, "149": 17, "947": 17, "442": 17, "256": 17, "557": 17, "213": 17, "397": 17, "473": 17, "566": 17, "247": 17, "311": 17, "152": 17, "354": 17, "462": 17, "119": 17, "085": 17, "208": 17, "139": 17, "169": 17, "133": 17, "240": 17, "470": 17, "160": 17, "393": 17, "313": 17, "145": 17, "134": 17, "424": 17, "115": 17, "044": 17, "195": 17, "495": 17, "554": 17, "433": 17, "458": 17, "114": 17, "463": 17, "070": 17, "421": 17, "218": 17, "553": 17, "067": 17, "021": 17, "526": 17, "135": 17, "480": 17, "043": 17, "087": 17, "532": 17, "335": 17, "963": 17, "317": 17, "319": 17, "381": 17, "198": 17, "468": 17, "390": 17, "339": 17, "282": 17, "515": 17, "486": 17, "275": 17, "257": [17, 19], "350": 17, "150": [17, 19], "534": 17, "178": 17, "565": 17, "550": 17, "320": 17, "327": 17, "326": 17, "369": 17, "028": 17, "093": 17, "537": 17, "540": 17, "231": 17, "002": 17, "372": 17, "457": 17, "120": 17, "209": 17, "081": 17, "467": 17, "183": 17, "exclud": 17, "well": [17, 19], "subordin": 17, "want": 17, "unclassified_in": 17, "guyh850104": 17, "energi": [17, 25], "appar": 17, "calcul": 17, "ja": 17, "guyh850105": 17, "racs770103": 17, "chain": [17, 25], "orient": 17, "prefer": [17, 25], "rackovski": [17, 25], "vheg790101": 17, "tfe": 17, "lipophil": 17, "phase": 17, "transfer": [17, 25], "von": 17, "buri": [17, 25], "buriabl": 17, "biov880102": 17, "werd780101": 17, "propens": [17, 25], "insid": [17, 25], "wertz": 17, "scheraga": [17, 25], "predict": [18, 19, 22, 23, 24, 25, 29, 30], "engin": [18, 19, 22, 23, 29], "dpulearn": [18, 21, 22, 23], "train": [18, 19, 22, 23, 30], "moreov": [18, 23], "load_data": [18, 23], "pypi": 18, "conda": [18, 19], "forg": 18, "pip": [18, 19], "introduct": 18, "usag": [18, 
19, 22], "contribut": [18, 25], "api": [18, 19], "explain": [18, 19, 24, 26], "ai": [18, 19, 24, 26], "perturb": [18, 30], "modul": 18, "search": 18, "page": 18, "work": [18, 21], "pleas": [18, 19, 21], "cite": [18, 21], "_": [18, 21], "breimann": [18, 21, 24], "kamp": [18, 21], "steiner": [18, 21], "frishman": [18, 21], "2023": [18, 21], "ontologi": [18, 21, 24], "biorxiv": [18, 21, 24], "welcom": 19, "thank": 19, "open": 19, "project": [19, 25], "focus": 19, "involv": 19, "invalu": 19, "made": 19, "wai": 19, "file": 19, "github": 19, "issu": 19, "tracker": 19, "submit": 19, "particip": [19, 25], "discuss": 19, "newcom": 19, "tackl": 19, "email": 19, "stephanbreimann": 19, "gmail": 19, "com": 19, "question": 19, "comprehens": 19, "robust": 19, "life": [19, 30, 31], "scienc": [19, 30, 31], "seamlessli": 19, "flexibl": [19, 25], "interoper": 19, "biopython": 19, "reimplement": 19, "exist": [19, 30], "solut": 19, "biolog": [19, 22, 25, 30], "context": 19, "relianc": 19, "opaqu": 19, "box": 19, "empir": 19, "insight": 19, "cut": 19, "fair": 19, "transpar": 19, "re": [19, 24], "commit": 19, "divers": 19, "aspect": 19, "causal": 19, "minim": 19, "reproduc": 19, "mre": 19, "amount": 19, "demonstr": 19, "self": 19, "necessari": 19, "confirm": 19, "replic": 19, "guidelin": 19, "To": [19, 26], "git": 19, "http": 19, "breimanntool": 19, "master": 19, "repositori": 19, "your_usernam": 19, "navig": 19, "folder": 19, "up": 19, "cd": 19, "isol": 19, "activ": [19, 25], "poetri": 19, "pytest": 19, "hypothesi": 19, "execut": 19, "case": 19, "directori": 19, "out": [19, 25], "readm": 19, "command": 19, "cheat": 19, "sheet": [19, 25], "substanti": 19, "minor": 19, "typo": 19, "concis": 19, "branch": [19, 25], "fix": 19, "date": 19, "readthedoc": 19, "org": 19, "crucial": 19, "modif": 19, "render": 19, "correctli": 19, "strive": 19, "codebas": 19, "standalon": 19, "special": 19, "carri": 19, "complet": 19, "process": 19, "fulfil": 19, "purpos": 19, "inherit": 19, "supplementari": 19, 
"accordingli": 19, "cppplot": 19, "semi": 19, "strictli": 19, "adher": 19, "aforement": 19, "primari": [19, 28], "_util": 19, "_utils_const": 19, "py": 19, "modular": 19, "therefor": 19, "flat": 19, "hierarchi": 19, "outlin": 19, "user": 19, "friendli": 19, "hint": 19, "enhanc": [19, 25], "propos": 19, "pep": 19, "484": 19, "book": 19, "error": 19, "messag": 19, "docstr": 19, "markup": 19, "languag": 19, "restructuredtext": 19, "rst": 19, "primer": 19, "restructuretext": 19, "cheatsheet": 19, "sphinx": 19, "autodoc": 19, "inclus": 19, "napoleon": 19, "extens": 19, "conf": 19, "four": 19, "bird": 19, "ey": 19, "background": 19, "medium": [19, 25], "tabular": 19, "critic": 19, "except": 19, "rule": 19, "showcas": 19, "scientif": 19, "mai": 19, "mention": 19, "section": 19, "extern": 19, "note": 19, "go": 19, "html": 19, "_build": 19, "browser": 19, "below": 19, "blank": 19, "OF": 19, "ONE": 19, "complex": 19, "At": 19, "intric": 19, "do": 19, "placehold": 19, "incomplet": 19, "potenti": [19, 25], "expect": 19, "30": 19, "remind": 19, "token": 19, "truncat": 19, "respons": 19, "simpli": 19, "ask": 19, "someth": 19, "repeat": 19, "compil": 19, "done": 19, "script": 19, "leverag": 19, "struggl": 19, "produc": 19, "erron": 19, "often": [19, 30], "ambigu": 19, "logic": 19, "address": 19, "intuit": 19, "through": 19, "signatur": [19, 26], "behavior": 19, "deeper": 19, "intricaci": 19, "citat": 21, "develop": 22, "practic": 22, "2023a": 24, "2023b": 24, "breimann23c": [24, 25], "2023c": 24, "chart": 24, "cheng06": [24, 25], "cheng": 24, "2006": 24, "larg": 24, "disulphid": 24, "bridg": [24, 25], "kernel": 24, "neural": 24, "network": 24, "graph": [24, 25], "struct": 24, "funct": 24, "kawashima": 24, "2008": 24, "aid": 24, "databas": 24, "report": 24, "nucleic": 24, "magnan09": [24, 25], "magnan": 24, "randal": 24, "baldi": 24, "2009": [24, 25], "accur": 24, "solubl": [24, 25], "bioinformat": 24, "galiez16": [24, 25], "galiez": 24, "2016": [24, 25], "viral": 24, "capsid": 
[24, 25], "tail": [24, 25], "song18": [24, 25], "song": 24, "2018": 24, "throughput": 24, "cleavag": [24, 25], "site": [24, 25], "90": 24, "proteas": 24, "shen19": [24, 25], "shen": 24, "2019": 24, "subcellular": [24, 25], "local": [24, 25], "evolutionari": 24, "chou": [24, 25], "pseaac": 24, "j": 24, "theor": 24, "biol": 24, "tang20": [24, 25], "tang": 24, "2020": 24, "intrins": [24, 25], "disord": [24, 25], "teng21": [24, 25], "teng": 24, "2021": 24, "amyloidogen": [24, 25], "pseudo": 24, "composit": [24, 25], "tripeptid": 24, "bmc": 24, "yang21": [24, 25], "yang": 24, "granular": 24, "multipl": 24, "rna": [24, 25], "bind": [24, 25], "appl": 24, "chronolog": 25, "histori": 25, "t1_overview_benchmark": 25, "t2_overview_scal": 25, "t3a_aaontology_categori": 25, "t3b_aaontology_subcategori": 25, "begin": 25, "append": 25, "caspas": 25, "furin": 25, "long": 25, "ldr": 25, "metallopeptidas": 25, "mmp2": 25, "rbp60": 25, "solvent": 25, "sa": 25, "amyloidognen": 25, "capdsid": 25, "disulfid": 25, "ss": 25, "bond": 25, "cytoplasm": 25, "plasma": 25, "insolubl": 25, "494524": 25, "unknown": 25, "statu": 25, "tier": 25, "system": 25, "systemat": 25, "arrang": 25, "67": 25, "everi": 25, "clearli": 25, "assess": 25, "couldn": 25, "alloc": 25, "regard": 25, "chothia": 25, "1976": 25, "lin": 25, "2003": 25, "64": 25, "occurr": 25, "cellular": 25, "mitochondria": 25, "nakashima": 25, "1990": 25, "nishikawa": 25, "1992": 25, "58": 25, "conform": 25, "\u03b1": 25, "helix": 25, "ranodm": 25, "tanaka": 25, "1977": 25, "fasman": 25, "1978b": 25, "richardson": 25, "1988": 25, "qian": 25, "sejnowski": 25, "aurora": 25, "rose": 25, "1998": 25, "19": 25, "charg": 25, "entropi": 25, "charton": 25, "1983": 25, "gui": 25, "1985": 25, "radzicka": 25, "wolfenden": 25, "36": 25, "could": 25, "mutabl": 25, "sneath": 25, "1966": 25, "17": 25, "polar": 25, "hydrophob": 25, "amphiphil": 25, "kyte": 25, "doolittl": 25, "1982": 25, "mitaku": 25, "2002": 25, "koehler": 25, "steric": 25, "angl": 25, 
"symmetri": 25, "represent": 25, "eccentr": 25, "prabhakaran": 25, "ponnuswami": 25, "karkbara": 25, "knislei": 25, "45": 25, "stabil": 25, "backbon": 25, "dynam": 25, "vihinen": 25, "1994": 25, "bastolla": 25, "2005": 25, "31": 25, "water": 25, "tendenc": 25, "oppos": 25, "1978": 25, "partial": 25, "physic": 25, "displac": 25, "caus": 25, "interact": 25, "mainli": 25, "ones": 25, "bull": 25, "brees": 25, "1974": 25, "bigelow": 25, "1967": 25, "jone": 25, "dayhoff": 25, "interior": 25, "unpolar": 25, "fukuchi": 25, "2001": 25, "mp": 25, "cedano": 25, "1997": 25, "mitochondri": 25, "less": 25, "val": 25, "cf": 25, "cap": 25, "asp": 25, "glu": 25, "ly": 25, "arg": 25, "observ": 25, "character": 25, "punta": 25, "maritan": 25, "robson": 25, "suzuki": 25, "linker": 25, "georg": 25, "heringa": 25, "2004": 25, "helic": 25, "half": 25, "finkelstein": 25, "1991": 25, "outsid": 25, "befor": 25, "geisow": 25, "robert": 25, "1980": 25, "ramachandran": 25, "state": 25, "quadrant": 25, "bottom": 25, "paul": 25, "1951": 25, "antiparallel": 25, "lifson": 25, "sander": 25, "1979": 25, "bend": 25, "revers": 25, "tight": 25, "consecut": 25, "back": 25, "hydrogen": 25, "3rd": 25, "4th": 25, "1st": 25, "2nd": 25, "tm": 25, "place": 25, "monn\u00e9": 25, "1999": 25, "\u03c0": 25, "ala": 25, "gln": 25, "fodj": 25, "karadaghi": 25, "net": 25, "donor": 25, "klein": 25, "1984": 25, "acceptor": 25, "faucher": 25, "hi": 25, "electron": 25, "ion": 25, "pot": 25, "valenc": 25, "chemic": 25, "cosic": 25, "low": 25, "due": 25, "strong": 25, "hutchen": 25, "1970": 25, "unfold": 25, "gibb": 25, "denatur": 25, "yutani": 25, "1987": 25, "instabl": 25, "highest": 25, "break": 25, "pro": 25, "munoz": 25, "serrano": 25, "isoelectr": 25, "ph": 25, "electr": 25, "neutral": 25, "zimmerman": 25, "1968": 25, "16": 25, "crystal": 25, "pairwis": 25, "constitu": 25, "atom": 25, "lennard": 25, "oobatak": 25, "ooi": 25, "rel": 25, "chang": 25, "divid": 25, "aliphat": 25, "linear": 25, "aromat": 25, "carbon": 25, 
"approxim": 25, "invers": 25, "reactiv": 25, "hydroxythiol": 25, "wold": 25, "occur": 25, "esp": 25, "amphipath": 25, "highli": 25, "signal": 25, "argo": 25, "cornett": 25, "38": 25, "environ": 25, "eisenberg": 25, "mclachlan": 25, "1986": 25, "surround": 25, "angstrom": 25, "radiu": 25, "pack": 25, "globular": 25, "1981": 25, "28": 25, "eigenvalu": 25, "laplacian": 25, "undirect": 25, "node": 25, "mass": 25, "molecular": 25, "second": 25, "actual": 25, "root": 25, "squar": 25, "gyrat": 25, "farther": 25, "awai": 25, "relationship": 25, "rate": 25, "shift": 25, "bundi": 25, "wuthrich": 25, "nh": 25, "temperatur": 25, "rigid": 25, "gly": 25, "ser": 25, "particularli": 25, "ptitsyn": 25, "zhou": 25, "equilibrium": 25, "sueki": 25, "flow": 26, "enri": 26, "introduc": 27, "diagram": 28, "platform": 29, "novel": 29, "everywher": [30, 31], "setup": 30, "augment": 30, "smote": 30, "artifici": 30, "Such": 30, "veri": 30, "deep": 30, "imag": 30, "recognit": 30, "feasibl": 30, "becaus": 30, "slight": 30, "mutat": 30, "alter": 30, "dramat": 30, "great": 30, "quantiti": 30, "besid": 30, "distinguish": 30, "subfield": 30}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "CPP"], [3, 0, 1, "", "CPPPlot"], [4, 0, 1, "", "SequenceFeature"], [5, 0, 1, "", "dPULearn"], [6, 3, 1, "", "load_dataset"], [7, 3, 1, "", "load_scales"], [8, 3, 1, "", "plot_gcfs"], [9, 3, 1, "", "plot_get_cdict"], [10, 3, 1, "", "plot_get_clist"], [11, 3, 1, "", "plot_get_cmap"], [12, 3, 1, "", "plot_legend"], [13, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "center_labels_"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "comp_centers"], [1, 1, 1, "", "comp_correlation"], [1, 1, 1, "", "comp_coverage"], [1, 1, 1, "", "comp_medoids"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 2, 1, "", "is_medoid_"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "medoid_labels_"], [1, 2, 1, "", "medoid_names_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "model"], 
[1, 2, 1, "", "n_clusters"], [1, 1, 1, "", "name_clusters"]], "aaanalysis.CPP": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "heatmap"], [3, 1, 1, "", "profile"], [3, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "add_dif"], [4, 1, 1, "", "add_feat_value"], [4, 1, 1, "", "add_position"], [4, 1, 1, "", "feat_matrix"], [4, 1, 1, "", "feat_names"], [4, 1, 1, "", "get_df_parts"], [4, 1, 1, "", "get_features"], [4, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "eval"], [5, 1, 1, "", "fit"], [5, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 16, 28, 30], "featur": [0, 15], "engin": [0, 15], "pu": [0, 16, 30], "learn": [0, 15, 30], "explain": [0, 15, 31], "ai": [0, 15, 31], "perturb": 0, "plot": [0, 14], "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 28], "aaclust": [1, 15], "note": [1, 2, 4, 5, 6, 7, 12], "cpp": [2, 15, 29], "cppplot": 3, "exampl": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 18], "sequencefeatur": 4, "dpulearn": 5, "load_dataset": 6, "load_scal": 7, "plot_gcf": 8, "plot_get_cdict": 9, "plot_get_clist": 10, "plot_get_cmap": 11, "plot_legend": 12, "plot_set": 13, "prelud": 14, "quick": [15, 32], "start": [15, 32], "what": [15, 30, 31], "you": 15, "Will": 15, "1": 15, "load": [15, 16, 17], "sequenc": [15, 31], "scale": [15, 17, 25, 27], "2": 15, "compar": 15, "physicochem": [15, 29], "profil": 15, "3": 15, "protein": [15, 16, 25], "predict": 15, "4": 15, "group": 15, "level": [15, 31], "individu": 15, "tutori": [16, 17, 32], "benchmark": [16, 24, 25], "amino": 
[16, 17, 25, 27], "acid": [16, 17, 25, 27], "window": 16, "size": 16, "posit": 16, "unlabel": 16, "dataset": [16, 24, 25], "three": 17, "set": 17, "numer": 17, "aaontologi": [17, 25, 27], "redund": 17, "reduc": 17, "subset": 17, "filter": 17, "welcom": 18, "document": [18, 19, 22], "instal": [18, 19], "overview": [18, 22, 25], "refer": [18, 24], "indic": 18, "tabl": [18, 25], "citat": 18, "contribut": 19, "introduct": [19, 22], "vision": 19, "object": 19, "non": 19, "goal": 19, "principl": [19, 26], "bug": 19, "report": 19, "latest": 19, "version": 19, "local": 19, "develop": 19, "environ": 19, "fork": 19, "clone": 19, "depend": 19, "run": 19, "unit": 19, "test": 19, "pull": 19, "request": 19, "preview": 19, "chang": 19, "name": 19, "convent": 19, "class": 19, "templat": 19, "function": 19, "method": 19, "code": 19, "philosophi": 19, "style": 19, "layer": 19, "build": 19, "doc": 19, "chatgpt": 19, "guid": 19, "tgd": 19, "workflow": 22, "algorithm": 24, "us": [24, 29], "case": 24, "further": [24, 32], "inform": 24, "categori": 25, "subcategori": 25, "usag": 26, "classif": 27, "flow": 28, "enri": 28, "point": 28, "compon": 28, "entri": 28, "bridg": 28, "extern": 28, "librari": 28, "identifi": 29, "signatur": 29, "from": 30, "unbalanc": 30, "small": 30, "i": [30, 31]}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data": [[0, "data"]], "Feature Engineering": [[0, "feature-engineering"]], "PU Learning": [[0, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], "aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [1, null], [1, null], 
[1, null], [2, null], [2, null], [4, null], [4, null], [4, null], [4, null], [4, null], [5, null], [5, null], [6, null], [7, null], [12, null]], "aaanalysis.CPP": [[2, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[3, "aaanalysis-cppplot"]], "Examples": [[3, null], [4, null], [4, null], [5, null], [6, null], [7, null], [8, null], [9, null], [10, null], [11, null], [12, null], [13, null]], "aaanalysis.SequenceFeature": [[4, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[5, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[6, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[7, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[8, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[9, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_clist": [[10, "aaanalysis-plot-get-clist"]], "aaanalysis.plot_get_cmap": [[11, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_legend": [[12, "aaanalysis-plot-legend"]], "aaanalysis.plot_settings": [[13, "aaanalysis-plot-settings"]], "Plotting Prelude": [[14, "plotting-prelude"]], "Quick Start with AAanalysis": [[15, "quick-start-with-aaanalysis"]], "What You Will Learn:": [[15, "what-you-will-learn"]], "1. Loading Sequences and Scales": [[15, "loading-sequences-and-scales"]], "2. Feature Engineering": [[15, "feature-engineering"]], "AAclust": [[15, "aaclust"]], "Comparative Physicochemical Profiling (CPP)": [[15, "comparative-physicochemical-profiling-cpp"]], "3. Protein Prediction": [[15, "protein-prediction"]], "4. 
Explainable AI": [[15, "explainable-ai"]], "Explainable AI on group level": [[15, "explainable-ai-on-group-level"]], "Explainable AI on individual level": [[15, "explainable-ai-on-individual-level"]], "Data Loading Tutorial": [[16, "data-loading-tutorial"]], "Loading of protein benchmarks": [[16, "loading-of-protein-benchmarks"]], "Loading of protein benchmarks: Amino acid window size": [[16, "loading-of-protein-benchmarks-amino-acid-window-size"]], "Loading of protein benchmarks: Positive-Unlabeled (PU) datasets": [[16, "loading-of-protein-benchmarks-positive-unlabeled-pu-datasets"]], "Scale Loading Tutorial": [[17, "scale-loading-tutorial"]], "Three sets of numerical amino acid scales": [[17, "three-sets-of-numerical-amino-acid-scales"]], "AAontology": [[17, "aaontology"], [25, "aaontology"]], "Redundancy-reduce scale subsets": [[17, "redundancy-reduce-scale-subsets"]], "Filtering of scales": [[17, "filtering-of-scales"]], "Welcome to the AAanalysis documentation!": [[18, "welcome-to-the-aaanalysis-documentation"]], "Install": [[18, "install"]], "OVERVIEW": [[18, null]], "EXAMPLES": [[18, null]], "REFERENCES": [[18, null]], "Indices and tables": [[18, "indices-and-tables"]], "Citation": [[18, "citation"]], "Contributing": [[19, "contributing"]], "Introduction": [[19, "introduction"], [22, "introduction"]], "Vision": [[19, "vision"]], "Objectives": [[19, "objectives"]], "Non-goals": [[19, "non-goals"]], "Principles": [[19, "principles"]], "Bug Reports": [[19, "bug-reports"]], "Installation": [[19, "installation"]], "Latest Version": [[19, "latest-version"]], "Local Development Environment": [[19, "local-development-environment"]], "Fork and Clone": [[19, "fork-and-clone"]], "Install Dependencies": [[19, "install-dependencies"]], "Run Unit Tests": [[19, "run-unit-tests"]], "Pull Requests": [[19, "pull-requests"]], "Preview Changes": [[19, "preview-changes"]], "Documentation": [[19, "documentation"]], "Naming Conventions": [[19, "naming-conventions"]], "Class 
Templates": [[19, "class-templates"]], "Function and Method Naming": [[19, "function-and-method-naming"]], "Code Philosophy": [[19, "code-philosophy"]], "Documentation Style": [[19, "documentation-style"]], "Documentation Layers": [[19, "documentation-layers"]], "Building the Docs": [[19, "building-the-docs"]], "Test with ChatGPT": [[19, "test-with-chatgpt"]], "Test Guided Development (TGD)": [[19, "test-guided-development-tgd"]], "Workflow": [[22, "workflow"]], "Overview of documentation": [[22, "overview-of-documentation"]], "References": [[24, "references"]], "Algorithms": [[24, "algorithms"]], "Datasets and Benchmarks": [[24, "datasets-and-benchmarks"]], "Use Cases": [[24, "use-cases"]], "Further Information": [[24, "further-information"]], "Tables": [[25, "tables"]], "Overview Table": [[25, "overview-table"]], "Protein Benchmark Datasets": [[25, "protein-benchmark-datasets"]], "Amino Acid Scale Datasets": [[25, "amino-acid-scale-datasets"]], "Categories": [[25, "categories"]], "Subcategories": [[25, "subcategories"]], "Usage Principles": [[26, "usage-principles"]], "AAontology: Classification of amino acid scales": [[27, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[28, "data-flow-and-enry-points"]], "Data Flow: Components of AAanalysis": [[28, "data-flow-components-of-aaanalysis"]], "Entry Points: Bridges to External Libraries": [[28, "entry-points-bridges-to-external-libraries"]], "Identifying Physicochemical Signatures using CPP": [[29, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[30, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[30, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[31, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[31, "what-is-explainable-ai"]], "Tutorials": [[32, "tutorials"]], "Quick start": [[32, "quick-start"]], "Further Tutorials": [[32, "further-tutorials"]]}, "indexentries": {"aaclust 
(class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "center_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.center_labels_"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "comp_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_centers"]], "comp_correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_correlation"]], "comp_coverage() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_coverage"]], "comp_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_medoids"]], "eval() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "is_medoid_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.is_medoid_"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "medoid_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_labels_"]], "medoid_names_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_names_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "model (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.model"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "name_clusters() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.name_clusters"]], "cpp (class in aaanalysis)": [[2, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[3, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[3, 
"aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[4, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[5, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[5, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[6, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[7, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[8, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[9, "aaanalysis.plot_get_cdict"]], "plot_get_clist() (in module aaanalysis)": [[10, "aaanalysis.plot_get_clist"]], 
"plot_get_cmap() (in module aaanalysis)": [[11, "aaanalysis.plot_get_cmap"]], "plot_legend() (in module aaanalysis)": [[12, "aaanalysis.plot_legend"]], "plot_settings() (in module aaanalysis)": [[13, "aaanalysis.plot_settings"]]}})
\ No newline at end of file
+Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_clist", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_legend", "generated/aaanalysis.plot_settings", "generated/plotting_prelude", "generated/tutorial1_quick_start", "generated/tutorial2a_data_loader", "generated/tutorial2b_scales_loader", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tutorials"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_clist.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_legend.rst", "generated/aaanalysis.plot_settings.rst", "generated/plotting_prelude.rst", "generated/tutorial1_quick_start.rst", "generated/tutorial2a_data_loader.rst", "generated/tutorial2b_scales_loader.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles.rst", "index/usage_principles/aaontology.rst", 
"index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tutorials.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_clist", "aaanalysis.plot_get_cmap", "aaanalysis.plot_legend", "aaanalysis.plot_settings", "Plotting Prelude", "Quick Start with AAanalysis", "Data Loading Tutorial", "Scale Loading Tutorial", "Welcome to the AAanalysis documentation!", "Contributing", "<no title>", "<no title>", "Introduction", "<no title>", "References", "Tables", "Usage Principles", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tutorials"], "terms": {"thi": [0, 1, 3, 7, 8, 10, 12, 13, 14, 15, 16, 17, 19, 28], "applic": [0, 3, 12], "program": [0, 19], "interfac": [0, 19, 25], "i": [0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 15, 16, 17, 18, 19, 22, 23, 25, 27, 29], "public": [0, 14, 16, 18, 19, 21], "object": [0, 1, 3, 4, 5, 12, 15], "function": [0, 1, 3, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 23], "our": [0, 8, 9, 11, 14, 15, 17, 19, 22], "aaanalysi": [0, 14, 16, 17, 19, 21, 22, 23, 25, 26, 29, 32], "python": [0, 15, 18, 19, 22, 23], "toolkit": [0, 19, 28], "which": [0, 3, 4, 12, 13, 15, 16, 17, 19, 22, 25, 28, 30], "can": [0, 1, 4, 5, 8, 12, 14, 15, 16, 17, 18, 19, 22, 25, 28, 30], "import": [0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 26], "aa": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 25, 26], "you": [0, 14, 17, 18, 19, 21], "access": [0, 1, 6, 15, 17, 25], "all": [0, 1, 2, 3, 4, 6, 7, 13, 14, 15, 17, 19, 25], "method": [0, 1, 2, 3, 4, 5, 15, 24], "via": [0, 
14, 19, 24], "alia": [0, 4], "load_dataset": [0, 4, 15, 16, 17, 25], "class": [1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 16, 30], "model_class": [1, 15], "sklearn": [1, 15], "cluster": [1, 15, 18, 22, 23, 24, 25], "_kmean": 1, "kmean": 1, "model_kwarg": 1, "none": [1, 2, 3, 4, 5, 6, 7, 12, 16], "verbos": [1, 2, 3, 4, 5, 15], "fals": [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 15, 17], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19], "base": [1, 2, 3, 4, 5, 6, 12, 15, 18, 19, 22, 23, 24, 25, 29, 30], "wrapper": [1, 3, 15, 18, 19, 22, 23], "A": [1, 4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 19, 22, 24], "k": [1, 17, 18, 22, 23, 24], "optim": [1, 2, 3, 10, 14, 18, 19, 22, 23, 24], "select": [1, 2, 3, 6, 7, 15, 16, 17, 18, 19, 22, 23, 24], "redund": [1, 2, 7, 15, 18, 19, 22, 23, 24], "reduc": [1, 5, 7, 18, 22, 23, 24, 25], "set": [1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 16, 18, 19, 22, 23, 24, 25, 28], "numer": [1, 3, 4, 15, 18, 22, 23], "scale": [1, 2, 3, 4, 7, 9, 13, 18, 21, 22, 23, 24, 26, 28, 32], "us": [1, 2, 3, 5, 6, 7, 8, 12, 14, 15, 16, 17, 18, 19, 21, 22, 25, 26, 28, 30], "model": [1, 5, 15, 19, 30], "requir": [1, 19], "pre": [1, 2, 15, 16, 19], "defin": [1, 4, 7, 15, 16, 19, 25, 28], "number": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 16, 17, 25], "n_cluster": [1, 15], "mean": [1, 2, 3, 15, 17, 25], "other": [1, 3, 7, 13, 14, 17, 19, 25], "scikit": [1, 19], "learn": [1, 5, 16, 18, 19, 21, 22, 23, 24, 25, 26], "valu": [1, 2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 22, 25], "util": [1, 13, 14, 16, 18, 19], "pearson": [1, 2], "correl": [1, 2, 25], "repres": [1, 3, 15, 16, 22, 25], "sampl": [1, 2, 3, 4, 5, 16, 25, 30], "medoid": 1, "each": [1, 2, 3, 4, 5, 15, 16, 17, 19], "closest": 1, "center": [1, 15, 25], "result": [1, 2, 19], "see": [1, 3, 19, 22, 25, 28], "breimann23a": [1, 6, 7, 24, 25], "paramet": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 19, 25], "type": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 16, 19, 25], "clustermixin": 1, "instanti": 1, "dure": [1, 
5], "fit": [1, 5, 15, 19], "option": [1, 2, 3, 4, 5, 6, 7, 12, 13], "dict": [1, 2, 3, 4, 5, 9, 12], "keyword": [1, 3, 5], "argument": [1, 3, 4, 5, 12], "pass": [1, 3, 5, 19], "bool": [1, 2, 3, 4, 5, 6, 7, 11, 12, 13], "If": [1, 2, 3, 4, 5, 6, 7, 12, 13, 17, 18, 19, 21, 30], "true": [1, 2, 3, 4, 6, 7, 11, 12, 13, 14, 16, 17], "output": [1, 2, 4, 5, 14, 19], "ar": [1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 19, 25, 28, 30, 31], "enabl": [1, 2, 3, 4, 5, 18, 19, 22, 23, 29], "The": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 17, 19, 25, 28, 29], "after": [1, 2, 25], "call": [1, 7, 14, 25], "obtain": [1, 4, 7, 15, 25], "int": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "labels_": [1, 5], "label": [1, 2, 3, 4, 5, 6, 12, 14, 15, 16, 19, 25, 30], "order": [1, 19, 25], "x": [1, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15], "arrai": [1, 2, 4, 5, 15], "like": [1, 2, 4, 5, 13, 14, 19, 25], "shape": [1, 2, 3, 4, 5, 12, 25], "n_sampl": [1, 2, 4, 5], "centers_": 1, "averag": [1, 4, 15, 17, 25], "correspond": [1, 12, 16, 19, 25], "n_featur": [1, 2, 3, 4, 5], "center_labels_": 1, "medoids_": 1, "one": [1, 3, 10, 12, 19], "medoid_labels_": 1, "is_medoid_": 1, "indic": [1, 3, 4, 5, 16, 17, 19, 25], "being": [1, 16, 19, 25], "1": [1, 2, 3, 4, 5, 6, 7, 9, 12, 13, 14, 16, 17, 19, 25, 30], "0": [1, 2, 3, 4, 5, 6, 12, 14, 15, 16, 17, 25, 30], "same": [1, 7, 17], "medoid_names_": [1, 15], "name": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 13, 15, 16, 17, 25], "provid": [1, 3, 5, 6, 7, 12, 15, 16, 17, 18, 19, 23, 25, 30], "list": [1, 3, 4, 9, 10, 11, 12, 15, 25], "attribut": 1, "directli": [1, 19], "design": [1, 3, 19, 25, 29], "primarili": [1, 5, 19], "amino": [1, 2, 3, 4, 6, 7, 15, 18, 21, 22, 23, 24, 26, 28, 30], "acid": [1, 2, 3, 4, 6, 7, 15, 18, 21, 22, 23, 24, 26, 28, 30], "ani": [1, 17, 19, 22, 25], "__init__": [1, 2, 3, 4, 5], "on_cent": 1, "min_th": 1, "merge_metr": 1, "euclidean": [1, 5], "appli": [1, 5, 12, 13, 16], "algorithm": [1, 2, 3, 15, 18, 19, 22, 23, 28, 29], "featur": [1, 2, 3, 4, 5, 
18, 19, 22, 23, 28, 29, 30], "matrix": [1, 4, 5, 15, 25], "determin": [1, 7], "without": [1, 3, 19, 25], "specif": [1, 16, 19, 25], "It": [1, 13, 15, 16, 22, 25, 28], "partit": [1, 17, 25], "data": [1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 19, 25, 26, 32], "maxim": 1, "within": [1, 2, 4, 19, 25, 28], "beyond": 1, "threshold": [1, 2], "qualiti": 1, "either": [1, 4, 6, 7, 17, 18], "minimum": [1, 4, 6], "member": 1, "between": [1, 2, 3, 4, 7, 10, 12, 15, 16, 19, 25], "its": [1, 16, 19, 25], "min_cor_al": 1, "min_cor_cent": 1, "respect": [1, 6, 15, 18, 19, 21, 25], "describ": [1, 25], "row": [1, 16, 17], "column": [1, 2, 3, 4, 5, 6, 7, 12, 16, 17, 19], "must": [1, 4, 10, 11, 19], "float": [1, 2, 3, 5, 12, 13], "otherwis": [1, 3, 4, 5, 25], "str": [1, 3, 4, 5, 6, 7, 9, 11, 12, 13], "metric": [1, 5, 19], "similar": [1, 19, 25, 30], "measur": [1, 19, 25], "merg": 1, "No": 1, "perform": [1, 2, 5, 7, 15, 17, 25], "distanc": [1, 5, 25], "manhattan": [1, 5], "cosin": [1, 5], "return": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 16], "instanc": [1, 3, 12], "allow": 1, "direct": [1, 19], "aanalysi": [1, 19], "consist": [1, 13, 19, 22, 25], "three": [1, 4, 16, 25], "main": [1, 25], "step": [1, 2, 3, 4, 6, 7, 19, 22], "estim": 1, "lower": [1, 25], "bound": 1, "refin": [1, 19], "recurs": [1, 24], "chosen": [1, 2, 4, 6, 7, 16], "smaller": [1, 14], "reduct": 1, "pairwise_dist": 1, "were": [1, 7, 17, 25], "runtimewarn": 1, "caught": 1, "bundl": 1, "static": [1, 4], "eval": [1, 2, 5, 19], "evalu": [1, 2, 7, 17, 19, 25], "establish": [1, 19], "quantifi": 1, "bic": 1, "bayesian": 1, "inform": [1, 2, 3, 4, 5, 17, 28], "criterion": 1, "reflect": [1, 19, 25], "good": [1, 19], "while": [1, 16], "account": [1, 19, 25], "rang": 1, "from": [1, 2, 3, 4, 5, 6, 7, 15, 16, 17, 18, 19, 25, 26], "neg": [1, 4, 5, 6, 12, 16, 19, 25, 30], "infin": 1, "posit": [1, 2, 3, 4, 5, 6, 18, 19, 22, 23, 25, 30], "higher": [1, 25], "superior": 1, "ch": [1, 17, 25], "calinski": 1, "harabasz": 1, "index": [1, 6, 
17, 18, 19, 24], "ratio": 1, "dispers": 1, "score": [1, 15], "suggest": [1, 19], "better": 1, "sc": 1, "silhouett": 1, "coeffici": 1, "proxim": 1, "point": [1, 3, 8, 12, 25, 26], "neighbor": [1, 25], "li": 1, "closer": 1, "impli": 1, "wa": [1, 22], "modifi": [1, 5, 13], "align": [1, 3, 12, 15, 17, 19], "so": 1, "signifi": 1, "contrari": 1, "convent": [1, 4, 7], "implement": [1, 19], "favor": 1, "calinski_harabasz_scor": 1, "silhouette_scor": 1, "name_clust": 1, "shorten_nam": 1, "assign": [1, 3, 4, 5, 17, 25], "frequenc": [1, 25], "priorit": 1, "alreadi": [1, 30], "contain": [1, 2, 3, 5, 6, 7, 17, 19, 25, 28, 30], "unclassifi": [1, 7, 17, 25], "shorten": 1, "version": [1, 17, 25], "cluster_nam": 1, "renam": 1, "comp_cent": 1, "comput": [1, 2, 3, 4, 15, 19, 24, 25], "given": [1, 3, 4, 6, 10, 11, 12, 15, 17, 19, 25], "center_label": 1, "associ": [1, 25], "comp_medoid": 1, "medoid_label": 1, "comp_correl": 1, "x_ref": 1, "labels_ref": 1, "names_ref": 1, "refer": [1, 2, 4, 6, 15, 19, 25], "compar": [1, 16, 18, 22, 23, 25, 28, 29], "n_samples_ref": 1, "df_corr": 1, "datafram": [1, 2, 3, 4, 5, 6, 7, 15, 19, 28], "pair": 1, "pd": [1, 4, 5, 15, 19], "sort": 1, "ascend": 1, "replac": [1, 6], "panda": [1, 2, 3, 4, 5, 6, 7, 15, 19], "corr": 1, "comp_coverag": 1, "percentag": [1, 2, 5, 17], "uniqu": [1, 2, 3, 17, 19], "present": [1, 4, 6], "help": 1, "understand": 1, "coverag": [1, 19], "particular": 1, "subset": [1, 4, 7, 25], "univers": 1, "both": [1, 3, 13, 16], "consid": [1, 7, 19], "onli": [1, 3, 6, 7, 12, 13, 16, 19, 25, 30], "onc": [1, 19], "regardless": 1, "repetit": 1, "should": [1, 2, 3, 4, 5, 19, 30], "superset": 1, "found": [1, 4, 19], "df_scale": [2, 4, 7, 15, 17, 28], "df_cat": [2, 3, 4, 7, 17, 28], "df_part": [2, 4, 15, 28], "split_kw": [2, 4, 15, 28], "accept_gap": [2, 3, 4], "tool": [2, 19, 24], "creat": [2, 3, 4, 5, 13, 14, 15, 19, 28], "filter": [2, 3, 6, 15, 16], "most": [2, 3, 5, 12, 15, 18, 22, 23], "discrimin": [2, 3, 15], "two": [2, 3, 7, 8, 15, 17, 18, 
19, 22, 23, 24, 25, 27, 28], "sequenc": [2, 3, 4, 5, 6, 16, 18, 19, 22, 23, 24, 25, 26, 28, 29, 30], "default": [2, 3, 4, 5, 6, 8, 9, 12, 13, 14, 15, 16, 17], "load_categori": [2, 4], "categori": [2, 3, 4, 7, 9, 10, 12, 16, 17], "physicochem": [2, 4, 18, 22, 23, 24, 25, 26, 28], "part": [2, 3, 4, 15, 19, 28], "sequencefeatur": [2, 15], "get_split_kw": [2, 4, 15], "nest": [2, 4], "dictionari": [2, 3, 4, 9, 12], "split_typ": [2, 4, 15], "whether": [2, 3, 4, 11, 12], "accept": [2, 3, 4], "miss": [2, 3, 4], "omit": [2, 3, 4], "print": [2, 3, 4, 15], "progress": [2, 3, 24], "about": [2, 3], "run": [2, 4, 15], "parametr": 2, "n_filter": 2, "100": [2, 6, 15, 16], "tmd_len": [2, 3, 4], "20": [2, 3, 4, 7, 16, 17, 19, 25], "jmd_n_len": [2, 3, 4], "10": [2, 3, 4, 10, 12, 16, 17, 19, 25], "jmd_c_len": [2, 3, 4], "ext_len": [2, 3, 4], "4": [2, 3, 4, 16, 17, 25], "start": [2, 3, 4, 6, 19, 25, 26, 28], "check_cat": 2, "n_pre_filt": 2, "pct_pre_filt": 2, "5": [2, 3, 4, 5, 14, 15, 16, 17, 19, 25], "max_std_test": 2, "2": [2, 3, 4, 5, 8, 10, 12, 14, 16, 17, 19, 25, 30], "max_overlap": 2, "max_cor": 2, "n_process": 2, "pipelin": [2, 19], "creation": 2, "aim": [2, 3, 15, 19], "identifi": [2, 3, 5, 6, 15, 16, 18, 22, 23, 24, 26, 30], "collect": [2, 7], "non": [2, 4, 6, 15, 25], "test": [2, 15, 17], "group": [2, 3, 4, 12, 14, 25], "t": [2, 6, 15, 17, 25], "u": [2, 14, 18, 19], "p": [2, 17, 24], "length": [2, 3, 4, 6, 12, 16, 25], "tmd": [2, 3, 4, 6, 15, 16], "todo": [2, 19], "add": [2, 3, 4, 19], "link": [2, 18, 19, 21, 24], "explan": [2, 3, 19], "first": [2, 3, 4, 7, 14, 19], "n": [2, 3, 4, 6, 7, 15, 16, 17, 19, 24, 25], "terminu": [2, 3, 4, 25], "jmd": [2, 3, 4, 15], "c": [2, 3, 4, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 24, 25], "extend": [2, 3, 4, 19, 25, 30], "termin": [2, 3, 4, 15, 16, 25], "longer": 2, "than": [2, 25], "check": [2, 19], "remain": [2, 17, 19], "maximum": [2, 4, 5, 6, 15], "standard": [2, 30], "deviat": 2, "overlap": 2, "cpu": 2, "multiprocess": 2, "automat": [2, 3, 
5, 12, 19], "df_feat": [2, 3, 4, 15, 28], "statist": [2, 3], "n_feature_inform": [2, 3], "follow": [2, 4, 5, 7, 18, 19, 21, 22, 23, 26], "eleven": 2, "includ": [2, 4, 6, 7, 12, 19], "id": [2, 4, 6, 7, 17], "rank": [2, 17], "11": [2, 3, 16, 25], "split": [2, 4, 15, 28], "subcategori": [2, 3, 7, 17], "sub": 2, "scale_nam": [2, 3, 7, 17], "abs_auc": [2, 3], "absolut": [2, 19], "adjust": [2, 3, 12, 13, 14], "auc": 2, "abs_mean_dif": 2, "differ": [2, 3, 4, 10, 16, 17, 28], "std_test": [2, 3], "std_ref": 2, "p_val": 2, "mann_whitnei": 2, "ttest_indep": 2, "p_val_fdr_bh": 2, "benjamini": 2, "hochberg": 2, "fdr": 2, "correct": 2, "gener": [2, 3, 4, 6, 10, 11, 13, 19, 22, 24, 25, 30], "condit": [3, 4], "jmd_m_len": [3, 4], "profil": [3, 18, 22, 23, 29], "y": [3, 8, 9, 10, 11, 12, 13, 14, 15, 17], "val_col": 3, "mean_dif": 3, "val_typ": 3, "count": [3, 16], "normal": [3, 7, 12, 17, 19, 25], "figsiz": 3, "7": [3, 4, 5, 14, 16, 17, 25], "titl": [3, 8, 12, 13, 14, 15], "title_kw": 3, "dict_color": [3, 9, 12, 14], "edge_color": 3, "bar_width": 3, "75": 3, "add_jmd_tmd": 3, "jmd_n_seq": 3, "tmd_seq": 3, "jmd_c_seq": 3, "tmd_color": 3, "mediumspringgreen": 3, "jmd_color": 3, "blue": [3, 15], "tmd_seq_color": 3, "black": [3, 11, 12, 14, 19], "jmd_seq_color": 3, "white": [3, 11, 12], "seq_siz": 3, "tmd_jmd_fontsiz": 3, "xtick_siz": 3, "xtick_width": 3, "xtick_length": 3, "xticks_po": 3, "ytick_siz": 3, "ytick_width": 3, "ytick_length": 3, "ylim": [3, 15], "highlight_tmd_area": 3, "highlight_alpha": 3, "15": [3, 4, 16, 25], "grid": [3, 13, 14], "grid_axi": [3, 13, 14], "add_legend_cat": 3, "legend_kw": 3, "shap_plot": 3, "kwarg": [3, 4, 12], "plot": [3, 8, 9, 10, 11, 12, 13, 16, 18, 19, 25, 32], "avail": [3, 7, 12, 15, 17, 18, 21, 24], "specifi": [3, 4, 5, 9, 11, 15, 19], "check_value_typ": 3, "tupl": [3, 11], "size": [3, 4, 8, 12, 13, 14, 15, 25], "custom": [3, 7, 14, 19], "appear": [3, 25], "map": [3, 4, 11, 12], "color": [3, 8, 9, 10, 11, 12, 13, 14], "edg": [3, 12, 19, 25], 
"bar": 3, "width": [3, 12], "line": [3, 12, 13, 14, 19], "annot": 3, "font": [3, 8, 12, 13], "tick": [3, 13, 14], "axi": [3, 13, 17], "limit": [3, 19], "highlight": 3, "area": [3, 17, 25], "alpha": 3, "ad": 3, "drawn": 3, "legend": [3, 12, 13, 14], "shap": [3, 8, 11, 15, 19], "shaplei": 3, "addit": [3, 4, 5, 7, 13, 17, 19, 25], "intern": [3, 19, 25], "librari": [3, 13, 19], "ax": [3, 9, 12, 13], "matplotlib": [3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19], "heatmap": 3, "8": [3, 4, 5, 12, 16, 17, 19, 25], "vmin": 3, "vmax": 3, "grid_on": 3, "cmap": 3, "rdbu_r": 3, "cmap_n_color": 3, "cbar_kw": 3, "facecolor_dark": [3, 11], "add_importance_map": 3, "cbar_pct": 3, "featuremap": 3, "versu": 3, "seaborn": [3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19], "level": [3, 6, 7, 16, 17, 18, 19, 23, 25, 26, 27], "e": [3, 4, 14, 15, 17, 18, 19, 22, 23, 25, 30], "g": [3, 4, 17, 18, 19, 22, 23, 25, 30], "protein": [3, 4, 6, 17, 18, 19, 22, 23, 24, 28, 29, 30], "shown": 3, "feat_impact": 3, "displai": [3, 13], "sum": [3, 17, 25], "std": 3, "aggreg": 3, "positions_onli": 3, "further": [3, 17, 19, 25], "across": [3, 13, 17, 19], "recommend": [3, 5, 7, 19], "when": [3, 5, 12, 19, 25], "emphas": [3, 19], "fewer": 3, "value_typ": 3, "height": 3, "figur": 3, "inch": 3, "pyplot": [3, 8, 9, 10, 11, 12, 13, 14, 15, 16], "anchor": [3, 12, 25], "colormap": 3, "infer": [3, 19], "seismic": 3, "space": [3, 5, 12, 19], "impact": 3, "discret": 3, "diverg": 3, "sequenti": 3, "classifi": 3, "kei": [3, 9, 12, 19, 25], "colorbar": 3, "under": [3, 7, 19], "depicet": 3, "depict": 3, "jmd_n": [3, 4, 6, 16], "jmd_c": [3, 4, 6, 16], "set_xticklabel": 3, "widht": 3, "tick_param": 3, "classif": [3, 6, 7, 15, 16, 17, 18, 23, 25, 26, 30], "pcolormesh": 3, "effect": [3, 19, 25, 30], "document": [3, 25], "more": [3, 12, 13, 15, 19], "detail": [3, 6, 7, 17, 18, 19, 21], "cpp": [3, 4, 8, 11, 18, 21, 22, 23, 26, 28], "code": [3, 8, 9, 10, 11, 12, 13, 14], "update_seq_s": 3, "retriev": [4, 15], "compon": [4, 5, 7, 17, 25], 
"continu": [4, 11, 15, 19], "domain": [4, 6, 15, 16, 25], "transmembran": [4, 25], "membran": [4, 25], "principl": [4, 18], "distinct": [4, 18, 19, 22, 23, 25], "segment": [4, 15, 28], "pattern": [4, 12, 15], "properti": [4, 12, 19, 25], "express": 4, "realiz": 4, "For": [4, 6, 16, 19, 30], "over": [4, 15], "valid": [4, 19], "tmd_e": 4, "tmd_n": 4, "tmd_c": 4, "ext_c": 4, "ext_n": 4, "tmd_jmd": [4, 15], "jmd_n_tmd_n": 4, "tmd_c_jmd_c": 4, "ext_n_tmd_n": 4, "tmd_c_ext_c": 4, "get_df_part": [4, 15], "df_seq": [4, 5, 6, 15, 16, 28], "list_part": [4, 15], "all_part": 4, "datafran": 4, "compris": [4, 12, 17], "tmd_start": [4, 6, 16], "tmd_stop": [4, 6, 16], "string": 4, "len": [4, 9, 16], "lenght": 4, "resp": [4, 25], "extra": [4, 14, 25], "possibl": [4, 16, 25, 30], "get": [4, 8, 12, 14, 26], "sf": [4, 15], "dom_gsec": [4, 15, 16, 25], "n_split_min": 4, "n_split_max": [4, 15], "steps_pattern": 4, "n_min": 4, "n_max": 4, "len_max": 4, "steps_periodicpattern": 4, "periodicpattern": 4, "greater": 4, "greatest": 4, "whole": [4, 6, 17], "specfii": 4, "smallest": [4, 25], "integ": 4, "3": [4, 5, 10, 11, 12, 16, 17, 19, 25], "6": [4, 16, 17, 25], "vari": [4, 16], "paramt": 4, "argumetn": 4, "get_featur": 4, "load_scal": [4, 15, 17, 18, 23, 25], "combin": [4, 15, 19, 25], "form": [4, 25], "feat_matrix": [4, 15], "n_job": [4, 15], "return_label": 4, "seri": 4, "job": 4, "parallel": [4, 25], "spars": 4, "feat_nam": 4, "convert": 4, "depend": [4, 25], "last": 4, "step1": 4, "step2": 4, "add_feat_valu": 4, "dict_scal": 4, "letter": 4, "feature_valu": 4, "n_part": 4, "ha": [4, 19, 25], "where": [4, 5, 13, 25], "structur": [4, 24, 25], "th": [4, 7, 17], "n_split": 4, "p1": 4, "p2": 4, "pn": 4, "end": [4, 19, 25], "odd": [4, 16], "even": 4, "give": 4, "add_dif": 4, "sample_nam": 4, "ref_group": 4, "add_posit": 4, "part_split": 4, "feat_posit": 4, "total": [4, 5, 17, 19, 25], "n_compon": 5, "pca_kwarg": 5, "determinist": [5, 18, 22, 23], "unlabel": [5, 18, 22, 23, 25, 30], "offer": 
[5, 16, 19], "approach": [5, 15, 16, 19, 30], "pu": [5, 18, 22, 23, 25], "emploi": 5, "princip": [5, 7, 17, 25], "analysi": [5, 7, 15, 17, 18, 19, 22, 23, 25], "pca": [5, 17], "dimension": [5, 24], "pc": [5, 7, 25], "iter": 5, "reliabl": [5, 16, 19], "These": [5, 7, 14, 15, 17, 19, 30], "those": [5, 25], "distant": 5, "altern": [5, 30], "also": [5, 16, 17, 19, 25], "80": 5, "cover": 5, "varianc": 5, "identif": [5, 24], "datapoint": 5, "inspir": [5, 19], "techniqu": [5, 30], "an": [5, 6, 7, 12, 14, 15, 16, 17, 18, 19, 21, 24, 25], "theoret": [5, 25], "high": [5, 24, 25], "n_neg": 5, "label_po": 5, "name_neg": 5, "rel_neg": 5, "col_class": 5, "newli": 5, "updat": [5, 19], "new": [5, 19], "store": 5, "Will": 5, "initi": [5, 25], "small": [5, 15, 16, 18, 19, 22, 23, 26, 31], "datafor": 5, "conta": 5, "po": 5, "unl": 5, "numpi": [5, 15, 19], "np": [5, 15], "atgc": 5, "gcta": 5, "actg": 5, "tacg": 5, "mode": 5, "dpul": 5, "info": 6, "random": [6, 16, 25], "non_canonical_aa": 6, "remov": [6, 13, 14], "min_len": [6, 16], "max_len": [6, 16], "aa_window_s": [6, 16], "9": [6, 10, 14, 15, 16, 17, 19, 25], "load": [6, 7, 18, 19, 23, 32], "benchmark": [6, 15, 17, 18, 23], "dataset": [6, 7, 15, 17, 18, 19, 22, 23, 30, 31], "categor": [6, 14, 16], "dom": [6, 16, 25], "seq": [6, 16, 25], "By": 6, "overview": [6, 7, 16, 19], "tabl": [6, 7, 16, 19], "depth": [6, 7, 17, 18, 23], "per": [6, 16, 25], "randomli": [6, 16], "liter": 6, "keep": 6, "gap": 6, "handl": [6, 12], "canon": [6, 17], "don": 6, "symbol": 6, "disabl": [6, 17], "window": [6, 25], "aa_": 6, "df_info": [6, 16], "entri": [6, 16, 17], "uniprot": 6, "binari": [6, 15, 16, 30], "stop": 6, "seq_amylo": [6, 16, 17, 25], "guid": [6, 7], "tutori": [6, 7, 15, 18, 19, 22], "just_aaindex": [7, 17], "unclassified_out": 7, "top60_n": [7, 17], "aaontologi": [7, 15, 18, 21, 23, 24, 26], "scales_raw": [7, 17, 25], "encompass": [7, 25], "aaindex": [7, 15, 17, 24], "kawashima08": [7, 24, 25], "along": [7, 15], "min": [7, 17, 25], "max": 
[7, 17, 25], "organ": [7, 19], "scales_cat": [7, 17, 25], "breimann23b": [7, 18, 21, 24, 25], "compress": [7, 17, 25], "scales_pc": [7, 17, 25], "aaclust": [7, 17, 18, 21, 22, 23, 24, 25], "top": [7, 14, 25], "60": [7, 17, 25], "top60": [7, 17, 25], "individu": [7, 19], "accompani": 7, "top60_ev": [7, 17, 25], "normliz": 7, "raw": [7, 17, 25], "best": [7, 17], "Or": [7, 16], "relev": 7, "exclus": 7, "suffix": [7, 16, 19], "scale_id": [7, 17], "deriv": 7, "descript": [7, 17, 19, 25], "scale_descript": [7, 17], "current": [8, 12], "plot_set": [8, 9, 10, 11, 12, 14, 15, 16], "here": [8, 16, 19, 25], "plt": [8, 9, 10, 11, 12, 13, 14, 15, 16], "sn": [8, 9, 10, 11, 12, 13, 14, 15, 16], "b": [8, 10, 11, 12, 13, 14, 25], "23": [8, 10, 11, 12, 13, 14, 25], "27": [8, 12, 13, 14], "43": [8, 12, 13, 14], "plot_get_clist": [8, 12, 13, 14], "barplot": [8, 9, 10, 11, 12, 13, 14, 15], "palett": [8, 9, 10, 11, 12, 13, 14, 15], "despin": [8, 9, 12, 13, 14, 15, 16], "bigger": 8, "tight_layout": [8, 9, 12, 13, 14], "show": [8, 9, 10, 11, 12, 13, 14, 15, 16, 17], "png": [8, 9, 10, 11, 12, 13], "hire": [8, 9, 10, 11, 12, 13], "pdf": [8, 9, 10, 11, 12, 13], "prelud": [8, 9, 10, 11, 12, 13, 32], "dict_cat": 9, "weight_bold": [9, 13], "xaxi": 9, "set_vis": 9, "n_color": [10, 11, 14], "fuction": 10, "eight": 10, "colorl": 10, "appeal": [10, 14], "visual": [10, 13, 14, 19], "33": [10, 11], "notebook": 10, "color_palett": [10, 11], "101": 11, "shp": 11, "least": [11, 12, 19], "central": [11, 29], "rgb": 11, "14": [11, 14, 25], "light_palett": 11, "lighter": 11, "packag": [11, 19], "list_cat": 12, "loc": [12, 17], "upper": 12, "left": [12, 25], "loc_out": 12, "ncol": [12, 14], "labelspac": 12, "columnspac": 12, "handletextpad": 12, "handlelength": 12, "fontsiz": [12, 14], "fontsize_titl": 12, "weight": [12, 24, 25], "fontsize_weight": 12, "marker": 12, "marker_s": 12, "lw": 12, "linestyl": 12, "edgecolor": 12, "hatch": [12, 14], "hatchcolor": 12, "title_align_left": 12, "independntli": 12, 
"customiz": 12, "flexbili": 12, "convini": 12, "func": 12, "attach": 12, "item": 12, "locat": [12, 25], "25": 12, "thei": [12, 15, 16, 19], "union": 12, "coordin": 12, "": [12, 16, 17, 19, 24, 25], "vertic": 12, "horizont": 12, "bewtween": 12, "text": [12, 13], "visiabl": 12, "corner": 12, "round": [12, 15], "style": [12, 13], "Not": 12, "fill": [12, 19], "furhter": 12, "word": 12, "line2d": 12, "core": 12, "gca": 12, "font_scal": [13, 16], "arial": 13, "adjust_only_font": 13, "adjust_further_el": 13, "no_tick": 13, "short_tick": 13, "no_ticks_x": [13, 14], "short_ticks_x": 13, "no_ticks_i": 13, "short_ticks_i": [13, 14], "show_opt": 13, "configur": 13, "global": 13, "embed": 13, "vector": [13, 25], "format": [13, 25], "svg": 13, "ensur": [13, 16, 19], "compat": 13, "edit": 13, "variou": [13, 15, 19, 25, 28], "viewer": 13, "softwar": [13, 19], "factor": [13, 25], "element": [13, 14], "set_context": 13, "common": [13, 19], "verdana": 13, "helvetica": 13, "dejavu": 13, "san": 13, "bold": 13, "leav": [13, 19], "unchang": 13, "make": [13, 14, 15, 16, 19], "layout": 13, "errorbar": 13, "choos": 13, "mark": 13, "short": 13, "ignor": [13, 16, 19], "runtim": 13, "polt": 13, "rcparam": 13, "manag": 13, "some": [14, 25], "readi": [14, 16], "view": [14, 19, 30], "let": 14, "right": [14, 25], "spine": 14, "look": 14, "just": 14, "easili": [14, 15, 16, 19], "comparison": [14, 15], "d": [14, 17], "increas": [14, 25], "match": [14, 24], "independ": 14, "plot_gcf": [14, 15], "plot_set_legend": 14, "dive": 15, "power": 15, "capabl": [15, 25], "framework": [15, 18, 22, 23], "dedic": 15, "free": [15, 17, 25], "In": [15, 16, 19, 30], "gamma": [15, 25], "secretas": [15, 24, 25], "substrat": [15, 24, 25], "exampl": [15, 16, 19, 22, 30], "we": [15, 16, 19], "ll": 15, "focu": [15, 19], "extract": 15, "interpret": [15, 18, 19, 21, 22, 23, 24, 25, 29], "how": 15, "har": 15, "task": [15, 19, 30], "essenti": [15, 16, 19], "randomforest": 15, "With": 15, "have": [15, 16, 17, 19, 25, 30], 
"\u03b3": [15, 24], "hand": [15, 25], "effortlessli": 15, "furthermor": 15, "predominantli": 15, "hierarch": 15, "known": 15, "your": [15, 18, 19, 21], "fingertip": 15, "now": 15, "50": [15, 16], "centerpiec": 15, "support": [15, 19, 25], "sinc": 15, "problem": 15, "machin": [15, 18, 19, 21, 24, 30], "lightweight": 15, "agglom": 15, "close": [15, 19], "agglomerativeclust": 15, "aac": 15, "integr": [15, 19, 24], "target": [15, 19], "middl": [15, 25], "adjac": [15, 25], "region": [15, 24, 25], "discontinu": 15, "togeth": [15, 28], "input": [15, 19, 28], "characterist": [15, 25], "As": 15, "baselin": 15, "entir": [15, 19], "ensembl": 15, "randomforestclassifi": 15, "model_select": 15, "cross_val_scor": 15, "rf": 15, "cv_base": 15, "accuraci": [15, 17, 24], "f": [15, 17], "58": [15, 25], "take": 15, "littl": 15, "time": 15, "improv": [15, 19, 24], "around": 15, "000": [15, 17], "cv": 15, "tab": 15, "red": 15, "ylabel": 15, "iloc": 16, "head": [16, 17], "13": [16, 25], "predictor": [16, 25], "aa_caspase3": [16, 25], "233": [16, 17, 25], "185605": [16, 25], "705": [16, 17, 25], "184900": [16, 25], "prosper": [16, 24, 25], "aa_furin": [16, 25], "71": [16, 25], "59003": [16, 25], "163": [16, 17, 25], "58840": [16, 25], "aa_ldr": [16, 25], "342": [16, 25], "118248": [16, 25], "35469": [16, 25], "82779": [16, 25], "idp": [16, 24, 25], "seq2seq": [16, 24, 25], "aa_mmp2": [16, 25], "573": [16, 25], "312976": [16, 25], "2416": [16, 25], "310560": [16, 25], "aa_rnabind": [16, 25], "221": [16, 17, 25], "55001": [16, 25], "6492": [16, 25], "48509": [16, 25], "gmksvm": [16, 25], "ru": [16, 25], "aa_sa": [16, 25], "101082": [16, 25], "84523": [16, 25], "1414": [16, 25], "8484": [16, 25], "511": [16, 25], "903": [16, 17, 25], "rerf": [16, 24, 25], "pred": [16, 24, 25], "seq_capsid": [16, 17, 25], "7935": [16, 25], "3364680": [16, 25], "3864": [16, 25], "4071": [16, 25], "viralpro": [16, 24, 25], "seq_disulfid": [16, 17, 25], "2547": [16, 25], "614470": [16, 25], "897": [16, 25], 
"1650": [16, 25], "dipro": [16, 25], "seq_loc": [16, 17, 25], "1835": [16, 25], "732398": [16, 25], "1045": [16, 25], "790": [16, 17, 25], "nan": [16, 25], "seq_solubl": [16, 17, 25], "17408": [16, 25], "4432269": [16, 25], "8704": [16, 25], "solpro": [16, 24, 25], "seq_tail": [16, 17, 25], "6668": [16, 25], "2671690": [16, 25], "2574": [16, 25], "4094": [16, 25], "12": [16, 25], "126": [16, 25], "92964": [16, 25], "63": [16, 25], "prefix": 16, "exemplifi": 16, "df_seq1": 16, "df_seq2": 16, "df_seq3": 16, "capsid_1": 16, "mvthnvkinkhvtrrsyssakevleippltevqtasykwfmdkgik": 16, "capsid_2": 16, "mkkrqkkmtlsnftdtsfqdfvsaeqvddksamalinraedfkagq": 16, "balanc": 16, "200": [16, 17], "value_count": 16, "dtype": 16, "int64": 16, "distribut": 16, "warn": 16, "simplefilt": 16, "action": 16, "futurewarn": 16, "list_seq_len": 16, "histplot": 16, "binwidth": 16, "xlim": 16, "1500": 16, "800": [16, 17], "residu": [16, 17, 24, 25], "seen": 16, "caspase3_1": 16, "mslfdlfrgffgfpgprshrdpffggmtrdedddeeeeeeggswgr": 16, "caspase3_2": 16, "mevtgdagvpesgeirtlkpcllrrnysreqhgvaascledlrska": 16, "caspase3_3": 16, "mrarsgargalllalllcwdptpslagidsggqalpdsfpsapaeq": 16, "caspase3_4": 16, "mdakarncllqhrealekdiktsyimdhmisdgfltiseeekvrn": 16, "conveni": 16, "flank": 16, "side": [16, 17, 25], "equal": 16, "popular": [16, 30], "caspase3_1_pos4": 16, "mslfdlfrg": 16, "caspase3_1_pos5": 16, "slfdlfrgf": 16, "caspase3_1_pos6": 16, "lfdlfrgff": 16, "caspase3_1_pos7": 16, "fdlfrgffg": 16, "21": [16, 25], "caspase3_55_pos170": 16, "kkrkleeeedgklkkpknkdk": 16, "caspase3_29_pos185": 16, "cphhercsdsdglappqhlir": 16, "caspase3_64_pos431": 16, "dnplnwpdekdssfyrnfgst": 16, "caspase3_93_pos455": 16, "fvknmnrdstfivnktitaev": 16, "caspase3_38_pos129": 16, "ssfdldydfqrdyydrmysyp": 16, "caspase3_8_pos33": 16, "rppqlrpgaptslqtepqgnp": 16, "typic": [16, 22, 25], "But": 16, "mani": 16, "face": 16, "challeng": [16, 19], "might": [16, 25], "unbalanc": [16, 18, 19, 22, 23, 26, 31], "lack": 16, "clear": [16, 19], "scenario": 
16, "denot": [16, 25], "_pu": [16, 25], "dom_gsec_pu": [16, 25], "p05067": 16, "mlpglallllaawtaralevptdgnagllaepqiamfcgrlnmhmn": 16, "701": [16, 17], "723": [16, 17], "faedvgsnkg": 16, "aiiglmvggvviatvivitlvml": 16, "kkkqytsihh": 16, "p14925": 16, "magrarsgllllllgllalqssclafrsplsvfkrfkettrsfsn": 16, "868": [16, 17], "890": 16, "klstepgsgv": 16, "svvlittllvipvlvllaivmfi": 16, "rwkksrafgd": 16, "p70180": 16, "mrslllftfsacvllarvllaggassgagdtrpgsrrrarealaaq": 16, "477": 16, "499": 16, "pckssgglee": 16, "savtgivvgallgagllmafyff": 16, "rkkyriti": 16, "q03157": 16, "mgptspaargqgrrwrppplplllplsllllraqlavgnlavgsp": 16, "585": [16, 17], "607": [16, 17], "apsgtgvsr": 16, "alsgllimgagggslivlslll": 16, "rkkkpygti": 16, "q06481": 16, "maatgtaaaaatgrllllllvgltapalalagyiealaanagtgfa": 16, "694": [16, 17, 25], "716": [16, 17], "lredfslsss": 16, "aligllviavaiatvivislvml": 16, "rkrqygtish": 16, "121": 16, "p36941": 16, "mllpwatsapglawgplvlglfgllaasqpqavppyasenqtcrdq": 16, "226": [16, 17], "248": [16, 17], "plppemsgtm": 16, "lmlavllplafflllatvfsciw": 16, "kshpslcrkl": 16, "122": 16, "p25446": 16, "mlwiwavlplvlagsqlrvhtqgtnsiseslklrrrvretdkncs": 16, "170": [16, 17], "187": 16, "ncrkqsprnr": 16, "lwlltilvlliplvfiyr": 16, "kyrkrkcwkr": 16, "123": 16, "q9p2j2": 16, "mvwclglavlslvisqgadgrgkpevvsvvgragesvvlgcdllpp": 16, "738": [16, 17], "760": [16, 17], "pgllpqpvla": 16, "gvvggvcflgvavlvsilagcl": 16, "nrrraarrrr": 16, "124": 16, "q96j42": 16, "mvpaagrrpprvmrllgwwqvllwvlglpvrgvevaeesgrlwse": 16, "324": [16, 17], "lpstliksvd": 16, "wllvfslfflisfimyati": 16, "rtesirwlip": 16, "125": 16, "p0dpa2": 16, "mrvggafhlllvclspallsavringdgqevlylaegdnvrlgcpi": 16, "265": 16, "287": 16, "kvsdsrrigv": 16, "iigivlgsllalgclavgiwglv": 16, "ccccggsgag": 16, "df_seq_pu": 16, "689": [16, 17], "p60852": 16, "maggsattwgypvallllvatlglgrwlqpdpglpglrhsydcgik": 16, "602": [16, 17], "624": [16, 17], "dsngnsslrp": 16, "llwavlllpavalvlgfgvfvgl": 16, "sqtwaqklw": 16, "690": [16, 17], "p20239": 16, 
"marwqrkasvsspcgrsiyrflsllftlvtsvnsvslpqsenpafp": 16, "684": [16, 17], "703": [16, 17], "iiakdiaskt": 16, "lgavaalvgsavilgficyl": 16, "ykkrtirfnh": 16, "691": [16, 17], "p21754": 16, "melsyrlficlllwgstelcypqplwllqggashpetsvqpvlvec": 16, "387": [16, 17], "409": 16, "eqwalpsdt": 16, "vvllgvglavvvsltltavilvl": 16, "trrcrtashp": 16, "692": [16, 17], "q12836": 16, "mwllrcvllcvslslavsgqhkpeapdyssvlhcgpwsfqfavnln": 16, "506": [16, 17], "528": 16, "eklrvpvdsk": 16, "vlwvaglsgtlilgallvsylav": 16, "kkqkscpdqm": 16, "693": [16, 17], "q8tcw7": 16, "meqiwllllltirvlpgsaqfngyncdanlhsrfpaerdisvycgv": 16, "374": 16, "396": [16, 17], "pfqlnaitsa": 16, "lisgmvilgvtsfslllcslal": 16, "hrkgptslvl": 16, "six": 17, "origin": 17, "df_raw": 17, "df_pc": 17, "andn920101": 17, "argp820101": 17, "argp820102": 17, "argp820103": 17, "begf750101": 17, "begf750102": 17, "begf750103": 17, "bhar880101": 17, "bigc670101": 17, "biov880101": 17, "koeh090103": 17, "koeh090104": 17, "koeh090105": 17, "koeh090106": 17, "koeh090107": 17, "koeh090108": 17, "koeh090109": 17, "koeh090110": 17, "koeh090111": 17, "koeh090112": 17, "494": 17, "230": 17, "355": 17, "504": 17, "512": 17, "249": 17, "164": 17, "476": 17, "194": 17, "300": 17, "551": 17, "222": 17, "308": 17, "273": 17, "140": 17, "522": 17, "345": 17, "864": 17, "404": 17, "579": 17, "783": 17, "205": 17, "323": 17, "936": 17, "279": 17, "174": 17, "449": 17, "346": 17, "285": 17, "416": 17, "867": 17, "191": 17, "583": 17, "889": 17, "720": 17, "556": 17, "875": 17, "919": 17, "796": 17, "440": 17, "420": 17, "177": 17, "019": 17, "032": 17, "713": 17, "267": 17, "811": 17, "488": 17, "106": 17, "542": 17, "732": 17, "593": 17, "718": 17, "857": 17, "853": 17, "913": 17, "681": 17, "877": 17, "762": 17, "601": 17, "670": 17, "574": 17, "076": 17, "049": 17, "189": 17, "148": 17, "182": 17, "029": 17, "186": 17, "017": 17, "025": 17, "026": 17, "138": 17, "309": 17, "388": 17, "544": 17, "608": 17, "538": 17, "571": 17, "481": 17, "112": 17, "h": 
17, "840": 17, "082": 17, "053": 17, "651": 17, "633": 17, "561": 17, "455": 17, "856": 17, "402": 17, "370": 17, "500": 17, "545": 17, "618": 17, "726": 17, "838": 17, "543": 17, "671": 17, "663": 17, "885": 17, "246": 17, "074": 17, "167": 17, "091": 17, "051": 17, "398": 17, "276": 17, "434": 17, "003": 17, "004": 17, "687": 17, "737": 17, "933": 17, "873": 17, "779": 17, "734": 17, "405": 17, "l": 17, "272": 17, "577": 17, "989": 17, "281": 17, "078": 17, "118": 17, "333": 17, "259": 17, "m": 17, "704": 17, "445": 17, "824": 17, "450": 17, "620": 17, "803": 17, "289": 17, "132": 17, "185": 17, "192": 17, "180": [17, 25], "419": 17, "224": [17, 25], "988": 17, "023": 17, "057": 17, "046": 17, "675": 17, "203": 17, "552": 17, "645": 17, "519": 17, "756": 17, "753": 17, "706": 17, "599": 17, "587": 17, "293": 17, "605": 17, "736": 17, "223": 17, "220": 17, "859": 17, "376": 17, "367": 17, "322": 17, "678": 17, "707": 17, "444": 17, "662": 17, "570": 17, "594": 17, "q": 17, "211": 17, "131": 17, "395": 17, "795": 17, "539": 17, "206": 17, "676": 17, "733": 17, "628": 17, "483": 17, "r": [17, 25], "531": 17, "047": 17, "110": 17, "489": 17, "940": 17, "735": 17, "215": 17, "852": 17, "883": 17, "743": 17, "362": 17, "679": 17, "238": 17, "851": 17, "188": 17, "399": 17, "589": 17, "655": 17, "590": 17, "382": 17, "384": 17, "379": 17, "598": 17, "352": 17, "312": 17, "366": 17, "578": 17, "407": 17, "364": 17, "331": 17, "250": 17, "514": 17, "v": [17, 25], "498": 17, "809": 17, "365": 17, "492": 17, "077": 17, "033": 17, "111": [17, 25], "156": 17, "154": 17, "496": 17, "w": 17, "926": 17, "040": 17, "146": 17, "600": 17, "400": 17, "104": 17, "316": 17, "244": 17, "802": 17, "709": 17, "107": 17, "502": 17, "806": 17, "588": 17, "286": 17, "644": 17, "474": 17, "410": 17, "429": 17, "413": 17, "235": 17, "336": 17, "586": [17, 25], "term": [17, 25], "lins030110": 17, "asa": [17, 25], "volum": [17, 25], "surfac": [17, 25], "fold": [17, 25], "coil": [17, 25], 
"turn": [17, 25], "median": 17, "resi": 17, "lins030113": 17, "janj780101": 17, "janin": [17, 25], "et": [17, 24, 25], "al": [17, 24, 25], "janj780103": 17, "expos": [17, 19, 25], "lins030104": 17, "lins030107": 17, "win3": 17, "choc760102": 17, "prot": 17, "lins030116": 17, "\u03b2": [17, 25], "strand": [17, 25], "lins030119": 17, "lins030103": 17, "hydrophil": [17, 25], "resid": 17, "stem": 17, "top60_id": 17, "acc": 17, "presenc": [17, 25], "absenc": [17, 25], "df_top60": 17, "aac01": 17, "aac02": 17, "aac03": 17, "aac04": 17, "aac05": 17, "aac06": 17, "aac07": 17, "aac08": 17, "aac09": 17, "aac10": 17, "df_eval": 17, "overal": 17, "aa5_caspase3": 17, "aa5_furin": 17, "aa5_ldr": 17, "aa5_mmp2": 17, "aa9_ldr": 17, "aa9_mmp2": 17, "aa9_rnabind": 17, "aa9_sa": 17, "aa13_caspase3": 17, "aa13_furin": 17, "aa13_ldr": 17, "aa13_mmp2": 17, "aa13_rnabind": 17, "aa13_sa": 17, "761": 17, "827": 17, "746": 17, "646": 17, "884": 17, "862": 17, "901": 17, "612": 17, "680": 17, "659": 17, "664": 17, "918": 17, "652": 17, "615": 17, "747": 17, "830": 17, "742": 17, "653": 17, "886": 17, "855": 17, "907": 17, "688": 17, "642": 17, "657": 17, "792": 17, "916": 17, "656": 17, "741": 17, "829": 17, "648": 17, "904": 17, "685": 17, "636": 17, "710": 17, "791": 17, "914": 17, "695": 17, "613": 17, "828": 17, "731": 17, "654": 17, "906": 17, "686": 17, "640": 17, "714": 17, "915": 17, "610": 17, "739": 17, "752": 17, "888": 17, "658": 17, "682": 17, "649": 17, "665": 17, "789": 17, "611": 17, "833": 17, "650": 17, "882": 17, "858": 17, "606": 17, "638": 17, "711": 17, "661": 17, "831": 17, "603": 17, "669": 17, "787": 17, "826": 17, "647": 17, "905": 17, "614": 17, "750": 17, "748": 17, "860": 17, "908": 17, "632": 17, "aac11": 17, "749": 17, "832": 17, "751": 17, "781": 17, "683": 17, "aac12": 17, "708": 17, "666": 17, "785": 17, "917": 17, "aac13": 17, "744": 17, "634": 17, "aac14": 17, "902": 17, "673": 17, "794": 17, "604": 17, "aac15": 17, "617": 17, "660": 17, "aac16": 17, 
"755": 17, "635": 17, "702": 17, "aac17": 17, "740": 17, "835": 17, "793": 17, "609": 17, "aac18": 17, "757": 17, "730": 17, "643": 17, "881": 17, "899": 17, "912": 17, "aac19": 17, "764": 17, "745": 17, "887": 17, "909": 17, "aac20": 17, "677": 17, "aac21": 17, "637": 17, "aac22": 17, "823": 17, "880": 17, "700": 17, "788": 17, "aac23": 17, "629": 17, "aac24": 17, "641": 17, "aac25": 17, "639": 17, "879": 17, "aac26": 17, "698": 17, "aac27": 17, "854": 17, "aac28": 17, "821": 17, "898": 17, "aac29": 17, "763": 17, "900": 17, "aac30": 17, "911": 17, "616": 17, "aac31": 17, "727": 17, "631": 17, "784": 17, "aac32": 17, "aac33": 17, "817": 17, "922": 17, "aac34": 17, "729": 17, "aac35": 17, "758": 17, "822": 17, "aac36": 17, "759": 17, "874": 17, "aac37": 17, "596": 17, "aac38": 17, "766": 17, "921": 17, "aac39": 17, "786": 17, "aac40": 17, "819": 17, "870": 17, "775": 17, "910": 17, "aac41": 17, "896": 17, "aac42": 17, "861": 17, "895": 17, "799": 17, "674": 17, "aac43": 17, "767": 17, "815": 17, "871": 17, "848": 17, "782": 17, "625": 17, "aac44": 17, "825": 17, "621": 17, "696": 17, "780": 17, "923": 17, "aac45": 17, "844": 17, "893": 17, "672": 17, "774": 17, "aac46": 17, "812": 17, "626": 17, "872": 17, "843": 17, "667": 17, "623": 17, "aac47": 17, "717": 17, "aac48": 17, "771": 17, "891": 17, "776": 17, "619": 17, "aac49": 17, "807": 17, "630": 17, "850": 17, "892": 17, "aac50": 17, "728": 17, "773": 17, "aac51": 17, "768": 17, "865": 17, "836": 17, "894": 17, "668": 17, "697": 17, "aac52": 17, "814": 17, "aac53": 17, "765": 17, "798": 17, "aac54": 17, "699": 17, "770": 17, "aac55": 17, "769": 17, "580": 17, "595": 17, "aac56": 17, "aac57": 17, "aac58": 17, "715": 17, "568": 17, "aac59": 17, "725": 17, "797": 17, "592": 17, "562": 17, "aac60": 17, "563": 17, "772": 17, "529": 17, "813": 17, "546": 17, "24": [17, 25], "df_cat_1": 17, "df_raw_1": 17, "df_scales_1": 17, "selected_scal": 17, "tolist": 17, "df_aac1": 17, "buna790103": 17, "bura740102": 17, 
"cham820102": 17, "cham830102": 17, "cham830103": 17, "cham830105": 17, "chop780101": 17, "chop780204": 17, "chop780206": 17, "kars160110": 17, "kars160112": 17, "kars160118": 17, "kars160119": 17, "kars160120": 17, "kars160122": 17, "lins030105": 17, "lins030109": 17, "264": 17, "262": 17, "425": 17, "298": 17, "863": 17, "952": 17, "149": 17, "947": 17, "442": 17, "256": 17, "557": 17, "213": 17, "397": 17, "473": 17, "566": 17, "247": 17, "311": 17, "152": 17, "354": 17, "462": 17, "119": 17, "085": 17, "208": 17, "139": 17, "169": 17, "133": 17, "240": 17, "470": 17, "160": 17, "393": 17, "313": 17, "145": 17, "134": 17, "424": 17, "115": 17, "044": 17, "195": 17, "495": 17, "554": 17, "433": 17, "458": 17, "114": 17, "463": 17, "070": 17, "421": 17, "218": 17, "553": 17, "067": 17, "021": 17, "526": 17, "135": 17, "480": 17, "043": 17, "087": 17, "532": 17, "335": 17, "963": 17, "317": 17, "319": 17, "381": 17, "198": 17, "468": 17, "390": 17, "339": 17, "282": 17, "515": 17, "486": 17, "275": 17, "257": [17, 19], "350": 17, "150": [17, 19], "534": 17, "178": 17, "565": 17, "550": 17, "320": 17, "327": 17, "326": 17, "369": 17, "028": 17, "093": 17, "537": 17, "540": 17, "231": 17, "002": 17, "372": 17, "457": 17, "120": 17, "209": 17, "081": 17, "467": 17, "183": 17, "exclud": 17, "well": [17, 19], "subordin": 17, "want": 17, "unclassified_in": 17, "guyh850104": 17, "energi": [17, 25], "appar": 17, "calcul": 17, "ja": 17, "guyh850105": 17, "racs770103": 17, "chain": [17, 25], "orient": 17, "prefer": [17, 25], "rackovski": [17, 25], "vheg790101": 17, "tfe": 17, "lipophil": 17, "phase": 17, "transfer": [17, 25], "von": 17, "buri": [17, 25], "buriabl": 17, "biov880102": 17, "werd780101": 17, "propens": [17, 25], "insid": [17, 25], "wertz": 17, "scheraga": [17, 25], "predict": [18, 19, 22, 23, 24, 25, 29, 30], "engin": [18, 19, 22, 23, 29], "dpulearn": [18, 21, 22, 23], "train": [18, 19, 22, 23, 30], "moreov": [18, 23], "load_data": [18, 23], "pypi": 18, "conda": 
[18, 19], "forg": 18, "pip": [18, 19], "introduct": 18, "usag": [18, 19, 22], "contribut": [18, 25], "api": [18, 19], "explain": [18, 19, 24, 26], "ai": [18, 19, 24, 26], "perturb": [18, 30], "modul": 18, "search": 18, "page": 18, "work": [18, 21], "pleas": [18, 19, 21], "cite": [18, 21], "_": [18, 21], "breimann": [18, 21, 24], "kamp": [18, 21], "steiner": [18, 21], "frishman": [18, 21], "2023": [18, 21], "ontologi": [18, 21, 24], "biorxiv": [18, 21, 24], "welcom": 19, "thank": 19, "open": 19, "project": [19, 25], "focus": 19, "involv": 19, "invalu": 19, "made": 19, "wai": 19, "file": 19, "github": 19, "issu": 19, "tracker": 19, "submit": 19, "particip": [19, 25], "discuss": 19, "newcom": 19, "tackl": 19, "email": 19, "stephanbreimann": 19, "gmail": 19, "com": 19, "question": 19, "comprehens": 19, "robust": 19, "life": [19, 30, 31], "scienc": [19, 30, 31], "seamlessli": 19, "flexibl": [19, 25], "interoper": 19, "biopython": 19, "reimplement": 19, "exist": [19, 30], "solut": 19, "biolog": [19, 22, 25, 30], "context": 19, "relianc": 19, "opaqu": 19, "box": 19, "empir": 19, "insight": 19, "cut": 19, "fair": 19, "transpar": 19, "re": [19, 24], "commit": 19, "divers": 19, "aspect": 19, "causal": 19, "minim": 19, "reproduc": 19, "mre": 19, "amount": 19, "demonstr": 19, "self": 19, "necessari": 19, "confirm": 19, "replic": 19, "guidelin": 19, "To": [19, 26], "git": 19, "http": 19, "breimanntool": 19, "master": 19, "repositori": 19, "your_usernam": 19, "navig": 19, "folder": 19, "up": 19, "cd": 19, "isol": 19, "activ": [19, 25], "poetri": 19, "pytest": 19, "hypothesi": 19, "execut": 19, "case": 19, "directori": 19, "out": [19, 25], "readm": 19, "command": 19, "cheat": 19, "sheet": [19, 25], "substanti": 19, "minor": 19, "typo": 19, "concis": 19, "branch": [19, 25], "fix": 19, "date": 19, "readthedoc": 19, "org": 19, "crucial": 19, "modif": 19, "render": 19, "correctli": 19, "strive": 19, "codebas": 19, "standalon": 19, "special": 19, "carri": 19, "complet": 19, "process": 
19, "fulfil": 19, "purpos": 19, "inherit": 19, "supplementari": 19, "accordingli": 19, "cppplot": 19, "semi": 19, "strictli": 19, "adher": 19, "aforement": 19, "primari": [19, 28], "_util": 19, "_utils_const": 19, "py": 19, "modular": 19, "therefor": 19, "flat": 19, "hierarchi": 19, "outlin": 19, "user": 19, "friendli": 19, "hint": 19, "enhanc": [19, 25], "propos": 19, "pep": 19, "484": 19, "book": 19, "error": 19, "messag": 19, "docstr": 19, "markup": 19, "languag": 19, "restructuredtext": 19, "rst": 19, "primer": 19, "restructuretext": 19, "cheatsheet": 19, "sphinx": 19, "autodoc": 19, "inclus": 19, "napoleon": 19, "extens": 19, "conf": 19, "four": 19, "bird": 19, "ey": 19, "background": 19, "medium": [19, 25], "tabular": 19, "critic": 19, "except": 19, "rule": 19, "showcas": 19, "scientif": 19, "mai": 19, "mention": 19, "section": 19, "extern": 19, "note": 19, "go": 19, "html": 19, "_build": 19, "browser": 19, "below": 19, "blank": 19, "OF": 19, "ONE": 19, "complex": 19, "At": 19, "intric": 19, "do": 19, "placehold": 19, "incomplet": 19, "potenti": [19, 25], "expect": 19, "30": 19, "remind": 19, "token": 19, "truncat": 19, "respons": 19, "simpli": 19, "ask": 19, "someth": 19, "repeat": 19, "compil": 19, "done": 19, "script": 19, "leverag": 19, "struggl": 19, "produc": 19, "erron": 19, "often": [19, 30], "ambigu": 19, "logic": 19, "address": 19, "intuit": 19, "through": 19, "signatur": [19, 26], "behavior": 19, "deeper": 19, "intricaci": 19, "citat": 21, "develop": 22, "practic": 22, "2023a": 24, "2023b": 24, "breimann23c": [24, 25], "2023c": 24, "chart": 24, "cheng06": [24, 25], "cheng": 24, "2006": 24, "larg": 24, "disulphid": 24, "bridg": [24, 25], "kernel": 24, "neural": 24, "network": 24, "graph": [24, 25], "struct": 24, "funct": 24, "kawashima": 24, "2008": 24, "aid": 24, "databas": 24, "report": 24, "nucleic": 24, "magnan09": [24, 25], "magnan": 24, "randal": 24, "baldi": 24, "2009": [24, 25], "accur": 24, "solubl": [24, 25], "bioinformat": 24, "galiez16": 
[24, 25], "galiez": 24, "2016": [24, 25], "viral": 24, "capsid": [24, 25], "tail": [24, 25], "song18": [24, 25], "song": 24, "2018": 24, "throughput": 24, "cleavag": [24, 25], "site": [24, 25], "90": 24, "proteas": 24, "shen19": [24, 25], "shen": 24, "2019": 24, "subcellular": [24, 25], "local": [24, 25], "evolutionari": 24, "chou": [24, 25], "pseaac": 24, "j": 24, "theor": 24, "biol": 24, "tang20": [24, 25], "tang": 24, "2020": 24, "intrins": [24, 25], "disord": [24, 25], "teng21": [24, 25], "teng": 24, "2021": 24, "amyloidogen": [24, 25], "pseudo": 24, "composit": [24, 25], "tripeptid": 24, "bmc": 24, "yang21": [24, 25], "yang": 24, "granular": 24, "multipl": 24, "rna": [24, 25], "bind": [24, 25], "appl": 24, "chronolog": 25, "histori": 25, "t1_overview_benchmark": 25, "t2_overview_scal": 25, "t3a_aaontology_categori": 25, "t3b_aaontology_subcategori": 25, "begin": 25, "append": 25, "caspas": 25, "furin": 25, "long": 25, "ldr": 25, "metallopeptidas": 25, "mmp2": 25, "rbp60": 25, "solvent": 25, "sa": 25, "amyloidognen": 25, "capdsid": 25, "disulfid": 25, "ss": 25, "bond": 25, "cytoplasm": 25, "plasma": 25, "insolubl": 25, "494524": 25, "unknown": 25, "statu": 25, "tier": 25, "system": 25, "systemat": 25, "arrang": 25, "67": 25, "everi": 25, "clearli": 25, "assess": 25, "couldn": 25, "alloc": 25, "regard": 25, "chothia": 25, "1976": 25, "lin": 25, "2003": 25, "64": 25, "occurr": 25, "cellular": 25, "mitochondria": 25, "nakashima": 25, "1990": 25, "nishikawa": 25, "1992": 25, "conform": 25, "\u03b1": 25, "helix": 25, "ranodm": 25, "tanaka": 25, "1977": 25, "fasman": 25, "1978b": 25, "richardson": 25, "1988": 25, "qian": 25, "sejnowski": 25, "aurora": 25, "rose": 25, "1998": 25, "19": 25, "charg": 25, "entropi": 25, "charton": 25, "1983": 25, "gui": 25, "1985": 25, "radzicka": 25, "wolfenden": 25, "36": 25, "could": 25, "mutabl": 25, "sneath": 25, "1966": 25, "17": 25, "polar": 25, "hydrophob": 25, "amphiphil": 25, "kyte": 25, "doolittl": 25, "1982": 25, "mitaku": 
25, "2002": 25, "koehler": 25, "steric": 25, "angl": 25, "symmetri": 25, "represent": 25, "eccentr": 25, "prabhakaran": 25, "ponnuswami": 25, "karkbara": 25, "knislei": 25, "45": 25, "stabil": 25, "backbon": 25, "dynam": 25, "vihinen": 25, "1994": 25, "bastolla": 25, "2005": 25, "31": 25, "water": 25, "tendenc": 25, "oppos": 25, "1978": 25, "partial": 25, "physic": 25, "displac": 25, "caus": 25, "interact": 25, "mainli": 25, "ones": 25, "bull": 25, "brees": 25, "1974": 25, "bigelow": 25, "1967": 25, "jone": 25, "dayhoff": 25, "interior": 25, "unpolar": 25, "fukuchi": 25, "2001": 25, "mp": 25, "cedano": 25, "1997": 25, "mitochondri": 25, "less": 25, "val": 25, "cf": 25, "cap": 25, "asp": 25, "glu": 25, "ly": 25, "arg": 25, "observ": 25, "character": 25, "punta": 25, "maritan": 25, "robson": 25, "suzuki": 25, "linker": 25, "georg": 25, "heringa": 25, "2004": 25, "helic": 25, "half": 25, "finkelstein": 25, "1991": 25, "outsid": 25, "befor": 25, "geisow": 25, "robert": 25, "1980": 25, "ramachandran": 25, "state": 25, "quadrant": 25, "bottom": 25, "paul": 25, "1951": 25, "antiparallel": 25, "lifson": 25, "sander": 25, "1979": 25, "bend": 25, "revers": 25, "tight": 25, "consecut": 25, "back": 25, "hydrogen": 25, "3rd": 25, "4th": 25, "1st": 25, "2nd": 25, "tm": 25, "place": 25, "monn\u00e9": 25, "1999": 25, "\u03c0": 25, "ala": 25, "gln": 25, "fodj": 25, "karadaghi": 25, "net": 25, "donor": 25, "klein": 25, "1984": 25, "acceptor": 25, "faucher": 25, "hi": 25, "electron": 25, "ion": 25, "pot": 25, "valenc": 25, "chemic": 25, "cosic": 25, "low": 25, "due": 25, "strong": 25, "hutchen": 25, "1970": 25, "unfold": 25, "gibb": 25, "denatur": 25, "yutani": 25, "1987": 25, "instabl": 25, "highest": 25, "break": 25, "pro": 25, "munoz": 25, "serrano": 25, "isoelectr": 25, "ph": 25, "electr": 25, "neutral": 25, "zimmerman": 25, "1968": 25, "16": 25, "crystal": 25, "pairwis": 25, "constitu": 25, "atom": 25, "lennard": 25, "oobatak": 25, "ooi": 25, "rel": 25, "chang": 25, "divid": 25, 
"aliphat": 25, "linear": 25, "aromat": 25, "carbon": 25, "approxim": 25, "invers": 25, "reactiv": 25, "hydroxythiol": 25, "wold": 25, "occur": 25, "esp": 25, "amphipath": 25, "highli": 25, "signal": 25, "argo": 25, "cornett": 25, "38": 25, "environ": 25, "eisenberg": 25, "mclachlan": 25, "1986": 25, "surround": 25, "angstrom": 25, "radiu": 25, "pack": 25, "globular": 25, "1981": 25, "28": 25, "eigenvalu": 25, "laplacian": 25, "undirect": 25, "node": 25, "mass": 25, "molecular": 25, "second": 25, "actual": 25, "root": 25, "squar": 25, "gyrat": 25, "farther": 25, "awai": 25, "relationship": 25, "rate": 25, "shift": 25, "bundi": 25, "wuthrich": 25, "nh": 25, "temperatur": 25, "rigid": 25, "gly": 25, "ser": 25, "particularli": 25, "ptitsyn": 25, "zhou": 25, "equilibrium": 25, "sueki": 25, "flow": 26, "enri": 26, "introduc": 27, "diagram": 28, "platform": 29, "novel": 29, "everywher": [30, 31], "setup": 30, "augment": 30, "smote": 30, "artifici": 30, "Such": 30, "veri": 30, "deep": 30, "imag": 30, "recognit": 30, "feasibl": 30, "becaus": 30, "slight": 30, "mutat": 30, "alter": 30, "dramat": 30, "great": 30, "quantiti": 30, "besid": 30, "distinguish": 30, "subfield": 30}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "CPP"], [3, 0, 1, "", "CPPPlot"], [4, 0, 1, "", "SequenceFeature"], [5, 0, 1, "", "dPULearn"], [6, 3, 1, "", "load_dataset"], [7, 3, 1, "", "load_scales"], [8, 3, 1, "", "plot_gcfs"], [9, 3, 1, "", "plot_get_cdict"], [10, 3, 1, "", "plot_get_clist"], [11, 3, 1, "", "plot_get_cmap"], [12, 3, 1, "", "plot_legend"], [13, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "center_labels_"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "comp_centers"], [1, 1, 1, "", "comp_correlation"], [1, 1, 1, "", "comp_coverage"], [1, 1, 1, "", "comp_medoids"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 2, 1, "", "is_medoid_"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "medoid_labels_"], [1, 2, 1, "", 
"medoid_names_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "model"], [1, 2, 1, "", "n_clusters"], [1, 1, 1, "", "name_clusters"]], "aaanalysis.CPP": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "heatmap"], [3, 1, 1, "", "profile"], [3, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "add_dif"], [4, 1, 1, "", "add_feat_value"], [4, 1, 1, "", "add_position"], [4, 1, 1, "", "feat_matrix"], [4, 1, 1, "", "feat_names"], [4, 1, 1, "", "get_df_parts"], [4, 1, 1, "", "get_features"], [4, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "eval"], [5, 1, 1, "", "fit"], [5, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 16, 28, 30], "featur": [0, 15], "engin": [0, 15], "pu": [0, 16, 30], "learn": [0, 15, 30], "explain": [0, 15, 31], "ai": [0, 15, 31], "perturb": 0, "plot": [0, 14], "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 28], "aaclust": [1, 15], "note": [1, 2, 4, 5, 6, 7, 12], "cpp": [2, 15, 29], "cppplot": 3, "exampl": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 18], "sequencefeatur": 4, "dpulearn": 5, "load_dataset": 6, "load_scal": 7, "plot_gcf": 8, "plot_get_cdict": 9, "plot_get_clist": 10, "plot_get_cmap": 11, "plot_legend": 12, "plot_set": 13, "prelud": 14, "quick": [15, 32], "start": [15, 32], "what": [15, 30, 31], "you": 15, "Will": 15, "1": 15, "load": [15, 16, 17], "sequenc": [15, 31], "scale": [15, 17, 25, 27], "2": 15, "compar": 15, "physicochem": [15, 29], "profil": 15, "3": 15, "protein": [15, 16, 25], "predict": 15, "4": 15, "group": 15, "level": [15, 31], 
"individu": 15, "tutori": [16, 17, 32], "benchmark": [16, 24, 25], "amino": [16, 17, 25, 27], "acid": [16, 17, 25, 27], "window": 16, "size": 16, "posit": 16, "unlabel": 16, "dataset": [16, 24, 25], "three": 17, "set": 17, "numer": 17, "aaontologi": [17, 25, 27], "redund": 17, "reduc": 17, "subset": 17, "filter": 17, "welcom": 18, "document": [18, 19, 22], "instal": [18, 19], "overview": [18, 22, 25], "refer": [18, 24], "indic": 18, "tabl": [18, 25], "citat": 18, "contribut": 19, "introduct": [19, 22], "vision": 19, "object": 19, "non": 19, "goal": 19, "principl": [19, 26], "bug": 19, "report": 19, "latest": 19, "version": 19, "local": 19, "develop": 19, "environ": 19, "fork": 19, "clone": 19, "depend": 19, "run": 19, "unit": 19, "test": 19, "pull": 19, "request": 19, "preview": 19, "chang": 19, "name": 19, "convent": 19, "class": 19, "templat": 19, "function": 19, "method": 19, "code": 19, "philosophi": 19, "style": 19, "layer": 19, "build": 19, "doc": 19, "chatgpt": 19, "guid": 19, "tgd": 19, "workflow": 22, "algorithm": 24, "us": [24, 29], "case": 24, "further": [24, 32], "inform": 24, "categori": 25, "subcategori": 25, "usag": 26, "classif": 27, "flow": 28, "enri": 28, "point": 28, "compon": 28, "entri": 28, "bridg": 28, "extern": 28, "librari": 28, "identifi": 29, "signatur": 29, "from": 30, "unbalanc": 30, "small": 30, "i": [30, 31]}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data": [[0, "data"]], "Feature Engineering": [[0, "feature-engineering"]], "PU Learning": [[0, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], 
"aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [1, null], [1, null], [1, null], [2, null], [2, null], [4, null], [4, null], [4, null], [4, null], [4, null], [5, null], [5, null], [6, null], [7, null], [12, null]], "aaanalysis.CPP": [[2, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[3, "aaanalysis-cppplot"]], "Examples": [[3, null], [4, null], [4, null], [5, null], [6, null], [7, null], [8, null], [9, null], [10, null], [11, null], [12, null], [13, null]], "aaanalysis.SequenceFeature": [[4, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[5, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[6, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[7, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[8, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[9, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_clist": [[10, "aaanalysis-plot-get-clist"]], "aaanalysis.plot_get_cmap": [[11, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_legend": [[12, "aaanalysis-plot-legend"]], "aaanalysis.plot_settings": [[13, "aaanalysis-plot-settings"]], "Plotting Prelude": [[14, "plotting-prelude"]], "Quick Start with AAanalysis": [[15, "quick-start-with-aaanalysis"]], "What You Will Learn:": [[15, "what-you-will-learn"]], "1. Loading Sequences and Scales": [[15, "loading-sequences-and-scales"]], "2. Feature Engineering": [[15, "feature-engineering"]], "AAclust": [[15, "aaclust"]], "Comparative Physicochemical Profiling (CPP)": [[15, "comparative-physicochemical-profiling-cpp"]], "3. Protein Prediction": [[15, "protein-prediction"]], "4. 
Explainable AI": [[15, "explainable-ai"]], "Explainable AI on group level": [[15, "explainable-ai-on-group-level"]], "Explainable AI on individual level": [[15, "explainable-ai-on-individual-level"]], "Data Loading Tutorial": [[16, "data-loading-tutorial"]], "Loading of protein benchmarks": [[16, "loading-of-protein-benchmarks"]], "Loading of protein benchmarks: Amino acid window size": [[16, "loading-of-protein-benchmarks-amino-acid-window-size"]], "Loading of protein benchmarks: Positive-Unlabeled (PU) datasets": [[16, "loading-of-protein-benchmarks-positive-unlabeled-pu-datasets"]], "Scale Loading Tutorial": [[17, "scale-loading-tutorial"]], "Three sets of numerical amino acid scales": [[17, "three-sets-of-numerical-amino-acid-scales"]], "AAontology": [[17, "aaontology"], [25, "aaontology"]], "Redundancy-reduce scale subsets": [[17, "redundancy-reduce-scale-subsets"]], "Filtering of scales": [[17, "filtering-of-scales"]], "Welcome to the AAanalysis documentation!": [[18, "welcome-to-the-aaanalysis-documentation"]], "Install": [[18, "install"]], "OVERVIEW": [[18, null]], "EXAMPLES": [[18, null]], "REFERENCES": [[18, null]], "Indices and tables": [[18, "indices-and-tables"]], "Citation": [[18, "citation"]], "Contributing": [[19, "contributing"]], "Introduction": [[19, "introduction"], [22, "introduction"]], "Vision": [[19, "vision"]], "Objectives": [[19, "objectives"]], "Non-goals": [[19, "non-goals"]], "Principles": [[19, "principles"]], "Bug Reports": [[19, "bug-reports"]], "Installation": [[19, "installation"]], "Latest Version": [[19, "latest-version"]], "Local Development Environment": [[19, "local-development-environment"]], "Fork and Clone": [[19, "fork-and-clone"]], "Install Dependencies": [[19, "install-dependencies"]], "Run Unit Tests": [[19, "run-unit-tests"]], "Pull Requests": [[19, "pull-requests"]], "Preview Changes": [[19, "preview-changes"]], "Documentation": [[19, "documentation"]], "Naming Conventions": [[19, "naming-conventions"]], "Class 
Templates": [[19, "class-templates"]], "Function and Method Naming": [[19, "function-and-method-naming"]], "Code Philosophy": [[19, "code-philosophy"]], "Documentation Style": [[19, "documentation-style"]], "Documentation Layers": [[19, "documentation-layers"]], "Building the Docs": [[19, "building-the-docs"]], "Test with ChatGPT": [[19, "test-with-chatgpt"]], "Test Guided Development (TGD)": [[19, "test-guided-development-tgd"]], "Workflow": [[22, "workflow"]], "Overview of documentation": [[22, "overview-of-documentation"]], "References": [[24, "references"]], "Algorithms": [[24, "algorithms"]], "Datasets and Benchmarks": [[24, "datasets-and-benchmarks"]], "Use Cases": [[24, "use-cases"]], "Further Information": [[24, "further-information"]], "Tables": [[25, "tables"]], "Overview Table": [[25, "overview-table"]], "Protein Benchmark Datasets": [[25, "protein-benchmark-datasets"]], "Amino Acid Scale Datasets": [[25, "amino-acid-scale-datasets"]], "Categories": [[25, "categories"]], "Subcategories": [[25, "subcategories"]], "Usage Principles": [[26, "usage-principles"]], "AAontology: Classification of amino acid scales": [[27, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[28, "data-flow-and-enry-points"]], "Data Flow: Components of AAanalysis": [[28, "data-flow-components-of-aaanalysis"]], "Entry Points: Bridges to External Libraries": [[28, "entry-points-bridges-to-external-libraries"]], "Identifying Physicochemical Signatures using CPP": [[29, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[30, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[30, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[31, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[31, "what-is-explainable-ai"]], "Tutorials": [[32, "tutorials"]], "Quick start": [[32, "quick-start"]], "Further Tutorials": [[32, "further-tutorials"]]}, "indexentries": {"aaclust 
(class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "center_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.center_labels_"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "comp_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_centers"]], "comp_correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_correlation"]], "comp_coverage() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_coverage"]], "comp_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_medoids"]], "eval() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "is_medoid_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.is_medoid_"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "medoid_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_labels_"]], "medoid_names_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_names_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "model (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.model"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "name_clusters() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.name_clusters"]], "cpp (class in aaanalysis)": [[2, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[3, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[3, 
"aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[4, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[5, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[5, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[6, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[7, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[8, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[9, "aaanalysis.plot_get_cdict"]], "plot_get_clist() (in module aaanalysis)": [[10, "aaanalysis.plot_get_clist"]], 
"plot_get_cmap() (in module aaanalysis)": [[11, "aaanalysis.plot_get_cmap"]], "plot_legend() (in module aaanalysis)": [[12, "aaanalysis.plot_legend"]], "plot_settings() (in module aaanalysis)": [[13, "aaanalysis.plot_settings"]]}})
\ No newline at end of file
diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_gcfs-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_gcfs-1.pdf
index 29fca4d7..af43810e 100644
Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_gcfs-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_gcfs-1.pdf differ
diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_get_cdict-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_get_cdict-1.pdf
index 52458eb0..1771af1b 100644
Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_get_cdict-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_get_cdict-1.pdf differ
diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_get_clist-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_get_clist-1.pdf
index 50bf2f30..f0b11b7a 100644
Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_get_clist-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_get_clist-1.pdf differ
diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_get_cmap-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_get_cmap-1.pdf
index e38c7d65..bb323fe1 100644
Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_get_cmap-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_get_cmap-1.pdf differ
diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_legend-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_legend-1.pdf
index 63459a8c..7064ab06 100644
Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_legend-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_legend-1.pdf differ
diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_settings-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_settings-1.pdf
index e6ddea7f..c2c1a032 100644
Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_settings-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_settings-1.pdf differ
diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_settings-2.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_settings-2.pdf
index b5ebd506..034540bd 100644
Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_settings-2.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_settings-2.pdf differ
diff --git a/docs/source/generated/output_13_1.png b/docs/source/generated/output_13_1.png
index 815e3a39..6e2513d5 100644
Binary files a/docs/source/generated/output_13_1.png and b/docs/source/generated/output_13_1.png differ
diff --git a/docs/source/generated/tutorial1_quick_start.rst b/docs/source/generated/tutorial1_quick_start.rst
index 75442ba7..c9cc5aa9 100644
--- a/docs/source/generated/tutorial1_quick_start.rst
+++ b/docs/source/generated/tutorial1_quick_start.rst
@@ -69,9 +69,9 @@ set of 100 scales, as defined by the ``n_clusters`` parameters:
from sklearn.cluster import AgglomerativeClustering
- aac = aa.AAclust(model=AgglomerativeClustering)
- X = np.array(df_scales)
- scales = aac.fit(X, names=list(df_scales), n_clusters=100)
+ aac = aa.AAclust(model_class=AgglomerativeClustering)
+ X = np.array(df_scales).T
+ scales = aac.fit(X, names=list(df_scales), n_clusters=100).medoid_names_
df_scales = df_scales[scales]
Comparative Physicochemical Profiling (CPP)
@@ -131,10 +131,10 @@ A feature matrix from a given set of CPP features can be created using
.. parsed-literal::
- Mean accuracy of 0.6
+ Mean accuracy of 0.58
-Creating more features with CPP will take some more time, but improve
+Creating more features with CPP will take a little time, but will improve
prediction performance:
.. code:: ipython3
@@ -153,7 +153,7 @@ prediction performance:
sns.barplot(pd.DataFrame({"Baseline": cv_base, "CPP": cv}), palette=["tab:blue", "tab:red"])
plt.ylabel("Mean accuracy", size=aa.plot_gcfs()+1)
plt.ylim(0, 1)
- plt.title("Comparison of Feature Engineering Methods")
+ plt.title("Comparison of Feature Engineering Methods", size=aa.plot_gcfs()-1)
sns.despine()
plt.show()
diff --git a/tutorials/tutorial1_quick_start.ipynb b/tutorials/tutorial1_quick_start.ipynb
index c3195c34..03993f2e 100644
--- a/tutorials/tutorial1_quick_start.ipynb
+++ b/tutorials/tutorial1_quick_start.ipynb
@@ -30,8 +30,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2023-09-28T08:06:29.545799916Z",
- "start_time": "2023-09-28T08:06:29.522441946Z"
+ "end_time": "2023-10-02T20:32:53.039469001Z",
+ "start_time": "2023-10-02T20:32:53.019487708Z"
}
}
},
@@ -55,8 +55,8 @@
"name": "#%%\n"
},
"ExecuteTime": {
- "end_time": "2023-09-28T08:06:30.369962978Z",
- "start_time": "2023-09-28T08:06:29.532292243Z"
+ "end_time": "2023-10-02T20:32:54.531812229Z",
+ "start_time": "2023-10-02T20:32:53.027672866Z"
}
},
"outputs": [],
@@ -89,16 +89,16 @@
"source": [
"from sklearn.cluster import AgglomerativeClustering\n",
"\n",
- "aac = aa.AAclust(model=AgglomerativeClustering)\n",
- "X = np.array(df_scales)\n",
- "scales = aac.fit(X, names=list(df_scales), n_clusters=100) \n",
+ "aac = aa.AAclust(model_class=AgglomerativeClustering)\n",
+ "X = np.array(df_scales).T\n",
+ "scales = aac.fit(X, names=list(df_scales), n_clusters=100).medoid_names_ \n",
"df_scales = df_scales[scales]"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2023-09-28T08:06:30.451850383Z",
- "start_time": "2023-09-28T08:06:30.376700053Z"
+ "end_time": "2023-10-02T20:32:54.532997978Z",
+ "start_time": "2023-10-02T20:32:54.250325038Z"
}
}
},
@@ -126,8 +126,8 @@
"name": "#%%\n"
},
"ExecuteTime": {
- "end_time": "2023-09-28T08:06:30.578306544Z",
- "start_time": "2023-09-28T08:06:30.532859763Z"
+ "end_time": "2023-10-02T20:32:54.533610828Z",
+ "start_time": "2023-10-02T20:32:54.388921950Z"
}
},
"outputs": [],
@@ -160,8 +160,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2023-09-28T08:06:39.221976283Z",
- "start_time": "2023-09-28T08:06:30.576931617Z"
+ "end_time": "2023-10-02T20:33:05.471265655Z",
+ "start_time": "2023-10-02T20:32:54.511627903Z"
}
}
},
@@ -185,7 +185,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Mean accuracy of 0.6\n"
+ "Mean accuracy of 0.58\n"
]
}
],
@@ -201,15 +201,15 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
- "end_time": "2023-09-28T08:06:43.613234709Z",
- "start_time": "2023-09-28T08:06:39.228528460Z"
+ "end_time": "2023-10-02T20:33:10.066404589Z",
+ "start_time": "2023-10-02T20:33:05.478739730Z"
}
}
},
{
"cell_type": "markdown",
"source": [
- "Creating more features with CPP will take some more time, but improve prediction performance: "
+ "Creating more features with CPP will take a little time, but will improve prediction performance: "
],
"metadata": {
"collapsed": false
@@ -229,7 +229,7 @@
{
"data": {
"text/plain": "