diff --git a/aaanalysis/__pycache__/utils.cpython-39.pyc b/aaanalysis/__pycache__/utils.cpython-39.pyc index 93844be0..8f096ef8 100644 Binary files a/aaanalysis/__pycache__/utils.cpython-39.pyc and b/aaanalysis/__pycache__/utils.cpython-39.pyc differ diff --git a/aaanalysis/_utils/__pycache__/_utils_check.cpython-39.pyc b/aaanalysis/_utils/__pycache__/_utils_check.cpython-39.pyc index 1c0a77f9..8420f63e 100644 Binary files a/aaanalysis/_utils/__pycache__/_utils_check.cpython-39.pyc and b/aaanalysis/_utils/__pycache__/_utils_check.cpython-39.pyc differ diff --git a/aaanalysis/_utils/__pycache__/utils_aaclust.cpython-39.pyc b/aaanalysis/_utils/__pycache__/utils_aaclust.cpython-39.pyc index e38ecd09..3882a91b 100644 Binary files a/aaanalysis/_utils/__pycache__/utils_aaclust.cpython-39.pyc and b/aaanalysis/_utils/__pycache__/utils_aaclust.cpython-39.pyc differ diff --git a/aaanalysis/_utils/_utils_check.py b/aaanalysis/_utils/_utils_check.py index 6e136c27..56bd1f31 100644 --- a/aaanalysis/_utils/_utils_check.py +++ b/aaanalysis/_utils/_utils_check.py @@ -80,14 +80,13 @@ def check_tuple(name=None, val=None, n=None): # Array checking functions def check_feat_matrix(X=None, names=None, labels=None): - """Check if X and y match (y can be labels or names). Otherwise, transpose X or give error.""" - # TODO type check - X = check_array(X) + """Transpose matrix and check if X and y match (y can be labels or names). Transpose back otherwise """ + X = check_array(X).transpose() if labels is not None: check_consistent_length(X, labels) n_samples, n_features = X.shape if n_samples == 0 or n_features == 0: - raise ValueError(f"Shape of X ({n_samples}, {n_features}) indicates empty feature matrix.") + raise ValueError(f"Shape of 'X' ({n_samples}, {n_features}) indicates empty feature matrix.") if names is None: return X, names else: diff --git a/aaanalysis/_utils/utils_aaclust.py b/aaanalysis/_utils/utils_aaclust.py index 26c45173..6974766e 100644 --- a/aaanalysis/_utils/utils_aaclust.py +++ b/aaanalysis/_utils/utils_aaclust.py @@ -37,3 +37,9 @@ def check_merge_metric(merge_metric=None): error = f"'merge_metric' should be None or one of following: {LIST_METRICS}" raise ValueError(error) return merge_metric + +def check_feat_matrix_n_clust_match(X=None, n_clusters=None): + """""" + n_samples, n_features = X.shape + if n_samples <= n_clusters: + raise ValueError(f"'X' must contain more samples ({n_samples}) then 'n_clusters' ({n_clusters})") diff --git a/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc b/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc index 0c20615d..b68040b4 100644 Binary files a/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc and b/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc differ diff --git a/aaanalysis/aaclust/aaclust.py b/aaanalysis/aaclust/aaclust.py index 6b5628b0..a0833b84 100644 --- a/aaanalysis/aaclust/aaclust.py +++ b/aaanalysis/aaclust/aaclust.py @@ -11,6 +11,8 @@ # I Helper Functions + + # Obtain centroids and medoids def cluster_center(X): """Compute cluster center (i.e., arithmetical mean over all data points/observations of a cluster)""" @@ -29,7 +31,7 @@ def _cluster_medoid(X): """Obtain cluster medoids (i.e., scale closest to cluster center used as representative scale for a cluster)""" # Create new array with cluster center and given center_X = np.concatenate([cluster_center(X), X], axis=0) - # Get index for scale with highest correlation with cluster center + # Get index for scale with the highest correlation with cluster center ind_max = np.corrcoef(center_X)[0, 1:].argmax() return ind_max @@ -410,6 +412,7 @@ def fit(self, X, names=None, on_center=True, min_th=0, merge_metric="euclidean" ut.check_min_th(min_th=min_th) merge_metric = ut.check_merge_metric(merge_metric=merge_metric) X, names = ut.check_feat_matrix(X=X, names=names) + ut.check_feat_matrix_n_clust_match(X=X, n_clusters=n_clusters) args = dict(model=self.model, model_kwargs=self._model_kwargs, min_th=min_th, on_center=on_center) # Clustering using given clustering models if n_clusters is not None: diff --git a/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc b/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc index 9d669888..88affe85 100644 Binary files a/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc differ diff --git a/aaanalysis/utils.py b/aaanalysis/utils.py index 2bc53737..f512ce89 100644 --- a/aaanalysis/utils.py +++ b/aaanalysis/utils.py @@ -13,6 +13,7 @@ check_feat_matrix, check_col_in_df) from aaanalysis._utils._utils_output import (print_red, print_start_progress, print_progress, print_finished_progress) from aaanalysis._utils.utils_aaclust import (check_model, check_min_th, check_merge_metric, + check_feat_matrix_n_clust_match, METRIC_CORRELATION, LIST_METRICS) from aaanalysis._utils.utils_cpp import (check_color, check_y_categorical, check_labels, check_ylim, check_args_len, check_args_len, check_list_parts, diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index 797868a7..bdff3fef 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/generated/aaanalysis.AAclust.doctree b/docs/build/doctrees/generated/aaanalysis.AAclust.doctree index 706d8507..c5989ddb 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.AAclust.doctree and b/docs/build/doctrees/generated/aaanalysis.AAclust.doctree differ diff --git a/docs/build/doctrees/generated/tutorial1_quick_start.doctree b/docs/build/doctrees/generated/tutorial1_quick_start.doctree index 81728585..47be535c 100644 Binary files a/docs/build/doctrees/generated/tutorial1_quick_start.doctree and b/docs/build/doctrees/generated/tutorial1_quick_start.doctree differ diff --git a/docs/build/html/_images/output_13_1.png b/docs/build/html/_images/output_13_1.png index d7e83427..4a4b3449 100644 Binary files a/docs/build/html/_images/output_13_1.png and b/docs/build/html/_images/output_13_1.png differ diff --git a/docs/build/html/_sources/generated/tutorial1_quick_start.rst.txt b/docs/build/html/_sources/generated/tutorial1_quick_start.rst.txt index 23e6ef58..419f2e19 100644 --- a/docs/build/html/_sources/generated/tutorial1_quick_start.rst.txt +++ b/docs/build/html/_sources/generated/tutorial1_quick_start.rst.txt @@ -34,9 +34,8 @@ available at your fingertips with the ``aa.load_scales()`` function. .. code:: ipython3 import aaanalysis as aa - # Load scales and scale categories (AAontology) + df_scales = aa.load_scales() - # Load training data df_seq = aa.load_dataset(name="DOM_GSEC", n=50) df_seq.head(5) @@ -156,11 +155,12 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: from sklearn.cluster import AgglomerativeClustering import numpy as np - aac = aa.AAclust(model=AgglomerativeClustering, model_kwargs=dict(linkage="ward")) + + aac = aa.AAclust(model=AgglomerativeClustering) X = np.array(df_scales) - scales = aac.fit(X, n_clusters=100, names=list(df_scales)) + scales = aac.fit(X, names=list(df_scales), n_clusters=100) df_scales = df_scales[scales] - df_scales + df_scales[scales[0:4]].head(5) @@ -189,23 +189,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: SIMZ760101 NAKH900106 AURR980112 - CORJ870107 - ROBB760113 - MIYS990104 - BIGC670101 - ROSG850102 - ZIMJ680105 - ... - YUTK870102 - SUEM840102 - VASM830102 - VELV850101 - VENT840101 - MONM990101 - GEOR030102 - GEOR030106 - KARS160120 - LINS030117 AA @@ -213,23 +196,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: - - - - - - - - - - - - - - - - - @@ -239,23 +205,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.268 0.237 0.787 - 0.446 - 0.101 - 0.479 - 0.164 - 0.564 - 0.444 - ... - 0.557 - 0.103 - 0.617 - 0.295 - 0 - 0.077 - 0.250 - 0.516 - 0.952 - 0.186 C @@ -263,23 +212,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.258 0.303 0.104 - 0.725 - 0.849 - 0.000 - 0.323 - 1.000 - 0.000 - ... - 0.680 - 0.337 - 0.734 - 0.657 - 0 - 0.154 - 0.246 - 0.000 - 0.952 - 0.000 D @@ -287,23 +219,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.206 0.000 0.451 - 0.000 - 0.790 - 0.803 - 0.324 - 0.256 - 0.000 - ... - 0.574 - 0.909 - 0.225 - 1.000 - 0 - 0.923 - 0.091 - 0.404 - 0.952 - 0.186 E @@ -311,23 +226,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.210 0.090 0.823 - 0.233 - 0.092 - 0.859 - 0.488 - 0.256 - 0.025 - ... - 0.402 - 0.077 - 0.531 - 0.046 - 0 - 0.923 - 0.404 - 0.610 - 0.952 - 0.349 F @@ -335,387 +233,9 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.887 0.724 0.402 - 0.950 - 0.328 - 0.000 - 0.783 - 0.923 - 1.000 - ... - 0.680 - 0.233 - 0.023 - 0.749 - 1 - 0.000 - 0.536 - 0.712 - 0.952 - 0.326 - - - G - 0.025 - 0.032 - 0.259 - 0.055 - 0.352 - 1.000 - 0.662 - 0.000 - 0.513 - 0.175 - ... - 0.525 - 0.000 - 0.455 - 0.040 - 0 - 0.692 - 0.000 - 0.210 - 0.952 - 0.023 - - - H - 0.840 - 0.387 - 0.401 - 0.463 - 0.610 - 0.454 - 0.479 - 0.561 - 0.667 - 0.338 - ... - 0.754 - 0.000 - 0.345 - 0.191 - 0 - 0.923 - 0.201 - 0.612 - 0.562 - 0.419 - - - I - 0.000 - 0.990 - 0.697 - 0.512 - 0.969 - 0.151 - 0.056 - 0.663 - 0.923 - 0.894 - ... - 0.820 - 0.714 - 0.070 - 0.000 - 1 - 0.154 - 0.161 - 0.457 - 0.583 - 0.140 - - - K - 0.506 - 0.516 - 0.127 - 0.591 - 0.027 - 0.613 - 1.000 - 0.694 - 0.000 - 0.044 - ... - 0.615 - 0.012 - 0.688 - 0.294 - 0 - 0.923 - 0.195 - 0.536 - 0.912 - 1.000 - - - L - 0.272 - 0.835 - 0.905 - 0.732 - 1.000 - 0.076 - 0.014 - 0.663 - 0.846 - 0.925 - ... - 1.000 - 0.428 - 0.771 - 0.000 - 1 - 0.000 - 0.513 - 0.690 - 0.952 - 0.186 - - - M - 0.704 - 0.452 - 1.000 - 1.000 - 0.883 - 0.084 - 0.113 - 0.620 - 0.846 - 0.756 - ... - 0.689 - 0.701 - 0.512 - 0.651 - 0 - 0.077 - 0.151 - 0.670 - 0.952 - 0.372 - - - N - 0.988 - 0.029 - 0.381 - 0.287 - 0.171 - 0.924 - 0.718 - 0.398 - 0.282 - 0.162 - ... - 0.508 - 0.000 - 0.313 - 0.028 - 0 - 1.000 - 0.277 - 0.342 - 0.952 - 0.093 - - - P - 0.605 - 0.871 - 0.403 - 0.000 - 0.130 - 0.824 - 0.803 - 0.376 - 0.308 - 0.750 - ... - 0.566 - 0.545 - 0.937 - 0.157 - 0 - 1.000 - 1.000 - 1.000 - 0.952 - 0.698 - - - Q - 0.519 - 0.000 - 0.203 - 0.805 - 0.238 - 0.546 - 0.732 - 0.539 - 0.256 - 0.388 - ... - 0.697 - 0.428 - 0.446 - 0.602 - 0 - 0.923 - 0.478 - 0.530 - 0.952 - 0.256 - - - R - 0.531 - 0.268 - 0.061 - 0.738 - 0.482 - 0.748 - 0.634 - 0.735 - 0.308 - 0.112 - ... - 0.000 - 0.000 - 0.550 - 0.760 - 0 - 1.000 - 0.549 - 0.728 - 0.952 - 0.372 - - - S - 0.679 - 0.045 - 0.450 - 0.293 - 0.293 - 0.798 - 0.704 - 0.188 - 0.359 - 0.256 - ... - 0.656 - 0.000 - 0.868 - 0.657 - 0 - 0.231 - 0.168 - 0.399 - 0.952 - 0.186 - - - T - 0.494 - 0.174 - 0.619 - 0.360 - 0.279 - 0.529 - 0.577 - 0.352 - 0.462 - 0.419 - ... - 0.574 - 0.000 - 1.000 - 0.745 - 0 - 0.000 - 0.344 - 0.513 - 0.000 - 0.419 - - - V - 0.000 - 0.577 - 0.183 - 0.451 - 0.907 - 0.000 - 0.127 - 0.492 - 0.872 - 0.719 - ... - 0.770 - 0.000 - 0.408 - 0.045 - 1 - 0.077 - 0.151 - 0.467 - 0.952 - 0.163 - - - W - 0.926 - 1.000 - 0.707 - 0.805 - 0.500 - 0.773 - 0.070 - 1.000 - 0.846 - 0.894 - ... - 0.467 - 1.000 - 0.138 - 0.434 - 1 - 0.231 - 0.066 - 0.440 - 1.000 - 0.349 - - - Y - 0.802 - 0.990 - 0.425 - 0.524 - 0.771 - 0.798 - 0.127 - 0.806 - 0.615 - 0.762 - ... - 0.557 - 0.857 - 0.000 - 0.408 - 1 - 0.154 - 0.110 - 0.666 - 0.736 - 0.349 -

20 rows × 100 columns

@@ -731,15 +251,15 @@ sequences: the test set and the reference set. Supported by the C-terminal adjacent regions (JMD-N and JMD-C, respectively), obtained ``sf.get_df_parts``. - ``Splits``: These ``Parts`` can be split into various continuous segments or discontinuous patterns, specified -``sf.get_split_kws()``. - ``Scales``: Sets of amino acid scales. We -first use SequenceFeature to obtain Parts and Splits: +``sf.get_split_kws()``. - ``Scales``: Sets of amino acid scales. + +We use SequenceFeature to obtain Parts and Splits: .. code:: ipython3 - # Feature Engineering y = list(df_seq["label"]) sf = aa.SequenceFeature() - df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10, list_parts=["tmd_jmd"]) + df_parts = sf.get_df_parts(df_seq=df_seq, list_parts=["tmd_jmd"]) split_kws = sf.get_split_kws(n_split_max=1, split_types=["Segment"]) df_parts.head(5) @@ -803,9 +323,9 @@ As a baseline approach, we use CPP to compute the average values for the .. code:: ipython3 - # Small set of features (100 features created) - cpp = aa.CPP(df_parts=df_parts, df_scales=df_scales, split_kws=split_kws, verbose=False) - df_feat = cpp.run(labels=y, tmd_len=20, jmd_n_len=10, jmd_c_len=10, n_filter=100) # Default values for lengths are used + # Small set of CPP features (100 features are created) + cpp = aa.CPP(df_scales=df_scales, df_parts=df_parts, split_kws=split_kws, verbose=False) + df_feat = cpp.run(labels=y) df_feat @@ -927,16 +447,16 @@ A feature matrix from a given set of CPP features can be created using from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import cross_val_score - X = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=df_feat["feature"]) - # ML evaluation + + X = sf.feat_matrix(df_parts=df_parts, features=df_feat["feature"]) rf = RandomForestClassifier() - cv_base = cross_val_score(rf, X, y, scoring="accuracy", cv=5, n_jobs=8) # Set n_jobs=1 to disable multi-processing + cv_base = cross_val_score(rf, X, y, scoring="accuracy") print(f"Mean accuracy of {round(np.mean(cv_base), 2)}") .. parsed-literal:: - Mean accuracy of 0.57 + Mean accuracy of 0.58 Creating more features with CPP will take some more time. but improve @@ -944,12 +464,11 @@ prediction performance: .. code:: ipython3 - # Default CPP features (around 100.000 features) - split_kws = sf.get_split_kws() - df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10) - cpp = aa.CPP(df_parts=df_parts, df_scales=df_scales, split_kws=split_kws, verbose=False) - df_feat = cpp.run(labels=y, n_processes=8, n_filter=100) - df_feat + # CPP features with default splits (around 100.000 features) + df_parts = sf.get_df_parts(df_seq=df_seq) + cpp = aa.CPP(df_scales=df_scales, df_parts=df_parts, verbose=False) + df_feat = cpp.run(labels=y) + df_feat.head(10) @@ -1071,104 +590,87 @@ prediction performance: 32,33 - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... + 5 + TMD_C_JMD_C-Segment(4,9)-ROBB760113 + Conformation + β-turn + β-turn + Information measure for loop (Robson-Suzuki, 1... + 0.337 + 0.319440 + -0.319440 + 0.175203 + 0.255754 + 6.100000e-09 + 1.185395e-06 + 27,28 - 95 - JMD_N_TMD_N-Pattern(C,6,9)-NAKH900106 - Composition - Mitochondrial proteins - Mitochondrial proteins - Normalized composition from animal (Nakashima ... - 0.228 - 0.172120 - -0.172120 - 0.180254 - 0.199987 - 8.754340e-05 - 2.693037e-04 - 12,15 + 6 + TMD_C_JMD_C-Segment(2,2)-EISD860102 + Energy + Isoelectric point + Atom-based hydrophobic moment + Atom-based hydrophobic moment (Eisenberg-McLac... + 0.337 + 0.139567 + 0.139567 + 0.098917 + 0.101842 + 6.300000e-09 + 1.185395e-06 + 31,32,33,34,35,36,37,38,39,40 - 96 - JMD_N_TMD_N-Pattern(C,6,9,12)-ZIMJ680105 - Others - PC 2 - Principal Component 1 (Zimmerman) - RF rank (Zimmerman et al., 1968) - 0.227 - 0.133867 - -0.133867 - 0.160532 - 0.161415 - 9.118090e-05 - 2.778863e-04 - 9,12,15 + 7 + TMD_C_JMD_C-Segment(4,5)-RICJ880113 + Conformation + α-helix (C-cap) + α-helix (C-terminal, inside) + Relative preference value at C2 (Richardson-Ri... + 0.336 + 0.223765 + 0.223765 + 0.133513 + 0.178217 + 7.100000e-09 + 1.185395e-06 + 33,34,35,36 - 97 - JMD_N_TMD_N-Segment(7,8)-KARS160107 + 8 + TMD_C_JMD_C-Segment(5,7)-KARS160107 Shape Side chain length Eccentricity (maximum) Diameter (maximum eccentricity) (Karkbara-Knis... - 0.227 - 0.098674 - -0.098674 - 0.104428 - 0.124875 - 8.945330e-05 - 2.740061e-04 - 16,17 + 0.331 + 0.217594 + 0.217594 + 0.136011 + 0.172395 + 1.130000e-08 + 1.331786e-06 + 32,33,34 - 98 - JMD_N_TMD_N-Pattern(C,6,9,12)-SIMZ760101 + 9 + TMD_C_JMD_C-Pattern(C,4,8)-JURD980101 Polarity Hydrophobicity - Transfer free energy (TFE) to outside - Transfer free energy (Simon, 1976), Cited by C... - 0.225 - 0.161307 - -0.161307 - 0.192235 - 0.212741 - 1.036749e-04 - 3.042894e-04 - 9,12,15 - - - 99 - JMD_N_TMD_N-Pattern(C,3,6)-TANS770102 - Conformation - α-helix (C-term, out) - α-helix (C-terminal, outside) - Normalized frequency of isolated helix (Tanaka... - 0.224 - 0.108020 - -0.108020 - 0.133731 - 0.139419 - 1.143783e-04 - 3.272494e-04 - 15,18 + Hydrophobicity + Modified Kyte-Doolittle hydrophobicity scale (... + 0.329 + 0.264720 + -0.264720 + 0.141666 + 0.233134 + 1.480000e-08 + 1.425259e-06 + 33,37 -

100 rows × 13 columns

@@ -1182,21 +684,23 @@ Which can be again used for machine learning: warnings.simplefilter(action='ignore', category=FutureWarning) import matplotlib.pyplot as plt import pandas as pd - X = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=df_feat["feature"]) - # ML evaluation + + X = sf.feat_matrix(df_parts=df_parts, features=df_feat["feature"]) rf = RandomForestClassifier() cv = cross_val_score(rf, X, y, scoring="accuracy", cv=5, n_jobs=1) print(f"Mean accuracy of {round(np.mean(cv), 2)}") + aa.plot_settings(font_scale=1.1) sns.barplot(pd.DataFrame({"Baseline": cv_base, "CPP": cv}), palette=["tab:blue", "tab:red"]) plt.ylabel("Mean accuracy", size=aa.plot_gcfs()+1) + plt.ylim(0, 1) sns.despine() plt.show() .. parsed-literal:: - Mean accuracy of 0.95 + Mean accuracy of 0.9 diff --git a/docs/build/html/generated/aaanalysis.AAclust.html b/docs/build/html/generated/aaanalysis.AAclust.html index 51764f6c..2ecc5c34 100644 --- a/docs/build/html/generated/aaanalysis.AAclust.html +++ b/docs/build/html/generated/aaanalysis.AAclust.html @@ -128,7 +128,7 @@

aaanalysis.AAclust

-class aaanalysis.AAclust(model=None, model_kwargs=None, verbose=False)[source]
+class aaanalysis.AAclust(model=None, model_kwargs=None, verbose=False)[source]

Bases: object

AAclust: A k-optimized clustering framework for selecting redundancy-reduced set of numerical scales.

AAclust is designed primarily for amino acid scales but is versatile enough for any set of numerical indices. @@ -224,7 +224,7 @@

aaanalysis.AAclust
-__init__(model=None, model_kwargs=None, verbose=False)[source]
+__init__(model=None, model_kwargs=None, verbose=False)[source]

Methods

@@ -255,7 +255,7 @@

aaanalysis.AAclust
-fit(X, names=None, on_center=True, min_th=0, merge_metric='euclidean', n_clusters=None)[source]
+fit(X, names=None, on_center=True, min_th=0, merge_metric='euclidean', n_clusters=None)[source]

Fit the AAclust model on the data, optimizing cluster formation using Pearson correlation.

AAclust determines the optimal number of clusters, k, without pre-specification. It partitions data(X) into clusters by maximizing the within-cluster Pearson correlation beyond the ‘min_th’ threshold. The quality of @@ -296,7 +296,7 @@

aaanalysis.AAclust
-cluster_naming(names=None, labels=None, name_unclassified='Unclassified')[source]
+cluster_naming(names=None, labels=None, name_unclassified='Unclassified')[source]

Assigns names to clusters based on scale names and their frequency.

This method renames clusters based on the names of the scales in each cluster, with priority given to the most frequent scales. If the name is already used or does not exist, it defaults to ‘name_unclassified’.

@@ -319,7 +319,7 @@

aaanalysis.AAclust
-static get_cluster_centers(X, labels=None)[source]
+static get_cluster_centers(X, labels=None)[source]

Computes the center of each cluster based on the given labels.

Parameters:
@@ -340,7 +340,7 @@

aaanalysis.AAclust
-static get_cluster_medoids(X, labels=None)[source]
+static get_cluster_medoids(X, labels=None)[source]

Computes the medoid of each cluster based on the given labels.

Parameters:
@@ -365,7 +365,7 @@

aaanalysis.AAclust
-static correlation(X_test, X_ref, labels_test=None, labels_ref=None, n=3, positive=True, on_center=False, except_unclassified=True)[source]
+static correlation(X_test, X_ref, labels_test=None, labels_ref=None, n=3, positive=True, on_center=False, except_unclassified=True)[source]

Computes the correlation of test data with reference cluster centers.

Parameters:
@@ -391,7 +391,7 @@

aaanalysis.AAclust
-eval()[source]
+eval()[source]

diff --git a/docs/build/html/generated/tutorial1_quick_start.html b/docs/build/html/generated/tutorial1_quick_start.html index a03664d5..7ef47c67 100644 --- a/docs/build/html/generated/tutorial1_quick_start.html +++ b/docs/build/html/generated/tutorial1_quick_start.html @@ -151,9 +151,8 @@

1. Loading Sequences and ScalesAAontology), are available at your fingertips with the aa.load_scales() function.

import aaanalysis as aa
-# Load scales and scale categories (AAontology)
+
 df_scales = aa.load_scales()
-# Load training data
 df_seq = aa.load_dataset(name="DOM_GSEC", n=50)
 df_seq.head(5)
 
@@ -260,11 +259,12 @@

AAclustn_clusters parameters:

from sklearn.cluster import AgglomerativeClustering
 import numpy as np
-aac = aa.AAclust(model=AgglomerativeClustering, model_kwargs=dict(linkage="ward"))
+
+aac = aa.AAclust(model=AgglomerativeClustering)
 X = np.array(df_scales)
-scales = aac.fit(X, n_clusters=100, names=list(df_scales))
+scales = aac.fit(X, names=list(df_scales), n_clusters=100)
 df_scales = df_scales[scales]
-df_scales
+df_scales[scales[0:4]].head(5)
 
@@ -289,23 +289,6 @@

AAclust

Comparative Physicochemical Profiling (CPP)

@@ -827,12 +347,11 @@

Comparative Physicochemical Profiling (CPP)sf.get_df_parts. - Splits: These Parts can be split into various continuous segments or discontinuous patterns, specified -sf.get_split_kws(). - Scales: Sets of amino acid scales. We -first use SequenceFeature to obtain Parts and Splits:

-
# Feature Engineering
-y = list(df_seq["label"])
+sf.get_split_kws(). - Scales: Sets of amino acid scales.

+

We use SequenceFeature to obtain Parts and Splits:

+
y = list(df_seq["label"])
 sf = aa.SequenceFeature()
-df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10, list_parts=["tmd_jmd"])
+df_parts = sf.get_df_parts(df_seq=df_seq, list_parts=["tmd_jmd"])
 split_kws = sf.get_split_kws(n_split_max=1, split_types=["Segment"])
 df_parts.head(5)
 
@@ -885,9 +404,9 @@

Comparative Physicochemical Profiling (CPP)
# Small set of features (100 features created)
-cpp = aa.CPP(df_parts=df_parts, df_scales=df_scales, split_kws=split_kws, verbose=False)
-df_feat = cpp.run(labels=y, tmd_len=20, jmd_n_len=10, jmd_c_len=10, n_filter=100)  # Default values for lengths are used
+
# Small set of CPP features (100 features are created)
+cpp = aa.CPP(df_scales=df_scales, df_parts=df_parts, split_kws=split_kws, verbose=False)
+df_feat = cpp.run(labels=y)
 df_feat
 
@@ -999,24 +518,23 @@

3. Protein Predictionsf.feat_matrix and used for machine learning:

from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import cross_val_score
-X = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=df_feat["feature"])
-# ML evaluation
+
+X = sf.feat_matrix(df_parts=df_parts, features=df_feat["feature"])
 rf = RandomForestClassifier()
-cv_base = cross_val_score(rf, X, y, scoring="accuracy", cv=5, n_jobs=8) # Set n_jobs=1 to disable multi-processing
+cv_base = cross_val_score(rf, X, y, scoring="accuracy")
 print(f"Mean accuracy of {round(np.mean(cv_base), 2)}")
 
-
Mean accuracy of 0.57
+
Mean accuracy of 0.58
 

Creating more features with CPP will take some more time. but improve prediction performance:

-
# Default CPP features  (around 100.000 features)
-split_kws = sf.get_split_kws()
-df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10)
-cpp = aa.CPP(df_parts=df_parts, df_scales=df_scales, split_kws=split_kws, verbose=False)
-df_feat = cpp.run(labels=y, n_processes=8, n_filter=100)
-df_feat
+
# CPP features with default splits (around 100.000 features)
+df_parts = sf.get_df_parts(df_seq=df_seq)
+cpp = aa.CPP(df_scales=df_scales, df_parts=df_parts, verbose=False)
+df_feat = cpp.run(labels=y)
+df_feat.head(10)
 
@@ -1134,123 +652,108 @@

3. Protein Prediction32,33 - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... + 5 + TMD_C_JMD_C-Segment(4,9)-ROBB760113 + Conformation + β-turn + β-turn + Information measure for loop (Robson-Suzuki, 1... + 0.337 + 0.319440 + -0.319440 + 0.175203 + 0.255754 + 6.100000e-09 + 1.185395e-06 + 27,28 - 95 - JMD_N_TMD_N-Pattern(C,6,9)-NAKH900106 - Composition - Mitochondrial proteins - Mitochondrial proteins - Normalized composition from animal (Nakashima ... - 0.228 - 0.172120 - -0.172120 - 0.180254 - 0.199987 - 8.754340e-05 - 2.693037e-04 - 12,15 + 6 + TMD_C_JMD_C-Segment(2,2)-EISD860102 + Energy + Isoelectric point + Atom-based hydrophobic moment + Atom-based hydrophobic moment (Eisenberg-McLac... + 0.337 + 0.139567 + 0.139567 + 0.098917 + 0.101842 + 6.300000e-09 + 1.185395e-06 + 31,32,33,34,35,36,37,38,39,40 - 96 - JMD_N_TMD_N-Pattern(C,6,9,12)-ZIMJ680105 - Others - PC 2 - Principal Component 1 (Zimmerman) - RF rank (Zimmerman et al., 1968) - 0.227 - 0.133867 - -0.133867 - 0.160532 - 0.161415 - 9.118090e-05 - 2.778863e-04 - 9,12,15 + 7 + TMD_C_JMD_C-Segment(4,5)-RICJ880113 + Conformation + α-helix (C-cap) + α-helix (C-terminal, inside) + Relative preference value at C2 (Richardson-Ri... + 0.336 + 0.223765 + 0.223765 + 0.133513 + 0.178217 + 7.100000e-09 + 1.185395e-06 + 33,34,35,36 - 97 - JMD_N_TMD_N-Segment(7,8)-KARS160107 + 8 + TMD_C_JMD_C-Segment(5,7)-KARS160107 Shape Side chain length Eccentricity (maximum) Diameter (maximum eccentricity) (Karkbara-Knis... - 0.227 - 0.098674 - -0.098674 - 0.104428 - 0.124875 - 8.945330e-05 - 2.740061e-04 - 16,17 + 0.331 + 0.217594 + 0.217594 + 0.136011 + 0.172395 + 1.130000e-08 + 1.331786e-06 + 32,33,34 - 98 - JMD_N_TMD_N-Pattern(C,6,9,12)-SIMZ760101 + 9 + TMD_C_JMD_C-Pattern(C,4,8)-JURD980101 Polarity Hydrophobicity - Transfer free energy (TFE) to outside - Transfer free energy (Simon, 1976), Cited by C... - 0.225 - 0.161307 - -0.161307 - 0.192235 - 0.212741 - 1.036749e-04 - 3.042894e-04 - 9,12,15 - - - 99 - JMD_N_TMD_N-Pattern(C,3,6)-TANS770102 - Conformation - α-helix (C-term, out) - α-helix (C-terminal, outside) - Normalized frequency of isolated helix (Tanaka... - 0.224 - 0.108020 - -0.108020 - 0.133731 - 0.139419 - 1.143783e-04 - 3.272494e-04 - 15,18 + Hydrophobicity + Modified Kyte-Doolittle hydrophobicity scale (... + 0.329 + 0.264720 + -0.264720 + 0.141666 + 0.233134 + 1.480000e-08 + 1.425259e-06 + 33,37 -

100 rows × 13 columns

Which can be again used for machine learning:

import seaborn as sns
 import warnings
 warnings.simplefilter(action='ignore', category=FutureWarning)
 import matplotlib.pyplot as plt
 import pandas as pd
-X = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=df_feat["feature"])
-# ML evaluation
+
+X = sf.feat_matrix(df_parts=df_parts, features=df_feat["feature"])
 rf = RandomForestClassifier()
 cv = cross_val_score(rf, X, y, scoring="accuracy", cv=5, n_jobs=1)
 print(f"Mean accuracy of {round(np.mean(cv), 2)}")
+
 aa.plot_settings(font_scale=1.1)
 sns.barplot(pd.DataFrame({"Baseline": cv_base, "CPP": cv}), palette=["tab:blue", "tab:red"])
 plt.ylabel("Mean accuracy", size=aa.plot_gcfs()+1)
+plt.ylim(0, 1)
 sns.despine()
 plt.show()
 
-
Mean accuracy of 0.95
+
Mean accuracy of 0.9
 
../_images/output_13_1.png diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index 780cbe6e..b18eb997 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_set_legend", "generated/aaanalysis.plot_settings", "generated/plotting_prelude", "generated/tutorial1_quick_start", "generated/tutorial2a_data_loader", "generated/tutorial2b_scales_loader", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tutorials"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_set_legend.rst", "generated/aaanalysis.plot_settings.rst", "generated/plotting_prelude.rst", "generated/tutorial1_quick_start.rst", "generated/tutorial2a_data_loader.rst", "generated/tutorial2b_scales_loader.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles.rst", "index/usage_principles/aaontology.rst", "index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tutorials.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_cmap", "aaanalysis.plot_set_legend", "aaanalysis.plot_settings", "Plotting prelude", "Quick Start with AAanalysis", "Data Loading Tutorial", "Scale Loading Tutorial", "Welcome to the AAanalysis documentation!", "Contributing", "<no title>", "<no title>", "Introduction", "<no title>", "References", "Tables", "Usage Principles", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tutorials"], "terms": {"thi": [0, 1, 3, 7, 12, 13, 14, 15, 16, 18, 27], "applic": [0, 3], "program": [0, 18], "interfac": [0, 18, 24], "i": [0, 1, 2, 3, 4, 5, 6, 8, 10, 13, 14, 15, 16, 17, 18, 21, 22, 24, 26, 28], "public": [0, 13, 15, 17, 18, 20], "object": [0, 1, 3, 4, 5, 14], "function": [0, 3, 8, 10, 12, 13, 14, 15, 16, 17, 22], "our": [0, 13, 14, 16, 18, 21], "aaanalysi": [0, 15, 16, 18, 20, 21, 22, 24, 25, 28, 31], "python": [0, 14, 17, 18, 21, 22], "toolkit": [0, 18, 27], "which": [0, 1, 3, 4, 8, 14, 15, 16, 18, 21, 24, 27, 29], "can": [0, 1, 4, 5, 11, 14, 15, 16, 17, 18, 21, 24, 27, 29], "import": [0, 4, 5, 6, 7, 11, 12, 14, 15, 16, 18, 25], "aa": [0, 2, 4, 5, 6, 7, 11, 12, 14, 15, 16, 24, 25], "you": [0, 16, 17, 18, 20], "access": [0, 6, 14, 16, 24], "all": [0, 1, 2, 3, 4, 6, 7, 12, 14, 16, 18, 24], "method": [0, 1, 2, 3, 4, 5, 23], "via": [0, 18, 23], "alia": [0, 4], "load_dataset": [0, 4, 14, 15, 16, 24], "class": [1, 2, 3, 4, 5, 6, 15, 29], "model": [1, 5, 14, 18, 29], "none": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15], "model_kwarg": [1, 14], "verbos": [1, 2, 3, 4, 5, 12, 14], "fals": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 14, 16], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 18], "base": [1, 2, 3, 4, 5, 6, 10, 14, 17, 18, 21, 22, 23, 24, 28, 29], "A": [1, 4, 6, 11, 14, 15, 16, 18, 21, 23], "k": [1, 14, 16, 17, 21, 22, 23], "optim": [1, 2, 3, 17, 21, 22, 23], "cluster": [1, 14, 17, 21, 22, 23, 24], "framework": [1, 14, 17, 21, 22], "select": [1, 2, 3, 6, 7, 14, 15, 16, 17, 18, 21, 22, 23], "redund": [1, 2, 7, 14, 17, 21, 22, 23], "reduc": [1, 5, 7, 17, 21, 22, 23, 24], "set": [1, 2, 3, 4, 5, 7, 8, 11, 12, 14, 15, 17, 18, 21, 22, 23, 24, 27], "numer": [1, 3, 4, 14, 17, 21, 22], "scale": [1, 2, 3, 4, 7, 9, 10, 12, 17, 20, 21, 22, 23, 25, 27, 31], "design": [1, 3, 18, 24, 28], "primarili": [1, 5, 18], "amino": [1, 2, 3, 4, 6, 7, 14, 17, 20, 21, 22, 23, 25, 27, 29], "acid": [1, 2, 3, 4, 6, 7, 14, 17, 20, 21, 22, 23, 25, 27, 29], "versatil": 1, "enough": 1, "ani": [1, 16, 18, 21, 24], "indic": [1, 3, 4, 5, 15, 16, 24], "It": [1, 14, 15, 21, 24, 27], "take": [1, 14], "requir": 1, "pre": [1, 2, 14, 15, 18], "defin": [1, 4, 7, 14, 15, 18, 24, 27], "number": [1, 2, 3, 4, 5, 6, 10, 11, 15, 16, 24], "from": [1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 24, 25], "scikit": [1, 18], "learn": [1, 5, 15, 17, 18, 20, 21, 22, 23, 24, 25], "http": [1, 18], "org": [1, 18], "stabl": 1, "modul": [1, 17], "html": [1, 18], "By": [1, 6], "leverag": 1, "pearson": [1, 2], "correl": [1, 2, 24], "similar": [1, 24, 29], "measur": [1, 14, 18, 24], "valu": [1, 2, 3, 4, 14, 16, 18, 21, 24], "one": [1, 3], "repres": [1, 3, 14, 15, 21, 24], "sampl": [1, 2, 3, 4, 5, 15, 24, 29], "term": [1, 14, 16, 24], "medoid": 1, "each": [1, 2, 3, 4, 5, 14, 15, 16, 18], "closest": 1, "": [1, 11, 14, 15, 16, 18, 23, 24], "center": [1, 10, 14, 24], "yield": 1, "paramet": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 24], "callabl": 1, "option": [1, 2, 3, 4, 5, 6, 7, 10, 12], "default": [1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 14, 15, 16], "sklearn": [1, 14], "kmean": 1, "The": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15, 16, 18, 24, 27, 28], "emploi": [1, 5], "given": [1, 3, 4, 6, 14, 16, 24], "n_cluster": [1, 14], "dict": [1, 2, 3, 4, 5, 9, 10, 11, 14], "dictionari": [1, 2, 3, 4, 9, 10, 11], "keyword": [1, 3, 5], "argument": [1, 3, 4, 5, 11], "pass": [1, 3, 5, 11, 18], "bool": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "flag": 1, "enabl": [1, 2, 3, 4, 5, 12, 17, 18, 21, 22, 28], "disabl": [1, 6, 14, 16], "output": [1, 4, 5, 12], "obtain": [1, 4, 7, 14, 24], "type": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, 18, 24], "int": [1, 2, 3, 4, 5, 6, 7, 10, 11], "labels_": [1, 5], "label": [1, 2, 3, 4, 5, 6, 11, 14, 15, 18, 24, 29], "order": [1, 18, 24], "featur": [1, 2, 3, 4, 5, 10, 17, 18, 21, 22, 27, 28, 29], "matrix": [1, 4, 5, 14, 24], "arrai": [1, 2, 4, 5, 14], "like": [1, 2, 4, 5, 18, 24], "centers_": 1, "averag": [1, 4, 14, 16, 24], "correspond": [1, 15, 18, 24], "center_labels_": 1, "medoids_": 1, "medoid_labels_": 1, "medoid_ind_": 1, "chosen": [1, 2, 4, 6, 7, 15], "within": [1, 2, 4, 18, 24, 27], "origin": [1, 16], "dataset": [1, 2, 6, 7, 14, 16, 17, 18, 21, 22, 29, 30], "__init__": [1, 2, 3, 4, 5], "fit": [1, 5, 14, 18], "x": [1, 3, 5, 6, 11, 12, 14], "name": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 14, 15, 16, 24], "on_cent": 1, "true": [1, 2, 3, 4, 6, 7, 11, 12, 15, 16], "min_th": 1, "0": [1, 2, 3, 4, 5, 6, 11, 12, 14, 15, 16, 24, 29], "merge_metr": 1, "euclidean": [1, 5], "data": [1, 3, 5, 6, 7, 14, 16, 17, 18, 24, 25, 31], "format": [1, 12, 24], "us": [1, 2, 3, 5, 6, 7, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 24, 25, 27, 29], "determin": [1, 7], "without": [1, 3, 18, 24], "specif": [1, 9, 15, 18, 24], "partit": [1, 16, 24], "maxim": 1, "beyond": 1, "threshold": [1, 2], "qualiti": 1, "either": [1, 4, 6, 7, 16, 17], "minimum": [1, 4, 6], "member": 1, "min_cor": 1, "between": [1, 2, 3, 4, 10, 11, 14, 15, 24], "its": [1, 15, 18, 24], "govern": 1, "undergo": 1, "three": [1, 4, 10, 13, 15, 24], "stage": 1, "1": [1, 2, 3, 4, 5, 6, 7, 11, 12, 15, 16, 24, 29], "estim": 1, "lower": [1, 24], "bound": 1, "2": [1, 2, 3, 4, 5, 11, 15, 16, 24, 29], "refin": 1, "metric": [1, 5, 18], "3": [1, 4, 5, 11, 15, 16, 18, 24], "merg": 1, "smaller": 1, "direct": [1, 18], "final": 1, "reduct": 1, "shape": [1, 2, 3, 4, 5, 11, 14, 24], "n_sampl": [1, 2, 4, 5], "n_featur": [1, 2, 3, 4, 5], "where": [1, 4, 5, 24], "list": [1, 3, 4, 10, 11, 14, 24], "str": [1, 3, 4, 5, 6, 7, 9, 10, 11, 12], "If": [1, 2, 3, 4, 5, 6, 7, 10, 12, 16, 17, 18, 20, 29], "provid": [1, 2, 3, 5, 6, 7, 10, 14, 15, 16, 17, 18, 22, 24, 29], "return": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15], "appli": [1, 5, 10, 11, 12, 15], "otherwis": [1, 3, 4, 5, 24], "float": [1, 2, 3, 5, 10, 11, 12], "instead": 1, "names_medoid": 1, "follow": [1, 2, 4, 5, 7, 13, 17, 18, 20, 21, 22, 25], "attribut": 1, "attr": 1, "For": [1, 4, 6, 11, 15, 18, 29], "further": [1, 3, 16, 18, 24], "inform": [1, 2, 3, 4, 5, 14, 16, 27], "refer": [1, 2, 4, 6, 14, 18, 24], "paper": 1, "todo": [1, 2], "add": [1, 2, 3, 4], "link": [1, 2, 17, 18, 20, 23], "cluster_nam": 1, "name_unclassifi": 1, "unclassifi": [1, 7, 14, 16, 24], "assign": [1, 3, 4, 5, 16, 24], "frequenc": [1, 14, 24], "renam": 1, "prioriti": 1, "most": [1, 2, 3, 5, 14, 17, 21, 22], "frequent": 1, "alreadi": [1, 29], "doe": 1, "exist": [1, 18, 29], "cannot": 1, "classifi": [1, 3], "static": [1, 2, 4], "get_cluster_cent": 1, "comput": [1, 2, 3, 4, 14, 18, 23, 24], "center_label": 1, "associ": [1, 24], "get_cluster_medoid": 1, "medoid_label": 1, "medoid_ind": 1, "index": [1, 6, 16, 17, 18, 23], "x_test": 1, "x_ref": 1, "labels_test": 1, "labels_ref": 1, "n": [1, 2, 3, 4, 6, 7, 14, 15, 16, 18, 23, 24], "posit": [1, 2, 3, 4, 5, 6, 10, 14, 17, 21, 22, 24, 29], "except_unclassifi": 1, "test": [1, 2, 14, 16], "top": [1, 7, 24], "consid": [1, 7, 18], "strength": 1, "els": 1, "neg": [1, 4, 5, 6, 10, 15, 24, 29], "exclud": [1, 16], "list_top_center_name_corr": 1, "have": [1, 14, 15, 16, 18, 24, 29], "strongest": 1, "eval": [1, 2, 5, 18], "df_scale": [2, 4, 7, 14, 16, 27], "df_cat": [2, 3, 4, 7, 16, 27], "df_part": [2, 4, 14, 27], "split_kw": [2, 4, 14, 27], "accept_gap": [2, 3, 4], "tool": [2, 18, 23], "creat": [2, 3, 4, 5, 14, 18, 27], "filter": [2, 3, 6, 14, 15], "ar": [2, 3, 4, 5, 6, 7, 14, 15, 16, 18, 24, 27, 29, 30], "discrimin": [2, 3, 14], "two": [2, 3, 7, 14, 16, 17, 18, 21, 22, 23, 24, 26, 27], "sequenc": [2, 3, 4, 5, 6, 15, 17, 18, 21, 22, 23, 24, 25, 27, 28, 29], "panda": [2, 3, 4, 5, 6, 7, 14, 18], "datafram": [2, 3, 4, 5, 6, 7, 14, 18, 27], "load_categori": [2, 4], "categori": [2, 3, 4, 7, 9, 10, 11, 14, 15, 16], "physicochem": [2, 4, 17, 21, 22, 23, 24, 25, 27], "part": [2, 3, 4, 14, 18, 27], "sequencefeatur": [2, 14], "get_split_kw": [2, 4, 14], "nest": [2, 4], "split_typ": [2, 4, 14], "whether": [2, 3, 4, 10, 11], "accept": [2, 3, 4], "miss": [2, 3, 4], "omit": [2, 3, 4], "print": [2, 3, 4, 14], "progress": [2, 3, 23], "about": [2, 3], "algorithm": [2, 3, 14, 17, 18, 21, 22, 27, 28], "run": [2, 4, 14], "perform": [2, 5, 14, 16, 24], "step": [2, 3, 4, 6, 7, 18, 21], "parametr": 2, "n_filter": [2, 14], "100": [2, 6, 10, 14, 15], "tmd_len": [2, 3, 4, 14], "20": [2, 3, 4, 7, 14, 15, 16, 18, 24], "jmd_n_len": [2, 3, 4, 14], "10": [2, 3, 4, 10, 14, 15, 16, 24], "jmd_c_len": [2, 3, 4, 14], "ext_len": [2, 3, 4], "4": [2, 3, 4, 15, 16, 24], "start": [2, 3, 4, 6, 18, 24, 25, 27], "check_cat": 2, "n_pre_filt": 2, "pct_pre_filt": 2, "5": [2, 3, 4, 5, 11, 14, 15, 16, 24], "max_std_test": 2, "max_overlap": 2, "max_cor": 2, "n_process": [2, 14], "pipelin": [2, 18], "creation": 2, "aim": [2, 3, 14, 18], "identifi": [2, 3, 5, 6, 14, 15, 17, 21, 22, 23, 25, 29], "collect": [2, 7], "non": [2, 4, 6, 14, 24], "group": [2, 3, 4, 24], "t": [2, 6, 14, 16, 24], "u": [2, 17, 18], "p": [2, 14, 16, 23], "percentag": [2, 5, 10, 16], "length": [2, 3, 4, 6, 14, 15, 24], "tmd": [2, 3, 4, 6, 14, 15], "explan": [2, 3, 18], "first": [2, 3, 4, 7, 10, 14, 18], "terminu": [2, 3, 4, 24], "jmd": [2, 3, 4, 14], "c": [2, 3, 4, 14, 15, 16, 17, 23, 24], "extend": [2, 3, 4, 18, 24, 29], "termin": [2, 3, 4, 14, 15, 24], "should": [2, 3, 4, 5, 18, 29], "longer": 2, "than": [2, 24], "check": [2, 18], "remain": [2, 16, 18], "after": [2, 24], "maximum": [2, 4, 5, 6, 14], "standard": [2, 29], "deviat": 2, "overlap": 2, "cpu": 2, "multiprocess": 2, "automat": [2, 3, 5, 18], "df_feat": [2, 3, 4, 14, 27], "uniqu": [2, 3, 16], "statist": [2, 3], "n_feature_inform": [2, 3], "contain": [2, 3, 5, 6, 7, 16, 18, 24, 27, 29], "eleven": 2, "column": [2, 3, 4, 5, 6, 7, 11, 14, 15, 16, 18], "includ": [2, 4, 6, 7, 10, 11, 18], "id": [2, 4, 6, 7, 16], "result": 2, "rank": [2, 14, 16], "11": [2, 3, 11, 14, 15, 24], "split": [2, 4, 14, 27], "subcategori": [2, 3, 7, 14, 16], "sub": 2, "scale_nam": [2, 3, 7, 14, 16], "abs_auc": [2, 3, 14], "absolut": [2, 18], "adjust": [2, 3, 12], "auc": 2, "abs_mean_dif": [2, 14], "mean": [2, 3, 14, 16, 24], "differ": [2, 3, 4, 11, 15, 16, 27], "std_test": [2, 3, 14], "std_ref": [2, 14], "p_val": 2, "mann_whitnei": 2, "ttest_indep": 2, "p_val_fdr_bh": [2, 14], "benjamini": 2, "hochberg": 2, "fdr": 2, "correct": 2, "get": [2, 4, 8, 25], "evalu": [2, 7, 14, 16, 18, 24], "condit": [3, 4], "jmd_m_len": [3, 4], "profil": [3, 9, 10, 17, 21, 22, 28], "y": [3, 11, 12, 14, 16], "val_col": 3, "mean_dif": [3, 14], "val_typ": 3, "count": [3, 15], "normal": [3, 7, 11, 14, 16, 24], "figsiz": 3, "7": [3, 4, 5, 12, 14, 15, 16, 24], "titl": [3, 11], "title_kw": 3, "dict_color": [3, 9, 10, 11], "edge_color": 3, "bar_width": 3, "75": 3, "add_jmd_tmd": 3, "jmd_n_seq": 3, "tmd_seq": 3, "jmd_c_seq": 3, "tmd_color": 3, "mediumspringgreen": 3, "jmd_color": 3, "blue": [3, 11, 14], "tmd_seq_color": 3, "black": [3, 18], "jmd_seq_color": 3, "white": 3, "seq_siz": 3, "tmd_jmd_fontsiz": 3, "xtick_siz": 3, "xtick_width": 3, "xtick_length": 3, "xticks_po": 3, "ytick_siz": 3, "ytick_width": 3, "ytick_length": 3, "ylim": 3, "highlight_tmd_area": 3, "highlight_alpha": 3, "15": [3, 4, 14, 15, 24], "grid": [3, 12], "grid_axi": [3, 12], "both": [3, 12, 15], "add_legend_cat": 3, "legend_kw": 3, "shap_plot": 3, "kwarg": [3, 4, 11], "plot": [3, 9, 10, 11, 12, 15, 17, 18, 24, 31], "instanc": 3, "avail": [3, 7, 14, 16, 17, 20, 23], "specifi": [3, 4, 5, 9, 10, 12, 14, 18], "check_value_typ": 3, "tupl": [3, 10], "size": [3, 4, 8, 10, 11, 12, 14, 24], "custom": [3, 7, 11, 12], "appear": [3, 12, 24], "map": [3, 4, 10, 11], "color": [3, 9, 10, 11], "edg": [3, 11, 18, 24], "bar": [3, 9, 10], "width": [3, 11], "line": [3, 11, 13], "annot": 3, "font": [3, 8, 11, 12], "tick": [3, 12], "axi": [3, 12, 16], "limit": 3, "highlight": 3, "area": [3, 14, 16, 24], "alpha": [3, 14], "ad": 3, "drawn": 3, "legend": [3, 11], "shap": [3, 10, 14, 18], "shaplei": 3, "addit": [3, 4, 5, 7, 11, 12, 16, 18, 24], "gener": [3, 4, 6, 10, 12, 18, 21, 23, 24, 29], "other": [3, 7, 14, 16, 18, 24], "intern": [3, 24], "librari": [3, 12, 18], "ax": [3, 11], "matplotlib": [3, 11, 12, 14, 15, 18], "heatmap": [3, 9, 10], "8": [3, 4, 5, 14, 15, 16, 18, 24], "vmin": 3, "vmax": 3, "grid_on": 3, "cmap": [3, 9, 10], "rdbu_r": 3, "cmap_n_color": 3, "cbar_kw": 3, "facecolor_dark": [3, 10], "add_importance_map": 3, "cbar_pct": 3, "featuremap": 3, "versu": 3, "wrapper": [3, 14, 17, 18, 21, 22], "seaborn": [3, 10, 12, 14, 15, 18], "level": [3, 6, 7, 15, 16, 17, 18, 22, 24, 25, 26], "e": [3, 4, 9, 10, 12, 14, 16, 17, 18, 21, 22, 24, 29], "g": [3, 4, 9, 10, 12, 14, 16, 17, 18, 21, 22, 24, 29], "protein": [3, 4, 6, 16, 17, 18, 21, 22, 23, 27, 28, 29], "shown": 3, "feat_impact": 3, "displai": 3, "sum": [3, 16, 24], "std": 3, "aggreg": 3, "positions_onli": 3, "across": [3, 16, 18], "recommend": [3, 5, 7, 18], "when": [3, 5, 18, 24], "emphas": [3, 18], "fewer": 3, "value_typ": 3, "height": 3, "figur": 3, "inch": 3, "pyplot": [3, 11, 14, 15], "anchor": [3, 11, 24], "colormap": 3, "infer": [3, 18], "seismic": 3, "space": [3, 5, 10, 11], "impact": 3, "discret": 3, "diverg": 3, "sequenti": 3, "kei": [3, 18, 24], "colorbar": 3, "under": [3, 7, 18], "depicet": 3, "depict": 3, "jmd_n": [3, 4, 6, 14, 15], "jmd_c": [3, 4, 6, 14, 15], "point": [3, 11, 14, 24, 25], "set_xticklabel": 3, "widht": 3, "tick_param": 3, "classif": [3, 6, 7, 14, 15, 16, 17, 22, 24, 25, 29], "pcolormesh": 3, "effect": [3, 18, 24, 29], "onli": [3, 6, 7, 15, 18, 24, 29], "align": [3, 11, 14, 16], "see": [3, 18, 21, 24, 27], "document": [3, 24], "more": [3, 14, 18], "detail": [3, 6, 7, 11, 16, 17, 18, 20], "cpp": [3, 4, 10, 17, 20, 21, 22, 25, 27], "code": [3, 10, 13], "update_seq_s": 3, "retriev": [4, 9, 10, 14], "compon": [4, 5, 7, 14, 16, 24], "continu": [4, 14], "subset": [4, 7, 24], "domain": [4, 6, 14, 15, 24], "transmembran": [4, 24], "membran": [4, 24], "principl": [4, 17], "distinct": [4, 17, 18, 21, 22, 24], "segment": [4, 14, 27], "pattern": [4, 14], "properti": [4, 24], "express": 4, "present": [4, 6], "realiz": 4, "over": [4, 14], "valid": [4, 18], "tmd_e": 4, "tmd_n": 4, "tmd_c": 4, "ext_c": 4, "ext_n": 4, "tmd_jmd": [4, 14], "jmd_n_tmd_n": [4, 14], "tmd_c_jmd_c": [4, 14], "ext_n_tmd_n": 4, "tmd_c_ext_c": 4, "get_df_part": [4, 14], "df_seq": [4, 5, 6, 14, 15, 27], "list_part": [4, 14], "all_part": 4, "datafran": 4, "compris": [4, 16], "tmd_start": [4, 6, 14, 15], "tmd_stop": [4, 6, 14, 15], "string": [4, 10], "len": [4, 15], "must": 4, "lenght": 4, "resp": [4, 24], "extra": [4, 13, 24], "possibl": [4, 15, 24, 29], "found": [4, 18], "sf": [4, 14], "dom_gsec": [4, 14, 15, 24], "n_split_min": 4, "n_split_max": [4, 14], "steps_pattern": 4, "n_min": 4, "n_max": 4, "len_max": 4, "steps_periodicpattern": 4, "periodicpattern": 4, "greater": 4, "greatest": 4, "whole": [4, 6, 14, 16], "specfii": 4, "smallest": [4, 24], "integ": 4, "6": [4, 14, 15, 16, 24], "vari": [4, 15], "paramt": 4, "argumetn": 4, "get_featur": 4, "load_scal": [4, 14, 16, 17, 22, 24], "combin": [4, 14, 18, 24], "form": [4, 24], "feat_matrix": [4, 14], "n_job": [4, 14], "return_label": 4, "pd": [4, 5, 14, 18], "seri": 4, "job": 4, "parallel": [4, 24], "spars": 4, "feat_nam": 4, "convert": 4, "depend": [4, 24], "last": 4, "step1": 4, "step2": 4, "add_feat_valu": 4, "dict_scal": 4, "convent": [4, 7], "letter": 4, "feature_valu": 4, "n_part": 4, "ha": [4, 18, 24], "structur": [4, 14, 23, 24], "th": [4, 7, 16], "n_split": 4, "p1": 4, "p2": 4, "pn": 4, "end": [4, 24], "odd": [4, 15], "even": 4, "give": 4, "add_dif": 4, "sample_nam": 4, "ref_group": 4, "add_posit": 4, "part_split": 4, "feat_posit": 4, "total": [4, 5, 14, 16, 24], "n_compon": 5, "pca_kwarg": 5, "determinist": [5, 17, 21, 22], "unlabel": [5, 17, 21, 22, 24, 29], "offer": [5, 15, 18], "approach": [5, 14, 15, 29], "pu": [5, 17, 21, 22, 24], "princip": [5, 7, 14, 16, 24], "analysi": [5, 7, 14, 16, 17, 18, 21, 22, 24], "pca": [5, 16], "dimension": [5, 23], "pc": [5, 7, 14, 24], "iter": 5, "reliabl": [5, 15, 18], "These": [5, 7, 14, 16, 18, 29], "those": [5, 24], "distant": 5, "altern": [5, 29], "also": [5, 15, 16, 18, 24], "distanc": [5, 24], "manhattan": 5, "cosin": 5, "80": 5, "cover": 5, "varianc": 5, "identif": [5, 23], "datapoint": 5, "inspir": [5, 18], "techniqu": [5, 29], "an": [5, 6, 7, 14, 15, 16, 17, 18, 20, 23, 24], "theoret": [5, 24], "high": [5, 23, 24], "n_neg": 5, "label_po": 5, "name_neg": 5, "rel_neg": 5, "col_class": 5, "newli": 5, "updat": [5, 18], "new": [5, 18], "store": 5, "Will": 5, "dure": 5, "initi": [5, 24], "small": [5, 14, 15, 17, 18, 21, 22, 25, 30], "datafor": 5, "conta": 5, "po": 5, "unl": 5, "numpi": [5, 14, 18], "np": [5, 14], "atgc": 5, "gcta": 5, "actg": 5, "tacg": 5, "mode": 5, "modifi": [5, 12], "dpul": 5, "info": 6, "random": [6, 15, 24], "non_canonical_aa": 6, "remov": [6, 12], "min_len": [6, 15], "max_len": [6, 15], "aa_window_s": [6, 15], "9": [6, 14, 15, 16, 18, 24], "load": [6, 7, 17, 18, 22, 31], "benchmark": [6, 14, 16, 17, 22], "categor": [6, 15], "dom": [6, 15, 24], "seq": [6, 15, 24], "overview": [6, 7, 13, 15, 18], "tabl": [6, 7, 15, 18], "depth": [6, 7, 16, 17, 22], "breimann23a": [6, 7, 23, 24], "per": [6, 15, 24], "randomli": [6, 15], "liter": 6, "keep": 6, "gap": [6, 10], "handl": [6, 11], "canon": [6, 16], "don": 6, "replac": 6, "symbol": 6, "window": [6, 14, 24], "aa_": 6, "df_info": [6, 15], "entri": [6, 14, 15, 16], "uniprot": 6, "binari": [6, 14, 15, 29], "stop": 6, "respect": [6, 9, 10, 14, 17, 18, 20, 24], "seq_amylo": [6, 15, 16, 24], "guid": [6, 7, 18], "tutori": [6, 7, 14, 17, 18, 21], "just_aaindex": [7, 16], "unclassified_in": [7, 16], "top60_n": [7, 16], "aaontologi": [7, 14, 17, 20, 22, 23, 25], "scales_raw": [7, 16, 24], "encompass": [7, 24], "aaindex": [7, 14, 16, 23], "kawashima08": [7, 23, 24], "along": [7, 14], "were": [7, 16, 24], "min": [7, 16, 24], "max": [7, 16, 24], "organ": [7, 18], "call": [7, 24], "scales_cat": [7, 16, 24], "breimann23b": [7, 17, 20, 23, 24], "compress": [7, 16, 24], "scales_pc": [7, 16, 24], "aaclust": [7, 16, 17, 20, 21, 22, 23, 24], "60": [7, 16, 24], "top60": [7, 16, 24], "individu": 7, "accompani": 7, "top60_ev": [7, 16, 24], "relev": 7, "inclus": [7, 18], "suffix": [7, 15, 18], "scale_id": [7, 16], "same": [7, 16], "deriv": 7, "descript": [7, 16, 18, 24], "scale_descript": [7, 14, 16], "current": 8, "ut": 8, "plot_set": [8, 14, 15], "dict_scale_cat": [9, 10], "cppplot": [9, 10, 18], "n_color": 10, "color_po": 10, "color_neg": 10, "color_cent": 10, "input": [10, 18, 27], "hex": 10, "pct_gap": 10, "pct_center": 10, "palett": [10, 14], "feat": 10, "ggplot": 10, "datagroup": 10, "dark": 10, "face": [10, 15], "rgb": 10, "hl": 10, "husl": 10, "xkcd": 10, "interpret": [10, 14, 17, 18, 20, 21, 22, 23, 24, 28], "latter": 10, "rang": 10, "sn": [10, 14, 15], "color_palett": 10, "light_palett": 10, "lighter": 10, "list_cat": 11, "ncol": 11, "fontsiz": 11, "weight": [11, 14, 23, 24], "lw": 11, "edgecolor": 11, "return_handl": 11, "loc": [11, 16], "upper": 11, "left": [11, 14, 24], "labelspac": 11, "columnspac": 11, "fontsize_legend": 11, "title_align_left": 11, "fontsize_weight": 11, "customiz": 11, "attach": 11, "item": 11, "coordin": 11, "text": [11, 12], "locat": [11, 24], "vertic": 11, "horizont": 11, "marker": 11, "directli": [11, 18], "finer": 11, "control": 11, "how": [11, 14], "line2d": 11, "cat1": 11, "red": [11, 14], "cat2": 11, "o": 11, "fig_format": 12, "pdf": 12, "font_scal": [12, 14, 15], "arial": 12, "change_s": 12, "weight_bold": 12, "adjust_el": 12, "short_tick": 12, "no_tick": 12, "no_ticks_i": 12, "short_ticks_i": 12, "no_ticks_x": 12, "short_ticks_x": 12, "configur": 12, "visual": [12, 13, 18], "variou": [12, 14, 18, 24, 27], "file": [12, 18], "save": 12, "make": [12, 13, 14, 15, 18], "visibl": 12, "choos": 12, "san": 12, "serif": 12, "verdana": 12, "helvetica": 12, "dejavu": 12, "element": 12, "bold": 12, "layout": 12, "short": [12, 13], "mark": 12, "global": 12, "util": [13, 15, 17, 18], "readi": [13, 15], "view": [13, 18, 29], "dive": 14, "power": 14, "capabl": [14, 24], "dedic": 14, "free": [14, 16, 24], "In": [14, 15, 29], "gamma": [14, 24], "secretas": [14, 23, 24], "substrat": [14, 23, 24], "exampl": [14, 15, 18, 21, 29], "we": [14, 15, 18], "ll": 14, "focu": [14, 18], "extract": 14, "thei": [14, 15, 18], "har": 14, "task": [14, 18, 29], "easili": [14, 15, 18], "essenti": [14, 15, 18], "randomforest": 14, "With": 14, "\u03b3": [14, 23], "hand": [14, 24], "effortlessli": 14, "furthermor": 14, "predominantli": 14, "hierarch": 14, "known": 14, "your": [14, 17, 18, 20], "fingertip": 14, "train": [14, 17, 18, 21, 22, 29], "50": [14, 15], "head": [14, 15, 16], "q14802": 14, "mqkvtlgllvflagfpvldandledknspfyydwhslqvgglicag": 14, "37": 14, "59": 14, "nspfyydwh": 14, "lqvgglicagvlcamgiiivmsa": 14, "kckckfgqk": 14, "q86ue4": 14, "maarswqdelaqqaeegsarlremlsvglgflrtelgldlglepkr": 14, "72": 14, "lglepkrypg": 14, "wvilvgtgalgllllfllgygwa": 14, "aacagarkkr": 14, "q969w9": 14, "mhrlmgvnstaaaaagqpnvsctcnckrslfqsmeitelefvqiii": 14, "41": 14, "63": [14, 15, 24], "fqsmeitel": 14, "fvqiiiivvvmmvmvvvitcl": 14, "hyklsarsfi": 14, "p53801": 14, "mapgvargptpywrlrlggaalllllipvaaaqeppgaacsqntnk": 14, "97": 14, "119": [14, 16], "rwgvcwvnfe": 14, "aliitmsvvggtlllgiaicccc": 14, "ccrrkrsrkp": 14, "q8iuw5": 14, "mapralpgsavlaaavfvggavssplvapdngssrtlhsrtettp": 14, "81": 14, "ndtgnghpei": 14, "iayalvpvffimglfgvlichl": 14, "kkkgyrctt": 14, "centerpiec": 14, "support": [14, 18, 24], "sinc": 14, "problem": 14, "machin": [14, 17, 18, 20, 23, 29], "lightweight": 14, "agglom": 14, "close": [14, 18], "agglomerativeclust": 14, "aac": 14, "linkag": 14, "ward": 14, "andn920101": [14, 16], "simz760101": 14, "nakh900106": 14, "aurr980112": 14, "corj870107": 14, "robb760113": 14, "miys990104": 14, "bigc670101": [14, 16], "rosg850102": 14, "zimj680105": 14, "yutk870102": 14, "suem840102": 14, "vasm830102": 14, "velv850101": 14, "vent840101": 14, "monm990101": 14, "geor030102": 14, "geor030106": 14, "kars160120": [14, 16], "lins030117": 14, "494": [14, 16], "268": 14, "237": 14, "787": [14, 16], "446": 14, "101": 14, "479": 14, "164": [14, 16], "564": 14, "444": [14, 16], "557": [14, 16], "103": 14, "617": [14, 16], "295": 14, "077": [14, 16], "250": [14, 16], "516": 14, "952": [14, 16], "186": [14, 16], "864": [14, 16], "258": 14, "303": 14, "104": [14, 16], "725": [14, 16], "849": 14, "000": [14, 16], "323": [14, 16], "680": [14, 16], "337": 14, "734": [14, 16], "657": [14, 16], "154": [14, 16], "246": [14, 16], "d": [14, 16], "206": [14, 16], "451": 14, "790": [14, 15, 16, 24], "803": [14, 16], "324": [14, 15, 16], "256": [14, 16], "574": [14, 16], "909": [14, 16], "225": 14, "923": [14, 16], "091": [14, 16], "404": [14, 16], "420": [14, 16], "210": 14, "090": 14, "823": [14, 16], "233": [14, 15, 16, 24], "092": 14, "859": [14, 16], "488": [14, 16], "025": [14, 16], "402": [14, 16], "531": [14, 16], "046": [14, 16], "610": [14, 16], "349": 14, "f": [14, 16], "877": [14, 16], "887": [14, 16], "724": 14, "950": 14, "328": 14, "783": [14, 16], "023": [14, 16], "749": [14, 16], "536": 14, "712": 14, "326": [14, 16], "032": [14, 16], "259": [14, 16], "055": 14, "352": [14, 16], "662": [14, 16], "513": 14, "175": 14, "525": 14, "455": [14, 16], "040": [14, 16], "692": [14, 15, 16], "h": [14, 16], "840": [14, 16], "387": [14, 15, 16], "401": 14, "463": [14, 16], "454": 14, "561": [14, 16], "667": [14, 16], "338": 14, "754": 14, "345": [14, 16], "191": [14, 16], "201": 14, "612": [14, 16], "562": [14, 16], "419": [14, 16], "990": 14, "697": [14, 16], "512": [14, 16], "969": 14, "151": 14, "056": 14, "663": [14, 16], "894": [14, 16], "820": 14, "714": [14, 16], "070": [14, 16], "161": 14, "457": [14, 16], "583": [14, 16], "140": [14, 16], "506": [14, 15, 16], "127": 14, "591": 14, "027": 14, "613": [14, 16], "694": [14, 15, 16, 24], "044": [14, 16], "615": [14, 16], "012": 14, "688": [14, 16], "294": 14, "195": [14, 16], "912": [14, 16], "l": [14, 16], "272": [14, 16], "835": [14, 16], "905": [14, 16], "732": [14, 16], "076": [14, 16], "014": 14, "846": 14, "925": 14, "428": 14, "771": [14, 16], "690": [14, 15, 16], "m": [14, 16], "704": [14, 16], "452": 14, "883": [14, 16], "084": 14, "113": 14, "620": [14, 16], "756": [14, 16], "689": [14, 15, 16], "701": [14, 15, 16], "651": [14, 16], "670": [14, 16], "372": [14, 16], "988": [14, 16], "029": [14, 16], "381": [14, 16], "287": [14, 15], "171": 14, "924": 14, "718": [14, 16], "398": [14, 16], "282": [14, 16], "162": 14, "508": 14, "313": [14, 16], "028": [14, 16], "277": 14, "342": [14, 15, 24], "093": [14, 16], "605": [14, 16], "871": [14, 16], "403": 14, "130": 14, "824": [14, 16], "376": [14, 16], "308": [14, 16], "750": [14, 16], "566": [14, 16], "545": [14, 16], "937": 14, "157": 14, "698": [14, 16], "q": [14, 16], "519": [14, 16], "203": [14, 16], "805": 14, "238": [14, 16], "546": [14, 16], "539": [14, 16], "388": [14, 16], "602": [14, 15, 16], "478": 14, "530": 14, "r": [14, 16, 24], "061": 14, "738": [14, 15, 16], "482": 14, "748": [14, 16], "634": [14, 16], "735": [14, 16], "112": [14, 16], "550": [14, 16], "760": [14, 15, 16], "549": 14, "728": [14, 16], "679": [14, 16], "045": 14, "450": [14, 16], "293": [14, 16], "798": [14, 16], "188": [14, 16], "359": 14, "656": [14, 16], "868": [14, 15, 16], "231": [14, 16], "168": 14, "399": [14, 16], "174": [14, 16], "619": [14, 16], "360": 14, "279": [14, 16], "529": [14, 16], "577": [14, 16], "462": [14, 16], "745": [14, 16], "344": 14, "v": [14, 16, 24], "183": [14, 16], "907": [14, 16], "492": [14, 16], "872": [14, 16], "719": 14, "770": [14, 16], "408": 14, "467": [14, 16], "163": [14, 15, 16, 24], "w": [14, 16], "926": [14, 16], "707": [14, 16], "500": [14, 16], "773": [14, 16], "138": [14, 16], "434": [14, 16], "066": 14, "440": [14, 16], "802": [14, 16], "425": [14, 16], "524": 14, "806": [14, 16], "762": [14, 16], "857": [14, 16], "110": [14, 16], "666": [14, 16], "736": [14, 16], "row": [14, 15, 16], "integr": [14, 18, 23], "target": 14, "middl": [14, 24], "adjac": [14, 24], "region": [14, 23, 24], "discontinu": 14, "d3zzk3": 14, "riigdganstvllvsvsgsvvlvviliaafvisrrrskysqak": 14, "o14786": 14, "pgnvlktldpilitiiamsalgvllgavcgvvlycacwhngm": 14, "o35516": 14, "selesprnaqllyllavavviilffillgvimakrkrkhgflw": 14, "o43914": 14, "dcscstvspgvlagivmgdlvltvlialavyflgrlvprgrga": 14, "o75581": 14, "ypteepapqatntvgsvigvivtifvsgtvyficqrmlcprmk": 14, "As": 14, "baselin": 14, "entir": 14, "p_val_mann_whitnei": 14, "activ": [14, 18, 24], "backbon": [14, 24], "dynam": [14, 24], "ch": [14, 16, 24], "\u03b1": [14, 24], "chemic": [14, 24], "shift": [14, 24], "andersen": 14, "et": [14, 16, 23, 24], "al": [14, 16, 23, 24], "1992": [14, 24], "022966": 14, "054433": 14, "053266": 14, "025737": 14, "099022": 14, "12": [14, 15, 24], "13": [14, 15, 24], "14": [14, 24], "16": [14, 24], "17": [14, 24], "18": 14, "vasm830101": 14, "conform": [14, 24], "helix": [14, 24], "rel": [14, 24], "popul": 14, "state": [14, 24], "120": [14, 16], "019298": 14, "046755": 14, "049127": 14, "039609": 14, "\u03b2": [14, 16, 24], "turn": [14, 16, 24], "loop": 14, "robson": [14, 24], "suzuki": [14, 24], "108": 14, "021958": 14, "060658": 14, "053190": 14, "062212": 14, "100670": 14, "racs820103": 14, "fraction": 14, "occurr": [14, 24], "080": 14, "019579": 14, "072260": 14, "047452": 14, "166907": 14, "ensembl": 14, "randomforestclassifi": 14, "model_select": 14, "cross_val_scor": 14, "ml": 14, "rf": 14, "cv_base": 14, "score": 14, "accuraci": [14, 16, 23], "cv": 14, "multi": 14, "process": [14, 18], "round": 14, "57": 14, "some": [14, 24], "time": 14, "improv": [14, 18, 23], "around": 14, "qian880106": 14, "121446": 14, "069196": 14, "085013": 14, "000000e": 14, "00": 14, "27": 14, "28": [14, 24], "29": 14, "30": 14, "31": [14, 24], "32": 14, "33": 14, "zimj680104": 14, "energi": [14, 16, 24], "isoelectr": [14, 24], "zimmerman": [14, 24], "1968": [14, 24], "373": 14, "220000": 14, "123716": 14, "137350": 14, "475000e": 14, "07": 14, "34": 14, "35": 14, "36": [14, 24], "358": 14, "144860": 14, "079321": 14, "117515": 14, "150000e": 14, "25": 14, "lins030101": 14, "asa": [14, 16, 24], "volum": [14, 16, 24], "surfac": [14, 16, 24], "residu": [14, 15, 16, 23, 24], "b": [14, 24], "354": [14, 16], "237161": 14, "145884": 14, "164285": 14, "100000e": 14, "09": 14, "341": 14, "263651": 14, "187136": 14, "171995": 14, "185395e": 14, "06": 14, "95": 14, "composit": [14, 23, 24], "mitochondri": [14, 24], "anim": 14, "nakashima": [14, 24], "228": 14, "172120": 14, "180254": 14, "199987": 14, "754340e": 14, "05": 14, "693037e": 14, "04": 14, "96": 14, "227": 14, "133867": 14, "160532": 14, "161415": 14, "118090e": 14, "778863e": 14, "kars160107": 14, "side": [14, 15, 16, 24], "chain": [14, 16, 24], "eccentr": [14, 24], "diamet": 14, "karkbara": [14, 24], "kni": 14, "098674": 14, "104428": 14, "124875": 14, "945330e": 14, "740061e": 14, "98": 14, "polar": [14, 24], "hydrophob": [14, 24], "transfer": [14, 16, 24], "tfe": [14, 16], "outsid": [14, 24], "simon": 14, "1976": [14, 24], "cite": [14, 17, 20], "161307": 14, "192235": 14, "212741": 14, "036749e": 14, "042894e": 14, "99": 14, "tans770102": 14, "out": [14, 18, 24], "isol": [14, 18], "tanaka": [14, 24], "224": [14, 16, 24], "108020": 14, "133731": 14, "139419": 14, "143783e": 14, "272494e": 14, "again": 14, "warn": [14, 15], "simplefilt": [14, 15], "action": [14, 15], "ignor": [14, 15, 18], "futurewarn": [14, 15], "plt": [14, 15], "barplot": 14, "tab": 14, "ylabel": 14, "plot_gcf": 14, "despin": [14, 15], "show": [14, 15, 16], "iloc": 15, "predictor": [15, 24], "aa_caspase3": [15, 24], "185605": [15, 24], "705": [15, 16, 24], "184900": [15, 24], "prosper": [15, 23, 24], "aa_furin": [15, 24], "71": [15, 24], "59003": [15, 24], "58840": [15, 24], "aa_ldr": [15, 24], "118248": [15, 24], "35469": [15, 24], "82779": [15, 24], "idp": [15, 23, 24], "seq2seq": [15, 23, 24], "aa_mmp2": [15, 24], "573": [15, 24], "312976": [15, 24], "2416": [15, 24], "310560": [15, 24], "aa_rnabind": [15, 24], "221": [15, 16, 24], "55001": [15, 24], "6492": [15, 24], "48509": [15, 24], "gmksvm": [15, 24], "ru": [15, 24], "aa_sa": [15, 24], "101082": [15, 24], "84523": [15, 24], "1414": [15, 24], "8484": [15, 24], "511": [15, 24], "903": [15, 16, 24], "rerf": [15, 23, 24], "pred": [15, 23, 24], "seq_capsid": [15, 16, 24], "7935": [15, 24], "3364680": [15, 24], "3864": [15, 24], "4071": [15, 24], "viralpro": [15, 23, 24], "seq_disulfid": [15, 16, 24], "2547": [15, 24], "614470": [15, 24], "897": [15, 24], "1650": [15, 24], "dipro": [15, 24], "seq_loc": [15, 16, 24], "1835": [15, 24], "732398": [15, 24], "1045": [15, 24], "nan": [15, 24], "seq_solubl": [15, 16, 24], "17408": [15, 24], "4432269": [15, 24], "8704": [15, 24], "solpro": [15, 23, 24], "seq_tail": [15, 16, 24], "6668": [15, 24], "2671690": [15, 24], "2574": [15, 24], "4094": [15, 24], "126": [15, 24], "92964": [15, 24], "prefix": 15, "exemplifi": 15, "here": [15, 18, 24], "df_seq1": 15, "df_seq2": 15, "df_seq3": 15, "compar": [15, 17, 21, 22, 24, 27, 28], "capsid_1": 15, "mvthnvkinkhvtrrsyssakevleippltevqtasykwfmdkgik": 15, "capsid_2": 15, "mkkrqkkmtlsnftdtsfqdfvsaeqvddksamalinraedfkagq": 15, "being": [15, 18, 24], "balanc": 15, "200": [15, 16], "value_count": 15, "dtype": 15, "int64": 15, "Or": 15, "distribut": 15, "list_seq_len": 15, "histplot": 15, "binwidth": 15, "xlim": 15, "1500": 15, "800": [15, 16], "seen": 15, "caspase3_1": 15, "mslfdlfrgffgfpgprshrdpffggmtrdedddeeeeeeggswgr": 15, "caspase3_2": 15, "mevtgdagvpesgeirtlkpcllrrnysreqhgvaascledlrska": 15, "caspase3_3": 15, "mrarsgargalllalllcwdptpslagidsggqalpdsfpsapaeq": 15, "caspase3_4": 15, "mdakarncllqhrealekdiktsyimdhmisdgfltiseeekvrn": 15, "conveni": 15, "flank": 15, "ensur": [15, 18], "equal": 15, "while": 15, "popular": [15, 29], "caspase3_1_pos4": 15, "mslfdlfrg": 15, "caspase3_1_pos5": 15, "slfdlfrgf": 15, "caspase3_1_pos6": 15, "lfdlfrgff": 15, "caspase3_1_pos7": 15, "fdlfrgffg": 15, "21": [15, 24], "caspase3_55_pos170": 15, "kkrkleeeedgklkkpknkdk": 15, "caspase3_29_pos185": 15, "cphhercsdsdglappqhlir": 15, "caspase3_64_pos431": 15, "dnplnwpdekdssfyrnfgst": 15, "caspase3_93_pos455": 15, "fvknmnrdstfivnktitaev": 15, "caspase3_38_pos129": 15, "ssfdldydfqrdyydrmysyp": 15, "caspase3_8_pos33": 15, "rppqlrpgaptslqtepqgnp": 15, "typic": [15, 21, 24], "But": 15, "mani": 15, "challeng": 15, "might": [15, 24], "unbalanc": [15, 17, 18, 21, 22, 25, 30], "lack": 15, "clear": [15, 18], "scenario": 15, "denot": [15, 24], "_pu": [15, 24], "dom_gsec_pu": [15, 24], "p05067": 15, "mlpglallllaawtaralevptdgnagllaepqiamfcgrlnmhmn": 15, "723": [15, 16], "faedvgsnkg": 15, "aiiglmvggvviatvivitlvml": 15, "kkkqytsihh": 15, "p14925": 15, "magrarsgllllllgllalqssclafrsplsvfkrfkettrsfsn": 15, "890": 15, "klstepgsgv": 15, "svvlittllvipvlvllaivmfi": 15, "rwkksrafgd": 15, "p70180": 15, "mrslllftfsacvllarvllaggassgagdtrpgsrrrarealaaq": 15, "477": 15, "499": 15, "pckssgglee": 15, "savtgivvgallgagllmafyff": 15, "rkkyriti": 15, "q03157": 15, "mgptspaargqgrrwrppplplllplsllllraqlavgnlavgsp": 15, "585": [15, 16], "607": [15, 16], "apsgtgvsr": 15, "alsgllimgagggslivlslll": 15, "rkkkpygti": 15, "q06481": 15, "maatgtaaaaatgrllllllvgltapalalagyiealaanagtgfa": 15, "716": [15, 16], "lredfslsss": 15, "aligllviavaiatvivislvml": 15, "rkrqygtish": 15, "121": 15, "p36941": 15, "mllpwatsapglawgplvlglfgllaasqpqavppyasenqtcrdq": 15, "226": [15, 16], "248": [15, 16], "plppemsgtm": 15, "lmlavllplafflllatvfsciw": 15, "kshpslcrkl": 15, "122": 15, "p25446": 15, "mlwiwavlplvlagsqlrvhtqgtnsiseslklrrrvretdkncs": 15, "170": [15, 16], "187": 15, "ncrkqsprnr": 15, "lwlltilvlliplvfiyr": 15, "kyrkrkcwkr": 15, "123": 15, "q9p2j2": 15, "mvwclglavlslvisqgadgrgkpevvsvvgragesvvlgcdllpp": 15, "pgllpqpvla": 15, "gvvggvcflgvavlvsilagcl": 15, "nrrraarrrr": 15, "124": 15, "q96j42": 15, "mvpaagrrpprvmrllgwwqvllwvlglpvrgvevaeesgrlwse": 15, "lpstliksvd": 15, "wllvfslfflisfimyati": 15, "rtesirwlip": 15, "125": 15, "p0dpa2": 15, "mrvggafhlllvclspallsavringdgqevlylaegdnvrlgcpi": 15, "265": 15, "kvsdsrrigv": 15, "iigivlgsllalgclavgiwglv": 15, "ccccggsgag": 15, "df_seq_pu": 15, "p60852": 15, "maggsattwgypvallllvatlglgrwlqpdpglpglrhsydcgik": 15, "624": [15, 16], "dsngnsslrp": 15, "llwavlllpavalvlgfgvfvgl": 15, "sqtwaqklw": 15, "p20239": 15, "marwqrkasvsspcgrsiyrflsllftlvtsvnsvslpqsenpafp": 15, "684": [15, 16], "703": [15, 16], "iiakdiaskt": 15, "lgavaalvgsavilgficyl": 15, "ykkrtirfnh": 15, "691": [15, 16], "p21754": 15, "melsyrlficlllwgstelcypqplwllqggashpetsvqpvlvec": 15, "409": 15, "eqwalpsdt": 15, "vvllgvglavvvsltltavilvl": 15, "trrcrtashp": 15, "q12836": 15, "mwllrcvllcvslslavsgqhkpeapdyssvlhcgpwsfqfavnln": 15, "528": 15, "eklrvpvdsk": 15, "vlwvaglsgtlilgallvsylav": 15, "kkqkscpdqm": 15, "693": [15, 16], "q8tcw7": 15, "meqiwllllltirvlpgsaqfngyncdanlhsrfpaerdisvycgv": 15, "374": 15, "396": [15, 16], "pfqlnaitsa": 15, "lisgmvilgvtsfslllcslal": 15, "hrkgptslvl": 15, "six": 16, "version": [16, 24], "raw": [16, 24], "df_raw": 16, "df_pc": 16, "argp820101": 16, "argp820102": 16, "argp820103": 16, "begf750101": 16, "begf750102": 16, "begf750103": 16, "bhar880101": 16, "biov880101": 16, "koeh090103": 16, "koeh090104": 16, "koeh090105": 16, "koeh090106": 16, "koeh090107": 16, "koeh090108": 16, "koeh090109": 16, "koeh090110": 16, "koeh090111": 16, "koeh090112": 16, "230": 16, "355": 16, "504": 16, "249": 16, "476": 16, "194": 16, "300": 16, "551": 16, "222": 16, "273": 16, "522": 16, "579": 16, "205": 16, "936": 16, "449": 16, "346": 16, "285": 16, "416": 16, "867": 16, "889": 16, "720": 16, "556": 16, "875": 16, "919": 16, "796": 16, "177": 16, "019": 16, "713": 16, "267": 16, "811": 16, "106": 16, "542": 16, "593": 16, "853": 16, "913": 16, "681": 16, "601": 16, "049": 16, "189": 16, "148": 16, "182": 16, "017": 16, "026": 16, "309": 16, "544": 16, "608": 16, "538": 16, "571": 16, "481": 16, "082": 16, "053": 16, "633": 16, "856": 16, "370": 16, "618": 16, "726": 16, "838": 16, "543": 16, "671": 16, "885": 16, "074": 16, "167": 16, "051": 16, "276": 16, "003": 16, "004": 16, "687": 16, "737": 16, "933": 16, "873": 16, "779": 16, "405": 16, "989": 16, "281": 16, "078": 16, "118": 16, "333": 16, "445": 16, "289": 16, "132": 16, "185": 16, "192": 16, "180": [16, 24], "057": 16, "675": 16, "552": 16, "645": 16, "753": 16, "706": 16, "599": 16, "587": 16, "223": 16, "220": 16, "367": 16, "322": 16, "678": 16, "570": 16, "594": 16, "211": 16, "131": 16, "395": 16, "795": 16, "676": 16, "733": 16, "628": 16, "483": 16, "047": 16, "489": 16, "940": 16, "215": 16, "852": 16, "743": 16, "362": 16, "851": 16, "589": 16, "655": 16, "590": 16, "382": 16, "384": 16, "379": 16, "598": 16, "312": 16, "366": 16, "578": 16, "407": 16, "364": 16, "331": 16, "514": 16, "498": 16, "809": 16, "365": 16, "033": 16, "111": [16, 24], "156": 16, "496": 16, "146": 16, "600": 16, "400": 16, "316": 16, "244": 16, "709": 16, "107": 16, "502": 16, "588": 16, "286": 16, "644": 16, "474": 16, "410": 16, "429": 16, "413": 16, "235": 16, "336": 16, "586": [16, 24], "lins030110": 16, "fold": [16, 24], "coil": [16, 24], "median": 16, "resi": 16, "lins030113": 16, "janj780101": 16, "janin": [16, 24], "janj780103": 16, "expos": [16, 24], "lins030104": 16, "lins030107": 16, "win3": 16, "choc760102": 16, "prot": 16, "lins030116": 16, "strand": [16, 24], "lins030119": 16, "lins030103": 16, "hydrophil": [16, 24], "resid": 16, "stem": 16, "best": 16, "top60_id": 16, "acc": 16, "presenc": [16, 24], "absenc": [16, 24], "df_top60": 16, "aac01": 16, "aac02": 16, "aac03": 16, "aac04": 16, "aac05": 16, "aac06": 16, "aac07": 16, "aac08": 16, "aac09": 16, "aac10": 16, "df_eval": 16, "overal": 16, "aa5_caspase3": 16, "aa5_furin": 16, "aa5_ldr": 16, "aa5_mmp2": 16, "aa9_ldr": 16, "aa9_mmp2": 16, "aa9_rnabind": 16, "aa9_sa": 16, "aa13_caspase3": 16, "aa13_furin": 16, "aa13_ldr": 16, "aa13_mmp2": 16, "aa13_rnabind": 16, "aa13_sa": 16, "761": 16, "827": 16, "746": 16, "646": 16, "884": 16, "862": 16, "901": 16, "659": 16, "664": 16, "918": 16, "652": 16, "747": 16, "830": 16, "742": 16, "653": 16, "886": 16, "855": 16, "642": 16, "792": 16, "916": 16, "741": 16, "829": 16, "648": 16, "904": 16, "685": 16, "636": 16, "710": 16, "791": 16, "914": 16, "695": 16, "828": 16, "731": 16, "654": 16, "906": 16, "686": 16, "640": 16, "915": 16, "739": 16, "752": 16, "888": 16, "658": 16, "682": 16, "649": 16, "665": 16, "789": 16, "611": 16, "833": 16, "650": 16, "882": 16, "858": 16, "606": 16, "638": 16, "711": 16, "661": 16, "831": 16, "603": 16, "669": 16, "826": 16, "647": 16, "614": 16, "860": 16, "908": 16, "632": 16, "aac11": 16, "832": 16, "751": 16, "781": 16, "683": 16, "aac12": 16, "708": 16, "785": 16, "917": 16, "aac13": 16, "744": 16, "aac14": 16, "902": 16, "673": 16, "794": 16, "604": 16, "aac15": 16, "660": 16, "aac16": 16, "755": 16, "635": 16, "702": 16, "aac17": 16, "740": 16, "793": 16, "609": 16, "aac18": 16, "757": 16, "730": 16, "643": 16, "881": 16, "899": 16, "aac19": 16, "764": 16, "aac20": 16, "677": 16, "aac21": 16, "637": 16, "aac22": 16, "880": 16, "700": 16, "788": 16, "aac23": 16, "629": 16, "aac24": 16, "641": 16, "aac25": 16, "639": 16, "879": 16, "aac26": 16, "aac27": 16, "854": 16, "aac28": 16, "821": 16, "898": 16, "aac29": 16, "763": 16, "900": 16, "aac30": 16, "911": 16, "616": 16, "aac31": 16, "727": 16, "631": 16, "784": 16, "aac32": 16, "aac33": 16, "817": 16, "922": 16, "aac34": 16, "729": 16, "aac35": 16, "758": 16, "822": 16, "aac36": 16, "759": 16, "874": 16, "aac37": 16, "596": 16, "aac38": 16, "766": 16, "921": 16, "aac39": 16, "786": 16, "aac40": 16, "819": 16, "870": 16, "775": 16, "910": 16, "aac41": 16, "896": 16, "aac42": 16, "861": 16, "895": 16, "799": 16, "674": 16, "aac43": 16, "767": 16, "815": 16, "848": 16, "782": 16, "625": 16, "aac44": 16, "825": 16, "621": 16, "696": 16, "780": 16, "aac45": 16, "844": 16, "893": 16, "672": 16, "774": 16, "aac46": 16, "812": 16, "626": 16, "843": 16, "623": 16, "aac47": 16, "717": 16, "aac48": 16, "891": 16, "776": 16, "aac49": 16, "807": 16, "630": 16, "850": 16, "892": 16, "aac50": 16, "aac51": 16, "768": 16, "865": 16, "836": 16, "668": 16, "aac52": 16, "814": 16, "aac53": 16, "765": 16, "aac54": 16, "699": 16, "aac55": 16, "769": 16, "580": 16, "595": 16, "aac56": 16, "aac57": 16, "aac58": 16, "715": 16, "568": 16, "aac59": 16, "797": 16, "592": 16, "aac60": 16, "563": 16, "772": 16, "813": 16, "24": [16, 24], "df_cat_1": 16, "df_raw_1": 16, "df_scales_1": 16, "selected_scal": 16, "tolist": 16, "df_aac1": 16, "buna790103": 16, "bura740102": 16, "cham820102": 16, "cham830102": 16, "cham830103": 16, "cham830105": 16, "chop780101": 16, "chop780204": 16, "chop780206": 16, "kars160110": 16, "kars160112": 16, "kars160118": 16, "kars160119": 16, "kars160122": 16, "lins030105": 16, "lins030109": 16, "264": 16, "262": 16, "298": 16, "863": 16, "149": 16, "947": 16, "442": 16, "213": 16, "397": 16, "473": 16, "247": 16, "311": 16, "152": 16, "085": 16, "208": 16, "139": 16, "169": 16, "133": 16, "240": 16, "470": 16, "160": 16, "393": 16, "145": 16, "134": 16, "424": 16, "115": 16, "495": 16, "554": 16, "433": 16, "458": 16, "114": 16, "421": 16, "218": 16, "553": 16, "067": 16, "021": 16, "526": 16, "135": 16, "480": 16, "043": 16, "087": 16, "532": 16, "335": 16, "963": 16, "317": 16, "319": 16, "198": 16, "468": 16, "390": 16, "339": 16, "515": 16, "486": 16, "275": 16, "257": [16, 18], "350": 16, "150": 16, "534": 16, "178": 16, "565": 16, "320": 16, "327": 16, "369": 16, "537": 16, "540": 16, "002": 16, "209": 16, "081": 16, "well": [16, 18], "subordin": 16, "want": 16, "guyh850104": 16, "appar": 16, "calcul": 16, "ja": 16, "guyh850105": 16, "racs770103": 16, "orient": 16, "prefer": [16, 24], "rackovski": [16, 24], "vheg790101": 16, "lipophil": 16, "phase": 16, "von": 16, "buri": [16, 24], "buriabl": 16, "biov880102": 16, "werd780101": 16, "propens": [16, 24], "insid": [16, 24], "wertz": 16, "scheraga": [16, 24], "predict": [17, 18, 21, 22, 23, 24, 28, 29], "engin": [17, 18, 21, 22, 28], "dpulearn": [17, 20, 21, 22], "moreov": [17, 22], "load_data": [17, 22], "pypi": 17, "conda": [17, 18], "forg": 17, "pip": [17, 18], "introduct": 17, "usag": [17, 18, 21], "contribut": [17, 24], "api": [17, 18], "explain": [17, 18, 23, 25], "ai": [17, 18, 23, 25], "perturb": [17, 29], "search": 17, "page": 17, "work": [17, 20], "pleas": [17, 18, 20], "_": [17, 20], "breimann": [17, 20, 23], "kamp": [17, 20], "steiner": [17, 20], "frishman": [17, 20], "2023": [17, 20], "ontologi": [17, 20, 23], "biorxiv": [17, 20, 23], "welcom": 18, "thank": 18, "open": 18, "project": [18, 24], "focus": 18, "involv": 18, "invalu": 18, "made": 18, "wai": 18, "suggest": 18, "github": 18, "issu": 18, "tracker": 18, "submit": 18, "particip": [18, 24], "discuss": 18, "newcom": 18, "tackl": 18, "good": 18, "email": 18, "stephanbreimann": 18, "gmail": 18, "com": 18, "question": 18, "establish": 18, "comprehens": 18, "robust": 18, "common": 18, "life": [18, 29, 30], "scienc": [18, 29, 30], "seamlessli": 18, "flexibl": [18, 24], "interoper": 18, "packag": 18, "biopython": 18, "reimplement": 18, "solut": 18, "biolog": [18, 21, 24, 29], "context": 18, "relianc": 18, "opaqu": 18, "box": 18, "empir": 18, "insight": 18, "cut": 18, "fair": 18, "account": [18, 24], "transpar": 18, "re": [18, 23], "commit": 18, "divers": 18, "aspect": 18, "causal": 18, "minim": 18, "reproduc": 18, "mre": 18, "least": 18, "amount": 18, "demonstr": 18, "self": 18, "necessari": 18, "confirm": 18, "replic": 18, "guidelin": 18, "To": [18, 25], "git": 18, "breimanntool": 18, "master": 18, "repositori": 18, "your_usernam": 18, "navig": 18, "folder": 18, "up": 18, "cd": 18, "aanalysi": 18, "poetri": 18, "pytest": 18, "hypothesi": 18, "execut": 18, "case": 18, "directori": 18, "substanti": 18, "minor": 18, "typo": 18, "concis": 18, "branch": [18, 24], "fix": 18, "readm": 18, "date": 18, "readthedoc": 18, "crucial": 18, "modif": 18, "render": 18, "correctli": 18, "strive": 18, "consist": [18, 21, 24], "codebas": 18, "standalon": 18, "special": 18, "carri": 18, "complet": 18, "fulfil": 18, "purpos": 18, "implement": 18, "inherit": 18, "supplementari": 18, "accordingli": 18, "semi": 18, "strictli": 18, "adher": 18, "aforement": 18, "primari": [18, 27], "_util": 18, "_utils_const": 18, "py": 18, "modular": 18, "therefor": 18, "flat": 18, "hierarchi": 18, "outlin": 18, "softwar": 18, "user": 18, "friendli": 18, "hint": 18, "enhanc": [18, 24], "propos": 18, "pep": 18, "484": 18, "book": 18, "error": 18, "messag": 18, "docstr": 18, "markup": 18, "languag": 18, "restructuredtext": 18, "rst": 18, "primer": 18, "cheat": 18, "sheet": [18, 24], "restructuretext": 18, "cheatsheet": 18, "sphinx": 18, "autodoc": 18, "napoleon": 18, "extens": 18, "conf": 18, "four": 18, "bird": 18, "ey": 18, "background": 18, "reflect": [18, 24], "medium": [18, 24], "tabular": 18, "critic": 18, "except": 18, "rule": 18, "showcas": 18, "scientif": 18, "mai": 18, "mention": 18, "section": 18, "extern": 18, "note": 18, "go": 18, "_build": 18, "browser": 18, "citat": 20, "wa": 21, "develop": 21, "practic": 21, "2023a": 23, "2023b": 23, "breimann23c": [23, 24], "2023c": 23, "chart": 23, "cheng06": [23, 24], "cheng": 23, "2006": 23, "larg": 23, "disulphid": 23, "bridg": [23, 24], "kernel": 23, "recurs": 23, "neural": 23, "network": 23, "graph": [23, 24], "match": 23, "struct": 23, "funct": 23, "kawashima": 23, "2008": 23, "aid": 23, "databas": 23, "report": 23, "nucleic": 23, "magnan09": [23, 24], "magnan": 23, "randal": 23, "baldi": 23, "2009": [23, 24], "accur": 23, "solubl": [23, 24], "bioinformat": 23, "galiez16": [23, 24], "galiez": 23, "2016": [23, 24], "viral": 23, "capsid": [23, 24], "tail": [23, 24], "song18": [23, 24], "song": 23, "2018": 23, "throughput": 23, "cleavag": [23, 24], "site": [23, 24], "90": 23, "proteas": 23, "shen19": [23, 24], "shen": 23, "2019": 23, "subcellular": [23, 24], "local": [23, 24], "evolutionari": 23, "chou": [23, 24], "pseaac": 23, "j": 23, "theor": 23, "biol": 23, "tang20": [23, 24], "tang": 23, "2020": 23, "intrins": [23, 24], "disord": [23, 24], "teng21": [23, 24], "teng": 23, "2021": 23, "amyloidogen": [23, 24], "pseudo": 23, "tripeptid": 23, "bmc": 23, "yang21": [23, 24], "yang": 23, "granular": 23, "multipl": 23, "rna": [23, 24], "bind": [23, 24], "appl": 23, "chronolog": 24, "histori": 24, "t1_overview_benchmark": 24, "t2_overview_scal": 24, "t3a_aaontology_categori": 24, "t3b_aaontology_subcategori": 24, "begin": 24, "append": 24, "caspas": 24, "furin": 24, "long": 24, "ldr": 24, "metallopeptidas": 24, "mmp2": 24, "rbp60": 24, "solvent": 24, "sa": 24, "amyloidognen": 24, "capdsid": 24, "disulfid": 24, "ss": 24, "bond": 24, "cytoplasm": 24, "plasma": 24, "insolubl": 24, "494524": 24, "unknown": 24, "statu": 24, "tier": 24, "system": 24, "systemat": 24, "arrang": 24, "67": 24, "everi": 24, "main": 24, "clearli": 24, "assess": 24, "couldn": 24, "alloc": 24, "regard": 24, "chothia": 24, "lin": 24, "2003": 24, "64": 24, "cellular": 24, "mitochondria": 24, "1990": 24, "nishikawa": 24, "58": 24, "ranodm": 24, "1977": 24, "fasman": 24, "1978b": 24, "richardson": 24, "1988": 24, "qian": 24, "sejnowski": 24, "aurora": 24, "rose": 24, "1998": 24, "19": 24, "charg": 24, "entropi": 24, "charton": 24, "1983": 24, "gui": 24, "1985": 24, "radzicka": 24, "wolfenden": 24, "could": 24, "mutabl": 24, "sneath": 24, "1966": 24, "amphiphil": 24, "kyte": 24, "doolittl": 24, "1982": 24, "mitaku": 24, "2002": 24, "koehler": 24, "steric": 24, "characterist": 24, "angl": 24, "symmetri": 24, "represent": 24, "prabhakaran": 24, "ponnuswami": 24, "knislei": 24, "45": 24, "stabil": 24, "vihinen": 24, "1994": 24, "bastolla": 24, "2005": 24, "23": 24, "water": 24, "tendenc": 24, "oppos": 24, "1978": 24, "partial": 24, "physic": 24, "displac": 24, "caus": 24, "interact": 24, "mainli": 24, "ones": 24, "bull": 24, "brees": 24, "1974": 24, "bigelow": 24, "1967": 24, "jone": 24, "dayhoff": 24, "interior": 24, "unpolar": 24, "fukuchi": 24, "2001": 24, "mp": 24, "cedano": 24, "1997": 24, "less": 24, "val": 24, "cf": 24, "cap": 24, "asp": 24, "glu": 24, "ly": 24, "arg": 24, "observ": 24, "character": 24, "punta": 24, "maritan": 24, "linker": 24, "georg": 24, "heringa": 24, "2004": 24, "right": 24, "helic": 24, "half": 24, "finkelstein": 24, "1991": 24, "befor": 24, "geisow": 24, "robert": 24, "1980": 24, "ramachandran": 24, "quadrant": 24, "bottom": 24, "paul": 24, "1951": 24, "antiparallel": 24, "lifson": 24, "sander": 24, "1979": 24, "bend": 24, "revers": 24, "tight": 24, "consecut": 24, "back": 24, "hydrogen": 24, "3rd": 24, "4th": 24, "1st": 24, "2nd": 24, "tm": 24, "place": 24, "monn\u00e9": 24, "1999": 24, "\u03c0": 24, "ala": 24, "gln": 24, "fodj": 24, "karadaghi": 24, "net": 24, "donor": 24, "klein": 24, "1984": 24, "acceptor": 24, "faucher": 24, "hi": 24, "electron": 24, "ion": 24, "pot": 24, "potenti": 24, "valenc": 24, "cosic": 24, "low": 24, "due": 24, "strong": 24, "hutchen": 24, "1970": 24, "unfold": 24, "gibb": 24, "denatur": 24, "yutani": 24, "1987": 24, "instabl": 24, "highest": 24, "break": 24, "pro": 24, "munoz": 24, "serrano": 24, "ph": 24, "electr": 24, "neutral": 24, "crystal": 24, "pairwis": 24, "constitu": 24, "atom": 24, "lennard": 24, "oobatak": 24, "ooi": 24, "chang": 24, "divid": 24, "vector": 24, "describ": 24, "aliphat": 24, "linear": 24, "aromat": 24, "carbon": 24, "approxim": 24, "invers": 24, "reactiv": 24, "hydroxythiol": 24, "wold": 24, "occur": 24, "esp": 24, "amphipath": 24, "higher": 24, "highli": 24, "signal": 24, "argo": 24, "cornett": 24, "38": 24, "environ": 24, "eisenberg": 24, "mclachlan": 24, "1986": 24, "surround": 24, "angstrom": 24, "radiu": 24, "pack": 24, "globular": 24, "1981": 24, "eigenvalu": 24, "laplacian": 24, "undirect": 24, "node": 24, "mass": 24, "molecular": 24, "second": 24, "actual": 24, "root": 24, "squar": 24, "gyrat": 24, "farther": 24, "awai": 24, "relationship": 24, "rate": 24, "increas": 24, "factor": 24, "bundi": 24, "wuthrich": 24, "nh": 24, "temperatur": 24, "rigid": 24, "neighbor": 24, "gly": 24, "ser": 24, "particularli": 24, "ptitsyn": 24, "zhou": 24, "equilibrium": 24, "sueki": 24, "flow": 25, "enri": 25, "signatur": 25, "introduc": 26, "togeth": 27, "diagram": 27, "central": 28, "platform": 28, "novel": 28, "everywher": [29, 30], "setup": 29, "augment": 29, "smote": 29, "artifici": 29, "Such": 29, "veri": 29, "deep": 29, "imag": 29, "recognit": 29, "feasibl": 29, "becaus": 29, "slight": 29, "mutat": 29, "alter": 29, "dramat": 29, "often": 29, "great": 29, "quantiti": 29, "besid": 29, "distinguish": 29, "subfield": 29, "prelud": 31}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "CPP"], [3, 0, 1, "", "CPPPlot"], [4, 0, 1, "", "SequenceFeature"], [5, 0, 1, "", "dPULearn"], [6, 3, 1, "", "load_dataset"], [7, 3, 1, "", "load_scales"], [8, 3, 1, "", "plot_gcfs"], [9, 3, 1, "", "plot_get_cdict"], [10, 3, 1, "", "plot_get_cmap"], [11, 3, 1, "", "plot_set_legend"], [12, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "center_labels_"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "cluster_naming"], [1, 1, 1, "", "correlation"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 1, 1, "", "get_cluster_centers"], [1, 1, 1, "", "get_cluster_medoids"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "medoid_ind_"], [1, 2, 1, "", "medoid_labels_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "n_clusters"]], "aaanalysis.CPP": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "heatmap"], [3, 1, 1, "", "profile"], [3, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "add_dif"], [4, 1, 1, "", "add_feat_value"], [4, 1, 1, "", "add_position"], [4, 1, 1, "", "feat_matrix"], [4, 1, 1, "", "feat_names"], [4, 1, 1, "", "get_df_parts"], [4, 1, 1, "", "get_features"], [4, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "eval"], [5, 1, 1, "", "fit"], [5, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 15, 27, 29], "featur": [0, 14], "engin": [0, 14], "pu": [0, 15, 29], "learn": [0, 14, 29], "explain": [0, 14, 30], "ai": [0, 14, 30], "perturb": 0, "plot": [0, 13], "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 17, 27], "aaclust": [1, 14], "note": [1, 2, 4, 5, 6, 7, 12], "cpp": [2, 14, 28], "cppplot": 3, "exampl": [3, 4, 5, 6, 7, 11, 12, 17], "sequencefeatur": 4, "dpulearn": 5, "load_dataset": 6, "load_scal": 7, "plot_gcf": 8, "plot_get_cdict": 9, "plot_get_cmap": 10, "plot_set_legend": 11, "plot_set": 12, "prelud": 13, "quick": [14, 31], "start": [14, 31], "what": [14, 29, 30], "you": 14, "Will": 14, "1": 14, "load": [14, 15, 16], "sequenc": [14, 30], "scale": [14, 16, 24, 26], "2": 14, "compar": 14, "physicochem": [14, 28], "profil": 14, "3": 14, "protein": [14, 15, 24], "predict": 14, "4": 14, "group": 14, "level": [14, 30], "individu": 14, "tutori": [15, 16, 31], "benchmark": [15, 23, 24], "amino": [15, 16, 24, 26], "acid": [15, 16, 24, 26], "window": 15, "size": 15, "posit": 15, "unlabel": 15, "dataset": [15, 23, 24], "three": 16, "set": 16, "numer": 16, "aaontologi": [16, 24, 26], "redund": 16, "reduc": 16, "subset": 16, "filter": 16, "welcom": 17, "document": [17, 18, 21], "instal": [17, 18], "overview": [17, 21, 24], "refer": [17, 23], "indic": 17, "tabl": [17, 24], "citat": 17, "contribut": 18, "introduct": [18, 21], "vision": 18, "object": 18, "non": 18, "goal": 18, "principl": [18, 25], "bug": 18, "report": 18, "latest": 18, "version": 18, "local": 18, "develop": 18, "environ": 18, "fork": 18, "clone": 18, "depend": 18, "run": 18, "unit": 18, "test": 18, "pull": 18, "request": 18, "preview": 18, "chang": 18, "name": 18, "convent": 18, "class": 18, "templat": 18, "function": 18, "method": 18, "code": 18, "philosophi": 18, "style": 18, "layer": 18, "build": 18, "doc": 18, "workflow": 21, "algorithm": 23, "us": [23, 28], "case": 23, "further": [23, 31], "inform": 23, "categori": 24, "subcategori": 24, "usag": 25, "classif": 26, "flow": 27, "enri": 27, "point": 27, "compon": 27, "entri": 27, "bridg": 27, "extern": 27, "librari": 27, "identifi": 28, "signatur": 28, "from": 29, "unbalanc": 29, "small": 29, "i": [29, 30]}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data": [[0, "data"]], "Feature Engineering": [[0, "feature-engineering"]], "PU Learning": [[0, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], "aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [2, null], [2, null], [4, null], [4, null], [4, null], [4, null], [4, null], [5, null], [5, null], [6, null], [7, null], [12, null]], "aaanalysis.CPP": [[2, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[3, "aaanalysis-cppplot"]], "Examples": [[3, null], [4, null], [4, null], [5, null], [6, null], [7, null], [11, null], [12, null]], "aaanalysis.SequenceFeature": [[4, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[5, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[6, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[7, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[8, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[9, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_cmap": [[10, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_set_legend": [[11, "aaanalysis-plot-set-legend"]], "aaanalysis.plot_settings": [[12, "aaanalysis-plot-settings"]], "Plotting prelude": [[13, "plotting-prelude"]], "Quick Start with AAanalysis": [[14, "quick-start-with-aaanalysis"]], "What You Will Learn:": [[14, "what-you-will-learn"]], "1. Loading Sequences and Scales": [[14, "loading-sequences-and-scales"]], "2. Feature Engineering": [[14, "feature-engineering"]], "AAclust": [[14, "aaclust"]], "Comparative Physicochemical Profiling (CPP)": [[14, "comparative-physicochemical-profiling-cpp"]], "3. Protein Prediction": [[14, "protein-prediction"]], "4. Explainable AI": [[14, "explainable-ai"]], "Explainable AI on group level": [[14, "explainable-ai-on-group-level"]], "Explainable AI on individual level": [[14, "explainable-ai-on-individual-level"]], "Data Loading Tutorial": [[15, "data-loading-tutorial"]], "Loading of protein benchmarks": [[15, "loading-of-protein-benchmarks"]], "Loading of protein benchmarks: Amino acid window size": [[15, "loading-of-protein-benchmarks-amino-acid-window-size"]], "Loading of protein benchmarks: Positive-Unlabeled (PU) datasets": [[15, "loading-of-protein-benchmarks-positive-unlabeled-pu-datasets"]], "Scale Loading Tutorial": [[16, "scale-loading-tutorial"]], "Three sets of numerical amino acid scales": [[16, "three-sets-of-numerical-amino-acid-scales"]], "AAontology": [[16, "aaontology"], [24, "aaontology"]], "Redundancy-reduce scale subsets": [[16, "redundancy-reduce-scale-subsets"]], "Filtering of scales": [[16, "filtering-of-scales"]], "Welcome to the AAanalysis documentation!": [[17, "welcome-to-the-aaanalysis-documentation"]], "Install": [[17, "install"]], "OVERVIEW": [[17, null]], "EXAMPLES": [[17, null]], "REFERENCES": [[17, null]], "Indices and tables": [[17, "indices-and-tables"]], "Citation": [[17, "citation"]], "Contributing": [[18, "contributing"]], "Introduction": [[18, "introduction"], [21, "introduction"]], "Vision": [[18, "vision"]], "Objectives": [[18, "objectives"]], "Non-goals": [[18, "non-goals"]], "Principles": [[18, "principles"]], "Bug Reports": [[18, "bug-reports"]], "Installation": [[18, "installation"]], "Latest Version": [[18, "latest-version"]], "Local Development Environment": [[18, "local-development-environment"]], "Fork and Clone": [[18, "fork-and-clone"]], "Install Dependencies": [[18, "install-dependencies"]], "Run Unit Tests": [[18, "run-unit-tests"]], "Pull Requests": [[18, "pull-requests"]], "Preview Changes": [[18, "preview-changes"]], "Documentation": [[18, "documentation"]], "Naming Conventions": [[18, "naming-conventions"]], "Class Templates": [[18, "class-templates"]], "Function and Method Naming": [[18, "function-and-method-naming"]], "Code Philosophy": [[18, "code-philosophy"]], "Documentation Style": [[18, "documentation-style"]], "Documentation Layers": [[18, "documentation-layers"]], "Building the Docs": [[18, "building-the-docs"]], "Workflow": [[21, "workflow"]], "Overview of documentation": [[21, "overview-of-documentation"]], "References": [[23, "references"]], "Algorithms": [[23, "algorithms"]], "Datasets and Benchmarks": [[23, "datasets-and-benchmarks"]], "Use Cases": [[23, "use-cases"]], "Further Information": [[23, "further-information"]], "Tables": [[24, "tables"]], "Overview Table": [[24, "overview-table"]], "Protein Benchmark Datasets": [[24, "protein-benchmark-datasets"]], "Amino Acid Scale Datasets": [[24, "amino-acid-scale-datasets"]], "Categories": [[24, "categories"]], "Subcategories": [[24, "subcategories"]], "Usage Principles": [[25, "usage-principles"]], "AAontology: Classification of amino acid scales": [[26, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[27, "data-flow-and-enry-points"]], "Data Flow: Components of AAanalysis": [[27, "data-flow-components-of-aaanalysis"]], "Entry Points: Bridges to External Libraries": [[27, "entry-points-bridges-to-external-libraries"]], "Identifying Physicochemical Signatures using CPP": [[28, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[29, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[29, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[30, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[30, "what-is-explainable-ai"]], "Tutorials": [[31, "tutorials"]], "Quick start": [[31, "quick-start"]], "Further Tutorials": [[31, "further-tutorials"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "center_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.center_labels_"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "cluster_naming() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.cluster_naming"]], "correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.correlation"]], "eval() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "get_cluster_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_centers"]], "get_cluster_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_medoids"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "medoid_ind_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_ind_"]], "medoid_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_labels_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "cpp (class in aaanalysis)": [[2, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp static method)": [[2, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[3, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[4, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[5, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[5, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[6, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[7, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[8, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[9, "aaanalysis.plot_get_cdict"]], "plot_get_cmap() (in module aaanalysis)": [[10, "aaanalysis.plot_get_cmap"]], "plot_set_legend() (in module aaanalysis)": [[11, "aaanalysis.plot_set_legend"]], "plot_settings() (in module aaanalysis)": [[12, "aaanalysis.plot_settings"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_set_legend", "generated/aaanalysis.plot_settings", "generated/plotting_prelude", "generated/tutorial1_quick_start", "generated/tutorial2a_data_loader", "generated/tutorial2b_scales_loader", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tutorials"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_set_legend.rst", "generated/aaanalysis.plot_settings.rst", "generated/plotting_prelude.rst", "generated/tutorial1_quick_start.rst", "generated/tutorial2a_data_loader.rst", "generated/tutorial2b_scales_loader.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles.rst", "index/usage_principles/aaontology.rst", "index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tutorials.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_cmap", "aaanalysis.plot_set_legend", "aaanalysis.plot_settings", "Plotting prelude", "Quick Start with AAanalysis", "Data Loading Tutorial", "Scale Loading Tutorial", "Welcome to the AAanalysis documentation!", "Contributing", "<no title>", "<no title>", "Introduction", "<no title>", "References", "Tables", "Usage Principles", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tutorials"], "terms": {"thi": [0, 1, 3, 7, 12, 13, 14, 15, 16, 18, 27], "applic": [0, 3], "program": [0, 18], "interfac": [0, 18, 24], "i": [0, 1, 2, 3, 4, 5, 6, 8, 10, 13, 14, 15, 16, 17, 18, 21, 22, 24, 26, 28], "public": [0, 13, 15, 17, 18, 20], "object": [0, 1, 3, 4, 5, 14], "function": [0, 3, 8, 10, 12, 13, 14, 15, 16, 17, 22], "our": [0, 13, 14, 16, 18, 21], "aaanalysi": [0, 15, 16, 18, 20, 21, 22, 24, 25, 28, 31], "python": [0, 14, 17, 18, 21, 22], "toolkit": [0, 18, 27], "which": [0, 1, 3, 4, 8, 14, 15, 16, 18, 21, 24, 27, 29], "can": [0, 1, 4, 5, 11, 14, 15, 16, 17, 18, 21, 24, 27, 29], "import": [0, 4, 5, 6, 7, 11, 12, 14, 15, 16, 18, 25], "aa": [0, 2, 4, 5, 6, 7, 11, 12, 14, 15, 16, 24, 25], "you": [0, 16, 17, 18, 20], "access": [0, 6, 14, 16, 24], "all": [0, 1, 2, 3, 4, 6, 7, 12, 14, 16, 18, 24], "method": [0, 1, 2, 3, 4, 5, 23], "via": [0, 18, 23], "alia": [0, 4], "load_dataset": [0, 4, 14, 15, 16, 24], "class": [1, 2, 3, 4, 5, 6, 15, 29], "model": [1, 5, 14, 18, 29], "none": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15], "model_kwarg": 1, "verbos": [1, 2, 3, 4, 5, 12, 14], "fals": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 14, 16], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 18], "base": [1, 2, 3, 4, 5, 6, 10, 14, 17, 18, 21, 22, 23, 24, 28, 29], "A": [1, 4, 6, 11, 14, 15, 16, 18, 21, 23], "k": [1, 16, 17, 21, 22, 23], "optim": [1, 2, 3, 17, 21, 22, 23], "cluster": [1, 14, 17, 21, 22, 23, 24], "framework": [1, 14, 17, 21, 22], "select": [1, 2, 3, 6, 7, 14, 15, 16, 17, 18, 21, 22, 23], "redund": [1, 2, 7, 14, 17, 21, 22, 23], "reduc": [1, 5, 7, 17, 21, 22, 23, 24], "set": [1, 2, 3, 4, 5, 7, 8, 11, 12, 14, 15, 17, 18, 21, 22, 23, 24, 27], "numer": [1, 3, 4, 14, 17, 21, 22], "scale": [1, 2, 3, 4, 7, 9, 10, 12, 17, 20, 21, 22, 23, 25, 27, 31], "design": [1, 3, 18, 24, 28], "primarili": [1, 5, 18], "amino": [1, 2, 3, 4, 6, 7, 14, 17, 20, 21, 22, 23, 25, 27, 29], "acid": [1, 2, 3, 4, 6, 7, 14, 17, 20, 21, 22, 23, 25, 27, 29], "versatil": 1, "enough": 1, "ani": [1, 16, 18, 21, 24], "indic": [1, 3, 4, 5, 15, 16, 24], "It": [1, 14, 15, 21, 24, 27], "take": [1, 14], "requir": 1, "pre": [1, 2, 14, 15, 18], "defin": [1, 4, 7, 14, 15, 18, 24, 27], "number": [1, 2, 3, 4, 5, 6, 10, 11, 15, 16, 24], "from": [1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 24, 25], "scikit": [1, 18], "learn": [1, 5, 15, 17, 18, 20, 21, 22, 23, 24, 25], "http": [1, 18], "org": [1, 18], "stabl": 1, "modul": [1, 17], "html": [1, 18], "By": [1, 6], "leverag": 1, "pearson": [1, 2], "correl": [1, 2, 24], "similar": [1, 24, 29], "measur": [1, 14, 18, 24], "valu": [1, 2, 3, 4, 14, 16, 18, 21, 24], "one": [1, 3], "repres": [1, 3, 14, 15, 21, 24], "sampl": [1, 2, 3, 4, 5, 15, 24, 29], "term": [1, 16, 24], "medoid": 1, "each": [1, 2, 3, 4, 5, 14, 15, 16, 18], "closest": 1, "": [1, 11, 15, 16, 18, 23, 24], "center": [1, 10, 14, 24], "yield": 1, "paramet": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 24], "callabl": 1, "option": [1, 2, 3, 4, 5, 6, 7, 10, 12], "default": [1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 14, 15, 16], "sklearn": [1, 14], "kmean": 1, "The": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15, 16, 18, 24, 27, 28], "emploi": [1, 5], "given": [1, 3, 4, 6, 14, 16, 24], "n_cluster": [1, 14], "dict": [1, 2, 3, 4, 5, 9, 10, 11], "dictionari": [1, 2, 3, 4, 9, 10, 11], "keyword": [1, 3, 5], "argument": [1, 3, 4, 5, 11], "pass": [1, 3, 5, 11, 18], "bool": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "flag": 1, "enabl": [1, 2, 3, 4, 5, 12, 17, 18, 21, 22, 28], "disabl": [1, 6, 16], "output": [1, 4, 5, 12], "obtain": [1, 4, 7, 14, 24], "type": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, 18, 24], "int": [1, 2, 3, 4, 5, 6, 7, 10, 11], "labels_": [1, 5], "label": [1, 2, 3, 4, 5, 6, 11, 14, 15, 18, 24, 29], "order": [1, 18, 24], "featur": [1, 2, 3, 4, 5, 10, 17, 18, 21, 22, 27, 28, 29], "matrix": [1, 4, 5, 14, 24], "arrai": [1, 2, 4, 5, 14], "like": [1, 2, 4, 5, 18, 24], "centers_": 1, "averag": [1, 4, 14, 16, 24], "correspond": [1, 15, 18, 24], "center_labels_": 1, "medoids_": 1, "medoid_labels_": 1, "medoid_ind_": 1, "chosen": [1, 2, 4, 6, 7, 15], "within": [1, 2, 4, 18, 24, 27], "origin": [1, 16], "dataset": [1, 2, 6, 7, 14, 16, 17, 18, 21, 22, 29, 30], "__init__": [1, 2, 3, 4, 5], "fit": [1, 5, 14, 18], "x": [1, 3, 5, 6, 11, 12, 14], "name": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 14, 15, 16, 24], "on_cent": 1, "true": [1, 2, 3, 4, 6, 7, 11, 12, 15, 16], "min_th": 1, "0": [1, 2, 3, 4, 5, 6, 11, 12, 14, 15, 16, 24, 29], "merge_metr": 1, "euclidean": [1, 5], "data": [1, 3, 5, 6, 7, 16, 17, 18, 24, 25, 31], "format": [1, 12, 24], "us": [1, 2, 3, 5, 6, 7, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 24, 25, 27, 29], "determin": [1, 7], "without": [1, 3, 18, 24], "specif": [1, 9, 15, 18, 24], "partit": [1, 16, 24], "maxim": 1, "beyond": 1, "threshold": [1, 2], "qualiti": 1, "either": [1, 4, 6, 7, 16, 17], "minimum": [1, 4, 6], "member": 1, "min_cor": 1, "between": [1, 2, 3, 4, 10, 11, 14, 15, 24], "its": [1, 15, 18, 24], "govern": 1, "undergo": 1, "three": [1, 4, 10, 13, 15, 24], "stage": 1, "1": [1, 2, 3, 4, 5, 6, 7, 11, 12, 15, 16, 24, 29], "estim": 1, "lower": [1, 24], "bound": 1, "2": [1, 2, 3, 4, 5, 11, 15, 16, 24, 29], "refin": 1, "metric": [1, 5, 18], "3": [1, 4, 5, 11, 15, 16, 18, 24], "merg": 1, "smaller": 1, "direct": [1, 18], "final": 1, "reduct": 1, "shape": [1, 2, 3, 4, 5, 11, 14, 24], "n_sampl": [1, 2, 4, 5], "n_featur": [1, 2, 3, 4, 5], "where": [1, 4, 5, 24], "list": [1, 3, 4, 10, 11, 14, 24], "str": [1, 3, 4, 5, 6, 7, 9, 10, 11, 12], "If": [1, 2, 3, 4, 5, 6, 7, 10, 12, 16, 17, 18, 20, 29], "provid": [1, 2, 3, 5, 6, 7, 10, 14, 15, 16, 17, 18, 22, 24, 29], "return": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15], "appli": [1, 5, 10, 11, 12, 15], "otherwis": [1, 3, 4, 5, 24], "float": [1, 2, 3, 5, 10, 11, 12], "instead": 1, "names_medoid": 1, "follow": [1, 2, 4, 5, 7, 13, 17, 18, 20, 21, 22, 25], "attribut": 1, "attr": 1, "For": [1, 4, 6, 11, 15, 18, 29], "further": [1, 3, 16, 18, 24], "inform": [1, 2, 3, 4, 5, 14, 16, 27], "refer": [1, 2, 4, 6, 14, 18, 24], "paper": 1, "todo": [1, 2], "add": [1, 2, 3, 4], "link": [1, 2, 17, 18, 20, 23], "cluster_nam": 1, "name_unclassifi": 1, "unclassifi": [1, 7, 14, 16, 24], "assign": [1, 3, 4, 5, 16, 24], "frequenc": [1, 24], "renam": 1, "prioriti": 1, "most": [1, 2, 3, 5, 14, 17, 21, 22], "frequent": 1, "alreadi": [1, 29], "doe": 1, "exist": [1, 18, 29], "cannot": 1, "classifi": [1, 3], "static": [1, 2, 4], "get_cluster_cent": 1, "comput": [1, 2, 3, 4, 14, 18, 23, 24], "center_label": 1, "associ": [1, 24], "get_cluster_medoid": 1, "medoid_label": 1, "medoid_ind": 1, "index": [1, 6, 16, 17, 18, 23], "x_test": 1, "x_ref": 1, "labels_test": 1, "labels_ref": 1, "n": [1, 2, 3, 4, 6, 7, 14, 15, 16, 18, 23, 24], "posit": [1, 2, 3, 4, 5, 6, 10, 14, 17, 21, 22, 24, 29], "except_unclassifi": 1, "test": [1, 2, 14, 16], "top": [1, 7, 24], "consid": [1, 7, 18], "strength": 1, "els": 1, "neg": [1, 4, 5, 6, 10, 15, 24, 29], "exclud": [1, 16], "list_top_center_name_corr": 1, "have": [1, 14, 15, 16, 18, 24, 29], "strongest": 1, "eval": [1, 2, 5, 18], "df_scale": [2, 4, 7, 14, 16, 27], "df_cat": [2, 3, 4, 7, 16, 27], "df_part": [2, 4, 14, 27], "split_kw": [2, 4, 14, 27], "accept_gap": [2, 3, 4], "tool": [2, 18, 23], "creat": [2, 3, 4, 5, 14, 18, 27], "filter": [2, 3, 6, 14, 15], "ar": [2, 3, 4, 5, 6, 7, 14, 15, 16, 18, 24, 27, 29, 30], "discrimin": [2, 3, 14], "two": [2, 3, 7, 14, 16, 17, 18, 21, 22, 23, 24, 26, 27], "sequenc": [2, 3, 4, 5, 6, 15, 17, 18, 21, 22, 23, 24, 25, 27, 28, 29], "panda": [2, 3, 4, 5, 6, 7, 14, 18], "datafram": [2, 3, 4, 5, 6, 7, 14, 18, 27], "load_categori": [2, 4], "categori": [2, 3, 4, 7, 9, 10, 11, 14, 15, 16], "physicochem": [2, 4, 17, 21, 22, 23, 24, 25, 27], "part": [2, 3, 4, 14, 18, 27], "sequencefeatur": [2, 14], "get_split_kw": [2, 4, 14], "nest": [2, 4], "split_typ": [2, 4, 14], "whether": [2, 3, 4, 10, 11], "accept": [2, 3, 4], "miss": [2, 3, 4], "omit": [2, 3, 4], "print": [2, 3, 4, 14], "progress": [2, 3, 23], "about": [2, 3], "algorithm": [2, 3, 14, 17, 18, 21, 22, 27, 28], "run": [2, 4, 14], "perform": [2, 5, 14, 16, 24], "step": [2, 3, 4, 6, 7, 18, 21], "parametr": 2, "n_filter": 2, "100": [2, 6, 10, 14, 15], "tmd_len": [2, 3, 4], "20": [2, 3, 4, 7, 15, 16, 18, 24], "jmd_n_len": [2, 3, 4], "10": [2, 3, 4, 10, 14, 15, 16, 24], "jmd_c_len": [2, 3, 4], "ext_len": [2, 3, 4], "4": [2, 3, 4, 15, 16, 24], "start": [2, 3, 4, 6, 18, 24, 25, 27], "check_cat": 2, "n_pre_filt": 2, "pct_pre_filt": 2, "5": [2, 3, 4, 5, 11, 14, 15, 16, 24], "max_std_test": 2, "max_overlap": 2, "max_cor": 2, "n_process": 2, "pipelin": [2, 18], "creation": 2, "aim": [2, 3, 14, 18], "identifi": [2, 3, 5, 6, 14, 15, 17, 21, 22, 23, 25, 29], "collect": [2, 7], "non": [2, 4, 6, 14, 24], "group": [2, 3, 4, 24], "t": [2, 6, 16, 24], "u": [2, 17, 18], "p": [2, 16, 23], "percentag": [2, 5, 10, 16], "length": [2, 3, 4, 6, 14, 15, 24], "tmd": [2, 3, 4, 6, 14, 15], "explan": [2, 3, 18], "first": [2, 3, 4, 7, 10, 18], "terminu": [2, 3, 4, 24], "jmd": [2, 3, 4, 14], "c": [2, 3, 4, 14, 15, 16, 17, 23, 24], "extend": [2, 3, 4, 18, 24, 29], "termin": [2, 3, 4, 14, 15, 24], "should": [2, 3, 4, 5, 18, 29], "longer": 2, "than": [2, 24], "check": [2, 18], "remain": [2, 16, 18], "after": [2, 24], "maximum": [2, 4, 5, 6, 14], "standard": [2, 29], "deviat": 2, "overlap": 2, "cpu": 2, "multiprocess": 2, "automat": [2, 3, 5, 18], "df_feat": [2, 3, 4, 14, 27], "uniqu": [2, 3, 16], "statist": [2, 3], "n_feature_inform": [2, 3], "contain": [2, 3, 5, 6, 7, 16, 18, 24, 27, 29], "eleven": 2, "column": [2, 3, 4, 5, 6, 7, 11, 15, 16, 18], "includ": [2, 4, 6, 7, 10, 11, 18], "id": [2, 4, 6, 7, 16], "result": 2, "rank": [2, 16], "11": [2, 3, 11, 14, 15, 24], "split": [2, 4, 14, 27], "subcategori": [2, 3, 7, 14, 16], "sub": 2, "scale_nam": [2, 3, 7, 14, 16], "abs_auc": [2, 3, 14], "absolut": [2, 18], "adjust": [2, 3, 12], "auc": 2, "abs_mean_dif": [2, 14], "mean": [2, 3, 14, 16, 24], "differ": [2, 3, 4, 11, 15, 16, 27], "std_test": [2, 3, 14], "std_ref": [2, 14], "p_val": 2, "mann_whitnei": 2, "ttest_indep": 2, "p_val_fdr_bh": [2, 14], "benjamini": 2, "hochberg": 2, "fdr": 2, "correct": 2, "get": [2, 4, 8, 25], "evalu": [2, 7, 16, 18, 24], "condit": [3, 4], "jmd_m_len": [3, 4], "profil": [3, 9, 10, 17, 21, 22, 28], "y": [3, 11, 12, 14, 16], "val_col": 3, "mean_dif": [3, 14], "val_typ": 3, "count": [3, 15], "normal": [3, 7, 11, 16, 24], "figsiz": 3, "7": [3, 4, 5, 12, 14, 15, 16, 24], "titl": [3, 11], "title_kw": 3, "dict_color": [3, 9, 10, 11], "edge_color": 3, "bar_width": 3, "75": 3, "add_jmd_tmd": 3, "jmd_n_seq": 3, "tmd_seq": 3, "jmd_c_seq": 3, "tmd_color": 3, "mediumspringgreen": 3, "jmd_color": 3, "blue": [3, 11, 14], "tmd_seq_color": 3, "black": [3, 18], "jmd_seq_color": 3, "white": 3, "seq_siz": 3, "tmd_jmd_fontsiz": 3, "xtick_siz": 3, "xtick_width": 3, "xtick_length": 3, "xticks_po": 3, "ytick_siz": 3, "ytick_width": 3, "ytick_length": 3, "ylim": [3, 14], "highlight_tmd_area": 3, "highlight_alpha": 3, "15": [3, 4, 14, 15, 24], "grid": [3, 12], "grid_axi": [3, 12], "both": [3, 12, 15], "add_legend_cat": 3, "legend_kw": 3, "shap_plot": 3, "kwarg": [3, 4, 11], "plot": [3, 9, 10, 11, 12, 15, 17, 18, 24, 31], "instanc": 3, "avail": [3, 7, 14, 16, 17, 20, 23], "specifi": [3, 4, 5, 9, 10, 12, 14, 18], "check_value_typ": 3, "tupl": [3, 10], "size": [3, 4, 8, 10, 11, 12, 14, 24], "custom": [3, 7, 11, 12], "appear": [3, 12, 24], "map": [3, 4, 10, 11], "color": [3, 9, 10, 11], "edg": [3, 11, 18, 24], "bar": [3, 9, 10], "width": [3, 11], "line": [3, 11, 13], "annot": 3, "font": [3, 8, 11, 12], "tick": [3, 12], "axi": [3, 12, 16], "limit": 3, "highlight": 3, "area": [3, 14, 16, 24], "alpha": [3, 14], "ad": 3, "drawn": 3, "legend": [3, 11], "shap": [3, 10, 14, 18], "shaplei": 3, "addit": [3, 4, 5, 7, 11, 12, 16, 18, 24], "gener": [3, 4, 6, 10, 12, 18, 21, 23, 24, 29], "other": [3, 7, 16, 18, 24], "intern": [3, 24], "librari": [3, 12, 18], "ax": [3, 11], "matplotlib": [3, 11, 12, 14, 15, 18], "heatmap": [3, 9, 10], "8": [3, 4, 5, 14, 15, 16, 18, 24], "vmin": 3, "vmax": 3, "grid_on": 3, "cmap": [3, 9, 10], "rdbu_r": 3, "cmap_n_color": 3, "cbar_kw": 3, "facecolor_dark": [3, 10], "add_importance_map": 3, "cbar_pct": 3, "featuremap": 3, "versu": 3, "wrapper": [3, 14, 17, 18, 21, 22], "seaborn": [3, 10, 12, 14, 15, 18], "level": [3, 6, 7, 15, 16, 17, 18, 22, 24, 25, 26], "e": [3, 4, 9, 10, 12, 14, 16, 17, 18, 21, 22, 24, 29], "g": [3, 4, 9, 10, 12, 16, 17, 18, 21, 22, 24, 29], "protein": [3, 4, 6, 16, 17, 18, 21, 22, 23, 27, 28, 29], "shown": 3, "feat_impact": 3, "displai": 3, "sum": [3, 16, 24], "std": 3, "aggreg": 3, "positions_onli": 3, "across": [3, 16, 18], "recommend": [3, 5, 7, 18], "when": [3, 5, 18, 24], "emphas": [3, 18], "fewer": 3, "value_typ": 3, "height": 3, "figur": 3, "inch": 3, "pyplot": [3, 11, 14, 15], "anchor": [3, 11, 24], "colormap": 3, "infer": [3, 18], "seismic": 3, "space": [3, 5, 10, 11], "impact": 3, "discret": 3, "diverg": 3, "sequenti": 3, "kei": [3, 18, 24], "colorbar": 3, "under": [3, 7, 18], "depicet": 3, "depict": 3, "jmd_n": [3, 4, 6, 14, 15], "jmd_c": [3, 4, 6, 14, 15], "point": [3, 11, 14, 24, 25], "set_xticklabel": 3, "widht": 3, "tick_param": 3, "classif": [3, 6, 7, 14, 15, 16, 17, 22, 24, 25, 29], "pcolormesh": 3, "effect": [3, 18, 24, 29], "onli": [3, 6, 7, 15, 18, 24, 29], "align": [3, 11, 14, 16], "see": [3, 18, 21, 24, 27], "document": [3, 24], "more": [3, 14, 18], "detail": [3, 6, 7, 11, 16, 17, 18, 20], "cpp": [3, 4, 10, 17, 20, 21, 22, 25, 27], "code": [3, 10, 13], "update_seq_s": 3, "retriev": [4, 9, 10, 14], "compon": [4, 5, 7, 16, 24], "continu": [4, 14], "subset": [4, 7, 24], "domain": [4, 6, 14, 15, 24], "transmembran": [4, 24], "membran": [4, 24], "principl": [4, 17], "distinct": [4, 17, 18, 21, 22, 24], "segment": [4, 14, 27], "pattern": [4, 14], "properti": [4, 24], "express": 4, "present": [4, 6], "realiz": 4, "over": [4, 14], "valid": [4, 18], "tmd_e": 4, "tmd_n": 4, "tmd_c": 4, "ext_c": 4, "ext_n": 4, "tmd_jmd": [4, 14], "jmd_n_tmd_n": 4, "tmd_c_jmd_c": [4, 14], "ext_n_tmd_n": 4, "tmd_c_ext_c": 4, "get_df_part": [4, 14], "df_seq": [4, 5, 6, 14, 15, 27], "list_part": [4, 14], "all_part": 4, "datafran": 4, "compris": [4, 16], "tmd_start": [4, 6, 14, 15], "tmd_stop": [4, 6, 14, 15], "string": [4, 10], "len": [4, 15], "must": 4, "lenght": 4, "resp": [4, 24], "extra": [4, 13, 24], "possibl": [4, 15, 24, 29], "found": [4, 18], "sf": [4, 14], "dom_gsec": [4, 14, 15, 24], "n_split_min": 4, "n_split_max": [4, 14], "steps_pattern": 4, "n_min": 4, "n_max": 4, "len_max": 4, "steps_periodicpattern": 4, "periodicpattern": 4, "greater": 4, "greatest": 4, "whole": [4, 6, 14, 16], "specfii": 4, "smallest": [4, 24], "integ": 4, "6": [4, 14, 15, 16, 24], "vari": [4, 15], "paramt": 4, "argumetn": 4, "get_featur": 4, "load_scal": [4, 14, 16, 17, 22, 24], "combin": [4, 14, 18, 24], "form": [4, 24], "feat_matrix": [4, 14], "n_job": [4, 14], "return_label": 4, "pd": [4, 5, 14, 18], "seri": 4, "job": 4, "parallel": [4, 24], "spars": 4, "feat_nam": 4, "convert": 4, "depend": [4, 24], "last": 4, "step1": 4, "step2": 4, "add_feat_valu": 4, "dict_scal": 4, "convent": [4, 7], "letter": 4, "feature_valu": 4, "n_part": 4, "ha": [4, 18, 24], "structur": [4, 14, 23, 24], "th": [4, 7, 16], "n_split": 4, "p1": 4, "p2": 4, "pn": 4, "end": [4, 24], "odd": [4, 15], "even": 4, "give": 4, "add_dif": 4, "sample_nam": 4, "ref_group": 4, "add_posit": 4, "part_split": 4, "feat_posit": 4, "total": [4, 5, 14, 16, 24], "n_compon": 5, "pca_kwarg": 5, "determinist": [5, 17, 21, 22], "unlabel": [5, 17, 21, 22, 24, 29], "offer": [5, 15, 18], "approach": [5, 14, 15, 29], "pu": [5, 17, 21, 22, 24], "princip": [5, 7, 16, 24], "analysi": [5, 7, 14, 16, 17, 18, 21, 22, 24], "pca": [5, 16], "dimension": [5, 23], "pc": [5, 7, 24], "iter": 5, "reliabl": [5, 15, 18], "These": [5, 7, 14, 16, 18, 29], "those": [5, 24], "distant": 5, "altern": [5, 29], "also": [5, 15, 16, 18, 24], "distanc": [5, 24], "manhattan": 5, "cosin": 5, "80": 5, "cover": 5, "varianc": 5, "identif": [5, 23], "datapoint": 5, "inspir": [5, 18], "techniqu": [5, 29], "an": [5, 6, 7, 14, 15, 16, 17, 18, 20, 23, 24], "theoret": [5, 24], "high": [5, 23, 24], "n_neg": 5, "label_po": 5, "name_neg": 5, "rel_neg": 5, "col_class": 5, "newli": 5, "updat": [5, 18], "new": [5, 18], "store": 5, "Will": 5, "dure": 5, "initi": [5, 24], "small": [5, 14, 15, 17, 18, 21, 22, 25, 30], "datafor": 5, "conta": 5, "po": 5, "unl": 5, "numpi": [5, 14, 18], "np": [5, 14], "atgc": 5, "gcta": 5, "actg": 5, "tacg": 5, "mode": 5, "modifi": [5, 12, 14], "dpul": 5, "info": 6, "random": [6, 15, 24], "non_canonical_aa": 6, "remov": [6, 12], "min_len": [6, 15], "max_len": [6, 15], "aa_window_s": [6, 15], "9": [6, 14, 15, 16, 18, 24], "load": [6, 7, 17, 18, 22, 31], "benchmark": [6, 14, 16, 17, 22], "categor": [6, 15], "dom": [6, 15, 24], "seq": [6, 15, 24], "overview": [6, 7, 13, 15, 18], "tabl": [6, 7, 15, 18], "depth": [6, 7, 16, 17, 22], "breimann23a": [6, 7, 23, 24], "per": [6, 15, 24], "randomli": [6, 15], "liter": 6, "keep": 6, "gap": [6, 10], "handl": [6, 11], "canon": [6, 16], "don": 6, "replac": 6, "symbol": 6, "window": [6, 14, 24], "aa_": 6, "df_info": [6, 15], "entri": [6, 14, 15, 16], "uniprot": 6, "binari": [6, 14, 15, 29], "stop": 6, "respect": [6, 9, 10, 14, 17, 18, 20, 24], "seq_amylo": [6, 15, 16, 24], "guid": [6, 7, 18], "tutori": [6, 7, 14, 17, 18, 21], "just_aaindex": [7, 16], "unclassified_in": [7, 16], "top60_n": [7, 16], "aaontologi": [7, 14, 17, 20, 22, 23, 25], "scales_raw": [7, 16, 24], "encompass": [7, 24], "aaindex": [7, 14, 16, 23], "kawashima08": [7, 23, 24], "along": [7, 14], "were": [7, 16, 24], "min": [7, 16, 24], "max": [7, 16, 24], "organ": [7, 18], "call": [7, 24], "scales_cat": [7, 16, 24], "breimann23b": [7, 17, 20, 23, 24], "compress": [7, 16, 24], "scales_pc": [7, 16, 24], "aaclust": [7, 16, 17, 20, 21, 22, 23, 24], "60": [7, 16, 24], "top60": [7, 16, 24], "individu": 7, "accompani": 7, "top60_ev": [7, 16, 24], "relev": 7, "inclus": [7, 18], "suffix": [7, 15, 18], "scale_id": [7, 16], "same": [7, 16], "deriv": 7, "descript": [7, 16, 18, 24], "scale_descript": [7, 14, 16], "current": 8, "ut": 8, "plot_set": [8, 14, 15], "dict_scale_cat": [9, 10], "cppplot": [9, 10, 18], "n_color": 10, "color_po": 10, "color_neg": 10, "color_cent": 10, "input": [10, 18, 27], "hex": 10, "pct_gap": 10, "pct_center": 10, "palett": [10, 14], "feat": 10, "ggplot": 10, "datagroup": 10, "dark": 10, "face": [10, 15], "rgb": 10, "hl": 10, "husl": 10, "xkcd": 10, "interpret": [10, 14, 17, 18, 20, 21, 22, 23, 24, 28], "latter": 10, "rang": 10, "sn": [10, 14, 15], "color_palett": 10, "light_palett": 10, "lighter": 10, "list_cat": 11, "ncol": 11, "fontsiz": 11, "weight": [11, 14, 23, 24], "lw": 11, "edgecolor": 11, "return_handl": 11, "loc": [11, 16], "upper": 11, "left": [11, 14, 24], "labelspac": 11, "columnspac": 11, "fontsize_legend": 11, "title_align_left": 11, "fontsize_weight": 11, "customiz": 11, "attach": 11, "item": 11, "coordin": 11, "text": [11, 12], "locat": [11, 24], "vertic": 11, "horizont": 11, "marker": 11, "directli": [11, 18], "finer": 11, "control": 11, "how": [11, 14], "line2d": 11, "cat1": 11, "red": [11, 14], "cat2": 11, "o": 11, "fig_format": 12, "pdf": 12, "font_scal": [12, 14, 15], "arial": 12, "change_s": 12, "weight_bold": 12, "adjust_el": 12, "short_tick": 12, "no_tick": 12, "no_ticks_i": 12, "short_ticks_i": 12, "no_ticks_x": 12, "short_ticks_x": 12, "configur": 12, "visual": [12, 13, 18], "variou": [12, 14, 18, 24, 27], "file": [12, 18], "save": 12, "make": [12, 13, 14, 15, 18], "visibl": 12, "choos": 12, "san": 12, "serif": 12, "verdana": 12, "helvetica": 12, "dejavu": 12, "element": 12, "bold": 12, "layout": 12, "short": [12, 13], "mark": 12, "global": 12, "util": [13, 15, 17, 18], "readi": [13, 15], "view": [13, 18, 29], "dive": 14, "power": 14, "capabl": [14, 24], "dedic": 14, "free": [14, 16, 24], "In": [14, 15, 29], "gamma": [14, 24], "secretas": [14, 23, 24], "substrat": [14, 23, 24], "exampl": [14, 15, 18, 21, 29], "we": [14, 15, 18], "ll": 14, "focu": [14, 18], "extract": 14, "thei": [14, 15, 18], "har": 14, "task": [14, 18, 29], "easili": [14, 15, 18], "essenti": [14, 15, 18], "randomforest": 14, "With": 14, "\u03b3": [14, 23], "hand": [14, 24], "effortlessli": 14, "furthermor": 14, "predominantli": 14, "hierarch": 14, "known": 14, "your": [14, 17, 18, 20], "fingertip": 14, "50": [14, 15], "head": [14, 15, 16], "q14802": 14, "mqkvtlgllvflagfpvldandledknspfyydwhslqvgglicag": 14, "37": 14, "59": 14, "nspfyydwh": 14, "lqvgglicagvlcamgiiivmsa": 14, "kckckfgqk": 14, "q86ue4": 14, "maarswqdelaqqaeegsarlremlsvglgflrtelgldlglepkr": 14, "72": 14, "lglepkrypg": 14, "wvilvgtgalgllllfllgygwa": 14, "aacagarkkr": 14, "q969w9": 14, "mhrlmgvnstaaaaagqpnvsctcnckrslfqsmeitelefvqiii": 14, "41": 14, "63": [14, 15, 24], "fqsmeitel": 14, "fvqiiiivvvmmvmvvvitcl": 14, "hyklsarsfi": 14, "p53801": 14, "mapgvargptpywrlrlggaalllllipvaaaqeppgaacsqntnk": 14, "97": 14, "119": [14, 16], "rwgvcwvnfe": 14, "aliitmsvvggtlllgiaicccc": 14, "ccrrkrsrkp": 14, "q8iuw5": 14, "mapralpgsavlaaavfvggavssplvapdngssrtlhsrtettp": 14, "81": 14, "ndtgnghpei": 14, "iayalvpvffimglfgvlichl": 14, "kkkgyrctt": 14, "centerpiec": 14, "support": [14, 18, 24], "sinc": 14, "problem": 14, "machin": [14, 17, 18, 20, 23, 29], "lightweight": 14, "agglom": 14, "close": [14, 18], "agglomerativeclust": 14, "aac": 14, "andn920101": [14, 16], "simz760101": 14, "nakh900106": 14, "aurr980112": 14, "494": [14, 16], "268": 14, "237": 14, "787": [14, 16], "864": [14, 16], "258": 14, "303": 14, "104": [14, 16], "d": [14, 16], "000": [14, 16], "206": [14, 16], "451": 14, "420": [14, 16], "210": 14, "090": 14, "823": [14, 16], "f": [14, 16], "877": [14, 16], "887": [14, 16], "724": 14, "402": [14, 16], "integr": [14, 18, 23], "target": 14, "middl": [14, 24], "adjac": [14, 24], "region": [14, 23, 24], "discontinu": 14, "d3zzk3": 14, "riigdganstvllvsvsgsvvlvviliaafvisrrrskysqak": 14, "o14786": 14, "pgnvlktldpilitiiamsalgvllgavcgvvlycacwhngm": 14, "o35516": 14, "selesprnaqllyllavavviilffillgvimakrkrkhgflw": 14, "o43914": 14, "dcscstvspgvlagivmgdlvltvlialavyflgrlvprgrga": 14, "o75581": 14, "ypteepapqatntvgsvigvivtifvsgtvyficqrmlcprmk": 14, "As": 14, "baselin": 14, "entir": 14, "p_val_mann_whitnei": 14, "activ": [14, 18, 24], "backbon": [14, 24], "dynam": [14, 24], "ch": [14, 16, 24], "\u03b1": [14, 24], "chemic": [14, 24], "shift": [14, 24], "andersen": 14, "et": [14, 16, 23, 24], "al": [14, 16, 23, 24], "1992": [14, 24], "130": 14, "022966": 14, "054433": 14, "053266": 14, "025737": 14, "099022": 14, "12": [14, 15, 24], "13": [14, 15, 24], "14": [14, 24], "16": [14, 24], "17": [14, 24], "18": 14, "vasm830101": 14, "conform": [14, 24], "helix": [14, 24], "rel": [14, 24], "popul": 14, "state": [14, 24], "120": [14, 16], "019298": 14, "046755": 14, "049127": 14, "039609": 14, "robb760113": 14, "\u03b2": [14, 16, 24], "turn": [14, 16, 24], "loop": 14, "robson": [14, 24], "suzuki": [14, 24], "108": 14, "021958": 14, "060658": 14, "053190": 14, "062212": 14, "100670": 14, "racs820103": 14, "fraction": 14, "occurr": [14, 24], "080": 14, "019579": 14, "072260": 14, "047452": 14, "166907": 14, "ensembl": 14, "randomforestclassifi": 14, "model_select": 14, "cross_val_scor": 14, "rf": 14, "cv_base": 14, "score": 14, "accuraci": [14, 16, 23], "round": 14, "58": [14, 24], "some": [14, 24], "time": 14, "improv": [14, 18, 23], "around": 14, "qian880106": 14, "387": [14, 15, 16], "121446": 14, "069196": 14, "085013": 14, "000000e": 14, "00": 14, "27": 14, "28": [14, 24], "29": 14, "30": 14, "31": [14, 24], "32": 14, "33": 14, "zimj680104": 14, "energi": [14, 16, 24], "isoelectr": [14, 24], "zimmerman": [14, 24], "1968": [14, 24], "373": 14, "220000": 14, "123716": 14, "137350": 14, "475000e": 14, "07": 14, "34": 14, "35": 14, "36": [14, 24], "358": 14, "144860": 14, "079321": 14, "117515": 14, "150000e": 14, "25": 14, "lins030101": 14, "asa": [14, 16, 24], "volum": [14, 16, 24], "surfac": [14, 16, 24], "residu": [14, 15, 16, 23, 24], "b": [14, 24], "354": [14, 16], "237161": 14, "145884": 14, "164285": 14, "100000e": 14, "09": 14, "341": 14, "263651": 14, "187136": 14, "171995": 14, "185395e": 14, "06": 14, "337": 14, "319440": 14, "175203": 14, "255754": 14, "eisd860102": 14, "atom": [14, 24], "hydrophob": [14, 24], "moment": 14, "eisenberg": [14, 24], "mclac": 14, "139567": 14, "098917": 14, "101842": 14, "300000e": 14, "38": [14, 24], "39": 14, "40": 14, "ricj880113": 14, "cap": [14, 24], "insid": [14, 16, 24], "prefer": [14, 16, 24], "c2": 14, "richardson": [14, 24], "ri": 14, "336": [14, 16], "223765": 14, "133513": 14, "178217": 14, "kars160107": 14, "side": [14, 15, 16, 24], "chain": [14, 16, 24], "eccentr": [14, 24], "diamet": 14, "karkbara": [14, 24], "kni": 14, "331": [14, 16], "217594": 14, "136011": 14, "172395": 14, "130000e": 14, "08": 14, "331786e": 14, "jurd980101": 14, "polar": [14, 24], "kyte": [14, 24], "doolittl": [14, 24], "329": 14, "264720": 14, "141666": 14, "233134": 14, "480000e": 14, "425259e": 14, "again": 14, "warn": [14, 15], "simplefilt": [14, 15], "action": [14, 15], "ignor": [14, 15, 18], "futurewarn": [14, 15], "plt": [14, 15], "cv": 14, "barplot": 14, "tab": 14, "ylabel": 14, "plot_gcf": 14, "despin": [14, 15], "show": [14, 15, 16], "iloc": 15, "predictor": [15, 24], "aa_caspase3": [15, 24], "233": [15, 16, 24], "185605": [15, 24], "705": [15, 16, 24], "184900": [15, 24], "prosper": [15, 23, 24], "aa_furin": [15, 24], "71": [15, 24], "59003": [15, 24], "163": [15, 16, 24], "58840": [15, 24], "aa_ldr": [15, 24], "342": [15, 24], "118248": [15, 24], "35469": [15, 24], "82779": [15, 24], "idp": [15, 23, 24], "seq2seq": [15, 23, 24], "aa_mmp2": [15, 24], "573": [15, 24], "312976": [15, 24], "2416": [15, 24], "310560": [15, 24], "aa_rnabind": [15, 24], "221": [15, 16, 24], "55001": [15, 24], "6492": [15, 24], "48509": [15, 24], "gmksvm": [15, 24], "ru": [15, 24], "aa_sa": [15, 24], "101082": [15, 24], "84523": [15, 24], "1414": [15, 24], "8484": [15, 24], "511": [15, 24], "903": [15, 16, 24], "rerf": [15, 23, 24], "pred": [15, 23, 24], "seq_capsid": [15, 16, 24], "7935": [15, 24], "3364680": [15, 24], "3864": [15, 24], "4071": [15, 24], "viralpro": [15, 23, 24], "seq_disulfid": [15, 16, 24], "2547": [15, 24], "614470": [15, 24], "897": [15, 24], "1650": [15, 24], "dipro": [15, 24], "seq_loc": [15, 16, 24], "1835": [15, 24], "732398": [15, 24], "1045": [15, 24], "790": [15, 16, 24], "nan": [15, 24], "seq_solubl": [15, 16, 24], "17408": [15, 24], "4432269": [15, 24], "8704": [15, 24], "solpro": [15, 23, 24], "seq_tail": [15, 16, 24], "6668": [15, 24], "2671690": [15, 24], "2574": [15, 24], "4094": [15, 24], "126": [15, 24], "92964": [15, 24], "prefix": 15, "exemplifi": 15, "here": [15, 18, 24], "df_seq1": 15, "df_seq2": 15, "df_seq3": 15, "compar": [15, 17, 21, 22, 24, 27, 28], "capsid_1": 15, "mvthnvkinkhvtrrsyssakevleippltevqtasykwfmdkgik": 15, "capsid_2": 15, "mkkrqkkmtlsnftdtsfqdfvsaeqvddksamalinraedfkagq": 15, "being": [15, 18, 24], "balanc": 15, "200": [15, 16], "value_count": 15, "dtype": 15, "int64": 15, "Or": 15, "distribut": 15, "list_seq_len": 15, "histplot": 15, "binwidth": 15, "xlim": 15, "1500": 15, "800": [15, 16], "seen": 15, "caspase3_1": 15, "mslfdlfrgffgfpgprshrdpffggmtrdedddeeeeeeggswgr": 15, "caspase3_2": 15, "mevtgdagvpesgeirtlkpcllrrnysreqhgvaascledlrska": 15, "caspase3_3": 15, "mrarsgargalllalllcwdptpslagidsggqalpdsfpsapaeq": 15, "caspase3_4": 15, "mdakarncllqhrealekdiktsyimdhmisdgfltiseeekvrn": 15, "conveni": 15, "flank": 15, "ensur": [15, 18], "equal": 15, "while": 15, "popular": [15, 29], "caspase3_1_pos4": 15, "mslfdlfrg": 15, "caspase3_1_pos5": 15, "slfdlfrgf": 15, "caspase3_1_pos6": 15, "lfdlfrgff": 15, "caspase3_1_pos7": 15, "fdlfrgffg": 15, "21": [15, 24], "caspase3_55_pos170": 15, "kkrkleeeedgklkkpknkdk": 15, "caspase3_29_pos185": 15, "cphhercsdsdglappqhlir": 15, "caspase3_64_pos431": 15, "dnplnwpdekdssfyrnfgst": 15, "caspase3_93_pos455": 15, "fvknmnrdstfivnktitaev": 15, "caspase3_38_pos129": 15, "ssfdldydfqrdyydrmysyp": 15, "caspase3_8_pos33": 15, "rppqlrpgaptslqtepqgnp": 15, "typic": [15, 21, 24], "But": 15, "mani": 15, "challeng": 15, "might": [15, 24], "unbalanc": [15, 17, 18, 21, 22, 25, 30], "lack": 15, "clear": [15, 18], "scenario": 15, "denot": [15, 24], "_pu": [15, 24], "dom_gsec_pu": [15, 24], "p05067": 15, "mlpglallllaawtaralevptdgnagllaepqiamfcgrlnmhmn": 15, "701": [15, 16], "723": [15, 16], "faedvgsnkg": 15, "aiiglmvggvviatvivitlvml": 15, "kkkqytsihh": 15, "p14925": 15, "magrarsgllllllgllalqssclafrsplsvfkrfkettrsfsn": 15, "868": [15, 16], "890": 15, "klstepgsgv": 15, "svvlittllvipvlvllaivmfi": 15, "rwkksrafgd": 15, "p70180": 15, "mrslllftfsacvllarvllaggassgagdtrpgsrrrarealaaq": 15, "477": 15, "499": 15, "pckssgglee": 15, "savtgivvgallgagllmafyff": 15, "rkkyriti": 15, "q03157": 15, "mgptspaargqgrrwrppplplllplsllllraqlavgnlavgsp": 15, "585": [15, 16], "607": [15, 16], "apsgtgvsr": 15, "alsgllimgagggslivlslll": 15, "rkkkpygti": 15, "q06481": 15, "maatgtaaaaatgrllllllvgltapalalagyiealaanagtgfa": 15, "694": [15, 16, 24], "716": [15, 16], "lredfslsss": 15, "aligllviavaiatvivislvml": 15, "rkrqygtish": 15, "121": 15, "p36941": 15, "mllpwatsapglawgplvlglfgllaasqpqavppyasenqtcrdq": 15, "226": [15, 16], "248": [15, 16], "plppemsgtm": 15, "lmlavllplafflllatvfsciw": 15, "kshpslcrkl": 15, "122": 15, "p25446": 15, "mlwiwavlplvlagsqlrvhtqgtnsiseslklrrrvretdkncs": 15, "170": [15, 16], "187": 15, "ncrkqsprnr": 15, "lwlltilvlliplvfiyr": 15, "kyrkrkcwkr": 15, "123": 15, "q9p2j2": 15, "mvwclglavlslvisqgadgrgkpevvsvvgragesvvlgcdllpp": 15, "738": [15, 16], "760": [15, 16], "pgllpqpvla": 15, "gvvggvcflgvavlvsilagcl": 15, "nrrraarrrr": 15, "124": 15, "q96j42": 15, "mvpaagrrpprvmrllgwwqvllwvlglpvrgvevaeesgrlwse": 15, "324": [15, 16], "lpstliksvd": 15, "wllvfslfflisfimyati": 15, "rtesirwlip": 15, "125": 15, "p0dpa2": 15, "mrvggafhlllvclspallsavringdgqevlylaegdnvrlgcpi": 15, "265": 15, "287": 15, "kvsdsrrigv": 15, "iigivlgsllalgclavgiwglv": 15, "ccccggsgag": 15, "row": [15, 16], "df_seq_pu": 15, "689": [15, 16], "p60852": 15, "maggsattwgypvallllvatlglgrwlqpdpglpglrhsydcgik": 15, "602": [15, 16], "624": [15, 16], "dsngnsslrp": 15, "llwavlllpavalvlgfgvfvgl": 15, "sqtwaqklw": 15, "690": [15, 16], "p20239": 15, "marwqrkasvsspcgrsiyrflsllftlvtsvnsvslpqsenpafp": 15, "684": [15, 16], "703": [15, 16], "iiakdiaskt": 15, "lgavaalvgsavilgficyl": 15, "ykkrtirfnh": 15, "691": [15, 16], "p21754": 15, "melsyrlficlllwgstelcypqplwllqggashpetsvqpvlvec": 15, "409": 15, "eqwalpsdt": 15, "vvllgvglavvvsltltavilvl": 15, "trrcrtashp": 15, "692": [15, 16], "q12836": 15, "mwllrcvllcvslslavsgqhkpeapdyssvlhcgpwsfqfavnln": 15, "506": [15, 16], "528": 15, "eklrvpvdsk": 15, "vlwvaglsgtlilgallvsylav": 15, "kkqkscpdqm": 15, "693": [15, 16], "q8tcw7": 15, "meqiwllllltirvlpgsaqfngyncdanlhsrfpaerdisvycgv": 15, "374": 15, "396": [15, 16], "pfqlnaitsa": 15, "lisgmvilgvtsfslllcslal": 15, "hrkgptslvl": 15, "six": 16, "version": [16, 24], "raw": [16, 24], "df_raw": 16, "df_pc": 16, "argp820101": 16, "argp820102": 16, "argp820103": 16, "begf750101": 16, "begf750102": 16, "begf750103": 16, "bhar880101": 16, "bigc670101": 16, "biov880101": 16, "koeh090103": 16, "koeh090104": 16, "koeh090105": 16, "koeh090106": 16, "koeh090107": 16, "koeh090108": 16, "koeh090109": 16, "koeh090110": 16, "koeh090111": 16, "koeh090112": 16, "230": 16, "355": 16, "504": 16, "512": 16, "249": 16, "164": 16, "476": 16, "194": 16, "300": 16, "551": 16, "222": 16, "308": 16, "273": 16, "140": 16, "522": 16, "345": 16, "404": 16, "579": 16, "783": 16, "205": 16, "323": 16, "936": 16, "279": 16, "174": 16, "449": 16, "346": 16, "285": 16, "416": 16, "867": 16, "191": 16, "583": 16, "889": 16, "720": 16, "556": 16, "875": 16, "919": 16, "796": 16, "440": 16, "177": 16, "019": 16, "032": 16, "713": 16, "267": 16, "811": 16, "488": 16, "106": 16, "542": 16, "732": 16, "593": 16, "718": 16, "857": 16, "853": 16, "913": 16, "681": 16, "762": 16, "601": 16, "670": 16, "574": 16, "076": 16, "049": 16, "189": 16, "148": 16, "182": 16, "029": 16, "186": 16, "017": 16, "025": 16, "026": 16, "138": 16, "309": 16, "388": 16, "544": 16, "608": 16, "538": 16, "571": 16, "481": 16, "112": 16, "h": 16, "840": 16, "082": 16, "053": 16, "651": 16, "633": 16, "561": 16, "455": 16, "856": 16, "370": 16, "500": 16, "545": 16, "618": 16, "726": 16, "838": 16, "543": 16, "671": 16, "663": 16, "885": 16, "246": 16, "074": 16, "167": 16, "091": 16, "051": 16, "398": 16, "276": 16, "434": 16, "003": 16, "004": 16, "687": 16, "737": 16, "933": 16, "873": 16, "779": 16, "734": 16, "405": 16, "l": 16, "272": 16, "577": 16, "989": 16, "281": 16, "078": 16, "118": 16, "333": 16, "259": 16, "m": 16, "704": 16, "445": 16, "824": 16, "450": 16, "620": 16, "803": 16, "289": 16, "132": 16, "185": 16, "192": 16, "180": [16, 24], "419": 16, "224": [16, 24], "988": 16, "023": 16, "057": 16, "046": 16, "675": 16, "203": 16, "552": 16, "645": 16, "519": 16, "756": 16, "753": 16, "706": 16, "599": 16, "587": 16, "293": 16, "605": 16, "736": 16, "223": 16, "220": 16, "859": 16, "376": 16, "367": 16, "322": 16, "678": 16, "707": 16, "444": 16, "662": 16, "570": 16, "594": 16, "q": 16, "211": 16, "131": 16, "395": 16, "795": 16, "539": 16, "676": 16, "733": 16, "628": 16, "483": 16, "r": [16, 24], "531": 16, "047": 16, "110": 16, "489": 16, "940": 16, "735": 16, "215": 16, "852": 16, "883": 16, "743": 16, "362": 16, "679": 16, "238": 16, "851": 16, "188": 16, "399": 16, "589": 16, "655": 16, "590": 16, "382": 16, "384": 16, "379": 16, "598": 16, "352": 16, "312": 16, "366": 16, "578": 16, "407": 16, "364": 16, "250": 16, "514": 16, "v": [16, 24], "498": 16, "809": 16, "365": 16, "492": 16, "077": 16, "033": 16, "111": [16, 24], "156": 16, "154": 16, "496": 16, "w": 16, "926": 16, "040": 16, "146": 16, "600": 16, "400": 16, "316": 16, "244": 16, "802": 16, "709": 16, "107": 16, "502": 16, "806": 16, "588": 16, "286": 16, "644": 16, "474": 16, "410": 16, "429": 16, "413": 16, "235": 16, "586": [16, 24], "lins030110": 16, "fold": [16, 24], "coil": [16, 24], "median": 16, "resi": 16, "lins030113": 16, "janj780101": 16, "janin": [16, 24], "janj780103": 16, "expos": [16, 24], "lins030104": 16, "lins030107": 16, "win3": 16, "choc760102": 16, "prot": 16, "lins030116": 16, "strand": [16, 24], "lins030119": 16, "lins030103": 16, "hydrophil": [16, 24], "resid": 16, "stem": 16, "best": 16, "top60_id": 16, "acc": 16, "presenc": [16, 24], "absenc": [16, 24], "df_top60": 16, "aac01": 16, "aac02": 16, "aac03": 16, "aac04": 16, "aac05": 16, "aac06": 16, "aac07": 16, "aac08": 16, "aac09": 16, "aac10": 16, "df_eval": 16, "overal": 16, "aa5_caspase3": 16, "aa5_furin": 16, "aa5_ldr": 16, "aa5_mmp2": 16, "aa9_ldr": 16, "aa9_mmp2": 16, "aa9_rnabind": 16, "aa9_sa": 16, "aa13_caspase3": 16, "aa13_furin": 16, "aa13_ldr": 16, "aa13_mmp2": 16, "aa13_rnabind": 16, "aa13_sa": 16, "761": 16, "827": 16, "746": 16, "646": 16, "884": 16, "862": 16, "901": 16, "612": 16, "680": 16, "659": 16, "664": 16, "918": 16, "652": 16, "615": 16, "747": 16, "830": 16, "742": 16, "653": 16, "886": 16, "855": 16, "907": 16, "688": 16, "642": 16, "657": 16, "792": 16, "916": 16, "656": 16, "741": 16, "829": 16, "648": 16, "904": 16, "685": 16, "636": 16, "710": 16, "791": 16, "914": 16, "695": 16, "613": 16, "828": 16, "731": 16, "654": 16, "906": 16, "686": 16, "640": 16, "714": 16, "915": 16, "610": 16, "739": 16, "752": 16, "888": 16, "658": 16, "682": 16, "649": 16, "665": 16, "789": 16, "611": 16, "833": 16, "650": 16, "882": 16, "858": 16, "606": 16, "638": 16, "711": 16, "661": 16, "831": 16, "603": 16, "669": 16, "826": 16, "647": 16, "905": 16, "614": 16, "750": 16, "748": 16, "860": 16, "908": 16, "632": 16, "aac11": 16, "749": 16, "832": 16, "751": 16, "781": 16, "683": 16, "aac12": 16, "708": 16, "666": 16, "785": 16, "917": 16, "aac13": 16, "744": 16, "634": 16, "aac14": 16, "902": 16, "673": 16, "794": 16, "604": 16, "aac15": 16, "617": 16, "660": 16, "aac16": 16, "755": 16, "635": 16, "702": 16, "aac17": 16, "740": 16, "835": 16, "793": 16, "609": 16, "aac18": 16, "757": 16, "730": 16, "643": 16, "881": 16, "899": 16, "912": 16, "aac19": 16, "764": 16, "745": 16, "909": 16, "aac20": 16, "677": 16, "aac21": 16, "637": 16, "aac22": 16, "880": 16, "700": 16, "788": 16, "aac23": 16, "629": 16, "aac24": 16, "641": 16, "aac25": 16, "639": 16, "879": 16, "aac26": 16, "698": 16, "aac27": 16, "854": 16, "aac28": 16, "821": 16, "898": 16, "aac29": 16, "763": 16, "900": 16, "aac30": 16, "911": 16, "616": 16, "aac31": 16, "727": 16, "631": 16, "784": 16, "aac32": 16, "aac33": 16, "817": 16, "922": 16, "aac34": 16, "729": 16, "aac35": 16, "758": 16, "822": 16, "aac36": 16, "759": 16, "874": 16, "aac37": 16, "596": 16, "aac38": 16, "766": 16, "921": 16, "aac39": 16, "786": 16, "aac40": 16, "819": 16, "870": 16, "775": 16, "910": 16, "aac41": 16, "896": 16, "aac42": 16, "861": 16, "895": 16, "799": 16, "674": 16, "aac43": 16, "767": 16, "815": 16, "871": 16, "848": 16, "782": 16, "625": 16, "aac44": 16, "825": 16, "621": 16, "696": 16, "780": 16, "923": 16, "aac45": 16, "844": 16, "893": 16, "672": 16, "774": 16, "aac46": 16, "812": 16, "626": 16, "872": 16, "843": 16, "667": 16, "623": 16, "aac47": 16, "717": 16, "aac48": 16, "771": 16, "891": 16, "776": 16, "619": 16, "aac49": 16, "807": 16, "630": 16, "850": 16, "892": 16, "aac50": 16, "728": 16, "773": 16, "aac51": 16, "768": 16, "865": 16, "836": 16, "894": 16, "668": 16, "697": 16, "aac52": 16, "814": 16, "aac53": 16, "765": 16, "798": 16, "aac54": 16, "699": 16, "770": 16, "aac55": 16, "769": 16, "580": 16, "595": 16, "aac56": 16, "aac57": 16, "aac58": 16, "715": 16, "568": 16, "aac59": 16, "725": 16, "797": 16, "592": 16, "562": 16, "aac60": 16, "563": 16, "772": 16, "529": 16, "813": 16, "546": 16, "24": [16, 24], "df_cat_1": 16, "df_raw_1": 16, "df_scales_1": 16, "selected_scal": 16, "tolist": 16, "df_aac1": 16, "buna790103": 16, "bura740102": 16, "cham820102": 16, "cham830102": 16, "cham830103": 16, "cham830105": 16, "chop780101": 16, "chop780204": 16, "chop780206": 16, "kars160110": 16, "kars160112": 16, "kars160118": 16, "kars160119": 16, "kars160120": 16, "kars160122": 16, "lins030105": 16, "lins030109": 16, "264": 16, "262": 16, "425": 16, "298": 16, "863": 16, "952": 16, "149": 16, "947": 16, "442": 16, "256": 16, "557": 16, "213": 16, "397": 16, "473": 16, "566": 16, "247": 16, "311": 16, "152": 16, "462": 16, "085": 16, "208": 16, "139": 16, "169": 16, "133": 16, "240": 16, "470": 16, "160": 16, "393": 16, "313": 16, "145": 16, "134": 16, "424": 16, "115": 16, "044": 16, "195": 16, "495": 16, "554": 16, "433": 16, "458": 16, "114": 16, "463": 16, "070": 16, "421": 16, "218": 16, "553": 16, "067": 16, "021": 16, "526": 16, "135": 16, "480": 16, "043": 16, "087": 16, "532": 16, "335": 16, "963": 16, "317": 16, "319": 16, "381": 16, "198": 16, "468": 16, "390": 16, "339": 16, "282": 16, "515": 16, "486": 16, "275": 16, "257": [16, 18], "350": 16, "150": 16, "534": 16, "178": 16, "565": 16, "550": 16, "320": 16, "327": 16, "326": 16, "369": 16, "028": 16, "093": 16, "537": 16, "540": 16, "231": 16, "002": 16, "372": 16, "457": 16, "209": 16, "081": 16, "467": 16, "183": 16, "well": [16, 18], "subordin": 16, "want": 16, "guyh850104": 16, "appar": 16, "calcul": 16, "ja": 16, "guyh850105": 16, "racs770103": 16, "orient": 16, "rackovski": [16, 24], "vheg790101": 16, "tfe": 16, "lipophil": 16, "phase": 16, "transfer": [16, 24], "von": 16, "buri": [16, 24], "buriabl": 16, "biov880102": 16, "werd780101": 16, "propens": [16, 24], "wertz": 16, "scheraga": [16, 24], "predict": [17, 18, 21, 22, 23, 24, 28, 29], "engin": [17, 18, 21, 22, 28], "dpulearn": [17, 20, 21, 22], "train": [17, 18, 21, 22, 29], "moreov": [17, 22], "load_data": [17, 22], "pypi": 17, "conda": [17, 18], "forg": 17, "pip": [17, 18], "introduct": 17, "usag": [17, 18, 21], "contribut": [17, 24], "api": [17, 18], "explain": [17, 18, 23, 25], "ai": [17, 18, 23, 25], "perturb": [17, 29], "search": 17, "page": 17, "work": [17, 20], "pleas": [17, 18, 20], "cite": [17, 20], "_": [17, 20], "breimann": [17, 20, 23], "kamp": [17, 20], "steiner": [17, 20], "frishman": [17, 20], "2023": [17, 20], "ontologi": [17, 20, 23], "biorxiv": [17, 20, 23], "welcom": 18, "thank": 18, "open": 18, "project": [18, 24], "focus": 18, "involv": 18, "invalu": 18, "made": 18, "wai": 18, "suggest": 18, "github": 18, "issu": 18, "tracker": 18, "submit": 18, "particip": [18, 24], "discuss": 18, "newcom": 18, "tackl": 18, "good": 18, "email": 18, "stephanbreimann": 18, "gmail": 18, "com": 18, "question": 18, "establish": 18, "comprehens": 18, "robust": 18, "common": 18, "life": [18, 29, 30], "scienc": [18, 29, 30], "seamlessli": 18, "flexibl": [18, 24], "interoper": 18, "packag": 18, "biopython": 18, "reimplement": 18, "solut": 18, "biolog": [18, 21, 24, 29], "context": 18, "relianc": 18, "opaqu": 18, "box": 18, "empir": 18, "insight": 18, "cut": 18, "fair": 18, "account": [18, 24], "transpar": 18, "re": [18, 23], "commit": 18, "divers": 18, "aspect": 18, "causal": 18, "minim": 18, "reproduc": 18, "mre": 18, "least": 18, "amount": 18, "demonstr": 18, "self": 18, "necessari": 18, "confirm": 18, "replic": 18, "guidelin": 18, "To": [18, 25], "git": 18, "breimanntool": 18, "master": 18, "repositori": 18, "your_usernam": 18, "navig": 18, "folder": 18, "up": 18, "cd": 18, "isol": 18, "aanalysi": 18, "poetri": 18, "pytest": 18, "hypothesi": 18, "execut": 18, "case": 18, "directori": 18, "substanti": 18, "minor": 18, "typo": 18, "concis": 18, "branch": [18, 24], "fix": 18, "readm": 18, "date": 18, "readthedoc": 18, "crucial": 18, "modif": 18, "render": 18, "correctli": 18, "strive": 18, "consist": [18, 21, 24], "codebas": 18, "standalon": 18, "special": 18, "carri": 18, "out": [18, 24], "complet": 18, "process": 18, "fulfil": 18, "purpos": 18, "implement": 18, "inherit": 18, "supplementari": 18, "accordingli": 18, "semi": 18, "strictli": 18, "adher": 18, "aforement": 18, "primari": [18, 27], "_util": 18, "_utils_const": 18, "py": 18, "modular": 18, "therefor": 18, "flat": 18, "hierarchi": 18, "outlin": 18, "softwar": 18, "user": 18, "friendli": 18, "hint": 18, "enhanc": [18, 24], "propos": 18, "pep": 18, "484": 18, "book": 18, "error": 18, "messag": 18, "docstr": 18, "markup": 18, "languag": 18, "restructuredtext": 18, "rst": 18, "primer": 18, "cheat": 18, "sheet": [18, 24], "restructuretext": 18, "cheatsheet": 18, "sphinx": 18, "autodoc": 18, "napoleon": 18, "extens": 18, "conf": 18, "four": 18, "bird": 18, "ey": 18, "background": 18, "reflect": [18, 24], "medium": [18, 24], "tabular": 18, "critic": 18, "except": 18, "rule": 18, "showcas": 18, "scientif": 18, "mai": 18, "mention": 18, "section": 18, "extern": 18, "note": 18, "go": 18, "_build": 18, "browser": 18, "citat": 20, "wa": 21, "develop": 21, "practic": 21, "2023a": 23, "2023b": 23, "breimann23c": [23, 24], "2023c": 23, "chart": 23, "cheng06": [23, 24], "cheng": 23, "2006": 23, "larg": 23, "disulphid": 23, "bridg": [23, 24], "kernel": 23, "recurs": 23, "neural": 23, "network": 23, "graph": [23, 24], "match": 23, "struct": 23, "funct": 23, "kawashima": 23, "2008": 23, "aid": 23, "databas": 23, "report": 23, "nucleic": 23, "magnan09": [23, 24], "magnan": 23, "randal": 23, "baldi": 23, "2009": [23, 24], "accur": 23, "solubl": [23, 24], "bioinformat": 23, "galiez16": [23, 24], "galiez": 23, "2016": [23, 24], "viral": 23, "capsid": [23, 24], "tail": [23, 24], "song18": [23, 24], "song": 23, "2018": 23, "throughput": 23, "cleavag": [23, 24], "site": [23, 24], "90": 23, "proteas": 23, "shen19": [23, 24], "shen": 23, "2019": 23, "subcellular": [23, 24], "local": [23, 24], "evolutionari": 23, "chou": [23, 24], "pseaac": 23, "j": 23, "theor": 23, "biol": 23, "tang20": [23, 24], "tang": 23, "2020": 23, "intrins": [23, 24], "disord": [23, 24], "teng21": [23, 24], "teng": 23, "2021": 23, "amyloidogen": [23, 24], "pseudo": 23, "composit": [23, 24], "tripeptid": 23, "bmc": 23, "yang21": [23, 24], "yang": 23, "granular": 23, "multipl": 23, "rna": [23, 24], "bind": [23, 24], "appl": 23, "chronolog": 24, "histori": 24, "t1_overview_benchmark": 24, "t2_overview_scal": 24, "t3a_aaontology_categori": 24, "t3b_aaontology_subcategori": 24, "begin": 24, "append": 24, "caspas": 24, "furin": 24, "long": 24, "ldr": 24, "metallopeptidas": 24, "mmp2": 24, "rbp60": 24, "solvent": 24, "sa": 24, "amyloidognen": 24, "capdsid": 24, "disulfid": 24, "ss": 24, "bond": 24, "cytoplasm": 24, "plasma": 24, "insolubl": 24, "494524": 24, "unknown": 24, "statu": 24, "tier": 24, "system": 24, "systemat": 24, "arrang": 24, "67": 24, "everi": 24, "main": 24, "clearli": 24, "assess": 24, "couldn": 24, "alloc": 24, "regard": 24, "chothia": 24, "1976": 24, "lin": 24, "2003": 24, "64": 24, "cellular": 24, "mitochondria": 24, "nakashima": 24, "1990": 24, "nishikawa": 24, "ranodm": 24, "tanaka": 24, "1977": 24, "fasman": 24, "1978b": 24, "1988": 24, "qian": 24, "sejnowski": 24, "aurora": 24, "rose": 24, "1998": 24, "19": 24, "charg": 24, "entropi": 24, "charton": 24, "1983": 24, "gui": 24, "1985": 24, "radzicka": 24, "wolfenden": 24, "could": 24, "mutabl": 24, "sneath": 24, "1966": 24, "amphiphil": 24, "1982": 24, "mitaku": 24, "2002": 24, "koehler": 24, "steric": 24, "characterist": 24, "angl": 24, "symmetri": 24, "represent": 24, "prabhakaran": 24, "ponnuswami": 24, "knislei": 24, "45": 24, "stabil": 24, "vihinen": 24, "1994": 24, "bastolla": 24, "2005": 24, "23": 24, "water": 24, "tendenc": 24, "oppos": 24, "1978": 24, "partial": 24, "physic": 24, "displac": 24, "caus": 24, "interact": 24, "mainli": 24, "ones": 24, "bull": 24, "brees": 24, "1974": 24, "bigelow": 24, "1967": 24, "jone": 24, "dayhoff": 24, "interior": 24, "unpolar": 24, "fukuchi": 24, "2001": 24, "mp": 24, "cedano": 24, "1997": 24, "mitochondri": 24, "less": 24, "val": 24, "cf": 24, "asp": 24, "glu": 24, "ly": 24, "arg": 24, "observ": 24, "character": 24, "punta": 24, "maritan": 24, "linker": 24, "georg": 24, "heringa": 24, "2004": 24, "right": 24, "helic": 24, "half": 24, "finkelstein": 24, "1991": 24, "outsid": 24, "befor": 24, "geisow": 24, "robert": 24, "1980": 24, "ramachandran": 24, "quadrant": 24, "bottom": 24, "paul": 24, "1951": 24, "antiparallel": 24, "lifson": 24, "sander": 24, "1979": 24, "bend": 24, "revers": 24, "tight": 24, "consecut": 24, "back": 24, "hydrogen": 24, "3rd": 24, "4th": 24, "1st": 24, "2nd": 24, "tm": 24, "place": 24, "monn\u00e9": 24, "1999": 24, "\u03c0": 24, "ala": 24, "gln": 24, "fodj": 24, "karadaghi": 24, "net": 24, "donor": 24, "klein": 24, "1984": 24, "acceptor": 24, "faucher": 24, "hi": 24, "electron": 24, "ion": 24, "pot": 24, "potenti": 24, "valenc": 24, "cosic": 24, "low": 24, "due": 24, "strong": 24, "hutchen": 24, "1970": 24, "unfold": 24, "gibb": 24, "denatur": 24, "yutani": 24, "1987": 24, "instabl": 24, "highest": 24, "break": 24, "pro": 24, "munoz": 24, "serrano": 24, "ph": 24, "electr": 24, "neutral": 24, "crystal": 24, "pairwis": 24, "constitu": 24, "lennard": 24, "oobatak": 24, "ooi": 24, "chang": 24, "divid": 24, "vector": 24, "describ": 24, "aliphat": 24, "linear": 24, "aromat": 24, "carbon": 24, "approxim": 24, "invers": 24, "reactiv": 24, "hydroxythiol": 24, "wold": 24, "occur": 24, "esp": 24, "amphipath": 24, "higher": 24, "highli": 24, "signal": 24, "argo": 24, "cornett": 24, "environ": 24, "mclachlan": 24, "1986": 24, "surround": 24, "angstrom": 24, "radiu": 24, "pack": 24, "globular": 24, "1981": 24, "eigenvalu": 24, "laplacian": 24, "undirect": 24, "node": 24, "mass": 24, "molecular": 24, "second": 24, "actual": 24, "root": 24, "squar": 24, "gyrat": 24, "farther": 24, "awai": 24, "relationship": 24, "rate": 24, "increas": 24, "factor": 24, "bundi": 24, "wuthrich": 24, "nh": 24, "temperatur": 24, "rigid": 24, "neighbor": 24, "gly": 24, "ser": 24, "particularli": 24, "ptitsyn": 24, "zhou": 24, "equilibrium": 24, "sueki": 24, "flow": 25, "enri": 25, "signatur": 25, "introduc": 26, "togeth": 27, "diagram": 27, "central": 28, "platform": 28, "novel": 28, "everywher": [29, 30], "setup": 29, "augment": 29, "smote": 29, "artifici": 29, "Such": 29, "veri": 29, "deep": 29, "imag": 29, "recognit": 29, "feasibl": 29, "becaus": 29, "slight": 29, "mutat": 29, "alter": 29, "dramat": 29, "often": 29, "great": 29, "quantiti": 29, "besid": 29, "distinguish": 29, "subfield": 29, "prelud": 31}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "CPP"], [3, 0, 1, "", "CPPPlot"], [4, 0, 1, "", "SequenceFeature"], [5, 0, 1, "", "dPULearn"], [6, 3, 1, "", "load_dataset"], [7, 3, 1, "", "load_scales"], [8, 3, 1, "", "plot_gcfs"], [9, 3, 1, "", "plot_get_cdict"], [10, 3, 1, "", "plot_get_cmap"], [11, 3, 1, "", "plot_set_legend"], [12, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "center_labels_"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "cluster_naming"], [1, 1, 1, "", "correlation"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 1, 1, "", "get_cluster_centers"], [1, 1, 1, "", "get_cluster_medoids"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "medoid_ind_"], [1, 2, 1, "", "medoid_labels_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "n_clusters"]], "aaanalysis.CPP": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "heatmap"], [3, 1, 1, "", "profile"], [3, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "add_dif"], [4, 1, 1, "", "add_feat_value"], [4, 1, 1, "", "add_position"], [4, 1, 1, "", "feat_matrix"], [4, 1, 1, "", "feat_names"], [4, 1, 1, "", "get_df_parts"], [4, 1, 1, "", "get_features"], [4, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "eval"], [5, 1, 1, "", "fit"], [5, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 15, 27, 29], "featur": [0, 14], "engin": [0, 14], "pu": [0, 15, 29], "learn": [0, 14, 29], "explain": [0, 14, 30], "ai": [0, 14, 30], "perturb": 0, "plot": [0, 13], "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 17, 27], "aaclust": [1, 14], "note": [1, 2, 4, 5, 6, 7, 12], "cpp": [2, 14, 28], "cppplot": 3, "exampl": [3, 4, 5, 6, 7, 11, 12, 17], "sequencefeatur": 4, "dpulearn": 5, "load_dataset": 6, "load_scal": 7, "plot_gcf": 8, "plot_get_cdict": 9, "plot_get_cmap": 10, "plot_set_legend": 11, "plot_set": 12, "prelud": 13, "quick": [14, 31], "start": [14, 31], "what": [14, 29, 30], "you": 14, "Will": 14, "1": 14, "load": [14, 15, 16], "sequenc": [14, 30], "scale": [14, 16, 24, 26], "2": 14, "compar": 14, "physicochem": [14, 28], "profil": 14, "3": 14, "protein": [14, 15, 24], "predict": 14, "4": 14, "group": 14, "level": [14, 30], "individu": 14, "tutori": [15, 16, 31], "benchmark": [15, 23, 24], "amino": [15, 16, 24, 26], "acid": [15, 16, 24, 26], "window": 15, "size": 15, "posit": 15, "unlabel": 15, "dataset": [15, 23, 24], "three": 16, "set": 16, "numer": 16, "aaontologi": [16, 24, 26], "redund": 16, "reduc": 16, "subset": 16, "filter": 16, "welcom": 17, "document": [17, 18, 21], "instal": [17, 18], "overview": [17, 21, 24], "refer": [17, 23], "indic": 17, "tabl": [17, 24], "citat": 17, "contribut": 18, "introduct": [18, 21], "vision": 18, "object": 18, "non": 18, "goal": 18, "principl": [18, 25], "bug": 18, "report": 18, "latest": 18, "version": 18, "local": 18, "develop": 18, "environ": 18, "fork": 18, "clone": 18, "depend": 18, "run": 18, "unit": 18, "test": 18, "pull": 18, "request": 18, "preview": 18, "chang": 18, "name": 18, "convent": 18, "class": 18, "templat": 18, "function": 18, "method": 18, "code": 18, "philosophi": 18, "style": 18, "layer": 18, "build": 18, "doc": 18, "workflow": 21, "algorithm": 23, "us": [23, 28], "case": 23, "further": [23, 31], "inform": 23, "categori": 24, "subcategori": 24, "usag": 25, "classif": 26, "flow": 27, "enri": 27, "point": 27, "compon": 27, "entri": 27, "bridg": 27, "extern": 27, "librari": 27, "identifi": 28, "signatur": 28, "from": 29, "unbalanc": 29, "small": 29, "i": [29, 30]}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data": [[0, "data"]], "Feature Engineering": [[0, "feature-engineering"]], "PU Learning": [[0, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], "aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [2, null], [2, null], [4, null], [4, null], [4, null], [4, null], [4, null], [5, null], [5, null], [6, null], [7, null], [12, null]], "aaanalysis.CPP": [[2, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[3, "aaanalysis-cppplot"]], "Examples": [[3, null], [4, null], [4, null], [5, null], [6, null], [7, null], [11, null], [12, null]], "aaanalysis.SequenceFeature": [[4, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[5, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[6, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[7, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[8, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[9, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_cmap": [[10, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_set_legend": [[11, "aaanalysis-plot-set-legend"]], "aaanalysis.plot_settings": [[12, "aaanalysis-plot-settings"]], "Plotting prelude": [[13, "plotting-prelude"]], "Quick Start with AAanalysis": [[14, "quick-start-with-aaanalysis"]], "What You Will Learn:": [[14, "what-you-will-learn"]], "1. Loading Sequences and Scales": [[14, "loading-sequences-and-scales"]], "2. Feature Engineering": [[14, "feature-engineering"]], "AAclust": [[14, "aaclust"]], "Comparative Physicochemical Profiling (CPP)": [[14, "comparative-physicochemical-profiling-cpp"]], "3. Protein Prediction": [[14, "protein-prediction"]], "4. Explainable AI": [[14, "explainable-ai"]], "Explainable AI on group level": [[14, "explainable-ai-on-group-level"]], "Explainable AI on individual level": [[14, "explainable-ai-on-individual-level"]], "Data Loading Tutorial": [[15, "data-loading-tutorial"]], "Loading of protein benchmarks": [[15, "loading-of-protein-benchmarks"]], "Loading of protein benchmarks: Amino acid window size": [[15, "loading-of-protein-benchmarks-amino-acid-window-size"]], "Loading of protein benchmarks: Positive-Unlabeled (PU) datasets": [[15, "loading-of-protein-benchmarks-positive-unlabeled-pu-datasets"]], "Scale Loading Tutorial": [[16, "scale-loading-tutorial"]], "Three sets of numerical amino acid scales": [[16, "three-sets-of-numerical-amino-acid-scales"]], "AAontology": [[16, "aaontology"], [24, "aaontology"]], "Redundancy-reduce scale subsets": [[16, "redundancy-reduce-scale-subsets"]], "Filtering of scales": [[16, "filtering-of-scales"]], "Welcome to the AAanalysis documentation!": [[17, "welcome-to-the-aaanalysis-documentation"]], "Install": [[17, "install"]], "OVERVIEW": [[17, null]], "EXAMPLES": [[17, null]], "REFERENCES": [[17, null]], "Indices and tables": [[17, "indices-and-tables"]], "Citation": [[17, "citation"]], "Contributing": [[18, "contributing"]], "Introduction": [[18, "introduction"], [21, "introduction"]], "Vision": [[18, "vision"]], "Objectives": [[18, "objectives"]], "Non-goals": [[18, "non-goals"]], "Principles": [[18, "principles"]], "Bug Reports": [[18, "bug-reports"]], "Installation": [[18, "installation"]], "Latest Version": [[18, "latest-version"]], "Local Development Environment": [[18, "local-development-environment"]], "Fork and Clone": [[18, "fork-and-clone"]], "Install Dependencies": [[18, "install-dependencies"]], "Run Unit Tests": [[18, "run-unit-tests"]], "Pull Requests": [[18, "pull-requests"]], "Preview Changes": [[18, "preview-changes"]], "Documentation": [[18, "documentation"]], "Naming Conventions": [[18, "naming-conventions"]], "Class Templates": [[18, "class-templates"]], "Function and Method Naming": [[18, "function-and-method-naming"]], "Code Philosophy": [[18, "code-philosophy"]], "Documentation Style": [[18, "documentation-style"]], "Documentation Layers": [[18, "documentation-layers"]], "Building the Docs": [[18, "building-the-docs"]], "Workflow": [[21, "workflow"]], "Overview of documentation": [[21, "overview-of-documentation"]], "References": [[23, "references"]], "Algorithms": [[23, "algorithms"]], "Datasets and Benchmarks": [[23, "datasets-and-benchmarks"]], "Use Cases": [[23, "use-cases"]], "Further Information": [[23, "further-information"]], "Tables": [[24, "tables"]], "Overview Table": [[24, "overview-table"]], "Protein Benchmark Datasets": [[24, "protein-benchmark-datasets"]], "Amino Acid Scale Datasets": [[24, "amino-acid-scale-datasets"]], "Categories": [[24, "categories"]], "Subcategories": [[24, "subcategories"]], "Usage Principles": [[25, "usage-principles"]], "AAontology: Classification of amino acid scales": [[26, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[27, "data-flow-and-enry-points"]], "Data Flow: Components of AAanalysis": [[27, "data-flow-components-of-aaanalysis"]], "Entry Points: Bridges to External Libraries": [[27, "entry-points-bridges-to-external-libraries"]], "Identifying Physicochemical Signatures using CPP": [[28, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[29, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[29, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[30, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[30, "what-is-explainable-ai"]], "Tutorials": [[31, "tutorials"]], "Quick start": [[31, "quick-start"]], "Further Tutorials": [[31, "further-tutorials"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "center_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.center_labels_"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "cluster_naming() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.cluster_naming"]], "correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.correlation"]], "eval() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "get_cluster_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_centers"]], "get_cluster_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_medoids"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "medoid_ind_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_ind_"]], "medoid_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_labels_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "cpp (class in aaanalysis)": [[2, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp static method)": [[2, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[3, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[4, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[5, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[5, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[6, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[7, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[8, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[9, "aaanalysis.plot_get_cdict"]], "plot_get_cmap() (in module aaanalysis)": [[10, "aaanalysis.plot_get_cmap"]], "plot_set_legend() (in module aaanalysis)": [[11, "aaanalysis.plot_set_legend"]], "plot_settings() (in module aaanalysis)": [[12, "aaanalysis.plot_settings"]]}}) \ No newline at end of file diff --git a/docs/source/generated/output_13_1.png b/docs/source/generated/output_13_1.png index d7e83427..4a4b3449 100644 Binary files a/docs/source/generated/output_13_1.png and b/docs/source/generated/output_13_1.png differ diff --git a/docs/source/generated/tutorial1_quick_start.rst b/docs/source/generated/tutorial1_quick_start.rst index 23e6ef58..419f2e19 100644 --- a/docs/source/generated/tutorial1_quick_start.rst +++ b/docs/source/generated/tutorial1_quick_start.rst @@ -34,9 +34,8 @@ available at your fingertips with the ``aa.load_scales()`` function. .. code:: ipython3 import aaanalysis as aa - # Load scales and scale categories (AAontology) + df_scales = aa.load_scales() - # Load training data df_seq = aa.load_dataset(name="DOM_GSEC", n=50) df_seq.head(5) @@ -156,11 +155,12 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: from sklearn.cluster import AgglomerativeClustering import numpy as np - aac = aa.AAclust(model=AgglomerativeClustering, model_kwargs=dict(linkage="ward")) + + aac = aa.AAclust(model=AgglomerativeClustering) X = np.array(df_scales) - scales = aac.fit(X, n_clusters=100, names=list(df_scales)) + scales = aac.fit(X, names=list(df_scales), n_clusters=100) df_scales = df_scales[scales] - df_scales + df_scales[scales[0:4]].head(5) @@ -189,23 +189,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: SIMZ760101 NAKH900106 AURR980112 - CORJ870107 - ROBB760113 - MIYS990104 - BIGC670101 - ROSG850102 - ZIMJ680105 - ... - YUTK870102 - SUEM840102 - VASM830102 - VELV850101 - VENT840101 - MONM990101 - GEOR030102 - GEOR030106 - KARS160120 - LINS030117 AA @@ -213,23 +196,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: - - - - - - - - - - - - - - - - - @@ -239,23 +205,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.268 0.237 0.787 - 0.446 - 0.101 - 0.479 - 0.164 - 0.564 - 0.444 - ... - 0.557 - 0.103 - 0.617 - 0.295 - 0 - 0.077 - 0.250 - 0.516 - 0.952 - 0.186 C @@ -263,23 +212,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.258 0.303 0.104 - 0.725 - 0.849 - 0.000 - 0.323 - 1.000 - 0.000 - ... - 0.680 - 0.337 - 0.734 - 0.657 - 0 - 0.154 - 0.246 - 0.000 - 0.952 - 0.000 D @@ -287,23 +219,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.206 0.000 0.451 - 0.000 - 0.790 - 0.803 - 0.324 - 0.256 - 0.000 - ... - 0.574 - 0.909 - 0.225 - 1.000 - 0 - 0.923 - 0.091 - 0.404 - 0.952 - 0.186 E @@ -311,23 +226,6 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.210 0.090 0.823 - 0.233 - 0.092 - 0.859 - 0.488 - 0.256 - 0.025 - ... - 0.402 - 0.077 - 0.531 - 0.046 - 0 - 0.923 - 0.404 - 0.610 - 0.952 - 0.349 F @@ -335,387 +233,9 @@ set of 100 scales, as defined by the ``n_clusters`` parameters: 0.887 0.724 0.402 - 0.950 - 0.328 - 0.000 - 0.783 - 0.923 - 1.000 - ... - 0.680 - 0.233 - 0.023 - 0.749 - 1 - 0.000 - 0.536 - 0.712 - 0.952 - 0.326 - - - G - 0.025 - 0.032 - 0.259 - 0.055 - 0.352 - 1.000 - 0.662 - 0.000 - 0.513 - 0.175 - ... - 0.525 - 0.000 - 0.455 - 0.040 - 0 - 0.692 - 0.000 - 0.210 - 0.952 - 0.023 - - - H - 0.840 - 0.387 - 0.401 - 0.463 - 0.610 - 0.454 - 0.479 - 0.561 - 0.667 - 0.338 - ... - 0.754 - 0.000 - 0.345 - 0.191 - 0 - 0.923 - 0.201 - 0.612 - 0.562 - 0.419 - - - I - 0.000 - 0.990 - 0.697 - 0.512 - 0.969 - 0.151 - 0.056 - 0.663 - 0.923 - 0.894 - ... - 0.820 - 0.714 - 0.070 - 0.000 - 1 - 0.154 - 0.161 - 0.457 - 0.583 - 0.140 - - - K - 0.506 - 0.516 - 0.127 - 0.591 - 0.027 - 0.613 - 1.000 - 0.694 - 0.000 - 0.044 - ... - 0.615 - 0.012 - 0.688 - 0.294 - 0 - 0.923 - 0.195 - 0.536 - 0.912 - 1.000 - - - L - 0.272 - 0.835 - 0.905 - 0.732 - 1.000 - 0.076 - 0.014 - 0.663 - 0.846 - 0.925 - ... - 1.000 - 0.428 - 0.771 - 0.000 - 1 - 0.000 - 0.513 - 0.690 - 0.952 - 0.186 - - - M - 0.704 - 0.452 - 1.000 - 1.000 - 0.883 - 0.084 - 0.113 - 0.620 - 0.846 - 0.756 - ... - 0.689 - 0.701 - 0.512 - 0.651 - 0 - 0.077 - 0.151 - 0.670 - 0.952 - 0.372 - - - N - 0.988 - 0.029 - 0.381 - 0.287 - 0.171 - 0.924 - 0.718 - 0.398 - 0.282 - 0.162 - ... - 0.508 - 0.000 - 0.313 - 0.028 - 0 - 1.000 - 0.277 - 0.342 - 0.952 - 0.093 - - - P - 0.605 - 0.871 - 0.403 - 0.000 - 0.130 - 0.824 - 0.803 - 0.376 - 0.308 - 0.750 - ... - 0.566 - 0.545 - 0.937 - 0.157 - 0 - 1.000 - 1.000 - 1.000 - 0.952 - 0.698 - - - Q - 0.519 - 0.000 - 0.203 - 0.805 - 0.238 - 0.546 - 0.732 - 0.539 - 0.256 - 0.388 - ... - 0.697 - 0.428 - 0.446 - 0.602 - 0 - 0.923 - 0.478 - 0.530 - 0.952 - 0.256 - - - R - 0.531 - 0.268 - 0.061 - 0.738 - 0.482 - 0.748 - 0.634 - 0.735 - 0.308 - 0.112 - ... - 0.000 - 0.000 - 0.550 - 0.760 - 0 - 1.000 - 0.549 - 0.728 - 0.952 - 0.372 - - - S - 0.679 - 0.045 - 0.450 - 0.293 - 0.293 - 0.798 - 0.704 - 0.188 - 0.359 - 0.256 - ... - 0.656 - 0.000 - 0.868 - 0.657 - 0 - 0.231 - 0.168 - 0.399 - 0.952 - 0.186 - - - T - 0.494 - 0.174 - 0.619 - 0.360 - 0.279 - 0.529 - 0.577 - 0.352 - 0.462 - 0.419 - ... - 0.574 - 0.000 - 1.000 - 0.745 - 0 - 0.000 - 0.344 - 0.513 - 0.000 - 0.419 - - - V - 0.000 - 0.577 - 0.183 - 0.451 - 0.907 - 0.000 - 0.127 - 0.492 - 0.872 - 0.719 - ... - 0.770 - 0.000 - 0.408 - 0.045 - 1 - 0.077 - 0.151 - 0.467 - 0.952 - 0.163 - - - W - 0.926 - 1.000 - 0.707 - 0.805 - 0.500 - 0.773 - 0.070 - 1.000 - 0.846 - 0.894 - ... - 0.467 - 1.000 - 0.138 - 0.434 - 1 - 0.231 - 0.066 - 0.440 - 1.000 - 0.349 - - - Y - 0.802 - 0.990 - 0.425 - 0.524 - 0.771 - 0.798 - 0.127 - 0.806 - 0.615 - 0.762 - ... - 0.557 - 0.857 - 0.000 - 0.408 - 1 - 0.154 - 0.110 - 0.666 - 0.736 - 0.349 -

20 rows × 100 columns

@@ -731,15 +251,15 @@ sequences: the test set and the reference set. Supported by the C-terminal adjacent regions (JMD-N and JMD-C, respectively), obtained ``sf.get_df_parts``. - ``Splits``: These ``Parts`` can be split into various continuous segments or discontinuous patterns, specified -``sf.get_split_kws()``. - ``Scales``: Sets of amino acid scales. We -first use SequenceFeature to obtain Parts and Splits: +``sf.get_split_kws()``. - ``Scales``: Sets of amino acid scales. + +We use SequenceFeature to obtain Parts and Splits: .. code:: ipython3 - # Feature Engineering y = list(df_seq["label"]) sf = aa.SequenceFeature() - df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10, list_parts=["tmd_jmd"]) + df_parts = sf.get_df_parts(df_seq=df_seq, list_parts=["tmd_jmd"]) split_kws = sf.get_split_kws(n_split_max=1, split_types=["Segment"]) df_parts.head(5) @@ -803,9 +323,9 @@ As a baseline approach, we use CPP to compute the average values for the .. code:: ipython3 - # Small set of features (100 features created) - cpp = aa.CPP(df_parts=df_parts, df_scales=df_scales, split_kws=split_kws, verbose=False) - df_feat = cpp.run(labels=y, tmd_len=20, jmd_n_len=10, jmd_c_len=10, n_filter=100) # Default values for lengths are used + # Small set of CPP features (100 features are created) + cpp = aa.CPP(df_scales=df_scales, df_parts=df_parts, split_kws=split_kws, verbose=False) + df_feat = cpp.run(labels=y) df_feat @@ -927,16 +447,16 @@ A feature matrix from a given set of CPP features can be created using from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import cross_val_score - X = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=df_feat["feature"]) - # ML evaluation + + X = sf.feat_matrix(df_parts=df_parts, features=df_feat["feature"]) rf = RandomForestClassifier() - cv_base = cross_val_score(rf, X, y, scoring="accuracy", cv=5, n_jobs=8) # Set n_jobs=1 to disable multi-processing + cv_base = cross_val_score(rf, X, y, scoring="accuracy") print(f"Mean accuracy of {round(np.mean(cv_base), 2)}") .. parsed-literal:: - Mean accuracy of 0.57 + Mean accuracy of 0.58 Creating more features with CPP will take some more time. but improve @@ -944,12 +464,11 @@ prediction performance: .. code:: ipython3 - # Default CPP features (around 100.000 features) - split_kws = sf.get_split_kws() - df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10) - cpp = aa.CPP(df_parts=df_parts, df_scales=df_scales, split_kws=split_kws, verbose=False) - df_feat = cpp.run(labels=y, n_processes=8, n_filter=100) - df_feat + # CPP features with default splits (around 100.000 features) + df_parts = sf.get_df_parts(df_seq=df_seq) + cpp = aa.CPP(df_scales=df_scales, df_parts=df_parts, verbose=False) + df_feat = cpp.run(labels=y) + df_feat.head(10) @@ -1071,104 +590,87 @@ prediction performance: 32,33 - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... - ... + 5 + TMD_C_JMD_C-Segment(4,9)-ROBB760113 + Conformation + β-turn + β-turn + Information measure for loop (Robson-Suzuki, 1... + 0.337 + 0.319440 + -0.319440 + 0.175203 + 0.255754 + 6.100000e-09 + 1.185395e-06 + 27,28 - 95 - JMD_N_TMD_N-Pattern(C,6,9)-NAKH900106 - Composition - Mitochondrial proteins - Mitochondrial proteins - Normalized composition from animal (Nakashima ... - 0.228 - 0.172120 - -0.172120 - 0.180254 - 0.199987 - 8.754340e-05 - 2.693037e-04 - 12,15 + 6 + TMD_C_JMD_C-Segment(2,2)-EISD860102 + Energy + Isoelectric point + Atom-based hydrophobic moment + Atom-based hydrophobic moment (Eisenberg-McLac... + 0.337 + 0.139567 + 0.139567 + 0.098917 + 0.101842 + 6.300000e-09 + 1.185395e-06 + 31,32,33,34,35,36,37,38,39,40 - 96 - JMD_N_TMD_N-Pattern(C,6,9,12)-ZIMJ680105 - Others - PC 2 - Principal Component 1 (Zimmerman) - RF rank (Zimmerman et al., 1968) - 0.227 - 0.133867 - -0.133867 - 0.160532 - 0.161415 - 9.118090e-05 - 2.778863e-04 - 9,12,15 + 7 + TMD_C_JMD_C-Segment(4,5)-RICJ880113 + Conformation + α-helix (C-cap) + α-helix (C-terminal, inside) + Relative preference value at C2 (Richardson-Ri... + 0.336 + 0.223765 + 0.223765 + 0.133513 + 0.178217 + 7.100000e-09 + 1.185395e-06 + 33,34,35,36 - 97 - JMD_N_TMD_N-Segment(7,8)-KARS160107 + 8 + TMD_C_JMD_C-Segment(5,7)-KARS160107 Shape Side chain length Eccentricity (maximum) Diameter (maximum eccentricity) (Karkbara-Knis... - 0.227 - 0.098674 - -0.098674 - 0.104428 - 0.124875 - 8.945330e-05 - 2.740061e-04 - 16,17 + 0.331 + 0.217594 + 0.217594 + 0.136011 + 0.172395 + 1.130000e-08 + 1.331786e-06 + 32,33,34 - 98 - JMD_N_TMD_N-Pattern(C,6,9,12)-SIMZ760101 + 9 + TMD_C_JMD_C-Pattern(C,4,8)-JURD980101 Polarity Hydrophobicity - Transfer free energy (TFE) to outside - Transfer free energy (Simon, 1976), Cited by C... - 0.225 - 0.161307 - -0.161307 - 0.192235 - 0.212741 - 1.036749e-04 - 3.042894e-04 - 9,12,15 - - - 99 - JMD_N_TMD_N-Pattern(C,3,6)-TANS770102 - Conformation - α-helix (C-term, out) - α-helix (C-terminal, outside) - Normalized frequency of isolated helix (Tanaka... - 0.224 - 0.108020 - -0.108020 - 0.133731 - 0.139419 - 1.143783e-04 - 3.272494e-04 - 15,18 + Hydrophobicity + Modified Kyte-Doolittle hydrophobicity scale (... + 0.329 + 0.264720 + -0.264720 + 0.141666 + 0.233134 + 1.480000e-08 + 1.425259e-06 + 33,37 -

100 rows × 13 columns

@@ -1182,21 +684,23 @@ Which can be again used for machine learning: warnings.simplefilter(action='ignore', category=FutureWarning) import matplotlib.pyplot as plt import pandas as pd - X = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=df_feat["feature"]) - # ML evaluation + + X = sf.feat_matrix(df_parts=df_parts, features=df_feat["feature"]) rf = RandomForestClassifier() cv = cross_val_score(rf, X, y, scoring="accuracy", cv=5, n_jobs=1) print(f"Mean accuracy of {round(np.mean(cv), 2)}") + aa.plot_settings(font_scale=1.1) sns.barplot(pd.DataFrame({"Baseline": cv_base, "CPP": cv}), palette=["tab:blue", "tab:red"]) plt.ylabel("Mean accuracy", size=aa.plot_gcfs()+1) + plt.ylim(0, 1) sns.despine() plt.show() .. parsed-literal:: - Mean accuracy of 0.95 + Mean accuracy of 0.9 diff --git a/tutorials/tutorial1_quick_start.ipynb b/tutorials/tutorial1_quick_start.ipynb index 8c635148..38fb55e9 100644 --- a/tutorials/tutorial1_quick_start.ipynb +++ b/tutorials/tutorial1_quick_start.ipynb @@ -22,14 +22,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2023-09-24T06:49:44.600810538Z", - "start_time": "2023-09-24T06:49:44.523495924Z" + "end_time": "2023-09-24T11:18:19.227943399Z", + "start_time": "2023-09-24T11:18:19.176090140Z" } }, "outputs": [ @@ -38,16 +38,15 @@ "text/plain": " entry sequence label tmd_start tmd_stop jmd_n tmd jmd_c\n0 Q14802 MQKVTLGLLVFLAGFPVLDANDLEDKNSPFYYDWHSLQVGGLICAG... 0 37 59 NSPFYYDWHS LQVGGLICAGVLCAMGIIIVMSA KCKCKFGQKS\n1 Q86UE4 MAARSWQDELAQQAEEGSARLREMLSVGLGFLRTELGLDLGLEPKR... 0 50 72 LGLEPKRYPG WVILVGTGALGLLLLFLLGYGWA AACAGARKKR\n2 Q969W9 MHRLMGVNSTAAAAAGQPNVSCTCNCKRSLFQSMEITELEFVQIII... 0 41 63 FQSMEITELE FVQIIIIVVVMMVMVVVITCLLS HYKLSARSFI\n3 P53801 MAPGVARGPTPYWRLRLGGAALLLLLIPVAAAQEPPGAACSQNTNK... 0 97 119 RWGVCWVNFE ALIITMSVVGGTLLLGIAICCCC CCRRKRSRKP\n4 Q8IUW5 MAPRALPGSAVLAAAVFVGGAVSSPLVAPDNGSSRTLHSRTETTPS... 0 59 81 NDTGNGHPEY IAYALVPVFFIMGLFGVLICHLL KKKGYRCTTE", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
entrysequencelabeltmd_starttmd_stopjmd_ntmdjmd_c
0Q14802MQKVTLGLLVFLAGFPVLDANDLEDKNSPFYYDWHSLQVGGLICAG...03759NSPFYYDWHSLQVGGLICAGVLCAMGIIIVMSAKCKCKFGQKS
1Q86UE4MAARSWQDELAQQAEEGSARLREMLSVGLGFLRTELGLDLGLEPKR...05072LGLEPKRYPGWVILVGTGALGLLLLFLLGYGWAAACAGARKKR
2Q969W9MHRLMGVNSTAAAAAGQPNVSCTCNCKRSLFQSMEITELEFVQIII...04163FQSMEITELEFVQIIIIVVVMMVMVVVITCLLSHYKLSARSFI
3P53801MAPGVARGPTPYWRLRLGGAALLLLLIPVAAAQEPPGAACSQNTNK...097119RWGVCWVNFEALIITMSVVGGTLLLGIAICCCCCCRRKRSRKP
4Q8IUW5MAPRALPGSAVLAAAVFVGGAVSSPLVAPDNGSSRTLHSRTETTPS...05981NDTGNGHPEYIAYALVPVFFIMGLFGVLICHLLKKKGYRCTTE
\n
" }, - "execution_count": 14, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import aaanalysis as aa\n", - "# Load scales and scale categories (AAontology) \n", + "\n", "df_scales = aa.load_scales()\n", - "# Load training data\n", "df_seq = aa.load_dataset(name=\"DOM_GSEC\", n=50)\n", "df_seq.head(5)" ] @@ -69,14 +68,14 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 12, "outputs": [ { "data": { - "text/plain": " ANDN920101 SIMZ760101 NAKH900106 AURR980112 CORJ870107 ROBB760113 MIYS990104 BIGC670101 ROSG850102 ZIMJ680105 ... YUTK870102 SUEM840102 VASM830102 VELV850101 VENT840101 MONM990101 GEOR030102 GEOR030106 KARS160120 LINS030117\nAA ... \nA 0.494 0.268 0.237 0.787 0.446 0.101 0.479 0.164 0.564 0.444 ... 0.557 0.103 0.617 0.295 0 0.077 0.250 0.516 0.952 0.186\nC 0.864 0.258 0.303 0.104 0.725 0.849 0.000 0.323 1.000 0.000 ... 0.680 0.337 0.734 0.657 0 0.154 0.246 0.000 0.952 0.000\nD 1.000 0.206 0.000 0.451 0.000 0.790 0.803 0.324 0.256 0.000 ... 0.574 0.909 0.225 1.000 0 0.923 0.091 0.404 0.952 0.186\nE 0.420 0.210 0.090 0.823 0.233 0.092 0.859 0.488 0.256 0.025 ... 0.402 0.077 0.531 0.046 0 0.923 0.404 0.610 0.952 0.349\nF 0.877 0.887 0.724 0.402 0.950 0.328 0.000 0.783 0.923 1.000 ... 0.680 0.233 0.023 0.749 1 0.000 0.536 0.712 0.952 0.326\nG 0.025 0.032 0.259 0.055 0.352 1.000 0.662 0.000 0.513 0.175 ... 0.525 0.000 0.455 0.040 0 0.692 0.000 0.210 0.952 0.023\nH 0.840 0.387 0.401 0.463 0.610 0.454 0.479 0.561 0.667 0.338 ... 0.754 0.000 0.345 0.191 0 0.923 0.201 0.612 0.562 0.419\nI 0.000 0.990 0.697 0.512 0.969 0.151 0.056 0.663 0.923 0.894 ... 0.820 0.714 0.070 0.000 1 0.154 0.161 0.457 0.583 0.140\nK 0.506 0.516 0.127 0.591 0.027 0.613 1.000 0.694 0.000 0.044 ... 0.615 0.012 0.688 0.294 0 0.923 0.195 0.536 0.912 1.000\nL 0.272 0.835 0.905 0.732 1.000 0.076 0.014 0.663 0.846 0.925 ... 1.000 0.428 0.771 0.000 1 0.000 0.513 0.690 0.952 0.186\nM 0.704 0.452 1.000 1.000 0.883 0.084 0.113 0.620 0.846 0.756 ... 0.689 0.701 0.512 0.651 0 0.077 0.151 0.670 0.952 0.372\nN 0.988 0.029 0.381 0.287 0.171 0.924 0.718 0.398 0.282 0.162 ... 0.508 0.000 0.313 0.028 0 1.000 0.277 0.342 0.952 0.093\nP 0.605 0.871 0.403 0.000 0.130 0.824 0.803 0.376 0.308 0.750 ... 0.566 0.545 0.937 0.157 0 1.000 1.000 1.000 0.952 0.698\nQ 0.519 0.000 0.203 0.805 0.238 0.546 0.732 0.539 0.256 0.388 ... 0.697 0.428 0.446 0.602 0 0.923 0.478 0.530 0.952 0.256\nR 0.531 0.268 0.061 0.738 0.482 0.748 0.634 0.735 0.308 0.112 ... 0.000 0.000 0.550 0.760 0 1.000 0.549 0.728 0.952 0.372\nS 0.679 0.045 0.450 0.293 0.293 0.798 0.704 0.188 0.359 0.256 ... 0.656 0.000 0.868 0.657 0 0.231 0.168 0.399 0.952 0.186\nT 0.494 0.174 0.619 0.360 0.279 0.529 0.577 0.352 0.462 0.419 ... 0.574 0.000 1.000 0.745 0 0.000 0.344 0.513 0.000 0.419\nV 0.000 0.577 0.183 0.451 0.907 0.000 0.127 0.492 0.872 0.719 ... 0.770 0.000 0.408 0.045 1 0.077 0.151 0.467 0.952 0.163\nW 0.926 1.000 0.707 0.805 0.500 0.773 0.070 1.000 0.846 0.894 ... 0.467 1.000 0.138 0.434 1 0.231 0.066 0.440 1.000 0.349\nY 0.802 0.990 0.425 0.524 0.771 0.798 0.127 0.806 0.615 0.762 ... 0.557 0.857 0.000 0.408 1 0.154 0.110 0.666 0.736 0.349\n\n[20 rows x 100 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ANDN920101SIMZ760101NAKH900106AURR980112CORJ870107ROBB760113MIYS990104BIGC670101ROSG850102ZIMJ680105...YUTK870102SUEM840102VASM830102VELV850101VENT840101MONM990101GEOR030102GEOR030106KARS160120LINS030117
AA
A0.4940.2680.2370.7870.4460.1010.4790.1640.5640.444...0.5570.1030.6170.29500.0770.2500.5160.9520.186
C0.8640.2580.3030.1040.7250.8490.0000.3231.0000.000...0.6800.3370.7340.65700.1540.2460.0000.9520.000
D1.0000.2060.0000.4510.0000.7900.8030.3240.2560.000...0.5740.9090.2251.00000.9230.0910.4040.9520.186
E0.4200.2100.0900.8230.2330.0920.8590.4880.2560.025...0.4020.0770.5310.04600.9230.4040.6100.9520.349
F0.8770.8870.7240.4020.9500.3280.0000.7830.9231.000...0.6800.2330.0230.74910.0000.5360.7120.9520.326
G0.0250.0320.2590.0550.3521.0000.6620.0000.5130.175...0.5250.0000.4550.04000.6920.0000.2100.9520.023
H0.8400.3870.4010.4630.6100.4540.4790.5610.6670.338...0.7540.0000.3450.19100.9230.2010.6120.5620.419
I0.0000.9900.6970.5120.9690.1510.0560.6630.9230.894...0.8200.7140.0700.00010.1540.1610.4570.5830.140
K0.5060.5160.1270.5910.0270.6131.0000.6940.0000.044...0.6150.0120.6880.29400.9230.1950.5360.9121.000
L0.2720.8350.9050.7321.0000.0760.0140.6630.8460.925...1.0000.4280.7710.00010.0000.5130.6900.9520.186
M0.7040.4521.0001.0000.8830.0840.1130.6200.8460.756...0.6890.7010.5120.65100.0770.1510.6700.9520.372
N0.9880.0290.3810.2870.1710.9240.7180.3980.2820.162...0.5080.0000.3130.02801.0000.2770.3420.9520.093
P0.6050.8710.4030.0000.1300.8240.8030.3760.3080.750...0.5660.5450.9370.15701.0001.0001.0000.9520.698
Q0.5190.0000.2030.8050.2380.5460.7320.5390.2560.388...0.6970.4280.4460.60200.9230.4780.5300.9520.256
R0.5310.2680.0610.7380.4820.7480.6340.7350.3080.112...0.0000.0000.5500.76001.0000.5490.7280.9520.372
S0.6790.0450.4500.2930.2930.7980.7040.1880.3590.256...0.6560.0000.8680.65700.2310.1680.3990.9520.186
T0.4940.1740.6190.3600.2790.5290.5770.3520.4620.419...0.5740.0001.0000.74500.0000.3440.5130.0000.419
V0.0000.5770.1830.4510.9070.0000.1270.4920.8720.719...0.7700.0000.4080.04510.0770.1510.4670.9520.163
W0.9261.0000.7070.8050.5000.7730.0701.0000.8460.894...0.4671.0000.1380.43410.2310.0660.4401.0000.349
Y0.8020.9900.4250.5240.7710.7980.1270.8060.6150.762...0.5570.8570.0000.40810.1540.1100.6660.7360.349
\n

20 rows × 100 columns

\n
" + "text/plain": " ANDN920101 SIMZ760101 NAKH900106 AURR980112\nAA \nA 0.494 0.268 0.237 0.787\nC 0.864 0.258 0.303 0.104\nD 1.000 0.206 0.000 0.451\nE 0.420 0.210 0.090 0.823\nF 0.877 0.887 0.724 0.402", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ANDN920101SIMZ760101NAKH900106AURR980112
AA
A0.4940.2680.2370.787
C0.8640.2580.3030.104
D1.0000.2060.0000.451
E0.4200.2100.0900.823
F0.8770.8870.7240.402
\n
" }, - "execution_count": 31, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -84,17 +83,18 @@ "source": [ "from sklearn.cluster import AgglomerativeClustering\n", "import numpy as np\n", - "aac = aa.AAclust(model=AgglomerativeClustering, model_kwargs=dict(linkage=\"ward\"))\n", + "\n", + "aac = aa.AAclust(model=AgglomerativeClustering)\n", "X = np.array(df_scales)\n", - "scales = aac.fit(X, n_clusters=100, names=list(df_scales)) \n", + "scales = aac.fit(X, names=list(df_scales), n_clusters=100) \n", "df_scales = df_scales[scales]\n", - "df_scales" + "df_scales[scales[0:4]].head(5)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-24T07:09:02.332982857Z", - "start_time": "2023-09-24T07:09:02.142147429Z" + "end_time": "2023-09-24T11:18:22.964537774Z", + "start_time": "2023-09-24T11:18:22.855795499Z" } } }, @@ -106,7 +106,8 @@ "- ``Parts``: Are combination of a target middle domain (TMD) and N- and C-terminal adjacent regions (JMD-N and JMD-C, respectively), obtained ``sf.get_df_parts``.\n", "- ``Splits``: These `Parts` can be split into various continuous segments or discontinuous patterns, specified ``sf.get_split_kws()``. \n", "- ``Scales``: Sets of amino acid scales.\n", - "We first use SequenceFeature to obtain Parts and Splits:" + "\n", + "We use SequenceFeature to obtain Parts and Splits:" ], "metadata": { "collapsed": false @@ -114,14 +115,14 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 13, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2023-09-24T07:09:02.340451349Z", - "start_time": "2023-09-24T07:09:02.251538089Z" + "end_time": "2023-09-24T11:18:25.253400531Z", + "start_time": "2023-09-24T11:18:25.143157741Z" } }, "outputs": [ @@ -130,16 +131,15 @@ "text/plain": " tmd_jmd\nD3ZZK3 RIIGDGANSTVLLVSVSGSVVLVVILIAAFVISRRRSKYSQAK\nO14786 PGNVLKTLDPILITIIAMSALGVLLGAVCGVVLYCACWHNGMS\nO35516 SELESPRNAQLLYLLAVAVVIILFFILLGVIMAKRKRKHGFLW\nO43914 DCSCSTVSPGVLAGIVMGDLVLTVLIALAVYFLGRLVPRGRGA\nO75581 YPTEEPAPQATNTVGSVIGVIVTIFVSGTVYFICQRMLCPRMK", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
tmd_jmd
D3ZZK3RIIGDGANSTVLLVSVSGSVVLVVILIAAFVISRRRSKYSQAK
O14786PGNVLKTLDPILITIIAMSALGVLLGAVCGVVLYCACWHNGMS
O35516SELESPRNAQLLYLLAVAVVIILFFILLGVIMAKRKRKHGFLW
O43914DCSCSTVSPGVLAGIVMGDLVLTVLIALAVYFLGRLVPRGRGA
O75581YPTEEPAPQATNTVGSVIGVIVTIFVSGTVYFICQRMLCPRMK
\n
" }, - "execution_count": 32, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Feature Engineering\n", "y = list(df_seq[\"label\"])\n", "sf = aa.SequenceFeature()\n", - "df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10, list_parts=[\"tmd_jmd\"])\n", + "df_parts = sf.get_df_parts(df_seq=df_seq, list_parts=[\"tmd_jmd\"])\n", "split_kws = sf.get_split_kws(n_split_max=1, split_types=[\"Segment\"])\n", "df_parts.head(5)" ] @@ -156,29 +156,29 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 14, "outputs": [ { "data": { "text/plain": " feature category subcategory scale_name scale_description abs_auc abs_mean_dif mean_dif std_test std_ref p_val_mann_whitney p_val_fdr_bh positions\n0 TMD_JMD-Segment(1,1)-ANDN920101 Structure-Activity Backbone-dynamics (-CH) α-CH chemical shifts (backbone-dynamics) alpha-CH chemical shifts (Andersen et al., 1992) 0.130 0.022966 0.022966 0.054433 0.053266 0.025737 0.099022 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,1...\n1 TMD_JMD-Segment(1,1)-VASM830101 Conformation Unclassified (Conformation) α-helix Relative population of conformational state A ... 0.120 0.019298 -0.019298 0.046755 0.049127 0.039609 0.099022 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,1...\n2 TMD_JMD-Segment(1,1)-ROBB760113 Conformation β-turn β-turn Information measure for loop (Robson-Suzuki, 1... 0.108 0.021958 0.021958 0.060658 0.053190 0.062212 0.100670 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,1...\n3 TMD_JMD-Segment(1,1)-RACS820103 Conformation Unclassified (Conformation) α-helix (left-handed) Average relative fractional occurrence in AL(i... 0.080 0.019579 -0.019579 0.072260 0.047452 0.166907 0.166907 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,1...", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
featurecategorysubcategoryscale_namescale_descriptionabs_aucabs_mean_difmean_difstd_teststd_refp_val_mann_whitneyp_val_fdr_bhpositions
0TMD_JMD-Segment(1,1)-ANDN920101Structure-ActivityBackbone-dynamics (-CH)α-CH chemical shifts (backbone-dynamics)alpha-CH chemical shifts (Andersen et al., 1992)0.1300.0229660.0229660.0544330.0532660.0257370.0990221,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,1...
1TMD_JMD-Segment(1,1)-VASM830101ConformationUnclassified (Conformation)α-helixRelative population of conformational state A ...0.1200.019298-0.0192980.0467550.0491270.0396090.0990221,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,1...
2TMD_JMD-Segment(1,1)-ROBB760113Conformationβ-turnβ-turnInformation measure for loop (Robson-Suzuki, 1...0.1080.0219580.0219580.0606580.0531900.0622120.1006701,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,1...
3TMD_JMD-Segment(1,1)-RACS820103ConformationUnclassified (Conformation)α-helix (left-handed)Average relative fractional occurrence in AL(i...0.0800.019579-0.0195790.0722600.0474520.1669070.1669071,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,1...
\n
" }, - "execution_count": 33, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Small set of features (100 features created)\n", - "cpp = aa.CPP(df_parts=df_parts, df_scales=df_scales, split_kws=split_kws, verbose=False)\n", - "df_feat = cpp.run(labels=y, tmd_len=20, jmd_n_len=10, jmd_c_len=10, n_filter=100) # Default values for lengths are used\n", + "# Small set of CPP features (100 features are created)\n", + "cpp = aa.CPP(df_scales=df_scales, df_parts=df_parts, split_kws=split_kws, verbose=False)\n", + "df_feat = cpp.run(labels=y) \n", "df_feat" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-24T07:09:16.423080603Z", - "start_time": "2023-09-24T07:09:07.771862935Z" + "end_time": "2023-09-24T11:18:35.720886606Z", + "start_time": "2023-09-24T11:18:27.250917313Z" } } }, @@ -196,30 +196,30 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 15, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Mean accuracy of 0.57\n" + "Mean accuracy of 0.58\n" ] } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.model_selection import cross_val_score\n", - "X = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=df_feat[\"feature\"])\n", - "# ML evaluation\n", + "\n", + "X = sf.feat_matrix(df_parts=df_parts, features=df_feat[\"feature\"])\n", "rf = RandomForestClassifier()\n", - "cv_base = cross_val_score(rf, X, y, scoring=\"accuracy\", cv=5, n_jobs=8) # Set n_jobs=1 to disable multi-processing\n", + "cv_base = cross_val_score(rf, X, y, scoring=\"accuracy\")\n", "print(f\"Mean accuracy of {round(np.mean(cv_base), 2)}\")" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-24T07:09:24.862316908Z", - "start_time": "2023-09-24T07:09:20.126515444Z" + "end_time": "2023-09-24T11:18:43.192973177Z", + "start_time": "2023-09-24T11:18:39.111479446Z" } } }, @@ -234,31 +234,30 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 16, "outputs": [ { "data": { - "text/plain": " feature category subcategory scale_name scale_description abs_auc abs_mean_dif mean_dif std_test std_ref p_val_mann_whitney p_val_fdr_bh positions\n0 TMD_C_JMD_C-Segment(2,3)-QIAN880106 Conformation α-helix α-helix (middle) Weights for alpha-helix at the window position... 0.387 0.121446 0.121446 0.069196 0.085013 0.000000e+00 0.000000e+00 27,28,29,30,31,32,33\n1 TMD_C_JMD_C-Segment(4,5)-ZIMJ680104 Energy Isoelectric point Isoelectric point Isoelectric point (Zimmerman et al., 1968) 0.373 0.220000 0.220000 0.123716 0.137350 1.000000e-10 2.475000e-07 33,34,35,36\n2 TMD_C_JMD_C-Pattern(N,5,8,12,15)-QIAN880106 Conformation α-helix α-helix (middle) Weights for alpha-helix at the window position... 0.358 0.144860 0.144860 0.079321 0.117515 7.000000e-10 7.150000e-07 25,28,32,35\n3 TMD_C_JMD_C-Segment(5,7)-LINS030101 ASA/Volume Volume Accessible surface area (ASA) Total accessible surfaces of whole residues (b... 0.354 0.237161 0.237161 0.145884 0.164285 1.100000e-09 7.150000e-07 32,33,34\n4 TMD_C_JMD_C-Segment(6,9)-ZIMJ680104 Energy Isoelectric point Isoelectric point Isoelectric point (Zimmerman et al., 1968) 0.341 0.263651 0.263651 0.187136 0.171995 4.000000e-09 1.185395e-06 32,33\n.. ... ... ... ... ... ... ... ... ... ... ... ... ...\n95 JMD_N_TMD_N-Pattern(C,6,9)-NAKH900106 Composition Mitochondrial proteins Mitochondrial proteins Normalized composition from animal (Nakashima ... 0.228 0.172120 -0.172120 0.180254 0.199987 8.754340e-05 2.693037e-04 12,15\n96 JMD_N_TMD_N-Pattern(C,6,9,12)-ZIMJ680105 Others PC 2 Principal Component 1 (Zimmerman) RF rank (Zimmerman et al., 1968) 0.227 0.133867 -0.133867 0.160532 0.161415 9.118090e-05 2.778863e-04 9,12,15\n97 JMD_N_TMD_N-Segment(7,8)-KARS160107 Shape Side chain length Eccentricity (maximum) Diameter (maximum eccentricity) (Karkbara-Knis... 0.227 0.098674 -0.098674 0.104428 0.124875 8.945330e-05 2.740061e-04 16,17\n98 JMD_N_TMD_N-Pattern(C,6,9,12)-SIMZ760101 Polarity Hydrophobicity Transfer free energy (TFE) to outside Transfer free energy (Simon, 1976), Cited by C... 0.225 0.161307 -0.161307 0.192235 0.212741 1.036749e-04 3.042894e-04 9,12,15\n99 JMD_N_TMD_N-Pattern(C,3,6)-TANS770102 Conformation α-helix (C-term, out) α-helix (C-terminal, outside) Normalized frequency of isolated helix (Tanaka... 0.224 0.108020 -0.108020 0.133731 0.139419 1.143783e-04 3.272494e-04 15,18\n\n[100 rows x 13 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
featurecategorysubcategoryscale_namescale_descriptionabs_aucabs_mean_difmean_difstd_teststd_refp_val_mann_whitneyp_val_fdr_bhpositions
0TMD_C_JMD_C-Segment(2,3)-QIAN880106Conformationα-helixα-helix (middle)Weights for alpha-helix at the window position...0.3870.1214460.1214460.0691960.0850130.000000e+000.000000e+0027,28,29,30,31,32,33
1TMD_C_JMD_C-Segment(4,5)-ZIMJ680104EnergyIsoelectric pointIsoelectric pointIsoelectric point (Zimmerman et al., 1968)0.3730.2200000.2200000.1237160.1373501.000000e-102.475000e-0733,34,35,36
2TMD_C_JMD_C-Pattern(N,5,8,12,15)-QIAN880106Conformationα-helixα-helix (middle)Weights for alpha-helix at the window position...0.3580.1448600.1448600.0793210.1175157.000000e-107.150000e-0725,28,32,35
3TMD_C_JMD_C-Segment(5,7)-LINS030101ASA/VolumeVolumeAccessible surface area (ASA)Total accessible surfaces of whole residues (b...0.3540.2371610.2371610.1458840.1642851.100000e-097.150000e-0732,33,34
4TMD_C_JMD_C-Segment(6,9)-ZIMJ680104EnergyIsoelectric pointIsoelectric pointIsoelectric point (Zimmerman et al., 1968)0.3410.2636510.2636510.1871360.1719954.000000e-091.185395e-0632,33
..........................................
95JMD_N_TMD_N-Pattern(C,6,9)-NAKH900106CompositionMitochondrial proteinsMitochondrial proteinsNormalized composition from animal (Nakashima ...0.2280.172120-0.1721200.1802540.1999878.754340e-052.693037e-0412,15
96JMD_N_TMD_N-Pattern(C,6,9,12)-ZIMJ680105OthersPC 2Principal Component 1 (Zimmerman)RF rank (Zimmerman et al., 1968)0.2270.133867-0.1338670.1605320.1614159.118090e-052.778863e-049,12,15
97JMD_N_TMD_N-Segment(7,8)-KARS160107ShapeSide chain lengthEccentricity (maximum)Diameter (maximum eccentricity) (Karkbara-Knis...0.2270.098674-0.0986740.1044280.1248758.945330e-052.740061e-0416,17
98JMD_N_TMD_N-Pattern(C,6,9,12)-SIMZ760101PolarityHydrophobicityTransfer free energy (TFE) to outsideTransfer free energy (Simon, 1976), Cited by C...0.2250.161307-0.1613070.1922350.2127411.036749e-043.042894e-049,12,15
99JMD_N_TMD_N-Pattern(C,3,6)-TANS770102Conformationα-helix (C-term, out)α-helix (C-terminal, outside)Normalized frequency of isolated helix (Tanaka...0.2240.108020-0.1080200.1337310.1394191.143783e-043.272494e-0415,18
\n

100 rows × 13 columns

\n
" + "text/plain": " feature category subcategory scale_name scale_description abs_auc abs_mean_dif mean_dif std_test std_ref p_val_mann_whitney p_val_fdr_bh positions\n0 TMD_C_JMD_C-Segment(2,3)-QIAN880106 Conformation α-helix α-helix (middle) Weights for alpha-helix at the window position... 0.387 0.121446 0.121446 0.069196 0.085013 0.000000e+00 0.000000e+00 27,28,29,30,31,32,33\n1 TMD_C_JMD_C-Segment(4,5)-ZIMJ680104 Energy Isoelectric point Isoelectric point Isoelectric point (Zimmerman et al., 1968) 0.373 0.220000 0.220000 0.123716 0.137350 1.000000e-10 2.475000e-07 33,34,35,36\n2 TMD_C_JMD_C-Pattern(N,5,8,12,15)-QIAN880106 Conformation α-helix α-helix (middle) Weights for alpha-helix at the window position... 0.358 0.144860 0.144860 0.079321 0.117515 7.000000e-10 7.150000e-07 25,28,32,35\n3 TMD_C_JMD_C-Segment(5,7)-LINS030101 ASA/Volume Volume Accessible surface area (ASA) Total accessible surfaces of whole residues (b... 0.354 0.237161 0.237161 0.145884 0.164285 1.100000e-09 7.150000e-07 32,33,34\n4 TMD_C_JMD_C-Segment(6,9)-ZIMJ680104 Energy Isoelectric point Isoelectric point Isoelectric point (Zimmerman et al., 1968) 0.341 0.263651 0.263651 0.187136 0.171995 4.000000e-09 1.185395e-06 32,33\n5 TMD_C_JMD_C-Segment(4,9)-ROBB760113 Conformation β-turn β-turn Information measure for loop (Robson-Suzuki, 1... 0.337 0.319440 -0.319440 0.175203 0.255754 6.100000e-09 1.185395e-06 27,28\n6 TMD_C_JMD_C-Segment(2,2)-EISD860102 Energy Isoelectric point Atom-based hydrophobic moment Atom-based hydrophobic moment (Eisenberg-McLac... 0.337 0.139567 0.139567 0.098917 0.101842 6.300000e-09 1.185395e-06 31,32,33,34,35,36,37,38,39,40\n7 TMD_C_JMD_C-Segment(4,5)-RICJ880113 Conformation α-helix (C-cap) α-helix (C-terminal, inside) Relative preference value at C2 (Richardson-Ri... 0.336 0.223765 0.223765 0.133513 0.178217 7.100000e-09 1.185395e-06 33,34,35,36\n8 TMD_C_JMD_C-Segment(5,7)-KARS160107 Shape Side chain length Eccentricity (maximum) Diameter (maximum eccentricity) (Karkbara-Knis... 0.331 0.217594 0.217594 0.136011 0.172395 1.130000e-08 1.331786e-06 32,33,34\n9 TMD_C_JMD_C-Pattern(C,4,8)-JURD980101 Polarity Hydrophobicity Hydrophobicity Modified Kyte-Doolittle hydrophobicity scale (... 0.329 0.264720 -0.264720 0.141666 0.233134 1.480000e-08 1.425259e-06 33,37", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
featurecategorysubcategoryscale_namescale_descriptionabs_aucabs_mean_difmean_difstd_teststd_refp_val_mann_whitneyp_val_fdr_bhpositions
0TMD_C_JMD_C-Segment(2,3)-QIAN880106Conformationα-helixα-helix (middle)Weights for alpha-helix at the window position...0.3870.1214460.1214460.0691960.0850130.000000e+000.000000e+0027,28,29,30,31,32,33
1TMD_C_JMD_C-Segment(4,5)-ZIMJ680104EnergyIsoelectric pointIsoelectric pointIsoelectric point (Zimmerman et al., 1968)0.3730.2200000.2200000.1237160.1373501.000000e-102.475000e-0733,34,35,36
2TMD_C_JMD_C-Pattern(N,5,8,12,15)-QIAN880106Conformationα-helixα-helix (middle)Weights for alpha-helix at the window position...0.3580.1448600.1448600.0793210.1175157.000000e-107.150000e-0725,28,32,35
3TMD_C_JMD_C-Segment(5,7)-LINS030101ASA/VolumeVolumeAccessible surface area (ASA)Total accessible surfaces of whole residues (b...0.3540.2371610.2371610.1458840.1642851.100000e-097.150000e-0732,33,34
4TMD_C_JMD_C-Segment(6,9)-ZIMJ680104EnergyIsoelectric pointIsoelectric pointIsoelectric point (Zimmerman et al., 1968)0.3410.2636510.2636510.1871360.1719954.000000e-091.185395e-0632,33
5TMD_C_JMD_C-Segment(4,9)-ROBB760113Conformationβ-turnβ-turnInformation measure for loop (Robson-Suzuki, 1...0.3370.319440-0.3194400.1752030.2557546.100000e-091.185395e-0627,28
6TMD_C_JMD_C-Segment(2,2)-EISD860102EnergyIsoelectric pointAtom-based hydrophobic momentAtom-based hydrophobic moment (Eisenberg-McLac...0.3370.1395670.1395670.0989170.1018426.300000e-091.185395e-0631,32,33,34,35,36,37,38,39,40
7TMD_C_JMD_C-Segment(4,5)-RICJ880113Conformationα-helix (C-cap)α-helix (C-terminal, inside)Relative preference value at C2 (Richardson-Ri...0.3360.2237650.2237650.1335130.1782177.100000e-091.185395e-0633,34,35,36
8TMD_C_JMD_C-Segment(5,7)-KARS160107ShapeSide chain lengthEccentricity (maximum)Diameter (maximum eccentricity) (Karkbara-Knis...0.3310.2175940.2175940.1360110.1723951.130000e-081.331786e-0632,33,34
9TMD_C_JMD_C-Pattern(C,4,8)-JURD980101PolarityHydrophobicityHydrophobicityModified Kyte-Doolittle hydrophobicity scale (...0.3290.264720-0.2647200.1416660.2331341.480000e-081.425259e-0633,37
\n
" }, - "execution_count": 35, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Default CPP features (around 100.000 features)\n", - "split_kws = sf.get_split_kws()\n", - "df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10)\n", - "cpp = aa.CPP(df_parts=df_parts, df_scales=df_scales, split_kws=split_kws, verbose=False)\n", - "df_feat = cpp.run(labels=y, n_processes=8, n_filter=100)\n", - "df_feat" + "# CPP features with default splits (around 100.000 features)\n", + "df_parts = sf.get_df_parts(df_seq=df_seq)\n", + "cpp = aa.CPP(df_scales=df_scales, df_parts=df_parts, verbose=False)\n", + "df_feat = cpp.run(labels=y)\n", + "df_feat.head(10)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-24T07:10:55.161445139Z", - "start_time": "2023-09-24T07:09:27.289343470Z" + "end_time": "2023-09-24T11:20:06.208160304Z", + "start_time": "2023-09-24T11:18:46.701291090Z" } } }, @@ -273,19 +272,19 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 18, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Mean accuracy of 0.95\n" + "Mean accuracy of 0.9\n" ] }, { "data": { "text/plain": "
", - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAGtCAYAAAAlE2HVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABGbElEQVR4nO3de1hVVeL/8c8BQRAUBFNRNG95TVPwmiJqTfmrtBwbK820bPQbY2qapd28TU3pWHmb0mrQahom7aI5Zs2YYNloKCmaeMnC+xVBQUFu+/eHw55zBPXAPkcO8H49D8/svdc6a61jg31ae+21bYZhGAIAAIBLeZX3AAAAACojQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWeVo2LBhioyM1LBhw8p7KAAAwMWqlfcAqrLdu3crKSmpvIcBAADcgJksAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHCDShOyjh49qurVq8tms8lms5W5ndzcXM2dO1cREREKDAxUrVq11L17dy1evFiFhYUuHDEAAKjMKsUWDoZh6IknnlBubq6ldi5cuKD+/fvr22+/dbi+efNmbd68WV9++aVWrFihatUqxR8bAABwowo/k5WXl6dRo0Zp1apVltuaPn26Q8Bq2bKlmjZtap6vXLlSb775puV+AABA5VehQ9aPP/6oLl26KDY21nJbZ8+e1fz5883z8ePHa8+ePfr555/14IMPmtf/9Kc/KS8vz3J/AACgcquwIWvgwIGKjIzU9u3bXdLeqlWrdPHiRUmSzWbT1KlTJUleXl564YUXzHpnzpzRunXrXNInAACovCpsyPriiy9kGIYkqWfPnnrssccstZeYmGgeN2nSRPXq1TPP27Vrp+Dg4BLrAgAAlKTChixJCgoK0vTp0xUfH69GjRpZaislJcU8Dg8PL1besGHDEusCAACUpMI+Jvfmm2/qscceU82aNV3S3unTp83joKCgYuWBgYHmcVpa2hXbWbx4sZYsWeJUn4Q1AAAqrwobssaPH+/S9s6fP28e+/j4FCu3v5aVlXXFdo4dO6akpCSXjg0AAFQ8FTZkuVrR+i5JJW5m6uwGp2FhYYqIiHCqbkpKirKzs50bIADA48yaNUuHDh1yuNaoUSO9+OKL5TQieBJC1n8FBASYxyVt0WB/zf7W4eXGjBmjMWPGONVnZGQks14AUIEdOnRI+/fvL+9hwENV6IXvrhQSEmIeZ2ZmFis/d+6ceVynTp3rMiYAAFBxEbL+q2XLlubx0aNHi5UfOXLEPG7Tps11GRMAAKi4CFn/FRkZaR7//PPPOnHihHm+Z88enT171jzv0qXLdR0bAACoeAhZ/zVw4EB5e3tLkgoLC/XKK69IurQg/k9/+pNZLzQ0VP369SuXMQIAgIqjSoWs48ePq0+fPubP2rVrzbJ69eo5LFifP3++WrVqpRYtWmjZsmXm9alTp5a4xQMAAIC9KvV0YU5OjhISEszzkSNHOpTPnj1bu3btUnx8vCRp7969DuUDBw50+f5cAACgcqpSM1nXEhAQoH/961+aP3++IiMjFRAQoICAAHXp0kVvvfWWPv30U1WrVqVyKQAAKCObYb8LJ66ron2yIiIitHXr1vIeDgCglEaPHl1sn6zmzZs7/Xo1VG7MZAEAALgBIQsAAMANCFkAAABuQMgCAABwA0IWAACAGxCyAAAA3ICQBQAA4AbsrAkAFVhC7+jyHkKVllXNW7LZHK/9/DP/XMpZ9IaEa1e6DpjJAgAAcANCFgAAgBsQsgAAANyAkAUAAOAGhCwAAAA3IGQBAAC4ASELAADADQhZAAAAbkDIAgAAcANCFgAAgBsQsgAAANyAkAUAAOAGvCAaAIAyCjUMp66haiJkAQBQRvcXFJb3EODBuF0IAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHADQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHADQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHADQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHCDCh2yMjMz9dJLL6lt27by9/dX7dq11bdvX3388cdlbnPTpk168MEHFR4eLl9fX9WqVUvdunXT3LlzdfHiRReOHgAAVGbVynsAZXXy5En16dNHKSkp5rWcnBzFx8crPj5eGzZs0MKFC0vV5uLFixUTE6PCwkLzWl5enn744Qf98MMP+sc//qF///vfqlWrlsu+BwAAqJwq7EzW2LFjHQJW+/btFRYWZp4vWrRIK1ascLq9ffv2aezYsWbA8vHxUUREhBo2bGjWSUxM1NNPP+2C0QMAgMquQoas3bt3a/ny5eb5G2+8oeTkZKWmpqpXr17m9RkzZjjdZlxcnPLz8yVJNWrUUGJiorZu3aoDBw5o5MiRZr2PPvpIeXl51r8EAACo1CpkyLIPWMHBwYqJiZEk+fr6asqUKWbZzp07tXv3bqfaPHz4sHn8m9/8RrfccoskydvbW+PHjzfLzp8/r7S0NEvjBwAAlV+FDFmJiYnmcadOneTr62ue9+zZ84p1r6ZJkybmcWZmpkNZRkaGeezr66s6deqUYrQAAKAqshSyvvzySxUUFLhqLE6zX4sVHh7uUBYcHKyAgIAS617NI488osDAQEnS+vXr9c477ygrK0spKSkO67AefvhhVatWYZ8XAAAA14mltHD33XerTp06GjJkiB566KFis0jucvr0afM4KCioWHlgYKDOnz8vSU7f2mvYsKFWrVql4cOH68iRIxo9erRGjx7tUKdXr1568803r9rO4sWLtWTJEqf6dDYAAgCAisfylExaWpreeustvfXWW2rUqJEeeughPfTQQ+rQoYMrxleiogAlXXoK8HL217Kyspxut1OnTrrrrrv0zjvvFCsLCAjQs88+q5o1a161jWPHjikpKcnpPgEAQOXksvtehmHo4MGDmj17tmbPnq02bdpo2LBhevDBB9W0aVNXdWP2VcRmsxUrL+nataSnp6tXr17atWuXpEtrr9q0aaP09HQdPHhQ58+f14ABAzRt2jRNnz79iu2EhYUpIiLCqT5TUlKUnZ1d6rECAADPZ2lNVkREhAzDMENPUbgxDEO7du3SCy+8oBYtWqhHjx5auHChTp48aX3EksOaq5K2U7C/VrTO6lqef/55M2A1bNhQO3bs0LZt23TgwAHNnTvXrDdjxgxt2rTpiu2MGTNGW7dudeqnTZs2To0NAABUPJZC1pYtW3TgwAHNmzdPffv2lZeXV4mB64cfftD48ePVsGFD3XHHHVq6dKnOnTtX5n5DQkLM48ufBJTk0LYzTwIahqEPPvjAPH/mmWfUsmVL83zixInq1KmTeb5s2bJSjxkAAFQtlrdwaNSokZ588kmtW7dOJ06cUGxsrO699175+fmZgatotqugoEDr1q3TqFGjVL9+fQ0ePFgrVqwwNwF1ln0AOnr0qENZRkaGLly4YJ47M1t08uRJh7VbLVq0KFbH/lpqampphgsAAKogl+6TFRISohEjRuizzz7T6dOntWLFCg0fPly1a9eWzWaTzWYzA1dOTo4+//xzPfDAA2rUqJHmzZvnsNbqaiIjI83jxMRE5ebmmuebN292qNulS5drtnf5YvYdO3YUq7Nnzx7zuKQnGgEAAOy5bTNSf39/3X777frNb36jHj16ONxGLPqRLs1ynThxQhMnTtTAgQMdXs58JYMGDTKP09PTzRdB5+Xlac6cOWZZ+/bt1apVq2u2V6NGDXXu3Nk8nz17tn788UfzfP78+UpOTjbPo6Ojr9kmAACo2myGs9NHTjpz5ow+//xzffLJJ1q3bl2JC9NLejrQMAzZbDb95S9/0ZgxY67Zz9133601a9aY5x06dFBaWpqOHDliXluxYoUGDx5snq9du1avvvqqeR4XF6f69etLkr744gsNHDjQLPP29lbbtm2VmZnpcHuwUaNG2r17t2rUqHHNMV5LZGSkkpKSFBERoa1bt1puD0DVk9Cb/+gDLhe9IaG8hyDJRVs4nDhxQp999pk++eQTJSQkmLvA289eFR3XqFFD9913n+677z795z//0eLFi5WdnW3W+dvf/uZUyIqNjVW/fv30008/SZLDTJMkxcTEOAQsSTp+/LgSEv73B5+Tk2MeDxgwQPPmzdOkSZOUn5+vgoKCYrcNGzRooC+++MIlAQsAAFRulkLWvHnz9Mknn+j77793WOQuyWENls1mU79+/TR8+HANHjzY3ILh/vvvV0xMjHr06GHuzF7SeqiS1K1bV4mJiZo7d67i4uK0f/9++fr6qmPHjoqJidEDDzxQ6u8zbtw49e3bV/PmzdP69et1+PBhVatWTc2bN9fAgQM1YcIE3lsIAACcYul2oZeXl8Mslf2tP0lq166dhg8frmHDhqlhw4ZXbOfFF1/Uyy+/LEmqVq2aw0L2yozbhQCs4nYhUFylul1o/9RgvXr1NHToUA0fPlwdO3Z06vP2L3kODg52xZAAAADKleWQZRiG/P39dd9992n48OG644475OVVuocW09PT1aJFC4WHh+vWW2+1OiQAAIByZylk2a+zcvb1NSWZMmWKpkyZYmUoAAAAHsVSyPr3v//tVL2CggJ5e3tb6QoAAKBCcdlmpHFxcerXr5/Onj1brGzkyJHq2rWrli5d6vSu7gAAABWZ5ZB1+vRpRUdHa9iwYUpISNC2bduK1dmxY4e2bNmiUaNGKTo6Wunp6Va7BQAA8GiWQlZhYaEGDBig7777zpyhujxkFRYWas+ePeYTiBs3btTAgQOZ0QIAAJWapZD1wQcfmC9kLgpR27dvd6hz7NgxeXl5mZuSGoah77//XnFxcVa6BgAA8GiWQtbf//53Sf977+DMmTM1d+5chzoNGzZUWlqa/vznP5ubl0rSRx99ZKVrAAAAj2YpZG3fvt18fc6DDz6oF154QbVr1y5Wz8/PTxMnTtTDDz9sblqalJRkpWsAAACPZilknTlzxjzu1avXNet369bNPC56VyEAAEBlZClkFb3oWXIMXFdy4sQJ89jf399K1wAAAB7NUshq0qSJefvvvffeU1ZW1hXrnjp1SkuWLDHXZDVp0sRK1wAAAB7NUsi64447JF16sjA1NVXdunXThx9+qH379ik9PV2nTp3Stm3btGDBAnXu3FnHjx83F8n379/fJV8AAADAE9kMCxtWHT16VDfddJNycnIk/e8pw5LYb+FQo0YN7du3T2FhYWXtulKIjIxUUlKSIiIitHXr1vIeDoAKKKF3dHkPAfA40RsSynsIkizOZDVo0EBvvfWWubFoUYgq6aeoTJKWLFlS5QMWAACo3Cy/VueRRx5RXFycgoKCHMKW/Y90aSardu3a+uSTTzR06FCr3QIAAHi0aq5oZMiQIerfv79iY2P11VdfKTk52XzaMDQ0VB06dFD//v01cuRI1axZ0xVdAgAAeDSXhCxJqlWrlsaPH6/x48e7qkmgUpg1a5YOHTrkcK1Ro0Z68cUXy2lEAIDrwWUhC0DJDh06pP3795f3MAAA15nlNVmlVVBQoB9++EETJ0683l0DAABcNy6ZyYqNjdXq1auVmpqqrKws5efn6/KdIQzDUHZ2ts6cOaOCggJJ0uuvv+6K7gEAADyO5ZD1yCOP6G9/+5skFQtWV3Ol/bQAAAAqA0sh67PPPtOHH35onjsbnCzsfwoAAFAhWFqTtWzZMkly2AvLPkCVtCmpJPXt21cff/yxla4BAAA8mqWQtW3bNnMn9xtvvFE//vijDh06ZIauVatWKTc3V7/88ouGDx8u6VLw+uWXX9SvXz/rowcAAPBQlkLWyZMnJV2ayRo5cqRuueUWNWzYUF26dJEkJSQkqFq1amrSpImWLVumW265RZJ08OBBzZ8/3+LQAQAAPJelkJWXl2ceN2zY0Dzu3bu3DMPQ999/71D/oYceMo9XrVplpWsAAACPZilk2b8ip2hWS5K6du0qSUpKStKFCxfM676+vpIu3TL8+eefrXQNAADg0SyFrObNm5sL2t955x0dPnxYktSjRw9JUm5urhYtWiRJysjI0Lvvvmuu18rJybHSNQAAgEezFLJ+85vfSLq0JuvAgQNq1aqVjh07pgYNGqhFixaSpClTpqhjx45q0aKFUlJSzM82aNDAStcAAAAezVLIGjdunIKCgv7XmJeXwsLCJF1af1W0ZUNycrLOnDljznrZbDbdeeedVroGAADwaJZCVv369fXFF1+oTp06kqSbb77ZLHvqqafMwGWz2cwfSapdu7aef/55K10DAAB4NMsviO7Vq5f27NmjmTNn6r777jOvBwcHa8OGDbr11lsdNiPt3Lmz4uPj1ahRI6tdAwAAeCxLr9UpLCyUl5eXgoOD9cILLxQrb968ub799lsdPnxYhw8fVv369dWkSRMrXQIAAFQIlkLWgAEDlJOToyFDhui3v/2tbrjhhhLrhYeHKzw83EpXAAAAFYql24VbtmxRfHy8YmJi1LdvX1eNCQAAoMKzFLLOnj1rHo8YMcLyYAAAACoLSyHrpptuMo8LCwstDwYAAKCysBSyZsyYYR4vWLBAO3futDwgAACAysDSwvdevXpp3rx5mjRpko4dO6aOHTvq1ltvVefOnVW/fn3VrFlTfn5+8vLyMvfIsvfII49Y6R4AAMBjWQpZ9evXN8NT0T5YGzdu1MaNG536PCELAABUVpZClj37maqi1+k4Wx8AAKCysRyynAlUAAAAVY2lkMW2DQAAACWzFLJiY2NdNQ4AAIBKxfILogEAAFAcIQsAAMANLN0unDlzpqXOX3rpJUufBwAA8FSWQtb06dMtbcVAyAIAAJWVS/bJKss2DuyTBQAAKjPLa7KcDVg2m41gBQAAqgxLM1nTpk27YllBQYGys7OVkZGh5ORkJSYmymaz6c4779RLL70kLy/ra+4zMzM1Z84crVixQr/++qv8/PzUsWNHPfHEExoyZEiZ2/3xxx81b948rV+/XsePH1dAQIC6d++up59+Wv369bM8bgAAUPm5LWRd7u9//7uGDx+ur7/+Ws2aNdOiRYusdK2TJ0+qT58+SklJMa/l5OQoPj5e8fHx2rBhgxYuXFjqdl999VU9//zzKiwsNK/l5ubqyy+/1Nq1a/X6669rwoQJlsYOAAAqv+u2hcNDDz2kBx98UIZh6O2339bXX39tqb2xY8c6BKz27dsrLCzMPF+0aJFWrFhRqjbfe+89TZ061QxYAQEBioyMVI0aNSRdujX69NNPa8eOHZbGDgAAKr/ruk9Wz549zeMFCxaUuZ3du3dr+fLl5vkbb7yh5ORkpaamqlevXub1GTNmON1mVlaWJk2aZJ5HRUXp8OHD2rJli3bt2qWQkBBJl26DLl68uMxjBwAAVYNLni501sGDByVdmhFKTEwsczv2ASs4OFgxMTGSJF9fX02ZMkX33HOPJGnnzp3avXu3Wrdufc02P/nkE509e9Y8f/vttxUcHCxJuvHGGzV16lTt3LlT4eHhioiIKPPYAQBA1XBdQlZhYaE+//xzLVq0SDabTYZh6Pz582Vuzz6gderUSb6+vua5/WxZUV1nQta///1v87hOnTpq27atQ/nTTz9d1uECAIAqyFLIatas2VXL7Z8wLCgoMLd7sNlsatGiRZn7tV+LFR4e7lAWHBysgIAAM8TZ172a5ORk87hx48bKycnRokWLFB8fLy8vL3Xv3l2jR49WaGhomccNAACqDkshKzU11ZyZcoZ93WHDhpW539OnT5vHQUFBxcoDAwPNkJWWluZUm0ePHjWP8/LydOutt+rHH380r61atUpz587V559/7rDu63KLFy/WkiVLnOrT2QAIAAAqHpfcLnR2k9GigNWzZ09L2yDY32r08fEpVm5/LSsry6k2MzMzzeMrPT2YlpamAQMG6Mcff1STJk1KrHPs2DElJSU51ScAAKi8LIes0rxS5+abb9aDDz6op59+WtWqlb1r+z5LCnhl2Vm+oKDA4bxNmzZatmyZWrVqpU8++URPPPGELl68qIyMDE2fPl1Lly4tsZ2wsDCnF8anpKQoOzu71GMFAACez1LI+vXXX69Zx2azyd/fXyEhIfL29rbSnSkgIMB8EjAvL69Yuf21wMBAp9oMDAxURkaGef7OO++oS5cukqRHH31USUlJ5uamK1eulGEYJYa5MWPGaMyYMU71GRkZyawXAACVlKWQdeONN7pqHKUSEhJihiz723xFzp07Zx7XqVPHqTZvuOEGM2R5e3ura9euDuVRUVFmyMrIyFBaWprTbQMAgKrnumxGevmtOKtatmxpHtsvWJcuBaALFy6Y523atHGqzXbt2pnHhYWFys/Pdyi/fEasNLdJAQBA1eOykBUXF6d+/fo5bOhZZOTIkeratauWLl3qknASGRlpHicmJio3N9c837x5s0Pdolt+1xIVFWUeG4ah77//3qH8559/No8DAwPZygEAAFyV5ZCVlpam6OhoDRs2TAkJCdq2bVuxOjt27NCWLVs0atQoRUdHKz093VKfgwYNMo/T09PN23h5eXmaM2eOWda+fXu1atXKqTYffPBBh6cSJ0yYYM6SHT9+XPPnzzfL+vfvLy+v6/pGIgAAUMFYSgqFhYW655579N1335kzVJeHrMLCQu3Zs8fcI2vjxo0aOHCgpRmtzp0766677jLPJ02apFtuuUVNmzbVunXrzOvTpk1z+NzatWvVp08f8+f48eNmWYMGDfTMM8+Y5zt37lSLFi0UGRmpm266Sfv375ckVa9eXS+++GKZxw4AAKoGSyHrgw8+MG/PFYWo7du3O9Q5duyYvLy8zKfxim7FxcXFWelasbGxDuuokpOTdeTIEfM8JiZGgwcPdvjM8ePHlZCQYP7k5OQ4lM+YMUMjR440z7Ozs5WUlGTuteXn56cPPvhAHTp0sDR2AABQ+VkKWX//+98lyQxQM2fO1Ny5cx3qNGzYUGlpafrzn/8sLy8vc9uDjz76yErXqlu3rhITEzVr1iy1a9dOfn5+qlWrlnr37q24uDgtWrSo1G16e3srNjZWK1euVP/+/RUaGiofHx81btxYo0aN0rZt2/S73/3O0rgBAEDVYDMs3LcLCwvTyZMnJUkPPfSQPvzww6vWf/TRR7Vs2TLzs/YzT1VR0T5ZERER2rp1a3kPB24yevRo83ZzkebNmzv9+iXgahJ6R5f3EACPE70hobyHIMniPllnzpwxj6/2Pr8i3bp1M0OWs+8UhHWRk98v7yFUaTWPntHl2/DuPXqGfy7lbOucR8p7CAAqOUu3CwMCAsxj+8B1JSdOnDCP/f39rXQNAADg0SyFrCZNmsgwDBmGoffee++qL2M+deqUlixZYq7JutILlgEAACoDSyHrjjvukHTpycLU1FR169ZNH374ofbt26f09HSdOnVK27Zt04IFC9S5c2cdP37cXCTfv39/l3wBAAAAT2RpTda4ceO0YMECcyuElJQUjRgxosS69i9U9vf317hx46x0DQAA4NEszWQ1aNBAb731lrmxaNE+WCX9FJVJ0pIlSxQWFmZ99AAAAB7K8rthHnnkEcXFxSkoKMghbNn/SJdmsmrXrq1PPvlEQ4cOtdotAACAR7N0u7DIkCFD1L9/f8XGxuqrr75ScnKy+bRhaGioOnTooP79+2vkyJGqWbOmK7oEAADwaC4JWZJUq1YtjR8/XuPHj3dVkwAAABWW5duFAAAAKM5lIWvTpk167LHHdOHChWJl48aN05AhQxQfH++q7gAAADya5ZCVk5Oj4cOHq2fPnlq2bJm2b99erM6mTZv0ySef6LbbbtPQoUN18eJFq90CAAB4NMsha8iQIfroo4/MJwu3bdtWrM6uXbskXXrC8B//+AdPFwIAgErPUsj6/PPPtXr1akkyt2q4PGQdOnRIFy5ccNgr6/PPP9eaNWusdA0AAODRLIWsZcuWSZI5i/Xoo4/qqaeecqjTqFEjpaamauzYsQ67vi9dutRK1wAAAB7N0hYOW7ZsMUPT3XffrXfffbfEeo0bN9b8+fN15MgRffbZZ5KkH374wUrXAAAAHs3STNapU6fM4zvvvPOa9W+77Tbz+OTJk1a6BgAA8GiWQlb16tXN4+zs7GvWP3funHlcrZrL9kEFAADwOJZCVqNGjSRdWpP14YcfKj8//4p1L1y4oNjYWPP2YtFnAQAAKiNLIatfv37mYvYdO3aof//++u6775Sbm2vWycjI0BdffKHevXtr3759Zn37W4cAAACVjaV7dmPHjtXbb7+tgoICGYah9evXa/369bLZbAoICFB+fr5ycnIkyWELh2rVqunJJ590yRcAAADwRJZmslq2bKmZM2c6bM1gGIYKCwuVmZmp7OxsGYbhUC5Jf/zjH3XTTTdZGzkAAIAHs7zj+5QpU/Taa6+pWrVqZpgq6adoBuv111/X5MmTXTF2AAAAj+WSF0RPnjxZKSkpGjdunFq1aiVJ5gyWJLVu3VpPPfWUdu/erQkTJriiSwAAAI/msn0UmjVrpjfffFOSlJeXp/T0dElS7dq15ePj46puAAAAKgS3bFbl4+OjunXruqNpAACACsEltwtL68SJE5o/f355dA0AAHBduGQma/369Vq9erVSU1OVlZWl/Px8cz1WEcMwlJ2drRMnTujgwYOSpHHjxrmiewAAAI9jOWS98MIL+tOf/uRUXfvgZb+lAwAAQGVj6Xbh+vXr9corrzg8SSip2Lk9whUAAKgKLM1kLV68WJIcNiIt6bxI0fVOnTpp6NChVroGAADwaJZmsrZs2WJuNBoaGqrPPvtM3377rRmuYmNjtXfvXn3zzTfmew4lqbCwUDExMdZHDwAA4KEshazjx49LujRDNWrUKN17773q2bOnOnToIEnat2+fWrRooT59+ujLL79Us2bNZBiGkpOT9fbbb1sfPQAAgIeyFLKKXv4sSU2bNjWPo6OjZRiGvv/+e/Oaj4+PRowYYZ4vX77cStcAAAAezVLICggIMI8zMzPN427dukmSEhMTlZeXZ16vXbu2pEtrtXbv3m2lawAAAI9mKWQ1bdrUfJJw2bJlysrKkiT16NFDknThwgV99NFHki69aicuLs787Pnz5610DQAA4NEshaw+ffpIurQma+fOnWrVqpVOnDihpk2bqmHDhpKkMWPG6L777lOHDh30n//8x3zCsE6dOtZGDgAA4MEshayxY8eqevXq5vmZM2fM8DRkyBAZhqHc3Fx98cUX2rNnjznrZbPZ1LdvX2sjBwAA8GCWQlaLFi30wQcfyM/PT5LUtm1beXt7S5ImTpyooKAgc+bKZrOZx35+fnruueesdA0AAODRLL8g+v7779f27dv12GOPmbcPJalhw4Zas2aNGjVqZM5gGYahhg0batWqVWrbtq3VrgEAADyWS14Q3aJFC7377rvFrvfo0UO//PKLvv/+ex0+fFj169dXz5495ePj44puAQAAPJZLQtbVeHl5qVevXu7uBgAAwKNYvl0IAACA4ghZAAAAbkDIAgAAcANCFgAAgBsQsgAAANyAkAUAAOAGhCwAAAA3IGQBAAC4ASELAADADVyy43taWppWr16t3bt36+zZs7p48aIMw7jqZ2w2m9577z1XdA8AAOBxLIesDz74QH/4wx90/vx5pz9jGIZLQlZmZqbmzJmjFStW6Ndff5Wfn586duyoJ554QkOGDLHUdpE9e/aoY8eOysnJkST9+uuvatKkiUvaRtVQUD3IqWsAgMrFUshKTEzUyJEjHWatbDab5UE54+TJk+rTp49SUlLMazk5OYqPj1d8fLw2bNighQsXWurDMAyNGjXKDFhAWVxoFl3eQwAAlANLa7L+/Oc/m7NSRT+GYVzzxxXGjh3rELDat2+vsLAw83zRokVasWKFpT4WLlyojRs3WmoDAABUTZZmsjZs2GAGK0lq166dBg0apPr168vf319eXu5ZV797924tX77cPH/jjTc0YcIE5ebm6rbbbtN3330nSZoxY4buv//+MvWRmpqqqVOnumS8AACg6rEUss6ePSvp0i3CHj16aP369fLx8XHJwK7GPmAFBwcrJiZGkuTr66spU6bonnvukSTt3LlTu3fvVuvWrUvdx+jRo0u1zgwAAMCepamm8PBwcxZr6NCh1yVgSZfWghXp1KmTfH19zfOePXtesa6zYmNj9a9//UuSFBISUsZRAgCAqsxSyLr33nvN4zNnzlgejLPs12KFh4c7lAUHBysgIKDEus44duyYJk6cKEmqXr26Xn75ZQsjBQAAVZWl24WTJ0/WBx98oJMnT+qvf/2rxo8fr5o1a7pqbFd0+vRp8zgoqPij8IGBgeatvrS0tFK1HRMTo4yMDEnS888/X+pbjYsXL9aSJUucqlvaAAgAACoOSyGrbt26Wrlype655x4dOHBAUVFRmj59urp06aIbbrjB4TaeK9mvlSrpFqX9taysLKfb/fjjj/X5559Lkm6++WZNmTKl1E8XHjt2TElJSaX6DAAAqHwshay77rpLklSvXj2lpaVpx44dGjx4sFOftdlsys/PL1O/19qXqyx7daWlpenJJ5+UJHl5eendd98t0xqzsLAwRUREOFU3JSVF2dnZpe4DAAB4Pksha+3atWagsd/Kwd0CAgLMJxvz8vKKldtfCwwMdKrNCRMm6OTJk5Iu7cHVrVu3Mo1tzJgxGjNmjFN1IyMjmfUCAKCSculGVvabkl7txyr7J/4yMzOLlZ87d848rlOnzjXbW7NmjT788ENJUuPGjVnsDgAALLMcspzZ4d3VO763bNnSPD569KhDWUZGhi5cuGCet2nT5prt2e+7dfDgQdWsWdMMhH379nWo27RpU9lsNi1durSMowcAAFWBpZBVWFhY5p+CgoIy9xsZGWkeJyYmKjc31zzfvHmzQ90uXbpcs73rdZsTAABUHZbWZJWXQYMG6ZVXXpEkpaena+HChZo4caLy8vI0Z84cs1779u3VqlWra7bXunVrRUeX/BLfjIwMbd++3Tzv1q2b/Pz8VL9+fYvfAgAAVGYVMmR17txZd911l9asWSNJmjRpkpYtW6a0tDQdOXLErDdt2jSHz61du1avvvqqeR4XF6f69etrypQpmjJlSol9xcfHO9wyjIuLU5MmTVz4bQAAQGXk0pCVmZmprKws5efnF7sFZxiGsrOzdeLECW3evFlxcXGWnqyLjY1Vv3799NNPP0mSkpOTHcpjYmKKbSdx/PhxJSQkmOc5OTll7h8AAOBqLIes9PR0TZgwQatXrzZ3Sr8e6tatq8TERM2dO1dxcXHav3+/fH191bFjR8XExOiBBx64bmMBAAC4nM2wsOo7Ly9PnTt31s6dO0u9eNxms1la/F4ZFO2TFRERoa1bt7qvn8nvu61toKLaOueR8h6CSyT0Lnk9KVCVRW9IuHal68DSTNbChQu1Y8eOK+5/VRS8Li8zDENeXi7dogsAAMCjWEo6K1euNI8Nw1DdunXVoUMHM1xFRETo1ltvVePGjR1muh544AGHBeoAAACVjaWQtWfPHnOWqnv37jp48KA2btwof39/SdL48eP13Xff6ddff9U777xjbka6atUq87U4AAAAlZGlkJWenm4e//a3v5WPj49q1Kih3r17S5K+/fZbs3zUqFEaMGCApEtP9c2dO9dK1wAAAB7NUsiyvwUYHBxsHvfq1UuGYeg///mPQ/3bb7/dPF6/fr2VrgEAADyapZBVu3Zt83jPnj3mcdGrbFJSUnT8+HHz+pkzZyRdCmeHDx+20jUAAIBHsxSy2rVrZ66zevvtt7VixQpJUo8ePeTl5SXDMDRx4kRlZGTo22+/1YIFC0p8ChEAAKCysRSy7rvvPkmXtmg4f/68HnjgAR09elQ1a9Y0Z7P+8Y9/KDQ0VH369NGZM2fMW4zNmze3NnIAAAAPZilkjR49Wm3btpVhGLLZbAoNDVWDBg0kSY8++qgZqIpmu4pmsWw2m373u99ZHDoAAIDnshSyqlevrnXr1um2226TYRi65ZZbzLLHH39cUVFRZriyv00YERGhyZMnW+kaAADAo1nedr1evXr617/+pQ0bNmj8+PH/a9jLS19//bWef/55NW3aVD4+PmrUqJGefvpprV+/Xn5+fla7BgAA8FiWXxBdpFevXsWuVa9eXbNmzdKsWbNc1Q0AAECFwAsEAQAA3MClIWvDhg165plndPvtt6t169Zq2LChWfb6668rNTXVld0BAAB4LJfcLkxKStJjjz2mHTt2mNfsnya8ePGinn32WT333HN66aWX9Nxzz7miWwAAAI9lOWStW7dO99xzj3Jzcx2Clb1du3apoKBABQUFevHFF3X27Fm99tprVrsGAADwWJZfED1kyBBdvHjRvFa0J5a9ohkum80mwzD05z//WZs2bbLSNQAAgEezFLIWLFig9PR0c/Zq8ODBSkhIULNmzRzq3XzzzeZLo4vqLlq0yErXAAAAHs1SyFq9erV53LVrVy1fvlxRUVHy8fFxqBcREaH4+HhFR0ebM13ffvutla4BAAA8mqWQtX//fodZrKt25OXlUOf48eNWugYAAPBolkLWhQsXzGNvb+9r1j9z5ox5zI7vAACgMrMUsopeBi1Ja9asuWrdnJwcffTRR+bMl/1nAQAAKhtLIatv377mGqtvvvlGkyZNKnYbsKCgQImJibrzzju1d+9ec/F7v379LA0cAADAk1kKWU899ZS8vb3NrRnefPNNNWzYUHv37jXr+Pv7q3v37vruu+/MWSwvLy89/vjj1kYOAADgwSyFrHbt2unll1922JrBfp8swzCUn59fbN+siRMnqmPHjla6BgAA8GiW3134zDPPaN68efL19TXDVkk/hmHIy8tLL7zwAru9AwCASs8lL4h+8skntXfvXk2ePFk333yzGaqKfpo2bao//OEP2r59u2bOnOmKLgEAADyaS14QLUmNGjXSa6+9ptdee00FBQU6c+aM8vPzVbt2bbZrAAAAVY7LQpY9b29v3XDDDe5oGgAAoEJwye1CAAAAOHJqJsuZ3dxLy2azKT8/3+XtAgAAeAKnQtblWzAAAADg6pxek1W0D1aRouB1+XVnENoAAEBlV+qF74ZhyNvbW+Hh4e4YDwAAQKVQpqcLCwsLlZWVpV69eikqKkpRUVGKiIhwy9otAACAisjpkGW/m7sknTlzRqtWrdKqVaskSTVq1FD37t3N0NWtWzfVqFHDPaMGAADwcE6FrHfeeUcJCQlKSEjQoUOHHMqKdnc/f/68vvnmG33zzTeSLj2R2KlTJzN09erVS6Ghoa7/BgAAAB7IqZA1atQojRo1SpL066+/Kj4+3gxdBw4cKFa/6MXQW7Zs0ZYtW/TGG29Iklq1auUQupo0aeK6bwIAAOBBSr0mq2nTpmratKkeffRRSdKBAwfM0BUfH6/U1FSz7uVPEe7evVt79uzRu+++yz5ZAACgUrP8Wp0bb7xRI0aM0IgRIyRJhw4dcpjp2r9/v8M2D2zfAAAAqgKXv1anoKBA+fn5ysvL08WLF83rVvbVAgAAqGgsz2QdOHBA69evN2evDh486FBeFKqK/peZLAAAUBWUOmQdPHjQIVTZL3y3D1BFTx3aa9mypbnwPSoqysKwAQAAPJtTIev9999XfHy84uPjSwxVJd0C9PLyUseOHR2eJrzhhhtcNGwAAADP5lTIGjlyZIkzU0UMw5C/v7+6detmhqoePXooICDApYMFAACoKEp1u9B+xiokJEQ9e/Y0Q1VkZCSv1QEAAPivMr8gOjAwUMnJyUpOTtaiRYtK3bHNZtP+/ftL/TkAAICKoFQhq+h2YWFhofkUYVmfFmQrBwAAUJmV+Xbh1a5dC9s4AACAys7pkEUwAgAAcJ5TIauwsNDd4wAAAKhUXP5aHQAAAFTwkJWZmamXXnpJbdu2lb+/v2rXrq2+ffvq448/LnOb27dv1+OPP67mzZvLz89PgYGB6tChg6ZOnaqTJ0+6cPQAAKAys/zuwvJy8uRJ9enTRykpKea1nJwcc2f6DRs2aOHChaVqc968eZo0aZIKCgrMaxcvXtSOHTu0Y8cOxcbG6ssvv1SnTp1c9j0AAEDlVGFnssaOHesQsNq3b6+wsDDzfNGiRVqxYoXT7SUkJGjChAlmwPL19VWnTp104403mnVOnDihgQMH6sKFCy74BgAAoDKrkCFr9+7dWr58uXn+xhtvKDk5WampqerVq5d5fcaMGU63OXv2bPM4NDRUO3bsUFJSklJTUzVz5kyz7PDhw/rb3/5m8RsAAIDKrkKGLPuAFRwcrJiYGEmXZp+mTJlilu3cuVO7d+++ZnsFBQX65ptvzPNRo0apZcuW5vlzzz0nPz8/83zz5s2Wxg8AACq/CrkmKzEx0Tzu1KmTfH19zfOePXsWq9u6deurtpeXl6e//OUvOnr0qI4eParbb7/dodzb21t+fn7KycmRJOXm5lr9CgAAoJKrkCHLfi1WeHi4Q1lwcLACAgJ0/vz5YnWvxM/PT48++ugVyxMTE5WRkWGeN23atJQjBgAAVU2FvF14+vRp8zgoKKhYeWBgoHmclpZmqa+8vDxNnDjR4drdd99tqU0AAFD5VciZrKJZKkny8fEpVm5/LSsrq8z9FBQU6OGHH9Z3331nXrvtttvUtWvXK35m8eLFWrJkiVPtOzPLBgAAKqYKGbLs36PoqpdWXy4vL0/Dhg1zWGQfGBh4zQB17NgxJSUlWe4fAABUbBUyZAUEBOjs2bOSLoWhy9lfs7916KyLFy/q/vvv1+rVq81r1apV00cffaRmzZpd9bNhYWGKiIhwqp+UlBRlZ2eXenwAAMDzVciQFRISYoaszMzMYuXnzp0zj+vUqVOqtrOzszVgwACtW7fOvObj46O///3vGjBgwDU/P2bMGI0ZM8apviIjI5n1AgCgkqqQC9/t97A6evSoQ1lGRobDjuxt2rRxut38/HwNHjzYIWDVqFFDq1at0uDBgy2MGAAAVDUVMmRFRkaax4mJiQ77Vl2+UWiXLl2cbnfcuHH68ssvzfPAwECtXbtW/fv3tzBaAABQFVXIkDVo0CDzOD093XwRdF5enubMmWOWtW/fXq1atXKqzTVr1uitt94yz202m5YvX66oqCgXjRoAAFQlFXJNVufOnXXXXXdpzZo1kqRJkyZp2bJlSktL05EjR8x606ZNc/jc2rVr9eqrr5rncXFxql+/viRp+vTpDnVr1KihV1991aF+kejo6FK9FxEAAFQ9FTJkSVJsbKz69eunn376SZKUnJzsUB4TE1NsHdXx48eVkJBgnhe9Jmfbtm0Or+qRLu3FZV/XXmkX0wMAgKqnQt4ulKS6desqMTFRs2bNUrt27eTn56datWqpd+/eiouL06JFi5xua+PGjW4cKQAAqIpshv3OnriuirZwiIiI0NatW93Xz+T33dY2UFFtnfNIeQ/BJRJ6R5f3EACPE72h5DtR11uFnckCAADwZIQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHADQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHADQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHADQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHADQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHADQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5AyAIAAHADQhYAAIAbELIAAADcgJAFAADgBoQsAAAANyBkAQAAuAEhCwAAwA0IWQAAAG5QoUNWZmamXnrpJbVt21b+/v6qXbu2+vbtq48//rjMbZ48eVITJkxQ8+bN5efnpzp16ujuu+/WunXrXDhyAABQ2VUr7wGU1cmTJ9WnTx+lpKSY13JychQfH6/4+Hht2LBBCxcuLFWbe/fuVZ8+fXTs2DHz2sWLF7VmzRqtWbNGs2fP1uTJk132HQAAQOVVYWeyxo4d6xCw2rdvr7CwMPN80aJFWrFihdPtGYahRx55xAxYNptNERERCg0NNetMmTJFP/zwgwtGDwAAKrsKGbJ2796t5cuXm+dvvPGGkpOTlZqaql69epnXZ8yY4XSbX3/9tTZv3myef/rpp9q6dav279+vm266SZJUWFioWbNmueAbAACAyq5Chiz7gBUcHKyYmBhJkq+vr6ZMmWKW7dy5U7t373aqTft1XG3bttV9990nSQoKCtL48ePNsq+//lrnzp2zMnwAAFAFVMiQlZiYaB536tRJvr6+5nnPnj2vWNfZNrt16+ZQZt9mbm6utm/fXqrxAgCAqqdChiz7tVjh4eEOZcHBwQoICCix7pUUFhZqz549V2zz8nNn2gQAAFVbhXy68PTp0+ZxUFBQsfLAwECdP39ekpSWlnbN9jIzM5Wbm3vFNgMDAx3Or9bm4sWLtWTJkmv2KcmcEUtJSVFkZKRTnymLlCPX/jMAqprIb+aV9xBcImvv3vIeAuBxAt3471R7rVu31t/+9rcrllfIkFUUoCTJx8enWLn9taysrFK1V1Kbl59frc1jx44pKSnpmn3ay87OLvVnAFiTdOJAeQ8BgLt4yL9TK2TIMgzDPLbZbMXKS7rmbHslfb407YWFhSkiIsKpujt37pRhGAoMDFTTpk2d7gMVU0pKirKzs+Xv7682bdqU93AAuBC/31VT69atr1peIUNWQECAzp49K0nKy8srVm5/7fJbfVdq70qfL+n8am2OGTNGY8aMuWafqHoiIyOVlJSkNm3aaOvWreU9HAAuxO83SlIhF76HhISYx5mZmcXK7bdYqFOnzjXbq1mzpsMtwcvbvHzLBmfaBAAAVVuFDFktW7Y0j48ePepQlpGRoQsXLpjnzkzbent7q1mzZlds88iRIw7nTAUDAIBrqZAhy/5JvMTERIcnA+13bZekLl26lLrNjRs3OpTZt+nr66sOHTqUarwAAKDqqZAha9CgQeZxenq6+SLovLw8zZkzxyxr3769WrVqVeo2f/rpJ3366aeSLj1JOH/+fLPszjvvVK1atSyNHwAAVH4VMmR17txZd911l3k+adIk3XLLLWratKnWrVtnXp82bZrD59auXas+ffqYP8ePHzfLBg0a5DBD9bvf/U6dO3dW06ZNtWvXLkmSl5eXXnzxRXd9LQAAUIlUyKcLJSk2Nlb9+vXTTz/9JElKTk52KI+JidHgwYMdrh0/flwJCQnmeU5Ojnns7e2t5cuXq1+/fjpy5IgKCwuLPSHy6quvOn37EQAAVG0VciZLkurWravExETNmjVL7dq1k5+fn2rVqqXevXsrLi5OixYtKnWbLVu2VHJysiZPnqwWLVrI19dXISEh6t+/v/71r39p8uTJbvgmAACgMrIZl+/ECcAtivbRiYiIYB8doJLh9xslqbAzWQAAAJ6MkAUAAOAGFXbhO1DRjB49WseOHVNYWFh5DwWAi/H7jZKwJgsAAMANuF0IAADgBoQsAAAANyBkAQAAuAEhCxVCamqqbDbbFX+8vLzk5+en+vXrq2/fvnrrrbeUl5dX3sMusyZNmpjfbeTIkU6XAXDO2bNnNXfuXN1+++1q2LChqlevruDgYHXo0EF/+MMftH379hI/Z//7d/nfQb6+vgoODlaLFi00dOhQbdq0qcQ2rvT3mLe3t6pXr67atWurdevWGj16tPlaN1RQBlAB/Prrr4akUv307NnTOH/+fHkPvUxuvPFG83uMGDHC6TIA1/b+++8btWvXvubfIWPGjDFycnIcPmv/+3etH5vNZixYsKBY/6X5e8zX19f49NNPr9cfDVyMLRxQId14441q0qSJJMkwDBUWFur8+fPauXOnOYO1ceNGTZ06VfPmzSvHkbpe9+7dze/eunXr8h0MUMHMnj1bzz77rMO1xo0bq0GDBjpw4ICOHTtmXl+8eLEuXLig999/v8S2goKC1LFjR0lSfn6+srKytHfvXmVnZ0u69HfTk08+qTZt2ui2224rsY169eqZv8f5+fk6d+6cdu/ebf49lpubqwceeEDbt29XmzZtLH13lIPyTnmAMy6fyZo2bVqJ9Q4dOmQ0adLErBcQEGDk5+df38G6ALNVgOv9+9//Nry8vMzfrWbNmhkbNmxwqLNq1SojNDTU4e+bzz77zCy3/92Mjo4u1kdmZqYxfPhwh8/ffffdDnXsy0r6/T516pTxm9/8xqHeH/7wB1f8EeA6Y00WKpXw8HA9+uij5vn58+d1+vTpchwRAE/x7LPPqrCwUJJUq1YtrV+/XlFRUQ51BgwYoBUrVjhcW7BggdN9BAYGavHixQoJCTGvXWlt1pXUqVNH77//vmw2W5nbgGcgZKHSsV/w7u/vrxtuuMGhfMuWLXr44YfVrFkz+fv7q1q1agoJCVG3bt302muv6eLFi8XaLCws1LJly3TnnXeqfv368vX1lZ+fnxo1aqR7771XK1euvOp4Fi1apFtvvVVBQUHy9/dXs2bN9Nhjj11xce3VOLsofunSpcrOztasWbPUtm1b+fv7q06dOho0aJA2b958xfZ/+uknjRw5Uo0bN1b16tUVEhKi3r17a8GCBcrJySn1eAFPkJiY6PDi5piYGDVu3LjEun369NHjjz+u559/Xl999ZVWrVpVqr78/f3VokUL8/zcuXOlHm/9+vUd/u4qSxvwAOU9lQY4w9nbhbt27TLq1atn1vu///s/h/LVq1cbPj4+V11o2rVrVyM7O9v8TEFBgXHPPfdcc4HqSy+9VGw8p06dMrp163bFz3h5eRmvv/56sc+VdeG7fdns2bONm2++ucR+q1WrZnz11VfF+v3rX/961T+f9u3bG4cOHbrCPyXAc7322msO/1/+9ttvy9TOtW4XGoZhZGdnGyEhIWa98PBwh3L7cVxpOcCxY8ccbm326tWrTONF+WImCxXS0qVL1adPH/MnKipK7du3V7t27XTixAlJUufOnTV79mzzM5mZmRo6dKg50+Xj46MuXbooIiJC/v7+Zr0ffvhBH3/8sXn+wQcfaPXq1eZ5SEiIunbtqoiICFWr9r9nR2bOnKmNGzc6jPOBBx5wmDVq1KiRunTposDAQEmXZsgmTpzo0L6rvPjii9q5c6dCQ0PVrVs31apVyyzLz8/Xc88951A/Pj5ejz/+uPnn4+fnp4iICIf/It+xY4cGDx5s3nIBKoqdO3c6nLvroZGsrCz9/ve/15kzZ8xrffr0KVUbp0+f1iOPPOLwe1baNuAhyjvlAc4o7RYOdevWNZKSkhza+Oqrr4ymTZsaNpvNCA4ONvbt22eWHT9+3OGR7okTJ5plY8aMMa9HRUUZeXl5DuNq0KCBERQUZHTt2tV49913zbI1a9Y4jOkvf/mLWZaenm7cfvvtZlnHjh0dxuqKmSxJRkxMjPkI+unTp43mzZs7zKLZz9h17drVYTxHjhwxyz777DPD29vbLP/888+v9Y8M8Ch33XWXw++G/e9xadj/jgUFBRnR0dFGdHS0ERUVZbRv397w9/cvtgXD9u3bHdqwL69Xr57ZRs+ePY22bdsWm02uXbu2w+8jKg5CFiqEsuyTJcmYPn16sbaysrKMX3/91eHa0aNHjbZt25qfGzVqlFk2Y8YMh79U58yZY+zcudMsv9JeXI899pj5uQ4dOhQr/89//uMw1tTUVLPMFSErLCzMyM3NdSifPn26Q59Ff3EfPHjQ4XpJ+/LceeedZvnIkSNL/M6Ap7rjjjsc/j9++f5XzirNPlnVq1c34uLiirVRmr/DgoKCjPXr11v89igv3C5EhTRt2jQZl/4jQYZhKDc3V6dOnVJ8fLy6d+9u1ps+fbrDrT9JqlGjhk6ePKk33nhDw4YNU6tWrdSgQQOHnZXz8/PN49///veqW7eupEu7RE+ePFk333yzQkNDdffdd2vhwoXas2dPsTEmJyc7HF++u3OPHj0c6m/bts3Sn8nlOnXqJB8fH4dr9evXdzgvujVoP1ZJ+u1vf1tsvF999ZXbxgq4m/3TfpKUkZHh8j58fX0VGhqqDh06aOzYsfrpp5/0wAMPOP15m82m6tWrq27duurcubOmTJmiPXv2cKuwAmMzUlQKPj4+qlOnjqKjo/XPf/5TjRs31vnz5yVJr7/+uoYMGSJJiouL0zPPPKNDhw6Zn/Xy8jLXcpW03UNYWJg2bdqkqVOnauXKleYTdmfOnNGaNWu0Zs0aPfvss+rfv7/effddNWzYUFLp/xJPS0sry1e/otq1axe7Vr16dYdzwzAklf9YAXdr2bKlw3lKSorq1at3xforV65UaGiobr31Vnl5lTwfER0drfj4eEvjGjFihJYuXWqpDXguZrJQ6YSEhKhdu3bm+U8//SRJWr16tYYOHapDhw7JZrMpJiZG69at09mzZ7Vz586r7qbctGlTxcXF6dSpU1q5cqWeeuopdevWTb6+vmadtWvXatCgQeZ5jRo1zOOoqCilp6df9efhhx925R+Dw6L8a7EfqyT985//vOpYL19EDHi6vn37Opx/+eWXV6xbWFiocePGKSoqSvXr19f48ePdPTxUUoQsVDqGYTi8GqPoycE333zTnLn5f//v/2nRokXq16+f+aSf/WfsnTx5Ut9++63effddbd68WQMHDtTrr7+uTZs2KT09XY8//rhZNzEx0Zwlsw96u3btMl8eW/Sza9cuvffee/r++++VkZFR7Nbe9WQ/VunSU4T2Yw0ODtbbb7+tVatWad++fVf8L3vAU/Xu3VutWrUyzxcvXqzDhw+XWPe9997TwYMHJUmnTp3S2bNnr8sYUfnwNyUqlYKCAv3xj390uB1YtKOz/bUdO3bo1KlTki69G2z69On6+eefzfKiNVmFhYWKiIhQ79699fvf/14jRoxwWH/l4+PjsPmpJHl7e0uS7r33XvNaWlqa/u///s98p1lGRoaeeOIJPf3007r77rvVvXv3ct3os2XLlg4zeX/605/03Xffmed//etfNXXqVI0YMUJdu3bVkiVLymOYQJl5eXnp1VdfNc/Pnj2rfv36OeykXlhYqLfffltPPvmkec3b21tTpky5rmNF5cGaLFRIS5cudVgLYRiGLl68qF9++cUMT9KlhaSTJk2SJLVv31579+6VdClwNW/eXG3atNH+/fuLrTHKzMyUdOkv5mnTpmn06NGSpCNHjqht27Zq166datSooZ9//tnhs1FRUWrQoIEkafDgwerUqZN+/PFHSZf22/riiy900003ae/evQ7/dfz888877NVVHv74xz9q8ODBki79C6jov/y9vLwcHgoIDw/XE088UV7DBMrsvvvu03PPPadXXnlFkrRv3z716NFDzZo10w033KC9e/cqPT3d4TMvv/wyL2JH2ZXjk42A08q6hcNrr71mtrFjxw6jZs2aV6xr/2h2q1atHPqfOnXqNfu66aabjAMHDjh87tChQ0arVq2u+rlx48YV+76u2MKhpJ2kY2NjHfq+fCuL1157zWGX6ct/6tWrZ+zateva/8AAD7Zo0SIjICDgmtsvzJkzp9hnndnx/Vrs++EF8JUbtwtRaVSrVk01atTQjTfeqN/97nfasGGDnnnmGbP85ptv1ubNmzVkyBDdcMMN8vb2VkhIiPr27avly5frs88+M+vu2bPHYZuCV155RZs2bdLIkSN10003yc/PT9WqVVOdOnXUu3dvvf7669q+fXuxd6GFh4dr+/btevPNNxUVFaXQ0FB5e3srNDRU/fv316effqp58+a5/c/GWc8884y2bNmixx57zHy3Y/Xq1dW6dWtNmjRJ27Ztu+oDAkBFEBMTo19++UUzZ85Ujx49VKdOHVWrVk1BQUHm1gkpKSl6+umny3uoqOBshvHflcAAAABwGWayAAAA3ICQBQAA4AaELAAAADcgZAEAALgBIQsAAMANCFkAAABuQMgCAABwA0IWAACAGxCyAAAA3ICQBQAA4AaELAAAADcgZAEAALgBIQsAAMAN/j+4WuiOJSfySgAAAABJRU5ErkJggg==" + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAG2CAYAAABMApONAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABGmklEQVR4nO3deVxVdeL/8fcFQRCUzVQEF8zctQTXRFFryl+L6TjZTGVaNjHDmJZmaZmZTk3lZJnyLc1GbRsmbdEcs5lMtGwy1AQXXCpxxwVBQUFZzu8PhzP3Cipw7pV74fV8PHjMOefzuZ/P59pg7z7ncz7HZhiGIQAAADiVV3UPAAAAoCYiZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABeoMSHr8OHDqlu3rmw2m2w2W5XbOX/+vF599VVFR0crMDBQDRo0UK9evTRv3jyVlJQ4ccQAAKAms9WE1+oYhqEhQ4Zo+fLlDtcq6+zZsxo0aJC++eabcsvvuusuLV26VHXq1KnyWAEAQO3g8TNZhYWFGj16tEPAqqpp06Y5BKw2bdooKirKPF+2bJlef/11y/0AAICaz6Nnsn788Uc9+OCDSk1NLVNW2a916tQpNW7cWOfOnZMkjRs3Tq+//rpKSkp03333KSkpSZIUGhqqzMxM+fj4WP8CAACgxvLYmazBgwcrJiam3IBVFcuXLzcDls1m0+TJkyVJXl5emjJlilnv5MmTWr16tVP6BAAANZfHhqzPP//cnK3q06ePHnroIUvtpaSkmMctW7ZU48aNzfOOHTsqODi43LoAAADl8diQJUlBQUGaNm2akpOT1axZM0ttpaenm8eRkZFlyiMiIsqtCwAAUB6PfUzu9ddf10MPPaT69es7pb0TJ06Yx0FBQWXKAwMDzeOsrKxLtjNv3jzNnz+/Qn3u3btX9evXV2xsrD744INKjBYAALg7jw1Z48aNc2p7Z86cMY/LW9Rufy0vL++S7Rw5ckSbN2+ucL/Z2dnauXNnhesDAADP4LEhy9nsn0YsbzPTim5wGh4erujo6ArVTU9PV35+fsUGCAAAPAoh678CAgLM48LCwjLl9tfsbx1eLD4+XvHx8RXqMyYmplKzXgAAwHN49MJ3ZwoNDTWPc3Nzy5SfPn3aPG7YsOFVGRMAAPBchKz/atOmjXl8+PDhMuWHDh0yj9u3b39VxgQAADwXIeu/YmJizOOffvpJR48eNc937dqlU6dOmefdu3e/qmMDAACeh5D1X4MHD5a3t7ckqaSkRC+++KKkCwvi//KXv5j1wsLCNHDgwGoZIwAA8By1KmRlZmaqf//+5s+qVavMssaNGzssWH/jjTfUtm1btW7dWosXLzavT548mfcWAgCAK6pVTxcWFBRo7dq15vmoUaMcyl955RXt2LFDycnJkqTdu3c7lA8ePNjp+3MBAICaqVbNZF1JQECA/v3vf+uNN95QTEyMAgICFBAQoO7du+vNN9/UJ598ojp1alUuBQAAVWQz7HfhxFVVuk9WdHS0Nm3aVN3DAQAATsRMFgAAgAtw7wsAgCqaMWOGDhw44HCtWbNmevbZZ6tpRHAnhCwAAKrowIED+vnnn6t7GHBT3C4EAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAF6lT3AAAAVbe2X1x1D6FWy6vjLdlsjtd++ol/LtUsbt3a6h6CJGayAAAAXIKQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwgTrVPQAAADxVmGFU6BpqJ0IWAABV9JvikuoeAtwYtwsBAABcgJAFAADgAoQsAAAAF/DokJWbm6upU6eqQ4cO8vf3V0hIiAYMGKCPPvqoym1+//33+u1vf6vIyEj5+vqqQYMG6tmzp1599VWdO3fOiaMHAAA1mccufD927Jj69++v9PR081pBQYGSk5OVnJysdevWae7cuZVqc968eUpISFBJyf8WMhYWFuqHH37QDz/8oH/84x/66quv1KBBA6d9DwAAUDN57EzWmDFjHAJW586dFR4ebp4nJiZq6dKlFW5vz549GjNmjBmwfHx8FB0drYiICLNOSkqKnnjiCSeMHgAA1HQeGbJ27typJUuWmOevvfaa0tLSlJGRodjYWPP6888/X+E2k5KSVFRUJEmqV6+eUlJStGnTJu3bt0+jRo0y63344YcqLCy0/iUAAECN5pEhyz5gBQcHKyEhQZLk6+urSZMmmWXbtm3Tzp07K9TmwYMHzeNf/epXuv766yVJ3t7eGjdunFl25swZZWVlWRo/AACo+TwyZKWkpJjHXbt2la+vr3nep0+fS9a9nJYtW5rHubm5DmU5OTnmsa+vrxo2bFiJ0QIAgNrII0OW/VqsyMhIh7Lg4GAFBASUW/dyHnjgAQUGBkqS1qxZo7ffflt5eXlKT093WId1//33q04dj31eAAAAXCUemRZOnDhhHgcFBZUpDwwM1JkzZySpwrf2IiIitHz5co0YMUKHDh3SI488okceecShTmxsrF5//fXLtjNv3jzNnz+/Qn1WNAACAADP45EhqzRASReeAryY/bW8vLwKt9u1a1fddtttevvtt8uUBQQE6KmnnlL9+vUv28aRI0e0efPmCvcJAABqJo8MWYbdG85tNluZ8vKuXUl2drZiY2O1Y8cOSRfWXrVv317Z2dnav3+/zpw5ozvvvFPPPfecpk2bdsl2wsPDFR0dXaE+09PTlZ+fX+mxAgAA9+eRISsgIECnTp2SpHK3U7C/VrrO6kqeeeYZM2BFRETo66+/Vps2bSRJs2bN0oQJEyRd2BZi0KBB6tWrV7ntxMfHKz4+vkJ9xsTEMOsFAEAN5ZEL30NDQ83ji58ElKTTp0+bxxV5EtAwDL333nvm+ZNPPmkGLEkaP368unbtap4vXry40mMGAAC1i0eGLPsAdPjwYYeynJwcnT171jxv3779Fds7duyYw9qt1q1bl6ljfy0jI6MywwUAALWQR4asmJgY8zglJUXnz583zzds2OBQt3v37lds7+LF7Fu3bi1TZ9euXeZxeU80AgAA2PPIkDV06FDzODs723wRdGFhoWbOnGmWde7cWW3btr1ie/Xq1VO3bt3M81deeUU//vijef7GG28oLS3NPI+Li7M0fgAAUPN5ZMjq1q2bbrvtNvN8woQJuv766xUVFaXVq1eb15977jmHz61atUr9+/c3fzIzM82yqVOnmscnT55U9+7d1aVLF0VFRTm8VqdZs2YaOXKkK74WAACoQTzy6UJJWrhwoQYOHKjt27dLksNMkyQlJCRo2LBhDtcyMzO1du1a87ygoMA8vvPOOzV79mxNmDBBRUVFKi4uLnPbsGnTpvr8889Vr149Z38dAABQw1iayfriiy9UXFzsrLFUSqNGjZSSkqIZM2aoY8eO8vPzU4MGDdSvXz8lJSUpMTGx0m2OHTtWmzdv1ujRo9WqVSv5+vqqXr166ty5s5555hmlpqaaL44GAAC4HJthv7NnJXl5ealhw4YaPny4fve735V5OTMur3SfrOjoaG3atKm6hwPAA63txxpR4GJx69ZeudJVYHlNVlZWlt58803169dPLVu21OTJk8vcugMAAKhtnLbw3TAM7d+/X6+88oq6du2qTp066S9/+Yv27t3rrC4AAAA8hqWQFR0dLcMwzHcJlr4z0DAM7dixQ1OmTFHr1q3Vu3dvzZ07V8eOHbM+YgAAAA9gKWRt3LhR+/bt0+zZszVgwAB5eXmVG7h++OEHjRs3ThEREbrlllu0aNEih1ffAAAA1DSWbxc2a9ZMjz76qFavXq2jR49q4cKFuuuuu+Tn52cGrtLZruLiYq1evVqjR49WkyZNNGzYMC1dulRFRUWWvwgAAIA7cepmpKGhoRo5cqQ+/fRTnThxQkuXLtWIESMUEhIim80mm81mBq6CggJ99tlnuueee9SsWTPNnj1bFh50BAAAcCsu2/Hd399fN998s371q1+pd+/eDrcRS3+kC7NcR48e1fjx4zV48GCVlJS4akgAAABXjdN3fD958qQ+++wzffzxx1q9erUKCwsl/W+NliSHGSv7sLVy5Uq9/fbbio+Pd/awAAAAriqnhKyjR4/q008/1ccff6y1a9eau8Dbz16VHterV09DhgzRkCFD9J///Efz5s1Tfn6+WeeDDz4gZAEAAI9nKWTNnj1bH3/8sb777juHRe6SHNZg2Ww2DRw4UCNGjNCwYcMUEBAgSfrNb36jhIQE9e7dW1lZWZJU5n2BAAAAnshSyHr88ccdZqkuXtzesWNHjRgxQvfdd58iIiLKbePaa69VfHy8XnjhBUnSmTNnrAwJAADALTjldqF9sGrcuLHuvfdejRgxQjfccEOFPh8ZGWkeBwcHO2NIAAAA1cpyyDIMQ/7+/hoyZIhGjBihW265RV5elXtoMTs7W61bt1ZkZKRuvPFGq0MCAACodpZClv06q8DAwCq3M2nSJE2aNMnKUAAAANyKpZD11VdfVahecXGxvL29rXQFAADgUZy2GWlSUpIGDhyoU6dOlSkbNWqUevTooUWLFrGrOwAAqBUsh6wTJ04oLi5O9913n9auXastW7aUqbN161Zt3LhRo0ePVlxcnLKzs612CwAA4NYshaySkhLdeeed+vbbb80ZqotDVklJiXbt2mU+gbh+/XoNHjyYGS0AAFCjWQpZ7733njZs2CDpf9s4pKamOtQ5cuSIvLy8zE1JDcPQd999p6SkJCtdAwAAuDVLIevvf/+7JJkBavr06Xr11Vcd6kRERCgrK0t//etf5eXlZb6r8MMPP7TSNQAAgFuzFLJSU1PNXd5/+9vfasqUKQoJCSlTz8/PT+PHj9f9999vblq6efNmK10DAAC4NUsh6+TJk+ZxbGzsFev37NnTPC59VyEAAEBNZClklb7oWXIMXJdy9OhR89jf399K1wAAAG7NUshq2bKlefvvnXfeUV5e3iXrHj9+XPPnzzfXZLVs2dJK1wAAAG7NUsi65ZZbJF14sjAjI0M9e/bU+++/rz179ig7O1vHjx/Xli1bNGfOHHXr1k2ZmZnmIvlBgwY55QsAAAC4I5thYcOqw4cP67rrrlNBQYGk/z1lWB77LRzq1aunPXv2KDw8vKpd1wgxMTHavHmzoqOjtWnTpuoeDgAPtLZfXHUPAXA7cevWVvcQJFmcyWratKnefPNNc2PR0hBV3k9pmSTNnz+/1gcsAABQs1l6QbQkPfDAA/Lz89Mf/vAH5eTkSFKZ2azSoBUSEqIFCxZo6NChVrsFPMaMGTN04MABh2vNmjXTs88+W00jAgBcDZZDliQNHz5cgwYN0sKFC/Xll18qLS3NfNowLCxMXbp00aBBgzRq1CjVr1/fGV0CHuPAgQP6+eefq3sYAICrzCkhS5IaNGigcePGady4cc5qEgAAwGNZWpMFAACA8l31kFVcXKwffvhB48ePv9pdAwAAXDVOuV24cOFCrVixQhkZGcrLy1NRUZEu3hnCMAzl5+fr5MmTKi4uliTNmjXLGd0DAAC4Hac8XfjBBx9IUplgdTmX2k8LAACgJrAUsj799FO9//775nlFg5OF/U8BAAA8gqU1WYsXL5b0v3BVuh9WqfI2JZWkAQMG6KOPPrLSNQAAgFuzFLK2bNli7uTeokUL/fjjjzpw4IAZupYvX67z58/rl19+0YgRIyRdCF6//PKLBg4caH30AAAAbspSyDp27JikCzNZo0aN0vXXX6+IiAh1795dkrR27VrVqVNHLVu21OLFi3X99ddLkvbv36833njD4tABAADcl6WQVVhYaB5HRESYx/369ZNhGPruu+8c6v/ud78zj5cvX26lawAAALdmKWTZvyKndFZLknr06CFJ2rx5s86ePWte9/X1lXThluFPP/1kpWsAAAC3ZilkXXvtteaC9rffflsHDx6UJPXu3VuSdP78eSUmJkqScnJytGDBAnO9VkFBgZWuAQAA3JqlkPWrX/1K0oU1Wfv27VPbtm115MgRNW3aVK1bt5YkTZo0STfccINat26t9PR087NNmza10jUAAIBbsxSyxo4dq6CgoP815uWl8PBwSRfWX5Vu2ZCWlqaTJ0+as142m0233nqrla4BAADcmqWQ1aRJE33++edq2LChJKlTp05m2eOPP24GLpvNZv5IUkhIiJ555hkrXQMAALg1yy+Ijo2N1a5duzR9+nQNGTLEvB4cHKx169bpxhtvdNiMtFu3bkpOTlazZs2sdg0AAOC2LL1Wp6SkRF5eXgoODtaUKVPKlF977bX65ptvdPDgQR08eFBNmjRRy5YtrXQJAADgESyFrDvvvFMFBQUaPny4fv3rX+uaa64pt15kZKQiIyOtdAUAAOBRLN0u3Lhxo5KTk5WQkKABAwY4a0wAAAAez1LIOnXqlHk8cuRIy4MBAACoKSyFrOuuu848LikpsTwYAACAmsJSyHr++efN4zlz5mjbtm2WBwQAAFATWFr4Hhsbq9mzZ2vChAk6cuSIbrjhBt14443q1q2bmjRpovr168vPz09eXl7mHln2HnjgASvdAwAAuC1LIatJkyZmeCrdB2v9+vVav359hT5PyAIAADWVpZBlz36mqvR1OhWtDwAAUNNYDlkVCVQAAAC1jaWQVd3bNuTm5mrmzJlaunSp9u7dKz8/P91www364x//qOHDh1e53R9//FGzZ8/WmjVrlJmZqYCAAPXq1UtPPPGEBg4c6MRvAAAAaipLIWvhwoXOGkelHTt2TP3791d6erp5raCgQMnJyUpOTta6des0d+7cSrf70ksv6ZlnnnHYkuL8+fP64osvtGrVKs2aNUuPPfaYM74CAACowSy/ILq6jBkzxiFgde7cWeHh4eZ5YmKili5dWqk233nnHU2ePNkMWAEBAYqJiVG9evUkXbg1+sQTT2jr1q1O+AYAAKAm88iQtXPnTi1ZssQ8f+2115SWlqaMjAzFxsaa1+338bqSvLw8TZgwwTzv27evDh48qI0bN2rHjh0KDQ2VJBUXF2vevHlO+BYAAKAms3S7cPr06ZY6nzp1apU+Zx+wgoODlZCQIEny9fXVpEmTdMcdd0iStm3bpp07d6pdu3ZXbPPjjz92eE3QW2+9peDgYElSixYtNHnyZG3btk2RkZGKjo6u0rgBAEDtYSlkTZs2zdJWDFUNWSkpKeZx165d5evra5736dOnTN2KhKyvvvrKPG7YsKE6dOjgUP7EE09UaawAAKB2cso+WVXZxsFKOLNfixUZGelQFhwcrICAAJ05c6ZM3ctJS0szj5s3b66CggIlJiYqOTlZXl5e6tWrlx555BGFhYVVedwAAKD2uGr7ZNnvDG/ViRMnzOOgoKAy5YGBgWbIysrKqlCbhw8fNo8LCwt144036scffzSvLV++XK+++qo+++wzh3VfF5s3b57mz59foT4rGgABAIDnsRSynnvuuUuWFRcXKz8/Xzk5OUpLS1NKSopsNptuvfVWTZ06VV5eVV9zXxqgJMnHx6dMuf21vLy8CrWZm5trHl/q6cGsrCzdeeed+vHHH9WyZcty6xw5ckSbN2+uUJ8AAKDmclnIutjf//53jRgxQv/617/UqlUrJSYmVrlf+9mw8m47VuVWZHFxscN5+/bttXjxYrVt21Yff/yx/vjHP+rcuXPKycnRtGnTtGjRonLbCQ8Pr/DC+PT0dOXn51d6rAAAwP057d2FV/K73/1O//znP/Xhhx/qrbfe0l133aVbbrmlSm0FBASYTwIWFhaWKbe/FhgYWKE2AwMDlZOTY56//fbb6t69uyTpwQcf1ObNm83NTZctWybDMMoNc/Hx8YqPj69QnzExMcx6AQBQQ13VfbLsn/ybM2dOldsp3bNKcrzNV+r06dPmccOGDSvU5jXXXGMee3t7q0ePHg7lffv2NY9zcnIqvNYLAADUTlc1ZO3fv1/Shdt99tswVFabNm3MY/sF69KFAHT27FnzvH379hVqs2PHjuZxSUmJioqKHMovnhHjxdgAAOByrkrIKikp0SeffKLExETzFpv94vXKiomJMY9TUlJ0/vx583zDhg0OdUtv+V2J/UyVYRj67rvvHMp/+ukn8zgwMJCtHAAAwGVZClmtWrW67E+LFi3UqFEj+fn56e6771ZeXp65lql169ZV7nfo0KHmcXZ2trlWqrCwUDNnzjTLOnfurLZt21aozd/+9rcOTyU+9thj5ixZZmam3njjDbNs0KBBlp6OBAAANZ+lhe8ZGRmy2WyV2iurtO59991X5X67deum2267TStXrpQkTZgwQYsXL1ZWVpYOHTpk1rv46cdVq1bppZdeMs+TkpLUpEkTSVLTpk315JNP6oUXXpB04ZU8rVu3Vvv27bV7925zK4i6devq2WefrfLYq0PMxHerewi1Wv3DJ+V90bXdh0/yz6WabZr5QHUPAUAN55SnCyu6ZUJpwOrTp48ee+wxS30uXLhQAwcO1Pbt2yU57tguSQkJCRo2bJjDtczMTK1du9Y8LygocCh//vnndejQIXN7hvz8fIen//z8/PTuu++qS5culsYOAABqPsv3vAzDqPBPp06d9Oc//1lfffWV6tSxlu8aNWqklJQUzZgxQx07dpSfn58aNGigfv36KSkpqUr7cHl7e2vhwoVatmyZBg0apLCwMPn4+Kh58+YaPXq0tmzZorvvvtvSuAEAQO1gKens3bv3inVsNpv8/f0VGhoqb++Lb5pY4+/vrylTpmjKlCkVqj9q1CiNGjXqivUGDx6swYMHWxwdAACozSyFrBYtWjhrHAAAADXKVXlE7uJX1gAAANR0TgtZSUlJGjhwoPm6G3ujRo1Sjx49tGjRIjbxBAAAtYLlkJWVlaW4uDjdd999Wrt2rbZs2VKmztatW7Vx40aNHj1acXFxys7OttotAACAW7MUskpKSnTHHXfo22+/NWeoLg5ZJSUl2rVrl7lH1vr16zV48GBmtAAAQI1mKWS999575mtsSkNUamqqQ50jR47Iy8vL3Om99JU1SUlJVroGAABwa5ZC1t///ndJMgPU9OnT9eqrrzrUiYiIUFZWlv7617/Ky8vL3Lj0ww8/tNI1AACAW7MUslJTU2Wz2WSz2fTb3/5WU6ZMUUhISJl6fn5+Gj9+vO6//35zY1L7ndQBAABqGksh6+TJk+ZxbGzsFev37NnTPM7KyrLSNQAAgFuzFLICAgLMY/vAdSlHjx41j/39/a10DQAA4NYshayWLVuat//eeecd5eXlXbLu8ePHNX/+fHNNVsuWLa10DQAA4NYshaxbbrlF0oUnCzMyMtSzZ0+9//772rNnj7Kzs3X8+HFt2bJFc+bMUbdu3ZSZmWkukh80aJBTvgAAAIA7svTuwrFjx2rOnDkqKCiQJKWnp2vkyJHl1i0NV9KFW4Vjx4610jUAAIBbszST1bRpU7355pvmxqKl+2CV91NaJknz589XeHi49dEDAAC4Kcuv1XnggQeUlJSkoKAgh7Bl/yNdmMkKCQnRxx9/rHvvvddqtwAAAG7N0u3CUsOHD9egQYO0cOFCffnll0pLSzOfNgwLC1OXLl00aNAgjRo1SvXr13dGlwAAAG7NKSFLkho0aKBx48Zp3LhxzmoSAADAY1m+XQgAAICynBayvv/+ez300EM6e/ZsmbKxY8dq+PDhSk5OdlZ3AAAAbs1yyCooKNCIESPUp08fLV68WKmpqWXqfP/99/r4449100036d5779W5c+esdgsAAODWLIes4cOH68MPPzSfLNyyZUuZOjt27JB04QnDf/zjHzxdCAAAajxLIeuzzz7TihUrJMncquHikHXgwAGdPXvWYa+szz77TCtXrrTSNQAAgFuzFLIWL14sSeYs1oMPPqjHH3/coU6zZs2UkZGhMWPGOOz6vmjRIitdAwAAuDVLWzhs3LjRDE233367FixYUG695s2b64033tChQ4f06aefSpJ++OEHK10DAAC4NUszWcePHzePb7311ivWv+mmm8zjY8eOWekaAADArVkKWXXr1jWP8/Pzr1j/9OnT5nGdOk7bBxUAAMDtWApZzZo1k3RhTdb777+voqKiS9Y9e/asFi5caN5eLP0sAABATWQpZA0cONBczL5161YNGjRI3377rc6fP2/WycnJ0eeff65+/fppz549Zn37W4cAAAA1jaV7dmPGjNFbb72l4uJiGYahNWvWaM2aNbLZbAoICFBRUZEKCgokyWELhzp16ujRRx91yhcAAABwR5Zmstq0aaPp06c7bM1gGIZKSkqUm5ur/Px8GYbhUC5Jf/7zn3XddddZGzkAAIAbs7zj+6RJk/Tyyy+rTp06Zpgq76d0BmvWrFmaOHGiM8YOAADgtpzyguiJEycqPT1dY8eOVdu2bSXJnMGSpHbt2unxxx/Xzp079dhjjzmjSwAAALfmtH0UWrVqpddff12SVFhYqOzsbElSSEiIfHx8nNUNAACAR3DJZlU+Pj5q1KiRK5oGAADwCE65XVhZR48e1RtvvFEdXQMAAFwVTpnJWrNmjVasWKGMjAzl5eWpqKjIXI9VyjAM5efn6+jRo9q/f78kaezYsc7oHnBrxXWDKnQNAFCzWA5ZU6ZM0V/+8pcK1bUPXvZbOgA12dlWcdU9BABANbB0u3DNmjV68cUXHZ4klFTm3B7hCgAA1AaWZrLmzZsnSQ4bkZZ3Xqr0eteuXXXvvfda6RoAAMCtWZrJ2rhxo7nRaFhYmD799FN98803ZrhauHChdu/era+//tp8z6EklZSUKCEhwfroAQAA3JSlkJWZmSnpwgzV6NGjddddd6lPnz7q0qWLJGnPnj1q3bq1+vfvry+++EKtWrWSYRhKS0vTW2+9ZX30AAAAbspSyCp9+bMkRUVFmcdxcXEyDEPfffedec3Hx0cjR440z5csWWKlawAAALdmKWQFBASYx7m5ueZxz549JUkpKSkqLCw0r4eEhEi6sFZr586dVroGAABwa5ZCVlRUlPkk4eLFi5WXlydJ6t27tyTp7Nmz+vDDDyVdeNVOUlKS+dkzZ85Y6RoAAMCtWQpZ/fv3l3RhTda2bdvUtm1bHT16VFFRUYqIiJAkxcfHa8iQIerSpYv+85//mE8YNmzY0NrIAQAA3JilkDVmzBjVrVvXPD958qQZnoYPHy7DMHT+/Hl9/vnn2rVrlznrZbPZNGDAAGsjBwAAcGOWQlbr1q313nvvyc/PT5LUoUMHeXt7S5LGjx+voKAgc+bKZrOZx35+fnr66aetdA0AAODWLL8g+je/+Y1SU1P10EMPmbcPJSkiIkIrV65Us2bNzBkswzAUERGh5cuXq0OHDla7BgAAcFtOeUF069attWDBgjLXe/furV9++UXfffedDh48qCZNmqhPnz7y8fFxRrcAAABuyykh63K8vLwUGxvr6m4AAADciuXbhQAAACiLkAUAAOAChCwAAAAXIGQBAAC4ACELAADABTw6ZOXm5mrq1Knq0KGD/P39FRISogEDBuijjz5yWh+7du2Sv7+/uZlqRkaG09oGAAA1l8u3cHCVY8eOqX///kpPTzevFRQUKDk5WcnJyVq3bp3mzp1rqQ/DMDR69GgVFBRYHS4AAKhlPHYma8yYMQ4Bq3PnzgoPDzfPExMTtXTpUkt9zJ07V+vXr7fUBgAAqJ08MmTt3LlTS5YsMc9fe+01paWlKSMjw2Hj0+eff77KfWRkZGjy5MmWxgkAAGovp9wuzMrK0ooVK7Rz506dOnVK586dk2EYl/2MzWbTO++8U6X+7ANWcHCwEhISJEm+vr6aNGmS7rjjDknStm3btHPnTrVr167SfTzyyCM6c+ZMlcYHAABgOWS99957+tOf/lSpQGIYhqWQlZKSYh537dpVvr6+5nmfPn3K1K1syFq4cKH+/e9/S5JCQ0N18uTJKo0TAADUXpZuF6akpGjUqFHKy8uTYRhXnL1yFvu1WJGRkQ5lwcHBCggIKLduRRw5ckTjx4+XJNWtW1cvvPCChZECAIDaytJM1l//+ldzVqrU1QhaJ06cMI+DgoLKlAcGBpoza1lZWZVqOyEhQTk5OZKkZ555ptKzYPPmzdP8+fMrVLeyARAAAHgOSyFr3bp1stlsZrDq2LGjhg4dqiZNmsjf319eXq5ZV29/a9LHx6dMuf21vLy8Crf70Ucf6bPPPpMkderUSZMmTar004VHjhzR5s2bK/UZAABQ81gKWadOnZJ0YRF77969tWbNmnJDj7PZz5bZz6Jd7tqVZGVl6dFHH5UkeXl5acGCBVX6LuHh4YqOjq5Q3fT0dOXn51e6DwAA4P4shazIyEj99NNPstlsuvfee69KwJKkgIAAM+AVFhaWKbe/FhgYWKE2H3vsMR07dkzShT24evbsWaWxxcfHKz4+vkJ1Y2JimPUCAKCGsnQ/76677jKPr+YTeKGhoeZxbm5umfLTp0+bxw0bNrxieytXrtT7778vSWrevDmL3QEAgGWWQtbEiRPVqFEjGYahv/3tb+UGHldo06aNeXz48GGHspycHJ09e9Y8b9++/RXbs993a//+/apfv775rsIBAwY41I2KipLNZtOiRYuqOHoAAFAbWApZjRo10rJlyxQWFqZ9+/apb9+++uyzz3To0CGdP3/eWWMsIyYmxjxOSUlx6GvDhg0Odbt3737F9q7W1hMAAKD2sLQm67bbbpMkNW7cWFlZWdq6dauGDRtWoc/abDYVFRVVqd+hQ4fqxRdflCRlZ2dr7ty5Gj9+vAoLCzVz5kyzXufOndW2bdsrtteuXTvFxcWVW5aTk6PU1FTzvGfPnvLz81OTJk2qNHYAAFA72AwL0zheXl5V3iPLZrOpuLi4ql3r9ttv18qVK83zLl26KCsrS4cOHTKvLV261CH0rVq1Si+99JJ5npSUdMWwlJyc7HDLcO/evWrZsmWVx22vdOF7dHS0Nm3a5JQ2y+1n4rsuaxvwVJtmPlDdQ3CKtf3K/w9EoDaLW7e2uocgyUnvLixV0a0TnHF7buHChRo4cKC2b98uSUpLS3MoT0hIKDOrlpmZqbVr//cHX1BQYHkcAAAA5bG8W2jp63Qq8+MMjRo1UkpKimbMmKGOHTvKz89PDRo0UL9+/ZSUlKTExESn9AMAAFAVlm4XwhpuFwLVh9uFQM3lLrcLXfPeGwAAgFqOkAUAAOACTl34npubq7y8PBUVFZVZe2UYhvLz83X06FFt2LBBSUlJvFIGAADUWJZDVnZ2th577DGtWLFCOTk5ThgSAACA57MUsgoLC9W/f39t27at0k8NVnS7BwAAAE9kKWTNnTtXW7duNd/zd7HS4HVxmWEY8vJiORgAAKi5LCWdZcuWmceGYahRo0bq0qWLGa6io6N14403qnnz5g4zXffcc4/DzuwAAAA1jaWQtWvXLnOWqlevXtq/f7/Wr18vf39/SdK4ceP07bffau/evXr77bfNzUiXL1+uU6dOWR89AACAm7IUsrKzs83jX//61/Lx8VG9evXUr18/SdI333xjlo8ePVp33nmnpAuvs3n11VetdA0AAODWLIUs+1uAwcHB5nFsbKwMw9B//vMfh/o333yzebxmzRorXQMAALg1SyErJCTEPN61a5d53L17d0lSenq6MjMzzesnT56UdCGcHTx40ErXAAAAbs1SyOrYsaO5zuqtt97S0qVLJUm9e/eWl5eXDMPQ+PHjlZOTo2+++UZz5sxh6wYAAFArWApZQ4YMkXRhi4YzZ87onnvu0eHDh1W/fn1zNusf//iHwsLC1L9/f508edK8xXjttddaGzkAAIAbsxSyHnnkEXXo0EGGYchmsyksLExNmzaVJD344INmoCqd7SqdxbLZbLr77rstDh0AAMB9WQpZdevW1erVq3XTTTfJMAxdf/31ZtnDDz+svn37muHK/jZhdHS0Jk6caKVrAAAAt2Z52/XGjRvr3//+t9atW6dx48b9r2EvL/3rX//SM888o6ioKPn4+KhZs2Z64okntGbNGvn5+VntGgAAwG1ZfkF0qdjY2DLX6tatqxkzZmjGjBnO6gYAAMAj8AJBAAAAF3BqyFq3bp2efPJJ3XzzzWrXrp0iIiLMslmzZikjI8OZ3QEAALgtp9wu3Lx5sx566CFt3brVvGb/NOG5c+f01FNP6emnn9bUqVP19NNPO6NbAAAAt2U5ZK1evVp33HGHzp8/7xCs7O3YsUPFxcUqLi7Ws88+q1OnTunll1+22jUAAIDbsvyC6OHDh+vcuXPmtdI9seyVznDZbDYZhqG//vWv+v777610DQAA4NYshaw5c+YoOzvbnL0aNmyY1q5dq1atWjnU69Spk/nS6NK6iYmJVroGAABwa5ZC1ooVK8zjHj16aMmSJerbt698fHwc6kVHRys5OVlxcXHmTNc333xjpWsAAAC3Zilk/fzzzw6zWJftyMvLoU5mZqaVrgEAANyapZB19uxZ89jb2/uK9U+ePGkes+M7AACoySyFrNKXQUvSypUrL1u3oKBAH374oTnzZf9ZAACAmsZSyBowYIC5xurrr7/WhAkTytwGLC4uVkpKim699Vbt3r3bXPw+cOBASwMHAABwZ5ZC1uOPPy5vb29za4bXX39dERER2r17t1nH399fvXr10rfffmvOYnl5eenhhx+2NnIAAAA3ZilkdezYUS+88ILD1gz2+2QZhqGioqIy+2aNHz9eN9xwg5WuAQAA3Jrldxc++eSTmj17tnx9fc2wVd6PYRjy8vLSlClT2O0dAADUeE55QfSjjz6q3bt3a+LEierUqZMZqkp/oqKi9Kc//UmpqamaPn26M7oEAABwa055QbQkNWvWTC+//LJefvllFRcX6+TJkyoqKlJISAjbNQAAgFrHaSHLnre3t6655hpXNA0AAOARnHK7EAAAAI4qNJNVkd3cK8tms6moqMjp7QIAALiDCoWsi7dgAAAAwOVVeE1W6T5YpUqD18XXK4LQBgAAarpKL3w3DEPe3t6KjIx0xXgAAABqhCo9XVhSUqK8vDzFxsaqb9++6tu3r6Kjo12ydgsAAMATVThk2e/mLkknT57U8uXLtXz5cklSvXr11KtXLzN09ezZU/Xq1XPNqAEAANxchULW22+/rbVr12rt2rU6cOCAQ1np7u5nzpzR119/ra+//lrShScSu3btaoau2NhYhYWFOf8bAAAAuKEKhazRo0dr9OjRkqS9e/cqOTnZDF379u0rU7/0xdAbN27Uxo0b9dprr0mS2rZt6xC6WrZs6bxvAgAA4EYqvSYrKipKUVFRevDBByVJ+/btM0NXcnKyMjIyzLoXP0W4c+dO7dq1SwsWLGCfLAAAUKNZfq1OixYtNHLkSI0cOVKSdODAAYeZrp9//tlhmwe2bwAAALWB01+rU1xcrKKiIhUWFurcuXPmdSv7agEAAHgayzNZ+/bt05o1a8zZq/379zuUl4aq0v9lJgsAANQGlQ5Z+/fvdwhV9gvf7QNU6VOH9tq0aWMufO/bt6+FYQMAALi3CoWsd999V8nJyUpOTi43VJV3C9DLy0s33HCDw9OE11xzjZOGDQAA4N4qFLJGjRpV7sxUKcMw5O/vr549e5qhqnfv3goICHDqYAEAADxFpW4X2s9YhYaGqk+fPmaoiomJ4bU6AAAA/1XlF0QHBgYqLS1NaWlpSkxMrHTHNptNP//8c6U/BwAA4AkqFbJKbxeWlJSYTxFW9WlBtnIAAAA1WZVvF17u2pWwjQMAAKjpKhyyCEYAAAAVV6Ed30tKSpz+U1xcbHnwubm5mjp1qjp06CB/f3+FhIRowIAB+uijj6rcZmpqqh5++GFde+218vPzU2BgoLp06aLJkyfr2LFjlscMAABqB8s7vleXY8eOqX///kpPTzevFRQUmPt5rVu3TnPnzq1Um7Nnz9aECRMcAuC5c+e0detWbd26VQsXLtQXX3yhrl27Ou17AACAmsnp7y68WsaMGeMQsDp37qzw8HDzPDExUUuXLq1we2vXrtVjjz1mBixfX1917dpVLVq0MOscPXpUgwcP1tmzZ53wDQAAQE3mkSFr586dWrJkiXn+2muvKS0tTRkZGYqNjTWvP//88xVu85VXXjGPw8LCtHXrVm3evFkZGRmaPn26WXbw4EF98MEHFr8BAACo6TwyZNkHrODgYCUkJEi6MPs0adIks2zbtm3auXPnFdsrLi7W119/bZ6PHj1abdq0Mc+ffvpp+fn5mecbNmywNH4AAFDzeeSarJSUFPO4a9eu8vX1Nc/79OlTpm67du0u215hYaH+7//+T4cPH9bhw4d18803O5R7e3vLz89PBQUFkqTz589b/QoAAKCG88iQZb8WKzIy0qEsODhYAQEBOnPmTJm6l+Ln56cHH3zwkuUpKSnKyckxz6Oioio5YgAAUNt4ZMg6ceKEeRwUFFSmPDAw0AxZWVlZlvoqLCzU+PHjHa7dfvvtl6w/b948zZ8/v0JtVyQAAgAAz+SRIas0QEmSj49PmXL7a3l5eVXup7i4WPfff7++/fZb89pNN92kHj16XPIzR44c0ebNm6vcJwAAqBk8MmTZ7z7vrFf9XKywsFD33XefwyL7wMDAK85ShYeHKzo6ukJ9pKenKz8/39I4AQCAe/LIkBUQEKBTp05JuhCGLmZ/LTAwsNLtnzt3Tr/5zW+0YsUK81qdOnX04YcfqlWrVpf9bHx8vOLj4yvUT0xMDLNeAADUUB65hUNoaKh5nJubW6b89OnT5nHDhg0r1XZ+fr5uv/12h4Dl4+OjpKQk3XnnnVUYLQAAqI08MmTZ72F1+PBhh7KcnByHHdnbt29f4XaLioo0bNgwrV692rxWr149LV++XMOGDbMwYgAAUNt4ZMiKiYkxj1NSUhz2rbp4o9Du3btXuN2xY8fqiy++MM8DAwO1atUqDRo0yMJoAQBAbeSRIWvo0KHmcXZ2tvki6MLCQs2cOdMs69y5s9q2bVuhNleuXKk333zTPLfZbFqyZIn69u3rpFEDAIDaxCMXvnfr1k233XabVq5cKUmaMGGCFi9erKysLB06dMis99xzzzl8btWqVXrppZfM86SkJDVp0kSSNG3aNIe69erV00svveRQv1RcXFyl3osIAABqH48MWZK0cOFCDRw4UNu3b5ckpaWlOZQnJCSUWUeVmZmptWvXmuelr8nZsmWLw6t6pAt7cdnXtVfZxfQAAKD28cjbhZLUqFEjpaSkaMaMGerYsaP8/PzUoEED9evXT0lJSUpMTKxwW+vXr3fhSAEAQG1kM+x39sRVVbpPVnR0tDZt2uS6fia+67K2AU+1aeYD1T0Ep1jbL666hwC4nbh15d+Juto8diYLAADAnRGyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAu4NEhKzc3V1OnTlWHDh3k7++vkJAQDRgwQB999FGV2zx27Jgee+wxXXvttfLz81PDhg11++23a/Xq1U4cOQAAqOnqVPcAqurYsWPq37+/0tPTzWsFBQVKTk5WcnKy1q1bp7lz51aqzd27d6t///46cuSIee3cuXNauXKlVq5cqVdeeUUTJ0502ncAAAA1l8fOZI0ZM8YhYHXu3Fnh4eHmeWJiopYuXVrh9gzD0AMPPGAGLJvNpujoaIWFhZl1Jk2apB9++MEJowcAADWdR4asnTt3asmSJeb5a6+9prS0NGVkZCg2Nta8/vzzz1e4zX/961/asGGDef7JJ59o06ZN+vnnn3XddddJkkpKSjRjxgwnfAMAAFDTeWTIsg9YwcHBSkhIkCT5+vpq0qRJZtm2bdu0c+fOCrVpv46rQ4cOGjJkiCQpKChI48aNM8v+9a9/6fTp01aGDwAAagGPDFkpKSnmcdeuXeXr62ue9+nT55J1K9pmz549Hcrs2zx//rxSU1MrNV4AAFD7eGTIsl+LFRkZ6VAWHBysgICAcuteSklJiXbt2nXJNi8+r0ibAACgdvPIpwtPnDhhHgcFBZUpDwwM1JkzZyRJWVlZV2wvNzdX58+fv2SbgYGBDueXa3PevHmaP3/+FfuUZM6IpaenKyYmpkKfqYr0Q1f+MwBqm5ivZ1f3EJwib/fu6h4C4HYCXfjvVHvt2rXTBx98cMlyjwxZpQFKknx8fMqU21/Ly8urVHvltXnx+eXaPHLkiDZv3nzFPu3l5+dX+jMArNl8dF91DwGAq7jJv1M9MmQZhmEe22y2MuXlXatoe+V9vjLthYeHKzo6ukJ1t23bJsMwFBgYqKioqAr3Ac+Unp6u/Px8+fv7q3379tU9HABOxO937dSuXbvLlntkyAoICNCpU6ckSYWFhWXK7a9dfKvvUu1d6vPlnV+uzfj4eMXHx1+xT9Q+MTEx2rx5s9q3b69NmzZV93AAOBG/3yiPRy58Dw0NNY9zc3PLlNtvsdCwYcMrtle/fn2HW4IXt3nxlg0VaRMAANRuHhmy2rRpYx4fPnzYoSwnJ0dnz541zysybevt7a1WrVpdss1Dhw45nDMVDAAArsQjQ5b9k3gpKSkOTwba79ouSd27d690m+vXr3cos2/T19dXXbp0qdR4AQBA7eORIWvo0KHmcXZ2tvki6MLCQs2cOdMs69y5s9q2bVvpNrdv365PPvlE0oUnCd944w2z7NZbb1WDBg0sjR8AANR8HhmyunXrpttuu808nzBhgq6//npFRUVp9erV5vXnnnvO4XOrVq1S//79zZ/MzEyzbOjQoQ4zVHfffbe6deumqKgo7dixQ5Lk5eWlZ5991lVfCwAA1CAe+XShJC1cuFADBw7U9u3bJUlpaWkO5QkJCRo2bJjDtczMTK1du9Y8LygoMI+9vb21ZMkSDRw4UIcOHVJJSUmZJ0ReeumlCt9+BAAAtZtHzmRJUqNGjZSSkqIZM2aoY8eO8vPzU4MGDdSvXz8lJSUpMTGx0m22adNGaWlpmjhxolq3bi1fX1+FhoZq0KBB+ve//62JEye64JsAAICayGZcvBMnAJco3UcnOjqafXSAGobfb5THY2eyAAAA3BkhCwAAwAU8duE74GkeeeQRHTlyROHh4dU9FABOxu83ysOaLAAAABfgdiEAAIALELIAAABcgJAFAADgAoQseISMjAzZbLZL/nh5ecnPz09NmjTRgAED9Oabb6qwsLC6h11lLVu2NL/bqFGjKlwGoGJOnTqlV199VTfffLMiIiJUt25dBQcHq0uXLvrTn/6k1NTUcj9n//t38d9Bvr6+Cg4OVuvWrXXvvffq+++/L7eNS/095u3trbp16yokJETt2rXTI488Yr7WDR7KADzA3r17DUmV+unTp49x5syZ6h56lbRo0cL8HiNHjqxwGYAre/fdd42QkJAr/h0SHx9vFBQUOHzW/vfvSj82m82YM2dOmf4r8/eYr6+v8cknn1ytPxo4GVs4wCO1aNFCLVu2lCQZhqGSkhKdOXNG27ZtM2ew1q9fr8mTJ2v27NnVOFLn69Wrl/nd27VrV72DATzMK6+8oqeeesrhWvPmzdW0aVPt27dPR44cMa/PmzdPZ8+e1bvvvltuW0FBQbrhhhskSUVFRcrLy9Pu3buVn58v6cLfTY8++qjat2+vm266qdw2GjdubP4eFxUV6fTp09q5c6f599j58+d1zz33KDU1Ve3bt7f03VENqjvlARVx8UzWc889V269AwcOGC1btjTrBQQEGEVFRVd3sE7AbBXgfF999ZXh5eVl/m61atXKWLdunUOd5cuXG2FhYQ5/33z66admuf3vZlxcXJk+cnNzjREjRjh8/vbbb3eoY19W3u/38ePHjV/96lcO9f70pz85448AVxlrslCjREZG6sEHHzTPz5w5oxMnTlTjiAC4i6eeekolJSWSpAYNGmjNmjXq27evQ50777xTS5cudbg2Z86cCvcRGBioefPmKTQ01Lx2qbVZl9KwYUO9++67stlsVW4D7oGQhRrHfsG7v7+/rrnmGofyjRs36v7771erVq3k7++vOnXqKDQ0VD179tTLL7+sc+fOlWmzpKREixcv1q233qomTZrI19dXfn5+atasme666y4tW7bssuNJTEzUjTfeqKCgIPn7+6tVq1Z66KGHLrm49nIquih+0aJFys/P14wZM9ShQwf5+/urYcOGGjp0qDZs2HDJ9rdv365Ro0apefPmqlu3rkJDQ9WvXz/NmTNHBQUFlR4v4A5SUlIcXtyckJCg5s2bl1u3f//+evjhh/XMM8/oyy+/1PLlyyvVl7+/v1q3bm2enz59utLjbdKkicPfXVVpA26guqfSgIqo6O3CHTt2GI0bNzbr/eEPf3AoX7FiheHj43PZhaY9evQw8vPzzc8UFxcbd9xxxxUXqE6dOrXMeI4fP2707Nnzkp/x8vIyZs2aVeZzVV34bl/2yiuvGJ06dSq33zp16hhffvllmX7/9re/XfbPp3PnzsaBAwcu8U8JcF8vv/yyw/+Xv/nmmyq1c6XbhYZhGPn5+UZoaKhZLzIy0qHcfhyXWg5w5MgRh1ubsbGxVRovqhczWfBIixYtUv/+/c2fvn37qnPnzurYsaOOHj0qSerWrZteeeUV8zO5ubm69957zZkuHx8fde/eXdHR0fL39zfr/fDDD/roo4/M8/fee08rVqwwz0NDQ9WjRw9FR0erTp3/PTsyffp0rV+/3mGc99xzj8OsUbNmzdS9e3cFBgZKujBDNn78eIf2neXZZ5/Vtm3bFBYWpp49e6pBgwZmWVFRkZ5++mmH+snJyXr44YfNPx8/Pz9FR0c7/Bf51q1bNWzYMPOWC+Aptm3b5nDuqodG8vLy9Pvf/14nT540r/Xv379SbZw4cUIPPPCAw+9ZZduAm6julAdURGW3cGjUqJGxefNmhza+/PJLIyoqyrDZbEZwcLCxZ88esywzM9Phke7x48ebZfHx8eb1vn37GoWFhQ7jatq0qREUFGT06NHDWLBggVm2cuVKhzH93//9n1mWnZ1t3HzzzWbZDTfc4DBWZ8xkSTISEhLMR9BPnDhhXHvttQ6zaPYzdj169HAYz6FDh8yyTz/91PD29jbLP/vssyv9IwPcym233ebwu2H/e1wZ9r9jQUFBRlxcnBEXF2f07dvX6Ny5s+Hv719mC4bU1FSHNuzLGzdubLbRp08fo0OHDmVmk0NCQhx+H+E5CFnwCFXZJ0uSMW3atDJt5eXlGXv37nW4dvjwYaNDhw7m50aPHm2WPf/88w5/qc6cOdPYtm2bWX6pvbgeeugh83NdunQpU/6f//zHYawZGRlmmTNCVnh4uHH+/HmH8mnTpjn0WfoX9/79+x2ul7cvz6233mqWjxo1qtzvDLirW265xeH/4xfvf1VRldknq27dukZSUlKZNirzd1hQUJCxZs0ai98e1YXbhfBIzz33nIwL/5EgwzB0/vx5HT9+XMnJyerVq5dZb9q0aQ63/iSpXr16OnbsmF577TXdd999atu2rZo2beqws3JRUZF5/Pvf/16NGjWSdGGX6IkTJ6pTp04KCwvT7bffrrlz52rXrl1lxpiWluZwfPHuzr1793aov2XLFkt/Jhfr2rWrfHx8HK41adLE4bz01qD9WCXp17/+dZnxfvnlly4bK+Bq9k/7SVJOTo7T+/D19VVYWJi6dOmiMWPGaPv27brnnnsq/Hmbzaa6deuqUaNG6tatmyZNmqRdu3Zxq9CDsRkpagQfHx81bNhQcXFx+uc//6nmzZvrzJkzkqRZs2Zp+PDhkqSkpCQ9+eSTOnDggPlZLy8vcy1Xeds9hIeH6/vvv9fkyZO1bNky8wm7kydPauXKlVq5cqWeeuopDRo0SAsWLFBERISkyv8lnpWVVZWvfkkhISFlrtWtW9fh3DAMSdU/VsDV2rRp43Cenp6uxo0bX7L+smXLFBYWphtvvFFeXuXPR8TFxSk5OdnSuEaOHKlFixZZagPui5ks1DihoaHq2LGjeb59+3ZJ0ooVK3TvvffqwIEDstlsSkhI0OrVq3Xq1Clt27btsrspR0VFKSkpScePH9eyZcv0+OOPq2fPnvL19TXrrFq1SkOHDjXP69WrZx737dtX2dnZl/25//77nfnH4LAo/0rsxypJ//znPy871osXEQPubsCAAQ7nX3zxxSXrlpSUaOzYserbt6+aNGmicePGuXp4qKEIWahxDMNweDVG6ZODr7/+ujlz8//+3/9TYmKiBg4caD7pZ/8Ze8eOHdM333yjBQsWaMOGDRo8eLBmzZql77//XtnZ2Xr44YfNuikpKeYsmX3Q27Fjh/ny2NKfHTt26J133tF3332nnJycMrf2rib7sUoXniK0H2twcLDeeustLV++XHv27Lnkf9kD7qpfv35q27ateT5v3jwdPHiw3LrvvPOO9u/fL0k6fvy4Tp06dVXGiJqHvylRoxQXF+vPf/6zw+3A0h2d7a9t3bpVx48fl3Th3WDTpk3TTz/9ZJaXrskqKSlRdHS0+vXrp9///vcaOXKkw/orHx8fh81PJcnb21uSdNddd5nXsrKy9Ic//MF8p1lOTo7++Mc/6oknntDtt9+uXr16VetGn23atHGYyfvLX/6ib7/91jz/29/+psmTJ2vkyJHq0aOH5s+fXx3DBKrMy8tLL730knl+6tQpDRw40GEn9ZKSEr311lt69NFHzWve3t6aNGnSVR0rag7WZMEjLVq0yGEthGEYOnfunH755RczPEkXFpJOmDBBktS5c2ft3r1b0oXAde2116p9+/b6+eefy6wxys3NlXThL+bnnntOjzzyiCTp0KFD6tChgzp27Kh69erpp59+cvhs37591bRpU0nSsGHD1LVrV/3444+SLuy39fnnn+u6667T7t27Hf7r+JlnnnHYq6s6/PnPf9awYcMkXfgXUOl/+Xt5eTk8FBAZGak//vGP1TVMoMqGDBmip59+Wi+++KIkac+ePerdu7datWqla665Rrt371Z2drbDZ1544QVexI6qq8YnG4EKq+oWDi+//LLZxtatW4369etfsq79o9lt27Z16H/y5MlX7Ou6664z9u3b5/C5AwcOGG3btr3s58aOHVvm+zpjC4fydpJeuHChQ98Xb2Xx8ssvO+wyffFP48aNjR07dlz5HxjgxhITE42AgIArbr8wc+bMMp+tyI7vV2LfDy+Ar9m4XYgao06dOqpXr55atGihu+++W+vWrdOTTz5plnfq1EkbNmzQ8OHDdc0118jb21uhoaEaMGCAlixZok8//dSsu2vXLodtCl588UV9//33GjVqlK677jr5+fmpTp06atiwofr166dZs2YpNTW1zLvQIiMjlZqaqtdff119+/ZVWFiYvL29FRYWpkGDBumTTz7R7NmzXf5nU1FPPvmkNm7cqIceesh8t2PdunXVrl07TZgwQVu2bLnsAwKAJ0hISNAvv/yi6dOnq3fv3mrYsKHq1KmjoKAgc+uE9PR0PfHEE9U9VHg4m2H8dyUwAAAAnIaZLAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwAUIWAACACxCyAAAAXICQBQAA4AKELAAAABcgZAEAALgAIQsAAMAFCFkAAAAuQMgCAABwgf8P3gTabDQAYfYAAAAASUVORK5CYII=" }, "metadata": {}, "output_type": "display_data" @@ -297,22 +296,24 @@ "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", - "X = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=df_feat[\"feature\"])\n", - "# ML evaluation\n", + "\n", + "X = sf.feat_matrix(df_parts=df_parts, features=df_feat[\"feature\"])\n", "rf = RandomForestClassifier()\n", "cv = cross_val_score(rf, X, y, scoring=\"accuracy\", cv=5, n_jobs=1) \n", "print(f\"Mean accuracy of {round(np.mean(cv), 2)}\")\n", + "\n", "aa.plot_settings(font_scale=1.1)\n", "sns.barplot(pd.DataFrame({\"Baseline\": cv_base, \"CPP\": cv}), palette=[\"tab:blue\", \"tab:red\"])\n", "plt.ylabel(\"Mean accuracy\", size=aa.plot_gcfs()+1)\n", + "plt.ylim(0, 1)\n", "sns.despine()\n", "plt.show()" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-24T07:11:52.473756689Z", - "start_time": "2023-09-24T07:11:45.847226209Z" + "end_time": "2023-09-24T11:20:45.657617985Z", + "start_time": "2023-09-24T11:20:38.934103291Z" } } },