diff --git a/aaanalysis/_data/benchmarks/INFO_benchmarks.xlsx b/aaanalysis/_data/benchmarks/Overview.xlsx similarity index 100% rename from aaanalysis/_data/benchmarks/INFO_benchmarks.xlsx rename to aaanalysis/_data/benchmarks/Overview.xlsx diff --git a/aaanalysis/data_handling/__pycache__/_load_dataset.cpython-39.pyc b/aaanalysis/data_handling/__pycache__/_load_dataset.cpython-39.pyc index 6c407436..23bda7d7 100644 Binary files a/aaanalysis/data_handling/__pycache__/_load_dataset.cpython-39.pyc and b/aaanalysis/data_handling/__pycache__/_load_dataset.cpython-39.pyc differ diff --git a/aaanalysis/data_handling/_load_dataset.py b/aaanalysis/data_handling/_load_dataset.py index 3c58ba8c..1b58f13f 100644 --- a/aaanalysis/data_handling/_load_dataset.py +++ b/aaanalysis/data_handling/_load_dataset.py @@ -19,16 +19,17 @@ # I Helper Functions # Check functions -def check_name_of_dataset(name="INFO", folder_in=None): +def check_name_of_dataset(name="Overview", folder_in=None): """""" - if name == "INFO": + if name == "Overview": return - list_datasets = [x.split(".")[0] for x in os.listdir(folder_in) if "." in x] + list_datasets = [x.split(".")[0] for x in os.listdir(folder_in) + if "." in x and not x.startswith(".")] if name not in list_datasets: list_aa = [x for x in list_datasets if 'AA' in x] list_seq = [x for x in list_datasets if 'SEQ' in x] list_dom = [x for x in list_datasets if 'DOM' in x] - raise ValueError(f"'name' ({name}) is not valid." + raise ValueError(f"'name' ({name}) is not valid. Chose one of the following:" f"\n Amino acid datasets: {list_aa}" f"\n Sequence datasets: {list_seq}" f"\n Domain datasets: {list_dom}") @@ -119,7 +120,7 @@ def _get_aa_window(df_seq=None, aa_window_size=9): # II Main Functions -def load_dataset(name: str = "INFO", +def load_dataset(name: str = "Overview", n: Optional[int] = None, random: bool = False, non_canonical_aa: Literal["remove", "keep", "gap"] = "remove", @@ -131,7 +132,7 @@ def load_dataset(name: str = "INFO", Loads protein benchmarking datasets. The benchmarks are categorized into amino acid ('AA'), domain ('DOM'), and sequence ('SEQ') level datasets. - By default, an overview table is provided (``name='INFO'``). For in-depth details, refer to [Breimann23a]_. + By default, an overview table is provided (``name='Overview'``). For in-depth details, refer to [Breimann23a]_. Parameters ---------- @@ -159,7 +160,7 @@ def load_dataset(name: str = "INFO", ------- pandas.DataFrame A DataFrame of either the selected sequence dataset (``df_seq``) or - general info on all benchmark datasets (``df_info``). + overview on all benchmark datasets (``df_overview``). Notes ----- @@ -188,8 +189,8 @@ def load_dataset(name: str = "INFO", check_min_max_val(min_len=min_len, max_len=max_len) check_aa_window_size(aa_window_size=aa_window_size) # Load overview table - if name == "INFO": - return ut.read_excel_cached(FOLDER_BENCHMARKS + "INFO_benchmarks.xlsx") + if name == "Overview": + return ut.read_excel_cached(FOLDER_BENCHMARKS + "Overview.xlsx") df = ut.read_csv_cached(FOLDER_BENCHMARKS + name + ".tsv", sep="\t") # Filter data if min_len is not None: diff --git a/aaanalysis/feature_engineering/__pycache__/_aaclust_plot.cpython-39.pyc b/aaanalysis/feature_engineering/__pycache__/_aaclust_plot.cpython-39.pyc index a76b4131..3e58e453 100644 Binary files a/aaanalysis/feature_engineering/__pycache__/_aaclust_plot.cpython-39.pyc and b/aaanalysis/feature_engineering/__pycache__/_aaclust_plot.cpython-39.pyc differ diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index f13f7394..70223b11 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/generated/aaanalysis.AAclustPlot.doctree b/docs/build/doctrees/generated/aaanalysis.AAclustPlot.doctree index 7da66e86..4ed23b07 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.AAclustPlot.doctree and b/docs/build/doctrees/generated/aaanalysis.AAclustPlot.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree index d9f26b25..da9673d1 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree and b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree differ diff --git a/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf b/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf index c8addcb9..ddfa2263 100644 Binary files a/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf and b/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf differ diff --git a/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf b/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf index e61bd6d3..cd93d28b 100644 Binary files a/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf and b/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf differ diff --git a/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf b/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf index 4a3b6e47..a38292fd 100644 Binary files a/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf and b/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf differ diff --git a/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf b/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf index 7562902d..28562a1c 100644 Binary files a/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf and b/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf differ diff --git a/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf b/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf index 75829d12..1d33e4a7 100644 Binary files a/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf and b/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf differ diff --git a/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf b/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf index fd37155f..1b940746 100644 Binary files a/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf and b/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf differ diff --git a/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf b/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf index d454887d..3abf57ee 100644 Binary files a/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf and b/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf differ diff --git a/docs/build/html/generated/aaanalysis.AAclustPlot.html b/docs/build/html/generated/aaanalysis.AAclustPlot.html index 778bf6a4..cde8717f 100644 --- a/docs/build/html/generated/aaanalysis.AAclustPlot.html +++ b/docs/build/html/generated/aaanalysis.AAclustPlot.html @@ -129,7 +129,7 @@

aaanalysis.AAclustPlot

-class aaanalysis.AAclustPlot(model_class=<class 'sklearn.decomposition._pca.PCA'>, model_kwargs=None)[source]
+class aaanalysis.AAclustPlot(model_class=<class 'sklearn.decomposition._pca.PCA'>, model_kwargs=None)[source]

Bases: object

Plot results of AAclust analysis.

Dimensionality reduction is performed for visualization using decomposition models such as @@ -150,7 +150,7 @@

aaanalysis.AAclustPlot
-__init__(model_class=<class 'sklearn.decomposition._pca.PCA'>, model_kwargs=None)[source]
+__init__(model_class=<class 'sklearn.decomposition._pca.PCA'>, model_kwargs=None)[source]
Parameters:
    @@ -187,7 +187,7 @@

    aaanalysis.AAclustPlot
    -static eval(data_eval, names=None, dict_xlims=None, figsize=(7, 6))[source]
    +static eval(data_eval, names=None, dict_xlims=None, figsize=(7, 6))[source]

    Evaluates and plots n_clusters and clustering metrics BIC, CH, and SC for the provided data.

    The clustering evaluation metrics (BIC, CH, and SC) are ranked by the average of their independent rankings.

    @@ -232,7 +232,7 @@

    aaanalysis.AAclustPlot
    -center(X, labels=None, component_x=1, component_y=2, ax=None, figsize=(7, 6), dot_alpha=0.75, dot_size=100, legend=True, palette=None)[source]
    +center(X, labels=None, component_x=1, component_y=2, ax=None, figsize=(7, 6), dot_alpha=0.75, dot_size=100, legend=True, palette=None)[source]

    PCA plot of clustering with centers highlighted :type palette: Optional[ListedColormap] :param palette: list of RGB tuples or matplotlib.colors.ListedColormap

    @@ -259,7 +259,7 @@

    aaanalysis.AAclustPlot
    -medoids(X, labels=None, component_x=1, component_y=2, metric='euclidean', ax=None, figsize=(7, 6), dot_alpha=0.75, dot_size=100, legend=True, palette=None, return_data=False)[source]
    +medoids(X, labels=None, component_x=1, component_y=2, metric='euclidean', ax=None, figsize=(7, 6), dot_alpha=0.75, dot_size=100, legend=True, palette=None, return_data=False)[source]

    PCA plot of clustering with medoids highlighted

    Parameters:
    @@ -283,7 +283,7 @@

    aaanalysis.AAclustPlot
    -static correlation(df_corr=None, labels=None, bar_position='left', bar_width=0.1, bar_spacing=0.1, bar_colors='gray', bar_ticklabel_pad=None, vmin=-1, vmax=1, cmap='viridis', **kwargs_heatmap)[source]
    +static correlation(df_corr=None, labels=None, bar_position='left', bar_width=0.1, bar_spacing=0.1, bar_colors='gray', bar_ticklabel_pad=None, vmin=-1, vmax=1, cmap='viridis', **kwargs_heatmap)[source]

    Heatmap for correlation matrix with colored sidebar to label clusters.

    Parameters:
    diff --git a/docs/build/html/generated/aaanalysis.load_dataset.html b/docs/build/html/generated/aaanalysis.load_dataset.html index 3700e967..013c84a6 100644 --- a/docs/build/html/generated/aaanalysis.load_dataset.html +++ b/docs/build/html/generated/aaanalysis.load_dataset.html @@ -126,10 +126,10 @@

    aaanalysis.load_dataset

    -aaanalysis.load_dataset(name='INFO', n=None, random=False, non_canonical_aa='remove', min_len=None, max_len=None, aa_window_size=9)[source]
    +aaanalysis.load_dataset(name='Overview', n=None, random=False, non_canonical_aa='remove', min_len=None, max_len=None, aa_window_size=9)[source]

    Loads protein benchmarking datasets.

    The benchmarks are categorized into amino acid (‘AA’), domain (‘DOM’), and sequence (‘SEQ’) level datasets. -By default, an overview table is provided (name='INFO'). For in-depth details, refer to [Breimann23a].

    +By default, an overview table is provided (name='Overview'). For in-depth details, refer to [Breimann23a].

    Parameters:
      @@ -150,7 +150,7 @@

      aaanalysis.load_dataset
      Returns:

      A DataFrame of either the selected sequence dataset (df_seq) or -general info on all benchmark datasets (df_info).

      +overview on all benchmark datasets (df_overview).

      Return type:

      pandas.DataFrame

      diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index b015e542..f2dd6eae 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.AAclustPlot", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_clist", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_legend", "generated/aaanalysis.plot_settings", "generated/plotting_prelude", "generated/tutorial1_quick_start", "generated/tutorial1_slow_start", "generated/tutorial2a_data_loader", "generated/tutorial2b_scales_loader", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tutorials"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.AAclustPlot.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_clist.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_legend.rst", "generated/aaanalysis.plot_settings.rst", "generated/plotting_prelude.rst", "generated/tutorial1_quick_start.rst", "generated/tutorial1_slow_start.rst", "generated/tutorial2a_data_loader.rst", "generated/tutorial2b_scales_loader.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles.rst", "index/usage_principles/aaontology.rst", "index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tutorials.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.AAclustPlot", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_clist", "aaanalysis.plot_get_cmap", "aaanalysis.plot_legend", "aaanalysis.plot_settings", "Plotting Prelude", "Quick Start with AAanalysis", "Slow Start with AAanalysis", "Data Loading Tutorial", "Scale Loading Tutorial", "Welcome to the AAanalysis documentation!", "Contributing", "<no title>", "<no title>", "Introduction", "<no title>", "References", "Tables", "Usage Principles", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tutorials"], "terms": {"thi": [0, 1, 4, 8, 9, 11, 13, 14, 15, 17, 18, 19, 21, 30], "applic": [0, 4, 13], "program": [0, 21], "interfac": [0, 21, 27], "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14, 16, 17, 18, 19, 20, 21, 24, 25, 27, 29, 31], "public": [0, 15, 18, 20, 21, 23], "object": [0, 1, 2, 4, 5, 6, 13, 17], "function": [0, 1, 2, 4, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 25], "our": [0, 9, 10, 12, 15, 17, 19, 21, 24], "aaanalysi": [0, 15, 18, 19, 21, 23, 24, 25, 27, 28, 31, 34], "python": [0, 16, 17, 20, 21, 24, 25], "toolkit": [0, 21, 30], "which": [0, 4, 5, 13, 14, 16, 17, 18, 19, 21, 24, 27, 30, 32], "can": [0, 1, 5, 6, 9, 13, 15, 16, 17, 18, 19, 20, 21, 24, 27, 30, 32], "import": [0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 28], "aa": [0, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 27, 28], "you": [0, 15, 19, 20, 21, 23], "access": [0, 1, 7, 17, 19, 27], "all": [0, 1, 3, 4, 5, 7, 8, 14, 15, 16, 17, 19, 21, 27], "method": [0, 1, 2, 3, 4, 5, 6, 16, 17, 26], "via": [0, 15, 21, 26], "alia": [0, 5], "load_dataset": [0, 5, 16, 17, 18, 19, 27], "class": [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 18, 32], "model_class": [1, 2, 17], "sklearn": [1, 2, 16, 17], "cluster": [1, 2, 17, 20, 24, 25, 26, 27], "_kmean": 1, "kmean": [1, 17], "model_kwarg": [1, 2], "none": [1, 2, 3, 4, 5, 6, 7, 8, 13, 18], "verbos": [1, 3, 4, 5, 6, 16, 17], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 19, 21], "base": [1, 2, 3, 4, 5, 6, 7, 13, 16, 17, 20, 21, 24, 25, 26, 27, 31, 32], "wrapper": [1, 4, 17, 20, 21, 24, 25], "A": [1, 2, 5, 7, 9, 11, 12, 13, 14, 15, 17, 18, 19, 21, 24, 26], "k": [1, 20, 24, 25, 26], "optim": [1, 3, 4, 11, 15, 20, 21, 24, 25, 26], "select": [1, 2, 3, 4, 7, 8, 16, 17, 18, 19, 20, 21, 24, 25, 26], "redund": [1, 3, 8, 16, 17, 20, 21, 24, 25, 26], "reduc": [1, 6, 8, 16, 20, 24, 25, 26, 27], "set": [1, 2, 3, 4, 5, 6, 8, 9, 13, 14, 15, 16, 17, 18, 20, 21, 24, 25, 26, 27, 30], "numer": [1, 4, 5, 17, 20, 24, 25], "scale": [1, 2, 3, 4, 5, 8, 10, 14, 16, 20, 23, 24, 25, 26, 28, 30, 34], "us": [1, 2, 3, 4, 6, 7, 8, 9, 13, 15, 16, 17, 18, 19, 20, 21, 23, 24, 27, 28, 30, 32], "model": [1, 2, 6, 16, 17, 21, 32], "requir": [1, 21], "pre": [1, 3, 16, 17, 18, 21], "defin": [1, 5, 8, 16, 17, 18, 21, 27, 30], "number": [1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 18, 19, 27], "n_cluster": [1, 2, 16, 17], "mean": [1, 3, 4, 16, 17, 19, 27], "other": [1, 4, 8, 14, 15, 19, 21, 27], "scikit": [1, 2, 21], "learn": [1, 2, 6, 16, 18, 20, 21, 23, 24, 25, 26, 27, 28], "valu": [1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 24, 27], "util": [1, 14, 15, 16, 18, 20, 21], "pearson": [1, 3], "correl": [1, 2, 3, 27], "repres": [1, 4, 17, 18, 24, 27], "sampl": [1, 3, 4, 5, 6, 18, 27, 32], "medoid": [1, 2], "each": [1, 2, 3, 4, 5, 6, 17, 18, 19, 21], "closest": 1, "center": [1, 2, 16, 17, 27], "result": [1, 2, 3, 21], "see": [1, 2, 4, 21, 24, 27, 30], "breimann23a": [1, 7, 8, 26, 27], "paramet": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 17, 18, 19, 21, 27], "type": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 18, 21, 27], "clustermixin": 1, "instanti": 1, "fit": [1, 6, 16, 17, 21], "option": [1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 16, 17], "dict": [1, 2, 3, 4, 5, 6, 10, 13], "keyword": [1, 2, 4, 6], "argument": [1, 2, 4, 5, 6, 13], "pass": [1, 2, 4, 6, 21], "bool": [1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14], "If": [1, 2, 3, 4, 5, 6, 7, 8, 13, 14, 20, 21, 23, 32], "true": [1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 18, 19], "output": [1, 3, 5, 6, 15, 21], "ar": [1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 19, 21, 27, 30, 32, 33], "enabl": [1, 3, 4, 5, 6, 20, 21, 24, 25, 31], "The": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 15, 17, 18, 19, 21, 27, 30, 31], "after": [1, 3, 27], "call": [1, 8, 15, 27], "obtain": [1, 5, 8, 16, 17, 27], "int": [1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13], "labels_": [1, 6], "label": [1, 2, 3, 4, 5, 6, 7, 13, 15, 16, 17, 18, 21, 27, 32], "order": [1, 2, 21, 27], "x": [1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17], "arrai": [1, 2, 3, 5, 6, 16, 17], "like": [1, 2, 3, 5, 6, 14, 15, 21, 27], "shape": [1, 2, 3, 4, 5, 6, 13, 27], "n_sampl": [1, 2, 3, 5, 6], "centers_": 1, "averag": [1, 2, 5, 16, 17, 19, 27], "correspond": [1, 2, 13, 18, 21, 27], "n_featur": [1, 2, 3, 4, 5, 6], "labels_centers_": 1, "medoids_": 1, "one": [1, 4, 11, 13, 21], "labels_medoids_": 1, "is_medoid_": 1, "indic": [1, 4, 5, 6, 18, 19, 21, 27], "being": [1, 18, 21, 27], "1": [1, 2, 3, 4, 5, 6, 7, 8, 10, 13, 14, 15, 16, 18, 19, 21, 27, 32], "0": [1, 2, 3, 4, 5, 6, 7, 13, 15, 16, 17, 18, 19, 27, 32], "same": [1, 2, 8, 19], "medoid_names_": [1, 16, 17], "name": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 14, 16, 17, 18, 19, 27], "provid": [1, 2, 4, 6, 7, 8, 13, 17, 18, 19, 20, 21, 25, 27, 32], "list": [1, 2, 4, 5, 10, 11, 12, 13, 16, 17, 27], "attribut": 1, "dure": [1, 6], "directli": [1, 21], "design": [1, 4, 21, 27, 31], "primarili": [1, 6, 21], "amino": [1, 3, 4, 5, 7, 8, 16, 17, 20, 23, 24, 25, 26, 28, 30, 32], "acid": [1, 3, 4, 5, 7, 8, 16, 17, 20, 23, 24, 25, 26, 28, 30, 32], "ani": [1, 19, 21, 24, 27], "__init__": [1, 2, 3, 4, 5, 6], "on_cent": 1, "min_th": 1, "3": [1, 5, 6, 11, 12, 13, 18, 19, 21, 27], "merg": 1, "metric": [1, 2, 6, 21], "euclidean": [1, 2, 6], "appli": [1, 6, 13, 14, 18], "algorithm": [1, 3, 4, 16, 17, 20, 21, 24, 25, 30, 31], "featur": [1, 3, 4, 5, 6, 16, 20, 21, 24, 25, 30, 31, 32], "matrix": [1, 2, 5, 6, 16, 17, 27], "determin": [1, 2, 8], "without": [1, 4, 21, 27], "specif": [1, 18, 21, 27], "It": [1, 2, 14, 17, 18, 24, 27, 30], "partit": [1, 27], "data": [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 20, 21, 27, 28], "maxim": 1, "within": [1, 3, 5, 21, 27, 30], "beyond": 1, "threshold": [1, 3], "qualiti": 1, "either": [1, 2, 5, 7, 8, 19, 20], "minimum": [1, 2, 5, 7], "member": 1, "fals": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13, 14, 16, 17], "between": [1, 2, 3, 4, 5, 8, 11, 13, 16, 17, 18, 21, 27], "its": [1, 18, 21, 27], "min_cor_al": 1, "min_cor_cent": 1, "respect": [1, 7, 17, 20, 21, 23, 27], "describ": [1, 27], "row": [1, 2], "typic": [1, 2, 18, 24, 27], "column": [1, 2, 3, 4, 5, 6, 7, 8, 13, 18, 19, 21], "must": [1, 5, 11, 12, 21], "float": [1, 2, 3, 4, 6, 13, 14], "otherwis": [1, 4, 5, 6, 27], "step": [1, 3, 4, 5, 7, 8, 21, 24], "perform": [1, 2, 3, 6, 8, 16, 17, 19, 27], "str": [1, 2, 4, 5, 6, 7, 8, 10, 12, 13, 14], "similar": [1, 21, 27, 32], "measur": [1, 2, 21, 27], "maximum": [1, 2, 3, 5, 6, 7, 16, 17], "distanc": [1, 6, 27], "manhattan": [1, 6], "cosin": [1, 6], "return": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 18], "instanc": [1, 4, 13], "allow": 1, "direct": [1, 21], "aanalysi": [1, 21], "consist": [1, 14, 21, 24, 27], "three": [1, 5, 18, 27], "main": [1, 27], "estim": 1, "lower": [1, 27], "bound": 1, "refin": [1, 21], "recurs": [1, 26], "chosen": [1, 3, 5, 7, 8, 18], "smaller": [1, 15], "merge_metr": 1, "reduct": [1, 2], "pairwise_dist": 1, "were": [1, 8, 19, 27], "runtimewarn": 1, "caught": 1, "bundl": 1, "eval": [1, 2, 3, 6, 21], "evalu": [1, 2, 3, 8, 19, 21, 27], "establish": [1, 21], "quantifi": 1, "bic": [1, 2], "bayesian": [1, 2], "inform": [1, 2, 3, 4, 5, 6, 19, 30], "criterion": [1, 2], "reflect": [1, 21, 27], "good": [1, 21], "while": [1, 18], "account": [1, 21, 27], "rang": 1, "from": [1, 2, 3, 4, 5, 6, 7, 8, 16, 17, 18, 19, 20, 21, 27, 28], "neg": [1, 5, 6, 7, 13, 18, 21, 27, 32], "infin": 1, "posit": [1, 2, 3, 4, 5, 6, 7, 20, 21, 24, 25, 27, 32], "higher": [1, 27], "superior": 1, "ch": [1, 2, 27], "calinski": [1, 2], "harabasz": [1, 2], "index": [1, 2, 7, 19, 20, 21, 26], "ratio": 1, "dispers": 1, "score": [1, 16, 17], "suggest": [1, 21], "better": 1, "sc": [1, 2], "silhouett": [1, 2], "coeffici": [1, 2], "proxim": 1, "point": [1, 4, 9, 13, 27, 28], "neighbor": [1, 27], "li": 1, "closer": 1, "impli": 1, "equal": [1, 18], "inf": 1, "wa": [1, 24], "adapt": 1, "form": [1, 5, 27], "stackexchang": 1, "discuss": [1, 21], "modifi": [1, 6, 14], "align": [1, 4, 13, 17, 19, 21], "so": 1, "signifi": 1, "contrari": 1, "convent": [1, 5, 8], "implement": [1, 21], "favor": 1, "calinski_harabasz_scor": 1, "silhouette_scor": 1, "static": [1, 2, 5], "name_clust": 1, "shorten_nam": 1, "assign": [1, 4, 5, 6, 19, 27], "frequenc": [1, 27], "priorit": 1, "alreadi": [1, 32], "contain": [1, 2, 3, 4, 6, 7, 8, 19, 21, 27, 30, 32], "unclassifi": [1, 8, 19, 27], "shorten": 1, "version": [1, 19, 27], "cluster_nam": 1, "renam": 1, "comp_cent": 1, "comput": [1, 3, 4, 5, 16, 17, 21, 26, 27], "given": [1, 4, 5, 7, 11, 12, 13, 16, 17, 19, 21, 27], "labels_cent": 1, "associ": [1, 27], "comp_medoid": 1, "labels_medoid": 1, "comp_correl": 1, "x_ref": 1, "labels_ref": 1, "names_ref": 1, "refer": [1, 3, 5, 7, 17, 21, 27], "compar": [1, 16, 18, 20, 24, 25, 27, 30, 31], "n_samples_ref": 1, "df_corr": [1, 2], "pd": [1, 5, 6, 16, 17, 21], "datafram": [1, 2, 3, 4, 5, 6, 7, 8, 16, 17, 21, 30], "pair": 1, "labels_sort": 1, "sort": 1, "ascend": [1, 2], "replac": [1, 7], "panda": [1, 3, 4, 5, 6, 7, 8, 16, 17, 21], "corr": 1, "comp_coverag": 1, "percentag": [1, 3, 6, 19], "uniqu": [1, 2, 3, 4, 19, 21], "present": [1, 5, 7], "help": 1, "understand": 1, "coverag": [1, 21], "particular": 1, "subset": [1, 5, 8, 27], "univers": 1, "both": [1, 4, 14, 18], "consid": [1, 8, 21], "onli": [1, 4, 7, 8, 13, 14, 18, 21, 27, 32], "onc": [1, 21], "regardless": 1, "repetit": 1, "should": [1, 2, 3, 4, 5, 6, 21, 32], "superset": 1, "found": [1, 5, 21], "decomposit": 2, "_pca": 2, "pca": [2, 6, 19], "plot": [2, 4, 9, 10, 11, 12, 13, 14, 18, 20, 21, 27, 34], "aaclust": [2, 8, 16, 19, 20, 23, 24, 25, 26, 27], "analysi": [2, 6, 8, 17, 19, 20, 21, 24, 25, 27], "dimension": [2, 6, 26], "visual": [2, 11, 14, 15, 21], "princip": [2, 6, 8, 19, 27], "compon": [2, 5, 6, 8, 19, 27], "transformermixin": 2, "n_compon": [2, 6], "data_ev": 2, "dict_xlim": 2, "figsiz": [2, 4], "7": [2, 4, 5, 6, 15, 18, 27], "6": [2, 5, 18, 27], "rank": [2, 3, 19], "independ": [2, 15], "follow": [2, 3, 5, 6, 8, 20, 21, 23, 24, 25, 28], "four": [2, 21], "intern": [2, 4, 21, 27], "gener": [2, 3, 4, 5, 7, 11, 12, 14, 21, 24, 26, 27, 32], "2": [2, 3, 4, 5, 6, 9, 11, 13, 15, 16, 18, 19, 21, 27, 32], "etc": 2, "dictionari": [2, 3, 4, 5, 10, 13], "axi": [2, 4, 14, 19], "limit": [2, 4, 21], "xmin": 2, "xmax": 2, "subplot": 2, "kei": [2, 4, 10, 13, 21, 27], "e": [2, 4, 5, 15, 17, 19, 20, 21, 24, 25, 27, 32], "g": [2, 4, 5, 20, 21, 24, 25, 27, 32], "auto": 2, "tupl": [2, 4, 12], "width": [2, 4, 13], "height": [2, 4], "figur": [2, 4], "inch": [2, 4], "fig": 2, "ax": [2, 4, 9, 10, 13, 14], "": [2, 13, 18, 21, 26, 27], "detail": [2, 4, 7, 8, 19, 20, 21, 23], "component_x": 2, "component_i": 2, "dot_alpha": 2, "75": [2, 4], "dot_siz": 2, "100": [2, 3, 7, 16, 17, 18], "legend": [2, 4, 13, 14, 15], "palett": [2, 9, 10, 11, 12, 13, 14, 15, 16, 17], "highlight": [2, 4], "listedcolormap": 2, "param": 2, "rgb": [2, 12], "matplotlib": [2, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21], "color": [2, 4, 9, 10, 11, 12, 13, 14, 15], "newtyp": 2, "arraylike2d": 2, "union": [2, 13], "sequenc": [2, 3, 4, 5, 6, 7, 16, 18, 20, 21, 24, 25, 26, 27, 28, 30, 31, 32], "ndarrai": 2, "arraylike1d": 2, "seri": [2, 5], "return_data": 2, "bar_posit": 2, "left": [2, 13, 27], "bar_width": [2, 4], "bar_spac": 2, "bar_color": 2, "grai": 2, "bar_ticklabel_pad": 2, "vmin": [2, 4], "vmax": [2, 4], "cmap": [2, 4], "viridi": 2, "kwargs_heatmap": 2, "heatmap": [2, 4], "sidebar": 2, "group": [2, 3, 4, 5, 13, 15, 27], "side": [2, 18, 27], "bar": [2, 4], "length": [2, 3, 4, 5, 7, 13, 18, 27], "default": [2, 3, 4, 5, 6, 7, 9, 10, 13, 14, 15, 16, 17, 18, 19], "right": [2, 15, 27], "top": [2, 8, 15, 27], "down": 2, "ad": [2, 4], "space": [2, 4, 6, 13, 21], "singl": 2, "pad": 2, "y": [2, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17], "tick": [2, 4, 14, 15], "sn": [2, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], "colormap": [2, 4], "addit": [2, 4, 5, 6, 8, 14, 19, 21, 27], "_ax": 2, "ensur": [2, 14, 18, 21], "avoid": 2, "mislabel": 2, "seaborn": [2, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21], "creat": [2, 3, 4, 5, 6, 14, 15, 16, 17, 21, 30], "df_scale": [3, 5, 8, 16, 17, 19, 30], "df_cat": [3, 4, 5, 8, 19, 30], "df_part": [3, 5, 16, 17, 30], "split_kw": [3, 5, 16, 17, 30], "accept_gap": [3, 4, 5], "tool": [3, 21, 26], "filter": [3, 4, 7, 16, 17, 18], "most": [3, 4, 6, 13, 16, 17, 20, 24, 25], "discrimin": [3, 4, 16, 17], "two": [3, 4, 8, 9, 16, 17, 19, 20, 21, 24, 25, 26, 27, 29, 30], "load_categori": [3, 5], "categori": [3, 4, 5, 8, 10, 11, 13, 18, 19], "physicochem": [3, 5, 20, 24, 25, 26, 27, 28, 30], "part": [3, 4, 5, 16, 17, 21, 30], "sequencefeatur": [3, 16, 17], "get_split_kw": [3, 5, 16, 17], "nest": [3, 5], "split_typ": [3, 5, 16, 17], "whether": [3, 4, 5, 12, 13], "accept": [3, 4, 5], "miss": [3, 4, 5], "omit": [3, 4, 5], "print": [3, 4, 5, 16, 17], "progress": [3, 4, 26], "about": [3, 4], "run": [3, 5, 16, 17], "parametr": 3, "n_filter": 3, "tmd_len": [3, 4, 5], "20": [3, 4, 5, 8, 18, 21, 27], "jmd_n_len": [3, 4, 5], "10": [3, 4, 5, 11, 13, 18, 21, 27], "jmd_c_len": [3, 4, 5], "ext_len": [3, 4, 5], "4": [3, 4, 5, 18, 19, 27], "start": [3, 4, 5, 7, 21, 27, 28, 30], "check_cat": 3, "n_pre_filt": 3, "pct_pre_filt": 3, "5": [3, 4, 5, 6, 15, 16, 17, 18, 19, 21, 27], "max_std_test": 3, "max_overlap": 3, "max_cor": 3, "n_process": 3, "pipelin": [3, 21], "creation": 3, "aim": [3, 4, 16, 17, 21], "identifi": [3, 4, 6, 7, 16, 17, 18, 20, 24, 25, 26, 28, 32], "collect": [3, 8], "non": [3, 5, 7, 16, 17, 27], "test": [3, 17, 19], "t": [3, 7, 16, 17, 27], "u": [3, 15, 20, 21], "p": [3, 26], "tmd": [3, 4, 5, 7, 16, 17, 18], "todo": [3, 21], "add": [3, 4, 5, 21], "link": [3, 20, 21, 23, 26], "explan": [3, 4, 21], "first": [3, 4, 5, 8, 15, 16, 21], "n": [3, 4, 5, 7, 8, 16, 17, 18, 19, 21, 26, 27], "terminu": [3, 4, 5, 27], "jmd": [3, 4, 5, 16, 17], "c": [3, 4, 5, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 26, 27], "extend": [3, 4, 5, 21, 27, 32], "termin": [3, 4, 5, 17, 18, 27], "longer": 3, "than": [3, 27], "check": [3, 21], "remain": [3, 19, 21], "standard": [3, 32], "deviat": 3, "overlap": 3, "cpu": 3, "multiprocess": [3, 17], "automat": [3, 4, 6, 13, 21], "df_feat": [3, 4, 5, 16, 17, 30], "statist": [3, 4], "n_feature_inform": [3, 4], "eleven": 3, "includ": [3, 5, 7, 8, 13, 21], "id": [3, 5, 7, 8, 19], "11": [3, 4, 18, 27], "split": [3, 5, 16, 17, 30], "subcategori": [3, 4, 8, 19], "sub": 3, "scale_nam": [3, 4, 8, 19], "abs_auc": [3, 4], "absolut": [3, 21], "adjust": [3, 4, 13, 14, 15], "auc": 3, "abs_mean_dif": 3, "differ": [3, 4, 5, 11, 18, 19, 30], "std_test": [3, 4], "std_ref": 3, "p_val": 3, "mann_whitnei": 3, "ttest_indep": 3, "p_val_fdr_bh": 3, "benjamini": 3, "hochberg": 3, "fdr": 3, "correct": 3, "condit": [4, 5], "jmd_m_len": [4, 5], "profil": [4, 16, 20, 24, 25, 31], "val_col": 4, "mean_dif": 4, "val_typ": 4, "count": [4, 18], "normal": [4, 8, 13, 19, 21, 27], "titl": [4, 9, 13, 14, 15, 16, 17], "title_kw": 4, "dict_color": [4, 10, 13, 15], "edge_color": 4, "add_jmd_tmd": 4, "jmd_n_seq": 4, "tmd_seq": 4, "jmd_c_seq": 4, "tmd_color": 4, "mediumspringgreen": 4, "jmd_color": 4, "blue": [4, 16, 17], "tmd_seq_color": 4, "black": [4, 12, 13, 15, 21], "jmd_seq_color": 4, "white": [4, 12, 13], "seq_siz": 4, "tmd_jmd_fontsiz": 4, "xtick_siz": 4, "xtick_width": 4, "xtick_length": 4, "xticks_po": 4, "ytick_siz": 4, "ytick_width": 4, "ytick_length": 4, "ylim": [4, 16, 17], "highlight_tmd_area": 4, "highlight_alpha": 4, "15": [4, 5, 18, 27], "grid": [4, 14, 15], "grid_axi": [4, 14, 15], "add_legend_cat": 4, "legend_kw": 4, "shap_plot": 4, "kwarg": [4, 5, 13], "avail": [4, 8, 13, 17, 19, 20, 23, 26], "specifi": [4, 5, 6, 10, 12, 17, 21], "check_value_typ": 4, "size": [4, 5, 9, 13, 14, 15, 16, 17, 27], "custom": [4, 8, 15, 21], "appear": [4, 27], "map": [4, 5, 12, 13], "edg": [4, 13, 21, 27], "line": [4, 13, 14, 15, 21], "annot": 4, "font": [4, 9, 13, 14], "area": [4, 19, 27], "alpha": 4, "drawn": 4, "shap": [4, 9, 12, 17, 21], "shaplei": 4, "librari": [4, 14, 21], "8": [4, 5, 6, 13, 17, 18, 21, 27], "grid_on": 4, "rdbu_r": 4, "cmap_n_color": 4, "cbar_kw": 4, "facecolor_dark": [4, 12], "add_importance_map": 4, "cbar_pct": 4, "featuremap": 4, "versu": 4, "level": [4, 7, 8, 18, 19, 20, 21, 25, 27, 28, 29], "protein": [4, 5, 7, 16, 19, 20, 21, 24, 25, 26, 30, 31, 32], "shown": 4, "feat_impact": 4, "displai": [4, 14], "sum": [4, 19, 27], "std": 4, "aggreg": 4, "positions_onli": 4, "further": [4, 19, 21, 27], "across": [4, 14, 19, 21], "recommend": [4, 6, 8, 21], "when": [4, 6, 13, 21, 27], "emphas": [4, 21], "fewer": 4, "value_typ": 4, "pyplot": [4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], "anchor": [4, 13, 27], "infer": [4, 21], "seismic": 4, "impact": 4, "discret": 4, "diverg": 4, "sequenti": 4, "classifi": 4, "colorbar": 4, "under": [4, 8, 21], "depicet": 4, "depict": 4, "jmd_n": [4, 5, 7, 18], "jmd_c": [4, 5, 7, 18], "set_xticklabel": 4, "widht": 4, "tick_param": 4, "classif": [4, 7, 8, 17, 18, 19, 20, 25, 27, 28, 32], "pcolormesh": 4, "effect": [4, 21, 27, 32], "document": [4, 27], "more": [4, 13, 14, 16, 21], "cpp": [4, 5, 9, 12, 16, 20, 23, 24, 25, 28, 30], "code": [4, 9, 10, 11, 12, 13, 14, 15], "update_seq_s": 4, "retriev": [5, 17], "continu": [5, 12, 17, 21], "domain": [5, 7, 17, 18, 27], "transmembran": [5, 27], "membran": [5, 27], "principl": [5, 20], "distinct": [5, 20, 21, 24, 25, 27], "segment": [5, 16, 17, 30], "pattern": [5, 13, 17], "properti": [5, 13, 21, 27], "express": 5, "realiz": 5, "For": [5, 7, 16, 18, 21, 32], "over": [5, 16, 17], "valid": [5, 21], "tmd_e": 5, "tmd_n": 5, "tmd_c": 5, "ext_c": 5, "ext_n": 5, "tmd_jmd": [5, 16, 17], "jmd_n_tmd_n": 5, "tmd_c_jmd_c": 5, "ext_n_tmd_n": 5, "tmd_c_ext_c": 5, "get_df_part": [5, 16, 17], "df_seq": [5, 6, 7, 16, 17, 18, 30], "list_part": [5, 16, 17], "all_part": 5, "datafran": 5, "compris": [5, 13, 19], "tmd_start": [5, 7, 18], "tmd_stop": [5, 7, 18], "string": 5, "len": [5, 10, 18], "lenght": 5, "resp": [5, 27], "extra": [5, 15, 27], "possibl": [5, 18, 27, 32], "get": [5, 9, 13, 15, 28], "sf": [5, 16, 17], "dom_gsec": [5, 16, 17, 18, 27], "n_split_min": 5, "n_split_max": [5, 16, 17], "steps_pattern": 5, "n_min": 5, "n_max": 5, "len_max": 5, "steps_periodicpattern": 5, "periodicpattern": 5, "greater": 5, "greatest": 5, "whole": [5, 7, 19], "specfii": 5, "smallest": [5, 27], "integ": 5, "vari": [5, 18], "paramt": 5, "argumetn": 5, "get_featur": 5, "load_scal": [5, 16, 17, 19, 20, 25, 27], "combin": [5, 16, 17, 21, 27], "feat_matrix": [5, 16, 17], "n_job": [5, 16, 17], "return_label": 5, "job": 5, "parallel": [5, 27], "spars": 5, "feat_nam": 5, "convert": 5, "depend": [5, 27], "last": 5, "step1": 5, "step2": 5, "add_feat_valu": 5, "dict_scal": 5, "letter": 5, "feature_valu": 5, "n_part": 5, "ha": [5, 21, 27], "where": [5, 6, 14, 27], "structur": [5, 26, 27], "th": [5, 8, 19], "n_split": 5, "p1": 5, "p2": 5, "pn": 5, "end": [5, 21, 27], "odd": [5, 18], "even": 5, "give": 5, "add_dif": 5, "sample_nam": 5, "ref_group": 5, "add_posit": 5, "part_split": 5, "feat_posit": 5, "total": [5, 6, 19, 21, 27], "pca_kwarg": 6, "determinist": [6, 20, 24, 25], "unlabel": [6, 20, 24, 25, 27, 32], "offer": [6, 18, 21], "approach": [6, 16, 17, 18, 21, 32], "pu": [6, 20, 24, 25, 27], "emploi": 6, "pc": [6, 8, 27], "iter": 6, "reliabl": [6, 18, 21], "These": [6, 8, 15, 17, 19, 21, 32], "those": [6, 27], "distant": 6, "altern": [6, 32], "also": [6, 18, 21, 27], "80": 6, "cover": 6, "varianc": 6, "identif": [6, 26], "datapoint": 6, "inspir": [6, 21], "techniqu": [6, 32], "an": [6, 7, 8, 13, 15, 16, 17, 18, 19, 20, 21, 23, 26, 27], "theoret": [6, 27], "high": [6, 26, 27], "n_neg": 6, "label_po": 6, "name_neg": 6, "rel_neg": 6, "col_class": 6, "newli": 6, "updat": [6, 21], "new": [6, 21], "store": 6, "Will": 6, "initi": [6, 27], "small": [6, 16, 17, 18, 20, 21, 24, 25, 28, 33], "datafor": 6, "conta": 6, "po": 6, "unl": 6, "numpi": [6, 16, 17, 21], "np": [6, 16, 17], "atgc": 6, "gcta": 6, "actg": 6, "tacg": 6, "mode": 6, "dpul": 6, "info": 7, "random": [7, 18, 27], "non_canonical_aa": 7, "remov": [7, 14, 15], "min_len": [7, 18], "max_len": [7, 18], "aa_window_s": [7, 18], "9": [7, 11, 15, 18, 21, 27], "load": [7, 8, 16, 20, 21, 25, 34], "benchmark": [7, 17, 19, 20, 25], "dataset": [7, 8, 16, 17, 19, 20, 21, 24, 25, 32, 33], "categor": [7, 15, 18], "dom": [7, 18, 27], "seq": [7, 18, 27], "By": 7, "overview": [7, 8, 18, 21], "tabl": [7, 8, 18, 21], "depth": [7, 8, 19, 20, 25], "per": [7, 18, 27], "randomli": [7, 18], "liter": 7, "keep": 7, "gap": 7, "handl": [7, 13, 20], "canon": [7, 19], "don": 7, "symbol": 7, "disabl": [7, 19], "window": [7, 27], "aa_": 7, "df_info": [7, 18], "entri": [7, 18, 19], "uniprot": 7, "binari": [7, 17, 18, 32], "stop": 7, "seq_amylo": [7, 18, 19, 27], "guid": [7, 8], "tutori": [7, 8, 17, 20, 21, 24], "just_aaindex": [8, 19], "unclassified_out": [8, 19], "top60_n": [8, 19], "aaontologi": [8, 17, 20, 23, 25, 26, 28], "scales_raw": [8, 19, 27], "encompass": [8, 27], "aaindex": [8, 17, 19, 26], "kawashima08": [8, 26, 27], "along": [8, 17], "min": [8, 19, 27], "max": [8, 19, 27], "organ": [8, 21], "scales_cat": [8, 19, 27], "breimann23b": [8, 20, 23, 26, 27], "compress": [8, 19, 27], "scales_pc": [8, 19, 27], "60": [8, 19, 27], "top60": [8, 19, 27], "individu": [8, 21], "accompani": 8, "top60_ev": [8, 19, 27], "normliz": 8, "raw": [8, 19, 27], "best": [8, 19], "Or": [8, 18], "relev": 8, "exclus": 8, "suffix": [8, 18, 21], "scale_id": [8, 19], "deriv": 8, "descript": [8, 19, 21, 27], "scale_descript": [8, 19], "current": [9, 13], "linewdith": 9, "plot_set": [9, 10, 11, 12, 13, 15, 16, 17, 18], "here": [9, 18, 21, 27], "plt": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], "b": [9, 11, 12, 13, 14, 15, 27], "23": [9, 11, 12, 13, 14, 15, 27], "27": [9, 13, 14, 15], "43": [9, 13, 14, 15], "plot_get_clist": [9, 13, 14, 15], "barplot": [9, 10, 11, 12, 13, 14, 15, 16, 17], "despin": [9, 10, 13, 14, 15, 16, 17, 18], "bigger": 9, "tight_layout": [9, 10, 13, 14, 15], "show": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], "png": [9, 10, 11, 12, 13, 14], "hire": [9, 10, 11, 12, 13, 14], "pdf": [9, 10, 11, 12, 13, 14], "prelud": [9, 10, 11, 12, 13, 14, 34], "dict_cat": 10, "weight_bold": [10, 14], "xaxi": 10, "set_vis": 10, "n_color": [11, 12, 15], "fuction": 11, "eight": 11, "colorl": 11, "appeal": [11, 15], "33": [11, 12], "notebook": 11, "color_palett": [11, 12], "101": 12, "shp": 12, "least": [12, 13, 21], "central": [12, 31], "14": [12, 15, 27], "light_palett": 12, "lighter": 12, "packag": [12, 16, 21], "list_cat": 13, "loc": [13, 19], "upper": 13, "loc_out": 13, "ncol": [13, 15], "labelspac": 13, "columnspac": 13, "handletextpad": 13, "handlelength": 13, "fontsiz": [13, 15], "fontsize_titl": 13, "weight": [13, 26, 27], "fontsize_weight": 13, "marker": 13, "marker_s": 13, "lw": 13, "linestyl": 13, "edgecolor": 13, "hatch": [13, 15], "hatchcolor": 13, "title_align_left": 13, "independntli": 13, "customiz": 13, "flexbili": 13, "convini": 13, "func": 13, "attach": 13, "item": 13, "locat": [13, 27], "25": 13, "thei": [13, 17, 18, 21], "coordin": 13, "vertic": 13, "horizont": 13, "bewtween": 13, "text": [13, 14], "visiabl": 13, "corner": 13, "round": [13, 16, 17], "style": [13, 14], "Not": 13, "fill": [13, 21], "furhter": 13, "word": 13, "line2d": 13, "core": [13, 16, 17], "gca": 13, "font_scal": [14, 18], "arial": 14, "adjust_only_font": 14, "adjust_further_el": 14, "no_tick": 14, "short_tick": 14, "no_ticks_x": [14, 15], "short_ticks_x": 14, "no_ticks_i": 14, "short_ticks_i": [14, 15], "show_opt": 14, "configur": 14, "global": 14, "embed": 14, "vector": [14, 27], "format": [14, 27], "svg": 14, "compat": 14, "edit": 14, "variou": [14, 17, 21, 27, 30], "viewer": 14, "softwar": [14, 21], "factor": [14, 27], "element": [14, 15], "set_context": 14, "common": [14, 21], "verdana": 14, "helvetica": 14, "dejavu": 14, "san": 14, "bold": 14, "leav": [14, 21], "unchang": 14, "make": [14, 15, 17, 18, 21], "layout": 14, "errorbar": 14, "choos": 14, "mark": 14, "short": 14, "ignor": [14, 18, 21], "runtim": 14, "polt": 14, "rcparam": 14, "manag": 14, "some": [15, 16, 27], "readi": [15, 18], "view": [15, 21, 32], "let": 15, "spine": 15, "look": 15, "just": 15, "easili": [15, 17, 18, 21], "comparison": [15, 16, 17], "d": [15, 19], "increas": [15, 27], "match": [15, 26], "plot_gcf": [15, 16, 17], "plot_legend": 15, "framework": [16, 17, 20, 24, 25], "predict": [16, 20, 21, 24, 25, 26, 27, 31, 32], "around": [16, 17], "interpret": [16, 17, 20, 21, 23, 24, 25, 26, 27, 31], "engin": [16, 20, 21, 24, 25, 31], "third": 16, "parti": 16, "aanalsi": 16, "we": [16, 17, 18, 21], "exampl": [16, 17, 18, 21, 24, 32], "\u03b3": [16, 17, 26], "secretas": [16, 17, 26, 27], "50": [16, 17, 18], "substrat": [16, 17, 26, 27], "aac": [16, 17], "now": [16, 17], "physic": [16, 27], "Its": 16, "idea": 16, "concept": 16, "As": [16, 17], "baselin": [16, 17], "entir": [16, 17, 21], "machin": [16, 17, 20, 21, 23, 26, 32], "ensembl": [16, 17], "randomforestclassifi": [16, 17], "model_select": [16, 17], "cross_val_scor": [16, 17], "rf": [16, 17], "cv_base": [16, 17], "accuraci": [16, 17, 19, 26], "f": [16, 17, 19], "63": [16, 18, 27], "take": [16, 17], "littl": [16, 17], "time": [16, 17], "improv": [16, 17, 21, 26], "000": [16, 17, 19], "cv": [16, 17], "tab": [16, 17], "red": [16, 17], "ylabel": [16, 17], "88": 16, "dive": 17, "power": 17, "capabl": [17, 27], "dedic": 17, "free": [17, 27], "In": [17, 18, 21, 32], "gamma": [17, 27], "ll": 17, "focu": [17, 21], "extract": 17, "how": 17, "har": 17, "task": [17, 21, 32], "essenti": [17, 18, 21], "randomforest": 17, "With": 17, "have": [17, 18, 19, 21, 27, 32], "hand": [17, 27], "effortlessli": 17, "furthermor": 17, "predominantli": 17, "hierarch": 17, "known": 17, "your": [17, 20, 21, 23], "fingertip": 17, "centerpiec": 17, "support": [17, 21, 27], "sinc": 17, "problem": 17, "lightweight": 17, "agglom": 17, "close": [17, 21], "integr": [17, 21, 26], "target": [17, 21], "middl": [17, 27], "adjac": [17, 27], "region": [17, 26, 27], "discontinu": 17, "togeth": [17, 30], "input": [17, 21, 30], "characterist": [17, 27], "58": [17, 27], "1000": 17, "yield": 17, "minut": 17, "i7": 17, "10510u": 17, "thread": 17, "93": 17, "iloc": [18, 19], "13": [18, 27], "predictor": [18, 27], "aa_caspase3": [18, 27], "233": [18, 27], "185605": [18, 27], "705": [18, 27], "184900": [18, 27], "prosper": [18, 26, 27], "aa_furin": [18, 27], "71": [18, 27], "59003": [18, 27], "163": [18, 27], "58840": [18, 27], "aa_ldr": [18, 27], "342": [18, 27], "118248": [18, 27], "35469": [18, 27], "82779": [18, 27], "idp": [18, 26, 27], "seq2seq": [18, 26, 27], "aa_mmp2": [18, 27], "573": [18, 27], "312976": [18, 27], "2416": [18, 27], "310560": [18, 27], "aa_rnabind": [18, 27], "221": [18, 27], "55001": [18, 27], "6492": [18, 27], "48509": [18, 27], "gmksvm": [18, 27], "ru": [18, 27], "aa_sa": [18, 27], "101082": [18, 27], "84523": [18, 27], "1414": [18, 27], "8484": [18, 27], "511": [18, 27], "903": [18, 27], "rerf": [18, 26, 27], "pred": [18, 26, 27], "seq_capsid": [18, 19, 27], "7935": [18, 27], "3364680": [18, 27], "3864": [18, 27], "4071": [18, 27], "viralpro": [18, 26, 27], "seq_disulfid": [18, 19, 27], "2547": [18, 27], "614470": [18, 27], "897": [18, 27], "1650": [18, 27], "dipro": [18, 27], "seq_loc": [18, 19, 27], "1835": [18, 27], "732398": [18, 27], "1045": [18, 27], "790": [18, 27], "nan": [18, 27], "seq_solubl": [18, 27], "17408": [18, 27], "4432269": [18, 27], "8704": [18, 27], "solpro": [18, 26, 27], "seq_tail": [18, 27], "6668": [18, 27], "2671690": [18, 27], "2574": [18, 27], "4094": [18, 27], "12": [18, 27], "126": [18, 27], "92964": [18, 27], "prefix": 18, "exemplifi": 18, "df_seq1": 18, "df_seq2": 18, "df_seq3": 18, "head": [18, 19], "capsid_1": 18, "mvthnvkinkhvtrrsyssakevleippltevqtasykwfmdkgik": 18, "capsid_2": 18, "mkkrqkkmtlsnftdtsfqdfvsaeqvddksamalinraedfkagq": 18, "balanc": 18, "200": 18, "value_count": 18, "dtype": 18, "int64": 18, "distribut": 18, "warn": 18, "simplefilt": 18, "action": 18, "futurewarn": 18, "list_seq_len": 18, "histplot": 18, "binwidth": 18, "xlim": 18, "1500": 18, "800": 18, "residu": [18, 19, 26, 27], "seen": 18, "caspase3_1": 18, "mslfdlfrgffgfpgprshrdpffggmtrdedddeeeeeeggswgr": 18, "caspase3_2": 18, "mevtgdagvpesgeirtlkpcllrrnysreqhgvaascledlrska": 18, "caspase3_3": 18, "mrarsgargalllalllcwdptpslagidsggqalpdsfpsapaeq": 18, "caspase3_4": 18, "mdakarncllqhrealekdiktsyimdhmisdgfltiseeekvrn": 18, "conveni": 18, "flank": 18, "popular": [18, 32], "caspase3_1_pos126": 18, "qtlrdsmlk": 18, "caspase3_1_pos127": 18, "tlrdsmlky": 18, "caspase3_1_pos4": 18, "mslfdlfrg": 18, "caspase3_1_pos5": 18, "slfdlfrgf": 18, "21": [18, 27], "caspase3_94_pos31": 18, "vshwqqqsyldsgihsgattt": 18, "caspase3_129_pos530": 18, "wfnkvledktddastpatdt": 18, "caspase3_76_pos554": 18, "qllrgvkhlhdnwilhrdlkt": 18, "caspase3_19_pos163": 18, "ghrgnsldrrsqggphlsgav": 18, "But": 18, "mani": 18, "face": 18, "challeng": [18, 21], "might": [18, 27], "unbalanc": [18, 20, 21, 24, 25, 28, 33], "lack": 18, "clear": [18, 21], "scenario": 18, "denot": [18, 27], "_pu": [18, 27], "dom_gsec_pu": [18, 27], "q14802": 18, "mqkvtlgllvflagfpvldandledknspfyydwhslqvgglicag": 18, "37": 18, "59": 18, "nspfyydwh": 18, "lqvgglicagvlcamgiiivmsa": 18, "kckckfgqk": 18, "q86ue4": 18, "maarswqdelaqqaeegsarlremlsvglgflrtelgldlglepkr": 18, "72": 18, "lglepkrypg": 18, "wvilvgtgalgllllfllgygwa": 18, "aacagarkkr": 18, "p05067": 18, "mlpglallllaawtaralevptdgnagllaepqiamfcgrlnmhmn": 18, "701": 18, "723": 18, "faedvgsnkg": 18, "aiiglmvggvviatvivitlvml": 18, "kkkqytsihh": 18, "p14925": 18, "magrarsgllllllgllalqssclafrsplsvfkrfkettrsfsn": 18, "868": 18, "890": 18, "klstepgsgv": 18, "svvlittllvipvlvllaivmfi": 18, "rwkksrafgd": 18, "df_seq_pu": 18, "p12821": 18, "mgaasgrrgpglllplplllllppqpalaldpglqpgnfsadeaga": 18, "1257": 18, "1276": 18, "gldldaqqar": 18, "vgqwlllflgiallvatlgl": 18, "sqrlfsirhr": 18, "p36896": 18, "maesagassffplvvlllagsggsgprgvqallcactsclqanytc": 18, "127": 18, "149": 18, "ehpsmwgpv": 18, "lvgiiagpvfllfliiiivflvi": 18, "nyhqrvyhnr": 18, "six": 19, "origin": 19, "df_raw": 19, "df_pc": 19, "andn920101": 19, "argp820101": 19, "argp820102": 19, "argp820103": 19, "494": 19, "230": 19, "355": 19, "504": 19, "864": 19, "404": 19, "579": 19, "387": 19, "174": 19, "420": 19, "177": 19, "019": 19, "032": 19, "877": 19, "762": 19, "601": 19, "670": 19, "term": [19, 27], "lins030110": 19, "asa": [19, 27], "volum": [19, 27], "surfac": [19, 27], "fold": [19, 27], "coil": [19, 27], "turn": [19, 27], "median": 19, "resi": 19, "lins030113": 19, "janj780101": 19, "janin": [19, 27], "et": [19, 26, 27], "al": [19, 26, 27], "janj780103": 19, "expos": [19, 21, 27], "lins030104": 19, "stem": 19, "top60_id": 19, "acc": 19, "presenc": [19, 27], "absenc": [19, 27], "df_top60": 19, "aac01": 19, "aac02": 19, "aac03": 19, "aac04": 19, "aac05": 19, "df_eval": 19, "overal": 19, "761": 19, "827": 19, "732": 19, "746": 19, "747": 19, "830": 19, "733": 19, "742": 19, "741": 19, "829": 19, "734": 19, "828": 19, "731": 19, "739": 19, "735": 19, "752": 19, "df_cat_1": 19, "df_raw_1": 19, "df_scales_1": 19, "selected_scal": 19, "tolist": 19, "df_aac1": 19, "exclud": 19, "subordin": 19, "dpulearn": [20, 23, 24, 25], "train": [20, 21, 24, 25, 32], "moreov": [20, 25], "load_data": [20, 25], "pypi": 20, "conda": [20, 21], "forg": 20, "pip": [20, 21], "introduct": 20, "usag": [20, 21, 24], "contribut": [20, 27], "api": [20, 21], "explain": [20, 21, 26, 28], "ai": [20, 21, 26, 28], "perturb": [20, 32], "modul": 20, "search": 20, "page": 20, "work": [20, 23], "pleas": [20, 21, 23], "cite": [20, 23], "_": [20, 23], "breimann": [20, 23, 26], "kamp": [20, 23], "steiner": [20, 23], "frishman": [20, 23], "2023": [20, 23], "ontologi": [20, 23, 26], "biorxiv": [20, 23, 26], "welcom": 21, "thank": 21, "open": 21, "project": [21, 27], "focus": 21, "involv": 21, "invalu": 21, "made": 21, "wai": 21, "file": 21, "github": 21, "issu": 21, "tracker": 21, "submit": 21, "particip": [21, 27], "newcom": 21, "tackl": 21, "email": 21, "stephanbreimann": 21, "gmail": 21, "com": 21, "question": 21, "comprehens": 21, "robust": 21, "life": [21, 32, 33], "scienc": [21, 32, 33], "seamlessli": 21, "flexibl": [21, 27], "interoper": 21, "biopython": 21, "reimplement": 21, "exist": [21, 32], "solut": 21, "biolog": [21, 24, 27, 32], "context": 21, "relianc": 21, "opaqu": 21, "box": 21, "empir": 21, "insight": 21, "cut": 21, "fair": 21, "transpar": 21, "re": [21, 26], "commit": 21, "divers": 21, "aspect": 21, "causal": 21, "minim": 21, "reproduc": 21, "mre": 21, "amount": 21, "demonstr": 21, "self": 21, "necessari": 21, "confirm": 21, "replic": 21, "guidelin": 21, "To": [21, 28], "git": 21, "http": 21, "breimanntool": 21, "master": 21, "repositori": 21, "your_usernam": 21, "navig": 21, "folder": 21, "up": 21, "cd": 21, "isol": 21, "activ": [21, 27], "poetri": 21, "pytest": 21, "hypothesi": 21, "execut": 21, "case": 21, "directori": 21, "out": [21, 27], "readm": 21, "command": 21, "cheat": 21, "sheet": [21, 27], "substanti": 21, "minor": 21, "typo": 21, "concis": 21, "branch": [21, 27], "fix": 21, "date": 21, "readthedoc": 21, "org": 21, "crucial": 21, "modif": 21, "render": 21, "correctli": 21, "strive": 21, "well": 21, "codebas": 21, "standalon": 21, "special": 21, "carri": 21, "complet": 21, "process": 21, "fulfil": 21, "purpos": 21, "inherit": 21, "supplementari": 21, "accordingli": 21, "cppplot": 21, "semi": 21, "strictli": 21, "adher": 21, "aforement": 21, "primari": [21, 30], "_util": 21, "_utils_const": 21, "py": 21, "modular": 21, "therefor": 21, "flat": 21, "hierarchi": 21, "outlin": 21, "user": 21, "friendli": 21, "hint": 21, "enhanc": [21, 27], "propos": 21, "pep": 21, "484": 21, "book": 21, "error": 21, "messag": 21, "docstr": 21, "257": 21, "markup": 21, "languag": 21, "restructuredtext": 21, "rst": 21, "primer": 21, "restructuretext": 21, "cheatsheet": 21, "sphinx": 21, "autodoc": 21, "inclus": 21, "napoleon": 21, "extens": 21, "conf": 21, "bird": 21, "ey": 21, "background": 21, "medium": [21, 27], "tabular": 21, "critic": 21, "except": 21, "rule": 21, "showcas": 21, "scientif": 21, "mai": 21, "mention": 21, "section": 21, "extern": 21, "note": 21, "go": 21, "html": 21, "_build": 21, "browser": 21, "below": 21, "blank": 21, "OF": 21, "ONE": 21, "complex": 21, "At": 21, "intric": 21, "do": 21, "placehold": 21, "incomplet": 21, "potenti": [21, 27], "expect": 21, "30": 21, "150": 21, "remind": 21, "token": 21, "truncat": 21, "respons": 21, "simpli": 21, "ask": 21, "someth": 21, "repeat": 21, "compil": 21, "done": 21, "script": 21, "leverag": 21, "struggl": 21, "produc": 21, "erron": 21, "often": [21, 32], "ambigu": 21, "logic": 21, "address": 21, "intuit": 21, "through": 21, "signatur": [21, 28], "behavior": 21, "deeper": 21, "intricaci": 21, "citat": 23, "develop": 24, "practic": 24, "2023a": 26, "2023b": 26, "breimann23c": [26, 27], "2023c": 26, "chart": 26, "cheng06": [26, 27], "cheng": 26, "2006": 26, "larg": 26, "disulphid": 26, "bridg": [26, 27], "kernel": 26, "neural": 26, "network": 26, "graph": [26, 27], "struct": 26, "funct": 26, "kawashima": 26, "2008": 26, "aid": 26, "databas": 26, "report": 26, "nucleic": 26, "magnan09": [26, 27], "magnan": 26, "randal": 26, "baldi": 26, "2009": [26, 27], "accur": 26, "solubl": [26, 27], "bioinformat": 26, "galiez16": [26, 27], "galiez": 26, "2016": [26, 27], "viral": 26, "capsid": [26, 27], "tail": [26, 27], "song18": [26, 27], "song": 26, "2018": 26, "throughput": 26, "cleavag": [26, 27], "site": [26, 27], "90": 26, "proteas": 26, "shen19": [26, 27], "shen": 26, "2019": 26, "subcellular": [26, 27], "local": [26, 27], "evolutionari": 26, "chou": [26, 27], "pseaac": 26, "j": 26, "theor": 26, "biol": 26, "tang20": [26, 27], "tang": 26, "2020": 26, "intrins": [26, 27], "disord": [26, 27], "teng21": [26, 27], "teng": 26, "2021": 26, "amyloidogen": [26, 27], "pseudo": 26, "composit": [26, 27], "tripeptid": 26, "bmc": 26, "yang21": [26, 27], "yang": 26, "granular": 26, "multipl": 26, "rna": [26, 27], "bind": [26, 27], "appl": 26, "chronolog": 27, "histori": 27, "t1_overview_benchmark": 27, "t2_overview_scal": 27, "t3a_aaontology_categori": 27, "t3b_aaontology_subcategori": 27, "begin": 27, "append": 27, "caspas": 27, "furin": 27, "long": 27, "ldr": 27, "metallopeptidas": 27, "mmp2": 27, "rbp60": 27, "solvent": 27, "sa": 27, "buri": 27, "amyloidognen": 27, "capdsid": 27, "disulfid": 27, "ss": 27, "bond": 27, "cytoplasm": 27, "v": 27, "plasma": 27, "insolubl": 27, "694": 27, "494524": 27, "unknown": 27, "statu": 27, "586": 27, "tier": 27, "system": 27, "systemat": 27, "arrang": 27, "67": 27, "everi": 27, "clearli": 27, "assess": 27, "couldn": 27, "alloc": 27, "regard": 27, "prefer": 27, "chothia": 27, "1976": 27, "lin": 27, "2003": 27, "64": 27, "occurr": 27, "cellular": 27, "mitochondria": 27, "nakashima": 27, "1990": 27, "nishikawa": 27, "1992": 27, "conform": 27, "\u03b1": 27, "helix": 27, "\u03b2": 27, "strand": 27, "ranodm": 27, "tanaka": 27, "scheraga": 27, "1977": 27, "fasman": 27, "1978b": 27, "richardson": 27, "1988": 27, "qian": 27, "sejnowski": 27, "aurora": 27, "rose": 27, "1998": 27, "224": 27, "19": 27, "24": 27, "energi": 27, "charg": 27, "entropi": 27, "charton": 27, "1983": 27, "gui": 27, "1985": 27, "radzicka": 27, "wolfenden": 27, "36": 27, "could": 27, "mutabl": 27, "sneath": 27, "1966": 27, "17": 27, "polar": 27, "hydrophob": 27, "hydrophil": 27, "amphiphil": 27, "kyte": 27, "doolittl": 27, "1982": 27, "mitaku": 27, "2002": 27, "koehler": 27, "111": 27, "steric": 27, "chain": 27, "angl": 27, "symmetri": 27, "represent": 27, "eccentr": 27, "prabhakaran": 27, "ponnuswami": 27, "karkbara": 27, "knislei": 27, "45": 27, "stabil": 27, "backbon": 27, "dynam": 27, "vihinen": 27, "1994": 27, "bastolla": 27, "2005": 27, "31": 27, "water": 27, "tendenc": 27, "oppos": 27, "1978": 27, "partial": 27, "displac": 27, "caus": 27, "interact": 27, "mainli": 27, "ones": 27, "bull": 27, "brees": 27, "1974": 27, "bigelow": 27, "1967": 27, "jone": 27, "dayhoff": 27, "interior": 27, "unpolar": 27, "fukuchi": 27, "2001": 27, "mp": 27, "cedano": 27, "1997": 27, "mitochondri": 27, "less": 27, "val": 27, "cf": 27, "cap": 27, "propens": 27, "asp": 27, "glu": 27, "ly": 27, "arg": 27, "observ": 27, "character": 27, "punta": 27, "maritan": 27, "robson": 27, "suzuki": 27, "linker": 27, "georg": 27, "heringa": 27, "2004": 27, "helic": 27, "half": 27, "finkelstein": 27, "1991": 27, "outsid": 27, "insid": 27, "befor": 27, "geisow": 27, "robert": 27, "1980": 27, "ramachandran": 27, "state": 27, "quadrant": 27, "bottom": 27, "paul": 27, "1951": 27, "antiparallel": 27, "lifson": 27, "sander": 27, "1979": 27, "bend": 27, "revers": 27, "tight": 27, "consecut": 27, "180": 27, "back": 27, "hydrogen": 27, "3rd": 27, "4th": 27, "1st": 27, "2nd": 27, "r": 27, "tm": 27, "place": 27, "monn\u00e9": 27, "1999": 27, "\u03c0": 27, "ala": 27, "gln": 27, "fodj": 27, "karadaghi": 27, "net": 27, "donor": 27, "transfer": 27, "klein": 27, "1984": 27, "acceptor": 27, "faucher": 27, "hi": 27, "electron": 27, "ion": 27, "pot": 27, "valenc": 27, "chemic": 27, "cosic": 27, "low": 27, "due": 27, "strong": 27, "hutchen": 27, "1970": 27, "unfold": 27, "gibb": 27, "denatur": 27, "yutani": 27, "1987": 27, "instabl": 27, "highest": 27, "break": 27, "pro": 27, "munoz": 27, "serrano": 27, "isoelectr": 27, "ph": 27, "electr": 27, "neutral": 27, "zimmerman": 27, "1968": 27, "16": 27, "crystal": 27, "pairwis": 27, "constitu": 27, "atom": 27, "lennard": 27, "oobatak": 27, "ooi": 27, "rel": 27, "chang": 27, "divid": 27, "aliphat": 27, "linear": 27, "aromat": 27, "carbon": 27, "approxim": 27, "invers": 27, "reactiv": 27, "hydroxythiol": 27, "wold": 27, "occur": 27, "esp": 27, "amphipath": 27, "highli": 27, "signal": 27, "argo": 27, "cornett": 27, "38": 27, "environ": 27, "eisenberg": 27, "mclachlan": 27, "1986": 27, "surround": 27, "angstrom": 27, "radiu": 27, "pack": 27, "globular": 27, "1981": 27, "28": 27, "eigenvalu": 27, "laplacian": 27, "undirect": 27, "node": 27, "mass": 27, "molecular": 27, "second": 27, "actual": 27, "root": 27, "squar": 27, "gyrat": 27, "farther": 27, "awai": 27, "rackovski": 27, "relationship": 27, "rate": 27, "shift": 27, "bundi": 27, "wuthrich": 27, "nh": 27, "temperatur": 27, "rigid": 27, "gly": 27, "ser": 27, "particularli": 27, "ptitsyn": 27, "zhou": 27, "equilibrium": 27, "sueki": 27, "flow": 28, "enri": 28, "introduc": 29, "diagram": 30, "platform": 31, "novel": 31, "everywher": [32, 33], "setup": 32, "augment": 32, "smote": 32, "artifici": 32, "Such": 32, "veri": 32, "deep": 32, "imag": 32, "recognit": 32, "feasibl": 32, "becaus": 32, "slight": 32, "mutat": 32, "alter": 32, "dramat": 32, "great": 32, "quantiti": 32, "besid": 32, "distinguish": 32, "subfield": 32, "quick": 34, "slow": 34}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "AAclustPlot"], [3, 0, 1, "", "CPP"], [4, 0, 1, "", "CPPPlot"], [5, 0, 1, "", "SequenceFeature"], [6, 0, 1, "", "dPULearn"], [7, 3, 1, "", "load_dataset"], [8, 3, 1, "", "load_scales"], [9, 3, 1, "", "plot_gcfs"], [10, 3, 1, "", "plot_get_cdict"], [11, 3, 1, "", "plot_get_clist"], [12, 3, 1, "", "plot_get_cmap"], [13, 3, 1, "", "plot_legend"], [14, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "comp_centers"], [1, 1, 1, "", "comp_correlation"], [1, 1, 1, "", "comp_coverage"], [1, 1, 1, "", "comp_medoids"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 2, 1, "", "is_medoid_"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "labels_centers_"], [1, 2, 1, "", "labels_medoids_"], [1, 2, 1, "", "medoid_names_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "model"], [1, 2, 1, "", "n_clusters"], [1, 1, 1, "", "name_clusters"]], "aaanalysis.AAclustPlot": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "center"], [2, 1, 1, "", "correlation"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "medoids"]], "aaanalysis.CPP": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "eval"], [3, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "heatmap"], [4, 1, 1, "", "profile"], [4, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "add_dif"], [5, 1, 1, "", "add_feat_value"], [5, 1, 1, "", "add_position"], [5, 1, 1, "", "feat_matrix"], [5, 1, 1, "", "feat_names"], [5, 1, 1, "", "get_df_parts"], [5, 1, 1, "", "get_features"], [5, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[6, 1, 1, "", "__init__"], [6, 1, 1, "", "eval"], [6, 1, 1, "", "fit"], [6, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 18, 30, 32, 34], "handl": [0, 34], "featur": [0, 17, 34], "engin": [0, 17, 34], "pu": [0, 18, 32, 34], "learn": [0, 17, 32, 34], "explain": [0, 17, 33, 34], "ai": [0, 17, 33, 34], "perturb": 0, "plot": [0, 15], "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 20, 30], "aaclust": [1, 17], "note": [1, 2, 3, 5, 6, 7, 8, 13], "aaclustplot": 2, "cpp": [3, 17, 31], "cppplot": 4, "exampl": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 20], "sequencefeatur": 5, "dpulearn": 6, "load_dataset": 7, "load_scal": 8, "plot_gcf": 9, "plot_get_cdict": 10, "plot_get_clist": 11, "plot_get_cmap": 12, "plot_legend": 13, "plot_set": 14, "prelud": 15, "quick": 16, "start": [16, 17, 34], "slow": 17, "what": [17, 32, 33], "you": 17, "Will": 17, "1": 17, "load": [17, 18, 19], "sequenc": [17, 33], "scale": [17, 19, 27, 29], "2": 17, "compar": 17, "physicochem": [17, 31], "profil": 17, "3": 17, "protein": [17, 18, 27], "predict": 17, "4": 17, "group": 17, "level": [17, 33], "individu": 17, "tutori": [18, 19, 34], "benchmark": [18, 26, 27], "amino": [18, 19, 27, 29], "acid": [18, 19, 27, 29], "window": 18, "size": 18, "posit": 18, "unlabel": 18, "dataset": [18, 26, 27], "three": 19, "set": 19, "numer": 19, "aaontologi": [19, 27, 29], "redund": 19, "reduc": 19, "subset": 19, "filter": 19, "welcom": 20, "document": [20, 21, 24], "instal": [20, 21], "overview": [20, 24, 27], "refer": [20, 26], "indic": 20, "tabl": [20, 27], "citat": 20, "contribut": 21, "introduct": [21, 24], "vision": 21, "object": 21, "non": 21, "goal": 21, "principl": [21, 28], "bug": 21, "report": 21, "latest": 21, "version": 21, "local": 21, "develop": 21, "environ": 21, "fork": 21, "clone": 21, "depend": 21, "run": 21, "unit": 21, "test": 21, "pull": 21, "request": 21, "preview": 21, "chang": 21, "name": 21, "convent": 21, "class": 21, "templat": 21, "function": 21, "method": 21, "code": 21, "philosophi": 21, "style": 21, "layer": 21, "build": 21, "doc": 21, "chatgpt": 21, "guid": 21, "tgd": 21, "workflow": 24, "algorithm": 26, "us": [26, 31], "case": 26, "further": 26, "inform": 26, "categori": 27, "subcategori": 27, "usag": 28, "classif": 29, "flow": 30, "enri": 30, "point": 30, "compon": 30, "entri": 30, "bridg": 30, "extern": 30, "librari": 30, "identifi": 31, "signatur": 31, "from": 32, "unbalanc": 32, "small": 32, "i": [32, 33], "get": 34}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data Handling": [[0, "data-handling"], [34, "data-handling"]], "Feature Engineering": [[0, "feature-engineering"], [34, "feature-engineering"]], "PU Learning": [[0, "pu-learning"], [34, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"], [34, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], "aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [1, null], [1, null], [1, null], [2, null], [2, null], [3, null], [3, null], [5, null], [5, null], [5, null], [5, null], [5, null], [6, null], [6, null], [7, null], [8, null], [13, null]], "aaanalysis.AAclustPlot": [[2, "aaanalysis-aaclustplot"]], "aaanalysis.CPP": [[3, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[4, "aaanalysis-cppplot"]], "Examples": [[4, null], [5, null], [5, null], [6, null], [7, null], [8, null], [9, null], [10, null], [11, null], [12, null], [13, null], [14, null]], "aaanalysis.SequenceFeature": [[5, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[6, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[7, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[8, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[9, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[10, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_clist": [[11, "aaanalysis-plot-get-clist"]], "aaanalysis.plot_get_cmap": [[12, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_legend": [[13, "aaanalysis-plot-legend"]], "aaanalysis.plot_settings": [[14, "aaanalysis-plot-settings"]], "Plotting Prelude": [[15, "plotting-prelude"]], "Quick Start with AAanalysis": [[16, "quick-start-with-aaanalysis"]], "Slow Start with AAanalysis": [[17, "slow-start-with-aaanalysis"]], "What You Will Learn:": [[17, "what-you-will-learn"]], "1. Loading Sequences and Scales": [[17, "loading-sequences-and-scales"]], "2. Feature Engineering": [[17, "feature-engineering"]], "AAclust": [[17, "aaclust"]], "Comparative Physicochemical Profiling (CPP)": [[17, "comparative-physicochemical-profiling-cpp"]], "3. Protein Prediction": [[17, "protein-prediction"]], "4. Explainable AI": [[17, "explainable-ai"]], "Explainable AI on group level": [[17, "explainable-ai-on-group-level"]], "Explainable AI on individual level": [[17, "explainable-ai-on-individual-level"]], "Data Loading Tutorial": [[18, "data-loading-tutorial"]], "Loading of protein benchmarks": [[18, "loading-of-protein-benchmarks"]], "Loading of protein benchmarks: Amino acid window size": [[18, "loading-of-protein-benchmarks-amino-acid-window-size"]], "Loading of protein benchmarks: Positive-Unlabeled (PU) datasets": [[18, "loading-of-protein-benchmarks-positive-unlabeled-pu-datasets"]], "Scale Loading Tutorial": [[19, "scale-loading-tutorial"]], "Three sets of numerical amino acid scales": [[19, "three-sets-of-numerical-amino-acid-scales"]], "AAontology": [[19, "aaontology"], [27, "aaontology"]], "Redundancy-reduce scale subsets": [[19, "redundancy-reduce-scale-subsets"]], "Filtering of scales": [[19, "filtering-of-scales"]], "Welcome to the AAanalysis documentation!": [[20, "welcome-to-the-aaanalysis-documentation"]], "Install": [[20, "install"]], "OVERVIEW": [[20, null]], "EXAMPLES": [[20, null]], "REFERENCES": [[20, null]], "Indices and tables": [[20, "indices-and-tables"]], "Citation": [[20, "citation"]], "Contributing": [[21, "contributing"]], "Introduction": [[21, "introduction"], [24, "introduction"]], "Vision": [[21, "vision"]], "Objectives": [[21, "objectives"]], "Non-goals": [[21, "non-goals"]], "Principles": [[21, "principles"]], "Bug Reports": [[21, "bug-reports"]], "Installation": [[21, "installation"]], "Latest Version": [[21, "latest-version"]], "Local Development Environment": [[21, "local-development-environment"]], "Fork and Clone": [[21, "fork-and-clone"]], "Install Dependencies": [[21, "install-dependencies"]], "Run Unit Tests": [[21, "run-unit-tests"]], "Pull Requests": [[21, "pull-requests"]], "Preview Changes": [[21, "preview-changes"]], "Documentation": [[21, "documentation"]], "Naming Conventions": [[21, "naming-conventions"]], "Class Templates": [[21, "class-templates"]], "Function and Method Naming": [[21, "function-and-method-naming"]], "Code Philosophy": [[21, "code-philosophy"]], "Documentation Style": [[21, "documentation-style"]], "Documentation Layers": [[21, "documentation-layers"]], "Building the Docs": [[21, "building-the-docs"]], "Test with ChatGPT": [[21, "test-with-chatgpt"]], "Test Guided Development (TGD)": [[21, "test-guided-development-tgd"]], "Workflow": [[24, "workflow"]], "Overview of documentation": [[24, "overview-of-documentation"]], "References": [[26, "references"]], "Algorithms": [[26, "algorithms"]], "Datasets and Benchmarks": [[26, "datasets-and-benchmarks"]], "Use Cases": [[26, "use-cases"]], "Further Information": [[26, "further-information"]], "Tables": [[27, "tables"]], "Overview Table": [[27, "overview-table"]], "Protein Benchmark Datasets": [[27, "protein-benchmark-datasets"]], "Amino Acid Scale Datasets": [[27, "amino-acid-scale-datasets"]], "Categories": [[27, "categories"]], "Subcategories": [[27, "subcategories"]], "Usage Principles": [[28, "usage-principles"]], "AAontology: Classification of amino acid scales": [[29, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[30, "data-flow-and-enry-points"]], "Data Flow: Components of AAanalysis": [[30, "data-flow-components-of-aaanalysis"]], "Entry Points: Bridges to External Libraries": [[30, "entry-points-bridges-to-external-libraries"]], "Identifying Physicochemical Signatures using CPP": [[31, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[32, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[32, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[33, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[33, "what-is-explainable-ai"]], "Tutorials": [[34, "tutorials"]], "Getting Started": [[34, "getting-started"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "comp_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_centers"]], "comp_correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_correlation"]], "comp_coverage() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_coverage"]], "comp_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_medoids"]], "eval() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "is_medoid_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.is_medoid_"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "labels_centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_centers_"]], "labels_medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_medoids_"]], "medoid_names_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_names_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "model (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.model"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "name_clusters() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.name_clusters"]], "aaclustplot (class in aaanalysis)": [[2, "aaanalysis.AAclustPlot"]], "__init__() (aaanalysis.aaclustplot method)": [[2, "aaanalysis.AAclustPlot.__init__"]], "center() (aaanalysis.aaclustplot method)": [[2, "aaanalysis.AAclustPlot.center"]], "correlation() (aaanalysis.aaclustplot static method)": [[2, "aaanalysis.AAclustPlot.correlation"]], "eval() (aaanalysis.aaclustplot static method)": [[2, "aaanalysis.AAclustPlot.eval"]], "medoids() (aaanalysis.aaclustplot method)": [[2, "aaanalysis.AAclustPlot.medoids"]], "cpp (class in aaanalysis)": [[3, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[3, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp method)": [[3, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[3, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[4, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[4, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[4, "aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[4, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[4, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[5, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[5, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[5, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[6, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[6, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[6, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[6, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[6, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[7, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[8, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[9, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[10, "aaanalysis.plot_get_cdict"]], "plot_get_clist() (in module aaanalysis)": [[11, "aaanalysis.plot_get_clist"]], "plot_get_cmap() (in module aaanalysis)": [[12, "aaanalysis.plot_get_cmap"]], "plot_legend() (in module aaanalysis)": [[13, "aaanalysis.plot_legend"]], "plot_settings() (in module aaanalysis)": [[14, "aaanalysis.plot_settings"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.AAclustPlot", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_clist", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_legend", "generated/aaanalysis.plot_settings", "generated/plotting_prelude", "generated/tutorial1_quick_start", "generated/tutorial1_slow_start", "generated/tutorial2a_data_loader", "generated/tutorial2b_scales_loader", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tutorials"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.AAclustPlot.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_clist.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_legend.rst", "generated/aaanalysis.plot_settings.rst", "generated/plotting_prelude.rst", "generated/tutorial1_quick_start.rst", "generated/tutorial1_slow_start.rst", "generated/tutorial2a_data_loader.rst", "generated/tutorial2b_scales_loader.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles.rst", "index/usage_principles/aaontology.rst", "index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tutorials.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.AAclustPlot", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_clist", "aaanalysis.plot_get_cmap", "aaanalysis.plot_legend", "aaanalysis.plot_settings", "Plotting Prelude", "Quick Start with AAanalysis", "Slow Start with AAanalysis", "Data Loading Tutorial", "Scale Loading Tutorial", "Welcome to the AAanalysis documentation!", "Contributing", "<no title>", "<no title>", "Introduction", "<no title>", "References", "Tables", "Usage Principles", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tutorials"], "terms": {"thi": [0, 1, 4, 8, 9, 11, 13, 14, 15, 17, 18, 19, 21, 30], "applic": [0, 4, 13], "program": [0, 21], "interfac": [0, 21, 27], "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14, 16, 17, 18, 19, 20, 21, 24, 25, 27, 29, 31], "public": [0, 15, 18, 20, 21, 23], "object": [0, 1, 2, 4, 5, 6, 13, 17], "function": [0, 1, 2, 4, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 25], "our": [0, 9, 10, 12, 15, 17, 19, 21, 24], "aaanalysi": [0, 15, 18, 19, 21, 23, 24, 25, 27, 28, 31, 34], "python": [0, 16, 17, 20, 21, 24, 25], "toolkit": [0, 21, 30], "which": [0, 4, 5, 13, 14, 16, 17, 18, 19, 21, 24, 27, 30, 32], "can": [0, 1, 5, 6, 9, 13, 15, 16, 17, 18, 19, 20, 21, 24, 27, 30, 32], "import": [0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 28], "aa": [0, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 27, 28], "you": [0, 15, 19, 20, 21, 23], "access": [0, 1, 7, 17, 19, 27], "all": [0, 1, 3, 4, 5, 7, 8, 14, 15, 16, 17, 19, 21, 27], "method": [0, 1, 2, 3, 4, 5, 6, 16, 17, 26], "via": [0, 15, 21, 26], "alia": [0, 5], "load_dataset": [0, 5, 16, 17, 18, 19, 27], "class": [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 18, 32], "model_class": [1, 2, 17], "sklearn": [1, 2, 16, 17], "cluster": [1, 2, 17, 20, 24, 25, 26, 27], "_kmean": 1, "kmean": [1, 17], "model_kwarg": [1, 2], "none": [1, 2, 3, 4, 5, 6, 7, 8, 13, 18], "verbos": [1, 3, 4, 5, 6, 16, 17], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 19, 21], "base": [1, 2, 3, 4, 5, 6, 7, 13, 16, 17, 20, 21, 24, 25, 26, 27, 31, 32], "wrapper": [1, 4, 17, 20, 21, 24, 25], "A": [1, 2, 5, 7, 9, 11, 12, 13, 14, 15, 17, 18, 19, 21, 24, 26], "k": [1, 20, 24, 25, 26], "optim": [1, 3, 4, 11, 15, 20, 21, 24, 25, 26], "select": [1, 2, 3, 4, 7, 8, 16, 17, 18, 19, 20, 21, 24, 25, 26], "redund": [1, 3, 8, 16, 17, 20, 21, 24, 25, 26], "reduc": [1, 6, 8, 16, 20, 24, 25, 26, 27], "set": [1, 2, 3, 4, 5, 6, 8, 9, 13, 14, 15, 16, 17, 18, 20, 21, 24, 25, 26, 27, 30], "numer": [1, 4, 5, 17, 20, 24, 25], "scale": [1, 2, 3, 4, 5, 8, 10, 14, 16, 20, 23, 24, 25, 26, 28, 30, 34], "us": [1, 2, 3, 4, 6, 7, 8, 9, 13, 15, 16, 17, 18, 19, 20, 21, 23, 24, 27, 28, 30, 32], "model": [1, 2, 6, 16, 17, 21, 32], "requir": [1, 21], "pre": [1, 3, 16, 17, 18, 21], "defin": [1, 5, 8, 16, 17, 18, 21, 27, 30], "number": [1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 18, 19, 27], "n_cluster": [1, 2, 16, 17], "mean": [1, 3, 4, 16, 17, 19, 27], "other": [1, 4, 8, 14, 15, 19, 21, 27], "scikit": [1, 2, 21], "learn": [1, 2, 6, 16, 18, 20, 21, 23, 24, 25, 26, 27, 28], "valu": [1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 24, 27], "util": [1, 14, 15, 16, 18, 20, 21], "pearson": [1, 3], "correl": [1, 2, 3, 27], "repres": [1, 4, 17, 18, 24, 27], "sampl": [1, 3, 4, 5, 6, 18, 27, 32], "medoid": [1, 2], "each": [1, 2, 3, 4, 5, 6, 17, 18, 19, 21], "closest": 1, "center": [1, 2, 16, 17, 27], "result": [1, 2, 3, 21], "see": [1, 2, 4, 21, 24, 27, 30], "breimann23a": [1, 7, 8, 26, 27], "paramet": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 17, 18, 19, 21, 27], "type": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 18, 21, 27], "clustermixin": 1, "instanti": 1, "fit": [1, 6, 16, 17, 21], "option": [1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 16, 17], "dict": [1, 2, 3, 4, 5, 6, 10, 13], "keyword": [1, 2, 4, 6], "argument": [1, 2, 4, 5, 6, 13], "pass": [1, 2, 4, 6, 21], "bool": [1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14], "If": [1, 2, 3, 4, 5, 6, 7, 8, 13, 14, 20, 21, 23, 32], "true": [1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 18, 19], "output": [1, 3, 5, 6, 15, 21], "ar": [1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 19, 21, 27, 30, 32, 33], "enabl": [1, 3, 4, 5, 6, 20, 21, 24, 25, 31], "The": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 15, 17, 18, 19, 21, 27, 30, 31], "after": [1, 3, 27], "call": [1, 8, 15, 27], "obtain": [1, 5, 8, 16, 17, 27], "int": [1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13], "labels_": [1, 6], "label": [1, 2, 3, 4, 5, 6, 7, 13, 15, 16, 17, 18, 21, 27, 32], "order": [1, 2, 21, 27], "x": [1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17], "arrai": [1, 2, 3, 5, 6, 16, 17], "like": [1, 2, 3, 5, 6, 14, 15, 21, 27], "shape": [1, 2, 3, 4, 5, 6, 13, 27], "n_sampl": [1, 2, 3, 5, 6], "centers_": 1, "averag": [1, 2, 5, 16, 17, 19, 27], "correspond": [1, 2, 13, 18, 21, 27], "n_featur": [1, 2, 3, 4, 5, 6], "labels_centers_": 1, "medoids_": 1, "one": [1, 4, 11, 13, 21], "labels_medoids_": 1, "is_medoid_": 1, "indic": [1, 4, 5, 6, 18, 19, 21, 27], "being": [1, 18, 21, 27], "1": [1, 2, 3, 4, 5, 6, 7, 8, 10, 13, 14, 15, 16, 18, 19, 21, 27, 32], "0": [1, 2, 3, 4, 5, 6, 7, 13, 15, 16, 17, 18, 19, 27, 32], "same": [1, 2, 8, 19], "medoid_names_": [1, 16, 17], "name": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 14, 16, 17, 18, 19, 27], "provid": [1, 2, 4, 6, 7, 8, 13, 17, 18, 19, 20, 21, 25, 27, 32], "list": [1, 2, 4, 5, 10, 11, 12, 13, 16, 17, 27], "attribut": 1, "dure": [1, 6], "directli": [1, 21], "design": [1, 4, 21, 27, 31], "primarili": [1, 6, 21], "amino": [1, 3, 4, 5, 7, 8, 16, 17, 20, 23, 24, 25, 26, 28, 30, 32], "acid": [1, 3, 4, 5, 7, 8, 16, 17, 20, 23, 24, 25, 26, 28, 30, 32], "ani": [1, 19, 21, 24, 27], "__init__": [1, 2, 3, 4, 5, 6], "on_cent": 1, "min_th": 1, "3": [1, 5, 6, 11, 12, 13, 18, 19, 21, 27], "merg": 1, "metric": [1, 2, 6, 21], "euclidean": [1, 2, 6], "appli": [1, 6, 13, 14, 18], "algorithm": [1, 3, 4, 16, 17, 20, 21, 24, 25, 30, 31], "featur": [1, 3, 4, 5, 6, 16, 20, 21, 24, 25, 30, 31, 32], "matrix": [1, 2, 5, 6, 16, 17, 27], "determin": [1, 2, 8], "without": [1, 4, 21, 27], "specif": [1, 18, 21, 27], "It": [1, 2, 14, 17, 18, 24, 27, 30], "partit": [1, 27], "data": [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 20, 21, 27, 28], "maxim": 1, "within": [1, 3, 5, 21, 27, 30], "beyond": 1, "threshold": [1, 3], "qualiti": 1, "either": [1, 2, 5, 7, 8, 19, 20], "minimum": [1, 2, 5, 7], "member": 1, "fals": [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13, 14, 16, 17], "between": [1, 2, 3, 4, 5, 8, 11, 13, 16, 17, 18, 21, 27], "its": [1, 18, 21, 27], "min_cor_al": 1, "min_cor_cent": 1, "respect": [1, 7, 17, 20, 21, 23, 27], "describ": [1, 27], "row": [1, 2], "typic": [1, 2, 18, 24, 27], "column": [1, 2, 3, 4, 5, 6, 7, 8, 13, 18, 19, 21], "must": [1, 5, 11, 12, 21], "float": [1, 2, 3, 4, 6, 13, 14], "otherwis": [1, 4, 5, 6, 27], "step": [1, 3, 4, 5, 7, 8, 21, 24], "perform": [1, 2, 3, 6, 8, 16, 17, 19, 27], "str": [1, 2, 4, 5, 6, 7, 8, 10, 12, 13, 14], "similar": [1, 21, 27, 32], "measur": [1, 2, 21, 27], "maximum": [1, 2, 3, 5, 6, 7, 16, 17], "distanc": [1, 6, 27], "manhattan": [1, 6], "cosin": [1, 6], "return": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 18], "instanc": [1, 4, 13], "allow": 1, "direct": [1, 21], "aanalysi": [1, 21], "consist": [1, 14, 21, 24, 27], "three": [1, 5, 18, 27], "main": [1, 27], "estim": 1, "lower": [1, 27], "bound": 1, "refin": [1, 21], "recurs": [1, 26], "chosen": [1, 3, 5, 7, 8, 18], "smaller": [1, 15], "merge_metr": 1, "reduct": [1, 2], "pairwise_dist": 1, "were": [1, 8, 19, 27], "runtimewarn": 1, "caught": 1, "bundl": 1, "eval": [1, 2, 3, 6, 21], "evalu": [1, 2, 3, 8, 19, 21, 27], "establish": [1, 21], "quantifi": 1, "bic": [1, 2], "bayesian": [1, 2], "inform": [1, 2, 3, 4, 5, 6, 19, 30], "criterion": [1, 2], "reflect": [1, 21, 27], "good": [1, 21], "while": [1, 18], "account": [1, 21, 27], "rang": 1, "from": [1, 2, 3, 4, 5, 6, 7, 8, 16, 17, 18, 19, 20, 21, 27, 28], "neg": [1, 5, 6, 7, 13, 18, 21, 27, 32], "infin": 1, "posit": [1, 2, 3, 4, 5, 6, 7, 20, 21, 24, 25, 27, 32], "higher": [1, 27], "superior": 1, "ch": [1, 2, 27], "calinski": [1, 2], "harabasz": [1, 2], "index": [1, 2, 7, 19, 20, 21, 26], "ratio": 1, "dispers": 1, "score": [1, 16, 17], "suggest": [1, 21], "better": 1, "sc": [1, 2], "silhouett": [1, 2], "coeffici": [1, 2], "proxim": 1, "point": [1, 4, 9, 13, 27, 28], "neighbor": [1, 27], "li": 1, "closer": 1, "impli": 1, "equal": [1, 18], "inf": 1, "wa": [1, 24], "adapt": 1, "form": [1, 5, 27], "stackexchang": 1, "discuss": [1, 21], "modifi": [1, 6, 14], "align": [1, 4, 13, 17, 19, 21], "so": 1, "signifi": 1, "contrari": 1, "convent": [1, 5, 8], "implement": [1, 21], "favor": 1, "calinski_harabasz_scor": 1, "silhouette_scor": 1, "static": [1, 2, 5], "name_clust": 1, "shorten_nam": 1, "assign": [1, 4, 5, 6, 19, 27], "frequenc": [1, 27], "priorit": 1, "alreadi": [1, 32], "contain": [1, 2, 3, 4, 6, 7, 8, 19, 21, 27, 30, 32], "unclassifi": [1, 8, 19, 27], "shorten": 1, "version": [1, 19, 27], "cluster_nam": 1, "renam": 1, "comp_cent": 1, "comput": [1, 3, 4, 5, 16, 17, 21, 26, 27], "given": [1, 4, 5, 7, 11, 12, 13, 16, 17, 19, 21, 27], "labels_cent": 1, "associ": [1, 27], "comp_medoid": 1, "labels_medoid": 1, "comp_correl": 1, "x_ref": 1, "labels_ref": 1, "names_ref": 1, "refer": [1, 3, 5, 7, 17, 21, 27], "compar": [1, 16, 18, 20, 24, 25, 27, 30, 31], "n_samples_ref": 1, "df_corr": [1, 2], "pd": [1, 5, 6, 16, 17, 21], "datafram": [1, 2, 3, 4, 5, 6, 7, 8, 16, 17, 21, 30], "pair": 1, "labels_sort": 1, "sort": 1, "ascend": [1, 2], "replac": [1, 7], "panda": [1, 3, 4, 5, 6, 7, 8, 16, 17, 21], "corr": 1, "comp_coverag": 1, "percentag": [1, 3, 6, 19], "uniqu": [1, 2, 3, 4, 19, 21], "present": [1, 5, 7], "help": 1, "understand": 1, "coverag": [1, 21], "particular": 1, "subset": [1, 5, 8, 27], "univers": 1, "both": [1, 4, 14, 18], "consid": [1, 8, 21], "onli": [1, 4, 7, 8, 13, 14, 18, 21, 27, 32], "onc": [1, 21], "regardless": 1, "repetit": 1, "should": [1, 2, 3, 4, 5, 6, 21, 32], "superset": 1, "found": [1, 5, 21], "decomposit": 2, "_pca": 2, "pca": [2, 6, 19], "plot": [2, 4, 9, 10, 11, 12, 13, 14, 18, 20, 21, 27, 34], "aaclust": [2, 8, 16, 19, 20, 23, 24, 25, 26, 27], "analysi": [2, 6, 8, 17, 19, 20, 21, 24, 25, 27], "dimension": [2, 6, 26], "visual": [2, 11, 14, 15, 21], "princip": [2, 6, 8, 19, 27], "compon": [2, 5, 6, 8, 19, 27], "transformermixin": 2, "n_compon": [2, 6], "data_ev": 2, "dict_xlim": 2, "figsiz": [2, 4], "7": [2, 4, 5, 6, 15, 18, 27], "6": [2, 5, 18, 27], "rank": [2, 3, 19], "independ": [2, 15], "follow": [2, 3, 5, 6, 8, 20, 21, 23, 24, 25, 28], "four": [2, 21], "intern": [2, 4, 21, 27], "gener": [2, 3, 4, 5, 11, 12, 14, 21, 24, 26, 27, 32], "2": [2, 3, 4, 5, 6, 9, 11, 13, 15, 16, 18, 19, 21, 27, 32], "etc": 2, "dictionari": [2, 3, 4, 5, 10, 13], "axi": [2, 4, 14, 19], "limit": [2, 4, 21], "xmin": 2, "xmax": 2, "subplot": 2, "kei": [2, 4, 10, 13, 21, 27], "e": [2, 4, 5, 15, 17, 19, 20, 21, 24, 25, 27, 32], "g": [2, 4, 5, 20, 21, 24, 25, 27, 32], "auto": 2, "tupl": [2, 4, 12], "width": [2, 4, 13], "height": [2, 4], "figur": [2, 4], "inch": [2, 4], "fig": 2, "ax": [2, 4, 9, 10, 13, 14], "": [2, 13, 18, 21, 26, 27], "detail": [2, 4, 7, 8, 19, 20, 21, 23], "component_x": 2, "component_i": 2, "dot_alpha": 2, "75": [2, 4], "dot_siz": 2, "100": [2, 3, 7, 16, 17, 18], "legend": [2, 4, 13, 14, 15], "palett": [2, 9, 10, 11, 12, 13, 14, 15, 16, 17], "highlight": [2, 4], "listedcolormap": 2, "param": 2, "rgb": [2, 12], "matplotlib": [2, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21], "color": [2, 4, 9, 10, 11, 12, 13, 14, 15], "newtyp": 2, "arraylike2d": 2, "union": [2, 13], "sequenc": [2, 3, 4, 5, 6, 7, 16, 18, 20, 21, 24, 25, 26, 27, 28, 30, 31, 32], "ndarrai": 2, "arraylike1d": 2, "seri": [2, 5], "return_data": 2, "bar_posit": 2, "left": [2, 13, 27], "bar_width": [2, 4], "bar_spac": 2, "bar_color": 2, "grai": 2, "bar_ticklabel_pad": 2, "vmin": [2, 4], "vmax": [2, 4], "cmap": [2, 4], "viridi": 2, "kwargs_heatmap": 2, "heatmap": [2, 4], "sidebar": 2, "group": [2, 3, 4, 5, 13, 15, 27], "side": [2, 18, 27], "bar": [2, 4], "length": [2, 3, 4, 5, 7, 13, 18, 27], "default": [2, 3, 4, 5, 6, 7, 9, 10, 13, 14, 15, 16, 17, 18, 19], "right": [2, 15, 27], "top": [2, 8, 15, 27], "down": 2, "ad": [2, 4], "space": [2, 4, 6, 13, 21], "singl": 2, "pad": 2, "y": [2, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17], "tick": [2, 4, 14, 15], "sn": [2, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], "colormap": [2, 4], "addit": [2, 4, 5, 6, 8, 14, 19, 21, 27], "_ax": 2, "ensur": [2, 14, 18, 21], "avoid": 2, "mislabel": 2, "seaborn": [2, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21], "creat": [2, 3, 4, 5, 6, 14, 15, 16, 17, 21, 30], "df_scale": [3, 5, 8, 16, 17, 19, 30], "df_cat": [3, 4, 5, 8, 19, 30], "df_part": [3, 5, 16, 17, 30], "split_kw": [3, 5, 16, 17, 30], "accept_gap": [3, 4, 5], "tool": [3, 21, 26], "filter": [3, 4, 7, 16, 17, 18], "most": [3, 4, 6, 13, 16, 17, 20, 24, 25], "discrimin": [3, 4, 16, 17], "two": [3, 4, 8, 9, 16, 17, 19, 20, 21, 24, 25, 26, 27, 29, 30], "load_categori": [3, 5], "categori": [3, 4, 5, 8, 10, 11, 13, 18, 19], "physicochem": [3, 5, 20, 24, 25, 26, 27, 28, 30], "part": [3, 4, 5, 16, 17, 21, 30], "sequencefeatur": [3, 16, 17], "get_split_kw": [3, 5, 16, 17], "nest": [3, 5], "split_typ": [3, 5, 16, 17], "whether": [3, 4, 5, 12, 13], "accept": [3, 4, 5], "miss": [3, 4, 5], "omit": [3, 4, 5], "print": [3, 4, 5, 16, 17], "progress": [3, 4, 26], "about": [3, 4], "run": [3, 5, 16, 17], "parametr": 3, "n_filter": 3, "tmd_len": [3, 4, 5], "20": [3, 4, 5, 8, 18, 21, 27], "jmd_n_len": [3, 4, 5], "10": [3, 4, 5, 11, 13, 18, 21, 27], "jmd_c_len": [3, 4, 5], "ext_len": [3, 4, 5], "4": [3, 4, 5, 18, 19, 27], "start": [3, 4, 5, 7, 21, 27, 28, 30], "check_cat": 3, "n_pre_filt": 3, "pct_pre_filt": 3, "5": [3, 4, 5, 6, 15, 16, 17, 18, 19, 21, 27], "max_std_test": 3, "max_overlap": 3, "max_cor": 3, "n_process": 3, "pipelin": [3, 21], "creation": 3, "aim": [3, 4, 16, 17, 21], "identifi": [3, 4, 6, 7, 16, 17, 18, 20, 24, 25, 26, 28, 32], "collect": [3, 8], "non": [3, 5, 7, 16, 17, 27], "test": [3, 17, 19], "t": [3, 7, 16, 17, 27], "u": [3, 15, 20, 21], "p": [3, 26], "tmd": [3, 4, 5, 7, 16, 17, 18], "todo": [3, 21], "add": [3, 4, 5, 21], "link": [3, 20, 21, 23, 26], "explan": [3, 4, 21], "first": [3, 4, 5, 8, 15, 16, 21], "n": [3, 4, 5, 7, 8, 16, 17, 18, 19, 21, 26, 27], "terminu": [3, 4, 5, 27], "jmd": [3, 4, 5, 16, 17], "c": [3, 4, 5, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 26, 27], "extend": [3, 4, 5, 21, 27, 32], "termin": [3, 4, 5, 17, 18, 27], "longer": 3, "than": [3, 27], "check": [3, 21], "remain": [3, 19, 21], "standard": [3, 32], "deviat": 3, "overlap": 3, "cpu": 3, "multiprocess": [3, 17], "automat": [3, 4, 6, 13, 21], "df_feat": [3, 4, 5, 16, 17, 30], "statist": [3, 4], "n_feature_inform": [3, 4], "eleven": 3, "includ": [3, 5, 7, 8, 13, 21], "id": [3, 5, 7, 8, 19], "11": [3, 4, 18, 27], "split": [3, 5, 16, 17, 30], "subcategori": [3, 4, 8, 19], "sub": 3, "scale_nam": [3, 4, 8, 19], "abs_auc": [3, 4], "absolut": [3, 21], "adjust": [3, 4, 13, 14, 15], "auc": 3, "abs_mean_dif": 3, "differ": [3, 4, 5, 11, 18, 19, 30], "std_test": [3, 4], "std_ref": 3, "p_val": 3, "mann_whitnei": 3, "ttest_indep": 3, "p_val_fdr_bh": 3, "benjamini": 3, "hochberg": 3, "fdr": 3, "correct": 3, "condit": [4, 5], "jmd_m_len": [4, 5], "profil": [4, 16, 20, 24, 25, 31], "val_col": 4, "mean_dif": 4, "val_typ": 4, "count": [4, 18], "normal": [4, 8, 13, 19, 21, 27], "titl": [4, 9, 13, 14, 15, 16, 17], "title_kw": 4, "dict_color": [4, 10, 13, 15], "edge_color": 4, "add_jmd_tmd": 4, "jmd_n_seq": 4, "tmd_seq": 4, "jmd_c_seq": 4, "tmd_color": 4, "mediumspringgreen": 4, "jmd_color": 4, "blue": [4, 16, 17], "tmd_seq_color": 4, "black": [4, 12, 13, 15, 21], "jmd_seq_color": 4, "white": [4, 12, 13], "seq_siz": 4, "tmd_jmd_fontsiz": 4, "xtick_siz": 4, "xtick_width": 4, "xtick_length": 4, "xticks_po": 4, "ytick_siz": 4, "ytick_width": 4, "ytick_length": 4, "ylim": [4, 16, 17], "highlight_tmd_area": 4, "highlight_alpha": 4, "15": [4, 5, 18, 27], "grid": [4, 14, 15], "grid_axi": [4, 14, 15], "add_legend_cat": 4, "legend_kw": 4, "shap_plot": 4, "kwarg": [4, 5, 13], "avail": [4, 8, 13, 17, 19, 20, 23, 26], "specifi": [4, 5, 6, 10, 12, 17, 21], "check_value_typ": 4, "size": [4, 5, 9, 13, 14, 15, 16, 17, 27], "custom": [4, 8, 15, 21], "appear": [4, 27], "map": [4, 5, 12, 13], "edg": [4, 13, 21, 27], "line": [4, 13, 14, 15, 21], "annot": 4, "font": [4, 9, 13, 14], "area": [4, 19, 27], "alpha": 4, "drawn": 4, "shap": [4, 9, 12, 17, 21], "shaplei": 4, "librari": [4, 14, 21], "8": [4, 5, 6, 13, 17, 18, 21, 27], "grid_on": 4, "rdbu_r": 4, "cmap_n_color": 4, "cbar_kw": 4, "facecolor_dark": [4, 12], "add_importance_map": 4, "cbar_pct": 4, "featuremap": 4, "versu": 4, "level": [4, 7, 8, 18, 19, 20, 21, 25, 27, 28, 29], "protein": [4, 5, 7, 16, 19, 20, 21, 24, 25, 26, 30, 31, 32], "shown": 4, "feat_impact": 4, "displai": [4, 14], "sum": [4, 19, 27], "std": 4, "aggreg": 4, "positions_onli": 4, "further": [4, 19, 21, 27], "across": [4, 14, 19, 21], "recommend": [4, 6, 8, 21], "when": [4, 6, 13, 21, 27], "emphas": [4, 21], "fewer": 4, "value_typ": 4, "pyplot": [4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], "anchor": [4, 13, 27], "infer": [4, 21], "seismic": 4, "impact": 4, "discret": 4, "diverg": 4, "sequenti": 4, "classifi": 4, "colorbar": 4, "under": [4, 8, 21], "depicet": 4, "depict": 4, "jmd_n": [4, 5, 7, 18], "jmd_c": [4, 5, 7, 18], "set_xticklabel": 4, "widht": 4, "tick_param": 4, "classif": [4, 7, 8, 17, 18, 19, 20, 25, 27, 28, 32], "pcolormesh": 4, "effect": [4, 21, 27, 32], "document": [4, 27], "more": [4, 13, 14, 16, 21], "cpp": [4, 5, 9, 12, 16, 20, 23, 24, 25, 28, 30], "code": [4, 9, 10, 11, 12, 13, 14, 15], "update_seq_s": 4, "retriev": [5, 17], "continu": [5, 12, 17, 21], "domain": [5, 7, 17, 18, 27], "transmembran": [5, 27], "membran": [5, 27], "principl": [5, 20], "distinct": [5, 20, 21, 24, 25, 27], "segment": [5, 16, 17, 30], "pattern": [5, 13, 17], "properti": [5, 13, 21, 27], "express": 5, "realiz": 5, "For": [5, 7, 16, 18, 21, 32], "over": [5, 16, 17], "valid": [5, 21], "tmd_e": 5, "tmd_n": 5, "tmd_c": 5, "ext_c": 5, "ext_n": 5, "tmd_jmd": [5, 16, 17], "jmd_n_tmd_n": 5, "tmd_c_jmd_c": 5, "ext_n_tmd_n": 5, "tmd_c_ext_c": 5, "get_df_part": [5, 16, 17], "df_seq": [5, 6, 7, 16, 17, 18, 30], "list_part": [5, 16, 17], "all_part": 5, "datafran": 5, "compris": [5, 13, 19], "tmd_start": [5, 7, 18], "tmd_stop": [5, 7, 18], "string": 5, "len": [5, 10, 18], "lenght": 5, "resp": [5, 27], "extra": [5, 15, 27], "possibl": [5, 18, 27, 32], "get": [5, 9, 13, 15, 28], "sf": [5, 16, 17], "dom_gsec": [5, 16, 17, 18, 27], "n_split_min": 5, "n_split_max": [5, 16, 17], "steps_pattern": 5, "n_min": 5, "n_max": 5, "len_max": 5, "steps_periodicpattern": 5, "periodicpattern": 5, "greater": 5, "greatest": 5, "whole": [5, 7, 19], "specfii": 5, "smallest": [5, 27], "integ": 5, "vari": [5, 18], "paramt": 5, "argumetn": 5, "get_featur": 5, "load_scal": [5, 16, 17, 19, 20, 25, 27], "combin": [5, 16, 17, 21, 27], "feat_matrix": [5, 16, 17], "n_job": [5, 16, 17], "return_label": 5, "job": 5, "parallel": [5, 27], "spars": 5, "feat_nam": 5, "convert": 5, "depend": [5, 27], "last": 5, "step1": 5, "step2": 5, "add_feat_valu": 5, "dict_scal": 5, "letter": 5, "feature_valu": 5, "n_part": 5, "ha": [5, 21, 27], "where": [5, 6, 14, 27], "structur": [5, 26, 27], "th": [5, 8, 19], "n_split": 5, "p1": 5, "p2": 5, "pn": 5, "end": [5, 21, 27], "odd": [5, 18], "even": 5, "give": 5, "add_dif": 5, "sample_nam": 5, "ref_group": 5, "add_posit": 5, "part_split": 5, "feat_posit": 5, "total": [5, 6, 19, 21, 27], "pca_kwarg": 6, "determinist": [6, 20, 24, 25], "unlabel": [6, 20, 24, 25, 27, 32], "offer": [6, 18, 21], "approach": [6, 16, 17, 18, 21, 32], "pu": [6, 20, 24, 25, 27], "emploi": 6, "pc": [6, 8, 27], "iter": 6, "reliabl": [6, 18, 21], "These": [6, 8, 15, 17, 19, 21, 32], "those": [6, 27], "distant": 6, "altern": [6, 32], "also": [6, 18, 21, 27], "80": 6, "cover": 6, "varianc": 6, "identif": [6, 26], "datapoint": 6, "inspir": [6, 21], "techniqu": [6, 32], "an": [6, 7, 8, 13, 15, 16, 17, 18, 19, 20, 21, 23, 26, 27], "theoret": [6, 27], "high": [6, 26, 27], "n_neg": 6, "label_po": 6, "name_neg": 6, "rel_neg": 6, "col_class": 6, "newli": 6, "updat": [6, 21], "new": [6, 21], "store": 6, "Will": 6, "initi": [6, 27], "small": [6, 16, 17, 18, 20, 21, 24, 25, 28, 33], "datafor": 6, "conta": 6, "po": 6, "unl": 6, "numpi": [6, 16, 17, 21], "np": [6, 16, 17], "atgc": 6, "gcta": 6, "actg": 6, "tacg": 6, "mode": 6, "dpul": 6, "overview": [7, 8, 18, 21], "random": [7, 18, 27], "non_canonical_aa": 7, "remov": [7, 14, 15], "min_len": [7, 18], "max_len": [7, 18], "aa_window_s": [7, 18], "9": [7, 11, 15, 18, 21, 27], "load": [7, 8, 16, 20, 21, 25, 34], "benchmark": [7, 17, 19, 20, 25], "dataset": [7, 8, 16, 17, 19, 20, 21, 24, 25, 32, 33], "categor": [7, 15, 18], "dom": [7, 18, 27], "seq": [7, 18, 27], "By": 7, "tabl": [7, 8, 18, 21], "depth": [7, 8, 19, 20, 25], "per": [7, 18, 27], "randomli": [7, 18], "liter": 7, "keep": 7, "gap": 7, "handl": [7, 13, 20], "canon": [7, 19], "don": 7, "symbol": 7, "disabl": [7, 19], "window": [7, 27], "aa_": 7, "df_overview": 7, "entri": [7, 18, 19], "uniprot": 7, "binari": [7, 17, 18, 32], "stop": 7, "seq_amylo": [7, 18, 19, 27], "guid": [7, 8], "tutori": [7, 8, 17, 20, 21, 24], "just_aaindex": [8, 19], "unclassified_out": [8, 19], "top60_n": [8, 19], "aaontologi": [8, 17, 20, 23, 25, 26, 28], "scales_raw": [8, 19, 27], "encompass": [8, 27], "aaindex": [8, 17, 19, 26], "kawashima08": [8, 26, 27], "along": [8, 17], "min": [8, 19, 27], "max": [8, 19, 27], "organ": [8, 21], "scales_cat": [8, 19, 27], "breimann23b": [8, 20, 23, 26, 27], "compress": [8, 19, 27], "scales_pc": [8, 19, 27], "60": [8, 19, 27], "top60": [8, 19, 27], "individu": [8, 21], "accompani": 8, "top60_ev": [8, 19, 27], "normliz": 8, "raw": [8, 19, 27], "best": [8, 19], "Or": [8, 18], "relev": 8, "exclus": 8, "suffix": [8, 18, 21], "scale_id": [8, 19], "deriv": 8, "descript": [8, 19, 21, 27], "scale_descript": [8, 19], "current": [9, 13], "linewdith": 9, "plot_set": [9, 10, 11, 12, 13, 15, 16, 17, 18], "here": [9, 18, 21, 27], "plt": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], "b": [9, 11, 12, 13, 14, 15, 27], "23": [9, 11, 12, 13, 14, 15, 27], "27": [9, 13, 14, 15], "43": [9, 13, 14, 15], "plot_get_clist": [9, 13, 14, 15], "barplot": [9, 10, 11, 12, 13, 14, 15, 16, 17], "despin": [9, 10, 13, 14, 15, 16, 17, 18], "bigger": 9, "tight_layout": [9, 10, 13, 14, 15], "show": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], "png": [9, 10, 11, 12, 13, 14], "hire": [9, 10, 11, 12, 13, 14], "pdf": [9, 10, 11, 12, 13, 14], "prelud": [9, 10, 11, 12, 13, 14, 34], "dict_cat": 10, "weight_bold": [10, 14], "xaxi": 10, "set_vis": 10, "n_color": [11, 12, 15], "fuction": 11, "eight": 11, "colorl": 11, "appeal": [11, 15], "33": [11, 12], "notebook": 11, "color_palett": [11, 12], "101": 12, "shp": 12, "least": [12, 13, 21], "central": [12, 31], "14": [12, 15, 27], "light_palett": 12, "lighter": 12, "packag": [12, 16, 21], "list_cat": 13, "loc": [13, 19], "upper": 13, "loc_out": 13, "ncol": [13, 15], "labelspac": 13, "columnspac": 13, "handletextpad": 13, "handlelength": 13, "fontsiz": [13, 15], "fontsize_titl": 13, "weight": [13, 26, 27], "fontsize_weight": 13, "marker": 13, "marker_s": 13, "lw": 13, "linestyl": 13, "edgecolor": 13, "hatch": [13, 15], "hatchcolor": 13, "title_align_left": 13, "independntli": 13, "customiz": 13, "flexbili": 13, "convini": 13, "func": 13, "attach": 13, "item": 13, "locat": [13, 27], "25": 13, "thei": [13, 17, 18, 21], "coordin": 13, "vertic": 13, "horizont": 13, "bewtween": 13, "text": [13, 14], "visiabl": 13, "corner": 13, "round": [13, 16, 17], "style": [13, 14], "Not": 13, "fill": [13, 21], "furhter": 13, "word": 13, "line2d": 13, "core": [13, 16, 17], "gca": 13, "font_scal": [14, 18], "arial": 14, "adjust_only_font": 14, "adjust_further_el": 14, "no_tick": 14, "short_tick": 14, "no_ticks_x": [14, 15], "short_ticks_x": 14, "no_ticks_i": 14, "short_ticks_i": [14, 15], "show_opt": 14, "configur": 14, "global": 14, "embed": 14, "vector": [14, 27], "format": [14, 27], "svg": 14, "compat": 14, "edit": 14, "variou": [14, 17, 21, 27, 30], "viewer": 14, "softwar": [14, 21], "factor": [14, 27], "element": [14, 15], "set_context": 14, "common": [14, 21], "verdana": 14, "helvetica": 14, "dejavu": 14, "san": 14, "bold": 14, "leav": [14, 21], "unchang": 14, "make": [14, 15, 17, 18, 21], "layout": 14, "errorbar": 14, "choos": 14, "mark": 14, "short": 14, "ignor": [14, 18, 21], "runtim": 14, "polt": 14, "rcparam": 14, "manag": 14, "some": [15, 16, 27], "readi": [15, 18], "view": [15, 21, 32], "let": 15, "spine": 15, "look": 15, "just": 15, "easili": [15, 17, 18, 21], "comparison": [15, 16, 17], "d": [15, 19], "increas": [15, 27], "match": [15, 26], "plot_gcf": [15, 16, 17], "plot_legend": 15, "framework": [16, 17, 20, 24, 25], "predict": [16, 20, 21, 24, 25, 26, 27, 31, 32], "around": [16, 17], "interpret": [16, 17, 20, 21, 23, 24, 25, 26, 27, 31], "engin": [16, 20, 21, 24, 25, 31], "third": 16, "parti": 16, "aanalsi": 16, "we": [16, 17, 18, 21], "exampl": [16, 17, 18, 21, 24, 32], "\u03b3": [16, 17, 26], "secretas": [16, 17, 26, 27], "50": [16, 17, 18], "substrat": [16, 17, 26, 27], "aac": [16, 17], "now": [16, 17], "physic": [16, 27], "Its": 16, "idea": 16, "concept": 16, "As": [16, 17], "baselin": [16, 17], "entir": [16, 17, 21], "machin": [16, 17, 20, 21, 23, 26, 32], "ensembl": [16, 17], "randomforestclassifi": [16, 17], "model_select": [16, 17], "cross_val_scor": [16, 17], "rf": [16, 17], "cv_base": [16, 17], "accuraci": [16, 17, 19, 26], "f": [16, 17, 19], "63": [16, 18, 27], "take": [16, 17], "littl": [16, 17], "time": [16, 17], "improv": [16, 17, 21, 26], "000": [16, 17, 19], "cv": [16, 17], "tab": [16, 17], "red": [16, 17], "ylabel": [16, 17], "88": 16, "dive": 17, "power": 17, "capabl": [17, 27], "dedic": 17, "free": [17, 27], "In": [17, 18, 21, 32], "gamma": [17, 27], "ll": 17, "focu": [17, 21], "extract": 17, "how": 17, "har": 17, "task": [17, 21, 32], "essenti": [17, 18, 21], "randomforest": 17, "With": 17, "have": [17, 18, 19, 21, 27, 32], "hand": [17, 27], "effortlessli": 17, "furthermor": 17, "predominantli": 17, "hierarch": 17, "known": 17, "your": [17, 20, 21, 23], "fingertip": 17, "centerpiec": 17, "support": [17, 21, 27], "sinc": 17, "problem": 17, "lightweight": 17, "agglom": 17, "close": [17, 21], "integr": [17, 21, 26], "target": [17, 21], "middl": [17, 27], "adjac": [17, 27], "region": [17, 26, 27], "discontinu": 17, "togeth": [17, 30], "input": [17, 21, 30], "characterist": [17, 27], "58": [17, 27], "1000": 17, "yield": 17, "minut": 17, "i7": 17, "10510u": 17, "thread": 17, "93": 17, "df_info": 18, "iloc": [18, 19], "13": [18, 27], "predictor": [18, 27], "aa_caspase3": [18, 27], "233": [18, 27], "185605": [18, 27], "705": [18, 27], "184900": [18, 27], "prosper": [18, 26, 27], "aa_furin": [18, 27], "71": [18, 27], "59003": [18, 27], "163": [18, 27], "58840": [18, 27], "aa_ldr": [18, 27], "342": [18, 27], "118248": [18, 27], "35469": [18, 27], "82779": [18, 27], "idp": [18, 26, 27], "seq2seq": [18, 26, 27], "aa_mmp2": [18, 27], "573": [18, 27], "312976": [18, 27], "2416": [18, 27], "310560": [18, 27], "aa_rnabind": [18, 27], "221": [18, 27], "55001": [18, 27], "6492": [18, 27], "48509": [18, 27], "gmksvm": [18, 27], "ru": [18, 27], "aa_sa": [18, 27], "101082": [18, 27], "84523": [18, 27], "1414": [18, 27], "8484": [18, 27], "511": [18, 27], "903": [18, 27], "rerf": [18, 26, 27], "pred": [18, 26, 27], "seq_capsid": [18, 19, 27], "7935": [18, 27], "3364680": [18, 27], "3864": [18, 27], "4071": [18, 27], "viralpro": [18, 26, 27], "seq_disulfid": [18, 19, 27], "2547": [18, 27], "614470": [18, 27], "897": [18, 27], "1650": [18, 27], "dipro": [18, 27], "seq_loc": [18, 19, 27], "1835": [18, 27], "732398": [18, 27], "1045": [18, 27], "790": [18, 27], "nan": [18, 27], "seq_solubl": [18, 27], "17408": [18, 27], "4432269": [18, 27], "8704": [18, 27], "solpro": [18, 26, 27], "seq_tail": [18, 27], "6668": [18, 27], "2671690": [18, 27], "2574": [18, 27], "4094": [18, 27], "12": [18, 27], "126": [18, 27], "92964": [18, 27], "prefix": 18, "exemplifi": 18, "df_seq1": 18, "df_seq2": 18, "df_seq3": 18, "head": [18, 19], "capsid_1": 18, "mvthnvkinkhvtrrsyssakevleippltevqtasykwfmdkgik": 18, "capsid_2": 18, "mkkrqkkmtlsnftdtsfqdfvsaeqvddksamalinraedfkagq": 18, "balanc": 18, "200": 18, "value_count": 18, "dtype": 18, "int64": 18, "distribut": 18, "warn": 18, "simplefilt": 18, "action": 18, "futurewarn": 18, "list_seq_len": 18, "histplot": 18, "binwidth": 18, "xlim": 18, "1500": 18, "800": 18, "residu": [18, 19, 26, 27], "seen": 18, "caspase3_1": 18, "mslfdlfrgffgfpgprshrdpffggmtrdedddeeeeeeggswgr": 18, "caspase3_2": 18, "mevtgdagvpesgeirtlkpcllrrnysreqhgvaascledlrska": 18, "caspase3_3": 18, "mrarsgargalllalllcwdptpslagidsggqalpdsfpsapaeq": 18, "caspase3_4": 18, "mdakarncllqhrealekdiktsyimdhmisdgfltiseeekvrn": 18, "conveni": 18, "flank": 18, "popular": [18, 32], "caspase3_1_pos126": 18, "qtlrdsmlk": 18, "caspase3_1_pos127": 18, "tlrdsmlky": 18, "caspase3_1_pos4": 18, "mslfdlfrg": 18, "caspase3_1_pos5": 18, "slfdlfrgf": 18, "21": [18, 27], "caspase3_94_pos31": 18, "vshwqqqsyldsgihsgattt": 18, "caspase3_129_pos530": 18, "wfnkvledktddastpatdt": 18, "caspase3_76_pos554": 18, "qllrgvkhlhdnwilhrdlkt": 18, "caspase3_19_pos163": 18, "ghrgnsldrrsqggphlsgav": 18, "But": 18, "mani": 18, "face": 18, "challeng": [18, 21], "might": [18, 27], "unbalanc": [18, 20, 21, 24, 25, 28, 33], "lack": 18, "clear": [18, 21], "scenario": 18, "denot": [18, 27], "_pu": [18, 27], "dom_gsec_pu": [18, 27], "q14802": 18, "mqkvtlgllvflagfpvldandledknspfyydwhslqvgglicag": 18, "37": 18, "59": 18, "nspfyydwh": 18, "lqvgglicagvlcamgiiivmsa": 18, "kckckfgqk": 18, "q86ue4": 18, "maarswqdelaqqaeegsarlremlsvglgflrtelgldlglepkr": 18, "72": 18, "lglepkrypg": 18, "wvilvgtgalgllllfllgygwa": 18, "aacagarkkr": 18, "p05067": 18, "mlpglallllaawtaralevptdgnagllaepqiamfcgrlnmhmn": 18, "701": 18, "723": 18, "faedvgsnkg": 18, "aiiglmvggvviatvivitlvml": 18, "kkkqytsihh": 18, "p14925": 18, "magrarsgllllllgllalqssclafrsplsvfkrfkettrsfsn": 18, "868": 18, "890": 18, "klstepgsgv": 18, "svvlittllvipvlvllaivmfi": 18, "rwkksrafgd": 18, "df_seq_pu": 18, "p12821": 18, "mgaasgrrgpglllplplllllppqpalaldpglqpgnfsadeaga": 18, "1257": 18, "1276": 18, "gldldaqqar": 18, "vgqwlllflgiallvatlgl": 18, "sqrlfsirhr": 18, "p36896": 18, "maesagassffplvvlllagsggsgprgvqallcactsclqanytc": 18, "127": 18, "149": 18, "ehpsmwgpv": 18, "lvgiiagpvfllfliiiivflvi": 18, "nyhqrvyhnr": 18, "six": 19, "origin": 19, "df_raw": 19, "df_pc": 19, "andn920101": 19, "argp820101": 19, "argp820102": 19, "argp820103": 19, "494": 19, "230": 19, "355": 19, "504": 19, "864": 19, "404": 19, "579": 19, "387": 19, "174": 19, "420": 19, "177": 19, "019": 19, "032": 19, "877": 19, "762": 19, "601": 19, "670": 19, "term": [19, 27], "lins030110": 19, "asa": [19, 27], "volum": [19, 27], "surfac": [19, 27], "fold": [19, 27], "coil": [19, 27], "turn": [19, 27], "median": 19, "resi": 19, "lins030113": 19, "janj780101": 19, "janin": [19, 27], "et": [19, 26, 27], "al": [19, 26, 27], "janj780103": 19, "expos": [19, 21, 27], "lins030104": 19, "stem": 19, "top60_id": 19, "acc": 19, "presenc": [19, 27], "absenc": [19, 27], "df_top60": 19, "aac01": 19, "aac02": 19, "aac03": 19, "aac04": 19, "aac05": 19, "df_eval": 19, "overal": 19, "761": 19, "827": 19, "732": 19, "746": 19, "747": 19, "830": 19, "733": 19, "742": 19, "741": 19, "829": 19, "734": 19, "828": 19, "731": 19, "739": 19, "735": 19, "752": 19, "df_cat_1": 19, "df_raw_1": 19, "df_scales_1": 19, "selected_scal": 19, "tolist": 19, "df_aac1": 19, "exclud": 19, "subordin": 19, "dpulearn": [20, 23, 24, 25], "train": [20, 21, 24, 25, 32], "moreov": [20, 25], "load_data": [20, 25], "pypi": 20, "conda": [20, 21], "forg": 20, "pip": [20, 21], "introduct": 20, "usag": [20, 21, 24], "contribut": [20, 27], "api": [20, 21], "explain": [20, 21, 26, 28], "ai": [20, 21, 26, 28], "perturb": [20, 32], "modul": 20, "search": 20, "page": 20, "work": [20, 23], "pleas": [20, 21, 23], "cite": [20, 23], "_": [20, 23], "breimann": [20, 23, 26], "kamp": [20, 23], "steiner": [20, 23], "frishman": [20, 23], "2023": [20, 23], "ontologi": [20, 23, 26], "biorxiv": [20, 23, 26], "welcom": 21, "thank": 21, "open": 21, "project": [21, 27], "focus": 21, "involv": 21, "invalu": 21, "made": 21, "wai": 21, "file": 21, "github": 21, "issu": 21, "tracker": 21, "submit": 21, "particip": [21, 27], "newcom": 21, "tackl": 21, "email": 21, "stephanbreimann": 21, "gmail": 21, "com": 21, "question": 21, "comprehens": 21, "robust": 21, "life": [21, 32, 33], "scienc": [21, 32, 33], "seamlessli": 21, "flexibl": [21, 27], "interoper": 21, "biopython": 21, "reimplement": 21, "exist": [21, 32], "solut": 21, "biolog": [21, 24, 27, 32], "context": 21, "relianc": 21, "opaqu": 21, "box": 21, "empir": 21, "insight": 21, "cut": 21, "fair": 21, "transpar": 21, "re": [21, 26], "commit": 21, "divers": 21, "aspect": 21, "causal": 21, "minim": 21, "reproduc": 21, "mre": 21, "amount": 21, "demonstr": 21, "self": 21, "necessari": 21, "confirm": 21, "replic": 21, "guidelin": 21, "To": [21, 28], "git": 21, "http": 21, "breimanntool": 21, "master": 21, "repositori": 21, "your_usernam": 21, "navig": 21, "folder": 21, "up": 21, "cd": 21, "isol": 21, "activ": [21, 27], "poetri": 21, "pytest": 21, "hypothesi": 21, "execut": 21, "case": 21, "directori": 21, "out": [21, 27], "readm": 21, "command": 21, "cheat": 21, "sheet": [21, 27], "substanti": 21, "minor": 21, "typo": 21, "concis": 21, "branch": [21, 27], "fix": 21, "date": 21, "readthedoc": 21, "org": 21, "crucial": 21, "modif": 21, "render": 21, "correctli": 21, "strive": 21, "well": 21, "codebas": 21, "standalon": 21, "special": 21, "carri": 21, "complet": 21, "process": 21, "fulfil": 21, "purpos": 21, "inherit": 21, "supplementari": 21, "accordingli": 21, "cppplot": 21, "semi": 21, "strictli": 21, "adher": 21, "aforement": 21, "primari": [21, 30], "_util": 21, "_utils_const": 21, "py": 21, "modular": 21, "therefor": 21, "flat": 21, "hierarchi": 21, "outlin": 21, "user": 21, "friendli": 21, "hint": 21, "enhanc": [21, 27], "propos": 21, "pep": 21, "484": 21, "book": 21, "error": 21, "messag": 21, "docstr": 21, "257": 21, "markup": 21, "languag": 21, "restructuredtext": 21, "rst": 21, "primer": 21, "restructuretext": 21, "cheatsheet": 21, "sphinx": 21, "autodoc": 21, "inclus": 21, "napoleon": 21, "extens": 21, "conf": 21, "bird": 21, "ey": 21, "background": 21, "medium": [21, 27], "tabular": 21, "critic": 21, "except": 21, "rule": 21, "showcas": 21, "scientif": 21, "mai": 21, "mention": 21, "section": 21, "extern": 21, "note": 21, "go": 21, "html": 21, "_build": 21, "browser": 21, "below": 21, "blank": 21, "OF": 21, "ONE": 21, "complex": 21, "At": 21, "intric": 21, "do": 21, "placehold": 21, "incomplet": 21, "potenti": [21, 27], "expect": 21, "30": 21, "150": 21, "remind": 21, "token": 21, "truncat": 21, "respons": 21, "simpli": 21, "ask": 21, "someth": 21, "repeat": 21, "compil": 21, "done": 21, "script": 21, "leverag": 21, "struggl": 21, "produc": 21, "erron": 21, "often": [21, 32], "ambigu": 21, "logic": 21, "address": 21, "intuit": 21, "through": 21, "signatur": [21, 28], "behavior": 21, "deeper": 21, "intricaci": 21, "citat": 23, "develop": 24, "practic": 24, "2023a": 26, "2023b": 26, "breimann23c": [26, 27], "2023c": 26, "chart": 26, "cheng06": [26, 27], "cheng": 26, "2006": 26, "larg": 26, "disulphid": 26, "bridg": [26, 27], "kernel": 26, "neural": 26, "network": 26, "graph": [26, 27], "struct": 26, "funct": 26, "kawashima": 26, "2008": 26, "aid": 26, "databas": 26, "report": 26, "nucleic": 26, "magnan09": [26, 27], "magnan": 26, "randal": 26, "baldi": 26, "2009": [26, 27], "accur": 26, "solubl": [26, 27], "bioinformat": 26, "galiez16": [26, 27], "galiez": 26, "2016": [26, 27], "viral": 26, "capsid": [26, 27], "tail": [26, 27], "song18": [26, 27], "song": 26, "2018": 26, "throughput": 26, "cleavag": [26, 27], "site": [26, 27], "90": 26, "proteas": 26, "shen19": [26, 27], "shen": 26, "2019": 26, "subcellular": [26, 27], "local": [26, 27], "evolutionari": 26, "chou": [26, 27], "pseaac": 26, "j": 26, "theor": 26, "biol": 26, "tang20": [26, 27], "tang": 26, "2020": 26, "intrins": [26, 27], "disord": [26, 27], "teng21": [26, 27], "teng": 26, "2021": 26, "amyloidogen": [26, 27], "pseudo": 26, "composit": [26, 27], "tripeptid": 26, "bmc": 26, "yang21": [26, 27], "yang": 26, "granular": 26, "multipl": 26, "rna": [26, 27], "bind": [26, 27], "appl": 26, "chronolog": 27, "histori": 27, "t1_overview_benchmark": 27, "t2_overview_scal": 27, "t3a_aaontology_categori": 27, "t3b_aaontology_subcategori": 27, "begin": 27, "append": 27, "caspas": 27, "furin": 27, "long": 27, "ldr": 27, "metallopeptidas": 27, "mmp2": 27, "rbp60": 27, "solvent": 27, "sa": 27, "buri": 27, "amyloidognen": 27, "capdsid": 27, "disulfid": 27, "ss": 27, "bond": 27, "cytoplasm": 27, "v": 27, "plasma": 27, "insolubl": 27, "694": 27, "494524": 27, "unknown": 27, "statu": 27, "586": 27, "tier": 27, "system": 27, "systemat": 27, "arrang": 27, "67": 27, "everi": 27, "clearli": 27, "assess": 27, "couldn": 27, "alloc": 27, "regard": 27, "prefer": 27, "chothia": 27, "1976": 27, "lin": 27, "2003": 27, "64": 27, "occurr": 27, "cellular": 27, "mitochondria": 27, "nakashima": 27, "1990": 27, "nishikawa": 27, "1992": 27, "conform": 27, "\u03b1": 27, "helix": 27, "\u03b2": 27, "strand": 27, "ranodm": 27, "tanaka": 27, "scheraga": 27, "1977": 27, "fasman": 27, "1978b": 27, "richardson": 27, "1988": 27, "qian": 27, "sejnowski": 27, "aurora": 27, "rose": 27, "1998": 27, "224": 27, "19": 27, "24": 27, "energi": 27, "charg": 27, "entropi": 27, "charton": 27, "1983": 27, "gui": 27, "1985": 27, "radzicka": 27, "wolfenden": 27, "36": 27, "could": 27, "mutabl": 27, "sneath": 27, "1966": 27, "17": 27, "polar": 27, "hydrophob": 27, "hydrophil": 27, "amphiphil": 27, "kyte": 27, "doolittl": 27, "1982": 27, "mitaku": 27, "2002": 27, "koehler": 27, "111": 27, "steric": 27, "chain": 27, "angl": 27, "symmetri": 27, "represent": 27, "eccentr": 27, "prabhakaran": 27, "ponnuswami": 27, "karkbara": 27, "knislei": 27, "45": 27, "stabil": 27, "backbon": 27, "dynam": 27, "vihinen": 27, "1994": 27, "bastolla": 27, "2005": 27, "31": 27, "water": 27, "tendenc": 27, "oppos": 27, "1978": 27, "partial": 27, "displac": 27, "caus": 27, "interact": 27, "mainli": 27, "ones": 27, "bull": 27, "brees": 27, "1974": 27, "bigelow": 27, "1967": 27, "jone": 27, "dayhoff": 27, "interior": 27, "unpolar": 27, "fukuchi": 27, "2001": 27, "mp": 27, "cedano": 27, "1997": 27, "mitochondri": 27, "less": 27, "val": 27, "cf": 27, "cap": 27, "propens": 27, "asp": 27, "glu": 27, "ly": 27, "arg": 27, "observ": 27, "character": 27, "punta": 27, "maritan": 27, "robson": 27, "suzuki": 27, "linker": 27, "georg": 27, "heringa": 27, "2004": 27, "helic": 27, "half": 27, "finkelstein": 27, "1991": 27, "outsid": 27, "insid": 27, "befor": 27, "geisow": 27, "robert": 27, "1980": 27, "ramachandran": 27, "state": 27, "quadrant": 27, "bottom": 27, "paul": 27, "1951": 27, "antiparallel": 27, "lifson": 27, "sander": 27, "1979": 27, "bend": 27, "revers": 27, "tight": 27, "consecut": 27, "180": 27, "back": 27, "hydrogen": 27, "3rd": 27, "4th": 27, "1st": 27, "2nd": 27, "r": 27, "tm": 27, "place": 27, "monn\u00e9": 27, "1999": 27, "\u03c0": 27, "ala": 27, "gln": 27, "fodj": 27, "karadaghi": 27, "net": 27, "donor": 27, "transfer": 27, "klein": 27, "1984": 27, "acceptor": 27, "faucher": 27, "hi": 27, "electron": 27, "ion": 27, "pot": 27, "valenc": 27, "chemic": 27, "cosic": 27, "low": 27, "due": 27, "strong": 27, "hutchen": 27, "1970": 27, "unfold": 27, "gibb": 27, "denatur": 27, "yutani": 27, "1987": 27, "instabl": 27, "highest": 27, "break": 27, "pro": 27, "munoz": 27, "serrano": 27, "isoelectr": 27, "ph": 27, "electr": 27, "neutral": 27, "zimmerman": 27, "1968": 27, "16": 27, "crystal": 27, "pairwis": 27, "constitu": 27, "atom": 27, "lennard": 27, "oobatak": 27, "ooi": 27, "rel": 27, "chang": 27, "divid": 27, "aliphat": 27, "linear": 27, "aromat": 27, "carbon": 27, "approxim": 27, "invers": 27, "reactiv": 27, "hydroxythiol": 27, "wold": 27, "occur": 27, "esp": 27, "amphipath": 27, "highli": 27, "signal": 27, "argo": 27, "cornett": 27, "38": 27, "environ": 27, "eisenberg": 27, "mclachlan": 27, "1986": 27, "surround": 27, "angstrom": 27, "radiu": 27, "pack": 27, "globular": 27, "1981": 27, "28": 27, "eigenvalu": 27, "laplacian": 27, "undirect": 27, "node": 27, "mass": 27, "molecular": 27, "second": 27, "actual": 27, "root": 27, "squar": 27, "gyrat": 27, "farther": 27, "awai": 27, "rackovski": 27, "relationship": 27, "rate": 27, "shift": 27, "bundi": 27, "wuthrich": 27, "nh": 27, "temperatur": 27, "rigid": 27, "gly": 27, "ser": 27, "particularli": 27, "ptitsyn": 27, "zhou": 27, "equilibrium": 27, "sueki": 27, "flow": 28, "enri": 28, "introduc": 29, "diagram": 30, "platform": 31, "novel": 31, "everywher": [32, 33], "setup": 32, "augment": 32, "smote": 32, "artifici": 32, "Such": 32, "veri": 32, "deep": 32, "imag": 32, "recognit": 32, "feasibl": 32, "becaus": 32, "slight": 32, "mutat": 32, "alter": 32, "dramat": 32, "great": 32, "quantiti": 32, "besid": 32, "distinguish": 32, "subfield": 32, "quick": 34, "slow": 34}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "AAclustPlot"], [3, 0, 1, "", "CPP"], [4, 0, 1, "", "CPPPlot"], [5, 0, 1, "", "SequenceFeature"], [6, 0, 1, "", "dPULearn"], [7, 3, 1, "", "load_dataset"], [8, 3, 1, "", "load_scales"], [9, 3, 1, "", "plot_gcfs"], [10, 3, 1, "", "plot_get_cdict"], [11, 3, 1, "", "plot_get_clist"], [12, 3, 1, "", "plot_get_cmap"], [13, 3, 1, "", "plot_legend"], [14, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "comp_centers"], [1, 1, 1, "", "comp_correlation"], [1, 1, 1, "", "comp_coverage"], [1, 1, 1, "", "comp_medoids"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 2, 1, "", "is_medoid_"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "labels_centers_"], [1, 2, 1, "", "labels_medoids_"], [1, 2, 1, "", "medoid_names_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "model"], [1, 2, 1, "", "n_clusters"], [1, 1, 1, "", "name_clusters"]], "aaanalysis.AAclustPlot": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "center"], [2, 1, 1, "", "correlation"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "medoids"]], "aaanalysis.CPP": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "eval"], [3, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "heatmap"], [4, 1, 1, "", "profile"], [4, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "add_dif"], [5, 1, 1, "", "add_feat_value"], [5, 1, 1, "", "add_position"], [5, 1, 1, "", "feat_matrix"], [5, 1, 1, "", "feat_names"], [5, 1, 1, "", "get_df_parts"], [5, 1, 1, "", "get_features"], [5, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[6, 1, 1, "", "__init__"], [6, 1, 1, "", "eval"], [6, 1, 1, "", "fit"], [6, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 18, 30, 32, 34], "handl": [0, 34], "featur": [0, 17, 34], "engin": [0, 17, 34], "pu": [0, 18, 32, 34], "learn": [0, 17, 32, 34], "explain": [0, 17, 33, 34], "ai": [0, 17, 33, 34], "perturb": 0, "plot": [0, 15], "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 20, 30], "aaclust": [1, 17], "note": [1, 2, 3, 5, 6, 7, 8, 13], "aaclustplot": 2, "cpp": [3, 17, 31], "cppplot": 4, "exampl": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 20], "sequencefeatur": 5, "dpulearn": 6, "load_dataset": 7, "load_scal": 8, "plot_gcf": 9, "plot_get_cdict": 10, "plot_get_clist": 11, "plot_get_cmap": 12, "plot_legend": 13, "plot_set": 14, "prelud": 15, "quick": 16, "start": [16, 17, 34], "slow": 17, "what": [17, 32, 33], "you": 17, "Will": 17, "1": 17, "load": [17, 18, 19], "sequenc": [17, 33], "scale": [17, 19, 27, 29], "2": 17, "compar": 17, "physicochem": [17, 31], "profil": 17, "3": 17, "protein": [17, 18, 27], "predict": 17, "4": 17, "group": 17, "level": [17, 33], "individu": 17, "tutori": [18, 19, 34], "benchmark": [18, 26, 27], "amino": [18, 19, 27, 29], "acid": [18, 19, 27, 29], "window": 18, "size": 18, "posit": 18, "unlabel": 18, "dataset": [18, 26, 27], "three": 19, "set": 19, "numer": 19, "aaontologi": [19, 27, 29], "redund": 19, "reduc": 19, "subset": 19, "filter": 19, "welcom": 20, "document": [20, 21, 24], "instal": [20, 21], "overview": [20, 24, 27], "refer": [20, 26], "indic": 20, "tabl": [20, 27], "citat": 20, "contribut": 21, "introduct": [21, 24], "vision": 21, "object": 21, "non": 21, "goal": 21, "principl": [21, 28], "bug": 21, "report": 21, "latest": 21, "version": 21, "local": 21, "develop": 21, "environ": 21, "fork": 21, "clone": 21, "depend": 21, "run": 21, "unit": 21, "test": 21, "pull": 21, "request": 21, "preview": 21, "chang": 21, "name": 21, "convent": 21, "class": 21, "templat": 21, "function": 21, "method": 21, "code": 21, "philosophi": 21, "style": 21, "layer": 21, "build": 21, "doc": 21, "chatgpt": 21, "guid": 21, "tgd": 21, "workflow": 24, "algorithm": 26, "us": [26, 31], "case": 26, "further": 26, "inform": 26, "categori": 27, "subcategori": 27, "usag": 28, "classif": 29, "flow": 30, "enri": 30, "point": 30, "compon": 30, "entri": 30, "bridg": 30, "extern": 30, "librari": 30, "identifi": 31, "signatur": 31, "from": 32, "unbalanc": 32, "small": 32, "i": [32, 33], "get": 34}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data Handling": [[0, "data-handling"], [34, "data-handling"]], "Feature Engineering": [[0, "feature-engineering"], [34, "feature-engineering"]], "PU Learning": [[0, "pu-learning"], [34, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"], [34, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], "aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [1, null], [1, null], [1, null], [2, null], [2, null], [3, null], [3, null], [5, null], [5, null], [5, null], [5, null], [5, null], [6, null], [6, null], [7, null], [8, null], [13, null]], "aaanalysis.AAclustPlot": [[2, "aaanalysis-aaclustplot"]], "aaanalysis.CPP": [[3, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[4, "aaanalysis-cppplot"]], "Examples": [[4, null], [5, null], [5, null], [6, null], [7, null], [8, null], [9, null], [10, null], [11, null], [12, null], [13, null], [14, null]], "aaanalysis.SequenceFeature": [[5, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[6, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[7, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[8, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[9, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[10, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_clist": [[11, "aaanalysis-plot-get-clist"]], "aaanalysis.plot_get_cmap": [[12, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_legend": [[13, "aaanalysis-plot-legend"]], "aaanalysis.plot_settings": [[14, "aaanalysis-plot-settings"]], "Plotting Prelude": [[15, "plotting-prelude"]], "Quick Start with AAanalysis": [[16, "quick-start-with-aaanalysis"]], "Slow Start with AAanalysis": [[17, "slow-start-with-aaanalysis"]], "What You Will Learn:": [[17, "what-you-will-learn"]], "1. Loading Sequences and Scales": [[17, "loading-sequences-and-scales"]], "2. Feature Engineering": [[17, "feature-engineering"]], "AAclust": [[17, "aaclust"]], "Comparative Physicochemical Profiling (CPP)": [[17, "comparative-physicochemical-profiling-cpp"]], "3. Protein Prediction": [[17, "protein-prediction"]], "4. Explainable AI": [[17, "explainable-ai"]], "Explainable AI on group level": [[17, "explainable-ai-on-group-level"]], "Explainable AI on individual level": [[17, "explainable-ai-on-individual-level"]], "Data Loading Tutorial": [[18, "data-loading-tutorial"]], "Loading of protein benchmarks": [[18, "loading-of-protein-benchmarks"]], "Loading of protein benchmarks: Amino acid window size": [[18, "loading-of-protein-benchmarks-amino-acid-window-size"]], "Loading of protein benchmarks: Positive-Unlabeled (PU) datasets": [[18, "loading-of-protein-benchmarks-positive-unlabeled-pu-datasets"]], "Scale Loading Tutorial": [[19, "scale-loading-tutorial"]], "Three sets of numerical amino acid scales": [[19, "three-sets-of-numerical-amino-acid-scales"]], "AAontology": [[19, "aaontology"], [27, "aaontology"]], "Redundancy-reduce scale subsets": [[19, "redundancy-reduce-scale-subsets"]], "Filtering of scales": [[19, "filtering-of-scales"]], "Welcome to the AAanalysis documentation!": [[20, "welcome-to-the-aaanalysis-documentation"]], "Install": [[20, "install"]], "OVERVIEW": [[20, null]], "EXAMPLES": [[20, null]], "REFERENCES": [[20, null]], "Indices and tables": [[20, "indices-and-tables"]], "Citation": [[20, "citation"]], "Contributing": [[21, "contributing"]], "Introduction": [[21, "introduction"], [24, "introduction"]], "Vision": [[21, "vision"]], "Objectives": [[21, "objectives"]], "Non-goals": [[21, "non-goals"]], "Principles": [[21, "principles"]], "Bug Reports": [[21, "bug-reports"]], "Installation": [[21, "installation"]], "Latest Version": [[21, "latest-version"]], "Local Development Environment": [[21, "local-development-environment"]], "Fork and Clone": [[21, "fork-and-clone"]], "Install Dependencies": [[21, "install-dependencies"]], "Run Unit Tests": [[21, "run-unit-tests"]], "Pull Requests": [[21, "pull-requests"]], "Preview Changes": [[21, "preview-changes"]], "Documentation": [[21, "documentation"]], "Naming Conventions": [[21, "naming-conventions"]], "Class Templates": [[21, "class-templates"]], "Function and Method Naming": [[21, "function-and-method-naming"]], "Code Philosophy": [[21, "code-philosophy"]], "Documentation Style": [[21, "documentation-style"]], "Documentation Layers": [[21, "documentation-layers"]], "Building the Docs": [[21, "building-the-docs"]], "Test with ChatGPT": [[21, "test-with-chatgpt"]], "Test Guided Development (TGD)": [[21, "test-guided-development-tgd"]], "Workflow": [[24, "workflow"]], "Overview of documentation": [[24, "overview-of-documentation"]], "References": [[26, "references"]], "Algorithms": [[26, "algorithms"]], "Datasets and Benchmarks": [[26, "datasets-and-benchmarks"]], "Use Cases": [[26, "use-cases"]], "Further Information": [[26, "further-information"]], "Tables": [[27, "tables"]], "Overview Table": [[27, "overview-table"]], "Protein Benchmark Datasets": [[27, "protein-benchmark-datasets"]], "Amino Acid Scale Datasets": [[27, "amino-acid-scale-datasets"]], "Categories": [[27, "categories"]], "Subcategories": [[27, "subcategories"]], "Usage Principles": [[28, "usage-principles"]], "AAontology: Classification of amino acid scales": [[29, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[30, "data-flow-and-enry-points"]], "Data Flow: Components of AAanalysis": [[30, "data-flow-components-of-aaanalysis"]], "Entry Points: Bridges to External Libraries": [[30, "entry-points-bridges-to-external-libraries"]], "Identifying Physicochemical Signatures using CPP": [[31, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[32, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[32, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[33, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[33, "what-is-explainable-ai"]], "Tutorials": [[34, "tutorials"]], "Getting Started": [[34, "getting-started"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "comp_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_centers"]], "comp_correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_correlation"]], "comp_coverage() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_coverage"]], "comp_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.comp_medoids"]], "eval() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "is_medoid_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.is_medoid_"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "labels_centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_centers_"]], "labels_medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_medoids_"]], "medoid_names_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_names_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "model (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.model"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "name_clusters() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.name_clusters"]], "aaclustplot (class in aaanalysis)": [[2, "aaanalysis.AAclustPlot"]], "__init__() (aaanalysis.aaclustplot method)": [[2, "aaanalysis.AAclustPlot.__init__"]], "center() (aaanalysis.aaclustplot method)": [[2, "aaanalysis.AAclustPlot.center"]], "correlation() (aaanalysis.aaclustplot static method)": [[2, "aaanalysis.AAclustPlot.correlation"]], "eval() (aaanalysis.aaclustplot static method)": [[2, "aaanalysis.AAclustPlot.eval"]], "medoids() (aaanalysis.aaclustplot method)": [[2, "aaanalysis.AAclustPlot.medoids"]], "cpp (class in aaanalysis)": [[3, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[3, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp method)": [[3, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[3, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[4, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[4, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[4, "aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[4, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[4, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[5, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[5, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[5, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[6, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[6, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[6, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[6, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[6, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[7, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[8, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[9, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[10, "aaanalysis.plot_get_cdict"]], "plot_get_clist() (in module aaanalysis)": [[11, "aaanalysis.plot_get_clist"]], "plot_get_cmap() (in module aaanalysis)": [[12, "aaanalysis.plot_get_cmap"]], "plot_legend() (in module aaanalysis)": [[13, "aaanalysis.plot_legend"]], "plot_settings() (in module aaanalysis)": [[14, "aaanalysis.plot_settings"]]}}) \ No newline at end of file diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_gcfs-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_gcfs-1.pdf index 4a3b6e47..a38292fd 100644 Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_gcfs-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_gcfs-1.pdf differ diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_get_cdict-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_get_cdict-1.pdf index e61bd6d3..cd93d28b 100644 Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_get_cdict-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_get_cdict-1.pdf differ diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_get_clist-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_get_clist-1.pdf index fd37155f..1b940746 100644 Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_get_clist-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_get_clist-1.pdf differ diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_get_cmap-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_get_cmap-1.pdf index 75829d12..1d33e4a7 100644 Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_get_cmap-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_get_cmap-1.pdf differ diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_legend-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_legend-1.pdf index 7562902d..28562a1c 100644 Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_legend-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_legend-1.pdf differ diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_settings-1.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_settings-1.pdf index d454887d..3abf57ee 100644 Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_settings-1.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_settings-1.pdf differ diff --git a/docs/build/plot_directive/generated/aaanalysis-plot_settings-2.pdf b/docs/build/plot_directive/generated/aaanalysis-plot_settings-2.pdf index c8addcb9..ddfa2263 100644 Binary files a/docs/build/plot_directive/generated/aaanalysis-plot_settings-2.pdf and b/docs/build/plot_directive/generated/aaanalysis-plot_settings-2.pdf differ diff --git a/tests/unit/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz b/tests/unit/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz index fb10800e..fe5d3368 100644 Binary files a/tests/unit/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz and b/tests/unit/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz differ