diff --git a/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc b/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc index a98bb70a..f21124e9 100644 Binary files a/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc and b/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc differ diff --git a/aaanalysis/data_loader/data_loader.py b/aaanalysis/data_loader/data_loader.py index 679295a5..2e6105a5 100644 --- a/aaanalysis/data_loader/data_loader.py +++ b/aaanalysis/data_loader/data_loader.py @@ -185,10 +185,6 @@ def load_dataset(name: str = "INFO", DataFrame Dataframe (df_seq) containing the selected sequence dataset. - See also - -------- - See an overview of all benchmarks in :ref:`1_overview_benchmarks` and a detailed usage tutorial in the - `data loader tutorial `_. Examples -------- diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index a758af57..cab7f05a 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree index 0b0bda18..06c8532e 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree and b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.load_scales.doctree b/docs/build/doctrees/generated/aaanalysis.load_scales.doctree index 7fbed04b..f9f854e3 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.load_scales.doctree and b/docs/build/doctrees/generated/aaanalysis.load_scales.doctree differ diff --git a/docs/build/html/generated/aaanalysis.load_dataset.html b/docs/build/html/generated/aaanalysis.load_dataset.html index 7df59d0d..b314b45c 100644 --- a/docs/build/html/generated/aaanalysis.load_dataset.html +++ b/docs/build/html/generated/aaanalysis.load_dataset.html @@ -120,7 +120,7 @@

aaanalysis.load_dataset

-aaanalysis.load_dataset(name='INFO', n=None, random=False, non_canonical_aa='remove', min_len=None, max_len=None, aa_window_size=9)[source]
+aaanalysis.load_dataset(name='INFO', n=None, random=False, non_canonical_aa='remove', min_len=None, max_len=None, aa_window_size=9)[source]

Load protein benchmarking datasets.

The benchmarks are categorized into amino acid (‘AA’), domain (‘DOM’), and sequence (‘SEQ’) level datasets. Use default settings (name='INFO') for an overview table. Detailed analysis is in [Breimann23a].

@@ -149,11 +149,6 @@

aaanalysis.load_dataset

DataFrame

-
-

See also

-

See an overview of all benchmarks in Amino acid scale datasets and a detailed usage tutorial in the -data loader tutorial.

-

Examples

>>> import aaanalysis as aa
diff --git a/docs/build/html/generated/aaanalysis.load_scales.html b/docs/build/html/generated/aaanalysis.load_scales.html
index 67fb3968..36bdd380 100644
--- a/docs/build/html/generated/aaanalysis.load_scales.html
+++ b/docs/build/html/generated/aaanalysis.load_scales.html
@@ -120,7 +120,7 @@
 

aaanalysis.load_scales

-aaanalysis.load_scales(name='scales', just_aaindex=False, unclassified_in=True)[source]
+aaanalysis.load_scales(name='scales', just_aaindex=False, unclassified_in=True)[source]

Load amino acid scales, scale classification (AAontology), or scale evaluation.

A thorough analysis of the residue and sequence datasets can be found in [Breimann23a].

diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index 877333e5..370f01b4 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_set_legend", "generated/aaanalysis.plot_settings", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tables_template", "tutorials", "usage_principles"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_set_legend.rst", "generated/aaanalysis.plot_settings.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles/aaontology.rst", "index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tables_template.rst", "tutorials.rst", "usage_principles.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_cmap", "aaanalysis.plot_set_legend", "aaanalysis.plot_settings", "Welcome to the AAanalysis documentation", "Contributing", "<no title>", "<no title>", "Introduction", "<no title>", "References", "Tables", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tables", "Tutorials", "Usage Principles"], "terms": {"thi": [0, 1, 3, 12, 14], "page": [0, 13], "contain": [0, 2, 3, 5, 6, 14, 20, 22, 24, 26], "refer": [0, 1, 2, 4, 14, 20], "public": [0, 13, 14, 16], "object": [0, 1, 3, 4, 5], "function": [0, 3, 8, 10, 12, 13, 18], "aaanalysi": [0, 14, 16, 17, 18, 20, 22, 23, 26, 28], "For": [0, 1, 4, 11, 14, 20, 24, 26], "more": [0, 3, 14], "exampl": [0, 14, 24], "practic": 0, "usag": [0, 6, 13, 14], "our": [0, 14], "notebook": [0, 27], "conveni": 0, "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 13, 14, 17, 18, 20, 21, 23, 26], "common": [0, 14], "import": [0, 4, 5, 6, 11, 12, 14, 28], "modul": [0, 1, 13], "follow": [0, 1, 2, 4, 5, 13, 14, 16, 17, 18, 20, 26], "aa": [0, 2, 4, 5, 6, 11, 12, 20, 26, 28], "Then": 0, "you": [0, 13, 14, 16], "can": [0, 1, 4, 5, 7, 11, 13, 14, 17, 22, 24], "access": [0, 20], "all": [0, 1, 2, 3, 4, 6, 12, 14, 20, 26], "method": [0, 1, 2, 3, 4, 5, 19], "via": [0, 14, 19], "alia": [0, 4], "load_dataset": [0, 4, 20], "class": [1, 2, 3, 4, 5, 6, 24], "model": [1, 5, 14, 24], "none": [1, 2, 3, 4, 5, 6, 9, 10, 11], "model_kwarg": 1, "verbos": [1, 2, 3, 4, 5, 12], "fals": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14], "base": [1, 2, 3, 4, 5, 10, 13, 14, 17, 18, 19, 23, 24], "A": [1, 4, 7, 11, 14, 17, 19], "k": [1, 13, 17, 18, 19], "optim": [1, 2, 3, 13, 17, 18, 19], "cluster": [1, 13, 17, 18, 19], "framework": [1, 13, 17, 18], "select": [1, 2, 3, 6, 7, 13, 14, 17, 18, 19], "redund": [1, 2, 13, 17, 18, 19], "reduc": [1, 5, 13, 17, 18, 19], "set": [1, 2, 3, 4, 5, 6, 8, 11, 12, 13, 14, 17, 18, 19, 20, 22], "numer": [1, 3, 4, 13, 17, 18], "scale": [1, 2, 3, 4, 6, 7, 9, 10, 12, 13, 16, 17, 18, 19, 22], "design": [1, 3, 14, 23], "primarili": [1, 5, 14], "amino": [1, 2, 3, 4, 6, 7, 13, 16, 17, 18, 19, 22, 24], "acid": [1, 2, 3, 4, 6, 7, 13, 16, 17, 18, 19, 22, 24], "versatil": 1, "enough": 1, "ani": [1, 14, 17], "indic": [1, 3, 4, 5, 20, 26], "It": [1, 17], "take": 1, "requir": 1, "pre": [1, 2, 14], "defin": [1, 4, 14], "number": [1, 2, 3, 4, 5, 6, 10, 11], "from": [1, 2, 3, 4, 5, 7, 13, 14, 20, 22, 26], "scikit": [1, 14], "learn": [1, 5, 13, 14, 16, 17, 18, 19, 20, 26], "http": [1, 14], "org": [1, 14], "stabl": 1, "html": [1, 14], "By": 1, "leverag": 1, "pearson": [1, 2], "correl": [1, 2], "similar": [1, 24], "measur": [1, 14], "valu": [1, 2, 3, 4, 14, 17, 20], "one": [1, 3], "repres": [1, 3, 17], "sampl": [1, 2, 3, 4, 5, 20, 24, 26], "term": 1, "medoid": 1, "each": [1, 2, 3, 4, 5, 14, 20, 26], "which": [1, 3, 4, 8, 17, 22, 24], "closest": 1, "": [1, 11, 14, 19], "center": [1, 10], "yield": 1, "paramet": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12], "callabl": 1, "option": [1, 2, 3, 4, 5, 6, 7, 10, 12], "default": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12], "sklearn": 1, "kmean": 1, "The": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 20, 22, 23, 26], "emploi": [1, 5], "given": [1, 3, 4, 6, 7, 20, 26], "n_cluster": 1, "dict": [1, 2, 3, 4, 5, 9, 10, 11], "dictionari": [1, 2, 3, 4, 9, 10, 11], "keyword": [1, 3, 5], "argument": [1, 3, 4, 5, 11], "pass": [1, 3, 5, 11, 14], "bool": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "flag": 1, "enabl": [1, 2, 3, 4, 5, 12, 13, 14, 17, 18, 23], "disabl": [1, 6], "output": [1, 4, 5, 12], "obtain": [1, 4, 22], "type": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 20, 26], "int": [1, 2, 3, 4, 5, 6, 10, 11], "labels_": [1, 5], "label": [1, 2, 3, 4, 5, 11, 14, 20, 24], "order": [1, 20, 26], "featur": [1, 2, 3, 4, 5, 10, 13, 14, 17, 18, 22, 23, 24], "matrix": [1, 4, 5, 20], "arrai": [1, 2, 4, 5], "like": [1, 2, 4, 5, 14], "centers_": 1, "averag": [1, 4], "correspond": [1, 14], "center_labels_": 1, "medoids_": 1, "medoid_labels_": 1, "medoid_ind_": 1, "chosen": [1, 2, 4, 6], "within": [1, 2, 4], "origin": 1, "dataset": [1, 2, 6, 7, 13, 14, 17, 18, 24, 25], "__init__": [1, 2, 3, 4, 5], "fit": [1, 5, 14], "x": [1, 3, 5, 6, 11, 12], "name": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 20, 26], "on_cent": 1, "true": [1, 2, 3, 4, 6, 7, 11, 12], "min_th": 1, "0": [1, 2, 3, 4, 5, 11, 12, 20, 24], "merge_metr": 1, "euclidean": [1, 5], "data": [1, 3, 5, 6, 13, 14, 20, 26], "format": [1, 12], "us": [1, 2, 3, 5, 6, 10, 12, 13, 14, 16, 17, 20, 22, 24, 26], "determin": 1, "without": [1, 3, 14, 20], "specif": [1, 9, 14, 20, 22, 26], "partit": 1, "maxim": 1, "beyond": 1, "threshold": [1, 2], "qualiti": 1, "either": [1, 4, 13], "minimum": [1, 4, 6], "member": 1, "min_cor": 1, "between": [1, 2, 3, 4, 10, 11], "its": [1, 14], "govern": 1, "undergo": 1, "three": [1, 4, 10, 20, 26], "stage": 1, "1": [1, 2, 3, 4, 5, 11, 12, 20, 24, 26], "estim": 1, "lower": 1, "bound": 1, "2": [1, 2, 3, 4, 5, 11, 20, 24, 26], "refin": 1, "metric": [1, 5, 14], "3": [1, 4, 5, 11, 14, 20], "merg": 1, "smaller": 1, "direct": 1, "final": 1, "reduct": 1, "shape": [1, 2, 3, 4, 5, 11], "n_sampl": [1, 2, 4, 5], "n_featur": [1, 2, 3, 4, 5], "where": [1, 4, 5], "list": [1, 3, 4, 10, 11], "str": [1, 3, 4, 5, 6, 7, 9, 10, 11, 12], "If": [1, 2, 3, 4, 5, 6, 10, 12, 13, 14, 16, 24], "provid": [1, 2, 3, 5, 7, 10, 13, 14, 18, 20, 24, 26], "return": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11], "appli": [1, 5, 10, 11, 12], "otherwis": [1, 3, 4, 5], "float": [1, 2, 3, 5, 10, 11, 12], "instead": 1, "names_medoid": 1, "attribut": 1, "attr": 1, "further": [1, 3, 14], "inform": [1, 2, 3, 4, 5, 22], "paper": 1, "todo": [1, 2], "add": [1, 2, 3, 4], "link": [1, 2, 13, 14, 16, 19], "cluster_nam": 1, "name_unclassifi": 1, "unclassifi": [1, 7], "assign": [1, 3, 4, 5], "frequenc": 1, "renam": 1, "prioriti": 1, "most": [1, 2, 3, 5, 13, 17, 18], "frequent": 1, "alreadi": [1, 24], "doe": 1, "exist": [1, 14, 24], "cannot": 1, "classifi": [1, 3], "static": [1, 2, 4], "get_cluster_cent": 1, "comput": [1, 2, 3, 4, 14, 19], "center_label": 1, "associ": 1, "get_cluster_medoid": 1, "medoid_label": 1, "medoid_ind": 1, "index": [1, 6, 13, 14, 19], "x_test": 1, "x_ref": 1, "labels_test": 1, "labels_ref": 1, "n": [1, 2, 3, 4, 6, 14, 19], "posit": [1, 2, 3, 4, 5, 10, 13, 17, 18, 20, 24, 26], "except_unclassifi": 1, "test": [1, 2], "top": [1, 20], "consid": [1, 14], "strength": 1, "els": 1, "neg": [1, 4, 5, 10, 20, 24], "exclud": 1, "list_top_center_name_corr": 1, "have": [1, 14, 24], "strongest": 1, "eval": [1, 2, 5, 14], "df_scale": [2, 4, 22], "df_cat": [2, 3, 4, 22], "df_part": [2, 4, 22], "split_kw": [2, 4], "accept_gap": [2, 3, 4], "tool": [2, 14, 19], "creat": [2, 3, 4, 5, 14, 22], "filter": [2, 3, 6], "ar": [2, 3, 4, 5, 6, 7, 14, 20, 22, 24, 25, 26], "discrimin": [2, 3], "two": [2, 3, 13, 14, 17, 18, 19, 21, 22], "sequenc": [2, 3, 4, 5, 6, 7, 13, 14, 17, 18, 19, 20, 22, 23, 24, 26], "panda": [2, 3, 4, 5, 14], "datafram": [2, 3, 4, 5, 6, 7, 14, 22], "load_categori": [2, 4], "categori": [2, 3, 4, 7, 9, 10, 11], "physicochem": [2, 4, 13, 17, 18, 19, 22], "part": [2, 3, 4, 14, 20, 22, 26], "sequencefeatur": 2, "get_split_kw": [2, 4], "nest": [2, 4], "split_typ": [2, 4], "whether": [2, 3, 4, 7, 10, 11], "accept": [2, 3, 4], "miss": [2, 3, 4], "omit": [2, 3, 4], "print": [2, 3, 4], "progress": [2, 3, 19], "about": [2, 3], "algorithm": [2, 3, 13, 14, 17, 18, 22, 23], "run": [2, 4], "perform": [2, 5], "step": [2, 3, 4, 14, 17], "parametr": 2, "n_filter": 2, "100": [2, 6, 10], "tmd_len": [2, 3, 4], "20": [2, 3, 4, 14, 20], "jmd_n_len": [2, 3, 4], "10": [2, 3, 4, 10], "jmd_c_len": [2, 3, 4], "ext_len": [2, 3, 4], "4": [2, 3, 4], "start": [2, 3, 4, 14, 22], "check_cat": 2, "n_pre_filt": 2, "pct_pre_filt": 2, "5": [2, 3, 4, 5, 11], "max_std_test": 2, "max_overlap": 2, "max_cor": 2, "n_process": 2, "pipelin": [2, 14], "creation": 2, "aim": [2, 3, 14], "identifi": [2, 3, 5, 13, 17, 18, 19, 24], "collect": 2, "non": [2, 4, 6, 20], "group": [2, 3, 4], "t": 2, "u": [2, 13, 14], "p": [2, 19], "percentag": [2, 5, 10], "length": [2, 3, 4, 6], "tmd": [2, 3, 4], "explan": [2, 3, 14], "first": [2, 3, 4, 10, 14, 20, 26], "terminu": [2, 3, 4], "jmd": [2, 3, 4], "c": [2, 3, 4, 13, 19], "extend": [2, 3, 4, 14, 24], "termin": [2, 3, 4], "should": [2, 3, 4, 5, 7, 14, 24], "longer": 2, "than": 2, "check": [2, 14], "remain": [2, 14], "after": 2, "maximum": [2, 4, 5, 6], "standard": [2, 24], "deviat": 2, "overlap": 2, "cpu": 2, "multiprocess": 2, "automat": [2, 3, 5, 14], "df_feat": [2, 3, 4, 22], "uniqu": [2, 3], "statist": [2, 3], "n_feature_inform": [2, 3], "eleven": 2, "column": [2, 3, 4, 5, 6, 11, 14], "includ": [2, 4, 7, 10, 11, 14], "id": [2, 4], "result": 2, "rank": 2, "11": [2, 3, 11], "split": [2, 4, 22], "subcategori": [2, 3], "sub": 2, "scale_nam": [2, 3], "abs_auc": [2, 3], "absolut": 2, "adjust": [2, 3, 12], "auc": 2, "abs_mean_dif": 2, "mean": [2, 3], "differ": [2, 3, 4, 11, 20, 22, 26], "std_test": [2, 3], "std_ref": 2, "p_val": 2, "mann_whitnei": 2, "ttest_indep": 2, "p_val_fdr_bh": 2, "benjamini": 2, "hochberg": 2, "fdr": 2, "correct": 2, "get": [2, 4, 8], "evalu": [2, 7, 14, 20], "condit": [3, 4], "jmd_m_len": [3, 4], "profil": [3, 9, 10, 13, 17, 18, 23], "y": [3, 11, 12], "val_col": 3, "mean_dif": 3, "val_typ": 3, "count": [3, 7], "normal": [3, 11, 20], "figsiz": 3, "7": [3, 4, 5, 12], "titl": [3, 11], "title_kw": 3, "dict_color": [3, 9, 10, 11], "edge_color": 3, "bar_width": 3, "75": 3, "add_jmd_tmd": 3, "jmd_n_seq": 3, "tmd_seq": 3, "jmd_c_seq": 3, "tmd_color": 3, "mediumspringgreen": 3, "jmd_color": 3, "blue": [3, 11], "tmd_seq_color": 3, "black": [3, 14], "jmd_seq_color": 3, "white": 3, "seq_siz": 3, "tmd_jmd_fontsiz": 3, "xtick_siz": 3, "xtick_width": 3, "xtick_length": 3, "xticks_po": 3, "ytick_siz": 3, "ytick_width": 3, "ytick_length": 3, "ylim": 3, "highlight_tmd_area": 3, "highlight_alpha": 3, "15": [3, 4], "grid": [3, 12], "grid_axi": [3, 12], "both": [3, 12], "add_legend_cat": 3, "legend_kw": 3, "shap_plot": 3, "kwarg": [3, 4, 11], "plot": [3, 9, 10, 11, 12, 13, 14], "instanc": 3, "avail": [3, 13, 16, 19], "specifi": [3, 4, 5, 9, 10, 12, 14], "check_value_typ": 3, "tupl": [3, 10], "size": [3, 4, 8, 10, 11, 12], "custom": [3, 11, 12], "appear": [3, 12], "map": [3, 4, 10, 11], "color": [3, 9, 10, 11], "edg": [3, 11, 14], "bar": [3, 9, 10], "width": [3, 11], "line": [3, 11], "annot": 3, "font": [3, 8, 11, 12], "tick": [3, 12], "axi": [3, 12], "limit": 3, "highlight": 3, "area": 3, "alpha": 3, "ad": 3, "drawn": 3, "legend": [3, 11], "shap": [3, 10, 14], "shaplei": 3, "addit": [3, 4, 5, 11, 12, 20, 26], "gener": [3, 4, 10, 12, 14, 17, 19, 24], "other": [3, 7, 14], "intern": 3, "librari": [3, 12, 14], "ax": [3, 11], "matplotlib": [3, 11, 12, 14], "heatmap": [3, 9, 10], "8": [3, 4, 5, 14], "vmin": 3, "vmax": 3, "grid_on": 3, "cmap": [3, 9, 10], "rdbu_r": 3, "cmap_n_color": 3, "cbar_kw": 3, "facecolor_dark": [3, 10], "add_importance_map": 3, "cbar_pct": 3, "featuremap": 3, "versu": 3, "wrapper": [3, 13, 14, 17, 18], "seaborn": [3, 10, 12, 14], "level": [3, 6, 13, 14, 18, 20, 21], "e": [3, 4, 9, 10, 12, 13, 14, 17, 18, 20, 24, 26], "g": [3, 4, 9, 10, 12, 13, 14, 17, 18, 20, 24, 26], "protein": [3, 4, 6, 13, 14, 17, 18, 19, 22, 23, 24], "shown": 3, "feat_impact": 3, "displai": 3, "sum": 3, "std": 3, "aggreg": 3, "positions_onli": 3, "across": [3, 14], "recommend": [3, 5, 14], "when": [3, 5], "emphas": [3, 14], "fewer": 3, "value_typ": 3, "height": 3, "figur": 3, "inch": 3, "pyplot": [3, 11], "anchor": [3, 11], "colormap": 3, "infer": [3, 14], "seismic": 3, "space": [3, 5, 10, 11], "impact": 3, "discret": 3, "diverg": 3, "sequenti": 3, "kei": [3, 14], "colorbar": 3, "under": [3, 14], "depicet": 3, "depict": 3, "jmd_n": [3, 4], "jmd_c": [3, 4], "point": [3, 11], "set_xticklabel": 3, "widht": 3, "tick_param": 3, "classif": [3, 7, 13, 18, 20, 24, 26], "pcolormesh": 3, "effect": [3, 14, 24], "onli": [3, 6, 7, 14, 20, 24, 26], "align": [3, 11], "applic": 3, "see": [3, 6, 14, 20], "document": [3, 20, 26], "detail": [3, 6, 11, 13, 14, 16], "cpp": [3, 4, 10, 13, 16, 17, 18, 22, 27], "code": [3, 10], "update_seq_s": 3, "retriev": [4, 9, 10], "compon": [4, 5, 20], "continu": 4, "subset": [4, 20], "domain": [4, 6, 20, 26], "transmembran": 4, "membran": [4, 20], "principl": [4, 13], "distinct": [4, 13, 14, 17, 18], "segment": 4, "pattern": 4, "properti": [4, 20, 26], "express": 4, "present": 4, "realiz": 4, "over": 4, "valid": [4, 14], "tmd_e": 4, "tmd_n": 4, "tmd_c": 4, "ext_c": 4, "ext_n": 4, "tmd_jmd": 4, "jmd_n_tmd_n": 4, "tmd_c_jmd_c": 4, "ext_n_tmd_n": 4, "tmd_c_ext_c": 4, "get_df_part": 4, "df_seq": [4, 5, 6, 22], "list_part": 4, "all_part": 4, "datafran": 4, "compris": 4, "tmd_start": 4, "tmd_stop": 4, "string": [4, 10], "len": 4, "must": 4, "lenght": 4, "resp": 4, "extra": 4, "possibl": [4, 24], "found": [4, 7, 14], "sf": 4, "dom_gs": 4, "n_split_min": 4, "n_split_max": 4, "steps_pattern": 4, "n_min": 4, "n_max": 4, "len_max": 4, "steps_periodicpattern": 4, "periodicpattern": 4, "greater": 4, "greatest": 4, "whole": [4, 6], "specfii": 4, "smallest": 4, "integ": 4, "6": 4, "vari": 4, "paramt": 4, "argumetn": 4, "get_featur": 4, "load_scal": [4, 13, 18, 20], "combin": [4, 14], "form": 4, "feat_matrix": 4, "n_job": 4, "return_label": 4, "pd": [4, 5, 14], "seri": 4, "job": 4, "parallel": 4, "spars": 4, "feat_nam": 4, "convert": 4, "depend": 4, "last": 4, "step1": 4, "step2": 4, "add_feat_valu": 4, "dict_scal": 4, "convent": 4, "letter": 4, "feature_valu": 4, "n_part": 4, "ha": [4, 14], "structur": [4, 19], "th": 4, "n_split": 4, "p1": 4, "p2": 4, "pn": 4, "end": 4, "odd": 4, "even": 4, "give": 4, "add_dif": 4, "sample_nam": 4, "ref_group": 4, "add_posit": 4, "part_split": 4, "feat_posit": 4, "total": [4, 5], "n_compon": 5, "pca_kwarg": 5, "determinist": [5, 13, 17, 18], "unlabel": [5, 13, 17, 18, 20, 24, 26], "offer": [5, 14], "approach": [5, 24], "pu": [5, 13, 17, 18, 20, 26], "princip": [5, 20], "analysi": [5, 6, 7, 13, 14, 17, 18], "pca": 5, "dimension": [5, 19], "pc": [5, 20], "iter": 5, "reliabl": [5, 14], "These": [5, 14, 24], "those": 5, "distant": 5, "altern": [5, 24], "also": [5, 14, 20], "distanc": 5, "manhattan": 5, "cosin": 5, "80": 5, "cover": 5, "varianc": 5, "identif": [5, 19], "datapoint": 5, "inspir": [5, 14], "techniqu": [5, 24], "an": [5, 6, 13, 14, 16, 19, 20, 26], "theoret": 5, "high": [5, 19], "n_neg": 5, "label_po": 5, "name_neg": 5, "rel_neg": 5, "col_class": 5, "newli": 5, "updat": [5, 14], "new": [5, 14], "store": 5, "Will": 5, "dure": 5, "initi": 5, "small": [5, 13, 14, 17, 18, 25], "datafor": 5, "conta": 5, "po": 5, "unl": 5, "numpi": [5, 14], "np": 5, "atgc": 5, "gcta": 5, "actg": 5, "tacg": 5, "mode": 5, "modifi": [5, 6, 12, 22], "dpul": 5, "info": 6, "random": 6, "non_canonical_aa": 6, "remov": [6, 12], "min_len": 6, "max_len": 6, "aa_window_s": 6, "9": [6, 14], "load": [6, 7, 13, 14, 18], "benchmark": [6, 13, 18], "categor": 6, "dom": [6, 20, 26], "seq": [6, 20, 26], "overview": [6, 14], "tabl": [6, 14], "breimann23a": [6, 7, 19, 20], "per": 6, "liter": 6, "keep": 6, "gap": [6, 10], "canon": 6, "dont": 6, "replac": 6, "symbol": 6, "window": 6, "aa_": 6, "tutori": [6, 13, 14], "loader": 6, "seq_amylo": [6, 20, 26], "just_aaindex": 7, "unclassified_in": 7, "aaontologi": [7, 13, 16, 18, 19, 20], "thorough": 7, "residu": [7, 19, 20, 26], "scales_raw": [7, 20], "scales_cat": 7, "scales_pc": [7, 20], "top60": [7, 20], "top60_ev": [7, 20], "relev": 7, "aaindex": [7, 19], "current": 8, "ut": 8, "plot_set": 8, "dict_scale_cat": [9, 10], "cppplot": [9, 10, 14], "respect": [9, 10, 13, 14, 16], "n_color": 10, "color_po": 10, "color_neg": 10, "color_cent": 10, "input": [10, 14, 22], "hex": 10, "pct_gap": 10, "pct_center": 10, "palett": 10, "feat": 10, "ggplot": 10, "datagroup": 10, "dark": 10, "face": 10, "rgb": 10, "hl": 10, "husl": 10, "xkcd": 10, "interpret": [10, 13, 14, 16, 17, 18, 19, 23], "latter": 10, "rang": 10, "sn": 10, "color_palett": 10, "light_palett": 10, "lighter": 10, "handl": 11, "list_cat": 11, "ncol": 11, "fontsiz": 11, "weight": [11, 19], "lw": 11, "edgecolor": 11, "return_handl": 11, "loc": 11, "upper": 11, "left": 11, "labelspac": 11, "columnspac": 11, "fontsize_legend": 11, "title_align_left": 11, "fontsize_weight": 11, "customiz": 11, "attach": 11, "item": 11, "coordin": 11, "text": [11, 12], "locat": [11, 20], "vertic": 11, "horizont": 11, "marker": 11, "directli": [11, 14], "finer": 11, "control": 11, "how": 11, "line2d": 11, "cat1": 11, "red": 11, "cat2": 11, "o": 11, "fig_format": 12, "pdf": 12, "font_scal": 12, "arial": 12, "change_s": 12, "weight_bold": 12, "adjust_el": 12, "short_tick": 12, "no_tick": 12, "no_ticks_i": 12, "short_ticks_i": 12, "no_ticks_x": 12, "short_ticks_x": 12, "configur": 12, "visual": [12, 14], "variou": [12, 14, 22], "file": [12, 14], "save": 12, "make": [12, 14], "visibl": 12, "choos": 12, "san": 12, "serif": 12, "verdana": 12, "helvetica": 12, "dejavu": 12, "element": 12, "bold": 12, "layout": 12, "short": 12, "mark": 12, "global": 12, "python": [13, 14, 17, 18], "predict": [13, 14, 17, 18, 19, 20, 23, 24, 26], "aaclust": [13, 16, 17, 18, 19, 27], "compar": [13, 17, 18, 22, 23], "engin": [13, 14, 17, 18, 23], "dpulearn": [13, 16, 17, 18, 27], "train": [13, 14, 17, 18, 24], "unbalanc": [13, 14, 17, 18, 25], "moreov": [13, 18], "load_data": [13, 18], "depth": [13, 18], "pypi": 13, "conda": [13, 14], "forg": 13, "pip": [13, 14], "introduct": 13, "contribut": 13, "api": 13, "explain": [13, 14, 19], "ai": [13, 14, 19], "perturb": [13, 24], "util": [13, 14], "search": 13, "your": [13, 14, 16], "work": [13, 16], "pleas": [13, 14, 16], "cite": [13, 16], "breimann23b": [13, 16, 19, 20, 21], "_": [13, 16], "breimann": [13, 16, 19], "kamp": [13, 16], "steiner": [13, 16], "frishman": [13, 16], "2023": [13, 16], "ontologi": [13, 16, 19], "machin": [13, 14, 16, 19, 24], "biorxiv": [13, 16, 19], "welcom": 14, "thank": 14, "we": 14, "open": 14, "project": [14, 20, 26], "focus": 14, "involv": 14, "invalu": 14, "made": 14, "wai": 14, "suggest": 14, "github": 14, "issu": 14, "tracker": 14, "submit": 14, "improv": [14, 19], "particip": 14, "discuss": 14, "newcom": 14, "tackl": 14, "good": 14, "email": 14, "stephanbreimann": 14, "gmail": 14, "com": 14, "question": 14, "establish": 14, "comprehens": 14, "toolkit": [14, 22], "robust": 14, "life": [14, 24, 25], "scienc": [14, 24, 25], "integr": [14, 19], "seamlessli": 14, "flexibl": 14, "interoper": 14, "packag": 14, "biopython": 14, "reimplement": 14, "solut": 14, "ignor": 14, "biolog": [14, 17, 24], "context": 14, "relianc": 14, "opaqu": 14, "box": 14, "empir": 14, "insight": 14, "cut": 14, "fair": 14, "account": 14, "transpar": 14, "re": [14, 19], "commit": 14, "divers": 14, "aspect": 14, "causal": 14, "minim": 14, "reproduc": 14, "mre": 14, "least": 14, "amount": 14, "demonstr": 14, "self": 14, "ensur": 14, "necessari": 14, "confirm": 14, "replic": 14, "guidelin": 14, "here": [14, 20, 26], "To": 14, "git": 14, "breimanntool": 14, "master": 14, "repositori": 14, "your_usernam": 14, "navig": 14, "folder": 14, "up": 14, "cd": 14, "isol": 14, "aanalysi": 14, "activ": 14, "poetri": 14, "pytest": 14, "hypothesi": 14, "execut": 14, "case": 14, "directori": 14, "substanti": 14, "minor": 14, "typo": 14, "concis": 14, "descript": [14, 20], "clear": 14, "branch": 14, "fix": 14, "readm": 14, "date": 14, "readthedoc": 14, "crucial": 14, "modif": 14, "thei": 14, "render": 14, "correctli": 14, "strive": 14, "consist": [14, 17], "interfac": 14, "well": 14, "organ": 14, "codebas": 14, "standalon": 14, "focu": 14, "special": 14, "task": [14, 24], "carri": 14, "out": 14, "complet": 14, "process": 14, "fulfil": 14, "purpos": 14, "being": 14, "implement": 14, "inherit": 14, "supplementari": 14, "accordingli": 14, "suffix": 14, "support": 14, "semi": 14, "strictli": 14, "adher": 14, "aforement": 14, "primari": 14, "_util": 14, "_utils_const": 14, "py": 14, "modular": 14, "easili": 14, "therefor": 14, "flat": 14, "hierarchi": 14, "program": 14, "outlin": 14, "softwar": 14, "user": 14, "friendli": 14, "hint": 14, "enhanc": 14, "propos": 14, "pep": 14, "484": 14, "book": 14, "error": 14, "messag": 14, "docstr": 14, "257": 14, "guid": 14, "markup": 14, "languag": 14, "restructuredtext": 14, "rst": 14, "primer": 14, "cheat": 14, "sheet": 14, "restructuretext": 14, "cheatsheet": 14, "sphinx": 14, "autodoc": 14, "inclus": 14, "napoleon": 14, "extens": 14, "conf": 14, "four": 14, "bird": 14, "ey": 14, "view": [14, 24, 27], "background": 14, "reflect": 14, "close": 14, "essenti": 14, "medium": 14, "tabular": 14, "critic": 14, "go": 14, "_build": 14, "browser": 14, "citat": 16, "wa": 17, "develop": 17, "typic": 17, "et": 19, "al": 19, "2023a": 19, "2023b": 19, "breimann23c": [19, 20, 24, 25], "2023c": 19, "chart": 19, "\u03b3": 19, "secretas": [19, 20], "substrat": [19, 20], "cheng06": [19, 20], "cheng": 19, "2006": 19, "larg": 19, "disulphid": 19, "bridg": [19, 20], "kernel": 19, "recurs": 19, "neural": 19, "network": 19, "graph": 19, "match": 19, "struct": 19, "funct": 19, "kawashima08": [19, 20], "kawashima": 19, "2008": 19, "aid": 19, "databas": 19, "report": 19, "nucleic": 19, "magnan09": [19, 20], "magnan": 19, "randal": 19, "baldi": 19, "2009": 19, "solpro": [19, 20], "accur": 19, "solubl": [19, 20], "bioinformat": 19, "galiez16": [19, 20], "galiez": 19, "2016": 19, "viralpro": [19, 20], "viral": 19, "capsid": [19, 20], "tail": [19, 20], "song18": [19, 20], "song": 19, "2018": 19, "prosper": [19, 20], "throughput": 19, "cleavag": [19, 20], "site": [19, 20], "90": 19, "proteas": 19, "accuraci": 19, "shen19": [19, 20], "shen": 19, "2019": 19, "subcellular": [19, 20], "local": 19, "evolutionari": 19, "chou": 19, "pseaac": 19, "j": 19, "theor": 19, "biol": 19, "tang20": [19, 20], "tang": 19, "2020": 19, "idp": [19, 20], "seq2seq": [19, 20], "intrins": [19, 20], "disord": [19, 20], "region": [19, 20], "teng21": [19, 20], "teng": 19, "2021": 19, "rerf": [19, 20], "pred": [19, 20], "amyloidogen": [19, 20], "pseudo": 19, "composit": 19, "tripeptid": 19, "bmc": 19, "yang21": [19, 20], "yang": 19, "granular": 19, "multipl": 19, "rna": [19, 20], "bind": [19, 20], "appl": 19, "chronolog": [20, 26], "histori": [20, 26], "1_overview_benchmark": 20, "2_overview_scal": 20, "abbrevi": [20, 26], "aa_ldr": [20, 26], "dom_gsec": [20, 26], "some": [20, 26], "version": [20, 26], "dataset_nam": [20, 26], "_pu": [20, 26], "dom_gsec_pu": [20, 26], "predictor": 20, "aa_caspase3": 20, "233": 20, "185605": 20, "705": 20, "184900": 20, "caspas": 20, "adjac": 20, "aa_furin": 20, "71": 20, "59003": 20, "163": 20, "58840": 20, "furin": 20, "342": 20, "118248": 20, "35469": 20, "82779": 20, "long": 20, "ldr": 20, "aa_mmp2": 20, "573": 20, "312976": 20, "2416": 20, "310560": 20, "metallopeptidas": 20, "mmp2": 20, "aa_rnabind": 20, "221": 20, "55001": 20, "6492": 20, "48509": 20, "gmksvm": 20, "ru": 20, "rbp60": 20, "aa_sa": 20, "101082": 20, "84523": 20, "solvent": 20, "sa": 20, "expos": 20, "buri": 20, "1414": 20, "8484": 20, "511": 20, "903": 20, "amyloidognen": 20, "seq_capsid": 20, "7935": 20, "3364680": 20, "3864": 20, "4071": 20, "capdsid": 20, "seq_disulfid": 20, "2547": 20, "614470": 20, "897": 20, "1650": 20, "dipro": 20, "disulfid": 20, "ss": 20, "bond": 20, "seq_loc": 20, "1835": 20, "732398": 20, "1045": 20, "790": 20, "nan": 20, "cytoplasm": 20, "v": 20, "plasma": 20, "seq_solubl": 20, "17408": 20, "4432269": 20, "8704": 20, "insolubl": 20, "seq_tail": 20, "6668": 20, "2671690": 20, "2574": 20, "4094": 20, "126": 20, "92964": 20, "63": 20, "gamma": 20, "694": 20, "494524": 20, "unknown": 20, "statu": 20, "min": 20, "max": 20, "586": 20, "raw": 20, "scales_classif": 20, "compress": 20, "60": 20, "introduc": 21, "togeth": 22, "central": 23, "platform": 23, "novel": 23, "everywher": [24, 25], "In": 24, "binari": 24, "setup": 24, "augment": 24, "smote": 24, "artifici": 24, "Such": 24, "veri": 24, "popular": 24, "deep": 24, "imag": 24, "recognit": 24, "feasibl": 24, "becaus": 24, "slight": 24, "mutat": 24, "alter": 24, "dramat": 24, "often": 24, "great": 24, "quantiti": 24, "besid": 24, "distinguish": 24, "subfield": 24}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "CPP"], [3, 0, 1, "", "CPPPlot"], [4, 0, 1, "", "SequenceFeature"], [5, 0, 1, "", "dPULearn"], [6, 3, 1, "", "load_dataset"], [7, 3, 1, "", "load_scales"], [8, 3, 1, "", "plot_gcfs"], [9, 3, 1, "", "plot_get_cdict"], [10, 3, 1, "", "plot_get_cmap"], [11, 3, 1, "", "plot_set_legend"], [12, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "center_labels_"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "cluster_naming"], [1, 1, 1, "", "correlation"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 1, 1, "", "get_cluster_centers"], [1, 1, 1, "", "get_cluster_medoids"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "medoid_ind_"], [1, 2, 1, "", "medoid_labels_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "n_clusters"]], "aaanalysis.CPP": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "heatmap"], [3, 1, 1, "", "profile"], [3, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "add_dif"], [4, 1, 1, "", "add_feat_value"], [4, 1, 1, "", "add_position"], [4, 1, 1, "", "feat_matrix"], [4, 1, 1, "", "feat_names"], [4, 1, 1, "", "get_df_parts"], [4, 1, 1, "", "get_features"], [4, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "eval"], [5, 1, 1, "", "fit"], [5, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 22, 24, 27], "featur": [0, 27], "engin": [0, 27], "pu": [0, 24], "learn": [0, 24], "explain": [0, 25], "ai": [0, 25], "perturb": 0, "plot": 0, "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], "aaclust": 1, "note": [1, 2, 4, 5, 12], "cpp": [2, 23], "cppplot": 3, "exampl": [3, 4, 5, 6, 11, 12, 13], "sequencefeatur": 4, "dpulearn": 5, "load_dataset": 6, "load_scal": 7, "plot_gcf": 8, "plot_get_cdict": 9, "plot_get_cmap": 10, "plot_set_legend": 11, "plot_set": 12, "welcom": 13, "document": [13, 14], "instal": [13, 14], "overview": [13, 20, 26], "refer": [13, 19], "indic": 13, "tabl": [13, 20, 26], "citat": 13, "contribut": 14, "introduct": [14, 17], "vision": 14, "object": 14, "non": 14, "goal": 14, "principl": [14, 28], "bug": 14, "report": 14, "latest": 14, "version": 14, "local": 14, "develop": 14, "environ": 14, "fork": 14, "clone": 14, "depend": 14, "run": 14, "unit": 14, "test": 14, "pull": 14, "request": 14, "preview": 14, "chang": 14, "name": 14, "convent": 14, "class": 14, "templat": 14, "function": 14, "method": 14, "code": 14, "philosophi": 14, "style": 14, "layer": 14, "build": 14, "doc": 14, "workflow": 17, "algorithm": 19, "dataset": [19, 20, 26], "benchmark": [19, 20, 26], "us": [19, 23], "case": 19, "further": 19, "inform": 19, "protein": [20, 26, 27], "amino": [20, 21, 26], "acid": [20, 21, 26], "scale": [20, 21, 26], "aaontologi": 21, "classif": 21, "flow": 22, "enri": 22, "point": 22, "identifi": 23, "physicochem": 23, "signatur": 23, "from": 24, "unbalanc": 24, "small": 24, "what": [24, 25], "i": [24, 25], "sequenc": 25, "level": 25, "tutori": 27, "quick": 27, "start": 27, "load": 27, "redund": 27, "reduct": 27, "identif": 27, "neg": 27, "predict": 27, "usag": 28}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data": [[0, "data"]], "Feature Engineering": [[0, "feature-engineering"]], "PU Learning": [[0, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], "aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [2, null], [2, null], [4, null], [4, null], [4, null], [4, null], [4, null], [5, null], [5, null], [12, null]], "aaanalysis.CPP": [[2, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[3, "aaanalysis-cppplot"]], "Examples": [[3, null], [4, null], [4, null], [5, null], [6, null], [11, null], [12, null]], "aaanalysis.SequenceFeature": [[4, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[5, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[6, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[7, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[8, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[9, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_cmap": [[10, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_set_legend": [[11, "aaanalysis-plot-set-legend"]], "aaanalysis.plot_settings": [[12, "aaanalysis-plot-settings"]], "Welcome to the AAanalysis documentation": [[13, "welcome-to-the-aaanalysis-documentation"]], "Install": [[13, "install"]], "OVERVIEW": [[13, null]], "EXAMPLES": [[13, null]], "REFERENCES": [[13, null]], "Indices and tables": [[13, "indices-and-tables"]], "Citation": [[13, "citation"]], "Contributing": [[14, "contributing"]], "Introduction": [[14, "introduction"], [17, "introduction"]], "Vision": [[14, "vision"]], "Objectives": [[14, "objectives"]], "Non-goals": [[14, "non-goals"]], "Principles": [[14, "principles"]], "Bug Reports": [[14, "bug-reports"]], "Installation": [[14, "installation"]], "Latest Version": [[14, "latest-version"]], "Local Development Environment": [[14, "local-development-environment"]], "Fork and Clone": [[14, "fork-and-clone"]], "Install Dependencies": [[14, "install-dependencies"]], "Run Unit Tests": [[14, "run-unit-tests"]], "Pull Requests": [[14, "pull-requests"]], "Preview Changes": [[14, "preview-changes"]], "Documentation": [[14, "documentation"]], "Naming Conventions": [[14, "naming-conventions"]], "Class Templates": [[14, "class-templates"]], "Function and Method Naming": [[14, "function-and-method-naming"]], "Code Philosophy": [[14, "code-philosophy"]], "Documentation Style": [[14, "documentation-style"]], "Documentation Layers": [[14, "documentation-layers"]], "Building the Docs": [[14, "building-the-docs"]], "Workflow": [[17, "workflow"]], "References": [[19, "references"]], "Algorithms": [[19, "algorithms"]], "Datasets and Benchmarks": [[19, "datasets-and-benchmarks"]], "Use Cases": [[19, "use-cases"]], "Further Information": [[19, "further-information"]], "Tables": [[20, "tables"], [26, "tables"]], "Overview Table": [[20, "overview-table"], [26, "overview-table"]], "Protein benchmark datasets": [[20, "protein-benchmark-datasets"], [26, "protein-benchmark-datasets"]], "Amino acid scale datasets": [[20, "amino-acid-scale-datasets"], [26, "amino-acid-scale-datasets"]], "AAontology: Classification of amino acid scales": [[21, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[22, "data-flow-and-enry-points"]], "Identifying Physicochemical Signatures using CPP": [[23, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[24, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[24, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[25, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[25, "what-is-explainable-ai"]], "Tutorials": [[27, "tutorials"]], "Quick start": [[27, "quick-start"]], "Data loading": [[27, "data-loading"]], "Redundancy-reduction": [[27, "redundancy-reduction"]], "Feature engineering": [[27, "feature-engineering"]], "Identification of negatives": [[27, "identification-of-negatives"]], "Protein prediction": [[27, "protein-prediction"]], "Usage Principles": [[28, "usage-principles"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "center_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.center_labels_"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "cluster_naming() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.cluster_naming"]], "correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.correlation"]], "eval() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "get_cluster_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_centers"]], "get_cluster_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_medoids"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "medoid_ind_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_ind_"]], "medoid_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_labels_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "cpp (class in aaanalysis)": [[2, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp static method)": [[2, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[3, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[4, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[5, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[5, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[6, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[7, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[8, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[9, "aaanalysis.plot_get_cdict"]], "plot_get_cmap() (in module aaanalysis)": [[10, "aaanalysis.plot_get_cmap"]], "plot_set_legend() (in module aaanalysis)": [[11, "aaanalysis.plot_set_legend"]], "plot_settings() (in module aaanalysis)": [[12, "aaanalysis.plot_settings"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.CPPPlot", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_get_cdict", "generated/aaanalysis.plot_get_cmap", "generated/aaanalysis.plot_set_legend", "generated/aaanalysis.plot_settings", "index", "index/CONTRIBUTING_COPY", "index/badges", "index/citations", "index/introduction", "index/overview", "index/references", "index/tables", "index/usage_principles/aaontology", "index/usage_principles/data_flow_entry_points", "index/usage_principles/feature_identification", "index/usage_principles/pu_learning", "index/usage_principles/xai", "tables_template", "tutorials", "usage_principles"], "filenames": ["api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.CPPPlot.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_get_cdict.rst", "generated/aaanalysis.plot_get_cmap.rst", "generated/aaanalysis.plot_set_legend.rst", "generated/aaanalysis.plot_settings.rst", "index.rst", "index/CONTRIBUTING_COPY.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/overview.rst", "index/references.rst", "index/tables.rst", "index/usage_principles/aaontology.rst", "index/usage_principles/data_flow_entry_points.rst", "index/usage_principles/feature_identification.rst", "index/usage_principles/pu_learning.rst", "index/usage_principles/xai.rst", "tables_template.rst", "tutorials.rst", "usage_principles.rst"], "titles": ["API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.CPPPlot", "aaanalysis.SequenceFeature", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_get_cdict", "aaanalysis.plot_get_cmap", "aaanalysis.plot_set_legend", "aaanalysis.plot_settings", "Welcome to the AAanalysis documentation", "Contributing", "<no title>", "<no title>", "Introduction", "<no title>", "References", "Tables", "AAontology: Classification of amino acid scales", "Data Flow and Enry Points", "Identifying Physicochemical Signatures using CPP", "Learning from unbalanced and small data", "Explainable AI at Sequence Level", "Tables", "Tutorials", "Usage Principles"], "terms": {"thi": [0, 1, 3, 12, 14], "page": [0, 13], "contain": [0, 2, 3, 5, 6, 14, 20, 22, 24, 26], "refer": [0, 1, 2, 4, 14, 20], "public": [0, 13, 14, 16], "object": [0, 1, 3, 4, 5], "function": [0, 3, 8, 10, 12, 13, 18], "aaanalysi": [0, 14, 16, 17, 18, 20, 22, 23, 26, 28], "For": [0, 1, 4, 11, 14, 20, 24, 26], "more": [0, 3, 14], "exampl": [0, 14, 24], "practic": 0, "usag": [0, 13, 14], "our": [0, 14], "notebook": [0, 27], "conveni": 0, "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 13, 14, 17, 18, 20, 21, 23, 26], "common": [0, 14], "import": [0, 4, 5, 6, 11, 12, 14, 28], "modul": [0, 1, 13], "follow": [0, 1, 2, 4, 5, 13, 14, 16, 17, 18, 20, 26], "aa": [0, 2, 4, 5, 6, 11, 12, 20, 26, 28], "Then": 0, "you": [0, 13, 14, 16], "can": [0, 1, 4, 5, 7, 11, 13, 14, 17, 22, 24], "access": [0, 20], "all": [0, 1, 2, 3, 4, 12, 14, 20, 26], "method": [0, 1, 2, 3, 4, 5, 19], "via": [0, 14, 19], "alia": [0, 4], "load_dataset": [0, 4, 20], "class": [1, 2, 3, 4, 5, 6, 24], "model": [1, 5, 14, 24], "none": [1, 2, 3, 4, 5, 6, 9, 10, 11], "model_kwarg": 1, "verbos": [1, 2, 3, 4, 5, 12], "fals": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "sourc": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14], "base": [1, 2, 3, 4, 5, 10, 13, 14, 17, 18, 19, 23, 24], "A": [1, 4, 7, 11, 14, 17, 19], "k": [1, 13, 17, 18, 19], "optim": [1, 2, 3, 13, 17, 18, 19], "cluster": [1, 13, 17, 18, 19], "framework": [1, 13, 17, 18], "select": [1, 2, 3, 6, 7, 13, 14, 17, 18, 19], "redund": [1, 2, 13, 17, 18, 19], "reduc": [1, 5, 13, 17, 18, 19], "set": [1, 2, 3, 4, 5, 6, 8, 11, 12, 13, 14, 17, 18, 19, 20, 22], "numer": [1, 3, 4, 13, 17, 18], "scale": [1, 2, 3, 4, 7, 9, 10, 12, 13, 16, 17, 18, 19, 22], "design": [1, 3, 14, 23], "primarili": [1, 5, 14], "amino": [1, 2, 3, 4, 6, 7, 13, 16, 17, 18, 19, 22, 24], "acid": [1, 2, 3, 4, 6, 7, 13, 16, 17, 18, 19, 22, 24], "versatil": 1, "enough": 1, "ani": [1, 14, 17], "indic": [1, 3, 4, 5, 20, 26], "It": [1, 17], "take": 1, "requir": 1, "pre": [1, 2, 14], "defin": [1, 4, 14], "number": [1, 2, 3, 4, 5, 6, 10, 11], "from": [1, 2, 3, 4, 5, 7, 13, 14, 20, 22, 26], "scikit": [1, 14], "learn": [1, 5, 13, 14, 16, 17, 18, 19, 20, 26], "http": [1, 14], "org": [1, 14], "stabl": 1, "html": [1, 14], "By": 1, "leverag": 1, "pearson": [1, 2], "correl": [1, 2], "similar": [1, 24], "measur": [1, 14], "valu": [1, 2, 3, 4, 14, 17, 20], "one": [1, 3], "repres": [1, 3, 17], "sampl": [1, 2, 3, 4, 5, 20, 24, 26], "term": 1, "medoid": 1, "each": [1, 2, 3, 4, 5, 14, 20, 26], "which": [1, 3, 4, 8, 17, 22, 24], "closest": 1, "": [1, 11, 14, 19], "center": [1, 10], "yield": 1, "paramet": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12], "callabl": 1, "option": [1, 2, 3, 4, 5, 6, 7, 10, 12], "default": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12], "sklearn": 1, "kmean": 1, "The": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 20, 22, 23, 26], "emploi": [1, 5], "given": [1, 3, 4, 6, 7, 20, 26], "n_cluster": 1, "dict": [1, 2, 3, 4, 5, 9, 10, 11], "dictionari": [1, 2, 3, 4, 9, 10, 11], "keyword": [1, 3, 5], "argument": [1, 3, 4, 5, 11], "pass": [1, 3, 5, 11, 14], "bool": [1, 2, 3, 4, 5, 6, 7, 10, 11, 12], "flag": 1, "enabl": [1, 2, 3, 4, 5, 12, 13, 14, 17, 18, 23], "disabl": [1, 6], "output": [1, 4, 5, 12], "obtain": [1, 4, 22], "type": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 20, 26], "int": [1, 2, 3, 4, 5, 6, 10, 11], "labels_": [1, 5], "label": [1, 2, 3, 4, 5, 11, 14, 20, 24], "order": [1, 20, 26], "featur": [1, 2, 3, 4, 5, 10, 13, 14, 17, 18, 22, 23, 24], "matrix": [1, 4, 5, 20], "arrai": [1, 2, 4, 5], "like": [1, 2, 4, 5, 14], "centers_": 1, "averag": [1, 4], "correspond": [1, 14], "center_labels_": 1, "medoids_": 1, "medoid_labels_": 1, "medoid_ind_": 1, "chosen": [1, 2, 4, 6], "within": [1, 2, 4], "origin": 1, "dataset": [1, 2, 6, 7, 13, 14, 17, 18, 24, 25], "__init__": [1, 2, 3, 4, 5], "fit": [1, 5, 14], "x": [1, 3, 5, 6, 11, 12], "name": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 20, 26], "on_cent": 1, "true": [1, 2, 3, 4, 6, 7, 11, 12], "min_th": 1, "0": [1, 2, 3, 4, 5, 11, 12, 20, 24], "merge_metr": 1, "euclidean": [1, 5], "data": [1, 3, 5, 13, 14, 20, 26], "format": [1, 12], "us": [1, 2, 3, 5, 6, 10, 12, 13, 14, 16, 17, 20, 22, 24, 26], "determin": 1, "without": [1, 3, 14, 20], "specif": [1, 9, 14, 20, 22, 26], "partit": 1, "maxim": 1, "beyond": 1, "threshold": [1, 2], "qualiti": 1, "either": [1, 4, 13], "minimum": [1, 4, 6], "member": 1, "min_cor": 1, "between": [1, 2, 3, 4, 10, 11], "its": [1, 14], "govern": 1, "undergo": 1, "three": [1, 4, 10, 20, 26], "stage": 1, "1": [1, 2, 3, 4, 5, 11, 12, 20, 24, 26], "estim": 1, "lower": 1, "bound": 1, "2": [1, 2, 3, 4, 5, 11, 20, 24, 26], "refin": 1, "metric": [1, 5, 14], "3": [1, 4, 5, 11, 14, 20], "merg": 1, "smaller": 1, "direct": 1, "final": 1, "reduct": 1, "shape": [1, 2, 3, 4, 5, 11], "n_sampl": [1, 2, 4, 5], "n_featur": [1, 2, 3, 4, 5], "where": [1, 4, 5], "list": [1, 3, 4, 10, 11], "str": [1, 3, 4, 5, 6, 7, 9, 10, 11, 12], "If": [1, 2, 3, 4, 5, 6, 10, 12, 13, 14, 16, 24], "provid": [1, 2, 3, 5, 7, 10, 13, 14, 18, 20, 24, 26], "return": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11], "appli": [1, 5, 10, 11, 12], "otherwis": [1, 3, 4, 5], "float": [1, 2, 3, 5, 10, 11, 12], "instead": 1, "names_medoid": 1, "attribut": 1, "attr": 1, "further": [1, 3, 14], "inform": [1, 2, 3, 4, 5, 22], "paper": 1, "todo": [1, 2], "add": [1, 2, 3, 4], "link": [1, 2, 13, 14, 16, 19], "cluster_nam": 1, "name_unclassifi": 1, "unclassifi": [1, 7], "assign": [1, 3, 4, 5], "frequenc": 1, "renam": 1, "prioriti": 1, "most": [1, 2, 3, 5, 13, 17, 18], "frequent": 1, "alreadi": [1, 24], "doe": 1, "exist": [1, 14, 24], "cannot": 1, "classifi": [1, 3], "static": [1, 2, 4], "get_cluster_cent": 1, "comput": [1, 2, 3, 4, 14, 19], "center_label": 1, "associ": 1, "get_cluster_medoid": 1, "medoid_label": 1, "medoid_ind": 1, "index": [1, 6, 13, 14, 19], "x_test": 1, "x_ref": 1, "labels_test": 1, "labels_ref": 1, "n": [1, 2, 3, 4, 6, 14, 19], "posit": [1, 2, 3, 4, 5, 10, 13, 17, 18, 20, 24, 26], "except_unclassifi": 1, "test": [1, 2], "top": [1, 20], "consid": [1, 14], "strength": 1, "els": 1, "neg": [1, 4, 5, 10, 20, 24], "exclud": 1, "list_top_center_name_corr": 1, "have": [1, 14, 24], "strongest": 1, "eval": [1, 2, 5, 14], "df_scale": [2, 4, 22], "df_cat": [2, 3, 4, 22], "df_part": [2, 4, 22], "split_kw": [2, 4], "accept_gap": [2, 3, 4], "tool": [2, 14, 19], "creat": [2, 3, 4, 5, 14, 22], "filter": [2, 3, 6], "ar": [2, 3, 4, 5, 6, 7, 14, 20, 22, 24, 25, 26], "discrimin": [2, 3], "two": [2, 3, 13, 14, 17, 18, 19, 21, 22], "sequenc": [2, 3, 4, 5, 6, 7, 13, 14, 17, 18, 19, 20, 22, 23, 24, 26], "panda": [2, 3, 4, 5, 14], "datafram": [2, 3, 4, 5, 6, 7, 14, 22], "load_categori": [2, 4], "categori": [2, 3, 4, 7, 9, 10, 11], "physicochem": [2, 4, 13, 17, 18, 19, 22], "part": [2, 3, 4, 14, 20, 22, 26], "sequencefeatur": 2, "get_split_kw": [2, 4], "nest": [2, 4], "split_typ": [2, 4], "whether": [2, 3, 4, 7, 10, 11], "accept": [2, 3, 4], "miss": [2, 3, 4], "omit": [2, 3, 4], "print": [2, 3, 4], "progress": [2, 3, 19], "about": [2, 3], "algorithm": [2, 3, 13, 14, 17, 18, 22, 23], "run": [2, 4], "perform": [2, 5], "step": [2, 3, 4, 14, 17], "parametr": 2, "n_filter": 2, "100": [2, 6, 10], "tmd_len": [2, 3, 4], "20": [2, 3, 4, 14, 20], "jmd_n_len": [2, 3, 4], "10": [2, 3, 4, 10], "jmd_c_len": [2, 3, 4], "ext_len": [2, 3, 4], "4": [2, 3, 4], "start": [2, 3, 4, 14, 22], "check_cat": 2, "n_pre_filt": 2, "pct_pre_filt": 2, "5": [2, 3, 4, 5, 11], "max_std_test": 2, "max_overlap": 2, "max_cor": 2, "n_process": 2, "pipelin": [2, 14], "creation": 2, "aim": [2, 3, 14], "identifi": [2, 3, 5, 13, 17, 18, 19, 24], "collect": 2, "non": [2, 4, 6, 20], "group": [2, 3, 4], "t": 2, "u": [2, 13, 14], "p": [2, 19], "percentag": [2, 5, 10], "length": [2, 3, 4, 6], "tmd": [2, 3, 4], "explan": [2, 3, 14], "first": [2, 3, 4, 10, 14, 20, 26], "terminu": [2, 3, 4], "jmd": [2, 3, 4], "c": [2, 3, 4, 13, 19], "extend": [2, 3, 4, 14, 24], "termin": [2, 3, 4], "should": [2, 3, 4, 5, 7, 14, 24], "longer": 2, "than": 2, "check": [2, 14], "remain": [2, 14], "after": 2, "maximum": [2, 4, 5, 6], "standard": [2, 24], "deviat": 2, "overlap": 2, "cpu": 2, "multiprocess": 2, "automat": [2, 3, 5, 14], "df_feat": [2, 3, 4, 22], "uniqu": [2, 3], "statist": [2, 3], "n_feature_inform": [2, 3], "eleven": 2, "column": [2, 3, 4, 5, 6, 11, 14], "includ": [2, 4, 7, 10, 11, 14], "id": [2, 4], "result": 2, "rank": 2, "11": [2, 3, 11], "split": [2, 4, 22], "subcategori": [2, 3], "sub": 2, "scale_nam": [2, 3], "abs_auc": [2, 3], "absolut": 2, "adjust": [2, 3, 12], "auc": 2, "abs_mean_dif": 2, "mean": [2, 3], "differ": [2, 3, 4, 11, 20, 22, 26], "std_test": [2, 3], "std_ref": 2, "p_val": 2, "mann_whitnei": 2, "ttest_indep": 2, "p_val_fdr_bh": 2, "benjamini": 2, "hochberg": 2, "fdr": 2, "correct": 2, "get": [2, 4, 8], "evalu": [2, 7, 14, 20], "condit": [3, 4], "jmd_m_len": [3, 4], "profil": [3, 9, 10, 13, 17, 18, 23], "y": [3, 11, 12], "val_col": 3, "mean_dif": 3, "val_typ": 3, "count": [3, 7], "normal": [3, 11, 20], "figsiz": 3, "7": [3, 4, 5, 12], "titl": [3, 11], "title_kw": 3, "dict_color": [3, 9, 10, 11], "edge_color": 3, "bar_width": 3, "75": 3, "add_jmd_tmd": 3, "jmd_n_seq": 3, "tmd_seq": 3, "jmd_c_seq": 3, "tmd_color": 3, "mediumspringgreen": 3, "jmd_color": 3, "blue": [3, 11], "tmd_seq_color": 3, "black": [3, 14], "jmd_seq_color": 3, "white": 3, "seq_siz": 3, "tmd_jmd_fontsiz": 3, "xtick_siz": 3, "xtick_width": 3, "xtick_length": 3, "xticks_po": 3, "ytick_siz": 3, "ytick_width": 3, "ytick_length": 3, "ylim": 3, "highlight_tmd_area": 3, "highlight_alpha": 3, "15": [3, 4], "grid": [3, 12], "grid_axi": [3, 12], "both": [3, 12], "add_legend_cat": 3, "legend_kw": 3, "shap_plot": 3, "kwarg": [3, 4, 11], "plot": [3, 9, 10, 11, 12, 13, 14], "instanc": 3, "avail": [3, 13, 16, 19], "specifi": [3, 4, 5, 9, 10, 12, 14], "check_value_typ": 3, "tupl": [3, 10], "size": [3, 4, 8, 10, 11, 12], "custom": [3, 11, 12], "appear": [3, 12], "map": [3, 4, 10, 11], "color": [3, 9, 10, 11], "edg": [3, 11, 14], "bar": [3, 9, 10], "width": [3, 11], "line": [3, 11], "annot": 3, "font": [3, 8, 11, 12], "tick": [3, 12], "axi": [3, 12], "limit": 3, "highlight": 3, "area": 3, "alpha": 3, "ad": 3, "drawn": 3, "legend": [3, 11], "shap": [3, 10, 14], "shaplei": 3, "addit": [3, 4, 5, 11, 12, 20, 26], "gener": [3, 4, 10, 12, 14, 17, 19, 24], "other": [3, 7, 14], "intern": 3, "librari": [3, 12, 14], "ax": [3, 11], "matplotlib": [3, 11, 12, 14], "heatmap": [3, 9, 10], "8": [3, 4, 5, 14], "vmin": 3, "vmax": 3, "grid_on": 3, "cmap": [3, 9, 10], "rdbu_r": 3, "cmap_n_color": 3, "cbar_kw": 3, "facecolor_dark": [3, 10], "add_importance_map": 3, "cbar_pct": 3, "featuremap": 3, "versu": 3, "wrapper": [3, 13, 14, 17, 18], "seaborn": [3, 10, 12, 14], "level": [3, 6, 13, 14, 18, 20, 21], "e": [3, 4, 9, 10, 12, 13, 14, 17, 18, 20, 24, 26], "g": [3, 4, 9, 10, 12, 13, 14, 17, 18, 20, 24, 26], "protein": [3, 4, 6, 13, 14, 17, 18, 19, 22, 23, 24], "shown": 3, "feat_impact": 3, "displai": 3, "sum": 3, "std": 3, "aggreg": 3, "positions_onli": 3, "across": [3, 14], "recommend": [3, 5, 14], "when": [3, 5], "emphas": [3, 14], "fewer": 3, "value_typ": 3, "height": 3, "figur": 3, "inch": 3, "pyplot": [3, 11], "anchor": [3, 11], "colormap": 3, "infer": [3, 14], "seismic": 3, "space": [3, 5, 10, 11], "impact": 3, "discret": 3, "diverg": 3, "sequenti": 3, "kei": [3, 14], "colorbar": 3, "under": [3, 14], "depicet": 3, "depict": 3, "jmd_n": [3, 4], "jmd_c": [3, 4], "point": [3, 11], "set_xticklabel": 3, "widht": 3, "tick_param": 3, "classif": [3, 7, 13, 18, 20, 24, 26], "pcolormesh": 3, "effect": [3, 14, 24], "onli": [3, 6, 7, 14, 20, 24, 26], "align": [3, 11], "applic": 3, "see": [3, 6, 14, 20], "document": [3, 20, 26], "detail": [3, 6, 11, 13, 14, 16], "cpp": [3, 4, 10, 13, 16, 17, 18, 22, 27], "code": [3, 10], "update_seq_s": 3, "retriev": [4, 9, 10], "compon": [4, 5, 20], "continu": 4, "subset": [4, 20], "domain": [4, 6, 20, 26], "transmembran": 4, "membran": [4, 20], "principl": [4, 13], "distinct": [4, 13, 14, 17, 18], "segment": 4, "pattern": 4, "properti": [4, 20, 26], "express": 4, "present": 4, "realiz": 4, "over": 4, "valid": [4, 14], "tmd_e": 4, "tmd_n": 4, "tmd_c": 4, "ext_c": 4, "ext_n": 4, "tmd_jmd": 4, "jmd_n_tmd_n": 4, "tmd_c_jmd_c": 4, "ext_n_tmd_n": 4, "tmd_c_ext_c": 4, "get_df_part": 4, "df_seq": [4, 5, 6, 22], "list_part": 4, "all_part": 4, "datafran": 4, "compris": 4, "tmd_start": 4, "tmd_stop": 4, "string": [4, 10], "len": 4, "must": 4, "lenght": 4, "resp": 4, "extra": 4, "possibl": [4, 24], "found": [4, 7, 14], "sf": 4, "dom_gs": 4, "n_split_min": 4, "n_split_max": 4, "steps_pattern": 4, "n_min": 4, "n_max": 4, "len_max": 4, "steps_periodicpattern": 4, "periodicpattern": 4, "greater": 4, "greatest": 4, "whole": [4, 6], "specfii": 4, "smallest": 4, "integ": 4, "6": 4, "vari": 4, "paramt": 4, "argumetn": 4, "get_featur": 4, "load_scal": [4, 13, 18, 20], "combin": [4, 14], "form": 4, "feat_matrix": 4, "n_job": 4, "return_label": 4, "pd": [4, 5, 14], "seri": 4, "job": 4, "parallel": 4, "spars": 4, "feat_nam": 4, "convert": 4, "depend": 4, "last": 4, "step1": 4, "step2": 4, "add_feat_valu": 4, "dict_scal": 4, "convent": 4, "letter": 4, "feature_valu": 4, "n_part": 4, "ha": [4, 14], "structur": [4, 19], "th": 4, "n_split": 4, "p1": 4, "p2": 4, "pn": 4, "end": 4, "odd": 4, "even": 4, "give": 4, "add_dif": 4, "sample_nam": 4, "ref_group": 4, "add_posit": 4, "part_split": 4, "feat_posit": 4, "total": [4, 5], "n_compon": 5, "pca_kwarg": 5, "determinist": [5, 13, 17, 18], "unlabel": [5, 13, 17, 18, 20, 24, 26], "offer": [5, 14], "approach": [5, 24], "pu": [5, 13, 17, 18, 20, 26], "princip": [5, 20], "analysi": [5, 6, 7, 13, 14, 17, 18], "pca": 5, "dimension": [5, 19], "pc": [5, 20], "iter": 5, "reliabl": [5, 14], "These": [5, 14, 24], "those": 5, "distant": 5, "altern": [5, 24], "also": [5, 14, 20], "distanc": 5, "manhattan": 5, "cosin": 5, "80": 5, "cover": 5, "varianc": 5, "identif": [5, 19], "datapoint": 5, "inspir": [5, 14], "techniqu": [5, 24], "an": [5, 6, 13, 14, 16, 19, 20, 26], "theoret": 5, "high": [5, 19], "n_neg": 5, "label_po": 5, "name_neg": 5, "rel_neg": 5, "col_class": 5, "newli": 5, "updat": [5, 14], "new": [5, 14], "store": 5, "Will": 5, "dure": 5, "initi": 5, "small": [5, 13, 14, 17, 18, 25], "datafor": 5, "conta": 5, "po": 5, "unl": 5, "numpi": [5, 14], "np": 5, "atgc": 5, "gcta": 5, "actg": 5, "tacg": 5, "mode": 5, "modifi": [5, 6, 12, 22], "dpul": 5, "info": 6, "random": 6, "non_canonical_aa": 6, "remov": [6, 12], "min_len": 6, "max_len": 6, "aa_window_s": 6, "9": [6, 14], "load": [6, 7, 13, 14, 18], "benchmark": [6, 13, 18], "categor": 6, "dom": [6, 20, 26], "seq": [6, 20, 26], "overview": [6, 14], "tabl": [6, 14], "breimann23a": [6, 7, 19, 20], "per": 6, "liter": 6, "keep": 6, "gap": [6, 10], "canon": 6, "dont": 6, "replac": 6, "symbol": 6, "window": 6, "aa_": 6, "seq_amylo": [6, 20, 26], "just_aaindex": 7, "unclassified_in": 7, "aaontologi": [7, 13, 16, 18, 19, 20], "thorough": 7, "residu": [7, 19, 20, 26], "scales_raw": [7, 20], "scales_cat": 7, "scales_pc": [7, 20], "top60": [7, 20], "top60_ev": [7, 20], "relev": 7, "aaindex": [7, 19], "current": 8, "ut": 8, "plot_set": 8, "dict_scale_cat": [9, 10], "cppplot": [9, 10, 14], "respect": [9, 10, 13, 14, 16], "n_color": 10, "color_po": 10, "color_neg": 10, "color_cent": 10, "input": [10, 14, 22], "hex": 10, "pct_gap": 10, "pct_center": 10, "palett": 10, "feat": 10, "ggplot": 10, "datagroup": 10, "dark": 10, "face": 10, "rgb": 10, "hl": 10, "husl": 10, "xkcd": 10, "interpret": [10, 13, 14, 16, 17, 18, 19, 23], "latter": 10, "rang": 10, "sn": 10, "color_palett": 10, "light_palett": 10, "lighter": 10, "handl": 11, "list_cat": 11, "ncol": 11, "fontsiz": 11, "weight": [11, 19], "lw": 11, "edgecolor": 11, "return_handl": 11, "loc": 11, "upper": 11, "left": 11, "labelspac": 11, "columnspac": 11, "fontsize_legend": 11, "title_align_left": 11, "fontsize_weight": 11, "customiz": 11, "attach": 11, "item": 11, "coordin": 11, "text": [11, 12], "locat": [11, 20], "vertic": 11, "horizont": 11, "marker": 11, "directli": [11, 14], "finer": 11, "control": 11, "how": 11, "line2d": 11, "cat1": 11, "red": 11, "cat2": 11, "o": 11, "fig_format": 12, "pdf": 12, "font_scal": 12, "arial": 12, "change_s": 12, "weight_bold": 12, "adjust_el": 12, "short_tick": 12, "no_tick": 12, "no_ticks_i": 12, "short_ticks_i": 12, "no_ticks_x": 12, "short_ticks_x": 12, "configur": 12, "visual": [12, 14], "variou": [12, 14, 22], "file": [12, 14], "save": 12, "make": [12, 14], "visibl": 12, "choos": 12, "san": 12, "serif": 12, "verdana": 12, "helvetica": 12, "dejavu": 12, "element": 12, "bold": 12, "layout": 12, "short": 12, "mark": 12, "global": 12, "python": [13, 14, 17, 18], "predict": [13, 14, 17, 18, 19, 20, 23, 24, 26], "aaclust": [13, 16, 17, 18, 19, 27], "compar": [13, 17, 18, 22, 23], "engin": [13, 14, 17, 18, 23], "dpulearn": [13, 16, 17, 18, 27], "train": [13, 14, 17, 18, 24], "unbalanc": [13, 14, 17, 18, 25], "moreov": [13, 18], "load_data": [13, 18], "depth": [13, 18], "pypi": 13, "conda": [13, 14], "forg": 13, "pip": [13, 14], "introduct": 13, "contribut": 13, "tutori": [13, 14], "api": 13, "explain": [13, 14, 19], "ai": [13, 14, 19], "perturb": [13, 24], "util": [13, 14], "search": 13, "your": [13, 14, 16], "work": [13, 16], "pleas": [13, 14, 16], "cite": [13, 16], "breimann23b": [13, 16, 19, 20, 21], "_": [13, 16], "breimann": [13, 16, 19], "kamp": [13, 16], "steiner": [13, 16], "frishman": [13, 16], "2023": [13, 16], "ontologi": [13, 16, 19], "machin": [13, 14, 16, 19, 24], "biorxiv": [13, 16, 19], "welcom": 14, "thank": 14, "we": 14, "open": 14, "project": [14, 20, 26], "focus": 14, "involv": 14, "invalu": 14, "made": 14, "wai": 14, "suggest": 14, "github": 14, "issu": 14, "tracker": 14, "submit": 14, "improv": [14, 19], "particip": 14, "discuss": 14, "newcom": 14, "tackl": 14, "good": 14, "email": 14, "stephanbreimann": 14, "gmail": 14, "com": 14, "question": 14, "establish": 14, "comprehens": 14, "toolkit": [14, 22], "robust": 14, "life": [14, 24, 25], "scienc": [14, 24, 25], "integr": [14, 19], "seamlessli": 14, "flexibl": 14, "interoper": 14, "packag": 14, "biopython": 14, "reimplement": 14, "solut": 14, "ignor": 14, "biolog": [14, 17, 24], "context": 14, "relianc": 14, "opaqu": 14, "box": 14, "empir": 14, "insight": 14, "cut": 14, "fair": 14, "account": 14, "transpar": 14, "re": [14, 19], "commit": 14, "divers": 14, "aspect": 14, "causal": 14, "minim": 14, "reproduc": 14, "mre": 14, "least": 14, "amount": 14, "demonstr": 14, "self": 14, "ensur": 14, "necessari": 14, "confirm": 14, "replic": 14, "guidelin": 14, "here": [14, 20, 26], "To": 14, "git": 14, "breimanntool": 14, "master": 14, "repositori": 14, "your_usernam": 14, "navig": 14, "folder": 14, "up": 14, "cd": 14, "isol": 14, "aanalysi": 14, "activ": 14, "poetri": 14, "pytest": 14, "hypothesi": 14, "execut": 14, "case": 14, "directori": 14, "substanti": 14, "minor": 14, "typo": 14, "concis": 14, "descript": [14, 20], "clear": 14, "branch": 14, "fix": 14, "readm": 14, "date": 14, "readthedoc": 14, "crucial": 14, "modif": 14, "thei": 14, "render": 14, "correctli": 14, "strive": 14, "consist": [14, 17], "interfac": 14, "well": 14, "organ": 14, "codebas": 14, "standalon": 14, "focu": 14, "special": 14, "task": [14, 24], "carri": 14, "out": 14, "complet": 14, "process": 14, "fulfil": 14, "purpos": 14, "being": 14, "implement": 14, "inherit": 14, "supplementari": 14, "accordingli": 14, "suffix": 14, "support": 14, "semi": 14, "strictli": 14, "adher": 14, "aforement": 14, "primari": 14, "_util": 14, "_utils_const": 14, "py": 14, "modular": 14, "easili": 14, "therefor": 14, "flat": 14, "hierarchi": 14, "program": 14, "outlin": 14, "softwar": 14, "user": 14, "friendli": 14, "hint": 14, "enhanc": 14, "propos": 14, "pep": 14, "484": 14, "book": 14, "error": 14, "messag": 14, "docstr": 14, "257": 14, "guid": 14, "markup": 14, "languag": 14, "restructuredtext": 14, "rst": 14, "primer": 14, "cheat": 14, "sheet": 14, "restructuretext": 14, "cheatsheet": 14, "sphinx": 14, "autodoc": 14, "inclus": 14, "napoleon": 14, "extens": 14, "conf": 14, "four": 14, "bird": 14, "ey": 14, "view": [14, 24, 27], "background": 14, "reflect": 14, "close": 14, "essenti": 14, "medium": 14, "tabular": 14, "critic": 14, "go": 14, "_build": 14, "browser": 14, "citat": 16, "wa": 17, "develop": 17, "typic": 17, "et": 19, "al": 19, "2023a": 19, "2023b": 19, "breimann23c": [19, 20, 24, 25], "2023c": 19, "chart": 19, "\u03b3": 19, "secretas": [19, 20], "substrat": [19, 20], "cheng06": [19, 20], "cheng": 19, "2006": 19, "larg": 19, "disulphid": 19, "bridg": [19, 20], "kernel": 19, "recurs": 19, "neural": 19, "network": 19, "graph": 19, "match": 19, "struct": 19, "funct": 19, "kawashima08": [19, 20], "kawashima": 19, "2008": 19, "aid": 19, "databas": 19, "report": 19, "nucleic": 19, "magnan09": [19, 20], "magnan": 19, "randal": 19, "baldi": 19, "2009": 19, "solpro": [19, 20], "accur": 19, "solubl": [19, 20], "bioinformat": 19, "galiez16": [19, 20], "galiez": 19, "2016": 19, "viralpro": [19, 20], "viral": 19, "capsid": [19, 20], "tail": [19, 20], "song18": [19, 20], "song": 19, "2018": 19, "prosper": [19, 20], "throughput": 19, "cleavag": [19, 20], "site": [19, 20], "90": 19, "proteas": 19, "accuraci": 19, "shen19": [19, 20], "shen": 19, "2019": 19, "subcellular": [19, 20], "local": 19, "evolutionari": 19, "chou": 19, "pseaac": 19, "j": 19, "theor": 19, "biol": 19, "tang20": [19, 20], "tang": 19, "2020": 19, "idp": [19, 20], "seq2seq": [19, 20], "intrins": [19, 20], "disord": [19, 20], "region": [19, 20], "teng21": [19, 20], "teng": 19, "2021": 19, "rerf": [19, 20], "pred": [19, 20], "amyloidogen": [19, 20], "pseudo": 19, "composit": 19, "tripeptid": 19, "bmc": 19, "yang21": [19, 20], "yang": 19, "granular": 19, "multipl": 19, "rna": [19, 20], "bind": [19, 20], "appl": 19, "chronolog": [20, 26], "histori": [20, 26], "1_overview_benchmark": 20, "2_overview_scal": 20, "abbrevi": [20, 26], "aa_ldr": [20, 26], "dom_gsec": [20, 26], "some": [20, 26], "version": [20, 26], "dataset_nam": [20, 26], "_pu": [20, 26], "dom_gsec_pu": [20, 26], "predictor": 20, "aa_caspase3": 20, "233": 20, "185605": 20, "705": 20, "184900": 20, "caspas": 20, "adjac": 20, "aa_furin": 20, "71": 20, "59003": 20, "163": 20, "58840": 20, "furin": 20, "342": 20, "118248": 20, "35469": 20, "82779": 20, "long": 20, "ldr": 20, "aa_mmp2": 20, "573": 20, "312976": 20, "2416": 20, "310560": 20, "metallopeptidas": 20, "mmp2": 20, "aa_rnabind": 20, "221": 20, "55001": 20, "6492": 20, "48509": 20, "gmksvm": 20, "ru": 20, "rbp60": 20, "aa_sa": 20, "101082": 20, "84523": 20, "solvent": 20, "sa": 20, "expos": 20, "buri": 20, "1414": 20, "8484": 20, "511": 20, "903": 20, "amyloidognen": 20, "seq_capsid": 20, "7935": 20, "3364680": 20, "3864": 20, "4071": 20, "capdsid": 20, "seq_disulfid": 20, "2547": 20, "614470": 20, "897": 20, "1650": 20, "dipro": 20, "disulfid": 20, "ss": 20, "bond": 20, "seq_loc": 20, "1835": 20, "732398": 20, "1045": 20, "790": 20, "nan": 20, "cytoplasm": 20, "v": 20, "plasma": 20, "seq_solubl": 20, "17408": 20, "4432269": 20, "8704": 20, "insolubl": 20, "seq_tail": 20, "6668": 20, "2671690": 20, "2574": 20, "4094": 20, "126": 20, "92964": 20, "63": 20, "gamma": 20, "694": 20, "494524": 20, "unknown": 20, "statu": 20, "min": 20, "max": 20, "586": 20, "raw": 20, "scales_classif": 20, "compress": 20, "60": 20, "introduc": 21, "togeth": 22, "central": 23, "platform": 23, "novel": 23, "everywher": [24, 25], "In": 24, "binari": 24, "setup": 24, "augment": 24, "smote": 24, "artifici": 24, "Such": 24, "veri": 24, "popular": 24, "deep": 24, "imag": 24, "recognit": 24, "feasibl": 24, "becaus": 24, "slight": 24, "mutat": 24, "alter": 24, "dramat": 24, "often": 24, "great": 24, "quantiti": 24, "besid": 24, "distinguish": 24, "subfield": 24}, "objects": {"aaanalysis": [[1, 0, 1, "", "AAclust"], [2, 0, 1, "", "CPP"], [3, 0, 1, "", "CPPPlot"], [4, 0, 1, "", "SequenceFeature"], [5, 0, 1, "", "dPULearn"], [6, 3, 1, "", "load_dataset"], [7, 3, 1, "", "load_scales"], [8, 3, 1, "", "plot_gcfs"], [9, 3, 1, "", "plot_get_cdict"], [10, 3, 1, "", "plot_get_cmap"], [11, 3, 1, "", "plot_set_legend"], [12, 3, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[1, 1, 1, "", "__init__"], [1, 2, 1, "", "center_labels_"], [1, 2, 1, "", "centers_"], [1, 1, 1, "", "cluster_naming"], [1, 1, 1, "", "correlation"], [1, 1, 1, "", "eval"], [1, 1, 1, "", "fit"], [1, 1, 1, "", "get_cluster_centers"], [1, 1, 1, "", "get_cluster_medoids"], [1, 2, 1, "", "labels_"], [1, 2, 1, "", "medoid_ind_"], [1, 2, 1, "", "medoid_labels_"], [1, 2, 1, "", "medoids_"], [1, 2, 1, "", "n_clusters"]], "aaanalysis.CPP": [[2, 1, 1, "", "__init__"], [2, 1, 1, "", "eval"], [2, 1, 1, "", "run"]], "aaanalysis.CPPPlot": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "heatmap"], [3, 1, 1, "", "profile"], [3, 1, 1, "", "update_seq_size"]], "aaanalysis.SequenceFeature": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "add_dif"], [4, 1, 1, "", "add_feat_value"], [4, 1, 1, "", "add_position"], [4, 1, 1, "", "feat_matrix"], [4, 1, 1, "", "feat_names"], [4, 1, 1, "", "get_df_parts"], [4, 1, 1, "", "get_features"], [4, 1, 1, "", "get_split_kws"]], "aaanalysis.dPULearn": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "eval"], [5, 1, 1, "", "fit"], [5, 2, 1, "", "labels_"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"]}, "titleterms": {"api": 0, "data": [0, 22, 24, 27], "featur": [0, 27], "engin": [0, 27], "pu": [0, 24], "learn": [0, 24], "explain": [0, 25], "ai": [0, 25], "perturb": 0, "plot": 0, "util": 0, "aaanalysi": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], "aaclust": 1, "note": [1, 2, 4, 5, 12], "cpp": [2, 23], "cppplot": 3, "exampl": [3, 4, 5, 6, 11, 12, 13], "sequencefeatur": 4, "dpulearn": 5, "load_dataset": 6, "load_scal": 7, "plot_gcf": 8, "plot_get_cdict": 9, "plot_get_cmap": 10, "plot_set_legend": 11, "plot_set": 12, "welcom": 13, "document": [13, 14], "instal": [13, 14], "overview": [13, 20, 26], "refer": [13, 19], "indic": 13, "tabl": [13, 20, 26], "citat": 13, "contribut": 14, "introduct": [14, 17], "vision": 14, "object": 14, "non": 14, "goal": 14, "principl": [14, 28], "bug": 14, "report": 14, "latest": 14, "version": 14, "local": 14, "develop": 14, "environ": 14, "fork": 14, "clone": 14, "depend": 14, "run": 14, "unit": 14, "test": 14, "pull": 14, "request": 14, "preview": 14, "chang": 14, "name": 14, "convent": 14, "class": 14, "templat": 14, "function": 14, "method": 14, "code": 14, "philosophi": 14, "style": 14, "layer": 14, "build": 14, "doc": 14, "workflow": 17, "algorithm": 19, "dataset": [19, 20, 26], "benchmark": [19, 20, 26], "us": [19, 23], "case": 19, "further": 19, "inform": 19, "protein": [20, 26, 27], "amino": [20, 21, 26], "acid": [20, 21, 26], "scale": [20, 21, 26], "aaontologi": 21, "classif": 21, "flow": 22, "enri": 22, "point": 22, "identifi": 23, "physicochem": 23, "signatur": 23, "from": 24, "unbalanc": 24, "small": 24, "what": [24, 25], "i": [24, 25], "sequenc": 25, "level": 25, "tutori": 27, "quick": 27, "start": 27, "load": 27, "redund": 27, "reduct": 27, "identif": 27, "neg": 27, "predict": 27, "usag": 28}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx": 57}, "alltitles": {"API": [[0, "api"]], "Data": [[0, "data"]], "Feature Engineering": [[0, "feature-engineering"]], "PU Learning": [[0, "pu-learning"]], "Explainable AI": [[0, "explainable-ai"]], "Perturbation": [[0, "perturbation"]], "Plot Utilities": [[0, "plot-utilities"]], "aaanalysis.AAclust": [[1, "aaanalysis-aaclust"]], "Notes": [[1, null], [2, null], [2, null], [4, null], [4, null], [4, null], [4, null], [4, null], [5, null], [5, null], [12, null]], "aaanalysis.CPP": [[2, "aaanalysis-cpp"]], "aaanalysis.CPPPlot": [[3, "aaanalysis-cppplot"]], "Examples": [[3, null], [4, null], [4, null], [5, null], [6, null], [11, null], [12, null]], "aaanalysis.SequenceFeature": [[4, "aaanalysis-sequencefeature"]], "aaanalysis.dPULearn": [[5, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[6, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[7, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[8, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_get_cdict": [[9, "aaanalysis-plot-get-cdict"]], "aaanalysis.plot_get_cmap": [[10, "aaanalysis-plot-get-cmap"]], "aaanalysis.plot_set_legend": [[11, "aaanalysis-plot-set-legend"]], "aaanalysis.plot_settings": [[12, "aaanalysis-plot-settings"]], "Welcome to the AAanalysis documentation": [[13, "welcome-to-the-aaanalysis-documentation"]], "Install": [[13, "install"]], "OVERVIEW": [[13, null]], "EXAMPLES": [[13, null]], "REFERENCES": [[13, null]], "Indices and tables": [[13, "indices-and-tables"]], "Citation": [[13, "citation"]], "Contributing": [[14, "contributing"]], "Introduction": [[14, "introduction"], [17, "introduction"]], "Vision": [[14, "vision"]], "Objectives": [[14, "objectives"]], "Non-goals": [[14, "non-goals"]], "Principles": [[14, "principles"]], "Bug Reports": [[14, "bug-reports"]], "Installation": [[14, "installation"]], "Latest Version": [[14, "latest-version"]], "Local Development Environment": [[14, "local-development-environment"]], "Fork and Clone": [[14, "fork-and-clone"]], "Install Dependencies": [[14, "install-dependencies"]], "Run Unit Tests": [[14, "run-unit-tests"]], "Pull Requests": [[14, "pull-requests"]], "Preview Changes": [[14, "preview-changes"]], "Documentation": [[14, "documentation"]], "Naming Conventions": [[14, "naming-conventions"]], "Class Templates": [[14, "class-templates"]], "Function and Method Naming": [[14, "function-and-method-naming"]], "Code Philosophy": [[14, "code-philosophy"]], "Documentation Style": [[14, "documentation-style"]], "Documentation Layers": [[14, "documentation-layers"]], "Building the Docs": [[14, "building-the-docs"]], "Workflow": [[17, "workflow"]], "References": [[19, "references"]], "Algorithms": [[19, "algorithms"]], "Datasets and Benchmarks": [[19, "datasets-and-benchmarks"]], "Use Cases": [[19, "use-cases"]], "Further Information": [[19, "further-information"]], "Tables": [[20, "tables"], [26, "tables"]], "Overview Table": [[20, "overview-table"], [26, "overview-table"]], "Protein benchmark datasets": [[20, "protein-benchmark-datasets"], [26, "protein-benchmark-datasets"]], "Amino acid scale datasets": [[20, "amino-acid-scale-datasets"], [26, "amino-acid-scale-datasets"]], "AAontology: Classification of amino acid scales": [[21, "aaontology-classification-of-amino-acid-scales"]], "Data Flow and Enry Points": [[22, "data-flow-and-enry-points"]], "Identifying Physicochemical Signatures using CPP": [[23, "identifying-physicochemical-signatures-using-cpp"]], "Learning from unbalanced and small data": [[24, "learning-from-unbalanced-and-small-data"]], "What is PU learning?": [[24, "what-is-pu-learning"]], "Explainable AI at Sequence Level": [[25, "explainable-ai-at-sequence-level"]], "What is explainable AI?": [[25, "what-is-explainable-ai"]], "Tutorials": [[27, "tutorials"]], "Quick start": [[27, "quick-start"]], "Data loading": [[27, "data-loading"]], "Redundancy-reduction": [[27, "redundancy-reduction"]], "Feature engineering": [[27, "feature-engineering"]], "Identification of negatives": [[27, "identification-of-negatives"]], "Protein prediction": [[27, "protein-prediction"]], "Usage Principles": [[28, "usage-principles"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[1, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.__init__"]], "center_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.center_labels_"]], "centers_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.centers_"]], "cluster_naming() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.cluster_naming"]], "correlation() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.correlation"]], "eval() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.eval"]], "fit() (aaanalysis.aaclust method)": [[1, "aaanalysis.AAclust.fit"]], "get_cluster_centers() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_centers"]], "get_cluster_medoids() (aaanalysis.aaclust static method)": [[1, "aaanalysis.AAclust.get_cluster_medoids"]], "labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.labels_"]], "medoid_ind_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_ind_"]], "medoid_labels_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoid_labels_"]], "medoids_ (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.medoids_"]], "n_clusters (aaanalysis.aaclust attribute)": [[1, "aaanalysis.AAclust.n_clusters"]], "cpp (class in aaanalysis)": [[2, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp static method)": [[2, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[2, "aaanalysis.CPP.run"]], "cppplot (class in aaanalysis)": [[3, "aaanalysis.CPPPlot"]], "__init__() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.__init__"]], "heatmap() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.heatmap"]], "profile() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.profile"]], "update_seq_size() (aaanalysis.cppplot method)": [[3, "aaanalysis.CPPPlot.update_seq_size"]], "sequencefeature (class in aaanalysis)": [[4, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[4, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[4, "aaanalysis.SequenceFeature.get_split_kws"]], "__init__() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[5, "aaanalysis.dPULearn"]], "eval() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.eval"]], "fit() (aaanalysis.dpulearn method)": [[5, "aaanalysis.dPULearn.fit"]], "labels_ (aaanalysis.dpulearn attribute)": [[5, "aaanalysis.dPULearn.labels_"]], "load_dataset() (in module aaanalysis)": [[6, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[7, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[8, "aaanalysis.plot_gcfs"]], "plot_get_cdict() (in module aaanalysis)": [[9, "aaanalysis.plot_get_cdict"]], "plot_get_cmap() (in module aaanalysis)": [[10, "aaanalysis.plot_get_cmap"]], "plot_set_legend() (in module aaanalysis)": [[11, "aaanalysis.plot_set_legend"]], "plot_settings() (in module aaanalysis)": [[12, "aaanalysis.plot_settings"]]}}) \ No newline at end of file