diff --git a/aaanalysis/_data/benchmarks/INFO_benchmarks.xlsx b/aaanalysis/_data/benchmarks/Overview.xlsx similarity index 100% rename from aaanalysis/_data/benchmarks/INFO_benchmarks.xlsx rename to aaanalysis/_data/benchmarks/Overview.xlsx diff --git a/aaanalysis/data_handling/__pycache__/_load_dataset.cpython-39.pyc b/aaanalysis/data_handling/__pycache__/_load_dataset.cpython-39.pyc index 6c407436..23bda7d7 100644 Binary files a/aaanalysis/data_handling/__pycache__/_load_dataset.cpython-39.pyc and b/aaanalysis/data_handling/__pycache__/_load_dataset.cpython-39.pyc differ diff --git a/aaanalysis/data_handling/_load_dataset.py b/aaanalysis/data_handling/_load_dataset.py index 3c58ba8c..1b58f13f 100644 --- a/aaanalysis/data_handling/_load_dataset.py +++ b/aaanalysis/data_handling/_load_dataset.py @@ -19,16 +19,17 @@ # I Helper Functions # Check functions -def check_name_of_dataset(name="INFO", folder_in=None): +def check_name_of_dataset(name="Overview", folder_in=None): """""" - if name == "INFO": + if name == "Overview": return - list_datasets = [x.split(".")[0] for x in os.listdir(folder_in) if "." in x] + list_datasets = [x.split(".")[0] for x in os.listdir(folder_in) + if "." in x and not x.startswith(".")] if name not in list_datasets: list_aa = [x for x in list_datasets if 'AA' in x] list_seq = [x for x in list_datasets if 'SEQ' in x] list_dom = [x for x in list_datasets if 'DOM' in x] - raise ValueError(f"'name' ({name}) is not valid." + raise ValueError(f"'name' ({name}) is not valid. 
Chose one of the following:" f"\n Amino acid datasets: {list_aa}" f"\n Sequence datasets: {list_seq}" f"\n Domain datasets: {list_dom}") @@ -119,7 +120,7 @@ def _get_aa_window(df_seq=None, aa_window_size=9): # II Main Functions -def load_dataset(name: str = "INFO", +def load_dataset(name: str = "Overview", n: Optional[int] = None, random: bool = False, non_canonical_aa: Literal["remove", "keep", "gap"] = "remove", @@ -131,7 +132,7 @@ def load_dataset(name: str = "INFO", Loads protein benchmarking datasets. The benchmarks are categorized into amino acid ('AA'), domain ('DOM'), and sequence ('SEQ') level datasets. - By default, an overview table is provided (``name='INFO'``). For in-depth details, refer to [Breimann23a]_. + By default, an overview table is provided (``name='Overview'``). For in-depth details, refer to [Breimann23a]_. Parameters ---------- @@ -159,7 +160,7 @@ def load_dataset(name: str = "INFO", ------- pandas.DataFrame A DataFrame of either the selected sequence dataset (``df_seq``) or - general info on all benchmark datasets (``df_info``). + overview on all benchmark datasets (``df_overview``). 
Notes ----- @@ -188,8 +189,8 @@ def load_dataset(name: str = "INFO", check_min_max_val(min_len=min_len, max_len=max_len) check_aa_window_size(aa_window_size=aa_window_size) # Load overview table - if name == "INFO": - return ut.read_excel_cached(FOLDER_BENCHMARKS + "INFO_benchmarks.xlsx") + if name == "Overview": + return ut.read_excel_cached(FOLDER_BENCHMARKS + "Overview.xlsx") df = ut.read_csv_cached(FOLDER_BENCHMARKS + name + ".tsv", sep="\t") # Filter data if min_len is not None: diff --git a/aaanalysis/feature_engineering/__pycache__/_aaclust_plot.cpython-39.pyc b/aaanalysis/feature_engineering/__pycache__/_aaclust_plot.cpython-39.pyc index a76b4131..3e58e453 100644 Binary files a/aaanalysis/feature_engineering/__pycache__/_aaclust_plot.cpython-39.pyc and b/aaanalysis/feature_engineering/__pycache__/_aaclust_plot.cpython-39.pyc differ diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index f13f7394..70223b11 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/generated/aaanalysis.AAclustPlot.doctree b/docs/build/doctrees/generated/aaanalysis.AAclustPlot.doctree index 7da66e86..4ed23b07 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.AAclustPlot.doctree and b/docs/build/doctrees/generated/aaanalysis.AAclustPlot.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree index d9f26b25..da9673d1 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree and b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree differ diff --git a/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf b/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf index c8addcb9..ddfa2263 100644 Binary files 
a/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf and b/docs/build/html/_downloads/004048c0cbb6684bdb9047282ab71735/aaanalysis-plot_settings-2.pdf differ diff --git a/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf b/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf index e61bd6d3..cd93d28b 100644 Binary files a/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf and b/docs/build/html/_downloads/163aacac4bd235c9af7a62d7b4d0c89f/aaanalysis-plot_get_cdict-1.pdf differ diff --git a/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf b/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf index 4a3b6e47..a38292fd 100644 Binary files a/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf and b/docs/build/html/_downloads/1f3abea1675a65bb341756c52c9927f4/aaanalysis-plot_gcfs-1.pdf differ diff --git a/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf b/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf index 7562902d..28562a1c 100644 Binary files a/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf and b/docs/build/html/_downloads/72c2e4be500ecf10c85a4e6f81c365fc/aaanalysis-plot_legend-1.pdf differ diff --git a/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf b/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf index 75829d12..1d33e4a7 100644 Binary files a/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf and b/docs/build/html/_downloads/795a736e5af756908120c8bda412fd28/aaanalysis-plot_get_cmap-1.pdf differ diff --git 
a/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf b/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf index fd37155f..1b940746 100644 Binary files a/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf and b/docs/build/html/_downloads/88d7f3f7cb5a284c0bfaa377fb4ce1d8/aaanalysis-plot_get_clist-1.pdf differ diff --git a/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf b/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf index d454887d..3abf57ee 100644 Binary files a/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf and b/docs/build/html/_downloads/fb29bffb69140db4f68c4eb913c6f7d3/aaanalysis-plot_settings-1.pdf differ diff --git a/docs/build/html/generated/aaanalysis.AAclustPlot.html b/docs/build/html/generated/aaanalysis.AAclustPlot.html index 778bf6a4..cde8717f 100644 --- a/docs/build/html/generated/aaanalysis.AAclustPlot.html +++ b/docs/build/html/generated/aaanalysis.AAclustPlot.html @@ -129,7 +129,7 @@
Bases: object
Plot results of AAclust analysis.
Dimensionality reduction is performed for visualization using decomposition models such as @@ -150,7 +150,7 @@
Evaluates and plots n_clusters and clustering metrics BIC, CH, and SC for the provided data.
The clustering evaluation metrics (BIC, CH, and SC) are ranked by the average of their independent rankings.
PCA plot of clustering with centers highlighted
:type palette: Optional[ListedColormap]
:param palette: list of RGB tuples or matplotlib.colors.ListedColormap
PCA plot of clustering with medoids highlighted
Heatmap for correlation matrix with colored sidebar to label clusters.
Loads protein benchmarking datasets.
The benchmarks are categorized into amino acid (‘AA’), domain (‘DOM’), and sequence (‘SEQ’) level datasets.
-By default, an overview table is provided (name='INFO'). For in-depth details, refer to [Breimann23a].
+By default, an overview table is provided (name='Overview'). For in-depth details, refer to [Breimann23a].
A DataFrame of either the selected sequence dataset (df_seq) or
-general info on all benchmark datasets (df_info).
+overview on all benchmark datasets (df_overview).