diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 0448c4e5..693bf7ed 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.1 +current_version = 0.6.2 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? diff --git a/HISTORY.md b/HISTORY.md index af37ad01..4af5d06d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,12 @@ # Changelog + +# 0.6.2 +* FIX preprocessing with VST floats and inf +* FIX plotly display + # 0.6.1 * FIX data loading + # 0.6.0 * ADD mzTAB support * ENH color Volcano Plot data points using list of protein names `color_list=your_protein_list` diff --git a/alphastats/DataSet.py b/alphastats/DataSet.py index 6b8c156c..26b002ca 100644 --- a/alphastats/DataSet.py +++ b/alphastats/DataSet.py @@ -146,6 +146,7 @@ def create_matrix(self): df.columns = df.columns.str.replace(substring_to_remove, "") # transpose dataframe mat = df.transpose() + mat.replace([np.inf, -np.inf], np.nan, inplace=True) # remove proteins with only zero self.mat = mat.loc[:, (mat != 0).any(axis=0)] self.mat = self.mat.astype(float) @@ -206,7 +207,7 @@ def overview(self): dataset_overview = ( "Attributes of the DataSet can be accessed using: \n" + "DataSet.rawinput:\t Raw Protein data.\n" - + "DataSet.mat:\tProcessed data matrix with ProteinIDs/ProteinGroups as columns and samples as rows. All computations are performed on this matrix.\n" + + "DataSet.mat:\t\tProcessed data matrix with ProteinIDs/ProteinGroups as columns and samples as rows. All computations are performed on this matrix.\n" + "DataSet.metadata:\tMetadata for the samples in the matrix. Metadata will be matched with DataSet.mat when needed (for instance Volcano Plot)." ) print(dataset_overview) diff --git a/alphastats/DataSet_Plot.py b/alphastats/DataSet_Plot.py index 15a563a8..3f7eeab3 100644 --- a/alphastats/DataSet_Plot.py +++ b/alphastats/DataSet_Plot.py @@ -115,6 +115,8 @@ def plot_umap(self, group=None, circle=False): ) return dimensionality_reduction.plot + + @ignore_warning(RuntimeWarning) def plot_volcano( self, group1, diff --git a/alphastats/DataSet_Preprocess.py b/alphastats/DataSet_Preprocess.py index c36be93d..3de7144a 100644 --- a/alphastats/DataSet_Preprocess.py +++ b/alphastats/DataSet_Preprocess.py @@ -81,11 +81,11 @@ def _imputation(self, method): logging.info("Imputing data...") if method == "mean": - imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy="mean") + imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy="mean", keep_empty_features=True) imputation_array = imp.fit_transform(self.mat.values) elif method == "median": - imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy="median") + imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy="median", keep_empty_features=True) imputation_array = imp.fit_transform(self.mat.values) elif method == "knn": @@ -145,7 +145,7 @@ def _normalization(self, method): ) elif method == "vst": - scaler = sklearn.preprocessing.PowerTransformer() + scaler = sklearn.preprocessing.PowerTransformer(standardize=False) normalized_array = scaler.fit_transform(self.mat.values) else: @@ -167,10 +167,17 @@ def reset_preprocessing(self): self.create_matrix() print("All preprocessing steps are reset.") + @ignore_warning(RuntimeWarning) def _compare_preprocessing_modes(self, func, params_for_func) -> list: dataset = self - imputation_methods = ["mean", "median", "knn"] - normalization_methods = ["zscore", "quantile", "vst"] + + # normalization_methods = methods["normalization"] + # if isinstance(methods, dict): + # imputation_methods = methods["imputation"] + + imputation_methods = ["mean", "median", "knn", "randomforest"] + normalization_methods = ["vst","zscore", "quantile" ] + preprocessing_modes = list(itertools.product(normalization_methods, imputation_methods)) results_list = [] @@ -182,7 +189,7 @@ def _compare_preprocessing_modes(self, func, params_for_func) -> list: # reset preprocessing dataset.reset_preprocessing() print(f"Normalization {preprocessing_mode[0]}, Imputation {str(preprocessing_mode[1])}") - + dataset.mat.replace([np.inf, -np.inf], np.nan, inplace=True) dataset.preprocess( subset=True, normalization = preprocessing_mode[0], @@ -192,6 +199,8 @@ def _compare_preprocessing_modes(self, func, params_for_func) -> list: res = func(**params_for_func) results_list.append(res) + print("\t") + return results_list def _log2_transform(self): @@ -272,11 +281,13 @@ def preprocess( if subset: self.mat = self._subset() - if log2_transform: + if log2_transform and self.preprocessing_info.get("Log2-transformed") is False: self._log2_transform() if normalization is not None: self._normalization(method=normalization) + self.mat = self.mat.replace([np.inf, -np.inf], np.nan) + #self.mat[:] = np.nan_to_num(self.mat) if imputation is not None: self._imputation(method=imputation) diff --git a/alphastats/__init__.py b/alphastats/__init__.py index be1fa152..9d464759 100644 --- a/alphastats/__init__.py +++ b/alphastats/__init__.py @@ -1,5 +1,5 @@ __project__ = "alphastats" -__version__ = "0.6.1" +__version__ = "0.6.2" __license__ = "Apache" __description__ = "An open-source Python package for Mass Spectrometry Analysis" __author__ = "Mann Labs" diff --git a/alphastats/plots/DimensionalityReduction.py b/alphastats/plots/DimensionalityReduction.py index 5bf9570d..297b8c8c 100644 --- a/alphastats/plots/DimensionalityReduction.py +++ b/alphastats/plots/DimensionalityReduction.py @@ -134,6 +134,7 @@ def _plot(self, sample_names, group_color): labels=self.labels, color=group_color, hover_data=[components[self.dataset.sample]], + template="simple_white+alphastats_colors" ) # rename hover_data_0 to sample diff --git a/alphastats/plots/IntensityPlot.py b/alphastats/plots/IntensityPlot.py index 8da36eda..9be25187 100644 --- a/alphastats/plots/IntensityPlot.py +++ b/alphastats/plots/IntensityPlot.py @@ -4,9 +4,30 @@ import numpy as np import plotly.graph_objects as go import plotly.express as px +import plotly from alphastats.plots.PlotUtils import plotly_object, PlotUtils +plotly.io.templates["alphastats_colors"] = plotly.graph_objects.layout.Template( + layout=plotly.graph_objects.Layout( + paper_bgcolor="rgba(0,0,0,0)", + plot_bgcolor="rgba(0,0,0,0)", + colorway=[ + "#009599", + "#005358", + "#772173", + "#B65EAF", # pink + "#A73A00", + "#6490C1", + "#FF894F", + "#2B5E8B", + "#A87F32", + ], + ) +) + +plotly.io.templates.default = "simple_white+alphastats_colors" + class IntensityPlot(PlotUtils): def __init__(self, @@ -115,23 +136,26 @@ def _prepare_data(self): def _plot(self): if self.method == "violin": fig = px.violin( - self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label} + self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label}, + template="simple_white+alphastats_colors" ) elif self.method == "box": fig = px.box( - self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label} + self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label}, + template="simple_white+alphastats_colors" ) elif self.method == "scatter": fig = px.scatter( - self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label} + self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label}, + template="simple_white+alphastats_colors" ) elif self.method == "all": fig = px.violin( self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label}, - box=True, points="all" + box=True, points="all", template= "simple_white+alphastats_colors" ) else: diff --git a/alphastats/plots/PlotUtils.py b/alphastats/plots/PlotUtils.py index 990dcbca..60d9b00d 100644 --- a/alphastats/plots/PlotUtils.py +++ b/alphastats/plots/PlotUtils.py @@ -2,6 +2,26 @@ import seaborn as sns import plotly.graph_objects as go +plotly.io.templates["alphastats_colors"] = plotly.graph_objects.layout.Template( + layout=plotly.graph_objects.Layout( + paper_bgcolor="rgba(0,0,0,0)", + plot_bgcolor="rgba(0,0,0,0)", + colorway=[ + "#009599", + "#005358", + "#772173", + "#B65EAF", # pink + "#A73A00", + "#6490C1", + "#FF894F", + "#2B5E8B", + "#A87F32", + ], + ) +) + +plotly.io.templates.default = "simple_white+alphastats_colors" + class PlotUtils: def __init__(self) -> None: pass diff --git a/alphastats/plots/VolcanoPlot.py b/alphastats/plots/VolcanoPlot.py index 1ee8501b..56d59d6f 100644 --- a/alphastats/plots/VolcanoPlot.py +++ b/alphastats/plots/VolcanoPlot.py @@ -5,9 +5,28 @@ import pandas as pd import plotly.express as px import plotly.graph_objects as go - +import plotly from functools import lru_cache +plotly.io.templates["alphastats_colors"] = plotly.graph_objects.layout.Template( + layout=plotly.graph_objects.Layout( + paper_bgcolor="rgba(0,0,0,0)", + plot_bgcolor="rgba(0,0,0,0)", + colorway=[ + "#009599", + "#005358", + "#772173", + "#B65EAF", # pink + "#A73A00", + "#6490C1", + "#FF894F", + "#2B5E8B", + "#A87F32", + ], + ) +) + +plotly.io.templates.default = "simple_white+alphastats_colors" class VolcanoPlot(PlotUtils): def __init__( @@ -245,7 +264,7 @@ def _annotate_result_df(self): convert pvalue to log10 add color labels for up and down regulates """ - self.res = self.res[(self.res["log2fc"] < 10) & (self.res["log2fc"] > -10)] + self.res = self.res[(self.res["log2fc"] < 20) & (self.res["log2fc"] > -20)] self.res["-log10(p-value)"] = -np.log10(self.res[self.pvalue_column]) self.alpha = -np.log10(self.alpha) @@ -353,6 +372,7 @@ def _plot(self): y="-log10(p-value)", color="color", hover_data=self.hover_data, + template= "simple_white+alphastats_colors" ) # update coloring diff --git a/docs/conf.py b/docs/conf.py index be31e1f2..57fd7fa5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,7 @@ author = "Elena Krismer" # The full version, including alpha/beta/rc tags -release = "0.6.1" +release = "0.6.2" # -- General configuration --------------------------------------------------- diff --git a/nbs/getting_started.ipynb b/nbs/getting_started.ipynb index eb6af19a..de914369 100644 --- a/nbs/getting_started.ipynb +++ b/nbs/getting_started.ipynb @@ -13,6 +13,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "63ea0375-2937-4689-b1c9-f2186776b329", "metadata": {}, @@ -329,6 +330,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ebcf20a2-458d-480b-8a76-4d38c98aa1a2", "metadata": {}, @@ -618,6 +620,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1b2da0df-dd79-429f-815e-cea5228790dd", "metadata": {}, @@ -648,6 +651,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "71235990-e4bc-491f-8828-8f9445dcac3b", "metadata": {}, @@ -681,6 +685,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "aefc5fe7-0f97-42d5-acb6-d9a151320627", "metadata": {}, @@ -716,6 +721,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "e0efd5a2-22cf-4504-9ce8-0490a7967e92", "metadata": {}, @@ -725,6 +731,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "b8244053-e920-447d-a405-7049507733a9", "metadata": {}, @@ -751,6 +758,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1cd24307-cd84-411b-8c3c-dccc3be08dbc", "metadata": {}, @@ -787,6 +795,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "bc027a63-318a-475d-a04e-7e27002e2ee2", "metadata": {}, @@ -813,6 +822,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "51c7655f-9bb7-4473-be47-563e8ced0e98", "metadata": {}, @@ -848,6 +858,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d4d74bd7-3937-4155-965c-302c7a7795c4", "metadata": {}, @@ -874,6 +885,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "4357b5ee-e208-4992-9f52-60ec5eb73758", "metadata": {}, @@ -914,6 +926,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7bdf8679-0451-4f2f-b041-637d134c7ea2", "metadata": {}, @@ -958,7 +971,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.13" }, "vscode": { "interpreter": { diff --git a/release/one_click_linux_gui/control b/release/one_click_linux_gui/control index e0622975..41cb2552 100644 --- a/release/one_click_linux_gui/control +++ b/release/one_click_linux_gui/control @@ -1,5 +1,5 @@ Package: alphastats -Version: 0.6.1 +Version: 0.6.2 Architecture: all Maintainer: MannLabs Description: alphastats diff --git a/release/one_click_linux_gui/create_installer_linux.sh b/release/one_click_linux_gui/create_installer_linux.sh index 8b519857..ee2dcbaf 100644 --- a/release/one_click_linux_gui/create_installer_linux.sh +++ b/release/one_click_linux_gui/create_installer_linux.sh @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_linux_gui # Make sure you include the required extra packages and always use the stable or very-stable options! -pip install "../../dist/alphastats-0.6.1-py3-none-any.whl" +pip install "../../dist/alphastats-0.6.2-py3-none-any.whl" # Creating the stand-alone pyinstaller folder pip install pyinstaller==5.8 diff --git a/release/one_click_macos_gui/Info.plist b/release/one_click_macos_gui/Info.plist index 4ff955e0..c0f30f00 100644 --- a/release/one_click_macos_gui/Info.plist +++ b/release/one_click_macos_gui/Info.plist @@ -9,7 +9,7 @@ CFBundleIconFile alphapeptstats_logo.icns CFBundleIdentifier - alphastats.0.6.1 + alphastats.0.6.2 CFBundleShortVersionString 0.6.0 CFBundleInfoDictionaryVersion diff --git a/release/one_click_macos_gui/create_installer_macos.sh b/release/one_click_macos_gui/create_installer_macos.sh index 78cad8d2..525c03bc 100755 --- a/release/one_click_macos_gui/create_installer_macos.sh +++ b/release/one_click_macos_gui/create_installer_macos.sh @@ -20,7 +20,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_macos_gui -pip install "../../dist/alphastats-0.6.1-py3-none-any.whl" +pip install "../../dist/alphastats-0.6.2-py3-none-any.whl" # Creating the stand-alone pyinstaller folder pip install pyinstaller==5.8 diff --git a/release/one_click_macos_gui/distribution.xml b/release/one_click_macos_gui/distribution.xml index 6999f6fb..ddc87a29 100644 --- a/release/one_click_macos_gui/distribution.xml +++ b/release/one_click_macos_gui/distribution.xml @@ -1,6 +1,6 @@ - AlphaPeptStats 0.6.1 + AlphaPeptStats 0.6.2 diff --git a/release/one_click_windows_gui/alphastats_innoinstaller.iss b/release/one_click_windows_gui/alphastats_innoinstaller.iss index 76a74e81..a59c8f6a 100644 --- a/release/one_click_windows_gui/alphastats_innoinstaller.iss +++ b/release/one_click_windows_gui/alphastats_innoinstaller.iss @@ -2,7 +2,7 @@ ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! #define MyAppName "AlphaPeptStats" -#define MyAppVersion "0.6.1" +#define MyAppVersion "0.6.2" #define MyAppPublisher "MannLabs" #define MyAppURL "https://github.com/MannLabs/alphapeptstats" #define MyAppExeName "alphastats_gui.exe" diff --git a/release/one_click_windows_gui/create_installer_windows.sh b/release/one_click_windows_gui/create_installer_windows.sh index 09e65279..3f2e0aad 100644 --- a/release/one_click_windows_gui/create_installer_windows.sh +++ b/release/one_click_windows_gui/create_installer_windows.sh @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_windows_gui # Make sure you include the required extra packages and always use the stable or very-stable options! -pip install "../../dist/alphastats-0.6.1-py3-none-any.whl" +pip install "../../dist/alphastats-0.6.2-py3-none-any.whl" # Creating the stand-alone pyinstaller folder pip install pyinstaller==5.8 diff --git a/requirements.txt b/requirements.txt index 718163c8..531a6fa3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,14 @@ -pandas==2.0.0 -scikit-learn==1.2.1 +pandas==2.0.2 +scikit-learn==1.2.2 data_cache>=0.1.6 -plotly==5.11.0 -statsmodels==0.13.5 -openpyxl>=3.0.10 +plotly==5.15.0 +statsmodels==0.14.0 sklearn_pandas==2.2.0 pingouin==0.5.3 scipy==1.10.1 tqdm>=4.64.0 diffxpy==0.7.4 -anndata==0.8.0 +anndata==0.9.1 umap-learn==0.5.3 streamlit==1.22.0 tables==3.7.0 @@ -20,4 +19,5 @@ swifter==1.2.0 click==8.0.1 kaleido==0.2.1 combat==0.3.3 -pyteomics==4.6.0 \ No newline at end of file +pyteomics==4.6.0 +openpyxl>=3.0.10 \ No newline at end of file diff --git a/setup.py b/setup.py index a55caee8..6c7bee1f 100644 --- a/setup.py +++ b/setup.py @@ -25,13 +25,13 @@ def create_pip_wheel(): requirements = get_requirements() setuptools.setup( name="alphastats", - version="0.6.1", + version="0.6.2", license="Apache", - description="An open-source Python package for Mass Spectrometry Analysis", + description="An open-source Python package for automated and scalable statistical analysis of mass spectrometry-based proteomics", long_description=get_long_description(), long_description_content_type="text/markdown", author="Mann Labs", - author_email="elena.krismer@hotmail.com", + author_email="elena.krismer@gmail.com", url="https://github.com/MannLabs/alphastats", project_urls={ "Mann Labs at MPIB": "https://www.biochem.mpg.de/mann", diff --git a/tests/test_DataSet.py b/tests/test_DataSet.py index 39650f84..83564945 100644 --- a/tests/test_DataSet.py +++ b/tests/test_DataSet.py @@ -295,12 +295,12 @@ def test_preprocess_normalize_vst(self): self.obj.preprocess(log2_transform=False,normalization="vst") expected_mat = pd.DataFrame( { - "a": [-1.30773413, 1.12010046, 0.18763367], - "b": [1.41421361, -0.70710674, -0.70710674], - "c": [-1.39384919, 0.90401955, 0.48982964], + "a": [ 3.19059101, 11.591763, 8.365096], + "b": [0.084829, 0.084829, 0.084829], + "c": [0.000000, 7.850074, 6.435102], } ) - pd._testing.assert_frame_equal(self.obj.mat, expected_mat) + pd._testing.assert_frame_equal(self.obj.mat.round(2), expected_mat.round(2)) def test_preprocess_imputation_mean_values(self): self.obj.mat = pd.DataFrame( @@ -456,7 +456,7 @@ def test_plot_volcano_compare_preprocessing_modes(self): group2=["1_71_F10", "1_73_F12"], compare_preprocessing_modes=True ) - self.assertEqual(len(result_list), 9) + self.assertEqual(len(result_list), 12) def test_preprocess_subset(self): self.obj.preprocess(subset=True, log2_transform=False)