Skip to content

Commit

Permalink
Merge branch 'main' into multicova
Browse files Browse the repository at this point in the history
  • Loading branch information
elena-krismer authored Jun 30, 2023
2 parents 428f4fb + be87721 commit dd4e680
Show file tree
Hide file tree
Showing 33 changed files with 7,678 additions and 203 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.5.4
current_version = 0.6.2
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
15 changes: 14 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
# Changelog

# 0.6.0

# 0.6.3
* ENH download metadata template in the GUI
* ENH multicova analysis
* ENH filter data completeness `dataset.preprocess(data_completeness=0.7)`
* ADD `GenericLoader` for unsupported data formats

# 0.6.2
* FIX preprocessing with VST floats and inf
* FIX plotly display

# 0.6.1
* FIX data loading

# 0.6.0
* ADD mzTAB support
* ENH color Volcano Plot data points using list of protein names `color_list=your_protein_list`


# 0.5.4
* FIX altair version - binning of streamlit version

Expand Down
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@

[link]:https://alphapeptstats.readthedocs.io/en/main/

<div align = center>
<br>
<br>

[<kbd> <br> Streamlit WebApp <br> </kbd>][link_streamlit]

</div>

<br>
<br>

[link_streamlit]:https://mannlabs-alphapeptstats-alphastatsguialphapeptstats-qyzgwd.streamlit.app/

An open-source Python package for downstream mass spectrometry data analysis from the [Mann Group at the University of Copenhagen](https://www.cpr.ku.dk/research/proteomics/mann/).


Expand Down
11 changes: 9 additions & 2 deletions alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from alphastats.loader.MaxQuantLoader import MaxQuantLoader
from alphastats.loader.SpectronautLoader import SpectronautLoader
from alphastats.loader.GenericLoader import GenericLoader
from alphastats.loader.mzTabLoader import mzTabLoader


from alphastats.DataSet_Plot import Plot
from alphastats.DataSet_Preprocess import Preprocess
Expand Down Expand Up @@ -102,15 +104,18 @@ def _check_loader(self, loader):
loader : loader
"""
if not isinstance(

loader,
(
AlphaPeptLoader,
MaxQuantLoader,
DIANNLoader,
FragPipeLoader,
SpectronautLoader,
GenericLoader
GenericLoader,
mzTabLoader
),

):
raise LoaderError(
"loader must be from class: AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader or SpectronautLoader"
Expand Down Expand Up @@ -159,8 +164,10 @@ def create_matrix(self):
df.columns = df.columns.str.replace(substring_to_remove, "")
# transpose dataframe
mat = df.transpose()
mat.replace([np.inf, -np.inf], np.nan, inplace=True)
# remove proteins with only zero
self.mat = mat.loc[:, (mat != 0).any(axis=0)]
self.mat = self.mat.astype(float)
# reset preprocessing info
self.preprocessing_info = self._save_dataset_info()
self.preprocessed = False
Expand Down Expand Up @@ -219,7 +226,7 @@ def overview(self):
dataset_overview = (
"Attributes of the DataSet can be accessed using: \n"
+ "DataSet.rawinput:\t Raw Protein data.\n"
+ "DataSet.mat:\tProcessed data matrix with ProteinIDs/ProteinGroups as columns and samples as rows. All computations are performed on this matrix.\n"
+ "DataSet.mat:\t\tProcessed data matrix with ProteinIDs/ProteinGroups as columns and samples as rows. All computations are performed on this matrix.\n"
+ "DataSet.metadata:\tMetadata for the samples in the matrix. Metadata will be matched with DataSet.mat when needed (for instance Volcano Plot)."
)
print(dataset_overview)
27 changes: 16 additions & 11 deletions alphastats/DataSet_Plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,19 +122,22 @@ def plot_umap(self, group: str = None, circle: bool = False):
)
return dimensionality_reduction.plot


@ignore_warning(RuntimeWarning)
def plot_volcano(
self,
group1: Union[str, list],
group2: Union[str, list],
column: str = None,
method: str = "ttest",
labels: bool = False,
min_fc: float = 1.0,
alpha: float = 0.05,
draw_line: bool = True,
perm: int = 100,
fdr: float = 0.05,
compare_preprocessing_modes: bool = False,
column:str=None,
method:str="ttest",
labels:bool=False,
min_fc:float=1.0,
alpha:float=0.05,
draw_line:bool=True,
perm:int=100,
fdr:float=0.05,
compare_preprocessing_modes:bool=False,
color_list:list=[]
):
"""Plot Volcano Plot
Expand All @@ -149,6 +152,7 @@ def plot_volcano(
draw_line(boolean): whether to draw cut off lines.
perm(float,optional): number of permutations when using SAM as method. Defaults to 100.
fdr(float,optional): FDR cut off when using SAM as method. Defaults to 0.05.
color_list (list): list with ProteinIDs that should be highlighted.
compare_preprocessing_modes(bool): Will iterate through normalization and imputation modes and return a list of VolcanoPlots in different settings, Default False.
Expand All @@ -174,8 +178,9 @@ def plot_volcano(
min_fc=min_fc,
alpha=alpha,
draw_line=draw_line,
perm=perm,
perm=perm,
fdr=fdr,
color_list=color_list
)

return volcano_plot.plot
Expand Down Expand Up @@ -252,7 +257,7 @@ def plot_intensity(
protein_id (str): ProteinGroup ID
group (str, optional): A metadata column used for grouping. Defaults to None.
subgroups (list, optional): Select variables from the group column. Defaults to None.
method (str, optional): Violinplot = "violin", Boxplot = "box", Scatterplot = "scatter". Defaults to "box".
method (str, optional): Violinplot = "violin", Boxplot = "box", Scatterplot = "scatter" or "all". Defaults to "box".
add_significance (bool, optional): add p-value bar, only possible when two groups are compared. Defaults False.
log_scale (bool, optional): yaxis in logarithmic scale. Defaults to False.
Expand Down
30 changes: 17 additions & 13 deletions alphastats/DataSet_Preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,11 @@ def _imputation(self, method: str):
logging.info("Imputing data...")

if method == "mean":
imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy="mean")
imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy="mean", keep_empty_features=True)
imputation_array = imp.fit_transform(self.mat.values)

elif method == "median":
imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy="median")
imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy="median", keep_empty_features=True)
imputation_array = imp.fit_transform(self.mat.values)

elif method == "knn":
Expand Down Expand Up @@ -173,7 +173,7 @@ def _normalization(self, method: str):
)

elif method == "vst":
scaler = sklearn.preprocessing.PowerTransformer()
scaler = sklearn.preprocessing.PowerTransformer(standardize=False)
normalized_array = scaler.fit_transform(self.mat.values)

else:
Expand All @@ -195,13 +195,14 @@ def reset_preprocessing(self):
self.create_matrix()
print("All preprocessing steps are reset.")

@ignore_warning(RuntimeWarning)
def _compare_preprocessing_modes(self, func, params_for_func) -> list:
dataset = self
imputation_methods = ["mean", "median", "knn"]
normalization_methods = ["zscore", "quantile", "vst"]
preprocessing_modes = list(
itertools.product(normalization_methods, imputation_methods)
)
imputation_methods = ["mean", "median", "knn", "randomforest"]
normalization_methods = ["vst","zscore", "quantile" ]

preprocessing_modes = list(itertools.product(normalization_methods, imputation_methods))


results_list = []

Expand All @@ -211,9 +212,8 @@ def _compare_preprocessing_modes(self, func, params_for_func) -> list:
for preprocessing_mode in preprocessing_modes:
# reset preprocessing
dataset.reset_preprocessing()
print(
f"Normalization {preprocessing_mode[0]}, Imputation {str(preprocessing_mode[1])}"
)
print(f"Normalization {preprocessing_mode[0]}, Imputation {str(preprocessing_mode[1])}")
dataset.mat.replace([np.inf, -np.inf], np.nan, inplace=True)

dataset.preprocess(
subset=True,
Expand All @@ -223,6 +223,8 @@ def _compare_preprocessing_modes(self, func, params_for_func) -> list:

res = func(**params_for_func)
results_list.append(res)

print("\t")

return results_list

Expand Down Expand Up @@ -305,15 +307,17 @@ def preprocess(
if subset:
self.mat = self._subset()


if data_completeness> 0:
self._remove_na_values(cut_off=data_completeness)

if log2_transform:
if log2_transform and self.preprocessing_info.get("Log2-transformed") is False:
self._log2_transform()

if normalization is not None:
self._normalization(method=normalization)

self.mat = self.mat.replace([np.inf, -np.inf], np.nan)

if imputation is not None:
self._imputation(method=imputation)

Expand Down
2 changes: 1 addition & 1 deletion alphastats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__project__ = "alphastats"
__version__ = "0.5.4"
__version__ = "0.6.2"
__license__ = "Apache"
__description__ = "An open-source Python package for Mass Spectrometry Analysis"
__author__ = "Mann Labs"
Expand Down
29 changes: 29 additions & 0 deletions alphastats/loader/mzTabLoader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pyteomics import mztab
from alphastats.loader.BaseLoader import BaseLoader

class mzTabLoader(BaseLoader):
    """Loader that reads the protein table of an mzTab file."""

    def __init__(
        self,
        file,
        intensity_column: str = "protein_abundance_[sample]",
        index_column: str = "accession",
    ):
        """Load mzTab file. Will add contamination column for further analysis.

        Args:
            file (str): path to mzTab file.
            intensity_column (str, optional): columns where the intensity of the proteins are given. Defaults to "protein_abundance_[sample]".
            index_column (str, optional): column indicating the protein groups. Defaults to "accession".
        """
        # NOTE(review): BaseLoader.__init__ is not invoked here; the loader
        # attributes are set by hand instead -- confirm this is intentional.
        self.filter_columns = []
        self.gene_names = None
        self.intensity_column = intensity_column
        self.index_column = index_column
        self.confidence_column = None
        self.evidence_df = None
        self._load_protein_table(file=file)
        # Not defined in this class; presumably inherited from BaseLoader.
        self._add_contamination_column()

    def _load_protein_table(self, file):
        """Parse the mzTab file and store its protein table and metadata.

        Sets ``rawinput`` (the protein table), ``mztab_metadata`` (the file's
        metadata section) and ``software`` (first entry of the protein
        table's ``search_engine`` column).

        Args:
            file (str): path to mzTab file.
        """
        tables = mztab.MzTab(file)
        self.rawinput = tables.protein_table
        self.mztab_metadata = tables.metadata
        # Positional access: take the first row's search engine regardless of
        # how the protein table happens to be indexed.
        self.software = tables.protein_table.search_engine.iloc[0]

1 change: 1 addition & 0 deletions alphastats/plots/DimensionalityReduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def _plot(self, sample_names, group_color):
labels=self.labels,
color=group_color,
hover_data=[components[self.dataset.sample]],
template="simple_white+alphastats_colors"
)

# rename hover_data_0 to sample
Expand Down
48 changes: 33 additions & 15 deletions alphastats/plots/IntensityPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,30 @@
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly

from alphastats.plots.PlotUtils import plotly_object, PlotUtils

# Register the "alphastats_colors" template: fully transparent paper and plot
# backgrounds (alpha 0) plus the colorway listed below.
# NOTE(review): alphastats.plots.PlotUtils appears to register the same
# template at import time -- this copy may be redundant; confirm.
plotly.io.templates["alphastats_colors"] = go.layout.Template(
    layout=go.Layout(
        colorway=[
            "#009599",
            "#005358",
            "#772173",
            "#B65EAF",  # pink
            "#A73A00",
            "#6490C1",
            "#FF894F",
            "#2B5E8B",
            "#A87F32",
        ],
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )
)

# Use "simple_white" combined with the template above as the default.
plotly.io.templates.default = "simple_white+alphastats_colors"


class IntensityPlot(PlotUtils):
def __init__(
Expand Down Expand Up @@ -122,29 +143,26 @@ def _prepare_data(self):
def _plot(self):
if self.method == "violin":
fig = px.violin(
self.prepared_df,
y=self.protein_id,
x=self.group,
color=self.group,
labels={self.protein_id: self.y_label},
self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label},
template="simple_white+alphastats_colors"
)

elif self.method == "box":
fig = px.box(
self.prepared_df,
y=self.protein_id,
x=self.group,
color=self.group,
labels={self.protein_id: self.y_label},
self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label},
template="simple_white+alphastats_colors"
)

elif self.method == "scatter":
fig = px.scatter(
self.prepared_df,
y=self.protein_id,
x=self.group,
color=self.group,
labels={self.protein_id: self.y_label},
self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label},
template="simple_white+alphastats_colors"
)

elif self.method == "all":
fig = px.violin(
self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label},
box=True, points="all", template= "simple_white+alphastats_colors"
)

else:
Expand Down
21 changes: 21 additions & 0 deletions alphastats/plots/PlotUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,27 @@
import plotly.graph_objects as go


# Register the "alphastats_colors" template: fully transparent paper and plot
# backgrounds (alpha 0) plus the colorway listed below.
plotly.io.templates["alphastats_colors"] = go.layout.Template(
    layout=go.Layout(
        colorway=[
            "#009599",
            "#005358",
            "#772173",
            "#B65EAF",  # pink
            "#A73A00",
            "#6490C1",
            "#FF894F",
            "#2B5E8B",
            "#A87F32",
        ],
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )
)

# Use "simple_white" combined with the template above as the default.
plotly.io.templates.default = "simple_white+alphastats_colors"


class PlotUtils:
def __init__(self) -> None:
pass
Expand Down
Loading

0 comments on commit dd4e680

Please sign in to comment.