Skip to content

Commit

Permalink
Merge pull request #197 from MannLabs/revision
Browse files Browse the repository at this point in the history
Release 0.6.0 Revision
  • Loading branch information
elena-krismer authored May 31, 2023
2 parents 719ba6c + adab852 commit d79a158
Show file tree
Hide file tree
Showing 26 changed files with 1,713 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.5.4
current_version = 0.6.0
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Changelog
# 0.6.0
* ADD mzTab support
* ENH color Volcano Plot data points using list of protein names `color_list=your_protein_list`

# 0.5.4
* FIX altair version - binning of streamlit version
Expand Down
3 changes: 2 additions & 1 deletion alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from alphastats.loader.FragPipeLoader import FragPipeLoader
from alphastats.loader.MaxQuantLoader import MaxQuantLoader
from alphastats.loader.SpectronautLoader import SpectronautLoader
from alphastats.loader.mzTabLoader import mzTabLoader

from alphastats.DataSet_Plot import Plot
from alphastats.DataSet_Preprocess import Preprocess
Expand Down Expand Up @@ -96,7 +97,7 @@ def _check_loader(self, loader):
loader : loader
"""
if not isinstance(
loader, (AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader, SpectronautLoader)
loader, (AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader, SpectronautLoader, mzTabLoader)
):
raise LoaderError(
"loader must be from class: AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader or SpectronautLoader"
Expand Down
25 changes: 14 additions & 11 deletions alphastats/DataSet_Plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,16 @@ def plot_volcano(
self,
group1,
group2,
column=None,
method="ttest",
labels=False,
min_fc=1,
alpha=0.05,
draw_line=True,
perm=100,
fdr=0.05,
compare_preprocessing_modes=False
column:str=None,
method:str="ttest",
labels:bool=False,
min_fc:float=1.0,
alpha:float=0.05,
draw_line:bool=True,
perm:int=100,
fdr:float=0.05,
compare_preprocessing_modes:bool=False,
color_list:list=[]
):
"""Plot Volcano Plot
Expand All @@ -142,6 +143,7 @@ def plot_volcano(
draw_line(boolean): whether to draw cut off lines.
perm(float,optional): number of permutations when using SAM as method. Defaults to 100.
fdr(float,optional): FDR cut off when using SAM as method. Defaults to 0.05.
color_list (list): list with ProteinIDs that should be highlighted.
compare_preprocessing_modes(bool): Will iterate through normalization and imputation modes and return a list of VolcanoPlots in different settings, Default False.
Expand All @@ -166,7 +168,8 @@ def plot_volcano(
alpha=alpha,
draw_line=draw_line,
perm=perm,
fdr=fdr
fdr=fdr,
color_list=color_list
)

return volcano_plot.plot
Expand Down Expand Up @@ -241,7 +244,7 @@ def plot_intensity(
ID (str): ProteinGroup ID
group (str, optional): A metadata column used for grouping. Defaults to None.
subgroups (list, optional): Select variables from the group column. Defaults to None.
method (str, optional): Violinplot = "violin", Boxplot = "box", Scatterplot = "scatter". Defaults to "box".
method (str, optional): Violinplot = "violin", Boxplot = "box", Scatterplot = "scatter" or "all". Defaults to "box".
add_significance (bool, optional): add p-value bar, only possible when two groups are compared. Defaults False.
log_scale (bool, optional): yaxis in logarithmic scale. Defaults to False.
Expand Down
2 changes: 1 addition & 1 deletion alphastats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__project__ = "alphastats"
__version__ = "0.5.4"
__version__ = "0.6.0"
__license__ = "Apache"
__description__ = "An open-source Python package for Mass Spectrometry Analysis"
__author__ = "Mann Labs"
Expand Down
29 changes: 29 additions & 0 deletions alphastats/loader/mzTabLoader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pyteomics import mztab
from alphastats.loader.BaseLoader import BaseLoader

class mzTabLoader(BaseLoader):
    """Loader for protein-level quantification results stored in an mzTab file."""

    def __init__(self, file, intensity_column: str="protein_abundance_[sample]", index_column:str="accession"):
        """Load mzTab file. Will add contamination column for further analysis.

        Args:
            file (str): path to mzTab file.
            intensity_column (str, optional): columns where the intensity of the proteins are given. Defaults to "protein_abundance_[sample]".
            index_column (str, optional): column indicating the protein groups. Defaults to "accession".
        """
        # NOTE(review): BaseLoader.__init__ is intentionally not invoked here
        # (same as the original); the attributes below are set by hand instead.
        self.filter_columns = []
        self.gene_names = None
        self.intensity_column = intensity_column
        self.index_column = index_column
        self.confidence_column = None
        self.evidence_df = None
        self._load_protein_table(file=file)
        # presumably inherited from BaseLoader -- it is not defined in this class.
        self._add_contamination_column()

    def _load_protein_table(self, file):
        """Parse the mzTab file and store its protein table and metadata.

        Sets ``self.rawinput`` (protein table), ``self.mztab_metadata`` and
        ``self.software`` (the ``search_engine`` entry of the first protein row).

        Args:
            file (str): path to mzTab file.

        Raises:
            IndexError: if the protein table contains no rows.
        """
        tables = mztab.MzTab(file)
        # Fetch the protein table once and reuse it for both attributes.
        protein_table = tables.protein_table
        self.rawinput = protein_table
        self.mztab_metadata = tables.metadata
        # Take the first row by position: a plain ``[0]`` is a label lookup and
        # only falls back to positional access (deprecated in pandas) when the
        # table is indexed by something other than integers.
        self.software = protein_table.search_engine.iloc[0]

6 changes: 6 additions & 0 deletions alphastats/plots/IntensityPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ def _plot(self):
self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label}
)

elif self.method == "all":
fig = px.violin(
self.prepared_df, y=self.protein_id, x=self.group, color=self.group, labels={self.protein_id: self.y_label},
box=True, points="all"
)

else:
raise ValueError(
f"{self.method} is not available."
Expand Down
22 changes: 19 additions & 3 deletions alphastats/plots/VolcanoPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ def __init__(
column=None, method=None,
labels=None, min_fc=None,
alpha=None, draw_line=None,
plot=True, perm=100, fdr=0.05
plot=True, perm=100, fdr=0.05,
color_list=[]
):
self.dataset = dataset
self.group1 = group1
Expand All @@ -31,6 +32,7 @@ def __init__(
self.res = None
self.pvalue_column = None
self.perm=perm
self.color_list = color_list
self._check_input()

if plot:
Expand Down Expand Up @@ -266,6 +268,10 @@ def _annotate_result_df(self):

value = ["down", "up"]
self.res["color"] = np.select(condition, value, default="non_sig")

if len(self.color_list) > 0:
self.res["color"] = np.where(self.res[self.dataset.index_column].isin(self.color_list),
"color", "no_color")


def _add_labels_plot(self):
Expand Down Expand Up @@ -327,6 +333,17 @@ def _draw_fdr_line(self):
line_shape='spline',
showlegend=False)
)

def _color_data_points(self):
    """Recolour the data points of ``self.plot``.

    With an empty ``self.color_list`` the significance categories
    ("non_sig", "up", "down") are coloured; otherwise the two highlight
    categories ("no_color", "color") are used. The chosen mapping is passed
    to ``self._update_colors_plotly`` and its result replaces ``self.plot``.
    """
    highlight_requested = len(self.color_list) > 0

    if highlight_requested:
        palette = {"no_color": "#404040", "color": "#B65EAF"}
    else:
        palette = {"non_sig": "#404040", "up": "#B65EAF", "down": "#009599"}

    self.plot = self._update_colors_plotly(self.plot, color_dict=palette)



def _plot(self):
Expand All @@ -339,8 +356,7 @@ def _plot(self):
)

# update coloring
color_dict = {"non_sig": "#404040", "up": "#B65EAF", "down": "#009599"}
self.plot = self._update_colors_plotly(self.plot, color_dict=color_dict)
self._color_data_points()

if self.labels:
self._add_labels_plot()
Expand Down
8 changes: 7 additions & 1 deletion docs/Introduction.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@

AlphaPeptStats is an open-source package for analyzing mass spectrometry-based proteomics data.

AlphaPeptStats was developed to simplify and standardize the process of analyzing complex datasets. Hereby AlphaPeptStats supports proteomics data generated by `AlphaPept`, `DIA-NN`, `FragPipe`, `MaxQuant` and `Spectronaut`. The tool allows a structured workflow from importing data, preprocessing data to visualization.
AlphaPeptStats was developed to simplify and standardize the process of analyzing complex datasets. AlphaPeptStats supports proteomics data generated by `AlphaPept`, `DIA-NN`, `FragPipe`, `MaxQuant` and `Spectronaut`, as well as quantitative proteomics results in `mzTab` format. The tool allows a structured workflow from importing data, preprocessing data to visualization.

AlphaPeptStats was developed by the [Mann Group at the University of Copenhagen](https://www.biochem.mpg.de/mann) and is freely available with an [Apache License](LICENSE.txt). External Python packages (available in the [requirements](requirements) folder) have their own licenses, which can be consulted on their respective websites.

The workflow consists of:
1. Import of proteomics data
2. Creation of the DataSet, consisting of the imported proteomics data and metadata
3. Data preprocessing (optional)
4. Statistical Analysis and Visualization
6 changes: 6 additions & 0 deletions docs/api_reference/loader.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,9 @@ SpectronautLoader
.. automodule:: alphastats.loader.SpectronautLoader
:members:
:undoc-members:

mzTabLoader
~~~~~~~~~~~~~~~~~~~~~~
.. automodule:: alphastats.loader.mzTabLoader
:members:
:undoc-members:
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
author = "Elena Krismer"

# The full version, including alpha/beta/rc tags
release = "0.5.4"
release = "0.6.0"


# -- General configuration ---------------------------------------------------
Expand Down
58 changes: 51 additions & 7 deletions docs/import_data.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
# Data import


Currently, AlphaStats allows the analysis of four quantitative proteomics software packages: AlphaPept, DIA-NN, FragPipe, MaxQuant and Spectronaut. As the output of these software differs significantly data needs to be loaded in customized loaders.
Currently, AlphaStats supports the analysis of output from five quantitative proteomics software packages: AlphaPept, DIA-NN, FragPipe, MaxQuant and Spectronaut. Results in the mzTab format can be loaded as well. As the output of these software packages differs significantly, the data needs to be loaded with customized loaders.

Imported proteomics data and metadata can be combined in a DataSet, which will be used for the downstream analysis.

```python
import alphastats

maxquant_data = alphastats.MaxQuantLoader(
file="testfiles/maxquant_proteinGroups.txt"
)

dataset = alphastats.DataSet(
loader = maxquant_data,
metadata_path="../testfiles/maxquant/metadata.xlsx",
sample_column="sample"
)
```


## Importing data from a Proteomics software
Expand All @@ -13,7 +27,7 @@ As we are dealing with wide data, a column represents the intensity for one samp

Upon data import, the proteomics data gets processed in an internal format.

## Additional modifications by AlphaStats
### Additional modifications by AlphaStats

When importing the data, AlphaStats will identify potential contaminations based on a contaminant library, created by [Frankenfield et al. 2022](https://www.biorxiv.org/content/10.1101/2022.04.27.489766v2.full). This information will be added as an extra column to the imported data and can either be ignored or used for filtering in the preprocessing step.

Expand Down Expand Up @@ -70,19 +84,49 @@ fragpipe_data = alphastats.FragPipeLoader(file="testfiles/fragpipe_combined_prot

Find more details about the file format [here](https://biognosys.com/content/uploads/2022/12/Spectronaut17_UserManual.pdf).

```python
import alphastats
spectronaut_data = alphastats.FragPipeLoader(file="testfiles/spectronaut/results.tsv")
```

By default, alphastats will use "PG.ProteinGroups" and "PG.Quantity" for the analysis. For an analysis on the peptide level, "F.PeakArea" and the peptide sequences ("PEP.StrippedSequence") can be used.

```python
spectronaut_data = alphastats.FragPipeLoader(
import alphastats
spectronaut_data = alphastats.SpectronautLoader(
file="testfiles/spectronaut/results.tsv",
intensity_column = "F.PeakArea",
index_column = "PEP.StrippedSequence"
)
```

### mzTab

Find more details about the file format [here](https://www.psidev.info/mztab).

```python
import alphastats
mztab_data = alphastats.mzTabLoader(
file="testfiles/mztab/test.mztab"
)
```


## Preparing metadata

To compare samples across various conditions in the downstream analysis, a metadata file in form of a table (excel, csv, tsv) is required. This file should contain a column with the sample IDs (raw file names) matching the sample names annotated in the output file of your proteomics software. Further, information can be provided like disease and various clinical parameters. Examples of metadata files can be found in the [testfiles-folder](https://github.com/MannLabs/alphastats/tree/main/testfiles).


## Creating a DataSet

The whole downstream analysis can be performed on the alphastats.DataSet. To create the DataSet, you need to provide the loader object as well as the metadata.

```python
import alphastats

maxquant_data = alphastats.MaxQuantLoader(
file="testfiles/maxquant_proteinGroups.txt"
)

dataset = alphastats.DataSet(
loader = maxquant_data,
metadata_path="../testfiles/maxquant/metadata.xlsx",
sample_column="sample"
)
```
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ The tool allows a structured workflow like importing data, preprocessing data an
installation.md
import_data.md
data_preprocessing.md
go_analysis.md
functions.rst
workflow_example.rst
go_analysis.md
API <api_reference/index>

..
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements_docs.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
alphastats==0.5.4
alphastats==0.6.0
altair==4.2.0
anndata==0.8.0
attrs==22.1.0
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_linux_gui/control
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: alphastats
Version: 0.5.4
Version: 0.6.0
Architecture: all
Maintainer: MannLabs
Description: alphastats
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_linux_gui/create_installer_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
# Setting up the local package
cd release/one_click_linux_gui
# Make sure you include the required extra packages and always use the stable or very-stable options!
pip install "../../dist/alphastats-0.5.4-py3-none-any.whl"
pip install "../../dist/alphastats-0.6.0-py3-none-any.whl"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==5.8
Expand Down
4 changes: 2 additions & 2 deletions release/one_click_macos_gui/Info.plist
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
<key>CFBundleIconFile</key>
<string>alphapeptstats_logo.icns</string>
<key>CFBundleIdentifier</key>
<string>alphastats.0.5.4</string>
<string>alphastats.0.6.0</string>
<key>CFBundleShortVersionString</key>
<string>0.5.4</string>
<string>0.6.0</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_macos_gui/create_installer_macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ python setup.py sdist bdist_wheel

# Setting up the local package
cd release/one_click_macos_gui
pip install "../../dist/alphastats-0.5.4-py3-none-any.whl"
pip install "../../dist/alphastats-0.6.0-py3-none-any.whl"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==5.8
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_macos_gui/distribution.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8" standalone="no"?>
<installer-script minSpecVersion="1.000000">
<title>AlphaPeptStats 0.5.4</title>
<title>AlphaPeptStats 0.6.0</title>
<background mime-type="image/png" file="alphapeptstats_logo.png" scaling="proportional"/>
<welcome file="welcome.html" mime-type="text/html" />
<conclusion file="conclusion.html" mime-type="text/html" />
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_windows_gui/alphastats_innoinstaller.iss
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!

#define MyAppName "AlphaPeptStats"
#define MyAppVersion "0.5.4"
#define MyAppVersion "0.6.0"
#define MyAppPublisher "MannLabs"
#define MyAppURL "https://github.com/MannLabs/alphapeptstats"
#define MyAppExeName "alphastats_gui.exe"
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_windows_gui/create_installer_windows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
# Setting up the local package
cd release/one_click_windows_gui
# Make sure you include the required extra packages and always use the stable or very-stable options!
pip install "../../dist/alphastats-0.5.4-py3-none-any.whl"
pip install "../../dist/alphastats-0.6.0-py3-none-any.whl"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==5.8
Expand Down
Loading

0 comments on commit d79a158

Please sign in to comment.