Skip to content

Commit

Permalink
Merge pull request #12 from MannLabs/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
ammarcsj authored Jul 18, 2023
2 parents b53065b + 4a1f2eb commit c425f3e
Show file tree
Hide file tree
Showing 18 changed files with 183 additions and 92 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,9 @@ In case of issues, check out the following:
---
## Citations

In the case that directLFQ is useful to you, please consider supporting us by citing the [preprint](https://doi.org/10.1101/2023.02.17.528962)
In the case that directLFQ is useful to you, please consider supporting us by citing the [paper](https://doi.org/10.1016/j.mcpro.2023.100581)

Ammar, C., Schessner, J.P., Willems, S., Michaelis, A.C., and Mann, M. (2023). Accurate label-free quantification by directLFQ to compare unlimited numbers of proteomes. bioRxiv, 2023.02.17.528962. 10.1101/2023.02.17.528962.
Ammar, C., Schessner, J.P., Willems, S., Michaelis, A.C., and Mann, M. (2023). Accurate label-free quantification by directLFQ to compare unlimited numbers of proteomes. Molecular & Cellular Proteomics, 100581.



Expand Down Expand Up @@ -229,6 +229,7 @@ directLFQ is started internally via the directlfq.lfq_manager.run_lfq() command.
- **num_cores**: The number of cores to use (default is to use multiprocessing).
- **filename_suffix**: Suffix to append to the output files.
- **deactivate_normalization**: Set to true if no between-sample normalization should be performed before processing.
- **filter_dict**: To apply custom filters in addition to the standard filters, provide the path to a yaml file in which the filters are defined (see the example [here](release/examples/filterdict.yaml)). In the Python API, you can also pass the filter dictionary directly instead of a .yaml file.

---

Expand Down
2 changes: 1 addition & 1 deletion directlfq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


__project__ = "directlfq"
__version__ = "0.2.9"
__version__ = "0.2.10"
__license__ = "Apache"
__description__ = "An open-source Python package of the AlphaPept ecosystem"
__author__ = "Mann Labs"
Expand Down
3 changes: 3 additions & 0 deletions directlfq/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,10 @@ def gui():
@click.option("--maximum_number_of_quadratic_ions_to_use_per_protein", "-mn", type= int, default = 10, help="How many ions are used to create the anchor intensity trace (see paper). Increasing might marginally increase performance at the cost of runtime.")
@click.option("--number_of_quadratic_samples", "-nq", type = int, default = 50, help="How many samples are used to create the anchor intensity trace (see paper). Increasing might marginally increase performance at the cost of runtime.")
@click.option("--filename_suffix", "-fs", type=str, default="", help="A suffix to add to the output file name.")
@click.option("--num_cores", "-nc", type = int, default = None, help="The number of cores to use (default is to use multiprocessing).")
@click.option("--deactivate_normalization", "-dn", type = bool, default = False, help="If you want to deactivate the normalization step, you can set this flag to True.")
@click.option("--filter_dict", "-dn", type = bool, default = False, help="In case you want to define specific filters in addition to the standard filters, you can add a yaml file where the filters are defined (see GitHub docu for example).")

def run_directlfq(**kwargs):
print("starting directLFQ")
import directlfq.lfq_manager
Expand Down
60 changes: 56 additions & 4 deletions directlfq/configs/intable_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ diann_precursors_plexDIA:
use_iontree: True
ml_level: CHARGE


diann_precursors:
format: longtable
sample_ID: Run
Expand All @@ -355,6 +356,12 @@ diann_precursors:
- Protein.Group
ion_cols:
- Precursor.Id
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01

# filters:
# shape_quality:
# param: Quantity.Quality
Expand Down Expand Up @@ -397,6 +404,11 @@ diann_fragion_isotopes_raw:
- Precursor.Charge
MS1ISOTOPES:
- Precursor.Charge
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01
use_iontree: True
ml_level: CHARGE

Expand Down Expand Up @@ -435,6 +447,11 @@ diann_fragion_isotopes_raw_charite_dataset:
- Precursor.Charge
MS1ISOTOPES:
- Precursor.Charge
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01
use_iontree: True
ml_level: CHARGE

Expand Down Expand Up @@ -473,6 +490,11 @@ diann_fragion_isotopes:
- Precursor.Charge
MS1ISOTOPES:
- Precursor.Charge
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01
use_iontree: True
ml_level: CHARGE

Expand Down Expand Up @@ -510,6 +532,11 @@ diann_fragion_isotopes_topn:
- Precursor.Charge
MS1ISOTOPES:
- Precursor.Charge
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01
use_iontree: True
ml_level: CHARGE

Expand Down Expand Up @@ -542,6 +569,11 @@ diann_precursor_ms1_and_ms2:
- Precursor.Charge
MS1ISOTOPES:
- Precursor.Charge
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01
use_iontree: True
ml_level: CHARGE

Expand All @@ -568,6 +600,11 @@ diann_fragion:
- Precursor.Charge
FRGION:
- Fragment.Quant.Corrected
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01
use_iontree: True
ml_level: CHARGE

Expand All @@ -581,6 +618,11 @@ diann_precursors_ms1:
- Protein.Group
ion_cols:
- Precursor.Id
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01

diann_precursor_filename_sampleid:
format: longtable
Expand All @@ -590,6 +632,11 @@ diann_precursor_filename_sampleid:
- Genes
ion_cols:
- Precursor.Id
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01
# filters:
# shape_quality:
# param: Quantity.Quality
Expand All @@ -606,10 +653,10 @@ diann_sequence:
ion_cols:
- Stripped.Sequence
filters:
shape_quality:
param: Quantity.Quality
comparator: ">"
value: 0.3
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01


diann_protein:
Expand Down Expand Up @@ -658,6 +705,11 @@ diann_peptide_based_on_precursor_ms1_and_ms2:
- Precursor.Charge
MS1ISOTOPES:
- Precursor.Charge
filters:
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01
use_iontree: True
ml_level: CHARGE

Expand Down
23 changes: 21 additions & 2 deletions directlfq/dashboard_parts.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,16 @@ def __init__(self):
margin=(15, 15, 0, 15)
)

self.yaml_filt_dict_title = pn.pane.Markdown('* In case you want to define specific filters in addition to the standard filters, you can add a yaml file where the filters are defined (see GitHub docs).')
self.yaml_filt_dict_path = pn.widgets.TextInput(
name='',
default = None,
placeholder='(optional) Enter the whole path to the yaml file with the filters',
width=900,
sizing_mode='stretch_width',
margin=(15, 15, 0, 15)
)

self.dropdown_menu_for_input_type_title = pn.pane.Markdown('* Specify the type of the input table you want to use from the dropdown menu. Applies only if you want to use non-default settings, for example if you want to use summarized precursor intensities instead of fragment ion intensities for DIA data:')
self.dropdown_menu_for_input_type = pn.widgets.Select(name = "",
options = {'detect automatically' : None, 'Alphapept peptides.csv' : 'alphapept_peptides', 'MaxQuant evidence.txt' : "maxquant_evidence", 'MaxQuant peptides.txt' : 'maxquant_peptides',
Expand All @@ -185,6 +195,9 @@ def __init__(self):
self.num_nonan_vals_title = pn.pane.Markdown('* Specify the minimum number of non-nan ion intensities required to derive a protein intensity. The higher this number, the more reliable the protein quantification at the cost of more missing values:')
self.num_nonan_vals = pn.widgets.IntInput(name='', value=1, step=1, start=0, end=1000)

self.num_cores_title = pn.pane.Markdown('* Specify the number of cores to use (default of 0 means multiprocessing):')
self.num_cores_vals = pn.widgets.IntInput(name='', value=0, step=1, start=0, end=1000)



# RUN PIPELINE
Expand Down Expand Up @@ -231,6 +244,10 @@ def create(self):
self.protein_subset_for_normalization_file,
self.num_nonan_vals_title,
self.num_nonan_vals,
self.num_cores_title,
self.num_cores_vals,
self.yaml_filt_dict_title,
self.yaml_filt_dict_path,
), ),
header='optional configurations',
collapsed=True,
Expand Down Expand Up @@ -308,10 +325,12 @@ def run_pipeline(self, *args):
additional_headers = [] if self.additional_headers.value == '' else self.additional_headers.value
min_nonan = self.num_nonan_vals.value
file_of_proteins_for_normalization = None if self.protein_subset_for_normalization_file.value == '' else self.protein_subset_for_normalization_file.value

num_cores = None if self.num_cores_vals.value == -1 else self.num_cores_vals.value
yaml_filt_dict_path = None if self.yaml_filt_dict_path.value == '' else self.yaml_filt_dict_path.value

lfq_manager.run_lfq(input_file = input_file, input_type_to_use = input_type_to_use, maximum_number_of_quadratic_ions_to_use_per_protein = 10,
number_of_quadratic_samples = 50, mq_protein_groups_txt= mq_protein_groups_txt, columns_to_add= additional_headers, selected_proteins_file= file_of_proteins_for_normalization, min_nonan = min_nonan)
number_of_quadratic_samples = 50, mq_protein_groups_txt= mq_protein_groups_txt, columns_to_add= additional_headers, selected_proteins_file= file_of_proteins_for_normalization,
min_nonan = min_nonan, num_cores=num_cores, filter_dict=yaml_filt_dict_path)

self.trigger_dependancy()
self.run_pipeline_progress.active = False
Expand Down
16 changes: 13 additions & 3 deletions directlfq/lfq_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import directlfq.utils as lfqutils
import pandas as pd
import directlfq
import os

import warnings

Expand All @@ -17,7 +18,7 @@


def run_lfq(input_file, columns_to_add = [], selected_proteins_file :str = None, mq_protein_groups_txt = None, min_nonan = 1, input_type_to_use = None, maximum_number_of_quadratic_ions_to_use_per_protein = 10,
number_of_quadratic_samples = 50, num_cores = None, filename_suffix = "", deactivate_normalization = False
number_of_quadratic_samples = 50, num_cores = None, filename_suffix = "", deactivate_normalization = False, filter_dict = None
):
"""Run the directLFQ pipeline on a given input file. The input file is expected to contain ion intensities. The output is a table containing protein intensities.
Expand All @@ -34,9 +35,9 @@ def run_lfq(input_file, columns_to_add = [], selected_proteins_file :str = None
"""
print("Starting directLFQ analysis.")
input_file = prepare_input_filename(input_file)
print("reformatting input file, for large files this might take a while.")
filter_dict = load_filter_dict_if_given_as_yaml(filter_dict)
input_file = lfqutils.add_mq_protein_group_ids_if_applicable_and_obtain_annotated_file(input_file, input_type_to_use,mq_protein_groups_txt, columns_to_add)
input_df = lfqutils.import_data(input_file=input_file, input_type_to_use=input_type_to_use)
input_df = lfqutils.import_data(input_file=input_file, input_type_to_use=input_type_to_use, filter_dict=filter_dict)
input_df = lfqutils.index_and_log_transform_input_df(input_df)
input_df = lfqutils.remove_allnan_rows_input_df(input_df)

Expand All @@ -59,6 +60,15 @@ def run_lfq(input_file, columns_to_add = [], selected_proteins_file :str = None

print("Analysis finished!")

def load_filter_dict_if_given_as_yaml(filter_dict):
    """Resolve ``filter_dict`` when it is given as a path to a yaml file.

    Args:
        filter_dict: Either ``None``, an already-built filter dictionary, or a
            path (``str`` or ``os.PathLike``) to an existing ``.yaml``/``.yml``
            file that defines the filters.

    Returns:
        The result of ``lfqutils.load_config`` on the path when ``filter_dict``
        points to an existing yaml file (presumably the parsed dictionary --
        verify against ``load_config``); otherwise ``filter_dict`` unchanged.
    """
    # Only path-like values can refer to a yaml file; dictionaries and None
    # are handed back untouched.
    if not isinstance(filter_dict, (str, os.PathLike)):
        return filter_dict

    # Normalise to str before inspecting the suffix, so that pathlib.Path
    # inputs do not fail on a missing ``endswith`` attribute.
    yaml_path = str(filter_dict)
    is_yaml_file = os.path.isfile(yaml_path) and yaml_path.lower().endswith((".yaml", ".yml"))
    if is_yaml_file:
        return lfqutils.load_config(yaml_path)
    return filter_dict

def prepare_input_filename(input_file):
    """Clean up an input file name as it may arrive from a shell or a GUI field.

    Args:
        input_file: The file name or path as provided by the user.

    Returns:
        The file name as a string, with shell-escaped spaces (a backslash
        followed by a space) replaced by plain spaces and trailing whitespace
        removed.
    """
    # A raw string keeps the backslash literal; the non-raw "\ " is an invalid
    # escape sequence and triggers a SyntaxWarning on recent Python versions.
    escaped_space = r"\ "
    return str(input_file).replace(escaped_space, " ").rstrip()
Expand Down
9 changes: 6 additions & 3 deletions directlfq/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ def check_for_processed_runs_in_results_folder(results_folder):
import os
import pathlib

def import_data(input_file, input_type_to_use = None, samples_subset = None, results_dir = None):
def import_data(input_file, input_type_to_use = None, samples_subset = None, filter_dict = None):
"""
Function to import peptide level data. Depending on available columns in the provided file,
the function identifies the type of input used (e.g. Spectronaut, MaxQuant, DIA-NN), reformats if necessary
Expand All @@ -782,16 +782,19 @@ def import_data(input_file, input_type_to_use = None, samples_subset = None, res
if ("aq_reformat" in input_file) | (input_type_to_use == "directlfq"):
file_to_read = input_file
else:
file_to_read = reformat_and_save_input_file(input_file=input_file, input_type_to_use=input_type_to_use)
file_to_read = reformat_and_save_input_file(input_file=input_file, input_type_to_use=input_type_to_use, filter_dict=filter_dict)

input_reshaped = pd.read_csv(file_to_read, sep = "\t", encoding = 'latin1', usecols=samples_subset)
input_reshaped = input_reshaped.drop_duplicates(subset='ion')
return input_reshaped


def reformat_and_save_input_file(input_file, input_type_to_use = None):
def reformat_and_save_input_file(input_file, input_type_to_use = None, filter_dict = None):

input_type, config_dict_for_type, sep = get_input_type_and_config_dict(input_file, input_type_to_use)

if filter_dict is not None:
config_dict_for_type['filters']= dict(config_dict_for_type.get('filters', {}),**filter_dict)
print(f"using input type {input_type}")
format = config_dict_for_type.get('format')
outfile_name = f"{input_file}.{input_type}.aq_reformat.tsv"
Expand Down
2 changes: 1 addition & 1 deletion misc/bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.2.9
current_version = 0.2.10
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
8 changes: 8 additions & 0 deletions release/examples/filterdict.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
protein_qval:
param: Lib.PG.Q.Value
comparator: "<="
value: 0.01
peptide_qval:
param: Lib.Q.Value
comparator: "<="
value: 0.01
2 changes: 1 addition & 1 deletion release/one_click_linux_gui/control
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: directlfq
Version: 0.2.9
Version: 0.2.10
Architecture: all
Maintainer: Mann Labs <opensource@alphapept.com>
Description: directlfq
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_linux_gui/create_installer_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
# Setting up the local package
cd release/one_click_linux_gui
# Make sure you include the required extra packages and always use the stable or very-stable options!
pip install "../../dist/directlfq-0.2.9-py3-none-any.whl[stable, gui]"
pip install "../../dist/directlfq-0.2.10-py3-none-any.whl[stable, gui]"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==4.10
Expand Down
4 changes: 2 additions & 2 deletions release/one_click_macos_gui/Info.plist
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
<key>CFBundleIconFile</key>
<string>alpha_logo.icns</string>
<key>CFBundleIdentifier</key>
<string>directlfq.0.2.9</string>
<string>directlfq.0.2.10</string>
<key>CFBundleShortVersionString</key>
<string>0.2.9</string>
<string>0.2.10</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
Expand Down
4 changes: 2 additions & 2 deletions release/one_click_macos_gui/create_installer_macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ python setup.py sdist bdist_wheel

# Setting up the local package
cd release/one_click_macos_gui
pip install "../../dist/directlfq-0.2.9-py3-none-any.whl[stable, gui]"
pip install "../../dist/directlfq-0.2.10-py3-none-any.whl[stable, gui]"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==4.10
Expand All @@ -40,5 +40,5 @@ cp ../../LICENSE Resources/LICENSE
cp ../logos/alpha_logo.png Resources/alpha_logo.png
chmod 777 scripts/*

pkgbuild --root dist/directlfq --identifier de.mpg.biochem.directlfq.app --version 0.2.9 --install-location /Applications/directlfq.app --scripts scripts directlfq.pkg
pkgbuild --root dist/directlfq --identifier de.mpg.biochem.directlfq.app --version 0.2.10 --install-location /Applications/directlfq.app --scripts scripts directlfq.pkg
productbuild --distribution distribution.xml --resources Resources --package-path directlfq.pkg dist/directlfq_gui_installer_macos.pkg
2 changes: 1 addition & 1 deletion release/one_click_macos_gui/distribution.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8" standalone="no"?>
<installer-script minSpecVersion="1.000000">
<title>directlfq 0.2.9</title>
<title>directlfq 0.2.10</title>
<background mime-type="image/png" file="alpha_logo.png" scaling="proportional"/>
<welcome file="welcome.html" mime-type="text/html" />
<conclusion file="conclusion.html" mime-type="text/html" />
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_windows_gui/create_installer_windows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
# Setting up the local package
cd release/one_click_windows_gui
# Make sure you include the required extra packages and always use the stable or very-stable options!
pip install "../../dist/directlfq-0.2.9-py3-none-any.whl[stable, gui]"
pip install "../../dist/directlfq-0.2.10-py3-none-any.whl[stable, gui]"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==4.10
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_windows_gui/directlfq_innoinstaller.iss
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!

#define MyAppName "directlfq"
#define MyAppVersion "0.2.9"
#define MyAppVersion "0.2.10"
#define MyAppPublisher "Max Planck Institute of Biochemistry and the University of Copenhagen, Mann Labs"
#define MyAppURL "https://github.com/MannLabs/directlfq"
#define MyAppExeName "directlfq_gui.exe"
Expand Down
Loading

0 comments on commit c425f3e

Please sign in to comment.