From 76f73be736942785ba677dfb61d4b7eefba94258 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Sun, 24 Mar 2024 17:55:18 +0800
Subject: [PATCH 01/19] add ms2score

---
 bin/ms2rescore_cli.py                         | 174 ++++++++++++++++++
 bin/psm_conversion.py                         |   5 +-
 conf/modules.config                           |  14 ++
 modules/local/ms2rescore/main.nf              |  55 ++++++
 modules/local/ms2rescore/meta.yml             |  38 ++++
 .../local/openms/extractpsmfeatures/main.nf   |   4 +-
 nextflow.config                               |   6 +
 subworkflows/local/dda_id.nf                  |  20 +-
 subworkflows/local/psmrescoring.nf            |   2 +-
 9 files changed, 308 insertions(+), 10 deletions(-)
 create mode 100644 bin/ms2rescore_cli.py
 create mode 100644 modules/local/ms2rescore/main.nf
 create mode 100644 modules/local/ms2rescore/meta.yml

diff --git a/bin/ms2rescore_cli.py b/bin/ms2rescore_cli.py
new file mode 100644
index 00000000..1298dd38
--- /dev/null
+++ b/bin/ms2rescore_cli.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+
+import sys
+import click
+import importlib.resources
+import json
+import logging
+from typing import List
+
+import pandas as pd
+
+from ms2rescore import rescore, package_data
+from psm_utils.io.idxml import IdXMLReader, IdXMLWriter
+from psm_utils import PSMList
+import pyopenms as oms
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+
+
+def parse_cli_arguments_to_config(**kwargs):
+    """Update default MS²Rescore config with CLI arguments"""
+    config = json.load(importlib.resources.open_text(package_data, "config_default.json"))
+
+    for key, value in kwargs.items():
+        # Skip these arguments since they need to set in a nested dict of feature_generators
+        if key in ["ms2pip_model", "ms2_tolerance", "rng", "calibration_set_size"]:
+            continue
+
+        elif key == "feature_generators":
+            feature_generators = value.split(",")
+            # Reset feature generator dict since there might be default generators we don't want
+            config["ms2rescore"]["feature_generators"] = {}
+            if "basic" in feature_generators:
+                config["ms2rescore"]["feature_generators"]["basic"] = {}
+            if "ms2pip" in feature_generators:
+                config["ms2rescore"]["feature_generators"]["ms2pip"] = {
+                    "model": kwargs["ms2pip_model"],
+                    "ms2_tolerance": kwargs["ms2_tolerance"],
+                }
+            if "deeplc" in feature_generators:
+                config["ms2rescore"]["feature_generators"]["deeplc"] = {
+                    "deeplc_retrain": False,
+                    "calibration_set_size": kwargs["calibration_set_size"],
+                }
+            if "maxquant" in feature_generators:
+                config["ms2rescore"]["feature_generators"]["maxquant"] = {}
+            if "ionmob" in feature_generators:
+                config["ms2rescore"]["feature_generators"]["ionmob"] = {}
+
+        elif key == "rescoring_engine":
+            # Reset rescoring engine dict we want to allow only computing features
+            config["ms2rescore"]["rescoring_engine"] = {}
+            if value == "mokapot":
+                config["ms2rescore"]["rescoring_engine"]["mokapot"] = {
+                    "write_weights": True,
+                    "write_txt": False,
+                    "write_flashlfq": False,
+                    "rng": kwargs["rng"],
+                    "max_workers": kwargs["processes"],
+                }
+            if value == "percolator":
+                logging.info(
+                    "Percolator rescoring engine has been specified. Use the idXML containing rescoring features and run Percolator in a separate step."
+                )
+                continue
+
+        else:
+            config["ms2rescore"][key] = value
+
+    return config
+
+
+def rescore_idxml(input_file, output_file, config) -> None:
+    """Rescore PSMs in an idXML file and keep other information unchanged."""
+    # Read PSMs
+    reader = IdXMLReader(input_file)
+    psm_list = reader.read_file()
+
+    # Rescore
+    rescore(config, psm_list)
+
+    # Filter out PeptideHits within PeptideIdentification(s) that could not be processed by all feature generators
+    peptide_ids_filtered = filter_out_artifact_psms(psm_list, reader.peptide_ids)
+
+    # Write
+    writer = IdXMLWriter(output_file, reader.protein_ids, peptide_ids_filtered)
+    writer.write_file(psm_list)
+
+
+def filter_out_artifact_psms(
+    psm_list: PSMList, peptide_ids: List[oms.PeptideIdentification]
+) -> List[oms.PeptideIdentification]:
+    """Filter out PeptideHits that could not be processed by all feature generators"""
+    num_mandatory_features = max([len(psm.rescoring_features) for psm in psm_list])
+    new_psm_list = PSMList(psm_list=[psm for psm in psm_list if len(psm.rescoring_features) == num_mandatory_features])
+
+    # get differing peptidoforms of both psm lists
+    psm_list_peptides = set([next(iter(psm.provenance_data.items()))[1] for psm in psm_list])
+    new_psm_list_peptides = set([next(iter(psm.provenance_data.items()))[1] for psm in new_psm_list])
+    not_supported_peptides = psm_list_peptides - new_psm_list_peptides
+
+    # no need to filter if all peptides are supported
+    if len(not_supported_peptides) == 0:
+        return peptide_ids
+    # Create new peptide ids and filter out not supported peptides
+    new_peptide_ids = []
+    for peptide_id in peptide_ids:
+        new_hits = []
+        for hit in peptide_id.getHits():
+            if hit.getSequence().toString() in not_supported_peptides:
+                continue
+            new_hits.append(hit)
+        if len(new_hits) == 0:
+            continue
+        peptide_id.setHits(new_hits)
+        new_peptide_ids.append(peptide_id)
+    logging.info(
+        f"Removed {len(psm_list_peptides) - len(new_psm_list_peptides)} PSMs. Peptides not supported: {not_supported_peptides}"
+    )
+    return new_peptide_ids
+
+
+@click.command()
+@click.option(
+    "-p", "--psm_file", help="Path to PSM file (PIN, mzIdentML, MaxQuant msms, X!Tandem XML, idXML)", required=True
+)
+@click.option(
+    "-s",
+    "--spectrum_path",
+    help="Path to MGF/mzML spectrum file or directory with spectrum files (default: derived from identification file)",
+    required=True,
+)
+@click.option(
+    "-o", "--output_path", help="Path and stem for output file names (default: derive from identification file)"
+)
+@click.option("-l", "--log_level", help="Logging level (default: `info`)", default="info")
+@click.option("-n", "--processes", help="Number of parallel processes available to MS²Rescore", type=int, default=16)
+@click.option("-f", "--fasta_file", help="Path to FASTA file")
+@click.option(
+    "-fg",
+    "--feature_generators",
+    help="Comma-separated list of feature generators to use (default: `ms2pip,deeplc`). See ms2rescore doc for further information",
+    default="",
+)
+@click.option("-pipm", "--ms2pip_model", help="MS²PIP model (default: `Immuno-HCD`)", type=str, default="Immuno-HCD")
+@click.option(
+    "-ms2tol", "--ms2_tolerance", help="Fragment mass tolerance [Da](default: `0.02`)", type=float, default=0.02
+)
+@click.option(
+    "-cs",
+    "--calibration_set_size",
+    help="Percentage of number of calibration set for DeepLC (default: `0.15`)",
+    default=0.15,
+)
+@click.option("-re", "--rescoring_engine", help="Either mokapot or percolator (default: `mokapot`)", default="mokapot")
+@click.option(
+    "-rng", "--rng", help="Seed for mokapot's random number generator (default: `4711`)", type=int, default=4711
+)
+@click.option("-d", "--id_decoy_pattern", help="Regex decoy pattern (default: `DECOY_`)", default="^DECOY_")
+@click.option(
+    "-lsb",
+    "--lower_score_is_better",
+    help="Interpretation of primary search engine score (default: True)",
+    default=True,
+)
+def main(**kwargs):
+    config = parse_cli_arguments_to_config(**kwargs)
+    logging.info("MS²Rescore config:")
+    logging.info(config)
+    rescore_idxml(kwargs["psm_file"], kwargs["output_path"], config)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/bin/psm_conversion.py b/bin/psm_conversion.py
index c122a24f..75793d0c 100644
--- a/bin/psm_conversion.py
+++ b/bin/psm_conversion.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+
 import numpy as np
 import pyopenms as oms
 import pandas as pd
@@ -63,8 +64,8 @@ def convert_psm(idxml, spectra_file, export_decoy_psm):
 
         if isinstance(spectra_df, pd.DataFrame):
             spectra = spectra_df[spectra_df["scan"] == scan_number]
-            mz_array = spectra["mz"].values[0]
-            intensity_array = spectra["intensity"].values[0]
+            mz_array = spectra["mz"].values
+            intensity_array = spectra["intensity"].values
             num_peaks = len(mz_array)
 
         for hit in peptide_id.getHits():
diff --git a/conf/modules.config b/conf/modules.config
index c220be03..c1d1fc5b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -270,4 +270,18 @@ process {
         ]
     }
 
+    withName: 'MS2RESCORE' {
+        ext.args   = [
+            "--ms2_tolerance ${2 * params.fragment_mass_tolerance}",
+            "--ms2pip_model ${params.ms2pip_model}",
+            "--rescoring_engine ${params.posterior_probabilities}",
+            params.feature_generators.trim() ? "--feature_generators ${params.feature_generators}" : ''
+        ].join(' ').trim()
+        publishDir  = [
+            path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
 }
diff --git a/modules/local/ms2rescore/main.nf b/modules/local/ms2rescore/main.nf
new file mode 100644
index 00000000..a1e049a7
--- /dev/null
+++ b/modules/local/ms2rescore/main.nf
@@ -0,0 +1,55 @@
+process MS2RESCORE {
+    tag "$meta.mzml_id"
+    label 'process_high'
+
+    conda "bioconda::ms2rescore=3.0.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ms2rescore:3.0.1--pyhdfd78af_2':
+        'biocontainers/ms2rescore:3.0.1--pyhdfd78af_2' }"
+
+    // userEmulation settings when docker is specified
+    containerOptions = (workflow.containerEngine == 'docker') ? '-u $(id -u) -e "HOME=${HOME}" -v /etc/passwd:/etc/passwd:ro -v /etc/shadow:/etc/shadow:ro -v /etc/group:/etc/group:ro -v $HOME:$HOME' : ''
+
+    input:
+    tuple val(meta), path(idxml), path(mzml)
+
+    output:
+    tuple val(meta), path("*ms2rescore.idXML") , emit: idxml
+    tuple val(meta), path("*feature_names.tsv"), emit: feature_names
+    tuple val(meta), path("*.html" )           , optional:true, emit: html
+    path "versions.yml"                        , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.mzml_id}_ms2rescore"
+
+    """
+    ms2rescore_cli.py \\
+        --psm_file $idxml \\
+        --spectrum_path . \\
+        --output_path ${idxml.baseName}_ms2rescore.idXML \\
+        --processes $task.cpus \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MS²Rescore: \$(echo \$(ms2rescore --version 2>&1) | grep -oP 'MS²Rescore \\(v\\K[^\\)]+' ))
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}_ms2rescore"
+
+    """
+    touch ${prefix}.idXML
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MS²Rescore: \$(echo \$(ms2rescore --version 2>&1) | grep -oP 'MS²Rescore \\(v\\K[^\\)]+' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/ms2rescore/meta.yml b/modules/local/ms2rescore/meta.yml
new file mode 100644
index 00000000..887c3a04
--- /dev/null
+++ b/modules/local/ms2rescore/meta.yml
@@ -0,0 +1,38 @@
+name: MS2RESCORE
+description: A module to perform MS2 rescoring step
+keywords:
+  - MS2
+  - rescoring
+tools:
+  - custom:
+      description: |
+        A custom module for MS2 rescoring.
+      homepage: https://github.com/bigbio/quantms
+      documentation: https://github.com/bigbio/quantms/tree/readthedocs
+input:
+  - idxml_file:
+      type: file
+      description: idXML identification file
+      pattern: "*.idXML"
+  - mzml:
+      type: file
+      description: spectrum data file
+      pattern: "*.mzML"
+  - meta:
+      type: map
+      description: Groovy Map containing sample information
+output:
+  - idxml:
+      type: file
+      description: idXML identification file after MS2 rescoring
+      pattern: "*.idXML"
+  - feature_names:
+      type: file
+      description: File containing feature names
+      pattern: "*feature_names.tsv"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "versions.yml"
+authors:
+  - "@daichengxin"
diff --git a/modules/local/openms/extractpsmfeatures/main.nf b/modules/local/openms/extractpsmfeatures/main.nf
index a9ff73b1..b430fc2c 100644
--- a/modules/local/openms/extractpsmfeatures/main.nf
+++ b/modules/local/openms/extractpsmfeatures/main.nf
@@ -10,7 +10,7 @@ process EXTRACTPSMFEATURES {
         'biocontainers/openms-thirdparty:3.1.0--h9ee0642_1' }"
 
     input:
-    tuple val(meta), path(id_file)
+    tuple val(meta), path(id_file), path(extra_feat)
 
     output:
     tuple val(meta), path("${id_file.baseName}_feat.idXML"), emit: id_files_feat
@@ -20,12 +20,14 @@ process EXTRACTPSMFEATURES {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.mzml_id}"
+    def feature = (params.ms2rescore == true) && (params.id_only == true) ? "-extra ${extra_feat}" : ""
 
     """
     PSMFeatureExtractor \\
         -in ${id_file} \\
         -out ${id_file.baseName}_feat.idXML \\
         -threads $task.cpus \\
+        -extra ${feature} \\
         $args \\
         2>&1 | tee ${id_file.baseName}_extract_psm_feature.log
 
diff --git a/nextflow.config b/nextflow.config
index 8cd10ce8..05d9436f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -101,6 +101,12 @@ params {
     min_fr_mz                     = null
     max_fr_mz                     = null
 
+    // MSRESCORE flags
+    ms2rescore                      = false
+    ms2pip_model                    = 'HCD2021'
+    feature_generators              = 'deeplc,ms2pip'
+    deeplc_calibration_set_size     = 0.15
+
     // PeptideIndexer flags
     IL_equivalent    = true
     unmatched_action = "warn"
diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf
index c98b1c55..119ceefa 100644
--- a/subworkflows/local/dda_id.nf
+++ b/subworkflows/local/dda_id.nf
@@ -8,6 +8,7 @@ include { PERCOLATOR         } from '../../modules/local/openms/thirdparty/perco
 include { FALSEDISCOVERYRATE as FDRIDPEP } from '../../modules/local/openms/falsediscoveryrate/main'
 include { IDPEP                          } from '../../modules/local/openms/idpep/main'
 include { PSMCONVERSION                  } from '../../modules/local/extract_psm/main'
+include { MS2RESCORE                     } from '../../modules/local/ms2rescore/main'
 
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
@@ -39,7 +40,7 @@ workflow DDA_ID {
         sage: filename.name.contains('sage')
             return [meta, filename]
         nosage: true
-            return [meta, filename]
+            return [meta, filename, []]
     }.set{ch_id_files_branched}
 
 
@@ -48,9 +49,18 @@ workflow DDA_ID {
     //
     if (params.skip_rescoring == false) {
         if (params.posterior_probabilities == 'percolator') {
-            EXTRACTPSMFEATURES(ch_id_files_branched.nosage)
-            ch_id_files_feats = ch_id_files_branched.sage.mix(EXTRACTPSMFEATURES.out.id_files_feat)
-            ch_software_versions = ch_software_versions.mix(EXTRACTPSMFEATURES.out.version)
+            if (params.ms2rescore == true) {
+                MS2RESCORE(ch_id_files.combine(ch_file_preparation_results, by: 0))
+                ch_software_versions = ch_software_versions.mix(MS2RESCORE.out.versions)
+                EXTRACTPSMFEATURES(MS2RESCORE.out.idxml.join(MS2RESCORE.out.feature_names))
+                ch_id_files_feats = EXTRACTPSMFEATURES.out.id_files_feat
+                ch_software_versions = ch_software_versions.mix(EXTRACTPSMFEATURES.out.version)
+            } else {
+                EXTRACTPSMFEATURES(ch_id_files_branched.nosage)
+                ch_id_files_feats = ch_id_files_branched.sage.mix(EXTRACTPSMFEATURES.out.id_files_feat)
+                ch_software_versions = ch_software_versions.mix(EXTRACTPSMFEATURES.out.version)
+            }
+
             PERCOLATOR(ch_id_files_feats)
             ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.version)
             ch_consensus_input = PERCOLATOR.out.id_files_perc
@@ -90,8 +100,6 @@ workflow DDA_ID {
         //
         // Extract PSMs and export parquet format
         //
-        ch_spectrum_data.view()
-        PSMFDRCONTROL.out.id_filtered.view()
         PSMCONVERSION(PSMFDRCONTROL.out.id_filtered.combine(ch_spectrum_data, by: 0))
 
     } else {
diff --git a/subworkflows/local/psmrescoring.nf b/subworkflows/local/psmrescoring.nf
index 7bc89d98..9f243c87 100644
--- a/subworkflows/local/psmrescoring.nf
+++ b/subworkflows/local/psmrescoring.nf
@@ -21,7 +21,7 @@ workflow PSMRESCORING {
             sage: filename.name.contains('sage')
                 return [meta, filename]
             nosage: true
-                return [meta, filename]
+                return [meta, filename, []]
         }.set{ch_id_files_branched}
         EXTRACTPSMFEATURES(ch_id_files_branched.nosage)
         ch_id_files_feats = ch_id_files_branched.sage.mix(EXTRACTPSMFEATURES.out.id_files_feat)

From 192ad038a2654326e6599b32a1379a75ce559bfa Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Sun, 24 Mar 2024 18:24:24 +0800
Subject: [PATCH 02/19] fixed

---
 nextflow.config      |  1 -
 nextflow_schema.json | 18 ++++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 05d9436f..876659e4 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -105,7 +105,6 @@ params {
     ms2rescore                      = false
     ms2pip_model                    = 'HCD2021'
     feature_generators              = 'deeplc,ms2pip'
-    deeplc_calibration_set_size     = 0.15
 
     // PeptideIndexer flags
     IL_equivalent    = true
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 8375cfa3..0248a2ce 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -434,6 +434,24 @@
                     "default": false,
                     "fa_icon": "far fa-check-square"
                 },
+                "ms2rescore": {
+                    "type": "boolean",
+                    "description": "Whether performing peptide identification rescoring with LC-MS predictors such as MS²PIP and DeepLC.",
+                    "default": false,
+                    "fa_icon": "far fa-check-square"
+                },
+                "ms2pip_model": {
+                    "type": "string",
+                    "description": "Which deep learning model to generate feature.",
+                    "fa_icon": "fas fa-font",
+                    "default": "HCD2021"
+                },
+                "feature_generators": {
+                    "type": "string",
+                    "description": "Which feature generator to generate feature.",
+                    "fa_icon": "fas fa-font",
+                    "default": "deeplc,ms2pip"
+                },
                 "posterior_probabilities": {
                     "type": "string",
                     "description": "How to calculate posterior probabilities for PSMs:\n\n* 'percolator' = Re-score based on PSM-feature-based SVM and transform distance\n    to hyperplane for posteriors\n* 'fit_distributions' = Fit positive and negative distributions to scores\n    (similar to PeptideProphet)",

From f4e3919e4b60d33249981dc0252dabda12142d88 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Tue, 26 Mar 2024 11:17:38 +0800
Subject: [PATCH 03/19] add mokapot

---
 conf/modules.config                      | 16 +++++++++++++++-
 modules/local/ms2rescore/main.nf         | 20 ++++++++++++++++----
 modules/local/openms/consensusid/main.nf |  2 +-
 nextflow.config                          |  1 +
 nextflow_schema.json                     | 10 ++++++++--
 subworkflows/local/dda_id.nf             | 12 ++++++++----
 workflows/quantms.nf                     |  6 +++++-
 7 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index c1d1fc5b..4480740f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -270,11 +270,12 @@ process {
         ]
     }
 
+    // ID ONLY
     withName: 'MS2RESCORE' {
         ext.args   = [
-            "--ms2_tolerance ${2 * params.fragment_mass_tolerance}",
             "--ms2pip_model ${params.ms2pip_model}",
             "--rescoring_engine ${params.posterior_probabilities}",
+            "--calibration_set_size ${params.calibration_set_size}",
             params.feature_generators.trim() ? "--feature_generators ${params.feature_generators}" : ''
         ].join(' ').trim()
         publishDir  = [
@@ -284,4 +285,17 @@ process {
         ]
     }
 
+     withName: '.*:DDA_ID:IDSCORESWITCHER' {
+        ext.args    = [
+            "-new_score_orientation lower_better",
+            "-new_score_type \"Posterior Error Probability\"",
+            "-debug $params.idscoreswitcher_debug"
+        ].join(' ').trim()
+        publishDir  = [
+            path: { "${params.outdir}/idscoreswitcherforluciphor" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+     }
+
 }
diff --git a/modules/local/ms2rescore/main.nf b/modules/local/ms2rescore/main.nf
index a1e049a7..ffa26a4f 100644
--- a/modules/local/ms2rescore/main.nf
+++ b/modules/local/ms2rescore/main.nf
@@ -2,10 +2,10 @@ process MS2RESCORE {
     tag "$meta.mzml_id"
     label 'process_high'
 
-    conda "bioconda::ms2rescore=3.0.1"
+    conda "bioconda::ms2rescore=3.0.2"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/ms2rescore:3.0.1--pyhdfd78af_2':
-        'biocontainers/ms2rescore:3.0.1--pyhdfd78af_2' }"
+        'https://depot.galaxyproject.org/singularity/ms2rescore:3.0.2--pyhdfd78af_0':
+        'biocontainers/ms2rescore:3.0.2--pyhdfd78af_0' }"
 
     // userEmulation settings when docker is specified
     containerOptions = (workflow.containerEngine == 'docker') ? '-u $(id -u) -e "HOME=${HOME}" -v /etc/passwd:/etc/passwd:ro -v /etc/shadow:/etc/shadow:ro -v /etc/group:/etc/group:ro -v $HOME:$HOME' : ''
@@ -18,6 +18,7 @@ process MS2RESCORE {
     tuple val(meta), path("*feature_names.tsv"), emit: feature_names
     tuple val(meta), path("*.html" )           , optional:true, emit: html
     path "versions.yml"                        , emit: versions
+    path "*.log"                               , emit: log
 
     when:
     task.ext.when == null || task.ext.when
@@ -26,13 +27,24 @@ process MS2RESCORE {
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.mzml_id}_ms2rescore"
 
+
+    // ms2rescore only supports Da unit. https://ms2rescore.readthedocs.io/en/v3.0.2/userguide/configuration/
+    if (meta['fragmentmasstoleranceunit'].toLowerCase().endsWith('da')) {
+        ms2_tolerence = 2 * meta['fragmentmasstolerance']
+    } else {
+        log.info "Warning: MS2Rescore only supports Da unit. Set default ms2 tolerance as 0.02!"
+        ms2_tolerence = 0.02
+    }
+
     """
     ms2rescore_cli.py \\
         --psm_file $idxml \\
         --spectrum_path . \\
+        --ms2_tolerance $ms2_tolerence \\
         --output_path ${idxml.baseName}_ms2rescore.idXML \\
         --processes $task.cpus \\
-        $args
+        $args \\
+        2>&1 | tee ${meta.mzml_id}_ms2rescore.log
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/openms/consensusid/main.nf b/modules/local/openms/consensusid/main.nf
index 5264701d..ad8a20f3 100644
--- a/modules/local/openms/consensusid/main.nf
+++ b/modules/local/openms/consensusid/main.nf
@@ -31,7 +31,7 @@ process CONSENSUSID {
         -filter:considered_hits $params.consensusid_considered_top_hits \\
         -debug $params.consensusid_debug \\
         $args \\
-        2>&1 | tee ${meta.id}_consensusID.log
+        2>&1 | tee ${meta.mzml_id}_consensusID.log
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/nextflow.config b/nextflow.config
index 876659e4..904d2f55 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -105,6 +105,7 @@ params {
     ms2rescore                      = false
     ms2pip_model                    = 'HCD2021'
     feature_generators              = 'deeplc,ms2pip'
+    calibration_set_size            = 0.15
 
     // PeptideIndexer flags
     IL_equivalent    = true
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 0248a2ce..2b35e9f7 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -452,12 +452,18 @@
                     "fa_icon": "fas fa-font",
                     "default": "deeplc,ms2pip"
                 },
+                "calibration_set_size": {
+                    "type": "number",
+                    "description": "Percentage of number of calibration set for DeepLC",
+                    "default": 0.15,
+                    "fa_icon": "fas fa-filter"
+                },
                 "posterior_probabilities": {
                     "type": "string",
-                    "description": "How to calculate posterior probabilities for PSMs:\n\n* 'percolator' = Re-score based on PSM-feature-based SVM and transform distance\n    to hyperplane for posteriors\n* 'fit_distributions' = Fit positive and negative distributions to scores\n    (similar to PeptideProphet)",
+                    "description": "How to calculate posterior probabilities for PSMs:\n\n* 'percolator' = Re-score based on PSM-feature-based SVM and transform distance\n    to hyperplane for posteriors\n* 'fit_distributions' = Fit positive and negative distributions to scores\n    (similar to PeptideProphet)\n\n* 'mokapot' = Re-score based on PSM-feature-based semi-supervised learning algorithm introduced by Percolator",
                     "fa_icon": "fas fa-list-ol",
                     "default": "percolator",
-                    "enum": ["percolator", "fit_distributions"]
+                    "enum": ["percolator", "fit_distributions", "mokapot"]
                 },
                 "run_fdr_cutoff": {
                     "type": "number",
diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf
index 119ceefa..7dd234a6 100644
--- a/subworkflows/local/dda_id.nf
+++ b/subworkflows/local/dda_id.nf
@@ -9,6 +9,7 @@ include { FALSEDISCOVERYRATE as FDRIDPEP } from '../../modules/local/openms/fals
 include { IDPEP                          } from '../../modules/local/openms/idpep/main'
 include { PSMCONVERSION                  } from '../../modules/local/extract_psm/main'
 include { MS2RESCORE                     } from '../../modules/local/ms2rescore/main'
+include { IDSCORESWITCHER                } from '../../modules/local/openms/idscoreswitcher/main'
 
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
@@ -64,10 +65,13 @@ workflow DDA_ID {
             PERCOLATOR(ch_id_files_feats)
             ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.version)
             ch_consensus_input = PERCOLATOR.out.id_files_perc
-        }
-
-
-        if (params.posterior_probabilities != 'percolator') {
+        } else if (params.posterior_probabilities == 'mokapot') {
+            MS2RESCORE(ch_id_files.combine(ch_file_preparation_results, by: 0))
+            ch_software_versions = ch_software_versions.mix(MS2RESCORE.out.versions)
+            IDSCORESWITCHER(MS2RESCORE.out.idxml.combine(Channel.value("PEP")))
+            ch_software_versions = ch_software_versions.mix(IDSCORESWITCHER.out.version)
+            ch_consensus_input = IDSCORESWITCHER.out.id_score_switcher.combine(Channel.value("MS:1001491"))
+        } else {
             ch_fdridpep = Channel.empty()
             if (params.search_engines.split(",").size() == 1) {
                 FDRIDPEP(ch_id_files)
diff --git a/workflows/quantms.nf b/workflows/quantms.nf
index e0772204..f61d6d62 100644
--- a/workflows/quantms.nf
+++ b/workflows/quantms.nf
@@ -104,7 +104,11 @@ workflow QUANTMS {
         ch_versions = ch_versions.mix(DECOYDATABASE.out.version.ifEmpty(null))
     }
 
-    if (params.id_only) {
+    // This rescoring engine currently only is supported in id_only subworkflows via ms2rescore.
+    if (params.id_only | params.posterior_probabilities == "mokapot") {
+        if (params.id_only == false) {
+            log.warn "The mokapot rescoring engine currently only is supported in id_only subworkflow via ms2rescore."
+        }
         DDA_ID( FILE_PREPARATION.out.results, ch_searchengine_in_db, FILE_PREPARATION.out.spectrum_data)
         ch_versions = ch_versions.mix(DDA_ID.out.version.ifEmpty(null))
     } else {

From 18d0571863082ffefa3c3eb43b97c2e32ca42118 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Tue, 26 Mar 2024 11:20:41 +0800
Subject: [PATCH 04/19] Update modules.config

---
 conf/modules.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 4480740f..4778ada8 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -285,8 +285,8 @@ process {
         ]
     }
 
-     withName: '.*:DDA_ID:IDSCORESWITCHER' {
-        ext.args    = [
+    withName: '.*:DDA_ID:IDSCORESWITCHER' {
+        ext.args   = [
             "-new_score_orientation lower_better",
             "-new_score_type \"Posterior Error Probability\"",
             "-debug $params.idscoreswitcher_debug"

From c7b415e0d2ab1d88a68599a90e905f93514ed80e Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Tue, 26 Mar 2024 11:22:24 +0800
Subject: [PATCH 05/19] Update modules.config

---
 conf/modules.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index 4778ada8..a1065abc 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -296,6 +296,6 @@ process {
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
-     }
+    }
 
 }

From fe840a999ca299b043a7c20c0acd9fac5566aa54 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Sun, 31 Mar 2024 13:13:10 +0800
Subject: [PATCH 06/19] fixed

---
 bin/ms2rescore_cli.py            |  2 ++
 bin/psm_conversion.py            |  3 +++
 conf/modules.config              | 15 +++++++++++++++
 modules/local/ms2rescore/main.nf |  4 ++--
 subworkflows/local/dda_id.nf     | 12 ++++++++++--
 5 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/bin/ms2rescore_cli.py b/bin/ms2rescore_cli.py
index 1298dd38..61e517e9 100644
--- a/bin/ms2rescore_cli.py
+++ b/bin/ms2rescore_cli.py
@@ -56,6 +56,7 @@ def parse_cli_arguments_to_config(**kwargs):
                     "write_txt": False,
                     "write_flashlfq": False,
                     "rng": kwargs["rng"],
+                    "test_fdr": kwargs["test_fdr"],
                     "max_workers": kwargs["processes"],
                 }
             if value == "percolator":
@@ -136,6 +137,7 @@ def filter_out_artifact_psms(
 @click.option("-l", "--log_level", help="Logging level (default: `info`)", default="info")
 @click.option("-n", "--processes", help="Number of parallel processes available to MS²Rescore", type=int, default=16)
 @click.option("-f", "--fasta_file", help="Path to FASTA file")
+@click.option("-t", "--test_fdr", help="The false-discovery rate threshold at which to evaluate the learned models. (default: 0.05)", default=0.05)
 @click.option(
     "-fg",
     "--feature_generators",
diff --git a/bin/psm_conversion.py b/bin/psm_conversion.py
index 75793d0c..767859a0 100644
--- a/bin/psm_conversion.py
+++ b/bin/psm_conversion.py
@@ -85,6 +85,9 @@ def convert_psm(idxml, spectra_file, export_decoy_psm):
             elif search_engines == "Sage":
                 id_scores = ["Sage:hyperscore: " + str(hit.getScore())]
 
+            if hit.metaValueExists("MS:1001491"):
+                global_qvalue = hit.getMetaValue("MS:1001491")
+
             charge = hit.getCharge()
             peptidoform = hit.getSequence().toString()
             modifications = mods_position(peptidoform)
diff --git a/conf/modules.config b/conf/modules.config
index a1065abc..b5a05b30 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -276,6 +276,7 @@ process {
             "--ms2pip_model ${params.ms2pip_model}",
             "--rescoring_engine ${params.posterior_probabilities}",
             "--calibration_set_size ${params.calibration_set_size}",
+            "--test_fdr ${params.test_FDR}",
             params.feature_generators.trim() ? "--feature_generators ${params.feature_generators}" : ''
         ].join(' ').trim()
         publishDir  = [
@@ -298,4 +299,18 @@ process {
         ]
     }
 
+    withName: '.*:DDA_ID:PSMFDRCONTROL:IDSCORESWITCHER' {
+        ext.args    = [
+            "-new_score_orientation lower_better",
+            "-old_score \"Posterior Error Probability\"",
+            "-new_score_type q-value",
+            "-debug $params.idscoreswitcher_debug"
+        ].join(' ').trim()
+        publishDir  = [
+            path: { "${params.outdir}/idscoreswitcher" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
 }
diff --git a/modules/local/ms2rescore/main.nf b/modules/local/ms2rescore/main.nf
index ffa26a4f..6667337a 100644
--- a/modules/local/ms2rescore/main.nf
+++ b/modules/local/ms2rescore/main.nf
@@ -30,7 +30,7 @@ process MS2RESCORE {
 
     // ms2rescore only supports Da unit. https://ms2rescore.readthedocs.io/en/v3.0.2/userguide/configuration/
     if (meta['fragmentmasstoleranceunit'].toLowerCase().endsWith('da')) {
-        ms2_tolerence = 2 * meta['fragmentmasstolerance']
+        ms2_tolerence = meta['fragmentmasstolerance']
     } else {
         log.info "Warning: MS2Rescore only supports Da unit. Set default ms2 tolerance as 0.02!"
         ms2_tolerence = 0.02
@@ -54,7 +54,7 @@ process MS2RESCORE {
 
     stub:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}_ms2rescore"
+    def prefix = task.ext.prefix ?: "${meta.mzml_id}_ms2rescore"
 
     """
     touch ${prefix}.idXML
diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf
index 7dd234a6..3a595735 100644
--- a/subworkflows/local/dda_id.nf
+++ b/subworkflows/local/dda_id.nf
@@ -53,8 +53,16 @@ workflow DDA_ID {
             if (params.ms2rescore == true) {
                 MS2RESCORE(ch_id_files.combine(ch_file_preparation_results, by: 0))
                 ch_software_versions = ch_software_versions.mix(MS2RESCORE.out.versions)
-                EXTRACTPSMFEATURES(MS2RESCORE.out.idxml.join(MS2RESCORE.out.feature_names))
-                ch_id_files_feats = EXTRACTPSMFEATURES.out.id_files_feat
+
+                MS2RESCORE.out.idxml.join(MS2RESCORE.out.feature_names).branch{ meta, idxml, feature_name ->
+                    sage: idxml.name.contains('sage')
+                        return [meta, idxml]
+                    nosage: true
+                        return [meta, idxml, feature_name]
+                }.set{ch_ms2rescore_branched}
+
+                EXTRACTPSMFEATURES(ch_ms2rescore_branched.nosage)
+                ch_id_files_feats = EXTRACTPSMFEATURES.out.id_files_feat.mix(ch_ms2rescore_branched.sage)
                 ch_software_versions = ch_software_versions.mix(EXTRACTPSMFEATURES.out.version)
             } else {
                 EXTRACTPSMFEATURES(ch_id_files_branched.nosage)

From 6bc25fe00a316d23b36b72cd65cff475d5f64fcf Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Sun, 28 Apr 2024 10:35:55 +0800
Subject: [PATCH 07/19] add rescore range parameter

---
 bin/extract_sample.py        |  51 +++++++++++
 nextflow.config              |   1 +
 nextflow_schema.json         |   7 ++
 subworkflows/local/dda_id.nf | 158 +++++++++++++++++++++++++++++++++--
 workflows/quantms.nf         |   2 +-
 5 files changed, 213 insertions(+), 6 deletions(-)
 create mode 100644 bin/extract_sample.py

diff --git a/bin/extract_sample.py b/bin/extract_sample.py
new file mode 100644
index 00000000..0fb2c2bd
--- /dev/null
+++ b/bin/extract_sample.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+import argparse
+import errno
+import os
+import sys
+from pathlib import Path
+import pandas as pd
+
+
+def parse_args(args=None):
+    Description = "Extract sample information from an experiment design file"
+    Epilog = "Example usage: python extract_sample.py <EXP>"
+
+    parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
+    parser.add_argument("EXP", help="Expdesign file to be extracted")
+    return parser.parse_args(args)
+
+
+def extract_sample(expdesign):
+    data = pd.read_csv(expdesign, sep="\t", header=0, dtype=str)
+    fTable = data.dropna()
+
+    # two table format
+    with open(expdesign, "r") as f:
+        lines = f.readlines()
+        empty_row = lines.index("\n")
+        s_table = [i.replace("\n", "").split("\t") for i in lines[empty_row + 1:]][1:]
+        s_header = lines[empty_row + 1].replace("\n", "").split("\t")
+        s_DataFrame = pd.DataFrame(s_table, columns=s_header)
+
+    sample_dt = pd.DataFrame()
+    if "MSstats_Mixture" not in s_DataFrame.columns:
+        fTable = fTable[["Spectra_Filepath", "Sample"]]
+        fTable.to_csv(f"{Path(expdesign).stem}_sample.csv", sep="\t", index=False)
+    else:
+        fTable.drop_duplicates(subset=["Spectra_Filepath"], inplace=True)
+        for _, row in fTable.iterrows():
+            mixture_id = s_DataFrame[s_DataFrame["Sample"] == row["Sample"]]["MSstats_Mixture"]
+            sample_dt = sample_dt.append({"Spectra_Filepath": row["Spectra_Filepath"], "Sample": mixture_id},
+                                         ignore_index=True)
+        sample_dt.to_csv(f"{Path(expdesign).stem}_sample.csv", sep="\t", index=False)
+
+
+def main(args=None):
+    args = parse_args(args)
+    extract_sample(args.EXP)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/nextflow.config b/nextflow.config
index 32a1584b..fb41a50c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -105,6 +105,7 @@ params {
 
     // MSRESCORE flags
     ms2rescore                      = false
+    rescore_range                   = 'independent_run'
     ms2pip_model                    = 'HCD2021'
     feature_generators              = 'deeplc,ms2pip'
     calibration_set_size            = 0.15
diff --git a/nextflow_schema.json b/nextflow_schema.json
index f736fd51..de62b9bc 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -447,6 +447,13 @@
                     "default": false,
                     "fa_icon": "far fa-check-square"
                 },
+                "rescore_range": {
+                    "type": "string",
+                    "description": "Rescoring for independent run, Sample or whole experiments",
+                    "fa_icon": "fas fa-font",
+                    "default": "independent_run",
+                    "enum": ["independent_run", "by_sample", "by_project"]
+                },
                 "ms2pip_model": {
                     "type": "string",
                     "description": "Which deep learning model to generate feature.",
diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf
index 3a595735..ac43e570 100644
--- a/subworkflows/local/dda_id.nf
+++ b/subworkflows/local/dda_id.nf
@@ -5,11 +5,14 @@ include { DECOYDATABASE } from '../../modules/local/openms/decoydatabase/main'
 include { CONSENSUSID   } from '../../modules/local/openms/consensusid/main'
 include { EXTRACTPSMFEATURES } from '../../modules/local/openms/extractpsmfeatures/main'
 include { PERCOLATOR         } from '../../modules/local/openms/thirdparty/percolator/main'
+include { IDMERGER           } from '../../modules/local/openms/idmerger/main'
+include { IDRIPPER           } from '../../modules/local/openms/idripper/main'
 include { FALSEDISCOVERYRATE as FDRIDPEP } from '../../modules/local/openms/falsediscoveryrate/main'
 include { IDPEP                          } from '../../modules/local/openms/idpep/main'
 include { PSMCONVERSION                  } from '../../modules/local/extract_psm/main'
 include { MS2RESCORE                     } from '../../modules/local/ms2rescore/main'
 include { IDSCORESWITCHER                } from '../../modules/local/openms/idscoreswitcher/main'
+include { EXTRACT_SAMPLE                 } from '../../modules/local/extract_sample/main'
 
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
@@ -22,6 +25,7 @@ workflow DDA_ID {
     ch_file_preparation_results
     ch_database_wdecoy
     ch_spectrum_data
+    ch_expdesign
 
     main:
 
@@ -70,9 +74,77 @@ workflow DDA_ID {
                 ch_software_versions = ch_software_versions.mix(EXTRACTPSMFEATURES.out.version)
             }
 
-            PERCOLATOR(ch_id_files_feats)
-            ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.version)
-            ch_consensus_input = PERCOLATOR.out.id_files_perc
+            // Rescoring for independent run, Sample or whole experiments
+            if (params.rescore_range == "independent_run") {
+                PERCOLATOR(ch_id_files_feats)
+                ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.version)
+                ch_consensus_input = PERCOLATOR.out.id_files_perc
+            } else if (params.rescore_range == "by_sample") {
+                // Sample map
+                EXTRACT_SAMPLE(ch_expdesign)
+                ch_expdesign_sample = EXTRACT_SAMPLE.out.ch_expdesign_sample
+                ch_expdesign_sample.splitCsv(header: true, sep: '\t')
+                    .map { get_sample_map(it) }.set{ sample_map_idv }
+
+                sample_map = sample_map_idv.collect().map{ all_sample_map( it ) }
+
+                // Group by search_engines and convert meta
+                 ch_id_files_feats.combine( sample_map ).branch{ meta, filename, sample_map  ->
+                    sage: filename.name.contains('sage')
+                        return [comvert_exp_meta(meta, "sample_id", filename, sample_map), filename]
+                    msgf: filename.name.contains('msgf')
+                        return [comvert_exp_meta(meta, "sample_id", filename, sample_map), filename]
+                    comet: filename.name.contains('comet')
+                        return [comvert_exp_meta(meta, "sample_id", filename, sample_map), filename]
+                }.set{ch_id_files_feat_branched}
+
+                // IDMERGER for samples group
+                IDMERGER(ch_id_files_feat_branched.comet.groupTuple(by: 0)
+                    .mix(ch_id_files_feat_branched.msgf.groupTuple(by: 0))
+                    .mix(ch_id_files_feat_branched.sage.groupTuple(by: 0)))
+                ch_software_versions = ch_software_versions.mix(IDMERGER.out.version)
+
+                PERCOLATOR(IDMERGER.out.id_merged)
+                ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.version)
+
+                // Currently only ID runs on exactly one mzML file are supported in CONSENSUSID. Split idXML by runs
+                IDRIPPER(PERCOLATOR.out.id_files_perc)
+                IDRIPPER.out.meta.first().combine(IDRIPPER.out.id_rippers.flatten())
+                    .map{ [comvert_exp_meta(it[0], "mzml_id", it[1], ""), it[1], "MS:1001491"] }
+                    .set{ ch_consensus_input }
+                ch_consensus_input.view()
+                ch_software_versions = ch_software_versions.mix(IDRIPPER.out.version)
+
+            } else if (params.rescore_range == "by_project"){
+                // Split ch_id_files_feats by search_engines
+                ch_id_files_feats.branch{ meta, filename ->
+                    sage: filename.name.contains('sage')
+                        return [comvert_exp_meta(meta, "experiment_id", filename, ""), filename]
+                    msgf: filename.name.contains('msgf')
+                        return [comvert_exp_meta(meta, "experiment_id", filename, ""), filename]
+                    comet: filename.name.contains('comet')
+                        return [comvert_exp_meta(meta, "experiment_id", filename, ""), filename]
+                }.set{ch_id_files_feat_branched}
+
+                // IDMERGER for whole experiments
+                IDMERGER(ch_id_files_feat_branched.comet.groupTuple(by: 0)
+                    .mix(ch_id_files_feat_branched.msgf.groupTuple(by: 0))
+                    .mix(ch_id_files_feat_branched.sage.groupTuple(by: 0)))
+                ch_software_versions = ch_software_versions.mix(IDMERGER.out.version)
+
+                PERCOLATOR(IDMERGER.out.id_merged)
+                ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.version)
+
+                // Currently only ID runs on exactly one mzML file are supported in CONSENSUSID. Split idXML by runs
+                IDRIPPER(PERCOLATOR.out.id_files_perc)
+                IDRIPPER.out.meta.first().combine(IDRIPPER.out.id_rippers.flatten())
+                    .map{ [comvert_exp_meta(it[0], "mzml_id", it[1], ""), it[1], "MS:1001491"] }
+                    .set{ ch_consensus_input }
+                ch_software_versions = ch_software_versions.mix(IDRIPPER.out.version)
+
+            }
+
+
         } else if (params.posterior_probabilities == 'mokapot') {
             MS2RESCORE(ch_id_files.combine(ch_file_preparation_results, by: 0))
             ch_software_versions = ch_software_versions.mix(MS2RESCORE.out.versions)
@@ -109,9 +181,8 @@ workflow DDA_ID {
         PSMFDRCONTROL(ch_psmfdrcontrol)
         ch_software_versions = ch_software_versions.mix(PSMFDRCONTROL.out.version.ifEmpty(null))
 
-        //
+
         // Extract PSMs and export parquet format
-        //
         PSMCONVERSION(PSMFDRCONTROL.out.id_filtered.combine(ch_spectrum_data, by: 0))
 
     } else {
@@ -122,3 +193,80 @@ workflow DDA_ID {
     emit:
     version                 = ch_software_versions
 }
+
+// Function to group by mzML/sample/experiment
+def comvert_exp_meta(Map meta, value, file_name, sample_map) {
+    def exp_meta = [:]
+
+    if (value == "experiment_id") {
+        exp_meta.mzml_id = meta.experiment_id
+    } else if (value == "mzml_id") {
+        position = file(file_name).name.lastIndexOf('_sage_perc.idXML')
+        if (position == -1) {
+            position = file(file_name).name.lastIndexOf('_comet_perc.idXML')
+            if (position == -1) {
+                position = file(file_name).name.lastIndexOf('_msgf_perc.idXML')
+            }
+        }
+        exp_meta.mzml_id = file(file_name).name.take(position)
+    } else if (value == "sample_id") {
+        tag = file(file_name).name.lastIndexOf('_perc.idXML')
+        if (tag == -1) {
+            position = file(file_name).name.lastIndexOf('_sage.idXML')
+            if (position == -1) {
+                position = file(file_name).name.lastIndexOf('_comet_feat.idXML')
+                if (position == -1) {
+                    position = file(file_name).name.lastIndexOf('_msgf_feat.idXML')
+                }
+            }
+        } else {
+            position = file(file_name).name.lastIndexOf('_sage_perc.idXML')
+            if (position == -1) {
+                position = file(file_name).name.lastIndexOf('_comet_perc.idXML')
+                if (position == -1) {
+                    position = file(file_name).name.lastIndexOf('_msgf_perc.idXML')
+                }
+            }
+        }
+
+        file_name = file(file_name).name.take(position)
+        exp_meta.mzml_id = sample_map[file_name]
+    }
+
+
+    exp_meta.experiment_id              = meta.experiment_id
+    exp_meta.labelling_type             = meta.labelling_type
+    exp_meta.dissociationmethod         = meta.dissociationmethod
+    exp_meta.fixedmodifications         = meta.fixedmodifications
+    exp_meta.variablemodifications      = meta.variablemodifications
+    exp_meta.precursormasstolerance     = meta.precursormasstolerance
+    exp_meta.precursormasstoleranceunit = meta.precursormasstoleranceunit
+    exp_meta.fragmentmasstolerance      = meta.fragmentmasstolerance
+    exp_meta.fragmentmasstoleranceunit  = meta.fragmentmasstoleranceunit
+    exp_meta.enzyme                     = meta.enzyme
+    exp_meta.acquisition_method         = meta.acquisition_method
+    print(exp_meta)
+    return exp_meta
+}
+
+// Function to get sample map
+def get_sample_map(LinkedHashMap row) {
+    def sample_map = [:]
+
+    filestr               = row.Spectra_Filepath
+    file_name             = file(filestr).name.take(file(filestr).name.lastIndexOf('.'))
+    sample                = row.Sample
+    sample_map[file_name] = sample
+
+    return sample_map
+
+}
+
+def all_sample_map(sample_list) {
+    res = [:]
+    sample_list.each {
+        res = res + it
+    }
+
+    return res
+}
diff --git a/workflows/quantms.nf b/workflows/quantms.nf
index f61d6d62..82c9a870 100644
--- a/workflows/quantms.nf
+++ b/workflows/quantms.nf
@@ -109,7 +109,7 @@ workflow QUANTMS {
         if (params.id_only == false) {
             log.warn "The mokapot rescoring engine currently only is supported in id_only subworkflow via ms2rescore."
         }
-        DDA_ID( FILE_PREPARATION.out.results, ch_searchengine_in_db, FILE_PREPARATION.out.spectrum_data)
+        DDA_ID( FILE_PREPARATION.out.results, ch_searchengine_in_db, FILE_PREPARATION.out.spectrum_data, CREATE_INPUT_CHANNEL.out.ch_expdesign)
         ch_versions = ch_versions.mix(DDA_ID.out.version.ifEmpty(null))
     } else {
         TMT(ch_fileprep_result.iso, CREATE_INPUT_CHANNEL.out.ch_expdesign, ch_searchengine_in_db)

From a877cee97dd08662d8ab8ab8570baac9f7498131 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Sun, 28 Apr 2024 10:39:09 +0800
Subject: [PATCH 08/19] Update dda_id.nf

---
 subworkflows/local/dda_id.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf
index ac43e570..983decd7 100644
--- a/subworkflows/local/dda_id.nf
+++ b/subworkflows/local/dda_id.nf
@@ -89,7 +89,7 @@ workflow DDA_ID {
                 sample_map = sample_map_idv.collect().map{ all_sample_map( it ) }
 
                 // Group by search_engines and convert meta
-                 ch_id_files_feats.combine( sample_map ).branch{ meta, filename, sample_map  ->
+                ch_id_files_feats.combine( sample_map ).branch{ meta, filename, sample_map  ->
                     sage: filename.name.contains('sage')
                         return [comvert_exp_meta(meta, "sample_id", filename, sample_map), filename]
                     msgf: filename.name.contains('msgf')

From 434a60d585e68f539f23435e8a35dd120fc59f98 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Sun, 28 Apr 2024 10:56:00 +0800
Subject: [PATCH 09/19] add new modules

---
 modules/local/extract_sample/main.nf          | 31 +++++++++++
 modules/local/extract_sample/meta.yml         | 27 +++++++++
 modules/local/openms/idmerger/main.nf         | 55 +++++++++++++++++++
 modules/local/openms/idmerger/meta.yml        | 33 +++++++++++
 modules/local/openms/idripper/main.nf         | 53 ++++++++++++++++++
 modules/local/openms/idripper/meta.yml        | 33 +++++++++++
 .../openms/thirdparty/percolator/main.nf      |  2 +-
 subworkflows/local/dda_id.nf                  |  4 +-
 8 files changed, 235 insertions(+), 3 deletions(-)
 create mode 100644 modules/local/extract_sample/main.nf
 create mode 100644 modules/local/extract_sample/meta.yml
 create mode 100644 modules/local/openms/idmerger/main.nf
 create mode 100644 modules/local/openms/idmerger/meta.yml
 create mode 100644 modules/local/openms/idripper/main.nf
 create mode 100644 modules/local/openms/idripper/meta.yml

diff --git a/modules/local/extract_sample/main.nf b/modules/local/extract_sample/main.nf
new file mode 100644
index 00000000..6a7210f4
--- /dev/null
+++ b/modules/local/extract_sample/main.nf
@@ -0,0 +1,31 @@
+process GETSAMPLE {
+    tag "$design.Name"
+    label 'process_low'
+
+    conda "bioconda::sdrf-pipelines=0.0.25"
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.25--pyhdfd78af_0"
+    } else {
+        container "biocontainers/sdrf-pipelines:0.0.25--pyhdfd78af_0"
+    }
+
+    input:
+    path design
+
+    output:
+    path "*_sample.csv", emit: ch_expdesign_sample
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    extract_sample.py "${design}" 2>&1 | tee extract_sample.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sdrf-pipelines: \$(parse_sdrf --version 2>&1 | awk -F ' ' '{print \$2}')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/extract_sample/meta.yml b/modules/local/extract_sample/meta.yml
new file mode 100644
index 00000000..548484ae
--- /dev/null
+++ b/modules/local/extract_sample/meta.yml
@@ -0,0 +1,27 @@
+name: GETSAMPLE
+description: A module to extract sample information from experimental design file
+keywords:
+  - sample
+  - conversion
+tools:
+  - custom:
+      description: |
+        A custom module for sample extraction.
+      homepage: https://github.com/bigbio/quantms
+      documentation: https://github.com/bigbio/quantms/tree/readthedocs
+input:
+  - design:
+      type: file
+      description: experimental design file
+      pattern: "*.csv"
+output:
+  - ch_expdesign_sample:
+      type: file
+      description: sample csv file
+      pattern: "*_sample.csv"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "versions.yml"
+authors:
+  - "@daichengxin"
diff --git a/modules/local/openms/idmerger/main.nf b/modules/local/openms/idmerger/main.nf
new file mode 100644
index 00000000..9fde67e3
--- /dev/null
+++ b/modules/local/openms/idmerger/main.nf
@@ -0,0 +1,55 @@
+process IDMERGER {
+    tag "$meta.mzml_id"
+    label 'process_medium'
+    label 'openms'
+
+    conda "bioconda::openms-thirdparty=3.1.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.1.0--h9ee0642_1' :
+        'biocontainers/openms-thirdparty:3.1.0--h9ee0642_1' }"
+
+    input:
+    tuple val(meta), path(id_files)
+
+    output:
+    tuple val(meta), path("*_merged.idXML"), emit: id_merged
+    path "versions.yml", emit: version
+    path "*.log", emit: log
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.mzml_id}"
+
+    if (params.rescore_range == "by_project") {
+        if (id_files[0].baseName.contains('sage')){
+            prefix = "${meta.experiment_id}_sage"
+        } else if (id_files[0].baseName.contains('comet')){
+            prefix = "${meta.experiment_id}_comet"
+        } else {
+            prefix = "${meta.experiment_id}_msgf"
+        }
+    } else if (params.rescore_range == "by_sample") {
+        if (id_files[0].baseName.contains('sage')){
+            prefix = "${meta.mzml_id}_sage"
+        } else if (id_files[0].baseName.contains('comet')){
+            prefix = "${meta.mzml_id}_comet"
+        } else {
+            prefix = "${meta.mzml_id}_msgf"
+        }
+    }
+
+    """
+    IDMerger \\
+        -in ${id_files.join(' ')} \\
+        -threads $task.cpus \\
+        -out ${prefix}_merged.idXML \\
+        -merge_proteins_add_PSMs \\
+        $args \\
+        2>&1 | tee ${prefix}_merged.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        IDMerger: \$(IDMerger 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/openms/idmerger/meta.yml b/modules/local/openms/idmerger/meta.yml
new file mode 100644
index 00000000..94b5caf5
--- /dev/null
+++ b/modules/local/openms/idmerger/meta.yml
@@ -0,0 +1,33 @@
+name: idmerger
+description: Merges several idXML files into one file.
+keywords:
+  - merge
+  - idXML
+  - OpenMS
+tools:
+  - IDMerger:
+      description: |
+        Merges several idXML files into one file.
+      homepage: https://www.openms.org/documentation/html/TOPP_IDMerger.html
+      documentation: https://www.openms.org/documentation/html/TOPP_IDMerger.html
+input:
+  - id_files:
+      type: file
+      description: |
+        Input files separated by blank.
+      pattern: "*.{idXML}"
+output:
+  - id_merge:
+      type: file
+      description: Output Merged file
+      pattern: "*.{idXML}"
+  - log:
+      type: file
+      description: log file
+      pattern: "*.log"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "versions.yml"
+authors:
+  - "@daichengxin"
diff --git a/modules/local/openms/idripper/main.nf b/modules/local/openms/idripper/main.nf
new file mode 100644
index 00000000..61eee8d9
--- /dev/null
+++ b/modules/local/openms/idripper/main.nf
@@ -0,0 +1,53 @@
+process IDRIPPER {
+    tag "$meta.mzml_id"
+    label 'process_medium'
+    label 'openms'
+
+    conda "bioconda::openms-thirdparty=3.1.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.1.0--h9ee0642_1' :
+        'biocontainers/openms-thirdparty:3.1.0--h9ee0642_1' }"
+
+    input:
+    tuple val(meta), path(id_file), val(qval_score)
+
+    output:
+    val(meta), emit: meta
+    path("*.idXML"), emit: id_rippers
+    val("MS:1001491"), emit: qval_score
+    path "versions.yml", emit: version
+    path "*.log", emit: log
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.mzml_id}"
+
+    if (id_file.baseName.contains('sage')){
+        pattern = "_sage_perc.idXML"
+    } else if (id_file.baseName.contains('comet')){
+        pattern = "_comet_perc.idXML"
+    } else {
+        pattern = "_msgf_perc.idXML"
+    }
+
+    """
+    IDRipper \\
+        -in ${id_file} \\
+        -threads $task.cpus \\
+        -out ./ \\
+        -split_ident_runs \\
+        $args \\
+        2>&1 | tee ${prefix}_idripper.log
+
+    for i in `ls | grep -v \"_perc.idXML\$\" | grep \".idXML\$\"`
+    do
+        mv \$i `ls \"\$i\" |awk -F \".\" \'{print \$1\"${pattern}\"}\'`
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        IDRipper: \$(IDRipper 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1)
+    END_VERSIONS
+    """
+}
+//
diff --git a/modules/local/openms/idripper/meta.yml b/modules/local/openms/idripper/meta.yml
new file mode 100644
index 00000000..81cc525f
--- /dev/null
+++ b/modules/local/openms/idripper/meta.yml
@@ -0,0 +1,33 @@
+name: idripper
+description: Splits the protein/peptide identifications of an idXML file into several idXML files according their annotated file origin.
+keywords:
+  - split
+  - idXML
+  - OpenMS
+tools:
+  - IDMerger:
+      description: |
+        IDRipper splits the protein/peptide identifications of an idXML file into several idXML files according their annotated file origin.
+      homepage: https://www.openms.org/documentation/html/TOPP_IDRipper.html
+      documentation: https://www.openms.org/documentation/html/TOPP_IDRipper.html
+input:
+  - id_file:
+      type: file
+      description: |
+        Input file, in which the protein/peptide identifications must be tagged with 'file_origin'
+      pattern: "*.{idXML}"
+output:
+  - id_rippers:
+      type: file
+      description: Output split files
+      pattern: "*.{idXML}"
+  - log:
+      type: file
+      description: log file
+      pattern: "*.log"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "versions.yml"
+authors:
+  - "@daichengxin"
diff --git a/modules/local/openms/thirdparty/percolator/main.nf b/modules/local/openms/thirdparty/percolator/main.nf
index 7a99dd11..af166e96 100644
--- a/modules/local/openms/thirdparty/percolator/main.nf
+++ b/modules/local/openms/thirdparty/percolator/main.nf
@@ -12,7 +12,7 @@ process PERCOLATOR {
     tuple val(meta), path(id_file)
 
     output:
-    tuple val(meta), path("${id_file.baseName}_perc.idXML"), val("MS:1001491"), emit: id_files_perc
+    tuple val(meta), path("*_perc.idXML"), val("MS:1001491"), emit: id_files_perc
     path "versions.yml", emit: version
     path "*.log", emit: log
 
diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf
index 983decd7..d6b57b55 100644
--- a/subworkflows/local/dda_id.nf
+++ b/subworkflows/local/dda_id.nf
@@ -12,7 +12,7 @@ include { IDPEP                          } from '../../modules/local/openms/idpe
 include { PSMCONVERSION                  } from '../../modules/local/extract_psm/main'
 include { MS2RESCORE                     } from '../../modules/local/ms2rescore/main'
 include { IDSCORESWITCHER                } from '../../modules/local/openms/idscoreswitcher/main'
-include { EXTRACT_SAMPLE                 } from '../../modules/local/extract_sample/main'
+include { GETSAMPLE                      } from '../../modules/local/extract_sample/main'
 
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
@@ -81,7 +81,7 @@ workflow DDA_ID {
                 ch_consensus_input = PERCOLATOR.out.id_files_perc
             } else if (params.rescore_range == "by_sample") {
                 // Sample map
-                EXTRACT_SAMPLE(ch_expdesign)
+                GETSAMPLE(ch_expdesign)
                 ch_expdesign_sample = EXTRACT_SAMPLE.out.ch_expdesign_sample
                 ch_expdesign_sample.splitCsv(header: true, sep: '\t')
                     .map { get_sample_map(it) }.set{ sample_map_idv }

From b648f57d023b879c108f4428c94262df0a726708 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Sun, 28 Apr 2024 19:41:12 +0100
Subject: [PATCH 10/19] Update subworkflows/local/dda_id.nf

Co-authored-by: Jonas Scheid <43858870+jonasscheid@users.noreply.github.com>
---
 subworkflows/local/dda_id.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf
index d6b57b55..0d6c8c83 100644
--- a/subworkflows/local/dda_id.nf
+++ b/subworkflows/local/dda_id.nf
@@ -195,7 +195,7 @@ workflow DDA_ID {
 }
 
 // Function to group by mzML/sample/experiment
-def comvert_exp_meta(Map meta, value, file_name, sample_map) {
+def convert_exp_meta(Map meta, value, file_name, sample_map) {
     def exp_meta = [:]
 
     if (value == "experiment_id") {

From cc01bd226e611cadf423198f994cbc020782f030 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Sun, 28 Apr 2024 19:41:19 +0100
Subject: [PATCH 11/19] Update subworkflows/local/dda_id.nf

Co-authored-by: Jonas Scheid <43858870+jonasscheid@users.noreply.github.com>
---
 subworkflows/local/dda_id.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf
index 0d6c8c83..a4a83c33 100644
--- a/subworkflows/local/dda_id.nf
+++ b/subworkflows/local/dda_id.nf
@@ -112,7 +112,6 @@ workflow DDA_ID {
                 IDRIPPER.out.meta.first().combine(IDRIPPER.out.id_rippers.flatten())
                     .map{ [comvert_exp_meta(it[0], "mzml_id", it[1], ""), it[1], "MS:1001491"] }
                     .set{ ch_consensus_input }
-                ch_consensus_input.view()
                 ch_software_versions = ch_software_versions.mix(IDRIPPER.out.version)
 
             } else if (params.rescore_range == "by_project"){

From 4fbbd0c5c9bee113ba95e8b9a1a11e8b16f35bf3 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Mon, 6 May 2024 19:18:08 +0800
Subject: [PATCH 12/19] fixed

---
 conf/modules.config                            |  7 ++++---
 .../local/openms/extractpsmfeatures/main.nf    |  8 ++++++--
 nextflow.config                                |  1 +
 nextflow_schema.json                           |  7 +++++++
 subworkflows/local/dda_id.nf                   | 18 +++++++++---------
 5 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index a168d867..52e477a6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -68,7 +68,8 @@ if (params.add_decoys) {
 if (params.posterior_probabilities == "percolator") {
     process {
         // EXTRACTPSMFEATURE
-        withName: '.*:ID:PSMRESCORING:EXTRACTPSMFEATURES' {
+        withName: '.*:EXTRACTPSMFEATURES' {
+            ext.args    = "-debug $params.extractpsmfeature_debug"
             publishDir  = [
                 path: { "${params.outdir}/extractpsmfeature" },
                 mode: params.publish_dir_mode,
@@ -78,14 +79,14 @@ if (params.posterior_probabilities == "percolator") {
         }
 
         //PERCOLATOR
-        withName: '.*:ID:PSMRESCORING:PERCOLATOR' {
+        withName: '.*:PERCOLATOR' {
             ext.args    = "-debug $params.percolator_debug"
         }
     }
 } else {
     process {
         // IDPEP
-        withName: '.*:ID:PSMRESCORING:IDPEP' {
+        withName: '.*:IDPEP' {
             ext.args    = "-debug $params.idpep_debug"
         }
     }
diff --git a/modules/local/openms/extractpsmfeatures/main.nf b/modules/local/openms/extractpsmfeatures/main.nf
index b430fc2c..503aff55 100644
--- a/modules/local/openms/extractpsmfeatures/main.nf
+++ b/modules/local/openms/extractpsmfeatures/main.nf
@@ -20,14 +20,18 @@ process EXTRACTPSMFEATURES {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.mzml_id}"
-    def feature = (params.ms2rescore == true) && (params.id_only == true) ? "-extra ${extra_feat}" : ""
+
+    feature = ""
+    if (params.ms2rescore && params.id_only) {
+        feature = "-extra \$(awk 'NR > 1 && \$1 !~ /psm_file/ {printf \"%s \", \$2}' ${extra_feat})"
+    }
 
     """
     PSMFeatureExtractor \\
         -in ${id_file} \\
         -out ${id_file.baseName}_feat.idXML \\
         -threads $task.cpus \\
-        -extra ${feature} \\
+        ${feature} \\
         $args \\
         2>&1 | tee ${id_file.baseName}_extract_psm_feature.log
 
diff --git a/nextflow.config b/nextflow.config
index fb41a50c..ad30cb2e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -33,6 +33,7 @@ params {
     // Debug level
     decoydatabase_debug     = 0
     pp_debug                = 0
+    extractpsmfeature_debug = 0
     idfilter_debug          = 0
     idscoreswitcher_debug   = 0
     iso_debug               = 0
diff --git a/nextflow_schema.json b/nextflow_schema.json
index de62b9bc..1ad00b40 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -485,6 +485,13 @@
                     "default": 0.1,
                     "fa_icon": "fas fa-filter"
                 },
+                "extractpsmfeature_debug": {
+                    "type": "integer",
+                    "description": "Debug level when running the PSMFeatureExtractor step. Increase for verbose logging",
+                    "fa_icon": "fas fa-bug",
+                    "hidden": true,
+                    "default": 0
+                },
                 "idfilter_debug": {
                     "type": "integer",
                     "description": "Debug level when running the IDFilter step. Increase for verbose logging",
diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf
index a4a83c33..e04f851a 100644
--- a/subworkflows/local/dda_id.nf
+++ b/subworkflows/local/dda_id.nf
@@ -91,11 +91,11 @@ workflow DDA_ID {
                 // Group by search_engines and convert meta
                 ch_id_files_feats.combine( sample_map ).branch{ meta, filename, sample_map  ->
                     sage: filename.name.contains('sage')
-                        return [comvert_exp_meta(meta, "sample_id", filename, sample_map), filename]
+                        return [convert_exp_meta(meta, "sample_id", filename, sample_map), filename]
                     msgf: filename.name.contains('msgf')
-                        return [comvert_exp_meta(meta, "sample_id", filename, sample_map), filename]
+                        return [convert_exp_meta(meta, "sample_id", filename, sample_map), filename]
                     comet: filename.name.contains('comet')
-                        return [comvert_exp_meta(meta, "sample_id", filename, sample_map), filename]
+                        return [convert_exp_meta(meta, "sample_id", filename, sample_map), filename]
                 }.set{ch_id_files_feat_branched}
 
                 // IDMERGER for samples group
@@ -110,7 +110,7 @@ workflow DDA_ID {
                 // Currently only ID runs on exactly one mzML file are supported in CONSENSUSID. Split idXML by runs
                 IDRIPPER(PERCOLATOR.out.id_files_perc)
                 IDRIPPER.out.meta.first().combine(IDRIPPER.out.id_rippers.flatten())
-                    .map{ [comvert_exp_meta(it[0], "mzml_id", it[1], ""), it[1], "MS:1001491"] }
+                    .map{ [convert_exp_meta(it[0], "mzml_id", it[1], ""), it[1], "MS:1001491"] }
                     .set{ ch_consensus_input }
                 ch_software_versions = ch_software_versions.mix(IDRIPPER.out.version)
 
@@ -118,11 +118,11 @@ workflow DDA_ID {
                 // Split ch_id_files_feats by search_engines
                 ch_id_files_feats.branch{ meta, filename ->
                     sage: filename.name.contains('sage')
-                        return [comvert_exp_meta(meta, "experiment_id", filename, ""), filename]
+                        return [convert_exp_meta(meta, "experiment_id", filename, ""), filename]
                     msgf: filename.name.contains('msgf')
-                        return [comvert_exp_meta(meta, "experiment_id", filename, ""), filename]
+                        return [convert_exp_meta(meta, "experiment_id", filename, ""), filename]
                     comet: filename.name.contains('comet')
-                        return [comvert_exp_meta(meta, "experiment_id", filename, ""), filename]
+                        return [convert_exp_meta(meta, "experiment_id", filename, ""), filename]
                 }.set{ch_id_files_feat_branched}
 
                 // IDMERGER for whole experiments
@@ -137,7 +137,7 @@ workflow DDA_ID {
                 // Currently only ID runs on exactly one mzML file are supported in CONSENSUSID. Split idXML by runs
                 IDRIPPER(PERCOLATOR.out.id_files_perc)
                 IDRIPPER.out.meta.first().combine(IDRIPPER.out.id_rippers.flatten())
-                    .map{ [comvert_exp_meta(it[0], "mzml_id", it[1], ""), it[1], "MS:1001491"] }
+                    .map{ [convert_exp_meta(it[0], "mzml_id", it[1], ""), it[1], "MS:1001491"] }
                     .set{ ch_consensus_input }
                 ch_software_versions = ch_software_versions.mix(IDRIPPER.out.version)
 
@@ -244,7 +244,7 @@ def convert_exp_meta(Map meta, value, file_name, sample_map) {
     exp_meta.fragmentmasstoleranceunit  = meta.fragmentmasstoleranceunit
     exp_meta.enzyme                     = meta.enzyme
     exp_meta.acquisition_method         = meta.acquisition_method
-    print(exp_meta)
+
     return exp_meta
 }
 

From 4330b09c32027ec8642ccd00ccb9c25b69ed0d53 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Mon, 6 May 2024 19:40:22 +0800
Subject: [PATCH 13/19] Update main.nf

---
 subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
index a8b55d6f..98e4b691 100644
--- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) {
 // Citation string for pipeline
 //
 def workflowCitation() {
+    def temp_doi_ref = ""
+    String[] manifest_doi = workflow.manifest.doi.tokenize(",")
+    // Using a loop to handle multiple DOIs
+    // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+    // Removing ` ` since the manifest.doi is a string and not a proper list
+    for (String doi_ref: manifest_doi) temp_doi_ref += "  https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n"
     return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
         "* The pipeline\n" +
-        "  ${workflow.manifest.doi}\n\n" +
+        temp_doi_ref + "\n" +
         "* The nf-core framework\n" +
         "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
         "* Software dependencies\n" +
@@ -437,4 +443,4 @@ def imNotification(summary_params, hook_url) {
     if (! postRC.equals(200)) {
         log.warn(post.getErrorStream().getText());
     }
-}
+}
\ No newline at end of file

From 7cd22afd51140f5ec2f4667a98cea8119fcfe64c Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Mon, 6 May 2024 20:21:46 +0800
Subject: [PATCH 14/19] Update ms2rescore_cli.py

---
 bin/ms2rescore_cli.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bin/ms2rescore_cli.py b/bin/ms2rescore_cli.py
index 61e517e9..7c607bc4 100644
--- a/bin/ms2rescore_cli.py
+++ b/bin/ms2rescore_cli.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# Written by Jonas Scheid under the MIT license
 
 import sys
 import click

From 77b2b2b7c6de8b875ed3a23a39914b511fecdfce Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Mon, 6 May 2024 20:35:55 +0800
Subject: [PATCH 15/19] Update modules.json

---
 modules.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules.json b/modules.json
index beb0405b..8bd8a9c6 100644
--- a/modules.json
+++ b/modules.json
@@ -26,7 +26,7 @@
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
-                        "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+                        "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
                         "installed_by": ["subworkflows"]
                     },
                     "utils_nfvalidation_plugin": {

From d3cab9dc08f8abe312230c34949c306c7582ff09 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Mon, 6 May 2024 21:07:27 +0800
Subject: [PATCH 16/19] Update main.nf

---
 subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
index 98e4b691..a8b55d6f 100644
--- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -65,15 +65,9 @@ def checkProfileProvided(nextflow_cli_args) {
 // Citation string for pipeline
 //
 def workflowCitation() {
-    def temp_doi_ref = ""
-    String[] manifest_doi = workflow.manifest.doi.tokenize(",")
-    // Using a loop to handle multiple DOIs
-    // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
-    // Removing ` ` since the manifest.doi is a string and not a proper list
-    for (String doi_ref: manifest_doi) temp_doi_ref += "  https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n"
     return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
         "* The pipeline\n" +
-        temp_doi_ref + "\n" +
+        "  ${workflow.manifest.doi}\n\n" +
         "* The nf-core framework\n" +
         "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
         "* Software dependencies\n" +
@@ -443,4 +437,4 @@ def imNotification(summary_params, hook_url) {
     if (! postRC.equals(200)) {
         log.warn(post.getErrorStream().getText());
     }
-}
\ No newline at end of file
+}

From 1d978f3a8fdd4be0a050241f967563f5c04d0176 Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Mon, 6 May 2024 21:25:51 +0800
Subject: [PATCH 17/19] try

---
 subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
index a8b55d6f..14558c39 100644
--- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) {
 // Citation string for pipeline
 //
 def workflowCitation() {
+    def temp_doi_ref = ""
+    String[] manifest_doi = workflow.manifest.doi.tokenize(",")
+    // Using a loop to handle multiple DOIs
+    // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+    // Removing ` ` since the manifest.doi is a string and not a proper list
+    for (String doi_ref: manifest_doi) temp_doi_ref += "  https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n"
     return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
         "* The pipeline\n" +
-        "  ${workflow.manifest.doi}\n\n" +
+        temp_doi_ref + "\n" +
         "* The nf-core framework\n" +
         "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
         "* Software dependencies\n" +

From c41f628378bfc76a829d0a1b926b3789b2f7aaca Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Tue, 7 May 2024 00:30:50 +0800
Subject: [PATCH 18/19] fixed

---
 conf/modules.config  | 5 ++++-
 nextflow.config      | 2 +-
 nextflow_schema.json | 8 ++++----
 workflows/quantms.nf | 3 +++
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 52e477a6..89d3bc4b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -80,7 +80,10 @@ if (params.posterior_probabilities == "percolator") {
 
         //PERCOLATOR
         withName: '.*:PERCOLATOR' {
-            ext.args    = "-debug $params.percolator_debug"
+            ext.args    = [
+                "-debug $params.percolator_debug",
+                (params.fdr_level != 'psm_level_fdrs') ? "-" + params.fdr_level : ""
+            ].join(' ').trim()
         }
     }
 } else {
diff --git a/nextflow.config b/nextflow.config
index ad30cb2e..2461e68e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -124,7 +124,7 @@ params {
     // Percolator flags
     train_FDR                    = 0.05
     test_FDR                     = 0.05
-    FDR_level                    = 'peptide-level-fdrs'
+    fdr_level                    = 'peptide_level_fdrs'
     klammer                      = false
     description_correct_features = 0
     subset_max_train             = 300000
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 1ad00b40..746e312c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -522,12 +522,12 @@
             "description": "In the following you can find help for the Percolator specific options that are only used if [`--posterior_probabilities`](#posterior_probabilities) was set to 'percolator'.\nNote that there are currently some restrictions to the original options of Percolator:\n\n* no Percolator protein FDR possible (currently OpenMS' FDR is used on protein level)\n* no support for separate target and decoy databases (i.e. no min-max q-value calculation or target-decoy competition strategy)\n* no support for combined or experiment-wide peptide re-scoring. Currently search results per input file are submitted to Percolator independently.",
             "default": "",
             "properties": {
-                "FDR_level": {
+                "fdr_level": {
                     "type": "string",
-                    "description": "Calculate FDR on PSM ('psm-level-fdrs') or peptide level ('peptide-level-fdrs')?",
-                    "default": "peptide-level-fdrs",
+                    "description": "Calculate FDR on PSM ('psm_level_fdrs') or peptide level ('peptide_level_fdrs')?",
+                    "default": "peptide_level_fdrs",
                     "fa_icon": "fas fa-list-ol",
-                    "enum": ["peptide-level-fdrs", "psm-level-fdrs"]
+                    "enum": ["peptide_level_fdrs", "psm_level_fdrs"]
                 },
                 "train_FDR": {
                     "type": "number",
diff --git a/workflows/quantms.nf b/workflows/quantms.nf
index 82c9a870..7645dfb4 100644
--- a/workflows/quantms.nf
+++ b/workflows/quantms.nf
@@ -109,6 +109,9 @@ workflow QUANTMS {
         if (params.id_only == false) {
             log.warn "The mokapot rescoring engine currently only is supported in id_only subworkflow via ms2rescore."
         }
+        if (params.posterior_probabilities == "mokapot" && params.FDR_level == "peptide-level-fdrs") {
+                log.warn "The rescoring engine is set to mokapot. This rescoring engine currently only supports psm-level-fdr via ms2rescore."
+        }
         DDA_ID( FILE_PREPARATION.out.results, ch_searchengine_in_db, FILE_PREPARATION.out.spectrum_data, CREATE_INPUT_CHANNEL.out.ch_expdesign)
         ch_versions = ch_versions.mix(DDA_ID.out.version.ifEmpty(null))
     } else {

From bae2e3e43aea202818adf79e2de3c23ee77f193b Mon Sep 17 00:00:00 2001
From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com>
Date: Tue, 7 May 2024 11:22:20 +0800
Subject: [PATCH 19/19] fixed typo

---
 conf/test_full_lfq.config | 2 +-
 conf/test_full_tmt.config | 2 +-
 conf/test_localize.config | 1 +
 workflows/quantms.nf      | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/conf/test_full_lfq.config b/conf/test_full_lfq.config
index fe17eb44..4e4684f1 100644
--- a/conf/test_full_lfq.config
+++ b/conf/test_full_lfq.config
@@ -29,5 +29,5 @@ params {
     add_decoys = true
     add_triqler_output = true
     protein_level_fdr_cutoff = 0.01
-    psm_pep_fdr_cutoff = 0.01
+    psm_level_fdr_cutoff = 0.01
 }
diff --git a/conf/test_full_tmt.config b/conf/test_full_tmt.config
index d4b8469f..11eea647 100644
--- a/conf/test_full_tmt.config
+++ b/conf/test_full_tmt.config
@@ -27,7 +27,7 @@ params {
     posterior_probabilities = "percolator"
     search_engines = "comet,msgf"
     protein_level_fdr_cutoff = 0.01
-    psm_pep_fdr_cutoff = 0.01
+    psm_level_fdr_cutoff = 0.01
     add_decoys = true
     protocol = 'TMT'
 }
diff --git a/conf/test_localize.config b/conf/test_localize.config
index 3ed7a152..ef32bddd 100644
--- a/conf/test_localize.config
+++ b/conf/test_localize.config
@@ -30,4 +30,5 @@ params {
     psm_level_fdr_cutoff = 0.50
     skip_post_msstats = true
     quantify_decoys = true
+    fdr_level = "psm_level_fdrs"
 }
diff --git a/workflows/quantms.nf b/workflows/quantms.nf
index 7645dfb4..e45a6c64 100644
--- a/workflows/quantms.nf
+++ b/workflows/quantms.nf
@@ -109,7 +109,7 @@ workflow QUANTMS {
         if (params.id_only == false) {
             log.warn "The mokapot rescoring engine currently only is supported in id_only subworkflow via ms2rescore."
         }
-        if (params.posterior_probabilities == "mokapot" && params.FDR_level == "peptide-level-fdrs") {
+        if (params.posterior_probabilities == "mokapot" && params.fdr_level == "peptide_level_fdrs") {
                 log.warn "The rescoring engine is set to mokapot. This rescoring engine currently only supports psm-level-fdr via ms2rescore."
         }
         DDA_ID( FILE_PREPARATION.out.results, ch_searchengine_in_db, FILE_PREPARATION.out.spectrum_data, CREATE_INPUT_CHANNEL.out.ch_expdesign)