diff --git a/CHANGELOG.md b/CHANGELOG.md index f2e9c664..7a84772e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v0.15.0-alpha] (unreleased) +### Added +- Initial support for searching diaPASEF data +### Changed +- Don't deisotope reporter ion regions if MS2-based TMT/iTRAQ is used +- Removed `fragment_min_mz` and `fragment_max_mz` parameters. These were decreasing the accuracy of preliminary scoring estimation when attempting to annotate multiply-charged, high-m/z ions. + ## [v0.14.7] ### Added - Added columns missing from parquet output: `semi_enzymatic` and `missed_cleavages` diff --git a/Cargo.lock b/Cargo.lock index 6a03aa71..2937d57c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2281,7 +2281,7 @@ dependencies = [ [[package]] name = "sage-cloudpath" -version = "0.14.7" +version = "0.15.0-alpha" dependencies = [ "async-compression", "aws-config", @@ -2307,7 +2307,7 @@ dependencies = [ [[package]] name = "sage-core" -version = "0.14.7" +version = "0.15.0-alpha" dependencies = [ "dashmap", "fnv", diff --git a/crates/sage-cli/src/main.rs b/crates/sage-cli/src/main.rs index d64bbdc2..980573a8 100644 --- a/crates/sage-cli/src/main.rs +++ b/crates/sage-cli/src/main.rs @@ -184,21 +184,16 @@ impl Runner { .sn .then_some(self.parameters.quant.tmt_settings.level); - let (min_fragment_mz, min_deisotope_mz) = match &self.parameters.quant.tmt { + let min_deisotope_mz = match &self.parameters.quant.tmt { Some(i) => match self.parameters.quant.tmt_settings.level { - 2 => ( - i.reporter_masses().first().map(|x| x * (1.0 - 20E-6)), - i.reporter_masses().last().map(|x| x * (1.0 + 20E-6)), - ), - _ => (None, None), + 2 => i.reporter_masses().last().map(|x| x * (1.0 + 20E-6)), + _ => None, }, - None => (None, None), + None => None, }; let sp = SpectrumProcessor::new( self.parameters.max_peaks, - min_fragment_mz.unwrap_or(self.parameters.database.fragment_min_mz), - self.parameters.database.fragment_max_mz, self.parameters.deisotope, min_deisotope_mz.unwrap_or(0.0), ); @@ -268,8 +263,6 @@ impl Runner { min_precursor_charge: self.parameters.precursor_charge.0, max_precursor_charge: self.parameters.precursor_charge.1, max_fragment_charge: self.parameters.max_fragment_charge, - min_fragment_mass: self.parameters.database.fragment_min_mz, - max_fragment_mass: self.parameters.database.fragment_max_mz, chimera: self.parameters.chimera, report_psms: self.parameters.report_psms, wide_window: self.parameters.wide_window, diff --git a/crates/sage-cli/tests/integration.rs b/crates/sage-cli/tests/integration.rs index 636ae4a5..a22bcd21 100644 --- a/crates/sage-cli/tests/integration.rs +++ b/crates/sage-cli/tests/integration.rs @@ -13,7 +13,7 @@ fn integration() -> anyhow::Result<()> { let spectra = sage_cloudpath::util::read_mzml("../../tests/LQSRPAAPPAPGPGQLTLR.mzML", 0, None)?; assert_eq!(spectra.len(), 1); - let sp = SpectrumProcessor::new(100, 0.0, 1500.0, true, 0.0); + let sp = SpectrumProcessor::new(100, true, 0.0); let processed = sp.process(spectra[0].clone()); assert!(processed.peaks.len() <= 300); @@ -27,8 +27,6 @@ fn integration() -> anyhow::Result<()> { min_precursor_charge: 2, max_precursor_charge: 4, max_fragment_charge: Some(1), - min_fragment_mass: 0.0, - max_fragment_mass: 1500.0, chimera: false, report_psms: 1, wide_window: false, diff --git a/crates/sage-cloudpath/Cargo.toml b/crates/sage-cloudpath/Cargo.toml index d69c9ea2..d3292488 100644 --- a/crates/sage-cloudpath/Cargo.toml +++ b/crates/sage-cloudpath/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sage-cloudpath" -version = "0.14.7" +version = "0.15.0-alpha" authors = ["Michael Lazear , pub enzyme: Option, - /// Minimum fragment m/z that will be stored in the database - pub fragment_min_mz: Option, - /// Maximum fragment m/z that will be stored in the database - pub fragment_max_mz: Option, /// Minimum peptide monoisotopic mass that will be fragmented pub peptide_min_mass: Option, /// Maximum peptide monoisotopic mass that will be fragmented @@ -95,8 +91,6 @@ impl Builder { let bucket_size = self.bucket_size.unwrap_or(8192).next_power_of_two(); Parameters { bucket_size, - fragment_min_mz: self.fragment_min_mz.unwrap_or(150.0), - fragment_max_mz: self.fragment_max_mz.unwrap_or(2000.0), peptide_min_mass: self.peptide_min_mass.unwrap_or(500.0), peptide_max_mass: self.peptide_max_mass.unwrap_or(5000.0), ion_kinds: self.ion_kinds.unwrap_or(vec![Kind::B, Kind::Y]), @@ -120,8 +114,6 @@ impl Builder { pub struct Parameters { pub bucket_size: usize, pub enzyme: EnzymeBuilder, - pub fragment_min_mz: f32, - pub fragment_max_mz: f32, pub peptide_min_mass: f32, pub peptide_max_mass: f32, pub ion_kinds: Vec, @@ -236,8 +228,6 @@ impl Parameters { } }; ion_idx_filter - && ion.monoisotopic_mass >= self.fragment_min_mz - && ion.monoisotopic_mass <= self.fragment_max_mz }) .map(move |(_, ion)| Theoretical { peptide_index: PeptideIx(idx as u32), @@ -426,8 +416,17 @@ pub struct IndexedQuery<'d> { impl<'d> IndexedQuery<'d> { /// Search for a specified `fragment_mz` within the database - pub fn page_search(&self, fragment_mz: f32) -> impl Iterator { - let (fragment_lo, fragment_hi) = self.fragment_tol.bounds(fragment_mz); + pub fn page_search(&self, fragment_mz: f32, charge: u8) -> impl Iterator { + let mass = fragment_mz * charge as f32; + + // Account for multiplication of observed decharged mass + // - relative tolerance needs to be proportionally decreased + let tol = match self.fragment_tol { + Tolerance::Ppm(lo, hi) => Tolerance::Ppm(lo / charge as f32, hi / charge as f32), + Tolerance::Da(_, _) => self.fragment_tol, + }; + + let (fragment_lo, fragment_hi) = tol.bounds(mass); let (precursor_lo, precursor_hi) = self.precursor_tol.bounds(self.precursor_mass); // Locate the left and right page indices that contain matching fragments @@ -587,8 +586,6 @@ mod test { max_len: Some(10), ..Default::default() }, - fragment_min_mz: 100.0, - fragment_max_mz: 1000.0, peptide_min_mass: 150.0, peptide_max_mass: 5000.0, ion_kinds: vec![Kind::B, Kind::Y], diff --git a/crates/sage/src/scoring.rs b/crates/sage/src/scoring.rs index dcbed951..169159e4 100644 --- a/crates/sage/src/scoring.rs +++ b/crates/sage/src/scoring.rs @@ -183,8 +183,6 @@ pub struct Scorer<'db> { pub min_precursor_charge: u8, pub max_precursor_charge: u8, pub max_fragment_charge: Option, - pub min_fragment_mass: f32, - pub max_fragment_mass: f32, pub chimera: bool, pub report_psms: usize, @@ -270,8 +268,7 @@ impl<'db> Scorer<'db> { for peak in query.peaks.iter() { for charge in 1..max_fragment_charge { - let mass = peak.mass * charge as f32; - for frag in candidates.page_search(mass) { + for frag in candidates.page_search(peak.mass, charge) { let idx = frag.peptide_index.0 as usize - candidates.pre_idx_lo; let sc = &mut hits.preliminary[idx]; if sc.matched == 0 { @@ -280,6 +277,7 @@ impl<'db> Scorer<'db> { sc.peptide = frag.peptide_index; sc.isotope_error = isotope_error; } + sc.matched += 1; hits.matched_peaks += 1; } @@ -598,6 +596,7 @@ impl<'db> Scorer<'db> { for charge in 1..max_fragment_charge { // Experimental peaks are multipled by charge, therefore theoretical are divided let mz = frag.monoisotopic_mass / charge as f32; + if let Some(peak) = crate::spectrum::select_most_intense_peak( &query.peaks, mz, diff --git a/crates/sage/src/spectrum.rs b/crates/sage/src/spectrum.rs index 756277fc..e8fb4c0d 100644 --- a/crates/sage/src/spectrum.rs +++ b/crates/sage/src/spectrum.rs @@ -39,8 +39,6 @@ pub struct Deisotoped { #[derive(Debug, Clone)] pub struct SpectrumProcessor { pub take_top_n: usize, - pub max_fragment_mz: f32, - pub min_fragment_mz: f32, pub min_deisotope_mz: f32, pub deisotope: bool, } @@ -262,17 +260,9 @@ impl SpectrumProcessor { /// * `min_fragment_mz`: Keep only fragments >= this m/z /// * `max_fragment_mz`: Keep only fragments <= this m/z /// * `deisotope`: Perform deisotoping & charge state deconvolution - pub fn new( - take_top_n: usize, - min_fragment_mz: f32, - max_fragment_mz: f32, - deisotope: bool, - min_deisotope_mz: f32, - ) -> Self { + pub fn new(take_top_n: usize, deisotope: bool, min_deisotope_mz: f32) -> Self { Self { take_top_n, - min_fragment_mz, - max_fragment_mz, min_deisotope_mz, deisotope, } @@ -310,11 +300,7 @@ impl SpectrumProcessor { peaks .into_iter() - .filter(|peak| { - peak.envelope.is_none() - && peak.mz >= self.min_fragment_mz - && peak.mz <= self.max_fragment_mz - }) + .filter(|peak| peak.envelope.is_none()) .map(|peak| { // Convert from MH* to M let mass = (peak.mz - PROTON) * peak.charge.unwrap_or(1) as f32; @@ -330,7 +316,6 @@ impl SpectrumProcessor { .mz .iter() .zip(spectrum.intensity.iter()) - .filter(|&(mz, _)| *mz >= self.min_fragment_mz && *mz <= self.max_fragment_mz) .map(|(mz, &intensity)| { let mass = (mz - PROTON) * 1.0; Peak { mass, intensity } diff --git a/crates/sage/tests/integration.rs b/crates/sage/tests/integration.rs index 63d54cbb..ac57f956 100644 --- a/crates/sage/tests/integration.rs +++ b/crates/sage/tests/integration.rs @@ -61,7 +61,7 @@ fn check_all_ions_visited(target_fragment_mz: f32, bucket_size: usize) { // are returned to us by searching the database. let query = database.query(1000.0, Tolerance::Da(-5000.0, 5000.0), fragment_tol); - for fragment in query.page_search(target_fragment_mz) { + for fragment in query.page_search(target_fragment_mz, 1) { visited[fragment.peptide_index.0 as usize] += 1; }