diff --git a/crates/sage/src/scoring.rs b/crates/sage/src/scoring.rs index 46d5a012..ad873a60 100644 --- a/crates/sage/src/scoring.rs +++ b/crates/sage/src/scoring.rs @@ -21,6 +21,7 @@ struct Score { ppm_difference: f32, precursor_charge: u8, isotope_error: i8, + precursor_index: usize, } /// Preliminary score - # of matched peaks for each candidate peptide @@ -30,6 +31,7 @@ struct PreScore { peptide: PeptideIx, precursor_charge: u8, isotope_error: i8, + precursor_index: usize, } /// Store preliminary scores & stats for first pass search for a query spectrum @@ -327,47 +329,74 @@ impl<'db> Scorer<'db> { } } - fn initial_hits(&self, query: &ProcessedSpectrum, precursor: &Precursor) -> InitialHits { + fn initial_hits(&self, query: &ProcessedSpectrum, precursors: &[Precursor]) -> InitialHits { // Sage operates on masses without protons; [M] instead of [MH+] - let mz = precursor.mz - PROTON; + let mzs = precursors + .iter() + .map(|p| p.mz - PROTON) + .collect::>(); // Search in wide-window/DIA mode if self.wide_window { - let mut hits = (self.min_precursor_charge..=self.max_precursor_charge).fold( - InitialHits::default(), - |mut hits, precursor_charge| { - let precursor_mass = mz * precursor_charge as f32; - let precursor_tol = precursor - .isolation_window - .unwrap_or(Tolerance::Da(-2.4, 2.4)) - * precursor_charge as f32; - hits += - self.matched_peaks(query, precursor_mass, precursor_charge, precursor_tol); - hits - }, - ); + let mut hits = InitialHits::default(); + for (i, (precursor, mz)) in precursors.iter().zip(mzs).enumerate() { + let local_hits = (self.min_precursor_charge..=self.max_precursor_charge).fold( + InitialHits::default(), + |mut local_hits, precursor_charge| { + let precursor_mass = mz * precursor_charge as f32; + let precursor_tol = precursor + .isolation_window + .unwrap_or(Tolerance::Da(-2.4, 2.4)) + * precursor_charge as f32; + let mut matched_peaks = self.matched_peaks( + query, + precursor_mass, + precursor_charge, + precursor_tol, + ); + matched_peaks.preliminary.iter_mut().for_each(|score| { + score.precursor_index = i; + }); + + local_hits += matched_peaks; + local_hits + }, + ); + hits += local_hits; + } self.trim_hits(&mut hits); hits - } else if let Some(charge) = precursor.charge { + } else if precursors.len() == 1 && precursors.first().unwrap().charge.is_some() { + let charge = precursors.first().unwrap().charge.unwrap(); + let mz = mzs[0]; // Charge state is already annotated for this precusor, only search once let precursor_mass = mz * charge as f32; self.matched_peaks(query, precursor_mass, charge, self.precursor_tol) } else { // Not all selected ion precursors have charge states annotated - // assume it could be z=2, z=3, z=4 and search all three - let mut hits = (self.min_precursor_charge..=self.max_precursor_charge).fold( - InitialHits::default(), - |mut hits, precursor_charge| { - let precursor_mass = mz * precursor_charge as f32; - hits += self.matched_peaks( - query, - precursor_mass, - precursor_charge, - self.precursor_tol, - ); - hits - }, - ); + let mut hits = InitialHits::default(); + for (i, mz) in mzs.iter().enumerate() { + let local_hits = (self.min_precursor_charge..=self.max_precursor_charge).fold( + InitialHits::default(), + |mut local_hits, precursor_charge| { + let precursor_mass = mz * precursor_charge as f32; + let mut matched_peaks = self.matched_peaks( + query, + precursor_mass, + precursor_charge, + self.precursor_tol, + ); + matched_peaks.preliminary.iter_mut().for_each(|score| { + score.precursor_index = i; + }); + + local_hits += matched_peaks; + local_hits + }, + ); + hits += local_hits; + } self.trim_hits(&mut hits); hits } @@ -375,13 +404,19 @@ impl<'db> Scorer<'db> { /// Score a single [`ProcessedSpectrum`] against the database pub fn score_standard(&self, query: &ProcessedSpectrum) -> Vec { - let precursor = query.precursors.get(0).unwrap_or_else(|| { - panic!("missing MS1 precursor for {}", query.id); - }); + if query.precursors.is_empty() { + panic!("missing precursor for {}", query.id); + } - let hits = self.initial_hits(query, precursor); + let hits = self.initial_hits(query, &query.precursors); let mut features = Vec::with_capacity(self.report_psms); - self.build_features(query, precursor, &hits, self.report_psms, &mut features); + self.build_features( + query, + &query.precursors, + &hits, + self.report_psms, + &mut features, + ); features } @@ -390,7 +425,7 @@ impl<'db> Scorer<'db> { fn build_features( &self, query: &ProcessedSpectrum, - precursor: &Precursor, + precursors: &[Precursor], hits: &InitialHits, report_psms: usize, features: &mut Vec, @@ -410,15 +445,14 @@ impl<'db> Scorer<'db> { // (average # of matches peaks/peptide candidate) let lambda = hits.matched_peaks as f64 / hits.scored_candidates as f64; - // Sage operates on masses without protons; [M] instead of [MH+] - let mz = precursor.mz - PROTON; - for idx in 0..report_psms.min(score_vector.len()) { let score = score_vector[idx].0; let fragments: Option = score_vector[idx].1.take(); let psm_id = increment_psm_counter(); let peptide = &self.db[score.peptide]; + // Sage operates on masses without protons; [M] instead of [MH+] + let mz = precursors[score.precursor_index].mz - PROTON; let precursor_mass = mz * score.precursor_charge as f32; let next = score_vector @@ -539,18 +573,18 @@ impl<'db> Scorer<'db> { /// Return multiple PSMs for each spectra - first is the best match, second PSM is the best match /// after all theoretical peaks assigned to the best match are removed, etc pub fn score_chimera_fast(&self, query: &ProcessedSpectrum) -> Vec { - let precursor = query.precursors.get(0).unwrap_or_else(|| { - panic!("missing MS1 precursor for {}", query.id); - }); + if query.precursors.is_empty() { + panic!("missing precursor for {}", query.id); + } let mut query = query.clone(); - let hits = self.initial_hits(&query, precursor); + let hits = self.initial_hits(&query, &query.precursors); let mut candidates: Vec = Vec::with_capacity(self.report_psms); let mut prev = 0; while candidates.len() < self.report_psms { - self.build_features(&query, precursor, &hits, 1, &mut candidates); + self.build_features(&query, &query.precursors, &hits, 1, &mut candidates); if candidates.len() > prev { if let Some(feat) = candidates.get_mut(prev) { self.remove_matched_peaks(&mut query, feat); @@ -574,6 +608,7 @@ impl<'db> Scorer<'db> { peptide: pre_score.peptide, precursor_charge: pre_score.precursor_charge, isotope_error: pre_score.isotope_error, + precursor_index: pre_score.precursor_index, ..Default::default() }; let peptide = &self.db[score.peptide];