Skip to content

Commit

Permalink
first branch commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jspaezp committed Feb 5, 2024
1 parent 0cc7ca4 commit 840efab
Showing 1 changed file with 78 additions and 43 deletions.
121 changes: 78 additions & 43 deletions crates/sage/src/scoring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ struct Score {
ppm_difference: f32,
precursor_charge: u8,
isotope_error: i8,
precursor_index: usize,
}

/// Preliminary score - # of matched peaks for each candidate peptide
Expand All @@ -30,6 +31,7 @@ struct PreScore {
peptide: PeptideIx,
precursor_charge: u8,
isotope_error: i8,
precursor_index: usize,
}

/// Store preliminary scores & stats for first pass search for a query spectrum
Expand Down Expand Up @@ -327,61 +329,94 @@ impl<'db> Scorer<'db> {
}
}

fn initial_hits(&self, query: &ProcessedSpectrum, precursor: &Precursor) -> InitialHits {
fn initial_hits(&self, query: &ProcessedSpectrum, precursors: &[Precursor]) -> InitialHits {
// Sage operates on masses without protons; [M] instead of [MH+]
let mz = precursor.mz - PROTON;
let mzs = precursors
.iter()
.map(|p| p.mz - PROTON)
.collect::<Vec<f32>>();

// Search in wide-window/DIA mode
if self.wide_window {
let mut hits = (self.min_precursor_charge..=self.max_precursor_charge).fold(
InitialHits::default(),
|mut hits, precursor_charge| {
let precursor_mass = mz * precursor_charge as f32;
let precursor_tol = precursor
.isolation_window
.unwrap_or(Tolerance::Da(-2.4, 2.4))
* precursor_charge as f32;
hits +=
self.matched_peaks(query, precursor_mass, precursor_charge, precursor_tol);
hits
},
);
let mut hits = InitialHits::default();
for (i, (precursor, mz)) in precursors.iter().zip(mzs).enumerate() {
let local_hits = (self.min_precursor_charge..=self.max_precursor_charge).fold(
InitialHits::default(),
|mut local_hits, precursor_charge| {
let precursor_mass = mz * precursor_charge as f32;
let precursor_tol = precursor
.isolation_window
.unwrap_or(Tolerance::Da(-2.4, 2.4))
* precursor_charge as f32;
let mut matched_peaks = self.matched_peaks(
query,
precursor_mass,
precursor_charge,
precursor_tol,
);
matched_peaks.preliminary.iter_mut().for_each(|score| {
score.precursor_index = i;
});

local_hits += matched_peaks;
local_hits
},
);
hits += local_hits;
}
self.trim_hits(&mut hits);
hits
} else if let Some(charge) = precursor.charge {
} else if precursors.len() == 1 && precursors.first().unwrap().charge.is_some() {
let charge = precursors.first().unwrap().charge.unwrap();
let mz = mzs[0];
// Charge state is already annotated for this precusor, only search once
let precursor_mass = mz * charge as f32;
self.matched_peaks(query, precursor_mass, charge, self.precursor_tol)
} else {
// Not all selected ion precursors have charge states annotated -
// assume it could be z=2, z=3, z=4 and search all three
let mut hits = (self.min_precursor_charge..=self.max_precursor_charge).fold(
InitialHits::default(),
|mut hits, precursor_charge| {
let precursor_mass = mz * precursor_charge as f32;
hits += self.matched_peaks(
query,
precursor_mass,
precursor_charge,
self.precursor_tol,
);
hits
},
);
let mut hits = InitialHits::default();
for (i, mz) in mzs.iter().enumerate() {
let local_hits = (self.min_precursor_charge..=self.max_precursor_charge).fold(
InitialHits::default(),
|mut local_hits, precursor_charge| {
let precursor_mass = mz * precursor_charge as f32;
let mut matched_peaks = self.matched_peaks(
query,
precursor_mass,
precursor_charge,
self.precursor_tol,
);
matched_peaks.preliminary.iter_mut().for_each(|score| {
score.precursor_index = i;
});

local_hits += matched_peaks;
local_hits
},
);
hits += local_hits;
}
self.trim_hits(&mut hits);
hits
}
}

/// Score a single [`ProcessedSpectrum`] against the database
pub fn score_standard(&self, query: &ProcessedSpectrum) -> Vec<Feature> {
let precursor = query.precursors.get(0).unwrap_or_else(|| {
panic!("missing MS1 precursor for {}", query.id);
});
if query.precursors.is_empty() {
panic!("missing precursor for {}", query.id);
}

let hits = self.initial_hits(query, precursor);
let hits = self.initial_hits(query, &query.precursors);
let mut features = Vec::with_capacity(self.report_psms);
self.build_features(query, precursor, &hits, self.report_psms, &mut features);
self.build_features(
query,
&query.precursors,
&hits,
self.report_psms,
&mut features,
);
features
}

Expand All @@ -390,7 +425,7 @@ impl<'db> Scorer<'db> {
fn build_features(
&self,
query: &ProcessedSpectrum,
precursor: &Precursor,
precursors: &[Precursor],
hits: &InitialHits,
report_psms: usize,
features: &mut Vec<Feature>,
Expand All @@ -410,15 +445,14 @@ impl<'db> Scorer<'db> {
// (average # of matches peaks/peptide candidate)
let lambda = hits.matched_peaks as f64 / hits.scored_candidates as f64;

// Sage operates on masses without protons; [M] instead of [MH+]
let mz = precursor.mz - PROTON;

for idx in 0..report_psms.min(score_vector.len()) {
let score = score_vector[idx].0;
let fragments: Option<Fragments> = score_vector[idx].1.take();
let psm_id = increment_psm_counter();

let peptide = &self.db[score.peptide];
// Sage operates on masses without protons; [M] instead of [MH+]
let mz = precursors[score.precursor_index].mz - PROTON;
let precursor_mass = mz * score.precursor_charge as f32;

let next = score_vector
Expand Down Expand Up @@ -539,18 +573,18 @@ impl<'db> Scorer<'db> {
/// Return multiple PSMs for each spectra - first is the best match, second PSM is the best match
/// after all theoretical peaks assigned to the best match are removed, etc
pub fn score_chimera_fast(&self, query: &ProcessedSpectrum) -> Vec<Feature> {
let precursor = query.precursors.get(0).unwrap_or_else(|| {
panic!("missing MS1 precursor for {}", query.id);
});
if query.precursors.is_empty() {
panic!("missing precursor for {}", query.id);
}

let mut query = query.clone();
let hits = self.initial_hits(&query, precursor);
let hits = self.initial_hits(&query, &query.precursors);

let mut candidates: Vec<Feature> = Vec::with_capacity(self.report_psms);

let mut prev = 0;
while candidates.len() < self.report_psms {
self.build_features(&query, precursor, &hits, 1, &mut candidates);
self.build_features(&query, &query.precursors, &hits, 1, &mut candidates);
if candidates.len() > prev {
if let Some(feat) = candidates.get_mut(prev) {
self.remove_matched_peaks(&mut query, feat);
Expand All @@ -574,6 +608,7 @@ impl<'db> Scorer<'db> {
peptide: pre_score.peptide,
precursor_charge: pre_score.precursor_charge,
isotope_error: pre_score.isotope_error,
precursor_index: pre_score.precursor_index,
..Default::default()
};
let peptide = &self.db[score.peptide];
Expand Down

0 comments on commit 840efab

Please sign in to comment.