diff --git a/v0.3.4-alpha/.lock b/v0.3.4-alpha/.lock new file mode 100644 index 00000000..e69de29b diff --git a/v0.3.4-alpha/crates.js b/v0.3.4-alpha/crates.js new file mode 100644 index 00000000..40d1ce1b --- /dev/null +++ b/v0.3.4-alpha/crates.js @@ -0,0 +1,2 @@ +window.ALL_CRATES = ["mscore","rustdf","rustms"]; +//{"start":21,"fragment_lengths":[8,9,9]} \ No newline at end of file diff --git a/v0.3.4-alpha/help.html b/v0.3.4-alpha/help.html new file mode 100644 index 00000000..b44c28c2 --- /dev/null +++ b/v0.3.4-alpha/help.html @@ -0,0 +1 @@ +
pub fn convolve(
+ dist_a: &Vec<(f64, f64)>,
+ dist_b: &Vec<(f64, f64)>,
+ mass_tolerance: f64,
+ abundance_threshold: f64,
+ max_results: usize,
+) -> Vec<(f64, f64)>
convolve two distributions of masses and abundances
+Arguments:
+dist_a
- first distribution of masses and abundancesdist_b
- second distribution of masses and abundancesmass_tolerance
- mass tolerance for combining peaksabundance_threshold
- minimum abundance for a peak to be included in the resultmax_results
- maximum number of peaks to include in the resultReturns:
+Vec<(f64, f64)>
- combined distribution of masses and abundancesuse mscore::algorithm::isotope::convolve;
+
+let dist_a = vec![(100.0, 0.5), (101.0, 0.5)];
+let dist_b = vec![(100.0, 0.5), (101.0, 0.5)];
+let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200);
+assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)>
convolve a distribution with itself n times
+Arguments:
+dist
- distribution of masses and abundancesn
- number of times to convolve the distribution with itselfReturns:
+Vec<(f64, f64)>
- distribution of masses and abundancesuse mscore::algorithm::isotope::convolve_pow;
+
+let dist = vec![(100.0, 0.5), (101.0, 0.5)];
+let result = convolve_pow(&dist, 2);
+assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
pub fn generate_averagine_spectra(
+ masses: Vec<f64>,
+ charges: Vec<i32>,
+ min_intensity: i32,
+ k: i32,
+ resolution: i32,
+ centroid: bool,
+ num_threads: usize,
+ amp: Option<f64>,
+) -> Vec<MzSpectrum>
generate the averagine spectra for a given list of masses and charges +using multiple threads
+Arguments:
+masses
- list of masses of the peptidescharges
- list of charges of the peptidesmin_intensity
- minimum intensity for a peak to be included in the resultk
- number of isotopes to considerresolution
- resolution of the isotope patterncentroid
- whether to centroid the spectrumnum_threads
- number of threads to useamp
- amplitude of the isotope patternReturns:
+Vec<MzSpectrum>
- list of averagine spectrause mscore::algorithm::isotope::generate_averagine_spectra;
+
+let masses = vec![3000.0, 3000.0];
+let charges = vec![2, 3];
+let spectra = generate_averagine_spectra(masses, charges, 1, 10, 3, true, 4, None);
pub fn generate_averagine_spectrum(
+ mass: f64,
+ charge: i32,
+ min_intensity: i32,
+ k: i32,
+ resolution: i32,
+ centroid: bool,
+ amp: Option<f64>,
+) -> MzSpectrum
generate the averagine spectrum for a given mass and charge
+Arguments:
+mass
- mass of the peptidecharge
- charge of the peptidemin_intensity
- minimum intensity for a peak to be included in the resultk
- number of isotopes to considerresolution
- resolution of the isotope patterncentroid
- whether to centroid the spectrumamp
- amplitude of the isotope patternReturns:
+MzSpectrum
- averagine spectrumuse mscore::algorithm::isotope::generate_averagine_spectrum;
+
+let spectrum = generate_averagine_spectrum(3000.0, 2, 1, 10, 3, true, None);
pub fn generate_isotope_distribution(
+ atomic_composition: &HashMap<String, i32>,
+ mass_tolerance: f64,
+ abundance_threshold: f64,
+ max_result: i32,
+) -> Vec<(f64, f64)>
generate the isotope distribution for a given atomic composition
+Arguments:
+atomic_composition
- atomic composition of the peptidemass_tolerance
- mass tolerance for combining peaksabundance_threshold
- minimum abundance for a peak to be included in the resultmax_result
- maximum number of peaks to include in the resultReturns:
+Vec<(f64, f64)>
- distribution of masses and abundancesuse std::collections::HashMap;
+use mscore::algorithm::isotope::generate_isotope_distribution;
+
+let mut atomic_composition = HashMap::new();
+atomic_composition.insert("C".to_string(), 5);
+atomic_composition.insert("H".to_string(), 9);
+atomic_composition.insert("N".to_string(), 1);
+atomic_composition.insert("O".to_string(), 1);
+let result = generate_isotope_distribution(&atomic_composition, 1e-6, 1e-12, 200);
pub fn generate_isotope_pattern(
+ lower_bound: f64,
+ upper_bound: f64,
+ mass: f64,
+ charge: f64,
+ amp: f64,
+ k: usize,
+ sigma: f64,
+ resolution: i32,
+) -> (Vec<f64>, Vec<f64>)
generate the isotope pattern for a given mass and charge
+Arguments:
+lower_bound
- lower bound of the isotope patternupper_bound
- upper bound of the isotope patternmass
- mass of the peptidecharge
- charge of the peptideamp
- amplitude of the isotope patternk
- number of isotopes to considersigma
- standard deviation of the normal distributionresolution
- resolution of the isotope patternReturns:
+(Vec<f64>, Vec<f64>)
- isotope patternuse mscore::algorithm::isotope::generate_isotope_pattern;
+
+let (mzs, intensities) = generate_isotope_pattern(1500.0, 1510.0, 3000.0, 2.0, 1e4, 10, 1.0, 3);
pub fn generate_precursor_spectra(
+ sequences: &Vec<&str>,
+ charges: &Vec<i32>,
+ num_threads: usize,
+ peptide_ids: Vec<Option<i32>>,
+) -> Vec<MzSpectrum>
parallel version of generate_precursor_spectrum
Arguments:
+sequences
- list of peptide sequencescharges
- list of charges of the peptidesnum_threads
- number of threads to useReturns:
+Vec<MzSpectrum>
- list of precursor spectrapub fn generate_precursor_spectrum(
+ sequence: &str,
+ charge: i32,
+ peptide_id: Option<i32>,
+) -> MzSpectrum
generate the precursor spectrum for a given peptide sequence and charge +using isotope convolutions
+Arguments:
+sequence
- peptide sequencecharge
- charge of the peptideReturns:
+MzSpectrum
- precursor spectrumpub fn iso(
+ x: &Vec<f64>,
+ mass: f64,
+ charge: f64,
+ sigma: f64,
+ amp: f64,
+ k: usize,
+ step_size: f64,
+) -> Vec<f64>
calculate the isotope pattern for a given mass and charge based on the averagine model +using the normal distribution for peak shapes
+Arguments:
+x
- list of m/z values to probemass
- mass of the peptidecharge
- charge of the peptidesigma
- standard deviation of the normal distributionamp
- amplitude of the isotope patternk
- number of isotopes to considerstep_size
- step size for the m/z values to probeReturns:
+Vec<f64>
- isotope patternpub fn lam(mass: f64, slope: f64, intercept: f64) -> f64
calculate the lambda value for a given mass
+Arguments:
+mass
- mass of the peptideslope
- slope of the linear regressionintercept
- intercept of the linear regressionReturns:
+f64
- lambda valueuse mscore::algorithm::isotope::lam;
+
+let lambda = lam(1000.0, 0.000594, -0.03091);
+assert_eq!(lambda, 0.56309);
pub fn normal_pdf(x: f64, mean: f64, std_dev: f64) -> f64
calculate the normal probability density function
+Arguments:
+x
- value to calculate the probability density function ofmean
- mean of the normal distributionstd_dev
- standard deviation of the normal distributionReturns:
+f64
- probability density function of x
use mscore::algorithm::isotope::normal_pdf;
+
+let pdf = normal_pdf(0.0, 0.0, 1.0);
+assert_eq!(pdf, 0.39894228040143265);
generate_precursor_spectrum
pub fn atomic_product_ion_composition(
+ product_ion: &PeptideProductIon,
+) -> Vec<(&str, i32)>
calculate the atomic composition of a product ion
+Arguments:
+product_ion
- a PeptideProductIon instanceReturns:
+Vec<(&str, i32)>
- a vector of tuples representing the atomic composition of the product ionpub fn calculate_amino_acid_composition(sequence: &str) -> HashMap<String, i32>
get a count dictionary of the amino acid composition of a peptide sequence
+Arguments:
+sequence
- peptide sequenceReturns:
+composition
- a dictionary of amino acid compositionuse mscore::algorithm::peptide::calculate_amino_acid_composition;
+
+let sequence = "PEPTIDEH";
+let composition = calculate_amino_acid_composition(sequence);
+assert_eq!(composition.get("P"), Some(&2));
+assert_eq!(composition.get("E"), Some(&2));
+assert_eq!(composition.get("T"), Some(&1));
+assert_eq!(composition.get("I"), Some(&1));
+assert_eq!(composition.get("D"), Some(&1));
+assert_eq!(composition.get("H"), Some(&1));
pub fn calculate_peptide_mono_isotopic_mass(
+ peptide_sequence: &PeptideSequence,
+) -> f64
calculate the monoisotopic mass of a peptide sequence
+Arguments:
+peptide_sequence
- peptide sequenceReturns:
+mass
- monoisotopic mass of the peptideuse mscore::algorithm::peptide::calculate_peptide_mono_isotopic_mass;
+use mscore::data::peptide::PeptideSequence;
+
+let peptide_sequence = PeptideSequence::new("PEPTIDEH".to_string(), Some(1));
+let mass = calculate_peptide_mono_isotopic_mass(&peptide_sequence);
+let mass_quantized = (mass * 1e6).round() as i32;
+assert_eq!(mass_quantized, 936418877);
pub fn calculate_peptide_product_ion_mono_isotopic_mass(
+ sequence: &str,
+ kind: FragmentType,
+) -> f64
calculate the monoisotopic mass of a peptide product ion for a given fragment type
+Arguments:
+sequence
- peptide sequencekind
- fragment typeReturns:
+mass
- monoisotopic mass of the peptideuse mscore::algorithm::peptide::calculate_peptide_product_ion_mono_isotopic_mass;
+use mscore::data::peptide::FragmentType;
+let sequence = "PEPTIDEH";
+let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, FragmentType::Y);
+assert_eq!(mass, 936.4188766862999);
pub fn calculate_product_ion_mz(
+ sequence: &str,
+ kind: FragmentType,
+ charge: Option<i32>,
+) -> f64
calculate the monoisotopic m/z of a peptide product ion for a given fragment type and charge
+Arguments:
+sequence
- peptide sequencekind
- fragment typecharge
- chargeReturns:
+mz
- monoisotopic m/z of the product ionuse mscore::algorithm::peptide::calculate_product_ion_mz;
+use mscore::chemistry::constants::MASS_PROTON;
+use mscore::data::peptide::FragmentType;
+let sequence = "PEPTIDEH";
+let mz = calculate_product_ion_mz(sequence, FragmentType::Y, Some(1));
+assert_eq!(mz, 936.4188766862999 + MASS_PROTON);
pub fn fragments_to_composition(
+ product_ions: Vec<PeptideProductIon>,
+ num_threads: usize,
+) -> Vec<Vec<(String, i32)>>
calculate the atomic composition of a peptide product ion series +Arguments:
+product_ions
- a vector of PeptideProductIon instances
num_threads
- a usize representing the number of threads to use
+Returns:
Vec<Vec<(String, i32)>>
- a vector of vectors of tuples representing the atomic composition of each product ion
pub fn get_num_protonizable_sites(sequence: &str) -> usize
count the number of protonizable sites in a peptide sequence
+sequence
- a string representing the peptide sequenceusize
- the number of protonizable sites in the peptide sequenceuse mscore::algorithm::peptide::get_num_protonizable_sites;
+
+let sequence = "PEPTIDEH";
+let num_protonizable_sites = get_num_protonizable_sites(sequence);
+assert_eq!(num_protonizable_sites, 2);
pub fn peptide_sequence_to_atomic_composition(
+ peptide_sequence: &PeptideSequence,
+) -> HashMap<&'static str, i32>
calculate the atomic composition of a peptide sequence
+pub fn simulate_charge_state_for_sequence(
+ sequence: &str,
+ max_charge: Option<usize>,
+ charged_probability: Option<f64>,
+) -> Vec<f64>
simulate the charge state distribution for a peptide sequence
+sequence
- a string representing the peptide sequencemax_charge
- an optional usize representing the maximum charge state to simulatecharged_probability
- an optional f64 representing the probability of a site being chargedVec<f64>
- a vector of f64 representing the probability of each charge stateuse mscore::algorithm::peptide::simulate_charge_state_for_sequence;
+
+let sequence = "PEPTIDEH";
+let charge_state_probs = simulate_charge_state_for_sequence(sequence, None, None);
+assert_eq!(charge_state_probs, vec![0.25, 0.5, 0.25, 0.0, 0.0]);
pub fn simulate_charge_states_for_sequences(
+ sequences: Vec<&str>,
+ num_threads: usize,
+ max_charge: Option<usize>,
+ charged_probability: Option<f64>,
+) -> Vec<Vec<f64>>
simulate the charge state distribution for a list of peptide sequences
+sequences
- a vector of strings representing the peptide sequencesnum_threads
- a usize representing the number of threads to usemax_charge
- an optional usize representing the maximum charge state to simulatecharged_probability
- an optional f64 representing the probability of a site being chargedVec<Vec<f64>>
- a vector of vectors of f64 representing the probability of each charge state for each sequenceuse mscore::algorithm::peptide::simulate_charge_states_for_sequences;
+
+let sequences = vec!["PEPTIDEH", "PEPTIDEH", "PEPTIDEH"];
+let charge_state_probs = simulate_charge_states_for_sequences(sequences, 4, None, None);
+assert_eq!(charge_state_probs, vec![vec![0.25, 0.5, 0.25, 0.0, 0.0], vec![0.25, 0.5, 0.25, 0.0, 0.0], vec![0.25, 0.5, 0.25, 0.0, 0.0]]);
pub fn amino_acid_composition() -> HashMap<char, HashMap<&'static str, i32>>
Amino Acid Composition
+None
+HashMap<char, HashMap<&'static str, i32>>
- a map of amino acid one-letter codes to their atomic compositionsuse mscore::chemistry::amino_acid::amino_acid_composition;
+use std::collections::HashMap;
+
+let amino_acid_composition = amino_acid_composition();
+assert_eq!(amino_acid_composition.get(&'K'), Some(&HashMap::from([("C", 6), ("H", 12), ("N", 2), ("O", 1)])));
pub fn amino_acid_masses() -> HashMap<&'static str, f64>
Amino Acid Masses
+None
+HashMap<&'static str, f64>
- a map of amino acid one-letter codes to their monoisotopic massesuse mscore::chemistry::amino_acid::amino_acid_masses;
+
+let amino_acid_masses = amino_acid_masses();
+assert_eq!(amino_acid_masses.get("K"), Some(&128.094963));
pub fn atomic_weights_mono_isotopic() -> HashMap<&'static str, f64>
Atomic Weights
+None
+HashMap<&'static str, f64>
- a map of atomic symbols to their monoisotopic weightsuse mscore::chemistry::elements::atomic_weights_mono_isotopic;
+
+let atomic_weights = atomic_weights_mono_isotopic();
+assert_eq!(atomic_weights.get("H"), Some(&1.00782503223));
pub fn atoms_isotopic_weights() -> HashMap<&'static str, Vec<f64>>
Isotopic Weights
+None
+HashMap<&'static str, Vec<f64>>
- a map of atomic symbols to their isotopic weightsuse mscore::chemistry::elements::atoms_isotopic_weights;
+
+let isotopic_weights = atoms_isotopic_weights();
+assert_eq!(isotopic_weights.get("H"), Some(&vec![1.00782503223, 2.01410177812]));
pub fn isotopic_abundance() -> HashMap<&'static str, Vec<f64>>
Isotopic Abundance
+None
+HashMap<&'static str, Vec<f64>>
- a map of atomic symbols to their isotopic abundancesuse mscore::chemistry::elements::isotopic_abundance;
+
+let isotopic_abundance = isotopic_abundance();
+assert_eq!(isotopic_abundance.get("H"), Some(&vec![0.999885, 0.000115]));
pub fn calculate_mz(monoisotopic_mass: f64, charge: i32) -> f64
calculate the m/z of an ion
+Arguments:
+monoisotopic_mass
- monoisotopic mass of the ioncharge
- charge state of the ionReturns:
+mz
- mass-over-charge of the ionuse mscore::chemistry::formulas::calculate_mz;
+
+let mz = calculate_mz(1000.0, 2);
+assert_eq!(mz, 501.007276466621);
pub fn ccs_to_one_over_reduced_mobility(
+ ccs: f64,
+ mz: f64,
+ charge: u32,
+ mass_gas: f64,
+ temp: f64,
+ t_diff: f64,
+) -> f64
convert CCS to 1 over reduced ion mobility (1/k0)
+Arguments:
+ccs
- collision cross-sectioncharge
- charge state of the ionmz
- mass-over-charge of the ionmass_gas
- mass of drift gas (N2)temp
- temperature of the drift gas in °C
t_diff
- factor to translate from °C to KReturns:
+one_over_k0
- 1 over reduced ion mobility (1/k0)use mscore::chemistry::formulas::ccs_to_one_over_reduced_mobility;
+
+let k0 = ccs_to_one_over_reduced_mobility(806.5918693771381, 1000.0, 2, 28.013, 31.85, 273.15);
+assert_eq!(k0, 2.0);
pub fn one_over_reduced_mobility_to_ccs(
+ one_over_k0: f64,
+ mz: f64,
+ charge: u32,
+ mass_gas: f64,
+ temp: f64,
+ t_diff: f64,
+) -> f64
convert 1 over reduced ion mobility (1/k0) to CCS
+Arguments:
+one_over_k0
- 1 over reduced ion mobility (1/k0)charge
- charge state of the ionmz
- mass-over-charge of the ionmass_gas
- mass of drift gas (N2)temp
- temperature of the drift gas in °C
t_diff
- factor to translate from °C to KReturns:
+ccs
- collision cross-sectionuse mscore::chemistry::formulas::one_over_reduced_mobility_to_ccs;
+
+let ccs = one_over_reduced_mobility_to_ccs(0.5, 1000.0, 2, 28.013, 31.85, 273.15);
+assert_eq!(ccs, 201.64796734428452);
pub struct SumFormula {
+ pub formula: String,
+ pub elements: HashMap<String, i32>,
+}
formula: String
§elements: HashMap<String, i32>
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub fn modification_atomic_composition() -> HashMap<String, HashMap<&'static str, i32>>
Unimod Modifications
+None
+HashMap<String, HashMap<&'static str, i32>>
- a map of unimod modification names to their atomic compositionsuse mscore::chemistry::unimod::modification_atomic_composition;
+use std::collections::HashMap;
+
+let composition = modification_atomic_composition();
+assert_eq!(composition.get("[UNIMOD:1]"), Some(&HashMap::from([("C", 2), ("H", 2), ("O", 1)])));
pub fn unimod_modifications_mass() -> HashMap<&'static str, f64>
Unimod Modifications Mass
+None
+HashMap<&'static str, f64>
- a map of unimod modification names to their massuse mscore::chemistry::unimod::unimod_modifications_mass;
+
+let mass = unimod_modifications_mass();
+assert_eq!(mass.get("[UNIMOD:1]"), Some(&42.010565));
pub fn unimod_modifications_mass_numerical() -> HashMap<u32, f64>
Unimod Modifications Mass Numerical
+None
+HashMap<u32, f64>
- a map of unimod modification numerical ids to their massuse mscore::chemistry::unimod::unimod_modifications_mass_numerical;
+
+let mass = unimod_modifications_mass_numerical();
+assert_eq!(mass.get(&1), Some(&42.010565));
pub fn find_unimod_patterns(input_string: &str) -> (String, Vec<f64>)
Convert a peptide sequence with UNIMOD annotations to a tuple of plain sequence and for each +position in the sequence, the mass of the modification at that position (0 if no modification), +which is the representation of sequence and modifications used by SAGE
+input_string
- a string slice of the peptide sequence(String, Vec<f64>)
- a tuple of the plain sequence and a vector of f64 representing the mass
+of the modification at each position in the sequenceuse mscore::chemistry::utility::find_unimod_patterns;
+
+let sequence = "PEPTIDE[UNIMOD:1]H";
+let (stripped_sequence, mods) = find_unimod_patterns(sequence);
+assert_eq!(stripped_sequence, "PEPTIDEH");
+assert_eq!(mods, vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.010565, 0.0]);
pub fn reshape_prosit_array(flat_array: Vec<f64>) -> Vec<Vec<Vec<f64>>>
Reshape the flat prosit array into a 3D array of shape (29, 2, 3)
+flat_array
- a vector of f64 representing the flat prosit arrayVec<Vec<Vec<f64>>>
- a 3D array of shape (29, 2, 3)use mscore::chemistry::utility::reshape_prosit_array;
+
+let flat_array = vec![0.0; 174];
+let reshaped_array = reshape_prosit_array(flat_array);
+assert_eq!(reshaped_array.len(), 29);
+assert_eq!(reshaped_array[0].len(), 2);
+assert_eq!(reshaped_array[0][0].len(), 3);
pub fn unimod_sequence_to_tokens(
+ sequence: &str,
+ group_modifications: bool,
+) -> Vec<String>
Convert a peptide sequence with UNIMOD annotations to a list of tokens
+sequence
- a string slice of the peptide sequencegroup_modifications
- a boolean indicating whether to group the amino acid before the UNIMOD with the UNIMODVec<String>
- a vector of strings representing the tokensuse mscore::chemistry::utility::unimod_sequence_to_tokens;
+
+let sequence = "PEPTIDE[UNIMOD:1]H";
+let tokens = unimod_sequence_to_tokens(sequence, false);
+assert_eq!(tokens, vec!["P", "E", "P", "T", "I", "D", "E", "[UNIMOD:1]", "H"]);
+let tokens = unimod_sequence_to_tokens(sequence, true);
+assert_eq!(tokens, vec!["P", "E", "P", "T", "I", "D", "E[UNIMOD:1]", "H"]);
pub enum FragmentType {
+ A,
+ B,
+ C,
+ X,
+ Y,
+ Z,
+}
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct PeptideIon {
+ pub sequence: PeptideSequence,
+ pub charge: i32,
+ pub intensity: f64,
+}
sequence: PeptideSequence
§charge: i32
§intensity: f64
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct PeptideProductIon {
+ pub kind: FragmentType,
+ pub ion: PeptideIon,
+}
kind: FragmentType
§ion: PeptideIon
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct PeptideProductIonSeries {
+ pub charge: i32,
+ pub n_ions: Vec<PeptideProductIon>,
+ pub c_ions: Vec<PeptideProductIon>,
+}
charge: i32
§n_ions: Vec<PeptideProductIon>
§c_ions: Vec<PeptideProductIon>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct PeptideProductIonSeriesCollection {
+ pub peptide_ions: Vec<PeptideProductIonSeries>,
+}
peptide_ions: Vec<PeptideProductIonSeries>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct PeptideSequence {
+ pub sequence: String,
+ pub peptide_id: Option<i32>,
+}
sequence: String
§peptide_id: Option<i32>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub enum MsType {
+ Precursor,
+ FragmentDda,
+ FragmentDia,
+ Unknown,
+}
Represents the type of spectrum.
+The MsType
enum is used to distinguish between precursor and fragment spectra.
clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct IndexedMzSpectrum {
+ pub index: Vec<i32>,
+ pub mz_spectrum: MzSpectrum,
+}
Represents a mass spectrum with associated m/z indices, m/z values, and intensities
+index: Vec<i32>
§mz_spectrum: MzSpectrum
Creates a new IndexedMzSpectrum
instance.
index
- A vector containing the mz index, e.g., time-of-flight values.mz
- A vector containing the m/z values.intensity
- A vector containing the intensity values.use mscore::data::spectrum::IndexedMzSpectrum;
+use mscore::data::spectrum::MzSpectrum;
+
+let spectrum = IndexedMzSpectrum::new(vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
Bins the spectrum based on a given m/z resolution, summing intensities and averaging index values +for m/z values that fall into the same bin.
+resolution
- The desired m/z resolution for binning.use mscore::data::spectrum::IndexedMzSpectrum;
+
+let spectrum = IndexedMzSpectrum::new(vec![1000, 2000], vec![100.42, 100.43], vec![50.0, 60.0]);
+let binned_spectrum = spectrum.to_resolution(1);
+
+assert_eq!(binned_spectrum.mz_spectrum.mz, vec![100.4]);
+assert_eq!(binned_spectrum.mz_spectrum.intensity, vec![110.0]);
+assert_eq!(binned_spectrum.index, vec![1500]);
Convert the IndexedMzSpectrum
to an IndexedMzSpectrumVectorized
using the given resolution for binning.
After binning to the desired resolution, the binned m/z values are translated into integer indices.
+resolution
- The desired m/z resolution for binning.use mscore::data::spectrum::IndexedMzSpectrum;
+
+let spectrum = IndexedMzSpectrum::new(vec![1000, 2000], vec![100.42, 100.43], vec![50.0, 60.0]);
+let binned_spectrum = spectrum.to_resolution(1);
+
+assert_eq!(binned_spectrum.mz_spectrum.mz, vec![100.4]);
+assert_eq!(binned_spectrum.mz_spectrum.intensity, vec![110.0]);
+assert_eq!(binned_spectrum.index, vec![1500]);
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct IndexedMzSpectrumVectorized {
+ pub index: Vec<i32>,
+ pub mz_vector: MzSpectrumVectorized,
+}
index: Vec<i32>
§mz_vector: MzSpectrumVectorized
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct MzSpectrum {
+ pub mz: Vec<f64>,
+ pub intensity: Vec<f64>,
+}
Represents a mass spectrum with associated m/z values and intensities.
+mz: Vec<f64>
§intensity: Vec<f64>
Constructs a new MzSpectrum
.
mz
- A vector of m/z values.intensity
- A vector of intensity values corresponding to the m/z values.Panics if the lengths of mz
and intensity
are not the same. (Note: this length check is not implemented yet; it is planned for a later version.)
let spectrum = MzSpectrum::new(vec![100.0, 200.0], vec![10.0, 20.0]);
+assert_eq!(spectrum.mz, vec![100.0, 200.0]);
+assert_eq!(spectrum.intensity, vec![10.0, 20.0]);
Splits the spectrum into a collection of windows based on m/z values.
+This function divides the spectrum into smaller spectra (windows) based on a specified window length. +Each window contains peaks from the original spectrum that fall within the m/z range of that window.
+window_length
: The size (in terms of m/z values) of each window.
overlapping
: If true
, each window will overlap with its neighboring windows by half of the window_length
.
+This means that a peak may belong to multiple windows. If false
, windows do not overlap.
min_peaks
: The minimum number of peaks a window must have to be retained in the result.
min_intensity
: The minimum intensity value a window must have (in its highest intensity peak) to be retained in the result.
A BTreeMap
where the keys represent the window indices and the values are the spectra (MzSpectrum
) within those windows.
+Windows that do not meet the criteria of having at least min_peaks
peaks or a highest intensity peak
+greater than or equal to min_intensity
are discarded.
let spectrum = MzSpectrum::new(vec![100.0, 101.0, 102.5, 103.0], vec![10.0, 20.0, 30.0, 40.0]);
+let windowed_spectrum = spectrum.to_windows(1.0, false, 1, 10.0);
+assert!(windowed_spectrum.contains_key(&100));
+assert!(windowed_spectrum.contains_key(&102));
Combines two MzSpectrum
instances by summing up the intensities of matching m/z values.
Each m/z value is quantized to retain at least 6 decimals. If two spectra have m/z values +that quantize to the same integer value, their intensities are summed.
+let spectrum1 = MzSpectrum { mz: vec![100.523, 101.923], intensity: vec![10.0, 20.0] };
+let spectrum2 = MzSpectrum { mz: vec![101.235, 105.112], intensity: vec![15.0, 30.0] };
+
+let combined = spectrum1 + spectrum2;
+
+assert_eq!(combined.mz, vec![100.523, 101.235, 101.923, 105.112]);
+assert_eq!(combined.intensity, vec![10.0, 15.0, 20.0, 30.0]);
+
operator.source
. Read moreFormats the MzSpectrum
for display.
Bins the spectrum’s m/z values to a given resolution and sums the intensities.
+resolution
- The desired resolution in terms of decimal places. For instance, a resolution of 2
+would bin m/z values to two decimal places.A new MzSpectrum
where m/z values are binned according to the given resolution.
let spectrum = MzSpectrum::new(vec![100.123, 100.121, 100.131], vec![10.0, 20.0, 30.0]);
+let binned_spectrum_1 = spectrum.to_resolution(1);
+let binned_spectrum_2 = spectrum.to_resolution(2);
+// assert_eq!(binned_spectrum_1.mz, vec![100.1]);
+assert_eq!(binned_spectrum_1.intensity, vec![60.0]);
+assert_eq!(binned_spectrum_2.mz, vec![100.12, 100.13]);
+assert_eq!(binned_spectrum_2.intensity, vec![30.0, 30.0]);
Convert the MzSpectrum
to a MzSpectrumVectorized
using the given resolution for binning.
After binning to the desired resolution, the binned m/z values are translated into integer indices.
+clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct MzSpectrumVectorized {
+ pub resolution: i32,
+ pub indices: Vec<i32>,
+ pub values: Vec<f64>,
+}
resolution: i32
§indices: Vec<i32>
§values: Vec<f64>
source
. Read moreConvert the MzSpectrum
to a MzSpectrumVectorized
using the given resolution for binning.
After binning to the desired resolution, the binned m/z values are translated into integer indices.
+clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub trait ToResolution {
+ // Required method
+ fn to_resolution(&self, resolution: i32) -> Self;
+}
Re-bins a type holding m/z values to a given resolution.
+Bin the m/z values of a given TimsFrame to the given resolution, returning a new TimsFrame.
+resolution
- The resolution to which the m/z values should be rounded.use mscore::data::spectrum::MsType;
+use mscore::timstof::frame::TimsFrame;
+use mscore::timstof::spectrum::TimsSpectrum;
+use mscore::data::spectrum::IndexedMzSpectrum;
+use mscore::data::spectrum::ToResolution;
+
+let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+let low_res_frame = frame.to_resolution(1);
pub trait Vectorized<T> {
+ // Required method
+ fn vectorized(&self, resolution: i32) -> T;
+}
Vectorized representation for Structs holding m/z values and intensities.
+pub enum SourceType {
+ Signal,
+ ChemicalNoise,
+ RandomNoise,
+ Unknown,
+}
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct ContributionSource {
+ pub intensity_contribution: f64,
+ pub source_type: SourceType,
+ pub signal_attributes: Option<SignalAttributes>,
+}
intensity_contribution: f64
§source_type: SourceType
§signal_attributes: Option<SignalAttributes>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct MzSpectrumAnnotated {
+ pub mz: Vec<f64>,
+ pub intensity: Vec<f64>,
+ pub annotations: Vec<PeakAnnotation>,
+}
mz: Vec<f64>
§intensity: Vec<f64>
§annotations: Vec<PeakAnnotation>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct MzSpectrumAnnotatedVectorized {
+ pub indices: Vec<u32>,
+ pub values: Vec<f64>,
+ pub annotations: Vec<PeakAnnotation>,
+}
indices: Vec<u32>
§values: Vec<f64>
§annotations: Vec<PeakAnnotation>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct PeakAnnotation {
+ pub contributions: Vec<ContributionSource>,
+}
contributions: Vec<ContributionSource>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct SignalAttributes {
+ pub charge_state: i32,
+ pub peptide_id: i32,
+ pub isotope_peak: i32,
+ pub description: Option<String>,
+}
charge_state: i32
§peptide_id: i32
§isotope_peak: i32
§description: Option<String>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsFrameAnnotated {
+ pub frame_id: i32,
+ pub retention_time: f64,
+ pub ms_type: MsType,
+ pub tof: Vec<u32>,
+ pub mz: Vec<f64>,
+ pub scan: Vec<u32>,
+ pub inv_mobility: Vec<f64>,
+ pub intensity: Vec<f64>,
+ pub annotations: Vec<PeakAnnotation>,
+}
frame_id: i32
§retention_time: f64
§ms_type: MsType
§tof: Vec<u32>
§mz: Vec<f64>
§scan: Vec<u32>
§inv_mobility: Vec<f64>
§intensity: Vec<f64>
§annotations: Vec<PeakAnnotation>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsSpectrumAnnotated {
+ pub frame_id: i32,
+ pub scan: u32,
+ pub retention_time: f64,
+ pub mobility: f64,
+ pub ms_type: MsType,
+ pub tof: Vec<u32>,
+ pub spectrum: MzSpectrumAnnotated,
+}
frame_id: i32
§scan: u32
§retention_time: f64
§mobility: f64
§ms_type: MsType
§tof: Vec<u32>
§spectrum: MzSpectrumAnnotated
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsTofCollisionEnergyDIA { /* private fields */ }
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct ImsFrame {
+ pub retention_time: f64,
+ pub mobility: Vec<f64>,
+ pub mz: Vec<f64>,
+ pub intensity: Vec<f64>,
+}
retention_time: f64
§mobility: Vec<f64>
§mz: Vec<f64>
§intensity: Vec<f64>
Creates a new ImsFrame
instance.
retention_time
- The retention time in seconds.mobility
- A vector of inverse ion mobilities.mz
- A vector of m/z values.intensity
- A vector of intensity values.use mscore::timstof::frame::ImsFrame;
+
+let frame = ImsFrame::new(100.0, vec![0.1, 0.2], vec![100.5, 200.5], vec![50.0, 60.0]);
clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct ImsFrameVectorized {
+ pub retention_time: f64,
+ pub mobility: Vec<f64>,
+ pub indices: Vec<i32>,
+ pub values: Vec<f64>,
+ pub resolution: i32,
+}
retention_time: f64
§mobility: Vec<f64>
§indices: Vec<i32>
§values: Vec<f64>
§resolution: i32
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct RawTimsFrame {
+ pub frame_id: i32,
+ pub retention_time: f64,
+ pub ms_type: MsType,
+ pub scan: Vec<u32>,
+ pub tof: Vec<u32>,
+ pub intensity: Vec<f64>,
+}
frame_id: i32
§retention_time: f64
§ms_type: MsType
§scan: Vec<u32>
§tof: Vec<u32>
§intensity: Vec<f64>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsFrame {
+ pub frame_id: i32,
+ pub ms_type: MsType,
+ pub scan: Vec<i32>,
+ pub tof: Vec<i32>,
+ pub ims_frame: ImsFrame,
+}
frame_id: i32
§ms_type: MsType
§scan: Vec<i32>
§tof: Vec<i32>
§ims_frame: ImsFrame
Creates a new TimsFrame
instance.
frame_id
- index of frame in TDF raw file.ms_type
- The type of frame.retention_time
- The retention time in seconds.scan
- A vector of scan IDs.mobility
- A vector of inverse ion mobilities.tof
- A vector of time-of-flight values.mz
- A vector of m/z values.intensity
- A vector of intensity values.use mscore::data::spectrum::MsType;
+use mscore::timstof::frame::TimsFrame;
+use mscore::timstof::frame::ImsFrame;
+
+let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
Convert a given TimsFrame to an ImsFrame.
+use mscore::data::spectrum::MsType;
+use mscore::timstof::frame::TimsFrame;
+
+let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+let ims_spectrum = frame.get_ims_frame();
Convert a given TimsFrame to a vector of TimsSpectrum.
+use mscore::data::spectrum::MsType;
+use mscore::timstof::frame::TimsFrame;
+
+let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+let tims_spectra = frame.to_tims_spectra();
Filter a given TimsFrame by m/z, scan, inverse ion mobility, and intensity.
+mz_min
- The minimum m/z value.mz_max
- The maximum m/z value.scan_min
- The minimum scan value.scan_max
- The maximum scan value.intensity_min
- The minimum intensity value.intensity_max
- The maximum intensity value.use mscore::data::spectrum::MsType;
+use mscore::timstof::frame::TimsFrame;
+
+let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+let filtered_frame = frame.filter_ranged(100.0, 200.0, 1, 2, 0.0, 1.6, 50.0, 60.0);
Calculate the weighted mean and variance of inv_mob
values based on their intensities.
Bin the m/z values of a given TimsFrame to the given resolution, returning a new TimsFrame.
+resolution
- The resolution to which the m/z values should be rounded.use mscore::data::spectrum::MsType;
+use mscore::timstof::frame::TimsFrame;
+use mscore::timstof::spectrum::TimsSpectrum;
+use mscore::data::spectrum::IndexedMzSpectrum;
+use mscore::data::spectrum::ToResolution;
+
+let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+let low_res_frame = frame.to_resolution(1);
clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsFrameVectorized {
+ pub frame_id: i32,
+ pub ms_type: MsType,
+ pub scan: Vec<i32>,
+ pub tof: Vec<i32>,
+ pub ims_frame: ImsFrameVectorized,
+}
frame_id: i32
§ms_type: MsType
§scan: Vec<i32>
§tof: Vec<i32>
§ims_frame: ImsFrameVectorized
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub fn apply_transmission(
+ midpoint: f64,
+ window_length: f64,
+ k: f64,
+ mz: Vec<f64>,
+) -> Vec<f64>
Apply ion transmission function to mz values
+Arguments:
+midpoint
- center of the stepwindow_length
- length of the stepk
- steepness of the stepmz
- mz valuesReturns:
+Vec<f64>
- transmission probability for each mz valueuse mscore::timstof::quadrupole::apply_transmission;
+
+let mz = vec![100.0, 150.0, 170.0];
+let transmission = apply_transmission(150.0, 50.0, 1.0, mz).iter().map(
+|&x| (x * 100.0).round() / 100.0).collect::<Vec<f64>>();
+assert_eq!(transmission, vec![0.0, 1.0, 1.0]);
pub fn ion_transition_function_midpoint(
+ midpoint: f64,
+ window_length: f64,
+ k: f64,
+) -> impl Fn(Vec<f64>) -> Vec<f64>
Ion transmission function for quadrupole selection simulation
+Arguments:
+midpoint
- center of the stepwindow_length
- length of the stepk
- steepness of the stepReturns:
+impl Fn(Vec<f64>) -> Vec<f64>
- ion transmission functionuse mscore::timstof::quadrupole::ion_transition_function_midpoint;
+
+let ion_transmission = ion_transition_function_midpoint(150.0, 50.0, 1.0);
+let mz = vec![100.0, 150.0, 170.0];
+let transmission = ion_transmission(mz).iter().map(
+|&x| (x * 100.0).round() / 100.0).collect::<Vec<f64>>();
+assert_eq!(transmission, vec![0.0, 1.0, 1.0]);
pub fn smooth_step(x: &Vec<f64>, up_start: f64, up_end: f64, k: f64) -> Vec<f64>
Sigmoid step function for quadrupole selection simulation
+Arguments:
+x
- mz valuesup_start
- start of the stepup_end
- end of the stepk
- steepness of the stepReturns:
+Vec<f64>
- transmission probability for each mz valueuse mscore::timstof::quadrupole::smooth_step;
+
+let mz = vec![100.0, 200.0, 300.0];
+let transmission = smooth_step(&mz, 150.0, 250.0, 0.5).iter().map(
+|&x| (x * 100.0).round() / 100.0).collect::<Vec<f64>>();
+assert_eq!(transmission, vec![0.0, 0.5, 1.0]);
pub fn smooth_step_up_down(
+ x: &Vec<f64>,
+ up_start: f64,
+ up_end: f64,
+ down_start: f64,
+ down_end: f64,
+ k: f64,
+) -> Vec<f64>
Sigmoid step function (step up followed by step down) for quadrupole selection simulation
+Arguments:
+x
- mz valuesup_start
- start of the step upup_end
- end of the step updown_start
- start of the step downdown_end
- end of the step downk
- steepness of the stepReturns:
+Vec<f64>
- transmission probability for each mz valueuse mscore::timstof::quadrupole::smooth_step_up_down;
+
+let mz = vec![100.0, 200.0, 300.0];
+let transmission = smooth_step_up_down(&mz, 150.0, 200.0, 250.0, 300.0, 0.5).iter().map(
+|&x| (x * 100.0).round() / 100.0).collect::<Vec<f64>>();
+assert_eq!(transmission, vec![0.0, 1.0, 0.0]);
pub struct TimsTransmissionDIA { /* private fields */ }
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub trait IonTransmission {
+ // Required method
+ fn apply_transmission(
+ &self,
+ frame_id: i32,
+ scan_id: i32,
+ mz: &Vec<f64>,
+ ) -> Vec<f64>;
+
+ // Provided methods
+ fn transmit_spectrum(
+ &self,
+ frame_id: i32,
+ scan_id: i32,
+ spectrum: MzSpectrum,
+ min_probability: Option<f64>,
+ ) -> MzSpectrum { ... }
+ fn transmit_annotated_spectrum(
+ &self,
+ frame_id: i32,
+ scan_id: i32,
+ spectrum: MzSpectrumAnnotated,
+ min_probability: Option<f64>,
+ ) -> MzSpectrumAnnotated { ... }
+ fn transmit_ion(
+ &self,
+ frame_ids: Vec<i32>,
+ scan_ids: Vec<i32>,
+ spec: MzSpectrum,
+ min_proba: Option<f64>,
+ ) -> Vec<Vec<MzSpectrum>> { ... }
+ fn get_transmission_set(
+ &self,
+ frame_id: i32,
+ scan_id: i32,
+ mz: &Vec<f64>,
+ min_proba: Option<f64>,
+ ) -> HashSet<usize> { ... }
+ fn all_transmitted(
+ &self,
+ frame_id: i32,
+ scan_id: i32,
+ mz: &Vec<f64>,
+ min_proba: Option<f64>,
+ ) -> bool { ... }
+ fn is_transmitted(
+ &self,
+ frame_id: i32,
+ scan_id: i32,
+ mz: f64,
+ min_proba: Option<f64>,
+ ) -> bool { ... }
+ fn any_transmitted(
+ &self,
+ frame_id: i32,
+ scan_id: i32,
+ mz: &Vec<f64>,
+ min_proba: Option<f64>,
+ ) -> bool { ... }
+ fn transmit_tims_frame(
+ &self,
+ frame: &TimsFrame,
+ min_probability: Option<f64>,
+ ) -> TimsFrame { ... }
+ fn transmit_tims_frame_annotated(
+ &self,
+ frame: &TimsFrameAnnotated,
+ min_probability: Option<f64>,
+ ) -> TimsFrameAnnotated { ... }
+ fn isotopes_transmitted(
+ &self,
+ frame_id: i32,
+ scan_id: i32,
+ mz_mono: f64,
+ isotopic_envelope: &Vec<f64>,
+ min_probability: Option<f64>,
+ ) -> (f64, Vec<(f64, f64)>) { ... }
+}
Transmit a spectrum given a frame id and scan id
+Arguments:
+frame_id
- frame idscan_id
- scan idspectrum
- MzSpectrummin_probability
- minimum probability for transmissionReturns:
+MzSpectrum
- transmitted spectrumTransmit an annotated spectrum given a frame id and scan id
+Arguments:
+frame_id
- frame idscan_id
- scan idspectrum
- MzSpectrumAnnotatedmin_probability
- minimum probability for transmissionReturns:
+MzSpectrumAnnotated
- transmitted spectrumGet all ions in a frame that are transmitted
+Arguments:
+frame_id
- frame idscan_id
- scan idmz
- mz valuesmin_proba
- minimum probability for transmissionReturns:
+HashSet<usize>
- indices of transmitted mz valuesCheck if all mz values in a given collection are transmitted
+Arguments:
+frame_id
- frame idscan_id
- scan idmz
- mz valuesmin_proba
- minimum probability for transmissionReturns:
+bool
- true if all mz values are transmittedCheck if a single mz value is transmitted
+Arguments:
+frame_id
- frame idscan_id
- scan idmz
- mz valuemin_proba
- minimum probability for transmissionReturns:
+bool
- true if mz value is transmittedCheck if any mz value is transmitted, can be used to check if one peak of isotopic envelope is transmitted
+Arguments:
+frame_id
- frame idscan_id
- scan idmz
- mz valuesmin_proba
- minimum probability for transmissionReturns:
+bool
- true if any mz value is transmittedTransmit a frame given a diaPASEF transmission layout
+Transmit a frame given a diaPASEF transmission layout with annotations
+Arguments:
+frame
- TimsFrameAnnotatedmin_probability
- minimum probability for transmissionReturns:
+TimsFrameAnnotated
- transmitted framepub struct TimsPlane {
+ pub tof_mean: f64,
+ pub tof_std: f64,
+ pub mz_mean: f64,
+ pub mz_std: f64,
+ pub frame_id: Vec<i32>,
+ pub retention_time: Vec<f64>,
+ pub scan: Vec<i32>,
+ pub mobility: Vec<f64>,
+ pub intensity: Vec<f64>,
+}
tof_mean: f64
§tof_std: f64
§mz_mean: f64
§mz_std: f64
§frame_id: Vec<i32>
§retention_time: Vec<f64>
§scan: Vec<i32>
§mobility: Vec<f64>
§intensity: Vec<f64>
clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsSlice {
+ pub frames: Vec<TimsFrame>,
+}
frames: Vec<TimsFrame>
Filter the TimsSlice by m/z, scan, inverse ion mobility, and intensity
+mz_min
- The minimum m/z valuemz_max
- The maximum m/z valuescan_min
- The minimum scan valuescan_max
- The maximum scan valueintensity_min
- The minimum intensity valueintensity_max
- The maximum intensity valuenum_threads
- The number of threads to useTimsSlice
- A TimsSlice containing only the TimsFrames that pass the filteruse mscore::timstof::slice::TimsSlice;
+
+let slice = TimsSlice::new(vec![]);
+let filtered_slice = slice.filter_ranged(400.0, 2000.0, 0, 1000, 0.0, 100000.0, 0.0, 1.6, 4);
clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsSliceFlat {
+ pub frame_ids: Vec<i32>,
+ pub scans: Vec<i32>,
+ pub tofs: Vec<i32>,
+ pub retention_times: Vec<f64>,
+ pub mobilities: Vec<f64>,
+ pub mzs: Vec<f64>,
+ pub intensities: Vec<f64>,
+}
frame_ids: Vec<i32>
§scans: Vec<i32>
§tofs: Vec<i32>
§retention_times: Vec<f64>
§mobilities: Vec<f64>
§mzs: Vec<f64>
§intensities: Vec<f64>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsSliceVectorized {
+ pub frames: Vec<TimsFrameVectorized>,
+ pub frame_map: BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)>,
+}
frames: Vec<TimsFrameVectorized>
§frame_map: BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsSliceVectorizedFlat {
+ pub frame_ids: Vec<i32>,
+ pub scans: Vec<i32>,
+ pub tofs: Vec<i32>,
+ pub retention_times: Vec<f64>,
+ pub mobilities: Vec<f64>,
+ pub indices: Vec<i32>,
+ pub intensities: Vec<f64>,
+}
frame_ids: Vec<i32>
§scans: Vec<i32>
§tofs: Vec<i32>
§retention_times: Vec<f64>
§mobilities: Vec<f64>
§indices: Vec<i32>
§intensities: Vec<f64>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsSpectrum {
+ pub frame_id: i32,
+ pub scan: i32,
+ pub retention_time: f64,
+ pub mobility: f64,
+ pub ms_type: MsType,
+ pub spectrum: IndexedMzSpectrum,
+}
frame_id: i32
§scan: i32
§retention_time: f64
§mobility: f64
§ms_type: MsType
§spectrum: IndexedMzSpectrum
Creates a new TimsSpectrum instance.
Arguments:
frame_id
- index of frame in TDF raw file.
scan_id
- index of scan in TDF raw file.
retention_time
- The retention time in seconds.
mobility
- The inverse ion mobility.
ms_type
- The MsType of the spectrum (for example MsType::FragmentDda).
spectrum
- An IndexedMzSpectrum instance.
Example:
use mscore::data::spectrum::{IndexedMzSpectrum, MsType};
+use mscore::timstof::spectrum::{TimsSpectrum};
+
+let spectrum = TimsSpectrum::new(1, 1, 100.0, 0.1, MsType::FragmentDda, IndexedMzSpectrum::new(vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]));
+
operator.+
operation. Read moresource
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsSpectrumVectorized {
+ pub frame_id: i32,
+ pub scan: i32,
+ pub retention_time: f64,
+ pub mobility: f64,
+ pub ms_type: MsType,
+ pub vector: IndexedMzSpectrumVectorized,
+}
frame_id: i32
§scan: i32
§retention_time: f64
§mobility: f64
§ms_type: MsType
§vector: IndexedMzSpectrumVectorized
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub enum AcquisitionMode {
+ PRECURSOR,
+ DDA,
+ DIA,
+ Unknown,
+}
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsDataset {
+ pub loader: TimsDataLoader,
+}
loader: TimsDataLoader
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct PASEFDDAFragment {
+ pub frame_id: u32,
+ pub precursor_id: u32,
+ pub collision_energy: f64,
+ pub selected_fragment: TimsFrame,
+}
frame_id: u32
§precursor_id: u32
§collision_energy: f64
§selected_fragment: TimsFrame
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsDatasetDDA {
+ pub loader: TimsDataLoader,
+}
loader: TimsDataLoader
Get the fragment spectra for all PASEF selected precursors
+self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsDatasetDIA {
+ pub loader: TimsDataLoader,
+ pub global_meta_data: GlobalMetaData,
+ pub meta_data: Vec<FrameMeta>,
+ pub dia_ms_mis_info: Vec<DiaMsMisInfo>,
+ pub dia_ms_ms_windows: Vec<DiaMsMsWindow>,
+}
loader: TimsDataLoader
§global_meta_data: GlobalMetaData
§meta_data: Vec<FrameMeta>
§dia_ms_mis_info: Vec<DiaMsMisInfo>
§dia_ms_ms_windows: Vec<DiaMsMsWindow>
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub enum TimsDataLoader {
+ InMemory(TimsInMemoryLoader),
+ Lazy(TimsLazyLoder),
+}
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub enum TimsIndexConverter {
+ Simple(SimpleIndexConverter),
+ BrukerLib(BrukerLibTimsDataConverter),
+}
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct BrukerLibTimsDataConverter {
+ pub bruker_lib: BrukerTimsDataLibrary,
+}
bruker_lib: BrukerTimsDataLibrary
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct SimpleIndexConverter {
+ pub tof_intercept: f64,
+ pub tof_slope: f64,
+ pub scan_intercept: f64,
+ pub scan_slope: f64,
+}
tof_intercept: f64
§tof_slope: f64
§scan_intercept: f64
§scan_slope: f64
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsInMemoryLoader {
+ pub raw_data_layout: TimsRawDataLayout,
+ pub index_converter: TimsIndexConverter,
+ /* private fields */
+}
raw_data_layout: TimsRawDataLayout
§index_converter: TimsIndexConverter
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsLazyLoder {
+ pub raw_data_layout: TimsRawDataLayout,
+ pub index_converter: TimsIndexConverter,
+}
raw_data_layout: TimsRawDataLayout
§index_converter: TimsIndexConverter
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsRawDataLayout {
+ pub raw_data_path: String,
+ pub global_meta_data: GlobalMetaData,
+ pub frame_meta_data: Vec<FrameMeta>,
+ pub max_scan_count: i64,
+ pub frame_id_ptr: Vec<i64>,
+ pub tims_offset_values: Vec<i64>,
+ pub acquisition_mode: AcquisitionMode,
+}
raw_data_path: String
§global_meta_data: GlobalMetaData
§frame_meta_data: Vec<FrameMeta>
§max_scan_count: i64
§frame_id_ptr: Vec<i64>
§tims_offset_values: Vec<i64>
§acquisition_mode: AcquisitionMode
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub trait IndexConverter {
+ // Required methods
+ fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64>;
+ fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32>;
+ fn scan_to_inverse_mobility(
+ &self,
+ frame_id: u32,
+ scan_values: &Vec<u32>,
+ ) -> Vec<f64>;
+ fn inverse_mobility_to_scan(
+ &self,
+ frame_id: u32,
+ inverse_mobility_values: &Vec<f64>,
+ ) -> Vec<u32>;
+}
pub trait TimsData {
+ // Required methods
+ fn get_frame(&self, frame_id: u32) -> TimsFrame;
+ fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame;
+ fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice;
+ fn get_acquisition_mode(&self) -> AcquisitionMode;
+ fn get_frame_count(&self) -> i32;
+ fn get_data_path(&self) -> &str;
+}
pub fn read_global_meta_sql(
+ bruker_d_folder_name: &str,
+) -> Result<GlobalMetaData, Box<dyn Error>>
pub struct DDAFragmentInfo {
+ pub frame_id: i64,
+ pub scan_begin: i64,
+ pub scan_end: i64,
+ pub isolation_mz: f64,
+ pub isolation_width: f64,
+ pub collision_energy: f64,
+ pub precursor_id: i64,
+}
frame_id: i64
§scan_begin: i64
§scan_end: i64
§isolation_mz: f64
§isolation_width: f64
§collision_energy: f64
§precursor_id: i64
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct DDAPrecursorMeta {
+ pub precursor_id: i64,
+ pub precursor_mz_highest_intensity: f64,
+ pub precursor_mz_average: f64,
+ pub precursor_mz_monoisotopic: Option<f64>,
+ pub precursor_charge: Option<i64>,
+ pub precursor_average_scan_number: f64,
+ pub precursor_total_intensity: f64,
+ pub precursor_frame_id: i64,
+}
precursor_id: i64
§precursor_mz_highest_intensity: f64
§precursor_mz_average: f64
§precursor_mz_monoisotopic: Option<f64>
§precursor_charge: Option<i64>
§precursor_average_scan_number: f64
§precursor_total_intensity: f64
§precursor_frame_id: i64
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct DIAFragmentFrameInfo {}
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct DIAWindowGroupInfo {}
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct DiaMsMisInfo {
+ pub frame_id: u32,
+ pub window_group: u32,
+}
frame_id: u32
§window_group: u32
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct DiaMsMsWindow {
+ pub window_group: u32,
+ pub scan_num_begin: u32,
+ pub scan_num_end: u32,
+ pub isolation_mz: f64,
+ pub isolation_width: f64,
+ pub collision_energy: f64,
+}
window_group: u32
§scan_num_begin: u32
§scan_num_end: u32
§isolation_mz: f64
§isolation_width: f64
§collision_energy: f64
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct FrameMeta {Show 17 fields
+ pub id: i64,
+ pub time: f64,
+ pub polarity: String,
+ pub scan_mode: i64,
+ pub ms_ms_type: i64,
+ pub tims_id: i64,
+ pub max_intensity: f64,
+ pub sum_intensity: f64,
+ pub num_scans: i64,
+ pub num_peaks: i64,
+ pub mz_calibration: i64,
+ pub t_1: f64,
+ pub t_2: f64,
+ pub tims_calibration: i64,
+ pub property_group: i64,
+ pub accumulation_time: f64,
+ pub ramp_time: f64,
+}
id: i64
§time: f64
§polarity: String
§scan_mode: i64
§ms_ms_type: i64
§tims_id: i64
§max_intensity: f64
§sum_intensity: f64
§num_scans: i64
§num_peaks: i64
§mz_calibration: i64
§t_1: f64
§t_2: f64
§tims_calibration: i64
§property_group: i64
§accumulation_time: f64
§ramp_time: f64
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct GlobalMetaData {Show 13 fields
+ pub schema_type: String,
+ pub schema_version_major: i64,
+ pub schema_version_minor: i64,
+ pub acquisition_software_vendor: String,
+ pub instrument_vendor: String,
+ pub closed_property: i64,
+ pub tims_compression_type: i64,
+ pub max_num_peaks_per_scan: i64,
+ pub mz_acquisition_range_lower: f64,
+ pub mz_acquisition_range_upper: f64,
+ pub one_over_k0_range_lower: f64,
+ pub one_over_k0_range_upper: f64,
+ pub tof_max_index: u32,
+}
schema_type: String
§schema_version_major: i64
§schema_version_minor: i64
§acquisition_software_vendor: String
§instrument_vendor: String
§closed_property: i64
§tims_compression_type: i64
§max_num_peaks_per_scan: i64
§mz_acquisition_range_lower: f64
§mz_acquisition_range_upper: f64
§one_over_k0_range_lower: f64
§one_over_k0_range_upper: f64
§tof_max_index: u32
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct PasefMsMsMeta {
+ pub frame_id: i64,
+ pub scan_num_begin: i64,
+ pub scan_num_end: i64,
+ pub isolation_mz: f64,
+ pub isolation_width: f64,
+ pub collision_energy: f64,
+ pub precursor_id: i64,
+}
frame_id: i64
§scan_num_begin: i64
§scan_num_end: i64
§isolation_mz: f64
§isolation_width: f64
§collision_energy: f64
§precursor_id: i64
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct BrukerTimsDataLibrary {
+ pub lib: Library,
+ pub handle: u64,
+}
lib: Library
§handle: u64
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub fn parse_decompressed_bruker_binary_data(
+ decompressed_bytes: &[u8],
+) -> Result<(Vec<u32>, Vec<u32>, Vec<u32>), Box<dyn Error>>
Parses the decompressed Bruker binary data.
Arguments:
+decompressed_bytes
- A byte slice that holds the decompressed data
Returns (on success, as a tuple in this order):
scan_indices
- A vector of u32 that holds the scan indices
tof_indices
- A vector of u32 that holds the tof indices
intensities
- A vector of u32 that holds the intensities
pub struct FragmentIonSim {
+ pub peptide_id: u32,
+ pub ion_id: u32,
+ pub collision_energy: f64,
+ pub charge: i8,
+ pub indices: Vec<u32>,
+ pub values: Vec<f64>,
+}
peptide_id: u32
§ion_id: u32
§collision_energy: f64
§charge: i8
§indices: Vec<u32>
§values: Vec<f64>
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct FrameToWindowGroupSim {
+ pub frame_id: u32,
+ pub window_group: u32,
+}
frame_id: u32
§window_group: u32
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct FramesSim {
+ pub frame_id: u32,
+ pub time: f32,
+ pub ms_type: i64,
+}
frame_id: u32
§time: f32
§ms_type: i64
clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct IonSim {
+ pub ion_id: u32,
+ pub peptide_id: u32,
+ pub sequence: String,
+ pub charge: i8,
+ pub relative_abundance: f32,
+ pub mobility: f32,
+ pub simulated_spectrum: MzSpectrum,
+ pub scan_distribution: SignalDistribution,
+}
ion_id: u32
§peptide_id: u32
§sequence: String
§charge: i8
§relative_abundance: f32
§mobility: f32
§simulated_spectrum: MzSpectrum
§scan_distribution: SignalDistribution
clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct PeptidesSim {Show 14 fields
+ pub protein_id: u32,
+ pub peptide_id: u32,
+ pub sequence: PeptideSequence,
+ pub proteins: String,
+ pub decoy: bool,
+ pub missed_cleavages: i8,
+ pub n_term: Option<bool>,
+ pub c_term: Option<bool>,
+ pub mono_isotopic_mass: f32,
+ pub retention_time: f32,
+ pub events: f32,
+ pub frame_start: u32,
+ pub frame_end: u32,
+ pub frame_distribution: SignalDistribution,
+}
protein_id: u32
§peptide_id: u32
§sequence: PeptideSequence
§proteins: String
§decoy: bool
§missed_cleavages: i8
§n_term: Option<bool>
§c_term: Option<bool>
§mono_isotopic_mass: f32
§retention_time: f32
§events: f32
§frame_start: u32
§frame_end: u32
§frame_distribution: SignalDistribution
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct ScansSim {
+ pub scan: u32,
+ pub mobility: f32,
+}
scan: u32
§mobility: f32
clone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct SignalDistribution {
+ pub mean: f32,
+ pub variance: f32,
+ pub error: f32,
+ pub occurrence: Vec<u32>,
+ pub abundance: Vec<f32>,
+}
mean: f32
§variance: f32
§error: f32
§occurrence: Vec<u32>
§abundance: Vec<f32>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct WindowGroupSettingsSim {
+ pub window_group: u32,
+ pub scan_start: u32,
+ pub scan_end: u32,
+ pub isolation_mz: f32,
+ pub isolation_width: f32,
+ pub collision_energy: f32,
+}
window_group: u32
§scan_start: u32
§scan_end: u32
§isolation_mz: f32
§isolation_width: f32
§collision_energy: f32
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsTofSyntheticsFrameBuilderDIA {
+ pub path: String,
+ pub precursor_frame_builder: TimsTofSyntheticsPrecursorFrameBuilder,
+ pub transmission_settings: TimsTransmissionDIA,
+ pub fragmentation_settings: TimsTofCollisionEnergyDIA,
+ pub fragment_ions: Option<BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)>>,
+ pub fragment_ions_annotated: Option<BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrumAnnotated>)>>,
+}
path: String
§precursor_frame_builder: TimsTofSyntheticsPrecursorFrameBuilder
§transmission_settings: TimsTransmissionDIA
§fragmentation_settings: TimsTofCollisionEnergyDIA
§fragment_ions: Option<BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)>>
§fragment_ions_annotated: Option<BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrumAnnotated>)>>
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsTofSyntheticsDataHandle {
+ pub connection: Connection,
+}
connection: Connection
Method to build a map from peptide id to ions
+Method to build a map from peptide id to events (absolute number of events in the simulation)
+Method to build a set of precursor frame ids, can be used to check if a frame is a precursor frame
+self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub struct TimsTofSyntheticsPrecursorFrameBuilder {
+ pub ions: BTreeMap<u32, Vec<IonSim>>,
+ pub peptides: BTreeMap<u32, PeptidesSim>,
+ pub scans: Vec<ScansSim>,
+ pub frames: Vec<FramesSim>,
+ pub precursor_frame_id_set: HashSet<u32>,
+ pub frame_to_abundances: BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
+ pub peptide_to_ions: BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)>,
+ pub frame_to_rt: BTreeMap<u32, f32>,
+ pub scan_to_mobility: BTreeMap<u32, f32>,
+ pub peptide_to_events: BTreeMap<u32, f32>,
+}
ions: BTreeMap<u32, Vec<IonSim>>
§peptides: BTreeMap<u32, PeptidesSim>
§scans: Vec<ScansSim>
§frames: Vec<FramesSim>
§precursor_frame_id_set: HashSet<u32>
§frame_to_abundances: BTreeMap<u32, (Vec<u32>, Vec<f32>)>
§peptide_to_ions: BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)>
§frame_to_rt: BTreeMap<u32, f32>
§scan_to_mobility: BTreeMap<u32, f32>
§peptide_to_events: BTreeMap<u32, f32>
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.self
from the equivalent element of its
+superset. Read moreself
is actually part of its subset T
(and can be converted to it).self.to_subset
but without any property checks. Always succeeds.self
to the equivalent element of its superset.pub fn reshape_prosit_array(array: Vec<f64>) -> Vec<Vec<Vec<f64>>>
helper function to reshape the flat prosit predicted intensity array into a 3D array where: +1st dimension: 29 rows for every potential ion since prosit allows precursor sequences up to 30 amino acids +2nd dimension: 2 columns for B and Y ions +3rd dimension: 3 channels for charge 1, 2, and 3
+array
- A vector of f64 representing the flat prosit arraypub fn sequence_to_all_ions(
+ sequence: &str,
+ charge: i32,
+ intensity_pred_flat: &Vec<f64>,
+ normalize: bool,
+ half_charge_one: bool,
+ peptide_id: Option<i32>,
+) -> String
helper function to convert a peptide ion to all possible ions and serialize the result to a json string
+sequence
- A string representing the peptide sequencecharge
- An i32 representing the chargeintensity_pred_flat
- A vector of f64 representing the flat prosit predicted intensity arraynormalize
- A bool indicating whether to normalize the intensity valueshalf_charge_one
- A bool indicating whether to use half charge onepub fn convolve(
+ dist_a: &Vec<(f64, f64)>,
+ dist_b: &Vec<(f64, f64)>,
+ mass_tolerance: f64,
+ abundance_threshold: f64,
+ max_results: usize,
+) -> Vec<(f64, f64)>
convolve two distributions of masses and abundances
+Arguments:
+dist_a
- first distribution of masses and abundancesdist_b
- second distribution of masses and abundancesmass_tolerance
- mass tolerance for combining peaksabundance_threshold
- minimum abundance for a peak to be included in the resultmax_results
- maximum number of peaks to include in the resultReturns:
+Vec<(f64, f64)>
- combined distribution of masses and abundancesuse rustms::algorithm::isotope::convolve;
+
+let dist_a = vec![(100.0, 0.5), (101.0, 0.5)];
+let dist_b = vec![(100.0, 0.5), (101.0, 0.5)];
+let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200);
+assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)>
convolve a distribution with itself n times
+Arguments:
+dist
- distribution of masses and abundancesn
- number of times to convolve the distribution with itselfReturns:
+Vec<(f64, f64)>
- distribution of masses and abundancesuse rustms::algorithm::isotope::convolve_pow;
+
+let dist = vec![(100.0, 0.5), (101.0, 0.5)];
+let result = convolve_pow(&dist, 2);
+assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
pub fn generate_isotope_distribution(
+ atomic_composition: &HashMap<String, i32>,
+ mass_tolerance: f64,
+ abundance_threshold: f64,
+ max_result: i32,
+) -> Vec<(f64, f64)>
generate the isotope distribution for a given atomic composition
+Arguments:
+atomic_composition
- atomic composition of the peptidemass_tolerance
- mass tolerance for combining peaksabundance_threshold
- minimum abundance for a peak to be included in the resultmax_result
- maximum number of peaks to include in the resultReturns:
+Vec<(f64, f64)>
- distribution of masses and abundancesuse std::collections::HashMap;
+use rustms::algorithm::isotope::generate_isotope_distribution;
+
+let mut atomic_composition = HashMap::new();
+atomic_composition.insert("C".to_string(), 5);
+atomic_composition.insert("H".to_string(), 9);
+atomic_composition.insert("N".to_string(), 1);
+atomic_composition.insert("O".to_string(), 1);
+let result = generate_isotope_distribution(&atomic_composition, 1e-6, 1e-12, 200);
pub fn atomic_product_ion_composition(
+ product_ion: &PeptideProductIon,
+) -> Vec<(&str, i32)>
calculate the atomic composition of a product ion
+Arguments:
+product_ion
- a PeptideProductIon instanceReturns:
+Vec<(&str, i32)>
- a vector of tuples representing the atomic composition of the product ionpub fn calculate_amino_acid_composition(sequence: &str) -> HashMap<String, i32>
get a count dictionary of the amino acid composition of a peptide sequence
+Arguments:
+sequence
- peptide sequenceReturns:
+composition
- a dictionary of amino acid compositionuse rustms::algorithm::peptide::calculate_amino_acid_composition;
+
+let sequence = "PEPTIDEH";
+let composition = calculate_amino_acid_composition(sequence);
+assert_eq!(composition.get("P"), Some(&2));
+assert_eq!(composition.get("E"), Some(&2));
+assert_eq!(composition.get("T"), Some(&1));
+assert_eq!(composition.get("I"), Some(&1));
+assert_eq!(composition.get("D"), Some(&1));
+assert_eq!(composition.get("H"), Some(&1));
pub fn calculate_peptide_mono_isotopic_mass(
+ peptide_sequence: &PeptideSequence,
+) -> f64
calculate the monoisotopic mass of a peptide sequence
+Arguments:
+peptide_sequence
- peptide sequenceReturns:
+mass
- monoisotopic mass of the peptideuse rustms::algorithm::peptide::calculate_peptide_mono_isotopic_mass;
+use rustms::proteomics::peptide::PeptideSequence;
+
+let peptide_sequence = PeptideSequence::new("PEPTIDEH".to_string(), Some(1));
+let mass = calculate_peptide_mono_isotopic_mass(&peptide_sequence);
+let mass_quantized = (mass * 1e6).round() as i32;
+assert_eq!(mass_quantized, 936418877);
pub fn calculate_peptide_product_ion_mono_isotopic_mass(
+ sequence: &str,
+ kind: FragmentType,
+) -> f64
calculate the monoisotopic mass of a peptide product ion for a given fragment type
+Arguments:
+sequence
- peptide sequencekind
- fragment typeReturns:
+mass
- monoisotopic mass of the peptideuse rustms::algorithm::peptide::calculate_peptide_product_ion_mono_isotopic_mass;
+use rustms::proteomics::peptide::FragmentType;
+let sequence = "PEPTIDEH";
+let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, FragmentType::Y);
+assert_eq!(mass, 936.4188766862999);
pub fn calculate_product_ion_mz(
+ sequence: &str,
+ kind: FragmentType,
+ charge: Option<i32>,
+) -> f64
calculate the monoisotopic m/z of a peptide product ion for a given fragment type and charge
+Arguments:
+sequence
- peptide sequencekind
- fragment typecharge
- chargeReturns:
+mz
- monoisotopic m/z of the peptide product ionuse rustms::algorithm::peptide::calculate_product_ion_mz;
+use rustms::chemistry::constants::MASS_PROTON;
+use rustms::proteomics::peptide::FragmentType;
+let sequence = "PEPTIDEH";
+let mz = calculate_product_ion_mz(sequence, FragmentType::Y, Some(1));
+assert_eq!(mz, 936.4188766862999 + MASS_PROTON);
pub fn fragments_to_composition(
+ product_ions: Vec<PeptideProductIon>,
+ num_threads: usize,
+) -> Vec<Vec<(String, i32)>>
calculate the atomic composition of a peptide product ion series +Arguments:
+product_ions
- a vector of PeptideProductIon instances
num_threads
- a usize representing the number of threads to use
+Returns:
Vec<Vec<(String, i32)>>
- a vector of vectors of tuples representing the atomic composition of each product ion
pub fn get_num_protonizable_sites(sequence: &str) -> usize
count the number of protonizable sites in a peptide sequence
+sequence
- a string representing the peptide sequenceusize
- the number of protonizable sites in the peptide sequenceuse rustms::algorithm::peptide::get_num_protonizable_sites;
+
+let sequence = "PEPTIDEH";
+let num_protonizable_sites = get_num_protonizable_sites(sequence);
+assert_eq!(num_protonizable_sites, 2);
pub fn peptide_sequence_to_atomic_composition(
+ peptide_sequence: &PeptideSequence,
+) -> HashMap<&'static str, i32>
calculate the atomic composition of a peptide sequence
+pub fn atomic_weights_mono_isotopic() -> HashMap<&'static str, f64>
Atomic Weights
+None
+HashMap<&'static str, f64>
- a map of atomic symbols to their monoisotopic weightsuse rustms::chemistry::element::atomic_weights_mono_isotopic;
+
+let atomic_weights = atomic_weights_mono_isotopic();
+assert_eq!(atomic_weights.get("H"), Some(&1.00782503223));
pub fn atoms_isotopic_weights() -> HashMap<&'static str, Vec<f64>>
Isotopic Weights
+None
+HashMap<&'static str, Vec<f64>>
- a map of atomic symbols to their isotopic weightsuse rustms::chemistry::element::atoms_isotopic_weights;
+
+let isotopic_weights = atoms_isotopic_weights();
+assert_eq!(isotopic_weights.get("H"), Some(&vec![1.00782503223, 2.01410177812]));
pub fn isotopic_abundance() -> HashMap<&'static str, Vec<f64>>
Isotopic Abundance
+None
+HashMap<&'static str, Vec<f64>>
- a map of atomic symbols to their isotopic abundancesuse rustms::chemistry::element::isotopic_abundance;
+
+let isotopic_abundance = isotopic_abundance();
+assert_eq!(isotopic_abundance.get("H"), Some(&vec![0.999885, 0.000115]));
pub fn calculate_mz(monoisotopic_mass: f64, charge: i32) -> f64
calculate the m/z of an ion
+Arguments:
+monoisotopic_mass
- monoisotopic mass of the ioncharge
- charge state of the ionReturns:
+mz
- mass-over-charge of the ionuse rustms::chemistry::formula::calculate_mz;
+
+let mz = calculate_mz(1000.0, 2);
+assert_eq!(mz, 501.007276466621);
pub fn ccs_to_one_over_reduced_mobility(
+ ccs: f64,
+ mz: f64,
+ charge: u32,
+ mass_gas: f64,
+ temp: f64,
+ t_diff: f64,
+) -> f64
convert CCS to 1 over reduced ion mobility (1/k0)
+Arguments:
+ccs
- collision cross-sectioncharge
- charge state of the ionmz
- mass-over-charge of the ionmass_gas
- mass of drift gas (N2)temp
- temperature of the drift gas in °Ct_diff
- factor to translate from °C to KReturns:
+one_over_k0
- 1 over reduced ion mobility (1/k0)use rustms::chemistry::formula::ccs_to_one_over_reduced_mobility;
+
+let k0 = ccs_to_one_over_reduced_mobility(806.5918693771381, 1000.0, 2, 28.013, 31.85, 273.15);
+assert_eq!(k0, 2.0);
pub fn one_over_reduced_mobility_to_ccs(
+ one_over_k0: f64,
+ mz: f64,
+ charge: u32,
+ mass_gas: f64,
+ temp: f64,
+ t_diff: f64,
+) -> f64
convert 1 over reduced ion mobility (1/k0) to CCS
+Arguments:
+one_over_k0
- 1 over reduced ion mobility (1/k0)charge
- charge state of the ionmz
- mass-over-charge of the ionmass_gas
- mass of drift gas (N2)temp
- temperature of the drift gas in °Ct_diff
- factor to translate from °C to KReturns:
+ccs
- collision cross-sectionuse rustms::chemistry::formula::one_over_reduced_mobility_to_ccs;
+
+let ccs = one_over_reduced_mobility_to_ccs(0.5, 1000.0, 2, 28.013, 31.85, 273.15);
+assert_eq!(ccs, 201.64796734428452);
pub fn parse_formula(formula: &str) -> Result<HashMap<String, i32>, String>
Parse a chemical formula into a map of elements and their counts.
+Arguments:
+formula
- The chemical formula to parse.Returns:
+Result<HashMap<String, i32>, String>
- A map of elements and their counts.use rustms::chemistry::sum_formula::parse_formula;
+
+let formula = "H2O";
+let elements = parse_formula(formula).unwrap();
+assert_eq!(elements.get("H"), Some(&2));
+assert_eq!(elements.get("O"), Some(&1));
pub struct SumFormula {
+ pub formula: String,
+ pub elements: HashMap<String, i32>,
+}
formula: String
§elements: HashMap<String, i32>
Calculate the monoisotopic weight of the chemical formula.
+Arguments:
+None
+Returns:
+f64
- The monoisotopic weight of the chemical formula.use rustms::chemistry::sum_formula::SumFormula;
+
+let formula = "H2O";
+let sum_formula = SumFormula::new(formula);
+assert_eq!(sum_formula.monoisotopic_weight(), 18.01056468403);
Generate the isotope distribution of the chemical formula.
+Arguments:
+charge
- The charge state of the ion.Returns:
+MzSpectrum
- The isotope distribution of the chemical formula.use rustms::chemistry::sum_formula::SumFormula;
+use rustms::ms::spectrum::MzSpectrum;
+
+let formula = "C6H12O6";
+let sum_formula = SumFormula::new(formula);
+let isotope_distribution = sum_formula.isotope_distribution(1);
+let mut first_mz = *isotope_distribution.mz.first().unwrap();
+// round to first 5 decimal places
+first_mz = (first_mz * 1e5).round() / 1e5;
+assert_eq!(first_mz, 181.07066);
self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read morepub fn modification_atomic_composition() -> HashMap<String, HashMap<&'static str, i32>>
Unimod Modifications
+None
+HashMap<String, HashMap<&'static str, i32>>
- a map of unimod modification names to their atomic compositionsuse rustms::chemistry::unimod::modification_atomic_composition;
+use std::collections::HashMap;
+
+let composition = modification_atomic_composition();
+assert_eq!(composition.get("[UNIMOD:1]"), Some(&HashMap::from([("C", 2), ("H", 2), ("O", 1)])));
pub fn unimod_modifications_mass() -> HashMap<&'static str, f64>
Unimod Modifications Mass
+None
+HashMap<&'static str, f64>
- a map of unimod modification names to their massuse rustms::chemistry::unimod::unimod_modifications_mass;
+
+let mass = unimod_modifications_mass();
+assert_eq!(mass.get("[UNIMOD:1]"), Some(&42.010565));
pub fn unimod_modifications_mass_numerical() -> HashMap<u32, f64>
Unimod Modifications Mass Numerical
+None
+HashMap<u32, f64>
- a map of unimod modification numerical ids to their massuse rustms::chemistry::unimod::unimod_modifications_mass_numerical;
+
+let mass = unimod_modifications_mass_numerical();
+assert_eq!(mass.get(&58), Some(&56.026215));
pub fn find_unimod_patterns(input_string: &str) -> (String, Vec<f64>)
Convert a peptide sequence with UNIMOD annotations to a tuple of plain sequence and for each +position in the sequence, the mass of the modification at that position (0 if no modification), +which is the representation of sequence and modifications used by SAGE
+input_string
- a string slice of the peptide sequence(String, Vec<f64>)
- a tuple of the plain sequence and a vector of f64 representing the mass
+of the modification at each position in the sequenceuse rustms::chemistry::utility::find_unimod_patterns;
+
+let sequence = "PEPTIDE[UNIMOD:1]H";
+let (stripped_sequence, mods) = find_unimod_patterns(sequence);
+assert_eq!(stripped_sequence, "PEPTIDEH");
+assert_eq!(mods, vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.010565, 0.0]);
pub fn reshape_prosit_array(flat_array: Vec<f64>) -> Vec<Vec<Vec<f64>>>
Reshape the flat prosit array into a 3D array of shape (29, 2, 3)
+flat_array
- a vector of f64 representing the flat prosit arrayVec<Vec<Vec<f64>>>
- a 3D array of shape (29, 2, 3)use rustms::chemistry::utility::reshape_prosit_array;
+
+let flat_array = vec![0.0; 174];
+let reshaped_array = reshape_prosit_array(flat_array);
+assert_eq!(reshaped_array.len(), 29);
+assert_eq!(reshaped_array[0].len(), 2);
+assert_eq!(reshaped_array[0][0].len(), 3);
pub fn unimod_sequence_to_tokens(
+ sequence: &str,
+ group_modifications: bool,
+) -> Vec<String>
Convert a peptide sequence with UNIMOD annotations to a list of tokens
+sequence
- a string slice of the peptide sequencegroup_modifications
- a boolean indicating whether to group the amino acid before the UNIMOD with the UNIMODVec<String>
- a vector of strings representing the tokensuse rustms::chemistry::utility::unimod_sequence_to_tokens;
+
+let sequence = "PEPTIDE[UNIMOD:1]H";
+let tokens = unimod_sequence_to_tokens(sequence, false);
+assert_eq!(tokens, vec!["P", "E", "P", "T", "I", "D", "E", "[UNIMOD:1]", "H"]);
+let tokens = unimod_sequence_to_tokens(sequence, true);
+assert_eq!(tokens, vec!["P", "E", "P", "T", "I", "D", "E[UNIMOD:1]", "H"]);
pub struct MzSpectrum {
+ pub mz: Vec<f64>,
+ pub intensity: Vec<f64>,
+}
Represents a mass spectrum with associated m/z values and intensities.
+mz: Vec<f64>
§intensity: Vec<f64>
Constructs a new MzSpectrum
.
mz
- A vector of m/z values.intensity
- A vector of intensity values corresponding to the m/z values.Panics if the lengths of mz
and intensity
are not the same. (Actually, it doesn’t at the moment; adding this check is planned for later.)
let spectrum = MzSpectrum::new(vec![200.0, 100.0], vec![20.0, 10.0]);
+assert_eq!(spectrum.mz, vec![100.0, 200.0]);
+assert_eq!(spectrum.intensity, vec![10.0, 20.0]);
Filters the m/z values and intensities based on a range of m/z values and intensities.
+mz_min
- The minimum m/z value.mz_max
- The maximum m/z value.intensity_min
- The minimum intensity value.intensity_max
- The maximum intensity value.MzSpectrum
- A new MzSpectrum
with m/z values and intensities within the specified ranges.let spectrum = MzSpectrum::new(vec![100.0, 200.0, 300.0], vec![10.0, 20.0, 30.0]);
+let filtered_spectrum = spectrum.filter_ranged(150.0, 250.0, 15.0, 25.0);
+assert_eq!(filtered_spectrum.mz, vec![200.0]);
+assert_eq!(filtered_spectrum.intensity, vec![20.0]);
Combines two MzSpectrum
instances by summing up the intensities of matching m/z values.
Each m/z value is quantized to retain at least 6 decimals. If two spectra have m/z values +that quantize to the same integer value, their intensities are summed.
+let spectrum1 = MzSpectrum { mz: vec![100.523, 101.923], intensity: vec![10.0, 20.0] };
+let spectrum2 = MzSpectrum { mz: vec![101.235, 105.112], intensity: vec![15.0, 30.0] };
+
+let combined = spectrum1 + spectrum2;
+
+assert_eq!(combined.mz, vec![100.523, 101.235, 101.923, 105.112]);
+assert_eq!(combined.intensity, vec![10.0, 15.0, 20.0, 30.0]);
+
operator.source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read morepub fn amino_acid_composition() -> HashMap<char, HashMap<&'static str, i32>>
Amino Acid Composition
+None
+HashMap<char, HashMap<&'static str, i32>>
- a map of amino acid one-letter codes to their atomic compositionsuse rustms::proteomics::amino_acid::amino_acid_composition;
+use std::collections::HashMap;
+
+let amino_acid_composition = amino_acid_composition();
+assert_eq!(amino_acid_composition.get(&'K'), Some(&HashMap::from([("C", 6), ("H", 12), ("N", 2), ("O", 1)])));
pub fn amino_acid_masses() -> HashMap<&'static str, f64>
Amino Acid Masses
+None
+HashMap<&'static str, f64>
- a map of amino acid one-letter codes to their monoisotopic massesuse rustms::proteomics::amino_acid::amino_acid_masses;
+
+let amino_acid_masses = amino_acid_masses();
+assert_eq!(amino_acid_masses.get("K"), Some(&128.094963));
pub enum FragmentType {
+ A,
+ B,
+ C,
+ X,
+ Y,
+ Z,
+}
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read morepub struct PeptideIon {
+ pub sequence: PeptideSequence,
+ pub charge: i32,
+ pub intensity: f64,
+ pub ordinal: u32,
+}
sequence: PeptideSequence
§charge: i32
§intensity: f64
§ordinal: u32
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read morepub struct PeptideProductIon {
+ pub kind: FragmentType,
+ pub ion: PeptideIon,
+}
kind: FragmentType
§ion: PeptideIon
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read morepub struct PeptideProductIonSeries {
+ pub charge: i32,
+ pub n_ions: Vec<PeptideProductIon>,
+ pub c_ions: Vec<PeptideProductIon>,
+}
charge: i32
§n_ions: Vec<PeptideProductIon>
§c_ions: Vec<PeptideProductIon>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read morepub struct PeptideProductIonSeriesCollection {
+ pub peptide_ions: Vec<PeptideProductIonSeries>,
+}
peptide_ions: Vec<PeptideProductIonSeries>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read morepub struct PeptideSequence {
+ pub sequence: String,
+ pub peptide_id: Option<i32>,
+}
sequence: String
§peptide_id: Option<i32>
source
. Read moreclone_to_uninit
)self
into a Left
variant of Either<Self, Self>
+if into_left
is true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moreself
into a Left
variant of Either<Self, Self>
+if into_left(&self)
returns true
.
+Converts self
into a Right
variant of Either<Self, Self>
+otherwise. Read moregenerate_precursor_spectrum
\ngenerate the precursor spectrum for a given peptide …\ncalculate the isotope pattern for a given mass and charge …\ncalculate the lambda value for a given mass\ncalculate the normal probability density function\ncalculate the atomic composition of a product ion\nget a count dictionary of the amino acid composition of a …\ncalculate the monoisotopic mass of a peptide sequence\ncalculate the monoisotopic mass of a peptide product ion …\ncalculate the monoisotopic m/z of a peptide product ion …\ncalculate the atomic composition of a peptide product ion …\ncount the number of protonizable sites in a peptide …\ncalculate the atomic composition of a peptide sequence\nsimulate the charge state distribution for a peptide …\nsimulate the charge state distribution for a list of …\nAmino Acid Composition\nAmino Acid Masses\nAmino Acids\nAtomic Weights\nIsotopic Weights\nIsotopic Abundance\ncalculate the m/z of an ion\nconvert CCS to 1 over reduced ion mobility (1/k0)\nconvert 1 over reduced ion mobility (1/k0) to CCS\nReturns the argument unchanged.\nCalls U::from(self)
.\nUnimod Modifications\nUnimod Modifications Mass\nUnimod Modifications Mass Numerical\nConvert a peptide sequence with UNIMOD annotations to a …\nReshape the flat prosit array into a 3D array of shape …\nConvert a peptide sequence with UNIMOD annotations to a …\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nRepresents a mass spectrum with associated m/z indices, …\nRepresents the type of spectrum.\nRepresents a mass spectrum with associated m/z values and …\nRepresents a vectorized mass spectrum.\nVectorized representation for Structs holding m/z values …\nCombines two MzSpectrum
instances by summing up the …\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nReturns the integer value corresponding to the MsType
enum.\nReturns the MsType
enum corresponding to the given integer …\nConstructs a new MzSpectrum
.\nCreates a new TOFMzSpectrum
instance.\nBins the spectrum’s m/z values to a given resolution and …\nBins the spectrum based on a given m/z resolution, summing …\nSplits the spectrum into a collection of windows based on …\nConvert the MzSpectrum
to a MzSpectrumVectorized
using the …\nConvert the IndexedMzSpectrum
to a IndexedMzVector
using …\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nReturns the argument unchanged.\nCalls U::from(self)
.\nFilter a given TimsFrame by m/z, scan, and intensity.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nConvert a given TimsFrame to an ImsFrame.\nCalculate the weighted mean and variance of inv_mob
values …\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCreates a new ImsFrame
instance.\nCreates a new TimsFrame
instance.\nConvert a given TimsFrame to a vector of TimsSpectrum.\nCheck if all mz values in a given collection are …\nCheck if any mz value is transmitted, can be used to check …\nApply ion transmission function to mz values\nReturns the argument unchanged.\nGet all ions in a frame that are transmitted\nCalls U::from(self)
.\nIon transmission function for quadrupole selection …\nCheck if a single mz value is transmitted\nSigmoid step function for quadrupole selection simulation\nSigmoide step function for quadrupole selection simulation\nTransmit an annotated spectrum given a frame id and scan id\nTransmit a spectrum given a frame id and scan id\nTransmit a frame given a diaPASEF transmission layout\nTransmit a frame given a diaPASEF transmission layout with …\nFilter the TimsSlice by m/z, scan, and intensity\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nGet a vector of TimsFrames by MsType\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCreate a new TimsSlice from a vector of TimsFrames\nReturns the argument unchanged.\nReturns the argument unchanged.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCreates a new TimsSpectrum
instance.")
\ No newline at end of file
diff --git a/v0.3.4-alpha/search.desc/rustdf/rustdf-desc-0-.js b/v0.3.4-alpha/search.desc/rustdf/rustdf-desc-0-.js
new file mode 100644
index 00000000..bbc5fdd2
--- /dev/null
+++ b/v0.3.4-alpha/search.desc/rustdf/rustdf-desc-0-.js
@@ -0,0 +1 @@
+searchState.loadedDescShard("rustdf", 0, "Returns the argument unchanged.\nCalls U::from(self)
.\nReturns the argument unchanged.\nCalls U::from(self)
.\nReturns the argument unchanged.\nReturns the argument unchanged.\nGet the fragment spectra for all PASEF selected precursors\nCalls U::from(self)
.\nCalls U::from(self)
.\nReturns the argument unchanged.\nCalls U::from(self)
.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\ntranslate inverse mobility to scan values calling the …\ntranslate scan to inverse mobility values calling the …\ntranslate tof to mz values calling the bruker library\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nReturns the argument unchanged.\nCalls U::from(self)
.\nParses the decompressed bruker binary data\nCompresses a byte array using ZSTD\nDecompresses a ZSTD compressed byte array\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nBuild a frame for DIA synthetic experiment\nReturns the argument unchanged.\nCalls U::from(self)
.\nMethod to build a map from peptide id to events (absolute …\nMethod to build a map from peptide id to ions\nMethod to build a set of precursor frame ids, can be used …\nReturns the argument unchanged.\nCalls U::from(self)
.\nBuild a precursor frame\nBuild a collection of precursor frames in parallel\nReturns the argument unchanged.\nCalls U::from(self)
.\nCreate a new instance of TimsTofSynthetics\nhelper function to reshape the flat prosit predicted …\nhelper function to convert a peptide ion to all possible …")
\ No newline at end of file
diff --git a/v0.3.4-alpha/search.desc/rustms/rustms-desc-0-.js b/v0.3.4-alpha/search.desc/rustms/rustms-desc-0-.js
new file mode 100644
index 00000000..fb0372c6
--- /dev/null
+++ b/v0.3.4-alpha/search.desc/rustms/rustms-desc-0-.js
@@ -0,0 +1 @@
+searchState.loadedDescShard("rustms", 0, "convolve two distributions of masses and abundances\nconvolve a distribution with itself n times\ngenerate the isotope distribution for a given atomic …\ncalculate the atomic composition of a product ion\nget a count dictionary of the amino acid composition of a …\ncalculate the monoisotopic mass of a peptide sequence\ncalculate the monoisotopic mass of a peptide product ion …\ncalculate the monoisotopic m/z of a peptide product ion …\ncalculate the atomic composition of a peptide product ion …\ncount the number of protonizable sites in a peptide …\ncalculate the atomic composition of a peptide sequence\nAtomic Weights\nIsotopic Weights\nIsotopic Abundance\ncalculate the m/z of an ion\nconvert CCS to 1 over reduced ion mobility (1/k0)\nconvert 1 over reduced ion mobility (1/k0) to CCS\nReturns the argument unchanged.\nCalls U::from(self)
.\nGenerate the isotope distribution of the chemical formula.\nCalculate the monoisotopic weight of the chemical formula.\nParse a chemical formula into a map of elements and their …\nUnimod Modifications\nUnimod Modifications Mass\nUnimod Modifications Mass Numerical\nConvert a peptide sequence with UNIMOD annotations to a …\nReshape the flat prosit array into a 3D array of shape …\nConvert a peptide sequence with UNIMOD annotations to a …\nRepresents a mass spectrum with associated m/z values and …\nCombines two MzSpectrum
instances by summing up the …\nFilters the m/z values and intensities based on a range of …\nReturns the argument unchanged.\nCalls U::from(self)
.\nConstructs a new MzSpectrum
.\nAmino Acid Composition\nAmino Acid Masses\nAmino Acids\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nReturns the argument unchanged.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.\nCalls U::from(self)
.")
\ No newline at end of file
diff --git a/v0.3.4-alpha/settings.html b/v0.3.4-alpha/settings.html
new file mode 100644
index 00000000..5a32b92b
--- /dev/null
+++ b/v0.3.4-alpha/settings.html
@@ -0,0 +1 @@
++1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519
extern crate statrs;
+
+use std::collections::{BTreeMap, HashMap, HashSet};
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+
+use statrs::distribution::{Continuous, Normal};
+use crate::chemistry::constants::{MASS_NEUTRON, MASS_PROTON};
+use crate::chemistry::elements::{atoms_isotopic_weights, isotopic_abundance};
+use crate::data::peptide::PeptideIon;
+use crate::data::spectrum::MzSpectrum;
+use crate::data::spectrum::ToResolution;
+
/// Convolve two distributions of masses and abundances.
///
/// Every peak of `dist_a` is combined with every peak of `dist_b`: masses are
/// added and abundances are multiplied. A combined peak whose mass lies within
/// `mass_tolerance` of a peak already in the result is merged into that peak
/// (the existing mass is kept, the abundances are summed).
///
/// Arguments:
///
/// * `dist_a` - first distribution of masses and abundances
/// * `dist_b` - second distribution of masses and abundances
/// * `mass_tolerance` - mass tolerance for combining peaks
/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
/// * `max_results` - maximum number of peaks to include in the result
///
/// Returns:
///
/// * `Vec<(f64, f64)>` - combined distribution of masses and abundances,
///   reduced to the `max_results` most abundant peaks and sorted by mass
///
/// NaN masses or abundances do not cause a panic; they are ordered according
/// to `f64::total_cmp`.
///
/// # Examples
///
/// ```
/// use mscore::algorithm::isotope::convolve;
///
/// let dist_a = vec![(100.0, 0.5), (101.0, 0.5)];
/// let dist_b = vec![(100.0, 0.5), (101.0, 0.5)];
/// let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200);
/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
/// ```
pub fn convolve(dist_a: &Vec<(f64, f64)>, dist_b: &Vec<(f64, f64)>, mass_tolerance: f64, abundance_threshold: f64, max_results: usize) -> Vec<(f64, f64)> {
    let mut result: Vec<(f64, f64)> = Vec::new();

    for &(mass_a, abundance_a) in dist_a {
        for &(mass_b, abundance_b) in dist_b {
            let combined_mass = mass_a + mass_b;
            let combined_abundance = abundance_a * abundance_b;

            // Skip entries with combined abundance below the threshold
            if combined_abundance < abundance_threshold {
                continue;
            }

            // Merge into the first existing peak within tolerance, otherwise add a new peak
            match result.iter_mut().find(|(m, _)| (*m - combined_mass).abs() < mass_tolerance) {
                Some(entry) => entry.1 += combined_abundance,
                None => result.push((combined_mass, combined_abundance)),
            }
        }
    }

    // Keep only the `max_results` most abundant peaks. `total_cmp` gives a
    // total order over f64, so a NaN value cannot panic the sort.
    result.sort_by(|a, b| b.1.total_cmp(&a.1));
    result.truncate(max_results);

    // Return the surviving peaks in ascending mass order
    result.sort_by(|a, b| a.0.total_cmp(&b.0));

    result
}
+
+/// convolve a distribution with itself n times
+///
+/// Arguments:
+///
+/// * `dist` - distribution of masses and abundances
+/// * `n` - number of times to convolve the distribution with itself
+///
+/// Returns:
+///
+/// * `Vec<(f64, f64)>` - distribution of masses and abundances
+///
+/// # Examples
+///
+/// ```
+/// use mscore::algorithm::isotope::convolve_pow;
+///
+/// let dist = vec![(100.0, 0.5), (101.0, 0.5)];
+/// let result = convolve_pow(&dist, 2);
+/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
+/// ```
+pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)> {
+ if n == 0 {
+ return vec![(0.0, 1.0)]; // Return the delta distribution
+ }
+ if n == 1 {
+ return dist.clone();
+ }
+
+ let mut result = dist.clone();
+ let mut power = 2;
+
+ while power <= n {
+ result = convolve(&result, &result, 1e-6, 1e-12, 200); // Square the result to get the next power of 2
+ power *= 2;
+ }
+
+ // If n is not a power of 2, recursively fill in the remainder
+ if power / 2 < n {
+ result = convolve(&result, &convolve_pow(dist, n - power / 2, ), 1e-6, 1e-12, 200);
+ }
+
+ result
+}
+
+/// generate the isotope distribution for a given atomic composition
+///
+/// Arguments:
+///
+/// * `atomic_composition` - atomic composition of the peptide
+/// * `mass_tolerance` - mass tolerance for combining peaks
+/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
+/// * `max_result` - maximum number of peaks to include in the result
+///
+/// Returns:
+///
+/// * `Vec<(f64, f64)>` - distribution of masses and abundances
+///
+/// # Examples
+///
+/// ```
+/// use std::collections::HashMap;
+/// use mscore::algorithm::isotope::generate_isotope_distribution;
+///
+/// let mut atomic_composition = HashMap::new();
+/// atomic_composition.insert("C".to_string(), 5);
+/// atomic_composition.insert("H".to_string(), 9);
+/// atomic_composition.insert("N".to_string(), 1);
+/// atomic_composition.insert("O".to_string(), 1);
+/// let result = generate_isotope_distribution(&atomic_composition, 1e-6, 1e-12, 200);
+/// ```
+pub fn generate_isotope_distribution(
+ atomic_composition: &HashMap<String, i32>,
+ mass_tolerance: f64,
+ abundance_threshold: f64,
+ max_result: i32
+) -> Vec<(f64, f64)> {
+
+ let mut cumulative_distribution: Option<Vec<(f64, f64)>> = None;
+ let atoms_isotopic_weights: HashMap<String, Vec<f64>> = atoms_isotopic_weights().iter().map(|(k, v)| (k.to_string(), v.clone())).collect();
+ let atomic_isotope_abundance: HashMap<String, Vec<f64>> = isotopic_abundance().iter().map(|(k, v)| (k.to_string(), v.clone())).collect();
+
+ for (element, &count) in atomic_composition.iter() {
+ let elemental_isotope_weights = atoms_isotopic_weights.get(element).expect("Element not found in isotopic weights table").clone();
+ let elemental_isotope_abundance = atomic_isotope_abundance.get(element).expect("Element not found in isotopic abundance table").clone();
+
+ let element_distribution: Vec<(f64, f64)> = elemental_isotope_weights.iter().zip(elemental_isotope_abundance.iter()).map(|(&mass, &abundance
+ )| (mass, abundance)).collect();
+
+ let element_power_distribution = if count > 1 {
+ convolve_pow(&element_distribution, count)
+ } else {
+ element_distribution
+ };
+
+ cumulative_distribution = match cumulative_distribution {
+ Some(cum_dist) => Some(convolve(&cum_dist, &element_power_distribution, mass_tolerance, abundance_threshold, max_result as usize)),
+ None => Some(element_power_distribution),
+ };
+ }
+
+ let final_distribution = cumulative_distribution.expect("Peptide has no elements");
+ // Normalize the distribution
+ let total_abundance: f64 = final_distribution.iter().map(|&(_, abundance)| abundance).sum();
+ let result: Vec<_> = final_distribution.into_iter().map(|(mass, abundance)| (mass, abundance / total_abundance)).collect();
+
+ let mut sort_map: BTreeMap<i64, f64> = BTreeMap::new();
+ let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
+
+ for (mz, intensity) in result {
+ let key = quantize(mz);
+ sort_map.entry(key).and_modify(|e| *e += intensity).or_insert(intensity);
+ }
+
+ let mz: Vec<f64> = sort_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
+ let intensity: Vec<f64> = sort_map.values().map(|&intensity| intensity).collect();
+ mz.iter().zip(intensity.iter()).map(|(&mz, &intensity)| (mz, intensity)).collect()
+}
+
+
+/// calculate the normal probability density function
+///
+/// Arguments:
+///
+/// * `x` - value to calculate the probability density function of
+/// * `mean` - mean of the normal distribution
+/// * `std_dev` - standard deviation of the normal distribution
+///
+/// Returns:
+///
+/// * `f64` - probability density function of `x`
+///
+/// # Examples
+///
+/// ```
+/// use mscore::algorithm::isotope::normal_pdf;
+///
+/// let pdf = normal_pdf(0.0, 0.0, 1.0);
+/// assert_eq!(pdf, 0.39894228040143265);
+/// ```
+pub fn normal_pdf(x: f64, mean: f64, std_dev: f64) -> f64 {
+ let normal = Normal::new(mean, std_dev).unwrap();
+ normal.pdf(x)
+}
+
/// Compute `n!` as a floating point number.
///
/// Arguments:
///
/// * `n` - number to calculate factorial of
///
/// Returns:
///
/// * `f64` - factorial of `n`; for `n <= 0` the product is empty and the
///   result is `1.0`
///
/// # Examples
///
/// ```
/// use mscore::algorithm::isotope::factorial;
///
/// let fact = factorial(5);
/// assert_eq!(fact, 120.0);
/// ```
pub fn factorial(n: i32) -> f64 {
    (1..=n).map(f64::from).product()
}
+
+pub fn weight(mass: f64, peak_nums: Vec<i32>, normalize: bool) -> Vec<f64> {
+ let lam_val = lam(mass, 0.000594, -0.03091);
+ let factorials: Vec<f64> = peak_nums.iter().map(|&k| factorial(k)).collect();
+ let mut weights: Vec<f64> = peak_nums.iter().map(|&k| {
+ let pow = lam_val.powi(k);
+ let exp = (-lam_val).exp();
+ exp * pow / factorials[k as usize]
+ }).collect();
+
+ if normalize {
+ let sum: f64 = weights.iter().sum();
+ weights = weights.iter().map(|&w| w / sum).collect();
+ }
+
+ weights
+}
+
/// Evaluate the linear model `slope * mass + intercept` that yields the
/// lambda value for a given mass.
///
/// Arguments:
///
/// * `mass` - mass of the peptide
/// * `slope` - slope of the linear regression
/// * `intercept` - intercept of the linear regression
///
/// Returns:
///
/// * `f64` - lambda value
///
/// # Examples
///
/// ```
/// use mscore::algorithm::isotope::lam;
///
/// let lambda = lam(1000.0, 0.000594, -0.03091);
/// assert!((lambda - 0.56309).abs() < 1e-12);
/// ```
pub fn lam(mass: f64, slope: f64, intercept: f64) -> f64 {
    let scaled = mass * slope;
    intercept + scaled
}
+
+/// calculate the isotope pattern for a given mass and charge based on the averagine model
+/// using the normal distribution for peak shapes
+///
+/// Arguments:
+///
+/// * `x` - list of m/z values to probe
+/// * `mass` - mass of the peptide
+/// * `charge` - charge of the peptide
+/// * `sigma` - standard deviation of the normal distribution
+/// * `amp` - amplitude of the isotope pattern
+/// * `k` - number of isotopes to consider
+/// * `step_size` - step size for the m/z values to probe
+///
+/// Returns:
+///
+/// * `Vec<f64>` - isotope pattern
+///
+pub fn iso(x: &Vec<f64>, mass: f64, charge: f64, sigma: f64, amp: f64, k: usize, step_size: f64) -> Vec<f64> {
+ let k_range: Vec<usize> = (0..k).collect();
+ let means: Vec<f64> = k_range.iter().map(|&k_val| (mass + MASS_NEUTRON * k_val as f64) / charge).collect();
+ let weights = weight(mass, k_range.iter().map(|&k_val| k_val as i32).collect::<Vec<i32>>(), true);
+
+ let mut intensities = vec![0.0; x.len()];
+ for (i, x_val) in x.iter().enumerate() {
+ for (j, &mean) in means.iter().enumerate() {
+ intensities[i] += weights[j] * normal_pdf(*x_val, mean, sigma);
+ }
+ intensities[i] *= step_size;
+ }
+ intensities.iter().map(|&intensity| intensity * amp).collect()
+}
+
+/// generate the isotope pattern for a given mass and charge
+///
+/// Arguments:
+///
+/// * `lower_bound` - lower bound of the isotope pattern
+/// * `upper_bound` - upper bound of the isotope pattern
+/// * `mass` - mass of the peptide
+/// * `charge` - charge of the peptide
+/// * `amp` - amplitude of the isotope pattern
+/// * `k` - number of isotopes to consider
+/// * `sigma` - standard deviation of the normal distribution
+/// * `resolution` - resolution of the isotope pattern
+///
+/// Returns:
+///
+/// * `(Vec<f64>, Vec<f64>)` - isotope pattern
+///
+/// # Examples
+///
+/// ```
+/// use mscore::algorithm::isotope::generate_isotope_pattern;
+///
+/// let (mzs, intensities) = generate_isotope_pattern(1500.0, 1510.0, 3000.0, 2.0, 1e4, 10, 1.0, 3);
+/// ```
+pub fn generate_isotope_pattern(lower_bound: f64, upper_bound: f64, mass: f64, charge: f64, amp: f64, k: usize, sigma: f64, resolution: i32) -> (Vec<f64>, Vec<f64>) {
+ let step_size = f64::min(sigma / 10.0, 1.0 / 10f64.powi(resolution));
+ let size = ((upper_bound - lower_bound) / step_size).ceil() as usize;
+ let mzs: Vec<f64> = (0..size).map(|i| lower_bound + step_size * i as f64).collect();
+ let intensities = iso(&mzs, mass, charge, sigma, amp, k, step_size);
+
+ (mzs.iter().map(|&mz| mz + MASS_PROTON).collect(), intensities)
+}
+
+/// generate the averagine spectrum for a given mass and charge
+///
+/// Arguments:
+///
+/// * `mass` - mass of the peptide
+/// * `charge` - charge of the peptide
+/// * `min_intensity` - minimum intensity for a peak to be included in the result
+/// * `k` - number of isotopes to consider
+/// * `resolution` - resolution of the isotope pattern
+/// * `centroid` - whether to centroid the spectrum
+/// * `amp` - amplitude of the isotope pattern
+///
+/// Returns:
+///
+/// * `MzSpectrum` - averagine spectrum
+///
+/// # Examples
+///
+/// ```
+/// use mscore::algorithm::isotope::generate_averagine_spectrum;
+///
+/// let spectrum = generate_averagine_spectrum(3000.0, 2, 1, 10, 3, true, None);
+/// ```
+pub fn generate_averagine_spectrum(
+ mass: f64,
+ charge: i32,
+ min_intensity: i32,
+ k: i32,
+ resolution: i32,
+ centroid: bool,
+ amp: Option<f64>
+) -> MzSpectrum {
+ let amp = amp.unwrap_or(1e4);
+ let lb = mass / charge as f64 - 0.2;
+ let ub = mass / charge as f64 + k as f64 + 0.2;
+
+ let (mz, intensities) = generate_isotope_pattern(
+ lb,
+ ub,
+ mass,
+ charge as f64,
+ amp,
+ k as usize,
+ 0.008492569002123142,
+ resolution,
+ );
+
+ let spectrum = MzSpectrum::new(mz, intensities).to_resolution(resolution).filter_ranged(lb, ub, min_intensity as f64, 1e9);
+
+ if centroid {
+ spectrum.to_centroid(std::cmp::max(min_intensity, 1), 1.0 / 10f64.powi(resolution - 1), true)
+ } else {
+ spectrum
+ }
+}
+
+/// generate the averagine spectra for a given list of masses and charges
+/// using multiple threads
+///
+/// Arguments:
+///
+/// * `masses` - list of masses of the peptides
+/// * `charges` - list of charges of the peptides
+/// * `min_intensity` - minimum intensity for a peak to be included in the result
+/// * `k` - number of isotopes to consider
+/// * `resolution` - resolution of the isotope pattern
+/// * `centroid` - whether to centroid the spectrum
+/// * `num_threads` - number of threads to use
+/// * `amp` - amplitude of the isotope pattern
+///
+/// Returns:
+///
+/// * `Vec<MzSpectrum>` - list of averagine spectra
+///
+/// # Examples
+///
+/// ```
+/// use mscore::algorithm::isotope::generate_averagine_spectra;
+///
+/// let masses = vec![3000.0, 3000.0];
+/// let charges = vec![2, 3];
+/// let spectra = generate_averagine_spectra(masses, charges, 1, 10, 3, true, 4, None);
+/// ```
+pub fn generate_averagine_spectra(
+ masses: Vec<f64>,
+ charges: Vec<i32>,
+ min_intensity: i32,
+ k: i32,
+ resolution: i32,
+ centroid: bool,
+ num_threads: usize,
+ amp: Option<f64>
+) -> Vec<MzSpectrum> {
+ let amp = amp.unwrap_or(1e5);
+ let mut spectra: Vec<MzSpectrum> = Vec::new();
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+ thread_pool.install(|| {
+ spectra = masses.par_iter().zip(charges.par_iter()).map(|(&mass, &charge)| {
+ generate_averagine_spectrum(mass, charge, min_intensity, k, resolution, centroid, Some(amp))
+ }).collect();
+ });
+
+ spectra
+}
+
+/// generate the precursor spectrum for a given peptide sequence and charge
+/// using isotope convolutions
+///
+/// Arguments:
+///
+/// * `sequence` - peptide sequence
+/// * `charge` - charge of the peptide
+///
+/// Returns:
+///
+/// * `MzSpectrum` - precursor spectrum
+///
+pub fn generate_precursor_spectrum(sequence: &str, charge: i32, peptide_id: Option<i32>) -> MzSpectrum {
+ let peptide_ion = PeptideIon::new(sequence.to_string(), charge, 1.0, peptide_id);
+ peptide_ion.calculate_isotopic_spectrum(1e-3, 1e-9, 200, 1e-6)
+}
+
+/// parallel version of `generate_precursor_spectrum`
+///
+/// Arguments:
+///
+/// * `sequences` - list of peptide sequences
+/// * `charges` - list of charges of the peptides
+/// * `num_threads` - number of threads to use
+///
+/// Returns:
+///
+/// * `Vec<MzSpectrum>` - list of precursor spectra
+///
+pub fn generate_precursor_spectra(sequences: &Vec<&str>, charges: &Vec<i32>, num_threads: usize, peptide_ids: Vec<Option<i32>>) -> Vec<MzSpectrum> {
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+ // need to zip sequences and charges and peptide_ids
+ let result = thread_pool.install(|| {
+ sequences.par_iter().zip(charges.par_iter()).zip(peptide_ids.par_iter()).map(|((&sequence, &charge), &peptide_id)| {
+ generate_precursor_spectrum(sequence, charge, peptide_id)
+ }).collect()
+ });
+ result
+}
+
/// Calculate the isotope distribution of a fragment given the isotope
/// distribution of the fragment, the isotope distribution of the complementary
/// fragment, and the set of transmitted precursor isotopes.
///
/// Implemented based on OpenMS: "https://github.com/OpenMS/OpenMS/blob/079143800f7ed036a7c68ea6e124fe4f5cfc9569/src/openms/source/CHEMISTRY/ISOTOPEDISTRIBUTION/CoarseIsotopePatternGenerator.cpp#L415"
///
/// Arguments:
///
/// * `fragment_isotope_dist` - isotope distribution of the fragment
/// * `comp_fragment_isotope_dist` - isotope distribution of the complementary fragment
/// * `precursor_isotopes` - indices of the transmitted precursor isotopes
/// * `max_isotope` - maximum number of fragment isotope peaks to return, `0` for no limit
///
/// Returns:
///
/// * `Vec<(f64, f64)>` - one peak per considered fragment isotope `i`. Its mass
///   is the first fragment mass plus `i`; its intensity is the fragment
///   intensity times the summed complementary intensities at `p - i` for every
///   transmitted precursor isotope `p`. Empty if either input distribution is
///   empty.
pub fn calculate_transmission_dependent_fragment_ion_isotope_distribution(fragment_isotope_dist: &Vec<(f64, f64)>, comp_fragment_isotope_dist: &Vec<(f64, f64)>, precursor_isotopes: &HashSet<usize>, max_isotope: usize) -> Vec<(f64, f64)> {
    if fragment_isotope_dist.is_empty() || comp_fragment_isotope_dist.is_empty() {
        return Vec::new();
    }

    // Number of fragment isotope peaks to report
    let peak_count = if max_isotope != 0 {
        fragment_isotope_dist.len().min(max_isotope)
    } else {
        fragment_isotope_dist.len()
    };
    let base_mass = fragment_isotope_dist[0].0;

    fragment_isotope_dist
        .iter()
        .take(peak_count)
        .enumerate()
        .map(|(idx, &(_, fragment_intensity))| {
            // Sum the complementary intensities reachable from the transmitted precursors
            let transmitted = precursor_isotopes
                .iter()
                .filter_map(|&precursor| precursor.checked_sub(idx))
                .filter_map(|offset| comp_fragment_isotope_dist.get(offset))
                .fold(0.0, |acc, &(_, comp_intensity)| acc + comp_intensity);

            (base_mass + idx as f64, transmitted * fragment_intensity)
        })
        .collect()
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373
use std::collections::HashMap;
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+use regex::Regex;
+use statrs::distribution::{Binomial, Discrete};
+use crate::chemistry::amino_acid::{amino_acid_composition, amino_acid_masses};
+use crate::chemistry::constants::{MASS_CO, MASS_NH3, MASS_PROTON, MASS_WATER};
+use crate::chemistry::formulas::calculate_mz;
+use crate::chemistry::unimod::{modification_atomic_composition, unimod_modifications_mass_numerical};
+use crate::chemistry::utility::{find_unimod_patterns, unimod_sequence_to_tokens};
+use crate::data::peptide::{FragmentType, PeptideProductIon, PeptideSequence};
+
+/// calculate the monoisotopic mass of a peptide sequence
+///
+/// Arguments:
+///
+/// * `sequence` - peptide sequence
+///
+/// Returns:
+///
+/// * `mass` - monoisotopic mass of the peptide
+///
+/// # Examples
+///
+/// ```
+/// use mscore::algorithm::peptide::calculate_peptide_mono_isotopic_mass;
+/// use mscore::data::peptide::PeptideSequence;
+///
+/// let peptide_sequence = PeptideSequence::new("PEPTIDEH".to_string(), Some(1));
+/// let mass = calculate_peptide_mono_isotopic_mass(&peptide_sequence);
+/// let mass_quantized = (mass * 1e6).round() as i32;
+/// assert_eq!(mass_quantized, 936418877);
+/// ```
+pub fn calculate_peptide_mono_isotopic_mass(peptide_sequence: &PeptideSequence) -> f64 {
+ let amino_acid_masses = amino_acid_masses();
+ let modifications_mz_numerical = unimod_modifications_mass_numerical();
+ let pattern = Regex::new(r"\[UNIMOD:(\d+)]").unwrap();
+
+ let sequence = peptide_sequence.sequence.as_str();
+
+ // Find all occurrences of the pattern
+ let modifications: Vec<u32> = pattern
+ .find_iter(sequence)
+ .filter_map(|mat| mat.as_str()[8..mat.as_str().len() - 1].parse().ok())
+ .collect();
+
+ // Remove the modifications from the sequence
+ let sequence = pattern.replace_all(sequence, "");
+
+ // Count occurrences of each amino acid
+ let mut aa_counts = HashMap::new();
+ for char in sequence.chars() {
+ *aa_counts.entry(char).or_insert(0) += 1;
+ }
+
+ // Mass of amino acids and modifications
+ let mass_sequence: f64 = aa_counts.iter().map(|(aa, &count)| amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0) * count as f64).sum();
+ let mass_modifications: f64 = modifications.iter().map(|&mod_id| modifications_mz_numerical.get(&mod_id).unwrap_or(&0.0)).sum();
+
+ mass_sequence + mass_modifications + MASS_WATER
+}
+
+/// calculate the monoisotopic mass of a peptide product ion for a given fragment type
+///
+/// Arguments:
+///
+/// * `sequence` - peptide sequence
+/// * `kind` - fragment type
+///
+/// Returns:
+///
+/// * `mass` - monoisotopic mass of the peptide
+///
+/// # Examples
+/// ```
+/// use mscore::algorithm::peptide::calculate_peptide_product_ion_mono_isotopic_mass;
+/// use mscore::data::peptide::FragmentType;
+/// let sequence = "PEPTIDEH";
+/// let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, FragmentType::Y);
+/// assert_eq!(mass, 936.4188766862999);
+/// ```
+pub fn calculate_peptide_product_ion_mono_isotopic_mass(sequence: &str, kind: FragmentType) -> f64 {
+
+ let (sequence, modifications) = find_unimod_patterns(sequence);
+
+ // Return mz of empty sequence
+ if sequence.is_empty() {
+ return 0.0;
+ }
+
+ let amino_acid_masses = amino_acid_masses();
+
+ // Add up raw amino acid masses and potential modifications
+ let mass_sequence: f64 = sequence.chars()
+ .map(|aa| amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0))
+ .sum();
+
+ let mass_modifications: f64 = modifications.iter().sum();
+
+ // Calculate total mass
+ let mass = mass_sequence + mass_modifications + MASS_WATER;
+
+ let mass = match kind {
+ FragmentType::A => mass - MASS_CO - MASS_WATER,
+ FragmentType::B => mass - MASS_WATER,
+ FragmentType::C => mass + MASS_NH3 - MASS_WATER,
+ FragmentType::X => mass + MASS_CO - 2.0 * MASS_PROTON,
+ FragmentType::Y => mass,
+ FragmentType::Z => mass - MASS_NH3,
+ };
+
+ mass
+}
+
+/// calculate the monoisotopic m/z of a peptide product ion for a given fragment type and charge
+///
+/// Arguments:
+///
+/// * `sequence` - peptide sequence
+/// * `kind` - fragment type
+/// * `charge` - charge
+///
+/// Returns:
+///
+/// * `mz` - monoisotopic mass of the peptide
+///
+/// # Examples
+/// ```
+/// use mscore::algorithm::peptide::calculate_product_ion_mz;
+/// use mscore::chemistry::constants::MASS_PROTON;
+/// use mscore::data::peptide::FragmentType;
+/// let sequence = "PEPTIDEH";
+/// let mz = calculate_product_ion_mz(sequence, FragmentType::Y, Some(1));
+/// assert_eq!(mz, 936.4188766862999 + MASS_PROTON);
+/// ```
+pub fn calculate_product_ion_mz(sequence: &str, kind: FragmentType, charge: Option<i32>) -> f64 {
+ let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, kind);
+ calculate_mz(mass, charge.unwrap_or(1))
+}
+
/// Count how often each character occurs in a peptide sequence.
///
/// Every character of `sequence` is counted as-is; no filtering or validation
/// is applied.
///
/// Arguments:
///
/// * `sequence` - peptide sequence
///
/// Returns:
///
/// * `composition` - map from one-character string to its number of occurrences
///
/// # Examples
///
/// ```
/// use mscore::algorithm::peptide::calculate_amino_acid_composition;
///
/// let sequence = "PEPTIDEH";
/// let composition = calculate_amino_acid_composition(sequence);
/// assert_eq!(composition.get("P"), Some(&2));
/// assert_eq!(composition.get("E"), Some(&2));
/// assert_eq!(composition.get("T"), Some(&1));
/// assert_eq!(composition.get("I"), Some(&1));
/// assert_eq!(composition.get("D"), Some(&1));
/// assert_eq!(composition.get("H"), Some(&1));
/// ```
pub fn calculate_amino_acid_composition(sequence: &str) -> HashMap<String, i32> {
    sequence
        .chars()
        .fold(HashMap::new(), |mut counts, residue| {
            *counts.entry(residue.to_string()).or_insert(0) += 1;
            counts
        })
}
+
+/// calculate the atomic composition of a peptide sequence
+pub fn peptide_sequence_to_atomic_composition(peptide_sequence: &PeptideSequence) -> HashMap<&'static str, i32> {
+
+ let token_sequence = unimod_sequence_to_tokens(peptide_sequence.sequence.as_str(), false);
+ let mut collection: HashMap<&'static str, i32> = HashMap::new();
+
+ // Assuming amino_acid_composition and modification_composition return appropriate mappings...
+ let aa_compositions = amino_acid_composition();
+ let mod_compositions = modification_atomic_composition();
+
+ // No need for conversion to HashMap<String, ...> as long as you're directly accessing
+ // the HashMap provided by modification_composition() if it uses String keys.
+ for token in token_sequence {
+ if token.len() == 1 {
+ let char = token.chars().next().unwrap();
+ if let Some(composition) = aa_compositions.get(&char) {
+ for (key, value) in composition.iter() {
+ *collection.entry(key).or_insert(0) += *value;
+ }
+ }
+ } else {
+ // Directly use &token without .as_str() conversion
+ if let Some(composition) = mod_compositions.get(&token) {
+ for (key, value) in composition.iter() {
+ *collection.entry(key).or_insert(0) += *value;
+ }
+ }
+ }
+ }
+
+ // Add water
+ *collection.entry("H").or_insert(0) += 2; //
+ *collection.entry("O").or_insert(0) += 1; //
+
+ collection
+}
+
+/// calculate the atomic composition of a product ion
+///
+/// Arguments:
+///
+/// * `product_ion` - a PeptideProductIon instance
+///
+/// Returns:
+///
+/// * `Vec<(&str, i32)>` - a vector of tuples representing the atomic composition of the product ion
+pub fn atomic_product_ion_composition(product_ion: &PeptideProductIon) -> Vec<(&str, i32)> {
+
+ let mut composition = peptide_sequence_to_atomic_composition(&product_ion.ion.sequence);
+
+ match product_ion.kind {
+ FragmentType::A => {
+ // A: peptide_mass - CO - Water
+ *composition.entry("H").or_insert(0) -= 2;
+ *composition.entry("O").or_insert(0) -= 2;
+ *composition.entry("C").or_insert(0) -= 1;
+ },
+ FragmentType::B => {
+ // B: peptide_mass - Water
+ *composition.entry("H").or_insert(0) -= 2;
+ *composition.entry("O").or_insert(0) -= 1;
+ },
+ FragmentType::C => {
+ // C: peptide_mass + NH3 - Water
+ *composition.entry("H").or_insert(0) += 1;
+ *composition.entry("N").or_insert(0) += 1;
+ *composition.entry("O").or_insert(0) -= 1;
+ },
+ FragmentType::X => {
+ // X: peptide_mass + CO
+ *composition.entry("C").or_insert(0) += 1; // Add 1 for CO
+ *composition.entry("O").or_insert(0) += 1; // Add 1 for CO
+ *composition.entry("H").or_insert(0) -= 2; // Subtract 2 for 2 protons
+ },
+ FragmentType::Y => {
+ ()
+ },
+ FragmentType::Z => {
+ *composition.entry("H").or_insert(0) -= 3;
+ *composition.entry("N").or_insert(0) -= 1;
+ },
+ }
+
+ composition.iter().map(|(k, v)| (*k, *v)).collect()
+}
+
+/// calculate the atomic composition of a peptide product ion series
+/// Arguments:
+///
+/// * `product_ions` - a vector of PeptideProductIon instances
+/// * `num_threads` - an usize representing the number of threads to use
+/// Returns:
+///
+/// * `Vec<Vec<(String, i32)>>` - a vector of vectors of tuples representing the atomic composition of each product ion
+///
+pub fn fragments_to_composition(product_ions: Vec<PeptideProductIon>, num_threads: usize) -> Vec<Vec<(String, i32)>> {
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+ let result = thread_pool.install(|| {
+ product_ions.par_iter().map(|ion| atomic_product_ion_composition(ion)).map(|composition| {
+ composition.iter().map(|(k, v)| (k.to_string(), *v)).collect()
+ }).collect()
+ });
+ result
+}
+
/// Count the protonizable sites of a peptide sequence.
///
/// The N-terminus always counts as one site; every `H`, `R` or `K` character in
/// the sequence adds another.
///
/// # Arguments
///
/// * `sequence` - a string representing the peptide sequence
///
/// # Returns
///
/// * `usize` - the number of protonizable sites in the peptide sequence
///
/// # Example
///
/// ```
/// use mscore::algorithm::peptide::get_num_protonizable_sites;
///
/// let sequence = "PEPTIDEH";
/// let num_protonizable_sites = get_num_protonizable_sites(sequence);
/// assert_eq!(num_protonizable_sites, 2);
/// ```
pub fn get_num_protonizable_sites(sequence: &str) -> usize {
    let basic_residues = sequence
        .chars()
        .filter(|residue| matches!(residue, 'H' | 'R' | 'K'))
        .count();

    // +1 for the n-terminus
    basic_residues + 1
}
+
+/// simulate the charge state distribution for a peptide sequence
+///
+/// # Arguments
+///
+/// * `sequence` - a string representing the peptide sequence
+/// * `max_charge` - an optional usize representing the maximum charge state to simulate
+/// * `charged_probability` - an optional f64 representing the probability of a site being charged
+///
+/// # Returns
+///
+/// * `Vec<f64>` - a vector of f64 representing the probability of each charge state
+///
+/// # Example
+///
+/// ```
+/// use mscore::algorithm::peptide::simulate_charge_state_for_sequence;
+///
+/// let sequence = "PEPTIDEH";
+/// let charge_state_probs = simulate_charge_state_for_sequence(sequence, None, None);
+/// assert_eq!(charge_state_probs, vec![0.25, 0.5, 0.25, 0.0, 0.0]);
+pub fn simulate_charge_state_for_sequence(sequence: &str, max_charge: Option<usize>, charged_probability: Option<f64>) -> Vec<f64> {
+ let charged_prob = charged_probability.unwrap_or(0.5);
+ let max_charge = max_charge.unwrap_or(5);
+ let num_protonizable_sites = get_num_protonizable_sites(sequence);
+ let mut charge_state_probs = vec![0.0; max_charge];
+
+ for charge in 0..max_charge {
+ let binom = Binomial::new(charged_prob, num_protonizable_sites as u64).unwrap();
+ let prob = binom.pmf(charge as u64);
+
+ charge_state_probs[charge] = prob;
+ }
+
+ charge_state_probs
+}
+
+/// simulate the charge state distribution for a list of peptide sequences
+///
+/// # Arguments
+///
+/// * `sequences` - a vector of strings representing the peptide sequences
+/// * `num_threads` - an usize representing the number of threads to use
+/// * `max_charge` - an optional usize representing the maximum charge state to simulate
+/// * `charged_probability` - an optional f64 representing the probability of a site being charged
+///
+/// # Returns
+///
+/// * `Vec<Vec<f64>>` - a vector of vectors of f64 representing the probability of each charge state for each sequence
+///
+/// # Example
+///
+/// ```
+/// use mscore::algorithm::peptide::simulate_charge_states_for_sequences;
+///
+/// let sequences = vec!["PEPTIDEH", "PEPTIDEH", "PEPTIDEH"];
+/// let charge_state_probs = simulate_charge_states_for_sequences(sequences, 4, None, None);
+/// assert_eq!(charge_state_probs, vec![vec![0.25, 0.5, 0.25, 0.0, 0.0], vec![0.25, 0.5, 0.25, 0.0, 0.0], vec![0.25, 0.5, 0.25, 0.0, 0.0]]);
+/// ```
+pub fn simulate_charge_states_for_sequences(sequences: Vec<&str>, num_threads: usize, max_charge: Option<usize>, charged_probability: Option<f64>) -> Vec<Vec<f64>> {
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+ pool.install(|| {
+ sequences.par_iter()
+ .map(|sequence| simulate_charge_state_for_sequence(sequence, max_charge, charged_probability))
+ .collect()
+ })
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235
use std::collections::HashMap;
+use std::f64::consts::SQRT_2;
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+
+use std::collections::VecDeque;
+
/// Integrate `f` over `[a, b]` with the 15-point Gauss–Kronrod rule (G7/K15).
///
/// Returns `(estimate, error)`, where `estimate` is the 15-point Kronrod result and
/// `error` is the absolute difference between the Kronrod result and the embedded
/// 7-point Gauss result.
///
/// The tables hold only the non-negative abscissae of the rule on `[-1, 1]`. Each
/// positive abscissa stands for a symmetric pair of sample points, whereas the
/// centre abscissa `0.0` is a single point and is therefore weighted once. The
/// Gauss abscissae are the Kronrod abscissae with even index (0, 2, 4, 6).
fn gauss_kronrod(f: &dyn Fn(f64) -> f64, a: f64, b: f64) -> (f64, f64) {
    const NODES: [f64; 8] = [
        0.0, 0.20778495500789848, 0.40584515137739717, 0.58608723546769113,
        0.74153118559939444, 0.86486442335976907, 0.94910791234275852, 0.99145537112081264,
    ];
    const WEIGHTS_GAUSS: [f64; 4] = [
        0.41795918367346939, 0.38183005050511894, 0.27970539148927667, 0.12948496616886969,
    ];
    const WEIGHTS_KRONROD: [f64; 8] = [
        0.20948214108472783, 0.20443294007529889, 0.19035057806478541, 0.16900472663926790,
        0.14065325971552592, 0.10479001032225018, 0.06309209262997855, 0.02293532201052922,
    ];

    // Affine map from the reference interval [-1, 1] onto [a, b].
    let half_width = (b - a) / 2.0;
    let center = (b + a) / 2.0;

    // The centre point belongs to both rules and is sampled exactly once.
    let f_center = f(center);
    let mut integral_gauss = WEIGHTS_GAUSS[0] * f_center;
    let mut integral_kronrod = WEIGHTS_KRONROD[0] * f_center;

    for (i, (&node, &weight_kronrod)) in NODES.iter().zip(WEIGHTS_KRONROD.iter()).enumerate().skip(1) {
        let offset = half_width * node;
        let pair_sum = f(center - offset) + f(center + offset);

        integral_kronrod += weight_kronrod * pair_sum;

        // Every second Kronrod node is also a Gauss node.
        if i % 2 == 0 {
            integral_gauss += WEIGHTS_GAUSS[i / 2] * pair_sum;
        }
    }

    integral_gauss *= half_width;
    integral_kronrod *= half_width;

    (integral_kronrod, (integral_kronrod - integral_gauss).abs())
}
+
+pub fn adaptive_integration(f: &dyn Fn(f64) -> f64, a: f64, b: f64, epsabs: f64, epsrel: f64) -> (f64, f64) {
+ let mut intervals = VecDeque::new();
+ intervals.push_back((a, b));
+
+ let mut result = 0.0;
+ let mut total_error = 0.0;
+
+ while let Some((a, b)) = intervals.pop_front() {
+ let (integral, error) = gauss_kronrod(f, a, b);
+ if error < epsabs || error < epsrel * integral.abs() {
+ result += integral;
+ total_error += error;
+ } else {
+ let mid = (a + b) / 2.0;
+ intervals.push_back((a, mid));
+ intervals.push_back((mid, b));
+ }
+ }
+
+ (result, total_error)
+}
+
+
+
+
/// Numerical integration of `f` over `[a, b]` using the composite trapezoidal rule.
///
/// The interval is divided into `n` equally wide panels; both end points are
/// weighted by one half and every interior grid point by one.
///
/// `n` must be at least 1. With `n == 0` the panel width is not finite and so is
/// the result.
fn integrate<F>(f: F, a: f64, b: f64, n: usize) -> f64
    where
        F: Fn(f64) -> f64,
{
    let dx = (b - a) / n as f64;

    // Interior grid points a + dx, ..., a + (n - 1) * dx.
    let interior: f64 = (1..n).map(|i| f(a + i as f64 * dx)).sum();

    (0.5 * (f(a) + f(b)) + interior) * dx
}
+
/// Polynomial approximation of `erfc(|x|)`.
///
/// The value depends only on the magnitude of `x`. Both `erfc` and `erf` are
/// derived from it.
fn erfc_magnitude(x: f64) -> f64 {
    let t = 1.0 / (1.0 + 0.5 * x.abs());
    t * (-x * x - 1.26551223 + t * (1.00002368 +
        t * (0.37409196 + t * (0.09678418 + t * (-0.18628806 +
            t * (0.27886807 + t * (-1.13520398 + t * (1.48851587 +
                t * (-0.82215223 + t * 0.17087277)))))))))
        .exp()
}

/// Complementary error function (erfc).
///
/// Evaluated directly from the approximation instead of as `1.0 - erf(x)`, so that
/// the small tail values for large positive `x` are not lost to cancellation.
fn erfc(x: f64) -> f64 {
    let tau = erfc_magnitude(x);
    if x >= 0.0 {
        tau
    } else {
        // erfc(-x) = 2 - erfc(x)
        2.0 - tau
    }
}

/// Error function (erf).
fn erf(x: f64) -> f64 {
    let tau = erfc_magnitude(x);
    if x >= 0.0 {
        1.0 - tau
    } else {
        tau - 1.0
    }
}
+
+// Exponentially modified Gaussian function
+fn emg(x: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
+ let part1 = lambda / 2.0 * (-lambda * (x - mu) + lambda * lambda * sigma * sigma / 2.0).exp();
+ let part2 = erfc((mu + lambda * sigma * sigma - x) / (sigma * 2.0_f64.sqrt()));
+ part1 * part2
+}
+
+pub fn custom_cdf_normal(x: f64, mean: f64, std_dev: f64) -> f64 {
+ let z = (x - mean) / std_dev;
+ 0.5 * (1.0 + erf(z / SQRT_2))
+}
+
+pub fn accumulated_intensity_cdf_normal(sample_start: f64, sample_end: f64, mean: f64, std_dev: f64) -> f64 {
+ let cdf_start = custom_cdf_normal(sample_start, mean, std_dev);
+ let cdf_end = custom_cdf_normal(sample_end, mean, std_dev);
+ cdf_end - cdf_start
+}
+
/// Symmetric bounds around the mean of a normal distribution.
///
/// # Arguments
///
/// * `mean` - centre of the interval
/// * `std` - standard deviation
/// * `z_score` - half-width of the interval in units of `std`
///
/// # Returns
///
/// * `(f64, f64)` - `(mean - z_score * std, mean + z_score * std)`
pub fn calculate_bounds_normal(mean: f64, std: f64, z_score: f64) -> (f64, f64) {
    let half_width = z_score * std;
    let lower = mean - half_width;
    let upper = mean + half_width;
    (lower, upper)
}
+
+pub fn emg_function(x: f64, mu: f64, sigma: f64, lambda: f64) -> f64 {
+ let prefactor = lambda / 2.0 * ((lambda / 2.0) * (2.0 * mu + lambda * sigma.powi(2) - 2.0 * x)).exp();
+ let erfc_part = erfc((mu + lambda * sigma.powi(2) - x) / (SQRT_2 * sigma));
+ prefactor * erfc_part
+}
+
+pub fn emg_cdf_range(lower_limit: f64, upper_limit: f64, mu: f64, sigma: f64, lambda: f64, n_steps: Option<usize>) -> f64 {
+ let n_steps = n_steps.unwrap_or(1000);
+ integrate(|x| emg(x, mu, sigma, lambda), lower_limit, upper_limit, n_steps)
+}
+
+pub fn calculate_bounds_emg(mu: f64, sigma: f64, lambda: f64, step_size: f64, target: f64, lower_start: f64, upper_start: f64, n_steps: Option<usize>) -> (f64, f64) {
+ assert!(0.0 <= target && target <= 1.0, "target must be in [0, 1]");
+
+ let lower_initial = mu - lower_start * sigma - 2.0;
+ let upper_initial = mu + upper_start * sigma;
+
+ let steps = ((upper_initial - lower_initial) / step_size).round() as usize;
+ let search_space: Vec<f64> = (0..=steps).map(|i| lower_initial + i as f64 * step_size).collect();
+
+ let calc_cdf = |low: usize, high: usize| -> f64 {
+ emg_cdf_range(search_space[low], search_space[high], mu, sigma, lambda, n_steps)
+ };
+
+ // Binary search for cutoff values
+ let (mut low, mut high) = (0, steps);
+ while low < high {
+ let mid = low + (high - low) / 2;
+ if calc_cdf(0, mid) < target {
+ low = mid + 1;
+ } else {
+ high = mid;
+ }
+ }
+ let upper_cutoff_index = low;
+
+ low = 0;
+ high = upper_cutoff_index;
+ while low < high {
+ let mid = high - (high - low) / 2;
+ let prob_mid_to_upper = calc_cdf(mid, upper_cutoff_index);
+
+ if prob_mid_to_upper < target {
+ high = mid - 1;
+ } else {
+ low = mid;
+ }
+ }
+ let lower_cutoff_index = high;
+
+ (search_space[lower_cutoff_index], search_space[upper_cutoff_index])
+}
+
+pub fn calculate_frame_occurrence_emg(retention_times: &[f64], rt: f64, sigma: f64, lambda_: f64, target_p: f64, step_size: f64, n_steps: Option<usize>) -> Vec<i32> {
+ let (rt_min, rt_max) = calculate_bounds_emg(rt, sigma, lambda_, step_size, target_p, 20.0, 60.0, n_steps);
+
+ // Finding the frame closest to rt_min
+ let first_frame = retention_times.iter()
+ .enumerate()
+ .min_by(|(_, &a), (_, &b)| (a - rt_min).abs().partial_cmp(&(b - rt_min).abs()).unwrap())
+ .map(|(idx, _)| idx + 1) // Rust is zero-indexed, so +1 to match Python's 1-indexing
+ .unwrap_or(0); // Fallback in case of an empty slice
+
+ // Finding the frame closest to rt_max
+ let last_frame = retention_times.iter()
+ .enumerate()
+ .min_by(|(_, &a), (_, &b)| (a - rt_max).abs().partial_cmp(&(b - rt_max).abs()).unwrap())
+ .map(|(idx, _)| idx + 1) // Same adjustment for 1-indexing
+ .unwrap_or(0); // Fallback
+
+ // Generating the range of frames
+ (first_frame..=last_frame).map(|x| x as i32).collect()
+}
+
+pub fn calculate_frame_abundance_emg(time_map: &HashMap<i32, f64>, occurrences: &[i32], rt: f64, sigma: f64, lambda_: f64, rt_cycle_length: f64, n_steps: Option<usize>) -> Vec<f64> {
+ let mut frame_abundance = Vec::new();
+
+ for &occurrence in occurrences {
+ if let Some(&time) = time_map.get(&occurrence) {
+ let start = time - rt_cycle_length;
+ let i = emg_cdf_range(start, time, rt, sigma, lambda_, n_steps);
+ frame_abundance.push(i);
+ }
+ }
+
+ frame_abundance
+}
+
+// retention_times: &[f64], rt: f64, sigma: f64, lambda_: f64
+pub fn calculate_frame_occurrences_emg_par(retention_times: &[f64], rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, target_p: f64, step_size: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<i32>> {
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+ let result = thread_pool.install(|| {
+ rts.into_par_iter().zip(sigmas.into_par_iter()).zip(lambdas.into_par_iter())
+ .map(|((rt, sigma), lambda)| {
+ calculate_frame_occurrence_emg(retention_times, rt, sigma, lambda, target_p, step_size, n_steps)
+ })
+ .collect()
+ });
+ result
+}
+
+pub fn calculate_frame_abundances_emg_par(time_map: &HashMap<i32, f64>, occurrences: Vec<Vec<i32>>, rts: Vec<f64>, sigmas: Vec<f64>, lambdas: Vec<f64>, rt_cycle_length: f64, num_threads: usize, n_steps: Option<usize>) -> Vec<Vec<f64>> {
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+ let result = thread_pool.install(|| {
+ occurrences.into_par_iter().zip(rts.into_par_iter()).zip(sigmas.into_par_iter()).zip(lambdas.into_par_iter())
+ .map(|(((occurrences, rt), sigma), lambda)| {
+ calculate_frame_abundance_emg(time_map, &occurrences, rt, sigma, lambda, rt_cycle_length, n_steps)
+ })
+ .collect()
+ });
+ result
+}
+
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138
use std::collections::HashMap;
+
/// Amino acid names and their one-letter codes.
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<&'static str, &'static str>` - a map of amino acid names to their one-letter codes
///
/// # Example
///
/// ```
/// use mscore::chemistry::amino_acid::amino_acids;
///
/// let amino_acids = amino_acids();
/// assert_eq!(amino_acids.get("Lysine"), Some(&"K"));
/// ```
pub fn amino_acids() -> HashMap<&'static str, &'static str> {
    HashMap::from([
        ("Lysine", "K"),
        ("Alanine", "A"),
        ("Glycine", "G"),
        ("Valine", "V"),
        ("Tyrosine", "Y"),
        ("Arginine", "R"),
        ("Glutamic Acid", "E"),
        ("Phenylalanine", "F"),
        ("Tryptophan", "W"),
        ("Leucine", "L"),
        ("Threonine", "T"),
        ("Cysteine", "C"),
        ("Serine", "S"),
        ("Glutamine", "Q"),
        ("Methionine", "M"),
        ("Isoleucine", "I"),
        ("Asparagine", "N"),
        ("Proline", "P"),
        ("Histidine", "H"),
        ("Aspartic Acid", "D"),
        ("Selenocysteine", "U"),
    ])
}
+
+
/// Amino Acid Masses
///
/// Monoisotopic residue masses, i.e. the mass of the amino acid as it occurs inside
/// a peptide chain. The selenocysteine (`U`) entry is the monoisotopic mass of its
/// residue composition C3H5NOSe.
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<&'static str, f64>` - a map of amino acid one-letter codes to their monoisotopic masses
///
/// # Example
///
/// ```
/// use mscore::chemistry::amino_acid::amino_acid_masses;
///
/// let amino_acid_masses = amino_acid_masses();
/// assert_eq!(amino_acid_masses.get("K"), Some(&128.094963));
/// ```
pub fn amino_acid_masses() -> HashMap<&'static str, f64> {
    HashMap::from([
        ("A", 71.037114),
        ("R", 156.101111),
        ("N", 114.042927),
        ("D", 115.026943),
        ("C", 103.009185),
        ("E", 129.042593),
        ("Q", 128.058578),
        ("G", 57.021464),
        ("H", 137.058912),
        ("I", 113.084064),
        ("L", 113.084064),
        ("K", 128.094963),
        ("M", 131.040485),
        ("F", 147.068414),
        ("P", 97.052764),
        ("S", 87.032028),
        ("T", 101.047679),
        ("W", 186.079313),
        ("Y", 163.063329),
        ("V", 99.068414),
        // C3H5NOSe with the most abundant isotopes (80Se)
        ("U", 150.953636),
    ])
}
+
/// Atomic composition of the amino acid residues.
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<char, HashMap<&'static str, i32>>` - a map of amino acid one-letter codes to their atomic compositions
///
/// # Example
///
/// ```
/// use mscore::chemistry::amino_acid::amino_acid_composition;
/// use std::collections::HashMap;
///
/// let amino_acid_composition = amino_acid_composition();
/// assert_eq!(amino_acid_composition.get(&'K'), Some(&HashMap::from([("C", 6), ("H", 12), ("N", 2), ("O", 1)])));
/// ```
pub fn amino_acid_composition() -> HashMap<char, HashMap<&'static str, i32>> {
    // One row per residue: one-letter code and its (element, atom count) pairs.
    const RESIDUES: [(char, &[(&str, i32)]); 21] = [
        ('G', &[("C", 2), ("H", 3), ("N", 1), ("O", 1)]), // Glycine
        ('A', &[("C", 3), ("H", 5), ("N", 1), ("O", 1)]), // Alanine
        ('S', &[("C", 3), ("H", 5), ("N", 1), ("O", 2)]), // Serine
        ('P', &[("C", 5), ("H", 7), ("N", 1), ("O", 1)]), // Proline
        ('V', &[("C", 5), ("H", 9), ("N", 1), ("O", 1)]), // Valine
        ('T', &[("C", 4), ("H", 7), ("N", 1), ("O", 2)]), // Threonine
        ('C', &[("C", 3), ("H", 5), ("N", 1), ("O", 1), ("S", 1)]), // Cysteine
        ('I', &[("C", 6), ("H", 11), ("N", 1), ("O", 1)]), // Isoleucine
        ('L', &[("C", 6), ("H", 11), ("N", 1), ("O", 1)]), // Leucine
        ('N', &[("C", 4), ("H", 6), ("N", 2), ("O", 2)]), // Asparagine
        ('D', &[("C", 4), ("H", 5), ("N", 1), ("O", 3)]), // Aspartic Acid
        ('Q', &[("C", 5), ("H", 8), ("N", 2), ("O", 2)]), // Glutamine
        ('K', &[("C", 6), ("H", 12), ("N", 2), ("O", 1)]), // Lysine
        ('E', &[("C", 5), ("H", 7), ("N", 1), ("O", 3)]), // Glutamic Acid
        ('M', &[("C", 5), ("H", 9), ("N", 1), ("O", 1), ("S", 1)]), // Methionine
        ('H', &[("C", 6), ("H", 7), ("N", 3), ("O", 1)]), // Histidine
        ('F', &[("C", 9), ("H", 9), ("N", 1), ("O", 1)]), // Phenylalanine
        ('R', &[("C", 6), ("H", 12), ("N", 4), ("O", 1)]), // Arginine
        ('Y', &[("C", 9), ("H", 9), ("N", 1), ("O", 2)]), // Tyrosine
        ('W', &[("C", 11), ("H", 10), ("N", 2), ("O", 1)]), // Tryptophan
        ('U', &[("C", 3), ("H", 5), ("N", 1), ("O", 1), ("Se", 1)]), // Selenocysteine
    ];

    RESIDUES
        .iter()
        .map(|&(code, atoms)| (code, atoms.iter().copied().collect()))
        .collect()
}
// Purpose: constants that are used throughout the program.

// Masses, given in unified atomic mass units (u).

/// Mass of a proton (u).
pub const MASS_PROTON: f64 = 1.007276466621;
/// Mass of a neutron (u).
pub const MASS_NEUTRON: f64 = 1.00866491595;
/// Mass of an electron (u).
pub const MASS_ELECTRON: f64 = 0.00054857990946;
/// Mass of water, H2O (u).
pub const MASS_WATER: f64 = 18.0105646863;
/// Mass of carbon monoxide, CO (u).
pub const MASS_CO: f64 = 27.994915;
/// Mass of ammonia, NH3 (u).
pub const MASS_NH3: f64 = 17.026549;

// IUPAC standard conditions and physical constants.

/// Standard temperature in Kelvin.
pub const STANDARD_TEMPERATURE: f64 = 273.15;
/// Standard pressure in Pascal.
pub const STANDARD_PRESSURE: f64 = 1e5;
/// Elementary charge in Coulombs.
pub const ELEMENTARY_CHARGE: f64 = 1.602176634e-19;
/// Boltzmann constant in J/K.
pub const K_BOLTZMANN: f64 = 1.380649e-23;
/// Avogadro constant in mol^-1.
pub const AVOGADRO: f64 = 6.02214076e23;
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261
use std::collections::HashMap;
+
/// Monoisotopic atomic weights, keyed by element symbol.
///
/// Builds a fresh map on every call covering the 118 symbols from `H` to `Og`.
/// Several entries are stored as whole numbers rather than measured masses
/// (for example `Tc` → `98.0` and every element from `Np` onwards).
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<&'static str, f64>` - a map of atomic symbols to their monoisotopic weights
///
/// # Example
///
/// ```
/// use mscore::chemistry::elements::atomic_weights_mono_isotopic;
///
/// let atomic_weights = atomic_weights_mono_isotopic();
/// assert_eq!(atomic_weights.get("H"), Some(&1.00782503223));
/// ```
pub fn atomic_weights_mono_isotopic() -> HashMap<&'static str, f64> {
    // One row per period of the periodic table.
    const WEIGHTS: [(&str, f64); 118] = [
        // Period 1
        ("H", 1.00782503223), ("He", 4.00260325415),
        // Period 2
        ("Li", 7.0160034366), ("Be", 9.012183065), ("B", 11.00930536), ("C", 12.0000000),
        ("N", 14.00307400443), ("O", 15.99491461957), ("F", 18.99840316273), ("Ne", 19.9924401762),
        // Period 3
        ("Na", 22.9897692820), ("Mg", 23.985041697), ("Al", 26.98153853), ("Si", 27.97692653465),
        ("P", 30.97376199842), ("S", 31.9720711744), ("Cl", 34.968852682), ("Ar", 39.9623831237),
        // Period 4
        ("K", 38.963706679), ("Ca", 39.96259098), ("Sc", 44.95590828), ("Ti", 47.9479463),
        ("V", 50.9439595), ("Cr", 51.9405075), ("Mn", 54.9380455), ("Fe", 55.9349375),
        ("Co", 58.9331955), ("Ni", 57.9353429), ("Cu", 62.9295975), ("Zn", 63.9291422),
        ("Ga", 68.9255735), ("Ge", 73.9211778), ("As", 74.9215965), ("Se", 79.9165218),
        ("Br", 78.9183376), ("Kr", 83.911507),
        // Period 5
        ("Rb", 84.9117893), ("Sr", 87.9056125), ("Y", 88.905842), ("Zr", 89.9046977),
        ("Nb", 92.906373), ("Mo", 97.905404), ("Tc", 98.0), ("Ru", 101.904349),
        ("Rh", 102.905504), ("Pd", 105.903485), ("Ag", 106.905093), ("Cd", 113.903358),
        ("In", 114.903878), ("Sn", 119.902199), ("Sb", 120.903818), ("Te", 129.906224),
        ("I", 126.904473), ("Xe", 131.904155),
        // Period 6
        ("Cs", 132.905447), ("Ba", 137.905247), ("La", 138.906355), ("Ce", 139.905442),
        ("Pr", 140.907662), ("Nd", 141.907732), ("Pm", 145.0), ("Sm", 151.919728),
        ("Eu", 152.921225), ("Gd", 157.924103), ("Tb", 158.925346), ("Dy", 163.929171),
        ("Ho", 164.930319), ("Er", 165.930290), ("Tm", 168.934211), ("Yb", 173.938859),
        ("Lu", 174.940770), ("Hf", 179.946550), ("Ta", 180.947992), ("W", 183.950932),
        ("Re", 186.955744), ("Os", 191.961467), ("Ir", 192.962917), ("Pt", 194.964766),
        ("Au", 196.966543), ("Hg", 201.970617), ("Tl", 204.974427), ("Pb", 207.976627),
        ("Bi", 208.980384), ("Po", 209.0), ("At", 210.0), ("Rn", 222.0),
        // Period 7
        ("Fr", 223.0), ("Ra", 226.0), ("Ac", 227.0), ("Th", 232.038054),
        ("Pa", 231.035882), ("U", 238.050786), ("Np", 237.0), ("Pu", 244.0),
        ("Am", 243.0), ("Cm", 247.0), ("Bk", 247.0), ("Cf", 251.0),
        ("Es", 252.0), ("Fm", 257.0), ("Md", 258.0), ("No", 259.0),
        ("Lr", 262.0), ("Rf", 267.0), ("Db", 270.0), ("Sg", 271.0),
        ("Bh", 270.0), ("Hs", 277.0), ("Mt", 276.0), ("Ds", 281.0),
        ("Rg", 280.0), ("Cn", 285.0), ("Nh", 284.0), ("Fl", 289.0),
        ("Mc", 288.0), ("Lv", 293.0), ("Ts", 294.0), ("Og", 294.0),
    ];

    WEIGHTS.iter().copied().collect()
}
+
/// Isotope masses per element, keyed by element symbol.
///
/// Builds a fresh map on every call. Only the 34 symbols from `H` to `Se`
/// are present; looking up any other symbol yields `None`.
///
/// NOTE(review): for several elements the number of masses listed here
/// differs from the number of fractions in `isotopic_abundance` (e.g. `He`
/// and `Li` have one mass here but two abundances there; `Se` has seven
/// masses here but six abundances there), and some lists repeat a value
/// (`Se` contains `79.9165218` twice). Presumably the two tables are meant
/// to be index-aligned — verify against an authoritative isotope table.
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<&'static str, Vec<f64>>` - a map of atomic symbols to their isotopic weights
///
/// # Example
///
/// ```
/// use mscore::chemistry::elements::atoms_isotopic_weights;
///
/// let isotopic_weights = atoms_isotopic_weights();
/// assert_eq!(isotopic_weights.get("H"), Some(&vec![1.00782503223, 2.01410177812]));
/// ```
pub fn atoms_isotopic_weights() -> HashMap<&'static str, Vec<f64>> {
    const ISOTOPE_MASSES: [(&str, &[f64]); 34] = [
        ("H", &[1.00782503223, 2.01410177812]),
        ("He", &[4.00260325415]),
        ("Li", &[7.0160034366]),
        ("Be", &[9.012183065]),
        ("B", &[11.00930536]),
        ("C", &[12.0000000, 13.00335483507]),
        ("N", &[14.00307400443, 15.00010889888]),
        ("O", &[15.99491461957, 16.99913175650, 17.99915961286]),
        ("F", &[18.99840316273]),
        ("Ne", &[19.9924401762]),
        ("Na", &[22.9897692820]),
        ("Mg", &[23.985041697]),
        ("Al", &[26.98153853]),
        ("Si", &[27.97692653465]),
        ("P", &[30.97376199842]),
        ("S", &[31.9720711744, 32.9714589098, 33.967867004]),
        ("Cl", &[34.968852682, 36.965902602]),
        ("Ar", &[39.9623831237, 35.967545105]),
        ("K", &[38.963706679, 39.963998166, 40.961825257]),
        ("Ca", &[39.96259098, 41.95861783, 42.95876644, 43.95548156, 45.95369276]),
        ("Sc", &[44.95590828]),
        ("Ti", &[47.9479463, 45.95262772, 46.95175879, 47.94794198, 49.9447912]),
        ("V", &[50.9439595]),
        ("Cr", &[51.9405075, 49.94604183, 50.9439637, 51.94050623, 53.93887916]),
        ("Mn", &[54.9380455]),
        ("Fe", &[55.9349375, 53.93960899, 54.93804514, 55.93493739, 56.93539400, 57.93327443]),
        ("Co", &[58.9331955]),
        ("Ni", &[57.9353429, 58.9343467, 59.93078588, 60.93105557, 61.92834537, 63.92796682]),
        ("Cu", &[62.9295975, 61.92834537, 63.92914201]),
        ("Zn", &[63.9291422, 65.92603381, 66.92712775, 67.92484455, 69.9253192]),
        ("Ga", &[68.9255735]),
        ("Ge", &[73.9211778, 71.922075826, 72.923458956, 73.921177761, 75.921402726]),
        ("As", &[74.9215965]),
        ("Se", &[79.9165218, 73.9224764, 75.9192136, 76.9199140, 77.9173095, 79.9165218, 81.9166995]),
    ];

    ISOTOPE_MASSES
        .iter()
        .map(|&(symbol, masses)| (symbol, masses.to_vec()))
        .collect()
}
+
/// Natural isotopic abundances per element, keyed by element symbol.
///
/// Builds a fresh map on every call. Only the 34 symbols from `H` to `Se`
/// are present. Every list holds fractional abundances whose sum does not
/// exceed 1.
///
/// NOTE(review): presumably entry `i` of a list is meant to pair with entry
/// `i` of the same element in `atoms_isotopic_weights`; for several elements
/// the two lists currently differ in length, so confirm the pairing before
/// relying on it for elements beyond H, C, N, O and S.
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<&'static str, Vec<f64>>` - a map of atomic symbols to their isotopic abundances
///
/// # Example
///
/// ```
/// use mscore::chemistry::elements::isotopic_abundance;
///
/// let isotopic_abundance = isotopic_abundance();
/// assert_eq!(isotopic_abundance.get("H"), Some(&vec![0.999885, 0.000115]));
/// ```
pub fn isotopic_abundance() -> HashMap<&'static str, Vec<f64>> {
    const ABUNDANCES: [(&str, &[f64]); 34] = [
        ("H", &[0.999885, 0.000115]),
        ("He", &[0.99999866, 0.00000134]),
        ("Li", &[0.0759, 0.9241]),
        ("Be", &[1.0]),
        ("B", &[0.199, 0.801]),
        ("C", &[0.9893, 0.0107]),
        ("N", &[0.99632, 0.00368]),
        ("O", &[0.99757, 0.00038, 0.00205]),
        ("F", &[1.0]),
        ("Ne", &[0.9048, 0.0027, 0.0925]),
        // Sodium has a single stable isotope (23Na), so its abundance is 1.
        ("Na", &[1.0]),
        ("Mg", &[0.7899, 0.1000, 0.1101]),
        ("Al", &[1.0]),
        ("Si", &[0.9223, 0.0467, 0.0310]),
        ("P", &[1.0]),
        ("S", &[0.9493, 0.0076, 0.0429]),
        ("Cl", &[0.7578, 0.2422]),
        ("Ar", &[0.003365, 0.000632, 0.996003]),
        ("K", &[0.932581, 0.000117, 0.067302]),
        ("Ca", &[0.96941, 0.00647, 0.00135, 0.02086, 0.00187]),
        ("Sc", &[1.0]),
        ("Ti", &[0.0825, 0.0744, 0.7372, 0.0541, 0.0518]),
        ("V", &[0.9975, 0.0025]),
        // Four stable isotopes; these fractions already sum to 1.
        ("Cr", &[0.04345, 0.83789, 0.09501, 0.02365]),
        ("Mn", &[1.0]),
        // Four stable isotopes; these fractions already sum to 1.
        ("Fe", &[0.05845, 0.91754, 0.02119, 0.00282]),
        ("Co", &[1.0]),
        // Five stable isotopes; these fractions already sum to 1.
        ("Ni", &[0.680769, 0.262231, 0.011399, 0.036345, 0.009256]),
        ("Cu", &[0.6915, 0.3085]),
        ("Zn", &[0.4917, 0.2773, 0.0404, 0.1845, 0.0061]),
        ("Ga", &[0.60108, 0.39892]),
        ("Ge", &[0.2052, 0.2745, 0.0775, 0.3652, 0.0775]),
        ("As", &[1.0]),
        ("Se", &[0.0089, 0.0937, 0.0763, 0.2377, 0.4961, 0.0873]),
    ];

    ABUNDANCES
        .iter()
        .map(|&(symbol, fractions)| (symbol, fractions.to_vec()))
        .collect()
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98
use crate::chemistry::constants::MASS_PROTON;
+
/// Convert an inverse reduced ion mobility (1/k0) into a collision cross-section (CCS).
///
/// Arguments:
///
/// * `one_over_k0` - 1 over reduced ion mobility (1/k0)
/// * `mz` - mass-over-charge of the ion
/// * `charge` - charge state of the ion
/// * `mass_gas` - mass of drift gas (N2)
/// * `temp` - temperature of the drift gas in C°
/// * `t_diff` - factor to translate from C° to K
///
/// Returns:
///
/// * `ccs` - collision cross-section
///
/// # Examples
///
/// ```
/// use mscore::chemistry::formulas::one_over_reduced_mobility_to_ccs;
///
/// let ccs = one_over_reduced_mobility_to_ccs(0.5, 1000.0, 2, 28.013, 31.85, 273.15);
/// assert_eq!(ccs, 201.64796734428452);
/// ```
pub fn one_over_reduced_mobility_to_ccs(
    one_over_k0: f64,
    mz: f64,
    charge: u32,
    mass_gas: f64,
    temp: f64,
    t_diff: f64,
) -> f64 {
    // Precomputed prefactor of the conversion formula.
    const SUMMARY_CONSTANT: f64 = 18509.8632163405;

    let z = charge as f64;
    // `mz * charge` stands in for the ion mass when forming the reduced mass with the gas.
    let ion_mass = mz * z;
    let reduced_mass = (ion_mass * mass_gas) / (ion_mass + mass_gas);
    let root_term = (reduced_mass * (temp + t_diff)).sqrt();
    let k0 = 1.0 / one_over_k0;

    SUMMARY_CONSTANT * z / root_term / k0
}
+
+
/// Convert a collision cross-section (CCS) into an inverse reduced ion mobility (1/k0).
///
/// Arguments:
///
/// * `ccs` - collision cross-section
/// * `mz` - mass-over-charge of the ion
/// * `charge` - charge state of the ion
/// * `mass_gas` - mass of drift gas (N2)
/// * `temp` - temperature of the drift gas in C°
/// * `t_diff` - factor to translate from C° to K
///
/// Returns:
///
/// * `one_over_k0` - 1 over reduced ion mobility (1/k0)
///
/// # Examples
///
/// ```
/// use mscore::chemistry::formulas::ccs_to_one_over_reduced_mobility;
///
/// let k0 = ccs_to_one_over_reduced_mobility(806.5918693771381, 1000.0, 2, 28.013, 31.85, 273.15);
/// assert_eq!(k0, 2.0);
/// ```
pub fn ccs_to_one_over_reduced_mobility(
    ccs: f64,
    mz: f64,
    charge: u32,
    mass_gas: f64,
    temp: f64,
    t_diff: f64,
) -> f64 {
    // Precomputed prefactor of the conversion formula.
    const SUMMARY_CONSTANT: f64 = 18509.8632163405;

    let z = charge as f64;
    // `mz * charge` stands in for the ion mass when forming the reduced mass with the gas.
    let ion_mass = mz * z;
    let reduced_mass = (ion_mass * mass_gas) / (ion_mass + mass_gas);
    let numerator = (reduced_mass * (temp + t_diff)).sqrt() * ccs;
    let denominator = SUMMARY_CONSTANT * z;

    numerator / denominator
}
+
+/// calculate the m/z of an ion
+///
+/// Arguments:
+///
+/// * `mono_mass` - monoisotopic mass of the ion
+/// * `charge` - charge state of the ion
+///
+/// Returns:
+///
+/// * `mz` - mass-over-charge of the ion
+///
+/// # Examples
+///
+/// ```
+/// use mscore::chemistry::formulas::calculate_mz;
+///
+/// let mz = calculate_mz(1000.0, 2);
+/// assert_eq!(mz, 501.007276466621);
+/// ```
+pub fn calculate_mz(monoisotopic_mass: f64, charge: i32) -> f64 {
+ (monoisotopic_mass + charge as f64 * MASS_PROTON) / charge as f64
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73
use std::collections::HashMap;
+use crate::algorithm::isotope::generate_isotope_distribution;
+use crate::chemistry::constants::MASS_PROTON;
+use crate::chemistry::elements::atomic_weights_mono_isotopic;
+use crate::data::spectrum::MzSpectrum;
+
+pub struct SumFormula {
+ pub formula: String,
+ pub elements: HashMap<String, i32>,
+}
+
+impl SumFormula {
+ pub fn new(formula: &str) -> Self {
+ let elements = parse_formula(formula).unwrap();
+ SumFormula {
+ formula: formula.to_string(),
+ elements,
+ }
+ }
+ pub fn monoisotopic_weight(&self) -> f64 {
+ let atomic_weights = atomic_weights_mono_isotopic();
+ self.elements.iter().fold(0.0, |acc, (element, count)| {
+ acc + atomic_weights[element.as_str()] * *count as f64
+ })
+ }
+
+ pub fn isotope_distribution(&self, charge: i32) -> MzSpectrum {
+ let distribution = generate_isotope_distribution(&self.elements, 1e-3, 1e-9, 200);
+ let intensity = distribution.iter().map(|(_, i)| *i).collect();
+ let mz = distribution.iter().map(|(m, _)| (*m + charge as f64 * MASS_PROTON) / charge as f64).collect();
+ MzSpectrum::new(mz, intensity)
+ }
+}
+
+fn parse_formula(formula: &str) -> Result<HashMap<String, i32>, String> {
+ let atomic_weights = atomic_weights_mono_isotopic();
+ let mut element_counts = HashMap::new();
+ let mut current_element = String::new();
+ let mut current_count = String::new();
+ let mut chars = formula.chars().peekable();
+
+ while let Some(c) = chars.next() {
+ if c.is_ascii_uppercase() {
+ if !current_element.is_empty() {
+ let count = current_count.parse::<i32>().unwrap_or(1);
+ if atomic_weights.contains_key(current_element.as_str()) {
+ *element_counts.entry(current_element.clone()).or_insert(0) += count;
+ } else {
+ return Err(format!("Unknown element: {}", current_element));
+ }
+ }
+ current_element = c.to_string();
+ current_count = String::new();
+ } else if c.is_ascii_digit() {
+ current_count.push(c);
+ } else if c.is_ascii_lowercase() {
+ current_element.push(c);
+ }
+
+ if chars.peek().map_or(true, |next_c| next_c.is_ascii_uppercase()) {
+ let count = current_count.parse::<i32>().unwrap_or(1);
+ if atomic_weights.contains_key(current_element.as_str()) {
+ *element_counts.entry(current_element.clone()).or_insert(0) += count;
+ } else {
+ return Err(format!("Unknown element: {}", current_element));
+ }
+ current_element = String::new();
+ current_count = String::new();
+ }
+ }
+
+ Ok(element_counts)
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 +698 +699 +700 +701 +702 +703 +704 +705 +706 +707 +708 +709 +710 +711 +712 +713 +714 +715 +716 +717 +718 +719 +720 +721 +722 +723 +724 +725 +726 +727 +728 +729 +730 +731 +732 +733 +734 +735 +736 +737 +738 +739 +740 +741 +742 +743 +744 +745 +746 +747 +748 +749 +750 +751 +752 +753 +754 +755 +756 +757 +758 +759 +760 +761 +762 +763 +764 +765 +766 +767 +768 +769 +770 +771 +772 +773 +774 +775 +776 +777 +778 +779 +780 +781 +782 +783 +784 +785 +786 +787 +788 +789 +790 +791 +792 +793 +794 +795 +796 +797 +798 +799 +800 +801 +802 +803 +804 +805 +806 +807 +808 +809 +810 +811 +812 +813 +814 +815 +816 +817 +818 +819 +820 +821 
+822 +823 +824 +825 +826 +827 +828 +829 +830 +831 +832 +833 +834 +835 +836 +837 +838 +839 +840 +841 +842 +843 +844 +845 +846 +847 +848 +849 +850 +851 +852 +853 +854 +855 +856 +857 +858 +859 +860 +861 +862 +863 +864 +865 +866 +867 +868 +869 +870 +871 +872 +873 +874 +875 +876 +877 +878 +879 +880 +881 +882 +883 +884 +885 +886 +887 +888 +889 +890 +891 +892 +893 +894 +895 +896 +897 +898 +899 +900 +901 +902 +903 +904 +905 +906 +907 +908 +909 +910 +911 +912 +913 +914 +915 +916 +917 +918 +919 +920 +921 +922 +923 +924 +925 +926 +927 +928 +929 +930 +931 +932 +933 +934 +935 +936 +937 +938 +939 +940 +941 +942 +943 +944 +945 +946 +947 +948 +949 +950 +951 +952 +953 +954 +955 +956 +957 +958 +959 +960 +961 +962 +963 +964 +965 +966 +967 +968 +969 +970 +971 +972 +973 +974 +975 +976 +977 +978 +979 +980 +981 +982 +983 +984 +985 +986 +987 +988 +989 +990 +991 +992 +993 +994 +995 +996 +997 +998 +999 +1000 +1001 +1002 +1003 +1004 +1005 +1006 +1007 +1008 +1009 +1010 +1011 +1012 +1013 +1014 +1015 +1016 +1017 +1018 +1019 +1020 +1021 +1022 +1023 +1024 +1025 +1026 +1027 +1028 +1029 +1030 +1031 +1032 +1033 +1034 +1035 +1036 +1037 +1038 +1039 +1040 +1041 +1042 +1043 +1044 +1045 +1046 +1047 +1048 +1049 +1050 +1051 +1052 +1053 +1054 +1055 +1056 +1057 +1058 +1059 +1060 +1061 +1062 +1063 +1064 +1065 +1066 +1067 +1068 +1069 +1070 +1071 +1072 +1073 +1074 +1075 +1076 +1077 +1078 +1079 +1080 +1081 +1082 +1083 +1084 +1085 +1086 +1087 +1088 +1089 +1090 +1091 +1092 +1093 +1094 +1095 +1096 +1097 +1098 +1099 +1100 +1101 +1102 +1103 +1104 +1105 +1106 +1107 +1108 +1109 +1110 +1111 +1112 +1113 +1114 +1115 +1116 +1117 +1118 +1119 +1120 +1121 +1122 +1123 +1124 +1125 +1126 +1127 +1128 +1129 +1130 +1131 +1132 +1133 +1134 +1135 +1136 +1137 +1138 +1139 +1140 +1141 +1142 +1143 +1144 +1145 +1146 +1147 +1148 +1149 +1150 +1151 +1152 +1153 +1154 +1155 +1156 +1157 +1158 +1159 +1160 +1161 +1162 +1163 +1164 +1165 +1166 +1167 +1168 +1169 +1170 +1171 +1172 +1173 +1174 +1175 +1176 +1177 +1178 +1179 +1180 +1181 +1182 +1183 +1184 
+1185 +1186 +1187 +1188 +1189 +1190 +1191 +1192 +1193 +1194 +1195 +1196 +1197 +1198 +1199 +1200 +1201 +1202 +1203 +1204 +1205 +1206 +1207 +1208 +1209 +1210 +1211 +1212 +1213 +1214 +1215 +1216 +1217 +1218 +1219 +1220 +1221 +1222 +1223 +1224 +1225 +1226 +1227 +1228 +1229 +1230 +1231 +1232 +1233 +1234 +1235 +1236 +1237 +1238 +1239 +1240 +1241 +1242 +1243 +1244 +1245 +1246 +1247 +1248 +1249 +1250 +1251 +1252 +1253 +1254 +1255 +1256 +1257 +1258 +1259 +1260 +1261 +1262 +1263 +1264 +1265 +1266 +1267 +1268 +1269 +1270 +1271 +1272 +1273 +1274 +1275 +1276 +1277 +1278 +1279 +1280 +1281 +1282 +1283 +1284 +1285 +1286 +1287 +1288 +1289 +1290 +1291 +1292 +1293 +1294 +1295 +1296 +1297 +1298 +1299 +1300 +1301 +1302 +1303 +1304 +1305 +1306 +1307 +1308 +1309 +1310 +1311 +1312 +1313 +1314 +1315 +1316 +1317 +1318 +1319 +1320 +1321 +1322 +1323 +1324 +1325 +1326 +1327 +1328 +1329 +1330 +1331 +1332 +1333 +1334 +1335 +1336 +1337 +1338 +1339 +1340 +1341 +1342 +1343 +1344 +1345 +1346 +1347 +1348 +1349 +1350 +1351 +1352 +1353 +1354 +1355 +1356 +1357 +1358 +1359 +1360 +1361 +1362 +1363 +1364 +1365 +1366 +1367 +1368 +1369 +1370 +1371 +1372 +1373 +1374 +1375 +1376 +1377 +1378 +1379 +1380 +1381 +1382 +1383 +1384 +1385 +1386 +1387 +1388 +1389 +1390 +1391 +1392 +1393 +1394 +1395 +1396 +1397 +1398 +1399 +1400 +1401 +1402 +1403 +1404 +1405 +1406 +1407 +1408 +1409 +1410 +1411 +1412 +1413 +1414 +1415 +1416 +1417 +1418 +1419 +1420 +1421 +1422 +1423 +1424 +1425 +1426 +1427 +1428 +1429 +1430 +1431 +1432 +1433 +1434 +1435 +1436 +1437 +1438 +1439 +1440 +1441 +1442 +1443 +1444 +1445 +1446 +1447 +1448 +1449 +1450 +1451 +1452 +1453 +1454 +1455 +1456 +1457 +1458 +1459 +1460 +1461 +1462 +1463 +1464 +1465 +1466 +1467 +1468 +1469 +1470 +1471 +1472 +1473 +1474 +1475 +1476 +1477 +1478 +1479 +1480 +1481 +1482 +1483 +1484 +1485 +1486 +1487 +1488 +1489 +1490 +1491 +1492 +1493 +1494 +1495 +1496 +1497 +1498 +1499 +1500 +1501 +1502 +1503 +1504 +1505 +1506 +1507 +1508 +1509 +1510 +1511 +1512 +1513 +1514 +1515 +1516 +1517 
+1518 +1519 +1520 +1521 +1522 +1523 +1524 +1525 +1526 +1527 +1528 +1529 +1530 +1531 +1532 +1533 +1534 +1535 +1536 +1537 +1538 +1539 +1540 +1541 +1542 +1543 +1544 +1545 +1546 +1547 +1548 +1549 +1550 +1551 +1552 +1553 +1554 +1555 +1556 +1557 +1558 +1559 +1560 +1561 +1562 +1563 +1564 +1565 +1566 +1567 +1568 +1569 +1570 +1571 +1572 +1573 +1574 +1575 +1576 +1577 +1578 +1579 +1580 +1581 +1582 +1583 +1584 +1585 +1586 +1587 +1588 +1589 +1590 +1591 +1592 +1593 +1594 +1595 +1596 +1597 +1598 +1599 +1600 +1601 +1602 +1603 +1604 +1605 +1606 +1607 +1608 +1609 +1610 +1611 +1612 +1613 +1614 +1615 +1616 +1617 +1618 +1619 +1620 +1621 +1622 +1623 +1624 +1625 +1626 +1627 +1628 +1629 +1630 +1631 +1632 +1633 +1634 +1635 +1636 +1637 +1638 +1639 +1640 +1641 +1642 +1643 +1644 +1645 +1646 +1647 +1648 +1649 +1650 +1651 +1652 +1653 +1654 +1655 +1656 +1657 +1658 +1659 +1660 +1661 +1662 +1663 +1664 +1665 +1666 +1667 +1668 +1669 +1670 +1671 +1672 +1673 +1674 +1675 +1676 +1677 +1678 +1679 +1680 +1681 +1682 +1683 +1684 +1685 +1686 +1687 +1688 +1689 +1690 +1691 +1692 +1693 +1694 +1695 +1696 +1697 +1698 +1699 +1700 +1701 +1702 +1703 +1704 +1705 +1706 +1707 +1708 +1709 +1710 +1711 +1712 +1713 +1714 +1715 +1716 +1717 +1718 +1719 +1720 +1721 +1722 +1723 +1724 +1725 +1726 +1727 +1728 +1729 +1730 +1731 +1732 +1733 +1734 +1735 +1736 +1737 +1738 +1739 +1740 +1741 +1742 +1743 +1744 +1745 +1746 +1747 +1748 +1749 +1750 +1751 +1752 +1753 +1754 +1755 +1756 +1757 +1758 +1759 +1760 +1761 +1762 +1763 +1764 +1765 +1766 +1767 +1768 +1769 +1770 +1771 +1772 +1773 +1774 +1775 +1776 +1777 +1778 +1779 +1780 +1781 +1782 +1783 +1784 +1785 +1786 +1787 +1788 +1789 +1790 +1791 +1792 +1793 +1794 +1795 +1796 +1797 +1798 +1799 +1800 +1801 +1802 +1803 +1804 +1805 +1806 +1807 +1808 +1809 +1810 +1811 +1812 +1813 +1814 +1815 +1816 +1817 +1818 +1819 +1820 +1821 +1822 +1823 +1824 +1825 +1826 +1827 +1828 +1829 +1830 +1831 +1832 +1833 +1834 +1835 +1836 +1837 +1838 +1839 +1840 +1841 +1842 +1843 +1844 +1845 +1846 +1847 +1848 +1849 +1850 
+1851 +1852 +1853 +1854 +1855 +1856 +1857 +1858 +1859 +1860 +1861 +1862 +1863 +1864 +1865 +1866 +1867 +1868 +1869 +1870 +1871 +1872 +1873 +1874 +1875 +1876 +1877 +1878 +1879 +1880 +1881 +1882 +1883 +1884 +1885 +1886 +1887 +1888 +1889 +1890 +1891 +1892 +1893 +1894 +1895 +1896 +1897 +1898 +1899 +1900 +1901 +1902 +1903 +1904 +1905 +1906 +1907 +1908 +1909 +1910 +1911 +1912 +1913 +1914 +1915 +1916 +1917 +1918 +1919 +1920 +1921 +1922 +1923 +1924 +1925 +1926 +1927 +1928 +1929 +1930 +1931 +1932 +1933 +1934 +1935 +1936 +1937 +1938 +1939 +1940 +1941 +1942 +1943 +1944 +1945 +1946 +1947 +1948 +1949 +1950 +1951 +1952 +1953 +1954 +1955 +1956 +1957 +1958 +1959 +1960 +1961 +1962 +1963 +1964 +1965 +1966 +1967 +1968 +1969 +1970 +1971 +1972 +1973 +1974 +1975 +1976 +1977 +1978 +1979 +1980 +1981 +1982 +1983 +1984 +1985 +1986 +1987 +1988 +1989 +1990 +1991 +1992 +1993 +1994 +1995 +1996 +1997 +1998 +1999 +2000 +2001 +2002 +2003 +2004 +2005 +2006 +2007 +2008 +2009 +2010 +2011 +2012 +2013 +2014 +2015 +2016 +2017 +2018 +2019 +2020 +2021 +2022 +2023 +2024 +2025 +2026 +2027 +2028 +2029 +2030 +2031 +2032 +2033 +2034 +2035 +2036 +2037 +2038 +2039 +2040 +2041 +2042 +2043 +2044 +2045 +2046 +2047 +2048 +2049 +2050 +2051 +2052 +2053 +2054 +2055 +2056 +2057 +2058 +2059 +2060 +2061 +2062 +2063 +2064 +2065 +2066 +2067 +2068 +2069 +2070 +2071 +2072 +2073 +2074 +2075 +2076 +2077 +2078 +2079 +2080 +2081 +2082 +2083 +2084 +2085 +2086 +2087 +2088 +2089 +2090 +2091 +2092 +2093 +2094 +2095 +2096 +2097 +2098 +2099 +2100 +2101 +2102 +2103 +2104 +2105 +2106 +2107 +2108 +2109 +2110 +2111 +2112 +2113 +2114 +2115 +2116 +2117 +2118 +2119 +2120 +2121 +2122 +2123 +2124 +2125 +2126 +2127 +2128 +2129 +2130 +2131 +2132 +2133 +2134 +2135 +2136 +2137 +2138 +2139 +2140 +2141 +2142 +2143 +2144 +2145
use std::collections::HashMap;
+
/// Atomic compositions of a curated subset of Unimod modifications.
///
/// Every entry maps a Unimod accession key (for example `"[UNIMOD:1]"`) to the
/// change in element counts that the modification applies. Counts are signed:
/// a negative count means atoms are removed (e.g. `[UNIMOD:7]` removes one `H`
/// and one `N` while adding one `O`).
///
/// A new map is built and returned on every call.
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<String, HashMap<&'static str, i32>>` - a map of unimod modification names to their atomic compositions
///
/// # Example
///
/// ```
/// use mscore::chemistry::unimod::modification_atomic_composition;
/// use std::collections::HashMap;
///
/// let composition = modification_atomic_composition();
/// assert_eq!(composition.get("[UNIMOD:1]"), Some(&HashMap::from([("C", 2), ("H", 2), ("O", 1)])));
/// ```
pub fn modification_atomic_composition() -> HashMap<String, HashMap<&'static str, i32>> {
    // (accession key, [(element symbol, signed atom count delta)])
    //
    // [UNIMOD:43] (HexNAc) and [UNIMOD:408] (Glycosyl) are deliberately absent:
    // their draft compositions were marked as unverified ("??") and were never
    // enabled. Confirm them against Unimod before adding them here.
    const COMPOSITIONS: &[(&str, &[(&str, i32)])] = &[
        // Acetyl
        ("[UNIMOD:1]", &[("C", 2), ("H", 2), ("O", 1)]),
        // Biotin
        ("[UNIMOD:3]", &[("N", 2), ("C", 10), ("H", 14), ("O", 2), ("S", 1)]),
        // Carbamidomethyl
        ("[UNIMOD:4]", &[("C", 2), ("H", 3), ("O", 1), ("N", 1)]),
        // Deamidation
        ("[UNIMOD:7]", &[("H", -1), ("N", -1), ("O", 1)]),
        // Phosphorylation
        ("[UNIMOD:21]", &[("H", 1), ("O", 3), ("P", 1)]),
        // Methylation
        ("[UNIMOD:34]", &[("H", 2), ("C", 1)]),
        // Oxidation
        ("[UNIMOD:35]", &[("O", 1)]),
        // Propionyl
        ("[UNIMOD:58]", &[("C", 3), ("H", 4), ("O", 1)]),
        // GlyGly (ubiquitinylation residue)
        ("[UNIMOD:121]", &[("C", 4), ("H", 6), ("O", 2), ("N", 2)]),
        // Formylation
        ("[UNIMOD:122]", &[("C", 1), ("O", 1)]),
        // Cysteinyl
        ("[UNIMOD:312]", &[("C", 3), ("H", 5), ("O", 2), ("N", 1), ("S", 1)]),
        // Oxidation to nitro
        ("[UNIMOD:354]", &[("H", -1), ("O", 2), ("N", 1)]),
        // Malonylation
        ("[UNIMOD:747]", &[("C", 3), ("H", 2), ("O", 3)]),
        // Butyryl
        ("[UNIMOD:1289]", &[("C", 4), ("H", 6), ("O", 1)]),
        // Crotonylation
        ("[UNIMOD:1363]", &[("C", 4), ("H", 4), ("O", 1)]),
    ];

    COMPOSITIONS
        .iter()
        .map(|&(accession, atoms)| {
            let elements: HashMap<&'static str, i32> = atoms.iter().copied().collect();
            (accession.to_string(), elements)
        })
        .collect()
}
+
+/// Unimod Modifications Mass
+///
+/// # Arguments
+///
+/// None
+///
+/// # Returns
+///
+/// * `HashMap<&'static str, f64>` - a map of unimod modification names to their mass
+///
+/// # Example
+///
+/// ```
+/// use mscore::chemistry::unimod::unimod_modifications_mass;
+///
+/// let mass = unimod_modifications_mass();
+/// assert_eq!(mass.get("[UNIMOD:1]"), Some(&42.010565));
+/// ```
+pub fn unimod_modifications_mass() -> HashMap<&'static str, f64> {
+ let mut map = HashMap::new();
+ map.insert("[UNIMOD:1]", 42.010565);
+ map.insert("[UNIMOD:2]", -0.984016);
+ map.insert("[UNIMOD:3]", 226.077598);
+ map.insert("[UNIMOD:4]", 57.021464);
+ map.insert("[UNIMOD:5]", 43.005814);
+ map.insert("[UNIMOD:6]", 58.005479);
+ map.insert("[UNIMOD:7]", 0.984016);
+ map.insert("[UNIMOD:8]", 486.251206);
+ map.insert("[UNIMOD:9]", 494.30142);
+ map.insert("[UNIMOD:10]", -29.992806);
+ map.insert("[UNIMOD:11]", -48.003371);
+ map.insert("[UNIMOD:12]", 450.275205);
+ map.insert("[UNIMOD:13]", 442.224991);
+ map.insert("[UNIMOD:17]", 99.068414);
+ map.insert("[UNIMOD:20]", 414.193691);
+ map.insert("[UNIMOD:21]", 79.966331);
+ map.insert("[UNIMOD:23]", -18.010565);
+ map.insert("[UNIMOD:24]", 71.037114);
+ map.insert("[UNIMOD:25]", 119.037114);
+ map.insert("[UNIMOD:26]", 39.994915);
+ map.insert("[UNIMOD:27]", -18.010565);
+ map.insert("[UNIMOD:28]", -17.026549);
+ map.insert("[UNIMOD:29]", 127.063329);
+ map.insert("[UNIMOD:30]", 21.981943);
+ map.insert("[UNIMOD:31]", 105.057849);
+ map.insert("[UNIMOD:34]", 14.01565);
+ map.insert("[UNIMOD:35]", 15.994915);
+ map.insert("[UNIMOD:36]", 28.0313);
+ map.insert("[UNIMOD:37]", 42.04695);
+ map.insert("[UNIMOD:39]", 45.987721);
+ map.insert("[UNIMOD:40]", 79.956815);
+ map.insert("[UNIMOD:41]", 162.052824);
+ map.insert("[UNIMOD:42]", 188.032956);
+ map.insert("[UNIMOD:43]", 203.079373);
+ map.insert("[UNIMOD:44]", 204.187801);
+ map.insert("[UNIMOD:45]", 210.198366);
+ map.insert("[UNIMOD:46]", 229.014009);
+ map.insert("[UNIMOD:47]", 238.229666);
+ map.insert("[UNIMOD:48]", 272.250401);
+ map.insert("[UNIMOD:49]", 340.085794);
+ map.insert("[UNIMOD:50]", 783.141486);
+ map.insert("[UNIMOD:51]", 788.725777);
+ map.insert("[UNIMOD:52]", 42.021798);
+ map.insert("[UNIMOD:53]", 156.11503);
+ map.insert("[UNIMOD:55]", 305.068156);
+ map.insert("[UNIMOD:142]", 349.137281);
+ map.insert("[UNIMOD:143]", 406.158745);
+ map.insert("[UNIMOD:144]", 486.158471);
+ map.insert("[UNIMOD:145]", 495.19519);
+ map.insert("[UNIMOD:146]", 511.190105);
+ map.insert("[UNIMOD:147]", 552.216654);
+ map.insert("[UNIMOD:148]", 568.211569);
+ map.insert("[UNIMOD:149]", 656.227613);
+ map.insert("[UNIMOD:150]", 698.274563);
+ map.insert("[UNIMOD:151]", 700.253828);
+ map.insert("[UNIMOD:152]", 714.269478);
+ map.insert("[UNIMOD:153]", 730.264392);
+ map.insert("[UNIMOD:154]", 821.280102);
+ map.insert("[UNIMOD:155]", 846.311736);
+ map.insert("[UNIMOD:156]", 860.327386);
+ map.insert("[UNIMOD:157]", 862.306651);
+ map.insert("[UNIMOD:158]", 876.322301);
+ map.insert("[UNIMOD:159]", 892.317216);
+ map.insert("[UNIMOD:160]", 947.323029);
+ map.insert("[UNIMOD:161]", 972.283547);
+ map.insert("[UNIMOD:214]", 144.102063);
+ map.insert("[UNIMOD:342]", 15.010899);
+ map.insert("[UNIMOD:343]", 199.066699);
+ map.insert("[UNIMOD:344]", -43.053433);
+ map.insert("[UNIMOD:345]", 47.984744);
+ map.insert("[UNIMOD:348]", -23.015984);
+ map.insert("[UNIMOD:349]", -22.031969);
+ map.insert("[UNIMOD:350]", 19.989829);
+ map.insert("[UNIMOD:351]", 3.994915);
+ map.insert("[UNIMOD:352]", -1.031634);
+ map.insert("[UNIMOD:353]", 241.088497);
+ map.insert("[UNIMOD:354]", 44.985078);
+ map.insert("[UNIMOD:357]", 258.115047);
+ map.insert("[UNIMOD:359]", 13.979265);
+ map.insert("[UNIMOD:360]", -30.010565);
+ map.insert("[UNIMOD:361]", 240.104482);
+ map.insert("[UNIMOD:368]", -33.987721);
+ map.insert("[UNIMOD:369]", -27.994915);
+ map.insert("[UNIMOD:371]", 86.036779);
+ map.insert("[UNIMOD:372]", -42.021798);
+ map.insert("[UNIMOD:374]", -1.007825);
+ map.insert("[UNIMOD:375]", 142.110613);
+ map.insert("[UNIMOD:376]", 220.182715);
+ map.insert("[UNIMOD:377]", 576.511761);
+ map.insert("[UNIMOD:378]", 72.021129);
+ map.insert("[UNIMOD:379]", 87.068414);
+ map.insert("[UNIMOD:380]", 266.203451);
+ map.insert("[UNIMOD:381]", 14.96328);
+ map.insert("[UNIMOD:382]", -33.003705);
+ map.insert("[UNIMOD:385]", -17.026549);
+ map.insert("[UNIMOD:387]", 586.279135);
+ map.insert("[UNIMOD:388]", 588.294785);
+ map.insert("[UNIMOD:389]", 584.263485);
+ map.insert("[UNIMOD:390]", 616.177295);
+ map.insert("[UNIMOD:391]", 521.884073);
+ map.insert("[UNIMOD:392]", 29.974179);
+ map.insert("[UNIMOD:393]", 340.100562);
+ map.insert("[UNIMOD:394]", 123.00853);
+ map.insert("[UNIMOD:395]", 881.146904);
+ map.insert("[UNIMOD:396]", 197.04531);
+ map.insert("[UNIMOD:397]", 469.716159);
+ map.insert("[UNIMOD:398]", 595.612807);
+ map.insert("[UNIMOD:400]", -94.041865);
+ map.insert("[UNIMOD:401]", -2.01565);
+ map.insert("[UNIMOD:402]", -17.992806);
+ map.insert("[UNIMOD:403]", -15.010899);
+ map.insert("[UNIMOD:405]", 329.05252);
+ map.insert("[UNIMOD:407]", 146.036779);
+ map.insert("[UNIMOD:408]", 148.037173);
+ map.insert("[UNIMOD:409]", 454.088965);
+ map.insert("[UNIMOD:410]", 634.662782);
+ map.insert("[UNIMOD:413]", 345.047435);
+ map.insert("[UNIMOD:414]", 30.010565);
+ map.insert("[UNIMOD:415]", 1620.930224);
+ map.insert("[UNIMOD:416]", 418.137616);
+ map.insert("[UNIMOD:417]", 306.025302);
+ map.insert("[UNIMOD:419]", 154.00311);
+ map.insert("[UNIMOD:420]", 15.977156);
+ map.insert("[UNIMOD:421]", 31.972071);
+ map.insert("[UNIMOD:422]", 70.005479);
+ map.insert("[UNIMOD:423]", 79.91652);
+ map.insert("[UNIMOD:424]", 1572.985775);
+ map.insert("[UNIMOD:425]", 31.989829);
+ map.insert("[UNIMOD:426]", 126.104465);
+ map.insert("[UNIMOD:428]", 283.045704);
+ map.insert("[UNIMOD:429]", 242.019154);
+ map.insert("[UNIMOD:431]", 236.214016);
+ map.insert("[UNIMOD:432]", 368.344302);
+ map.insert("[UNIMOD:433]", 264.187801);
+ map.insert("[UNIMOD:434]", 294.183109);
+ map.insert("[UNIMOD:435]", 109.052764);
+ map.insert("[UNIMOD:436]", 614.161645);
+ map.insert("[UNIMOD:437]", 386.110369);
+ map.insert("[UNIMOD:438]", 24.995249);
+ map.insert("[UNIMOD:439]", 342.786916);
+ map.insert("[UNIMOD:440]", 42.021798);
+ map.insert("[UNIMOD:442]", 438.094051);
+ map.insert("[UNIMOD:443]", 456.104615);
+ map.insert("[UNIMOD:444]", 922.834855);
+ map.insert("[UNIMOD:445]", 59.04969);
+ map.insert("[UNIMOD:447]", -15.994915);
+ map.insert("[UNIMOD:448]", 831.197041);
+ map.insert("[UNIMOD:449]", 154.135765);
+ map.insert("[UNIMOD:478]", 421.073241);
+ map.insert("[UNIMOD:494]", 672.298156);
+ map.insert("[UNIMOD:495]", 684.298156);
+ map.insert("[UNIMOD:499]", 298.022748);
+ map.insert("[UNIMOD:523]", 452.245726);
+ map.insert("[UNIMOD:526]", -48.003371);
+ map.insert("[UNIMOD:528]", 14.999666);
+ map.insert("[UNIMOD:529]", 29.039125);
+ map.insert("[UNIMOD:530]", 37.955882);
+ map.insert("[UNIMOD:531]", 61.921774);
+ map.insert("[UNIMOD:532]", 144.105918);
+ map.insert("[UNIMOD:533]", 144.099599);
+ map.insert("[UNIMOD:534]", 155.821022);
+ map.insert("[UNIMOD:535]", 383.228103);
+ map.insert("[UNIMOD:540]", 15.994915);
+ map.insert("[UNIMOD:541]", 30.010565);
+ map.insert("[UNIMOD:542]", 43.989829);
+ map.insert("[UNIMOD:543]", 26.01565);
+ map.insert("[UNIMOD:544]", -14.01565);
+ map.insert("[UNIMOD:545]", 58.005479);
+ map.insert("[UNIMOD:546]", 28.0313);
+ map.insert("[UNIMOD:547]", 44.059229);
+ map.insert("[UNIMOD:548]", -15.977156);
+ map.insert("[UNIMOD:549]", 83.070128);
+ map.insert("[UNIMOD:550]", 60.054144);
+ map.insert("[UNIMOD:551]", 53.091927);
+ map.insert("[UNIMOD:552]", -45.987721);
+ map.insert("[UNIMOD:553]", -43.989829);
+ map.insert("[UNIMOD:554]", 22.031969);
+ map.insert("[UNIMOD:555]", -0.984016);
+ map.insert("[UNIMOD:556]", -58.005479);
+ map.insert("[UNIMOD:557]", 48.036386);
+ map.insert("[UNIMOD:558]", 14.01565);
+ map.insert("[UNIMOD:559]", -15.958529);
+ map.insert("[UNIMOD:560]", -58.005479);
+ map.insert("[UNIMOD:561]", -0.984016);
+ map.insert("[UNIMOD:562]", -14.01565);
+ map.insert("[UNIMOD:563]", -0.94763);
+ map.insert("[UNIMOD:564]", -72.021129);
+ map.insert("[UNIMOD:565]", -29.974179);
+ map.insert("[UNIMOD:566]", -60.036386);
+ map.insert("[UNIMOD:567]", -44.059229);
+ map.insert("[UNIMOD:568]", -33.98435);
+ map.insert("[UNIMOD:569]", 15.994915);
+ map.insert("[UNIMOD:570]", -48.0);
+ map.insert("[UNIMOD:571]", 14.01565);
+ map.insert("[UNIMOD:572]", 30.010565);
+ map.insert("[UNIMOD:573]", 129.057849);
+ map.insert("[UNIMOD:574]", 72.021129);
+ map.insert("[UNIMOD:575]", 42.04695);
+ map.insert("[UNIMOD:576]", 58.005479);
+ map.insert("[UNIMOD:577]", 45.987721);
+ map.insert("[UNIMOD:578]", 99.079647);
+ map.insert("[UNIMOD:580]", -40.006148);
+ map.insert("[UNIMOD:581]", 26.004417);
+ map.insert("[UNIMOD:582]", -9.000334);
+ map.insert("[UNIMOD:584]", 19.042199);
+ map.insert("[UNIMOD:585]", -23.974848);
+ map.insert("[UNIMOD:588]", -12.036386);
+ map.insert("[UNIMOD:589]", 0.958863);
+ map.insert("[UNIMOD:590]", 15.010899);
+ map.insert("[UNIMOD:594]", -27.047285);
+ map.insert("[UNIMOD:595]", -14.052036);
+ map.insert("[UNIMOD:596]", 0.94763);
+ map.insert("[UNIMOD:597]", -0.036386);
+ map.insert("[UNIMOD:598]", 2.945522);
+ map.insert("[UNIMOD:599]", 28.006148);
+ map.insert("[UNIMOD:600]", -15.010899);
+ map.insert("[UNIMOD:601]", -26.052036);
+ map.insert("[UNIMOD:602]", 33.98435);
+ map.insert("[UNIMOD:603]", 72.995249);
+ map.insert("[UNIMOD:604]", -16.0313);
+ map.insert("[UNIMOD:605]", -14.01565);
+ map.insert("[UNIMOD:606]", 23.974848);
+ map.insert("[UNIMOD:607]", 14.974514);
+ map.insert("[UNIMOD:608]", 17.956421);
+ map.insert("[UNIMOD:609]", 43.017047);
+ map.insert("[UNIMOD:610]", -29.992806);
+ map.insert("[UNIMOD:611]", 25.060626);
+ map.insert("[UNIMOD:613]", -2.945522);
+ map.insert("[UNIMOD:614]", -17.956421);
+ map.insert("[UNIMOD:615]", -31.972071);
+ map.insert("[UNIMOD:616]", -27.010899);
+ map.insert("[UNIMOD:617]", -12.995249);
+ map.insert("[UNIMOD:618]", 14.052036);
+ map.insert("[UNIMOD:619]", 49.020401);
+ map.insert("[UNIMOD:620]", 23.015984);
+ map.insert("[UNIMOD:621]", 0.984016);
+ map.insert("[UNIMOD:622]", -0.958863);
+ map.insert("[UNIMOD:623]", -10.020735);
+ map.insert("[UNIMOD:624]", -26.01565);
+ map.insert("[UNIMOD:625]", 40.006148);
+ map.insert("[UNIMOD:626]", 31.005814);
+ map.insert("[UNIMOD:627]", 3.994915);
+ map.insert("[UNIMOD:628]", 59.048347);
+ map.insert("[UNIMOD:629]", 16.0313);
+ map.insert("[UNIMOD:630]", -31.005814);
+ map.insert("[UNIMOD:631]", 0.036386);
+ map.insert("[UNIMOD:632]", 0.984016);
+ map.insert("[UNIMOD:633]", 9.000334);
+ map.insert("[UNIMOD:634]", 28.042534);
+ map.insert("[UNIMOD:635]", -14.974514);
+ map.insert("[UNIMOD:636]", -69.069083);
+ map.insert("[UNIMOD:637]", 29.978202);
+ map.insert("[UNIMOD:638]", -55.053433);
+ map.insert("[UNIMOD:639]", -59.048347);
+ map.insert("[UNIMOD:640]", -28.006148);
+ map.insert("[UNIMOD:641]", -19.042199);
+ map.insert("[UNIMOD:642]", -28.042534);
+ map.insert("[UNIMOD:643]", -25.060626);
+ map.insert("[UNIMOD:644]", -53.091927);
+ map.insert("[UNIMOD:645]", -43.017047);
+ map.insert("[UNIMOD:646]", -99.079647);
+ map.insert("[UNIMOD:647]", 60.036386);
+ map.insert("[UNIMOD:648]", -15.994915);
+ map.insert("[UNIMOD:649]", 99.047285);
+ map.insert("[UNIMOD:650]", 14.01565);
+ map.insert("[UNIMOD:651]", 27.010899);
+ map.insert("[UNIMOD:652]", 10.020735);
+ map.insert("[UNIMOD:653]", 76.0313);
+ map.insert("[UNIMOD:654]", 15.977156);
+ map.insert("[UNIMOD:655]", 69.069083);
+ map.insert("[UNIMOD:656]", 26.052036);
+ map.insert("[UNIMOD:657]", -30.010565);
+ map.insert("[UNIMOD:658]", -14.01565);
+ map.insert("[UNIMOD:659]", -30.010565);
+ map.insert("[UNIMOD:660]", 12.995249);
+ map.insert("[UNIMOD:661]", 27.047285);
+ map.insert("[UNIMOD:662]", -3.994915);
+ map.insert("[UNIMOD:663]", 29.992806);
+ map.insert("[UNIMOD:664]", 12.036386);
+ map.insert("[UNIMOD:665]", 55.053433);
+ map.insert("[UNIMOD:666]", 48.0);
+ map.insert("[UNIMOD:667]", -28.0313);
+ map.insert("[UNIMOD:668]", 29.974179);
+ map.insert("[UNIMOD:669]", 31.972071);
+ map.insert("[UNIMOD:670]", 15.958529);
+ map.insert("[UNIMOD:671]", 14.01565);
+ map.insert("[UNIMOD:672]", -42.04695);
+ map.insert("[UNIMOD:673]", -99.047285);
+ map.insert("[UNIMOD:674]", -83.070128);
+ map.insert("[UNIMOD:675]", -29.978202);
+ map.insert("[UNIMOD:676]", -129.057849);
+ map.insert("[UNIMOD:677]", -72.995249);
+ map.insert("[UNIMOD:678]", -15.994915);
+ map.insert("[UNIMOD:679]", -76.0313);
+ map.insert("[UNIMOD:680]", -49.020401);
+ map.insert("[UNIMOD:681]", -26.004417);
+ map.insert("[UNIMOD:682]", -48.036386);
+ map.insert("[UNIMOD:683]", -60.054144);
+ map.insert("[UNIMOD:695]", 7.017164);
+ map.insert("[UNIMOD:730]", 304.20536);
+ map.insert("[UNIMOD:731]", 304.19904);
+ map.insert("[UNIMOD:772]", 5.016774);
+ map.insert("[UNIMOD:827]", 572.181134);
+ map.insert("[UNIMOD:888]", 140.094963);
+ map.insert("[UNIMOD:889]", 144.102063);
+ map.insert("[UNIMOD:897]", 3.98814);
+ map.insert("[UNIMOD:950]", 6.008178);
+ map.insert("[UNIMOD:951]", 37.946941);
+ map.insert("[UNIMOD:952]", 53.919289);
+ map.insert("[UNIMOD:953]", 55.919696);
+ map.insert("[UNIMOD:954]", 61.913495);
+ map.insert("[UNIMOD:955]", 105.897267);
+ map.insert("[UNIMOD:956]", 21.969392);
+ map.insert("[UNIMOD:1014]", 87.032028);
+ map.insert("[UNIMOD:1018]", 138.06808);
+ map.insert("[UNIMOD:1019]", 144.10574);
+ map.insert("[UNIMOD:1044]", 31.972071);
+ map.insert("[UNIMOD:1045]", 76.0313);
+ map.insert("[UNIMOD:1046]", 66.021798);
+ map.insert("[UNIMOD:1047]", 42.04695);
+ map.insert("[UNIMOD:1048]", 57.057849);
+ map.insert("[UNIMOD:1049]", 60.003371);
+ map.insert("[UNIMOD:1050]", 43.005814);
+ map.insert("[UNIMOD:1051]", 57.021464);
+ map.insert("[UNIMOD:1052]", 85.063997);
+ map.insert("[UNIMOD:1053]", 115.042199);
+ map.insert("[UNIMOD:1054]", 92.026215);
+ map.insert("[UNIMOD:1055]", -31.972071);
+ map.insert("[UNIMOD:1056]", 12.017759);
+ map.insert("[UNIMOD:1057]", 26.033409);
+ map.insert("[UNIMOD:1058]", 34.049727);
+ map.insert("[UNIMOD:1059]", 10.07488);
+ map.insert("[UNIMOD:1060]", 25.085779);
+ map.insert("[UNIMOD:1061]", 28.0313);
+ map.insert("[UNIMOD:1062]", 11.033743);
+ map.insert("[UNIMOD:1063]", -5.956421);
+ map.insert("[UNIMOD:1064]", 25.049393);
+ map.insert("[UNIMOD:1065]", -1.961506);
+ map.insert("[UNIMOD:1066]", -3.940771);
+ map.insert("[UNIMOD:1067]", -12.017759);
+ map.insert("[UNIMOD:1068]", 32.041471);
+ map.insert("[UNIMOD:1069]", -1.942879);
+ map.insert("[UNIMOD:1070]", 13.06802);
+ map.insert("[UNIMOD:1071]", 16.013542);
+ map.insert("[UNIMOD:1072]", -17.974179);
+ map.insert("[UNIMOD:1073]", 13.031634);
+ map.insert("[UNIMOD:1074]", 41.074168);
+ map.insert("[UNIMOD:1075]", -27.994915);
+ map.insert("[UNIMOD:1076]", -13.979265);
+ map.insert("[UNIMOD:1077]", 71.05237);
+ map.insert("[UNIMOD:1078]", -26.033409);
+ map.insert("[UNIMOD:1079]", 18.025821);
+ map.insert("[UNIMOD:1080]", 8.016319);
+ map.insert("[UNIMOD:1081]", -15.958529);
+ map.insert("[UNIMOD:1082]", 1.997892);
+ map.insert("[UNIMOD:1083]", -14.999666);
+ map.insert("[UNIMOD:1084]", -31.989829);
+ map.insert("[UNIMOD:1085]", 27.058518);
+ map.insert("[UNIMOD:1086]", -42.010565);
+ map.insert("[UNIMOD:1087]", -27.994915);
+ map.insert("[UNIMOD:1088]", 57.03672);
+ map.insert("[UNIMOD:1089]", 34.020735);
+ map.insert("[UNIMOD:1090]", -76.0313);
+ map.insert("[UNIMOD:1091]", -32.041471);
+ map.insert("[UNIMOD:1092]", -18.025821);
+ map.insert("[UNIMOD:1093]", -90.04695);
+ map.insert("[UNIMOD:1094]", -10.009502);
+ map.insert("[UNIMOD:1095]", -18.973451);
+ map.insert("[UNIMOD:1096]", -16.027929);
+ map.insert("[UNIMOD:1097]", -33.025486);
+ map.insert("[UNIMOD:1098]", -50.01565);
+ map.insert("[UNIMOD:1099]", -19.009836);
+ map.insert("[UNIMOD:1100]", 9.032697);
+ map.insert("[UNIMOD:1101]", -46.020735);
+ map.insert("[UNIMOD:1102]", 39.010899);
+ map.insert("[UNIMOD:1103]", 90.04695);
+ map.insert("[UNIMOD:1104]", 80.037448);
+ map.insert("[UNIMOD:1105]", 56.0626);
+ map.insert("[UNIMOD:1106]", 71.073499);
+ map.insert("[UNIMOD:1107]", 74.019021);
+ map.insert("[UNIMOD:1108]", 57.021464);
+ map.insert("[UNIMOD:1109]", 40.0313);
+ map.insert("[UNIMOD:1110]", 71.037114);
+ map.insert("[UNIMOD:1111]", 44.026215);
+ map.insert("[UNIMOD:1112]", 106.041865);
+ map.insert("[UNIMOD:1113]", -66.021798);
+ map.insert("[UNIMOD:1114]", -34.049727);
+ map.insert("[UNIMOD:1115]", -8.016319);
+ map.insert("[UNIMOD:1116]", 10.009502);
+ map.insert("[UNIMOD:1117]", -80.037448);
+ map.insert("[UNIMOD:1119]", -8.963949);
+ map.insert("[UNIMOD:1120]", -6.018427);
+ map.insert("[UNIMOD:1121]", -50.026883);
+ map.insert("[UNIMOD:1122]", -36.011233);
+ map.insert("[UNIMOD:1123]", -37.990498);
+ map.insert("[UNIMOD:1124]", 49.020401);
+ map.insert("[UNIMOD:1125]", -42.04695);
+ map.insert("[UNIMOD:1126]", -10.07488);
+ map.insert("[UNIMOD:1127]", 1.942879);
+ map.insert("[UNIMOD:1128]", 15.958529);
+ map.insert("[UNIMOD:1129]", -56.0626);
+ map.insert("[UNIMOD:1130]", 49.979265);
+ map.insert("[UNIMOD:1131]", -57.057849);
+ map.insert("[UNIMOD:1132]", -25.085779);
+ map.insert("[UNIMOD:1133]", -13.06802);
+ map.insert("[UNIMOD:1134]", 18.973451);
+ map.insert("[UNIMOD:1135]", -71.073499);
+ map.insert("[UNIMOD:1136]", 8.963949);
+ map.insert("[UNIMOD:1137]", -31.042199);
+ map.insert("[UNIMOD:1138]", -41.062935);
+ map.insert("[UNIMOD:1139]", -29.026549);
+ map.insert("[UNIMOD:1140]", 57.98435);
+ map.insert("[UNIMOD:1141]", 34.968366);
+ map.insert("[UNIMOD:1142]", -60.003371);
+ map.insert("[UNIMOD:1143]", -28.0313);
+ map.insert("[UNIMOD:1144]", -16.013542);
+ map.insert("[UNIMOD:1145]", -1.997892);
+ map.insert("[UNIMOD:1146]", 16.027929);
+ map.insert("[UNIMOD:1147]", -74.019021);
+ map.insert("[UNIMOD:1148]", 6.018427);
+ map.insert("[UNIMOD:1149]", -16.997557);
+ map.insert("[UNIMOD:1150]", -33.987721);
+ map.insert("[UNIMOD:1151]", -2.981907);
+ map.insert("[UNIMOD:1152]", -44.008456);
+ map.insert("[UNIMOD:1153]", 55.038828);
+ map.insert("[UNIMOD:1154]", 32.022844);
+ map.insert("[UNIMOD:1155]", -43.005814);
+ map.insert("[UNIMOD:1156]", -11.033743);
+ map.insert("[UNIMOD:1157]", 14.999666);
+ map.insert("[UNIMOD:1158]", 33.025486);
+ map.insert("[UNIMOD:1159]", -57.021464);
+ map.insert("[UNIMOD:1160]", 16.997557);
+ map.insert("[UNIMOD:1161]", -16.990164);
+ map.insert("[UNIMOD:1162]", 14.01565);
+ map.insert("[UNIMOD:1163]", 42.058184);
+ map.insert("[UNIMOD:1164]", -14.974514);
+ map.insert("[UNIMOD:1165]", 72.036386);
+ map.insert("[UNIMOD:1166]", 5.956421);
+ map.insert("[UNIMOD:1167]", 17.974179);
+ map.insert("[UNIMOD:1168]", 31.989829);
+ map.insert("[UNIMOD:1169]", 50.01565);
+ map.insert("[UNIMOD:1170]", -40.0313);
+ map.insert("[UNIMOD:1171]", 31.042199);
+ map.insert("[UNIMOD:1172]", 33.987721);
+ map.insert("[UNIMOD:1173]", 16.990164);
+ map.insert("[UNIMOD:1174]", 2.01565);
+ map.insert("[UNIMOD:1175]", 89.026549);
+ map.insert("[UNIMOD:1176]", 66.010565);
+ map.insert("[UNIMOD:1177]", -57.021464);
+ map.insert("[UNIMOD:1178]", -25.049393);
+ map.insert("[UNIMOD:1179]", -13.031634);
+ map.insert("[UNIMOD:1180]", 19.009836);
+ map.insert("[UNIMOD:1181]", -71.037114);
+ map.insert("[UNIMOD:1182]", 2.981907);
+ map.insert("[UNIMOD:1183]", -14.01565);
+ map.insert("[UNIMOD:1184]", -41.026549);
+ map.insert("[UNIMOD:1185]", -27.010899);
+ map.insert("[UNIMOD:1186]", -28.990164);
+ map.insert("[UNIMOD:1187]", 58.020735);
+ map.insert("[UNIMOD:1188]", 35.004751);
+ map.insert("[UNIMOD:1189]", -85.063997);
+ map.insert("[UNIMOD:1190]", -41.074168);
+ map.insert("[UNIMOD:1191]", -27.058518);
+ map.insert("[UNIMOD:1192]", -42.058184);
+ map.insert("[UNIMOD:1193]", -57.032697);
+ map.insert("[UNIMOD:1194]", 6.962218);
+ map.insert("[UNIMOD:1195]", -9.032697);
+ map.insert("[UNIMOD:1196]", 27.994915);
+ map.insert("[UNIMOD:1197]", 42.010565);
+ map.insert("[UNIMOD:1198]", 50.026883);
+ map.insert("[UNIMOD:1199]", 41.062935);
+ map.insert("[UNIMOD:1200]", 44.008456);
+ map.insert("[UNIMOD:1201]", 41.026549);
+ map.insert("[UNIMOD:1202]", 12.036386);
+ map.insert("[UNIMOD:1203]", 1.961506);
+ map.insert("[UNIMOD:1204]", 13.979265);
+ map.insert("[UNIMOD:1205]", 27.994915);
+ map.insert("[UNIMOD:1206]", 46.020735);
+ map.insert("[UNIMOD:1207]", -44.026215);
+ map.insert("[UNIMOD:1208]", 36.011233);
+ map.insert("[UNIMOD:1209]", 27.010899);
+ map.insert("[UNIMOD:1210]", -1.979265);
+ map.insert("[UNIMOD:1211]", 85.031634);
+ map.insert("[UNIMOD:1212]", 62.01565);
+ map.insert("[UNIMOD:1213]", 3.940771);
+ map.insert("[UNIMOD:1214]", 37.990498);
+ map.insert("[UNIMOD:1215]", 29.026549);
+ map.insert("[UNIMOD:1216]", 14.974514);
+ map.insert("[UNIMOD:1217]", -2.01565);
+ map.insert("[UNIMOD:1218]", 28.990164);
+ map.insert("[UNIMOD:1219]", 57.032697);
+ map.insert("[UNIMOD:1220]", -12.036386);
+ map.insert("[UNIMOD:1221]", 1.979265);
+ map.insert("[UNIMOD:1222]", 87.010899);
+ map.insert("[UNIMOD:1223]", 63.994915);
+ map.insert("[UNIMOD:1224]", -115.042199);
+ map.insert("[UNIMOD:1225]", -71.05237);
+ map.insert("[UNIMOD:1226]", -57.03672);
+ map.insert("[UNIMOD:1227]", -39.010899);
+ map.insert("[UNIMOD:1228]", -49.020401);
+ map.insert("[UNIMOD:1229]", -57.98435);
+ map.insert("[UNIMOD:1230]", -55.038828);
+ map.insert("[UNIMOD:1231]", -72.036386);
+ map.insert("[UNIMOD:1232]", -89.026549);
+ map.insert("[UNIMOD:1233]", -58.020735);
+ map.insert("[UNIMOD:1234]", -85.031634);
+ map.insert("[UNIMOD:1235]", -87.010899);
+ map.insert("[UNIMOD:1236]", -23.015984);
+ map.insert("[UNIMOD:1237]", -92.026215);
+ map.insert("[UNIMOD:1238]", -34.020735);
+ map.insert("[UNIMOD:1239]", -106.041865);
+ map.insert("[UNIMOD:1240]", -34.968366);
+ map.insert("[UNIMOD:1241]", -32.022844);
+ map.insert("[UNIMOD:1242]", -66.010565);
+ map.insert("[UNIMOD:1243]", -35.004751);
+ map.insert("[UNIMOD:1244]", -6.962218);
+ map.insert("[UNIMOD:1245]", -62.01565);
+ map.insert("[UNIMOD:1246]", -63.994915);
+ map.insert("[UNIMOD:1247]", 23.015984);
+ map.insert("[UNIMOD:1248]", -49.979265);
+ map.insert("[UNIMOD:1287]", -156.101111);
+ map.insert("[UNIMOD:1288]", 156.101111);
+ map.insert("[UNIMOD:1289]", 70.041865);
+ map.insert("[UNIMOD:1290]", 114.042927);
+ map.insert("[UNIMOD:1291]", 34.068961);
+ map.insert("[UNIMOD:1292]", 242.101505);
+ map.insert("[UNIMOD:1293]", 343.149184);
+ map.insert("[UNIMOD:1296]", 3.010064);
+ map.insert("[UNIMOD:1297]", 4.007099);
+ map.insert("[UNIMOD:1298]", 5.010454);
+ map.insert("[UNIMOD:1299]", 10.062767);
+ map.insert("[UNIMOD:1300]", 5.028462);
+ map.insert("[UNIMOD:1301]", 128.094963);
+ map.insert("[UNIMOD:1302]", 148.109162);
+ map.insert("[UNIMOD:1303]", 291.095417);
+ map.insert("[UNIMOD:1304]", 307.090331);
+ map.insert("[UNIMOD:1305]", 42.04695);
+ map.insert("[UNIMOD:1306]", 48.084611);
+ map.insert("[UNIMOD:1321]", 145.12);
+ map.insert("[UNIMOD:1322]", 145.132163);
+ map.insert("[UNIMOD:1323]", 145.128307);
+ map.insert("[UNIMOD:1324]", 145.140471);
+ map.insert("[UNIMOD:1363]", 68.026215);
+ map.insert("[UNIMOD:1392]", 217.162932);
+ map.insert("[UNIMOD:1393]", 217.156612);
+ map.insert("[UNIMOD:1394]", 217.168776);
+ map.insert("[UNIMOD:1395]", 217.162456);
+ map.insert("[UNIMOD:1396]", 217.175096);
+ map.insert("[UNIMOD:1402]", 11.032077);
+ map.insert("[UNIMOD:1403]", 7.034695);
+ map.insert("[UNIMOD:1408]", 2204.772441);
+ map.insert("[UNIMOD:1409]", 1913.677025);
+ map.insert("[UNIMOD:1410]", 2059.734933);
+ map.insert("[UNIMOD:1411]", 2350.83035);
+ map.insert("[UNIMOD:1412]", 283.036187);
+ map.insert("[UNIMOD:1413]", 404.071978);
+ map.insert("[UNIMOD:1414]", 54.113505);
+ map.insert("[UNIMOD:1420]", 185.189198);
+ map.insert("[UNIMOD:1421]", 128.131349);
+ map.insert("[UNIMOD:1425]", 132.042259);
+ map.insert("[UNIMOD:1426]", 294.095082);
+ map.insert("[UNIMOD:1427]", 338.084912);
+ map.insert("[UNIMOD:1428]", 426.137341);
+ map.insert("[UNIMOD:1429]", 445.098527);
+ map.insert("[UNIMOD:1430]", 445.089011);
+ map.insert("[UNIMOD:1431]", 453.14824);
+ map.insert("[UNIMOD:1432]", 469.143155);
+ map.insert("[UNIMOD:1433]", 609.238118);
+ map.insert("[UNIMOD:1434]", 494.174789);
+ map.insert("[UNIMOD:1435]", 510.169704);
+ map.insert("[UNIMOD:1436]", 525.205755);
+ map.insert("[UNIMOD:1437]", 539.221405);
+ map.insert("[UNIMOD:1438]", 527.18502);
+ map.insert("[UNIMOD:1439]", 541.164284);
+ map.insert("[UNIMOD:1440]", 541.20067);
+ map.insert("[UNIMOD:1441]", 558.1796);
+ map.insert("[UNIMOD:1442]", 585.190499);
+ map.insert("[UNIMOD:1443]", 607.141834);
+ map.insert("[UNIMOD:1444]", 615.201064);
+ map.insert("[UNIMOD:1445]", 616.221465);
+ map.insert("[UNIMOD:1446]", 646.195644);
+ map.insert("[UNIMOD:1447]", 648.168383);
+ map.insert("[UNIMOD:1448]", 648.211294);
+ map.insert("[UNIMOD:1449]", 1008.36456);
+ map.insert("[UNIMOD:1450]", 1021.359809);
+ map.insert("[UNIMOD:1451]", 1024.359475);
+ map.insert("[UNIMOD:1452]", 1054.370039);
+ map.insert("[UNIMOD:1453]", 1129.390834);
+ map.insert("[UNIMOD:1454]", 1170.417383);
+ map.insert("[UNIMOD:1455]", 1183.412632);
+ map.insert("[UNIMOD:1456]", 1186.412298);
+ map.insert("[UNIMOD:1457]", 1227.438847);
+ map.insert("[UNIMOD:1458]", 1296.389194);
+ map.insert("[UNIMOD:1459]", 1332.470207);
+ map.insert("[UNIMOD:1460]", 1337.449137);
+ map.insert("[UNIMOD:1461]", 1345.465456);
+ map.insert("[UNIMOD:1462]", 1362.480772);
+ map.insert("[UNIMOD:1463]", 1373.496756);
+ map.insert("[UNIMOD:1464]", 1378.432776);
+ map.insert("[UNIMOD:1465]", 1378.475686);
+ map.insert("[UNIMOD:1466]", 1389.491671);
+ map.insert("[UNIMOD:1467]", 1403.507321);
+ map.insert("[UNIMOD:1468]", 1419.502235);
+ map.insert("[UNIMOD:1469]", 1430.51822);
+ map.insert("[UNIMOD:1470]", 1458.442017);
+ map.insert("[UNIMOD:1471]", 1483.464135);
+ map.insert("[UNIMOD:1472]", 1494.52303);
+ map.insert("[UNIMOD:1473]", 1499.501961);
+ map.insert("[UNIMOD:1474]", 1505.539015);
+ map.insert("[UNIMOD:1475]", 1519.554665);
+ map.insert("[UNIMOD:1476]", 1524.490684);
+ map.insert("[UNIMOD:1477]", 1524.533595);
+ map.insert("[UNIMOD:1478]", 1535.549579);
+ map.insert("[UNIMOD:1479]", 1540.485599);
+ map.insert("[UNIMOD:1480]", 1540.52851);
+ map.insert("[UNIMOD:1481]", 1549.56523);
+ map.insert("[UNIMOD:1482]", 1551.544494);
+ map.insert("[UNIMOD:1483]", 1564.539743);
+ map.insert("[UNIMOD:1484]", 1565.560144);
+ map.insert("[UNIMOD:1485]", 1576.576129);
+ map.insert("[UNIMOD:1486]", 1581.512148);
+ map.insert("[UNIMOD:1487]", 1581.555059);
+ map.insert("[UNIMOD:1488]", 1589.571378);
+ map.insert("[UNIMOD:1489]", 1592.571043);
+ map.insert("[UNIMOD:1490]", 1620.494841);
+ map.insert("[UNIMOD:1491]", 1620.602343);
+ map.insert("[UNIMOD:1492]", 1637.581274);
+ map.insert("[UNIMOD:1493]", 1645.516959);
+ map.insert("[UNIMOD:1494]", 1651.596924);
+ map.insert("[UNIMOD:1495]", 1661.52139);
+ map.insert("[UNIMOD:1496]", 1663.608157);
+ map.insert("[UNIMOD:1497]", 1665.612574);
+ map.insert("[UNIMOD:1498]", 1681.607488);
+ map.insert("[UNIMOD:1499]", 1686.543508);
+ map.insert("[UNIMOD:1500]", 1686.586419);
+ map.insert("[UNIMOD:1501]", 1694.602737);
+ map.insert("[UNIMOD:1502]", 1700.461172);
+ map.insert("[UNIMOD:1503]", 1702.538423);
+ map.insert("[UNIMOD:1504]", 1702.581333);
+ map.insert("[UNIMOD:1505]", 1708.618387);
+ map.insert("[UNIMOD:1506]", 1710.597652);
+ map.insert("[UNIMOD:1507]", 1722.634037);
+ map.insert("[UNIMOD:1508]", 1727.570057);
+ map.insert("[UNIMOD:1509]", 1727.612968);
+ map.insert("[UNIMOD:1510]", 1735.629286);
+ map.insert("[UNIMOD:1511]", 1736.649688);
+ map.insert("[UNIMOD:1512]", 1738.628952);
+ map.insert("[UNIMOD:1513]", 1743.564972);
+ map.insert("[UNIMOD:1514]", 1743.607882);
+ map.insert("[UNIMOD:1515]", 1774.559552);
+ map.insert("[UNIMOD:1516]", 1782.655167);
+ map.insert("[UNIMOD:1517]", 1784.591521);
+ map.insert("[UNIMOD:1518]", 1807.569782);
+ map.insert("[UNIMOD:1519]", 1809.666066);
+ map.insert("[UNIMOD:1520]", 1821.549047);
+ map.insert("[UNIMOD:1521]", 1823.574213);
+ map.insert("[UNIMOD:1522]", 1826.681382);
+ map.insert("[UNIMOD:1523]", 1832.601417);
+ map.insert("[UNIMOD:1524]", 1839.640245);
+ map.insert("[UNIMOD:1525]", 1840.660646);
+ map.insert("[UNIMOD:1526]", 1843.660312);
+ map.insert("[UNIMOD:1527]", 1848.596331);
+ map.insert("[UNIMOD:1528]", 1854.676296);
+ map.insert("[UNIMOD:1529]", 1856.655561);
+ map.insert("[UNIMOD:1530]", 1864.548335);
+ map.insert("[UNIMOD:1531]", 1864.634157);
+ map.insert("[UNIMOD:1532]", 1866.68753);
+ map.insert("[UNIMOD:1533]", 1868.691946);
+ map.insert("[UNIMOD:1534]", 1872.650475);
+ map.insert("[UNIMOD:1535]", 1884.686861);
+ map.insert("[UNIMOD:1536]", 1889.62288);
+ map.insert("[UNIMOD:1537]", 1889.665791);
+ map.insert("[UNIMOD:1538]", 1900.681776);
+ map.insert("[UNIMOD:1539]", 1901.505861);
+ map.insert("[UNIMOD:1540]", 1907.714079);
+ map.insert("[UNIMOD:1541]", 1914.697426);
+ map.insert("[UNIMOD:1542]", 1920.617461);
+ map.insert("[UNIMOD:1543]", 1928.553146);
+ map.insert("[UNIMOD:1544]", 1928.713076);
+ map.insert("[UNIMOD:1545]", 1929.671939);
+ map.insert("[UNIMOD:1546]", 1930.64943);
+ map.insert("[UNIMOD:1547]", 1930.69234);
+ map.insert("[UNIMOD:1548]", 1936.612375);
+ map.insert("[UNIMOD:1549]", 1946.687255);
+ map.insert("[UNIMOD:1550]", 1952.60729);
+ map.insert("[UNIMOD:1551]", 1954.703574);
+ map.insert("[UNIMOD:1552]", 1958.72364);
+ map.insert("[UNIMOD:1553]", 1969.622606);
+ map.insert("[UNIMOD:1554]", 1969.632122);
+ map.insert("[UNIMOD:1555]", 1971.718889);
+ map.insert("[UNIMOD:1556]", 1977.638925);
+ map.insert("[UNIMOD:1557]", 1978.659326);
+ map.insert("[UNIMOD:1558]", 1987.670893);
+ map.insert("[UNIMOD:1559]", 1987.713804);
+ map.insert("[UNIMOD:1560]", 1993.633839);
+ map.insert("[UNIMOD:1561]", 1995.730123);
+ map.insert("[UNIMOD:1562]", 1996.750524);
+ map.insert("[UNIMOD:1563]", 672.222527);
+ map.insert("[UNIMOD:1564]", 673.242928);
+ map.insert("[UNIMOD:1565]", 689.194932);
+ map.insert("[UNIMOD:1566]", 689.237843);
+ map.insert("[UNIMOD:1567]", 695.157878);
+ map.insert("[UNIMOD:1568]", 697.254162);
+ map.insert("[UNIMOD:1570]", 703.217108);
+ map.insert("[UNIMOD:1571]", 703.253493);
+ map.insert("[UNIMOD:1572]", 712.136808);
+ map.insert("[UNIMOD:1573]", 713.249076);
+ map.insert("[UNIMOD:1575]", 728.177625);
+ map.insert("[UNIMOD:1577]", 736.184427);
+ map.insert("[UNIMOD:1578]", 744.243657);
+ map.insert("[UNIMOD:1579]", 753.199743);
+ map.insert("[UNIMOD:1580]", 755.296027);
+ map.insert("[UNIMOD:1581]", 761.258973);
+ map.insert("[UNIMOD:1582]", 771.290941);
+ map.insert("[UNIMOD:1583]", 777.210976);
+ map.insert("[UNIMOD:1584]", 778.274288);
+ map.insert("[UNIMOD:1585]", 783.173922);
+ map.insert("[UNIMOD:1586]", 792.253553);
+ map.insert("[UNIMOD:1587]", 794.226292);
+ map.insert("[UNIMOD:1588]", 802.285522);
+ map.insert("[UNIMOD:1589]", 810.221207);
+ map.insert("[UNIMOD:1590]", 810.264117);
+ map.insert("[UNIMOD:1591]", 812.31749);
+ map.insert("[UNIMOD:1592]", 817.260035);
+ map.insert("[UNIMOD:1593]", 818.280436);
+ map.insert("[UNIMOD:1594]", 819.300837);
+ map.insert("[UNIMOD:1595]", 834.275351);
+ map.insert("[UNIMOD:1596]", 835.295752);
+ map.insert("[UNIMOD:1597]", 849.275017);
+ map.insert("[UNIMOD:1598]", 851.247756);
+ map.insert("[UNIMOD:1599]", 851.290667);
+ map.insert("[UNIMOD:1600]", 859.306985);
+ map.insert("[UNIMOD:1602]", 875.3019);
+ map.insert("[UNIMOD:1604]", 890.230448);
+ map.insert("[UNIMOD:1606]", 907.316881);
+ map.insert("[UNIMOD:1607]", 915.252567);
+ map.insert("[UNIMOD:1608]", 917.34885);
+ map.insert("[UNIMOD:1609]", 929.231831);
+ map.insert("[UNIMOD:1610]", 933.343765);
+ map.insert("[UNIMOD:1611]", 939.2638);
+ map.insert("[UNIMOD:1612]", 940.327112);
+ map.insert("[UNIMOD:1614]", 948.34343);
+ map.insert("[UNIMOD:1615]", 956.279116);
+ map.insert("[UNIMOD:1616]", 958.375399);
+ map.insert("[UNIMOD:1617]", 963.317944);
+ map.insert("[UNIMOD:1618]", 964.338345);
+ map.insert("[UNIMOD:1619]", 979.312859);
+ map.insert("[UNIMOD:1620]", 989.333594);
+ map.insert("[UNIMOD:1621]", 995.332925);
+ map.insert("[UNIMOD:1622]", 997.305665);
+ map.insert("[UNIMOD:1623]", 1003.232225);
+ map.insert("[UNIMOD:1624]", 1005.364894);
+ map.insert("[UNIMOD:1625]", 1011.32784);
+ map.insert("[UNIMOD:1626]", 1013.300579);
+ map.insert("[UNIMOD:1627]", 1013.34349);
+ map.insert("[UNIMOD:1628]", 1015.396863);
+ map.insert("[UNIMOD:1630]", 1031.344159);
+ map.insert("[UNIMOD:1631]", 1037.354723);
+ map.insert("[UNIMOD:1632]", 1050.16311);
+ map.insert("[UNIMOD:1633]", 1052.283272);
+ map.insert("[UNIMOD:1634]", 1052.354389);
+ map.insert("[UNIMOD:1635]", 1061.310475);
+ map.insert("[UNIMOD:1636]", 1062.386358);
+ map.insert("[UNIMOD:1637]", 1063.406759);
+ map.insert("[UNIMOD:1638]", 1078.381273);
+ map.insert("[UNIMOD:1639]", 1085.321709);
+ map.insert("[UNIMOD:1640]", 1091.284655);
+ map.insert("[UNIMOD:1641]", 1093.380938);
+ map.insert("[UNIMOD:1642]", 1101.316623);
+ map.insert("[UNIMOD:1643]", 1102.337025);
+ map.insert("[UNIMOD:1644]", 1110.396254);
+ map.insert("[UNIMOD:1645]", 1120.428223);
+ map.insert("[UNIMOD:1646]", 1136.423137);
+ map.insert("[UNIMOD:1647]", 1141.365682);
+ map.insert("[UNIMOD:1648]", 1143.406484);
+ map.insert("[UNIMOD:1649]", 1150.402402);
+ map.insert("[UNIMOD:1650]", 1151.422803);
+ map.insert("[UNIMOD:1651]", 1159.358488);
+ map.insert("[UNIMOD:1652]", 1161.454772);
+ map.insert("[UNIMOD:1653]", 1167.417718);
+ map.insert("[UNIMOD:1654]", 1168.438119);
+ map.insert("[UNIMOD:1655]", 1175.353403);
+ map.insert("[UNIMOD:1656]", 1182.293839);
+ map.insert("[UNIMOD:1657]", 1183.412632);
+ map.insert("[UNIMOD:1658]", 1208.444267);
+ map.insert("[UNIMOD:1659]", 1212.215934);
+ map.insert("[UNIMOD:1660]", 1214.407213);
+ map.insert("[UNIMOD:1661]", 1224.439181);
+ map.insert("[UNIMOD:1662]", 1230.359217);
+ map.insert("[UNIMOD:1663]", 1237.342563);
+ map.insert("[UNIMOD:1664]", 1238.418446);
+ map.insert("[UNIMOD:1665]", 1240.434096);
+ map.insert("[UNIMOD:1666]", 1247.374532);
+ map.insert("[UNIMOD:1667]", 1256.454163);
+ map.insert("[UNIMOD:1668]", 1264.389848);
+ map.insert("[UNIMOD:1669]", 1272.449077);
+ map.insert("[UNIMOD:1670]", 1278.369113);
+ map.insert("[UNIMOD:1671]", 1282.481046);
+ map.insert("[UNIMOD:1672]", 1286.40319);
+ map.insert("[UNIMOD:1673]", 1288.401081);
+ map.insert("[UNIMOD:1674]", 1294.364027);
+ map.insert("[UNIMOD:1675]", 1296.460311);
+ map.insert("[UNIMOD:1676]", 1297.480712);
+ map.insert("[UNIMOD:1678]", 1304.395996);
+ map.insert("[UNIMOD:1679]", 1305.416397);
+ map.insert("[UNIMOD:1680]", 1307.512681);
+ map.insert("[UNIMOD:1681]", 1312.455225);
+ map.insert("[UNIMOD:1682]", 1313.475627);
+ map.insert("[UNIMOD:1683]", 1321.411312);
+ map.insert("[UNIMOD:1684]", 1329.470541);
+ map.insert("[UNIMOD:1685]", 1339.50251);
+ map.insert("[UNIMOD:1686]", 1345.465456);
+ map.insert("[UNIMOD:1687]", 1353.481775);
+ map.insert("[UNIMOD:1688]", 1370.49709);
+ map.insert("[UNIMOD:1689]", 1371.517491);
+ map.insert("[UNIMOD:1690]", 1374.268757);
+ map.insert("[UNIMOD:1691]", 1376.460036);
+ map.insert("[UNIMOD:1692]", 1386.492005);
+ map.insert("[UNIMOD:1693]", 1390.439301);
+ map.insert("[UNIMOD:1694]", 1392.41204);
+ map.insert("[UNIMOD:1695]", 1393.432441);
+ map.insert("[UNIMOD:1696]", 1402.48692);
+ map.insert("[UNIMOD:1697]", 1402.512072);
+ map.insert("[UNIMOD:1698]", 1418.506986);
+ map.insert("[UNIMOD:1699]", 1424.427021);
+ map.insert("[UNIMOD:1700]", 1427.518554);
+ map.insert("[UNIMOD:1701]", 1428.538955);
+ map.insert("[UNIMOD:1702]", 1440.421936);
+ map.insert("[UNIMOD:1703]", 1443.538621);
+ map.insert("[UNIMOD:1705]", 1448.456013);
+ map.insert("[UNIMOD:1706]", 1449.501567);
+ map.insert("[UNIMOD:1707]", 1450.453905);
+ map.insert("[UNIMOD:1708]", 1458.513134);
+ map.insert("[UNIMOD:1709]", 1459.533535);
+ map.insert("[UNIMOD:1711]", 1466.44882);
+ map.insert("[UNIMOD:1712]", 1474.508049);
+ map.insert("[UNIMOD:1713]", 1482.443734);
+ map.insert("[UNIMOD:1714]", 1490.502964);
+ map.insert("[UNIMOD:1715]", 1491.523365);
+ map.insert("[UNIMOD:1716]", 1497.4434);
+ map.insert("[UNIMOD:1717]", 1515.534598);
+ map.insert("[UNIMOD:1718]", 1516.554999);
+ map.insert("[UNIMOD:1719]", 1517.5754);
+ map.insert("[UNIMOD:1720]", 1531.529513);
+ map.insert("[UNIMOD:1721]", 1532.549914);
+ map.insert("[UNIMOD:1722]", 1533.570315);
+ map.insert("[UNIMOD:1723]", 1536.321581);
+ map.insert("[UNIMOD:1724]", 1538.469949);
+ map.insert("[UNIMOD:1725]", 1547.524427);
+ map.insert("[UNIMOD:1726]", 1564.564895);
+ map.insert("[UNIMOD:1727]", 1573.576463);
+ map.insert("[UNIMOD:1728]", 1574.596864);
+ map.insert("[UNIMOD:1729]", 1593.493521);
+ map.insert("[UNIMOD:1730]", 1605.591444);
+ map.insert("[UNIMOD:1732]", 1620.442414);
+ map.insert("[UNIMOD:1733]", 1621.586359);
+ map.insert("[UNIMOD:1735]", 1631.618328);
+ map.insert("[UNIMOD:1736]", 1643.501309);
+ map.insert("[UNIMOD:1737]", 1659.496223);
+ map.insert("[UNIMOD:1738]", 1677.587422);
+ map.insert("[UNIMOD:1739]", 1678.607823);
+ map.insert("[UNIMOD:1740]", 1679.628224);
+ map.insert("[UNIMOD:1742]", 1698.374404);
+ map.insert("[UNIMOD:1743]", 1713.671426);
+ map.insert("[UNIMOD:1744]", 1755.546345);
+ map.insert("[UNIMOD:1745]", 1757.544236);
+ map.insert("[UNIMOD:1746]", 1793.671151);
+ map.insert("[UNIMOD:1747]", 1797.593295);
+ map.insert("[UNIMOD:1748]", 1805.554132);
+ map.insert("[UNIMOD:1749]", 1806.630015);
+ map.insert("[UNIMOD:1750]", 1823.64533);
+ map.insert("[UNIMOD:1751]", 1824.665732);
+ map.insert("[UNIMOD:1752]", 1854.614759);
+ map.insert("[UNIMOD:1753]", 1860.427228);
+ map.insert("[UNIMOD:1754]", 1864.67188);
+ map.insert("[UNIMOD:1755]", 1900.583852);
+ map.insert("[UNIMOD:1756]", 1911.53783);
+ map.insert("[UNIMOD:1757]", 1927.532745);
+ map.insert("[UNIMOD:1758]", 1969.703239);
+ map.insert("[UNIMOD:1759]", 1991.494645);
+ map.insert("[UNIMOD:1760]", 1022.38021);
+ map.insert("[UNIMOD:1761]", 1038.375125);
+ map.insert("[UNIMOD:1762]", 1079.401674);
+ map.insert("[UNIMOD:1763]", 1095.396588);
+ map.insert("[UNIMOD:1764]", 1118.331939);
+ map.insert("[UNIMOD:1765]", 1184.433033);
+ map.insert("[UNIMOD:1766]", 1200.427948);
+ map.insert("[UNIMOD:1767]", 1225.459583);
+ map.insert("[UNIMOD:1768]", 1241.454497);
+ map.insert("[UNIMOD:1769]", 1257.449412);
+ map.insert("[UNIMOD:1770]", 1346.485857);
+ map.insert("[UNIMOD:1771]", 1387.512406);
+ map.insert("[UNIMOD:1772]", 1501.555334);
+ map.insert("[UNIMOD:1773]", 1548.544828);
+ map.insert("[UNIMOD:1774]", 1590.591779);
+ map.insert("[UNIMOD:1775]", 1647.613242);
+ map.insert("[UNIMOD:1776]", 1704.634706);
+ map.insert("[UNIMOD:1777]", 1751.624201);
+ map.insert("[UNIMOD:1778]", 1752.644602);
+ map.insert("[UNIMOD:1779]", 1784.634431);
+ map.insert("[UNIMOD:1780]", 1825.660981);
+ map.insert("[UNIMOD:1781]", 1850.692615);
+ map.insert("[UNIMOD:1782]", 1897.68211);
+ map.insert("[UNIMOD:1783]", 1898.702511);
+ map.insert("[UNIMOD:1784]", 1938.708659);
+ map.insert("[UNIMOD:1785]", 1955.723975);
+ map.insert("[UNIMOD:1786]", 698.238177);
+ map.insert("[UNIMOD:1825]", 324.035867);
+ map.insert("[UNIMOD:1826]", -3.994915);
+ map.insert("[UNIMOD:1827]", -0.979006);
+ map.insert("[UNIMOD:1830]", 361.146012);
+ map.insert("[UNIMOD:1833]", 220.182715);
+ map.insert("[UNIMOD:1834]", 165.164326);
+ map.insert("[UNIMOD:1835]", 263.237491);
+ map.insert("[UNIMOD:1836]", 188.156501);
+ map.insert("[UNIMOD:1837]", 168.187801);
+ map.insert("[UNIMOD:1838]", 224.250401);
+ map.insert("[UNIMOD:1841]", 389.090154);
+ map.insert("[UNIMOD:1842]", 158.003765);
+ map.insert("[UNIMOD:1843]", 305.041287);
+ map.insert("[UNIMOD:1844]", 226.047738);
+ map.insert("[UNIMOD:1870]", 52.911464);
+ map.insert("[UNIMOD:1871]", 151.996571);
+ map.insert("[UNIMOD:1876]", 138.06808);
+ map.insert("[UNIMOD:1877]", 259.141973);
+ map.insert("[UNIMOD:1878]", 176.01433);
+ map.insert("[UNIMOD:1879]", 175.030314);
+ map.insert("[UNIMOD:1880]", 279.077658);
+ map.insert("[UNIMOD:1881]", 54.010565);
+ map.insert("[UNIMOD:1882]", 85.982635);
+ map.insert("[UNIMOD:1883]", 103.9932);
+ map.insert("[UNIMOD:1884]", 196.084792);
+ map.insert("[UNIMOD:1885]", 111.032028);
+ map.insert("[UNIMOD:1886]", 85.052764);
+ map.insert("[UNIMOD:1887]", 213.111341);
+ map.insert("[UNIMOD:1888]", 214.095357);
+ map.insert("[UNIMOD:1889]", 317.158686);
+ map.insert("[UNIMOD:1891]", 172.01289);
+ map.insert("[UNIMOD:1892]", 113.995309);
+ map.insert("[UNIMOD:1893]", 173.980921);
+ map.insert("[UNIMOD:1895]", 219.089543);
+ map.insert("[UNIMOD:1896]", 158.003765);
+ map.insert("[UNIMOD:1897]", 226.047738);
+ map.insert("[UNIMOD:1898]", 138.06808);
+ map.insert("[UNIMOD:1899]", 196.084792);
+ map.insert("[UNIMOD:1900]", 172.01289);
+ map.insert("[UNIMOD:1901]", 113.995309);
+ map.insert("[UNIMOD:1902]", 173.980921);
+ map.insert("[UNIMOD:1903]", 219.089543);
+ map.insert("[UNIMOD:1905]", 96.021129);
+ map.insert("[UNIMOD:1906]", 113.047679);
+ map.insert("[UNIMOD:1907]", 114.031694);
+ map.insert("[UNIMOD:1908]", 217.095023);
+ map.insert("[UNIMOD:1909]", 96.021129);
+ map.insert("[UNIMOD:1910]", 23.958063);
+ map.insert("[UNIMOD:1911]", 139.110947);
+ map.insert("[UNIMOD:1912]", 122.084398);
+ map.insert("[UNIMOD:1914]", -32.008456);
+ map.insert("[UNIMOD:1915]", -30.010565);
+ map.insert("[UNIMOD:1916]", -10.031969);
+ map.insert("[UNIMOD:1917]", 4.97893);
+ map.insert("[UNIMOD:1918]", 13.979265);
+ map.insert("[UNIMOD:1922]", 18.010565);
+ map.insert("[UNIMOD:1923]", 27.958529);
+ map.insert("[UNIMOD:1924]", 43.953444);
+ map.insert("[UNIMOD:1925]", 63.979659);
+ map.insert("[UNIMOD:1926]", 72.021129);
+ map.insert("[UNIMOD:1927]", 80.026215);
+ map.insert("[UNIMOD:1928]", 122.073165);
+ map.insert("[UNIMOD:1929]", 154.026609);
+ map.insert("[UNIMOD:1930]", 264.084518);
+ map.insert("[UNIMOD:1931]", 335.121631);
+ map.insert("[UNIMOD:1932]", 404.062462);
+ map.insert("[UNIMOD:1933]", 440.152991);
+ map.insert("[UNIMOD:1934]", 486.11556);
+ map.insert("[UNIMOD:1935]", 572.19525);
+ map.insert("[UNIMOD:1936]", 588.190165);
+ map.insert("[UNIMOD:1937]", 602.205815);
+ map.insert("[UNIMOD:1938]", 824.243382);
+ map.insert("[UNIMOD:1939]", 835.259366);
+ map.insert("[UNIMOD:1940]", 865.269931);
+ map.insert("[UNIMOD:1941]", 940.284201);
+ map.insert("[UNIMOD:1942]", 961.302294);
+ map.insert("[UNIMOD:1943]", 970.301291);
+ map.insert("[UNIMOD:1944]", 986.296206);
+ map.insert("[UNIMOD:1945]", 1027.322755);
+ map.insert("[UNIMOD:1946]", 1127.41157);
+ map.insert("[UNIMOD:1947]", 1175.396314);
+ map.insert("[UNIMOD:1948]", 1200.385037);
+ map.insert("[UNIMOD:1949]", 1255.433762);
+ map.insert("[UNIMOD:1950]", 1330.490942);
+ map.insert("[UNIMOD:1951]", 1346.442946);
+ map.insert("[UNIMOD:1952]", 1442.394675);
+ map.insert("[UNIMOD:1953]", 1458.475412);
+ map.insert("[UNIMOD:1954]", 1467.469221);
+ map.insert("[UNIMOD:1955]", 1522.554331);
+ map.insert("[UNIMOD:1956]", 1588.452584);
+ map.insert("[UNIMOD:1957]", 1661.554784);
+ map.insert("[UNIMOD:1958]", 1734.510493);
+ map.insert("[UNIMOD:1959]", 1767.619116);
+ map.insert("[UNIMOD:1960]", 1767.644268);
+ map.insert("[UNIMOD:1961]", 1792.65075);
+ map.insert("[UNIMOD:1962]", 1823.607608);
+ map.insert("[UNIMOD:1963]", 1848.639242);
+ map.insert("[UNIMOD:1964]", 1880.666794);
+ map.insert("[UNIMOD:1965]", 1881.687195);
+ map.insert("[UNIMOD:1966]", 1914.654515);
+ map.insert("[UNIMOD:1967]", 1955.687589);
+ map.insert("[UNIMOD:1968]", 1968.682838);
+ map.insert("[UNIMOD:1969]", 1997.698154);
+ map.insert("[UNIMOD:1970]", 162.125595);
+ map.insert("[UNIMOD:1971]", 176.744957);
+ map.insert("[UNIMOD:1972]", 210.16198);
+ map.insert("[UNIMOD:1973]", 216.099774);
+ map.insert("[UNIMOD:1974]", 234.073953);
+ map.insert("[UNIMOD:1975]", 248.19876);
+ map.insert("[UNIMOD:1976]", 249.981018);
+ map.insert("[UNIMOD:1977]", 301.986514);
+ map.insert("[UNIMOD:1978]", 306.095082);
+ map.insert("[UNIMOD:1979]", 420.051719);
+ map.insert("[UNIMOD:1992]", 159.068414);
+ map.insert("[UNIMOD:1999]", 55.989829);
+ map.insert("[UNIMOD:2000]", 82.041865);
+ map.insert("[UNIMOD:2018]", -18.010565);
+ map.insert("[UNIMOD:2020]", -2.01565);
+ map.insert("[UNIMOD:2026]", -17.026549);
+ map.insert("[UNIMOD:2028]", 2861.000054);
+ map.insert("[UNIMOD:2029]", 2352.846);
+ map
+}
+
+/// Returns a map from numerical Unimod modification ids to their masses.
+///
+/// # Arguments
+///
+/// None
+///
+/// # Returns
+///
+/// * `HashMap<u32, f64>` - a map of unimod modification numerical ids to their mass
+///
+/// # Example
+///
+/// ```
+/// use mscore::chemistry::unimod::unimod_modifications_mass_numerical;
+///
+/// let mass = unimod_modifications_mass_numerical();
+/// assert_eq!(mass.get(&1), Some(&42.010565));
+/// ```
+pub fn unimod_modifications_mass_numerical() -> HashMap<u32, f64> {
+ let mut map = HashMap::new();
+ map.insert(1, 42.010565);
+ map.insert(2, -0.984016);
+ map.insert(3, 226.077598);
+ map.insert(4, 57.021464);
+ map.insert(5, 43.005814);
+ map.insert(6, 58.005479);
+ map.insert(7, 0.984016);
+ map.insert(8, 486.251206);
+ map.insert(9, 494.30142);
+ map.insert(10, -29.992806);
+ map.insert(11, -48.003371);
+ map.insert(12, 450.275205);
+ map.insert(13, 442.224991);
+ map.insert(17, 99.068414);
+ map.insert(20, 414.193691);
+ map.insert(21, 79.966331);
+ map.insert(23, -18.010565);
+ map.insert(24, 71.037114);
+ map.insert(25, 119.037114);
+ map.insert(26, 39.994915);
+ map.insert(27, -18.010565);
+ map.insert(28, -17.026549);
+ map.insert(29, 127.063329);
+ map.insert(30, 21.981943);
+ map.insert(31, 105.057849);
+ map.insert(34, 14.01565);
+ map.insert(35, 15.994915);
+ map.insert(36, 28.0313);
+ map.insert(37, 42.04695);
+ map.insert(39, 45.987721);
+ map.insert(40, 79.956815);
+ map.insert(41, 162.052824);
+ map.insert(42, 188.032956);
+ map.insert(43, 203.079373);
+ map.insert(44, 204.187801);
+ map.insert(45, 210.198366);
+ map.insert(46, 229.014009);
+ map.insert(47, 238.229666);
+ map.insert(48, 272.250401);
+ map.insert(49, 340.085794);
+ map.insert(50, 783.141486);
+ map.insert(51, 788.725777);
+ map.insert(52, 42.021798);
+ map.insert(53, 156.11503);
+ map.insert(55, 305.068156);
+ map.insert(142, 349.137281);
+ map.insert(143, 406.158745);
+ map.insert(144, 486.158471);
+ map.insert(145, 495.19519);
+ map.insert(146, 511.190105);
+ map.insert(147, 552.216654);
+ map.insert(148, 568.211569);
+ map.insert(149, 656.227613);
+ map.insert(150, 698.274563);
+ map.insert(151, 700.253828);
+ map.insert(152, 714.269478);
+ map.insert(153, 730.264392);
+ map.insert(154, 821.280102);
+ map.insert(155, 846.311736);
+ map.insert(156, 860.327386);
+ map.insert(157, 862.306651);
+ map.insert(158, 876.322301);
+ map.insert(159, 892.317216);
+ map.insert(160, 947.323029);
+ map.insert(161, 972.283547);
+ map.insert(214, 144.102063);
+ map.insert(342, 15.010899);
+ map.insert(343, 199.066699);
+ map.insert(344, -43.053433);
+ map.insert(345, 47.984744);
+ map.insert(348, -23.015984);
+ map.insert(349, -22.031969);
+ map.insert(350, 19.989829);
+ map.insert(351, 3.994915);
+ map.insert(352, -1.031634);
+ map.insert(353, 241.088497);
+ map.insert(354, 44.985078);
+ map.insert(357, 258.115047);
+ map.insert(359, 13.979265);
+ map.insert(360, -30.010565);
+ map.insert(361, 240.104482);
+ map.insert(368, -33.987721);
+ map.insert(369, -27.994915);
+ map.insert(371, 86.036779);
+ map.insert(372, -42.021798);
+ map.insert(374, -1.007825);
+ map.insert(375, 142.110613);
+ map.insert(376, 220.182715);
+ map.insert(377, 576.511761);
+ map.insert(378, 72.021129);
+ map.insert(379, 87.068414);
+ map.insert(380, 266.203451);
+ map.insert(381, 14.96328);
+ map.insert(382, -33.003705);
+ map.insert(385, -17.026549);
+ map.insert(387, 586.279135);
+ map.insert(388, 588.294785);
+ map.insert(389, 584.263485);
+ map.insert(390, 616.177295);
+ map.insert(391, 521.884073);
+ map.insert(392, 29.974179);
+ map.insert(393, 340.100562);
+ map.insert(394, 123.00853);
+ map.insert(395, 881.146904);
+ map.insert(396, 197.04531);
+ map.insert(397, 469.716159);
+ map.insert(398, 595.612807);
+ map.insert(400, -94.041865);
+ map.insert(401, -2.01565);
+ map.insert(402, -17.992806);
+ map.insert(403, -15.010899);
+ map.insert(405, 329.05252);
+ map.insert(407, 146.036779);
+ map.insert(408, 148.037173);
+ map.insert(409, 454.088965);
+ map.insert(410, 634.662782);
+ map.insert(413, 345.047435);
+ map.insert(414, 30.010565);
+ map.insert(415, 1620.930224);
+ map.insert(416, 418.137616);
+ map.insert(417, 306.025302);
+ map.insert(419, 154.00311);
+ map.insert(420, 15.977156);
+ map.insert(421, 31.972071);
+ map.insert(422, 70.005479);
+ map.insert(423, 79.91652);
+ map.insert(424, 1572.985775);
+ map.insert(425, 31.989829);
+ map.insert(426, 126.104465);
+ map.insert(428, 283.045704);
+ map.insert(429, 242.019154);
+ map.insert(431, 236.214016);
+ map.insert(432, 368.344302);
+ map.insert(433, 264.187801);
+ map.insert(434, 294.183109);
+ map.insert(435, 109.052764);
+ map.insert(436, 614.161645);
+ map.insert(437, 386.110369);
+ map.insert(438, 24.995249);
+ map.insert(439, 342.786916);
+ map.insert(440, 42.021798);
+ map.insert(442, 438.094051);
+ map.insert(443, 456.104615);
+ map.insert(444, 922.834855);
+ map.insert(445, 59.04969);
+ map.insert(447, -15.994915);
+ map.insert(448, 831.197041);
+ map.insert(449, 154.135765);
+ map.insert(478, 421.073241);
+ map.insert(494, 672.298156);
+ map.insert(495, 684.298156);
+ map.insert(499, 298.022748);
+ map.insert(523, 452.245726);
+ map.insert(526, -48.003371);
+ map.insert(528, 14.999666);
+ map.insert(529, 29.039125);
+ map.insert(530, 37.955882);
+ map.insert(531, 61.921774);
+ map.insert(532, 144.105918);
+ map.insert(533, 144.099599);
+ map.insert(534, 155.821022);
+ map.insert(535, 383.228103);
+ map.insert(540, 15.994915);
+ map.insert(541, 30.010565);
+ map.insert(542, 43.989829);
+ map.insert(543, 26.01565);
+ map.insert(544, -14.01565);
+ map.insert(545, 58.005479);
+ map.insert(546, 28.0313);
+ map.insert(547, 44.059229);
+ map.insert(548, -15.977156);
+ map.insert(549, 83.070128);
+ map.insert(550, 60.054144);
+ map.insert(551, 53.091927);
+ map.insert(552, -45.987721);
+ map.insert(553, -43.989829);
+ map.insert(554, 22.031969);
+ map.insert(555, -0.984016);
+ map.insert(556, -58.005479);
+ map.insert(557, 48.036386);
+ map.insert(558, 14.01565);
+ map.insert(559, -15.958529);
+ map.insert(560, -58.005479);
+ map.insert(561, -0.984016);
+ map.insert(562, -14.01565);
+ map.insert(563, -0.94763);
+ map.insert(564, -72.021129);
+ map.insert(565, -29.974179);
+ map.insert(566, -60.036386);
+ map.insert(567, -44.059229);
+ map.insert(568, -33.98435);
+ map.insert(569, 15.994915);
+ map.insert(570, -48.0);
+ map.insert(571, 14.01565);
+ map.insert(572, 30.010565);
+ map.insert(573, 129.057849);
+ map.insert(574, 72.021129);
+ map.insert(575, 42.04695);
+ map.insert(576, 58.005479);
+ map.insert(577, 45.987721);
+ map.insert(578, 99.079647);
+ map.insert(580, -40.006148);
+ map.insert(581, 26.004417);
+ map.insert(582, -9.000334);
+ map.insert(584, 19.042199);
+ map.insert(585, -23.974848);
+ map.insert(588, -12.036386);
+ map.insert(589, 0.958863);
+ map.insert(590, 15.010899);
+ map.insert(594, -27.047285);
+ map.insert(595, -14.052036);
+ map.insert(596, 0.94763);
+ map.insert(597, -0.036386);
+ map.insert(598, 2.945522);
+ map.insert(599, 28.006148);
+ map.insert(600, -15.010899);
+ map.insert(601, -26.052036);
+ map.insert(602, 33.98435);
+ map.insert(603, 72.995249);
+ map.insert(604, -16.0313);
+ map.insert(605, -14.01565);
+ map.insert(606, 23.974848);
+ map.insert(607, 14.974514);
+ map.insert(608, 17.956421);
+ map.insert(609, 43.017047);
+ map.insert(610, -29.992806);
+ map.insert(611, 25.060626);
+ map.insert(613, -2.945522);
+ map.insert(614, -17.956421);
+ map.insert(615, -31.972071);
+ map.insert(616, -27.010899);
+ map.insert(617, -12.995249);
+ map.insert(618, 14.052036);
+ map.insert(619, 49.020401);
+ map.insert(620, 23.015984);
+ map.insert(621, 0.984016);
+ map.insert(622, -0.958863);
+ map.insert(623, -10.020735);
+ map.insert(624, -26.01565);
+ map.insert(625, 40.006148);
+ map.insert(626, 31.005814);
+ map.insert(627, 3.994915);
+ map.insert(628, 59.048347);
+ map.insert(629, 16.0313);
+ map.insert(630, -31.005814);
+ map.insert(631, 0.036386);
+ map.insert(632, 0.984016);
+ map.insert(633, 9.000334);
+ map.insert(634, 28.042534);
+ map.insert(635, -14.974514);
+ map.insert(636, -69.069083);
+ map.insert(637, 29.978202);
+ map.insert(638, -55.053433);
+ map.insert(639, -59.048347);
+ map.insert(640, -28.006148);
+ map.insert(641, -19.042199);
+ map.insert(642, -28.042534);
+ map.insert(643, -25.060626);
+ map.insert(644, -53.091927);
+ map.insert(645, -43.017047);
+ map.insert(646, -99.079647);
+ map.insert(647, 60.036386);
+ map.insert(648, -15.994915);
+ map.insert(649, 99.047285);
+ map.insert(650, 14.01565);
+ map.insert(651, 27.010899);
+ map.insert(652, 10.020735);
+ map.insert(653, 76.0313);
+ map.insert(654, 15.977156);
+ map.insert(655, 69.069083);
+ map.insert(656, 26.052036);
+ map.insert(657, -30.010565);
+ map.insert(658, -14.01565);
+ map.insert(659, -30.010565);
+ map.insert(660, 12.995249);
+ map.insert(661, 27.047285);
+ map.insert(662, -3.994915);
+ map.insert(663, 29.992806);
+ map.insert(664, 12.036386);
+ map.insert(665, 55.053433);
+ map.insert(666, 48.0);
+ map.insert(667, -28.0313);
+ map.insert(668, 29.974179);
+ map.insert(669, 31.972071);
+ map.insert(670, 15.958529);
+ map.insert(671, 14.01565);
+ map.insert(672, -42.04695);
+ map.insert(673, -99.047285);
+ map.insert(674, -83.070128);
+ map.insert(675, -29.978202);
+ map.insert(676, -129.057849);
+ map.insert(677, -72.995249);
+ map.insert(678, -15.994915);
+ map.insert(679, -76.0313);
+ map.insert(680, -49.020401);
+ map.insert(681, -26.004417);
+ map.insert(682, -48.036386);
+ map.insert(683, -60.054144);
+ map.insert(695, 7.017164);
+ map.insert(730, 304.20536);
+ map.insert(731, 304.19904);
+ map.insert(772, 5.016774);
+ map.insert(827, 572.181134);
+ map.insert(888, 140.094963);
+ map.insert(889, 144.102063);
+ map.insert(897, 3.98814);
+ map.insert(950, 6.008178);
+ map.insert(951, 37.946941);
+ map.insert(952, 53.919289);
+ map.insert(953, 55.919696);
+ map.insert(954, 61.913495);
+ map.insert(955, 105.897267);
+ map.insert(956, 21.969392);
+ map.insert(1014, 87.032028);
+ map.insert(1018, 138.06808);
+ map.insert(1019, 144.10574);
+ map.insert(1044, 31.972071);
+ map.insert(1045, 76.0313);
+ map.insert(1046, 66.021798);
+ map.insert(1047, 42.04695);
+ map.insert(1048, 57.057849);
+ map.insert(1049, 60.003371);
+ map.insert(1050, 43.005814);
+ map.insert(1051, 57.021464);
+ map.insert(1052, 85.063997);
+ map.insert(1053, 115.042199);
+ map.insert(1054, 92.026215);
+ map.insert(1055, -31.972071);
+ map.insert(1056, 12.017759);
+ map.insert(1057, 26.033409);
+ map.insert(1058, 34.049727);
+ map.insert(1059, 10.07488);
+ map.insert(1060, 25.085779);
+ map.insert(1061, 28.0313);
+ map.insert(1062, 11.033743);
+ map.insert(1063, -5.956421);
+ map.insert(1064, 25.049393);
+ map.insert(1065, -1.961506);
+ map.insert(1066, -3.940771);
+ map.insert(1067, -12.017759);
+ map.insert(1068, 32.041471);
+ map.insert(1069, -1.942879);
+ map.insert(1070, 13.06802);
+ map.insert(1071, 16.013542);
+ map.insert(1072, -17.974179);
+ map.insert(1073, 13.031634);
+ map.insert(1074, 41.074168);
+ map.insert(1075, -27.994915);
+ map.insert(1076, -13.979265);
+ map.insert(1077, 71.05237);
+ map.insert(1078, -26.033409);
+ map.insert(1079, 18.025821);
+ map.insert(1080, 8.016319);
+ map.insert(1081, -15.958529);
+ map.insert(1082, 1.997892);
+ map.insert(1083, -14.999666);
+ map.insert(1084, -31.989829);
+ map.insert(1085, 27.058518);
+ map.insert(1086, -42.010565);
+ map.insert(1087, -27.994915);
+ map.insert(1088, 57.03672);
+ map.insert(1089, 34.020735);
+ map.insert(1090, -76.0313);
+ map.insert(1091, -32.041471);
+ map.insert(1092, -18.025821);
+ map.insert(1093, -90.04695);
+ map.insert(1094, -10.009502);
+ map.insert(1095, -18.973451);
+ map.insert(1096, -16.027929);
+ map.insert(1097, -33.025486);
+ map.insert(1098, -50.01565);
+ map.insert(1099, -19.009836);
+ map.insert(1100, 9.032697);
+ map.insert(1101, -46.020735);
+ map.insert(1102, 39.010899);
+ map.insert(1103, 90.04695);
+ map.insert(1104, 80.037448);
+ map.insert(1105, 56.0626);
+ map.insert(1106, 71.073499);
+ map.insert(1107, 74.019021);
+ map.insert(1108, 57.021464);
+ map.insert(1109, 40.0313);
+ map.insert(1110, 71.037114);
+ map.insert(1111, 44.026215);
+ map.insert(1112, 106.041865);
+ map.insert(1113, -66.021798);
+ map.insert(1114, -34.049727);
+ map.insert(1115, -8.016319);
+ map.insert(1116, 10.009502);
+ map.insert(1117, -80.037448);
+ map.insert(1119, -8.963949);
+ map.insert(1120, -6.018427);
+ map.insert(1121, -50.026883);
+ map.insert(1122, -36.011233);
+ map.insert(1123, -37.990498);
+ map.insert(1124, 49.020401);
+ map.insert(1125, -42.04695);
+ map.insert(1126, -10.07488);
+ map.insert(1127, 1.942879);
+ map.insert(1128, 15.958529);
+ map.insert(1129, -56.0626);
+ map.insert(1130, 49.979265);
+ map.insert(1131, -57.057849);
+ map.insert(1132, -25.085779);
+ map.insert(1133, -13.06802);
+ map.insert(1134, 18.973451);
+ map.insert(1135, -71.073499);
+ map.insert(1136, 8.963949);
+ map.insert(1137, -31.042199);
+ map.insert(1138, -41.062935);
+ map.insert(1139, -29.026549);
+ map.insert(1140, 57.98435);
+ map.insert(1141, 34.968366);
+ map.insert(1142, -60.003371);
+ map.insert(1143, -28.0313);
+ map.insert(1144, -16.013542);
+ map.insert(1145, -1.997892);
+ map.insert(1146, 16.027929);
+ map.insert(1147, -74.019021);
+ map.insert(1148, 6.018427);
+ map.insert(1149, -16.997557);
+ map.insert(1150, -33.987721);
+ map.insert(1151, -2.981907);
+ map.insert(1152, -44.008456);
+ map.insert(1153, 55.038828);
+ map.insert(1154, 32.022844);
+ map.insert(1155, -43.005814);
+ map.insert(1156, -11.033743);
+ map.insert(1157, 14.999666);
+ map.insert(1158, 33.025486);
+ map.insert(1159, -57.021464);
+ map.insert(1160, 16.997557);
+ map.insert(1161, -16.990164);
+ map.insert(1162, 14.01565);
+ map.insert(1163, 42.058184);
+ map.insert(1164, -14.974514);
+ map.insert(1165, 72.036386);
+ map.insert(1166, 5.956421);
+ map.insert(1167, 17.974179);
+ map.insert(1168, 31.989829);
+ map.insert(1169, 50.01565);
+ map.insert(1170, -40.0313);
+ map.insert(1171, 31.042199);
+ map.insert(1172, 33.987721);
+ map.insert(1173, 16.990164);
+ map.insert(1174, 2.01565);
+ map.insert(1175, 89.026549);
+ map.insert(1176, 66.010565);
+ map.insert(1177, -57.021464);
+ map.insert(1178, -25.049393);
+ map.insert(1179, -13.031634);
+ map.insert(1180, 19.009836);
+ map.insert(1181, -71.037114);
+ map.insert(1182, 2.981907);
+ map.insert(1183, -14.01565);
+ map.insert(1184, -41.026549);
+ map.insert(1185, -27.010899);
+ map.insert(1186, -28.990164);
+ map.insert(1187, 58.020735);
+ map.insert(1188, 35.004751);
+ map.insert(1189, -85.063997);
+ map.insert(1190, -41.074168);
+ map.insert(1191, -27.058518);
+ map.insert(1192, -42.058184);
+ map.insert(1193, -57.032697);
+ map.insert(1194, 6.962218);
+ map.insert(1195, -9.032697);
+ map.insert(1196, 27.994915);
+ map.insert(1197, 42.010565);
+ map.insert(1198, 50.026883);
+ map.insert(1199, 41.062935);
+ map.insert(1200, 44.008456);
+ map.insert(1201, 41.026549);
+ map.insert(1202, 12.036386);
+ map.insert(1203, 1.961506);
+ map.insert(1204, 13.979265);
+ map.insert(1205, 27.994915);
+ map.insert(1206, 46.020735);
+ map.insert(1207, -44.026215);
+ map.insert(1208, 36.011233);
+ map.insert(1209, 27.010899);
+ map.insert(1210, -1.979265);
+ map.insert(1211, 85.031634);
+ map.insert(1212, 62.01565);
+ map.insert(1213, 3.940771);
+ map.insert(1214, 37.990498);
+ map.insert(1215, 29.026549);
+ map.insert(1216, 14.974514);
+ map.insert(1217, -2.01565);
+ map.insert(1218, 28.990164);
+ map.insert(1219, 57.032697);
+ map.insert(1220, -12.036386);
+ map.insert(1221, 1.979265);
+ map.insert(1222, 87.010899);
+ map.insert(1223, 63.994915);
+ map.insert(1224, -115.042199);
+ map.insert(1225, -71.05237);
+ map.insert(1226, -57.03672);
+ map.insert(1227, -39.010899);
+ map.insert(1228, -49.020401);
+ map.insert(1229, -57.98435);
+ map.insert(1230, -55.038828);
+ map.insert(1231, -72.036386);
+ map.insert(1232, -89.026549);
+ map.insert(1233, -58.020735);
+ map.insert(1234, -85.031634);
+ map.insert(1235, -87.010899);
+ map.insert(1236, -23.015984);
+ map.insert(1237, -92.026215);
+ map.insert(1238, -34.020735);
+ map.insert(1239, -106.041865);
+ map.insert(1240, -34.968366);
+ map.insert(1241, -32.022844);
+ map.insert(1242, -66.010565);
+ map.insert(1243, -35.004751);
+ map.insert(1244, -6.962218);
+ map.insert(1245, -62.01565);
+ map.insert(1246, -63.994915);
+ map.insert(1247, 23.015984);
+ map.insert(1248, -49.979265);
+ map.insert(1287, -156.101111);
+ map.insert(1288, 156.101111);
+ map.insert(1289, 70.041865);
+ map.insert(1290, 114.042927);
+ map.insert(1291, 34.068961);
+ map.insert(1292, 242.101505);
+ map.insert(1293, 343.149184);
+ map.insert(1296, 3.010064);
+ map.insert(1297, 4.007099);
+ map.insert(1298, 5.010454);
+ map.insert(1299, 10.062767);
+ map.insert(1300, 5.028462);
+ map.insert(1301, 128.094963);
+ map.insert(1302, 148.109162);
+ map.insert(1303, 291.095417);
+ map.insert(1304, 307.090331);
+ map.insert(1305, 42.04695);
+ map.insert(1306, 48.084611);
+ map.insert(1321, 145.12);
+ map.insert(1322, 145.132163);
+ map.insert(1323, 145.128307);
+ map.insert(1324, 145.140471);
+ map.insert(1363, 68.026215);
+ map.insert(1392, 217.162932);
+ map.insert(1393, 217.156612);
+ map.insert(1394, 217.168776);
+ map.insert(1395, 217.162456);
+ map.insert(1396, 217.175096);
+ map.insert(1402, 11.032077);
+ map.insert(1403, 7.034695);
+ map.insert(1408, 2204.772441);
+ map.insert(1409, 1913.677025);
+ map.insert(1410, 2059.734933);
+ map.insert(1411, 2350.83035);
+ map.insert(1412, 283.036187);
+ map.insert(1413, 404.071978);
+ map.insert(1414, 54.113505);
+ map.insert(1420, 185.189198);
+ map.insert(1421, 128.131349);
+ map.insert(1425, 132.042259);
+ map.insert(1426, 294.095082);
+ map.insert(1427, 338.084912);
+ map.insert(1428, 426.137341);
+ map.insert(1429, 445.098527);
+ map.insert(1430, 445.089011);
+ map.insert(1431, 453.14824);
+ map.insert(1432, 469.143155);
+ map.insert(1433, 609.238118);
+ map.insert(1434, 494.174789);
+ map.insert(1435, 510.169704);
+ map.insert(1436, 525.205755);
+ map.insert(1437, 539.221405);
+ map.insert(1438, 527.18502);
+ map.insert(1439, 541.164284);
+ map.insert(1440, 541.20067);
+ map.insert(1441, 558.1796);
+ map.insert(1442, 585.190499);
+ map.insert(1443, 607.141834);
+ map.insert(1444, 615.201064);
+ map.insert(1445, 616.221465);
+ map.insert(1446, 646.195644);
+ map.insert(1447, 648.168383);
+ map.insert(1448, 648.211294);
+ map.insert(1449, 1008.36456);
+ map.insert(1450, 1021.359809);
+ map.insert(1451, 1024.359475);
+ map.insert(1452, 1054.370039);
+ map.insert(1453, 1129.390834);
+ map.insert(1454, 1170.417383);
+ map.insert(1455, 1183.412632);
+ map.insert(1456, 1186.412298);
+ map.insert(1457, 1227.438847);
+ map.insert(1458, 1296.389194);
+ map.insert(1459, 1332.470207);
+ map.insert(1460, 1337.449137);
+ map.insert(1461, 1345.465456);
+ map.insert(1462, 1362.480772);
+ map.insert(1463, 1373.496756);
+ map.insert(1464, 1378.432776);
+ map.insert(1465, 1378.475686);
+ map.insert(1466, 1389.491671);
+ map.insert(1467, 1403.507321);
+ map.insert(1468, 1419.502235);
+ map.insert(1469, 1430.51822);
+ map.insert(1470, 1458.442017);
+ map.insert(1471, 1483.464135);
+ map.insert(1472, 1494.52303);
+ map.insert(1473, 1499.501961);
+ map.insert(1474, 1505.539015);
+ map.insert(1475, 1519.554665);
+ map.insert(1476, 1524.490684);
+ map.insert(1477, 1524.533595);
+ map.insert(1478, 1535.549579);
+ map.insert(1479, 1540.485599);
+ map.insert(1480, 1540.52851);
+ map.insert(1481, 1549.56523);
+ map.insert(1482, 1551.544494);
+ map.insert(1483, 1564.539743);
+ map.insert(1484, 1565.560144);
+ map.insert(1485, 1576.576129);
+ map.insert(1486, 1581.512148);
+ map.insert(1487, 1581.555059);
+ map.insert(1488, 1589.571378);
+ map.insert(1489, 1592.571043);
+ map.insert(1490, 1620.494841);
+ map.insert(1491, 1620.602343);
+ map.insert(1492, 1637.581274);
+ map.insert(1493, 1645.516959);
+ map.insert(1494, 1651.596924);
+ map.insert(1495, 1661.52139);
+ map.insert(1496, 1663.608157);
+ map.insert(1497, 1665.612574);
+ map.insert(1498, 1681.607488);
+ map.insert(1499, 1686.543508);
+ map.insert(1500, 1686.586419);
+ map.insert(1501, 1694.602737);
+ map.insert(1502, 1700.461172);
+ map.insert(1503, 1702.538423);
+ map.insert(1504, 1702.581333);
+ map.insert(1505, 1708.618387);
+ map.insert(1506, 1710.597652);
+ map.insert(1507, 1722.634037);
+ map.insert(1508, 1727.570057);
+ map.insert(1509, 1727.612968);
+ map.insert(1510, 1735.629286);
+ map.insert(1511, 1736.649688);
+ map.insert(1512, 1738.628952);
+ map.insert(1513, 1743.564972);
+ map.insert(1514, 1743.607882);
+ map.insert(1515, 1774.559552);
+ map.insert(1516, 1782.655167);
+ map.insert(1517, 1784.591521);
+ map.insert(1518, 1807.569782);
+ map.insert(1519, 1809.666066);
+ map.insert(1520, 1821.549047);
+ map.insert(1521, 1823.574213);
+ map.insert(1522, 1826.681382);
+ map.insert(1523, 1832.601417);
+ map.insert(1524, 1839.640245);
+ map.insert(1525, 1840.660646);
+ map.insert(1526, 1843.660312);
+ map.insert(1527, 1848.596331);
+ map.insert(1528, 1854.676296);
+ map.insert(1529, 1856.655561);
+ map.insert(1530, 1864.548335);
+ map.insert(1531, 1864.634157);
+ map.insert(1532, 1866.68753);
+ map.insert(1533, 1868.691946);
+ map.insert(1534, 1872.650475);
+ map.insert(1535, 1884.686861);
+ map.insert(1536, 1889.62288);
+ map.insert(1537, 1889.665791);
+ map.insert(1538, 1900.681776);
+ map.insert(1539, 1901.505861);
+ map.insert(1540, 1907.714079);
+ map.insert(1541, 1914.697426);
+ map.insert(1542, 1920.617461);
+ map.insert(1543, 1928.553146);
+ map.insert(1544, 1928.713076);
+ map.insert(1545, 1929.671939);
+ map.insert(1546, 1930.64943);
+ map.insert(1547, 1930.69234);
+ map.insert(1548, 1936.612375);
+ map.insert(1549, 1946.687255);
+ map.insert(1550, 1952.60729);
+ map.insert(1551, 1954.703574);
+ map.insert(1552, 1958.72364);
+ map.insert(1553, 1969.622606);
+ map.insert(1554, 1969.632122);
+ map.insert(1555, 1971.718889);
+ map.insert(1556, 1977.638925);
+ map.insert(1557, 1978.659326);
+ map.insert(1558, 1987.670893);
+ map.insert(1559, 1987.713804);
+ map.insert(1560, 1993.633839);
+ map.insert(1561, 1995.730123);
+ map.insert(1562, 1996.750524);
+ map.insert(1563, 672.222527);
+ map.insert(1564, 673.242928);
+ map.insert(1565, 689.194932);
+ map.insert(1566, 689.237843);
+ map.insert(1567, 695.157878);
+ map.insert(1568, 697.254162);
+ map.insert(1570, 703.217108);
+ map.insert(1571, 703.253493);
+ map.insert(1572, 712.136808);
+ map.insert(1573, 713.249076);
+ map.insert(1575, 728.177625);
+ map.insert(1577, 736.184427);
+ map.insert(1578, 744.243657);
+ map.insert(1579, 753.199743);
+ map.insert(1580, 755.296027);
+ map.insert(1581, 761.258973);
+ map.insert(1582, 771.290941);
+ map.insert(1583, 777.210976);
+ map.insert(1584, 778.274288);
+ map.insert(1585, 783.173922);
+ map.insert(1586, 792.253553);
+ map.insert(1587, 794.226292);
+ map.insert(1588, 802.285522);
+ map.insert(1589, 810.221207);
+ map.insert(1590, 810.264117);
+ map.insert(1591, 812.31749);
+ map.insert(1592, 817.260035);
+ map.insert(1593, 818.280436);
+ map.insert(1594, 819.300837);
+ map.insert(1595, 834.275351);
+ map.insert(1596, 835.295752);
+ map.insert(1597, 849.275017);
+ map.insert(1598, 851.247756);
+ map.insert(1599, 851.290667);
+ map.insert(1600, 859.306985);
+ map.insert(1602, 875.3019);
+ map.insert(1604, 890.230448);
+ map.insert(1606, 907.316881);
+ map.insert(1607, 915.252567);
+ map.insert(1608, 917.34885);
+ map.insert(1609, 929.231831);
+ map.insert(1610, 933.343765);
+ map.insert(1611, 939.2638);
+ map.insert(1612, 940.327112);
+ map.insert(1614, 948.34343);
+ map.insert(1615, 956.279116);
+ map.insert(1616, 958.375399);
+ map.insert(1617, 963.317944);
+ map.insert(1618, 964.338345);
+ map.insert(1619, 979.312859);
+ map.insert(1620, 989.333594);
+ map.insert(1621, 995.332925);
+ map.insert(1622, 997.305665);
+ map.insert(1623, 1003.232225);
+ map.insert(1624, 1005.364894);
+ map.insert(1625, 1011.32784);
+ map.insert(1626, 1013.300579);
+ map.insert(1627, 1013.34349);
+ map.insert(1628, 1015.396863);
+ map.insert(1630, 1031.344159);
+ map.insert(1631, 1037.354723);
+ map.insert(1632, 1050.16311);
+ map.insert(1633, 1052.283272);
+ map.insert(1634, 1052.354389);
+ map.insert(1635, 1061.310475);
+ map.insert(1636, 1062.386358);
+ map.insert(1637, 1063.406759);
+ map.insert(1638, 1078.381273);
+ map.insert(1639, 1085.321709);
+ map.insert(1640, 1091.284655);
+ map.insert(1641, 1093.380938);
+ map.insert(1642, 1101.316623);
+ map.insert(1643, 1102.337025);
+ map.insert(1644, 1110.396254);
+ map.insert(1645, 1120.428223);
+ map.insert(1646, 1136.423137);
+ map.insert(1647, 1141.365682);
+ map.insert(1648, 1143.406484);
+ map.insert(1649, 1150.402402);
+ map.insert(1650, 1151.422803);
+ map.insert(1651, 1159.358488);
+ map.insert(1652, 1161.454772);
+ map.insert(1653, 1167.417718);
+ map.insert(1654, 1168.438119);
+ map.insert(1655, 1175.353403);
+ map.insert(1656, 1182.293839);
+ map.insert(1657, 1183.412632);
+ map.insert(1658, 1208.444267);
+ map.insert(1659, 1212.215934);
+ map.insert(1660, 1214.407213);
+ map.insert(1661, 1224.439181);
+ map.insert(1662, 1230.359217);
+ map.insert(1663, 1237.342563);
+ map.insert(1664, 1238.418446);
+ map.insert(1665, 1240.434096);
+ map.insert(1666, 1247.374532);
+ map.insert(1667, 1256.454163);
+ map.insert(1668, 1264.389848);
+ map.insert(1669, 1272.449077);
+ map.insert(1670, 1278.369113);
+ map.insert(1671, 1282.481046);
+ map.insert(1672, 1286.40319);
+ map.insert(1673, 1288.401081);
+ map.insert(1674, 1294.364027);
+ map.insert(1675, 1296.460311);
+ map.insert(1676, 1297.480712);
+ map.insert(1678, 1304.395996);
+ map.insert(1679, 1305.416397);
+ map.insert(1680, 1307.512681);
+ map.insert(1681, 1312.455225);
+ map.insert(1682, 1313.475627);
+ map.insert(1683, 1321.411312);
+ map.insert(1684, 1329.470541);
+ map.insert(1685, 1339.50251);
+ map.insert(1686, 1345.465456);
+ map.insert(1687, 1353.481775);
+ map.insert(1688, 1370.49709);
+ map.insert(1689, 1371.517491);
+ map.insert(1690, 1374.268757);
+ map.insert(1691, 1376.460036);
+ map.insert(1692, 1386.492005);
+ map.insert(1693, 1390.439301);
+ map.insert(1694, 1392.41204);
+ map.insert(1695, 1393.432441);
+ map.insert(1696, 1402.48692);
+ map.insert(1697, 1402.512072);
+ map.insert(1698, 1418.506986);
+ map.insert(1699, 1424.427021);
+ map.insert(1700, 1427.518554);
+ map.insert(1701, 1428.538955);
+ map.insert(1702, 1440.421936);
+ map.insert(1703, 1443.538621);
+ map.insert(1705, 1448.456013);
+ map.insert(1706, 1449.501567);
+ map.insert(1707, 1450.453905);
+ map.insert(1708, 1458.513134);
+ map.insert(1709, 1459.533535);
+ map.insert(1711, 1466.44882);
+ map.insert(1712, 1474.508049);
+ map.insert(1713, 1482.443734);
+ map.insert(1714, 1490.502964);
+ map.insert(1715, 1491.523365);
+ map.insert(1716, 1497.4434);
+ map.insert(1717, 1515.534598);
+ map.insert(1718, 1516.554999);
+ map.insert(1719, 1517.5754);
+ map.insert(1720, 1531.529513);
+ map.insert(1721, 1532.549914);
+ map.insert(1722, 1533.570315);
+ map.insert(1723, 1536.321581);
+ map.insert(1724, 1538.469949);
+ map.insert(1725, 1547.524427);
+ map.insert(1726, 1564.564895);
+ map.insert(1727, 1573.576463);
+ map.insert(1728, 1574.596864);
+ map.insert(1729, 1593.493521);
+ map.insert(1730, 1605.591444);
+ map.insert(1732, 1620.442414);
+ map.insert(1733, 1621.586359);
+ map.insert(1735, 1631.618328);
+ map.insert(1736, 1643.501309);
+ map.insert(1737, 1659.496223);
+ map.insert(1738, 1677.587422);
+ map.insert(1739, 1678.607823);
+ map.insert(1740, 1679.628224);
+ map.insert(1742, 1698.374404);
+ map.insert(1743, 1713.671426);
+ map.insert(1744, 1755.546345);
+ map.insert(1745, 1757.544236);
+ map.insert(1746, 1793.671151);
+ map.insert(1747, 1797.593295);
+ map.insert(1748, 1805.554132);
+ map.insert(1749, 1806.630015);
+ map.insert(1750, 1823.64533);
+ map.insert(1751, 1824.665732);
+ map.insert(1752, 1854.614759);
+ map.insert(1753, 1860.427228);
+ map.insert(1754, 1864.67188);
+ map.insert(1755, 1900.583852);
+ map.insert(1756, 1911.53783);
+ map.insert(1757, 1927.532745);
+ map.insert(1758, 1969.703239);
+ map.insert(1759, 1991.494645);
+ map.insert(1760, 1022.38021);
+ map.insert(1761, 1038.375125);
+ map.insert(1762, 1079.401674);
+ map.insert(1763, 1095.396588);
+ map.insert(1764, 1118.331939);
+ map.insert(1765, 1184.433033);
+ map.insert(1766, 1200.427948);
+ map.insert(1767, 1225.459583);
+ map.insert(1768, 1241.454497);
+ map.insert(1769, 1257.449412);
+ map.insert(1770, 1346.485857);
+ map.insert(1771, 1387.512406);
+ map.insert(1772, 1501.555334);
+ map.insert(1773, 1548.544828);
+ map.insert(1774, 1590.591779);
+ map.insert(1775, 1647.613242);
+ map.insert(1776, 1704.634706);
+ map.insert(1777, 1751.624201);
+ map.insert(1778, 1752.644602);
+ map.insert(1779, 1784.634431);
+ map.insert(1780, 1825.660981);
+ map.insert(1781, 1850.692615);
+ map.insert(1782, 1897.68211);
+ map.insert(1783, 1898.702511);
+ map.insert(1784, 1938.708659);
+ map.insert(1785, 1955.723975);
+ map.insert(1786, 698.238177);
+ map.insert(1825, 324.035867);
+ map.insert(1826, -3.994915);
+ map.insert(1827, -0.979006);
+ map.insert(1830, 361.146012);
+ map.insert(1833, 220.182715);
+ map.insert(1834, 165.164326);
+ map.insert(1835, 263.237491);
+ map.insert(1836, 188.156501);
+ map.insert(1837, 168.187801);
+ map.insert(1838, 224.250401);
+ map.insert(1841, 389.090154);
+ map.insert(1842, 158.003765);
+ map.insert(1843, 305.041287);
+ map.insert(1844, 226.047738);
+ map.insert(1870, 52.911464);
+ map.insert(1871, 151.996571);
+ map.insert(1876, 138.06808);
+ map.insert(1877, 259.141973);
+ map.insert(1878, 176.01433);
+ map.insert(1879, 175.030314);
+ map.insert(1880, 279.077658);
+ map.insert(1881, 54.010565);
+ map.insert(1882, 85.982635);
+ map.insert(1883, 103.9932);
+ map.insert(1884, 196.084792);
+ map.insert(1885, 111.032028);
+ map.insert(1886, 85.052764);
+ map.insert(1887, 213.111341);
+ map.insert(1888, 214.095357);
+ map.insert(1889, 317.158686);
+ map.insert(1891, 172.01289);
+ map.insert(1892, 113.995309);
+ map.insert(1893, 173.980921);
+ map.insert(1895, 219.089543);
+ map.insert(1896, 158.003765);
+ map.insert(1897, 226.047738);
+ map.insert(1898, 138.06808);
+ map.insert(1899, 196.084792);
+ map.insert(1900, 172.01289);
+ map.insert(1901, 113.995309);
+ map.insert(1902, 173.980921);
+ map.insert(1903, 219.089543);
+ map.insert(1905, 96.021129);
+ map.insert(1906, 113.047679);
+ map.insert(1907, 114.031694);
+ map.insert(1908, 217.095023);
+ map.insert(1909, 96.021129);
+ map.insert(1910, 23.958063);
+ map.insert(1911, 139.110947);
+ map.insert(1912, 122.084398);
+ map.insert(1914, -32.008456);
+ map.insert(1915, -30.010565);
+ map.insert(1916, -10.031969);
+ map.insert(1917, 4.97893);
+ map.insert(1918, 13.979265);
+ map.insert(1922, 18.010565);
+ map.insert(1923, 27.958529);
+ map.insert(1924, 43.953444);
+ map.insert(1925, 63.979659);
+ map.insert(1926, 72.021129);
+ map.insert(1927, 80.026215);
+ map.insert(1928, 122.073165);
+ map.insert(1929, 154.026609);
+ map.insert(1930, 264.084518);
+ map.insert(1931, 335.121631);
+ map.insert(1932, 404.062462);
+ map.insert(1933, 440.152991);
+ map.insert(1934, 486.11556);
+ map.insert(1935, 572.19525);
+ map.insert(1936, 588.190165);
+ map.insert(1937, 602.205815);
+ map.insert(1938, 824.243382);
+ map.insert(1939, 835.259366);
+ map.insert(1940, 865.269931);
+ map.insert(1941, 940.284201);
+ map.insert(1942, 961.302294);
+ map.insert(1943, 970.301291);
+ map.insert(1944, 986.296206);
+ map.insert(1945, 1027.322755);
+ map.insert(1946, 1127.41157);
+ map.insert(1947, 1175.396314);
+ map.insert(1948, 1200.385037);
+ map.insert(1949, 1255.433762);
+ map.insert(1950, 1330.490942);
+ map.insert(1951, 1346.442946);
+ map.insert(1952, 1442.394675);
+ map.insert(1953, 1458.475412);
+ map.insert(1954, 1467.469221);
+ map.insert(1955, 1522.554331);
+ map.insert(1956, 1588.452584);
+ map.insert(1957, 1661.554784);
+ map.insert(1958, 1734.510493);
+ map.insert(1959, 1767.619116);
+ map.insert(1960, 1767.644268);
+ map.insert(1961, 1792.65075);
+ map.insert(1962, 1823.607608);
+ map.insert(1963, 1848.639242);
+ map.insert(1964, 1880.666794);
+ map.insert(1965, 1881.687195);
+ map.insert(1966, 1914.654515);
+ map.insert(1967, 1955.687589);
+ map.insert(1968, 1968.682838);
+ map.insert(1969, 1997.698154);
+ map.insert(1970, 162.125595);
+ map.insert(1971, 176.744957);
+ map.insert(1972, 210.16198);
+ map.insert(1973, 216.099774);
+ map.insert(1974, 234.073953);
+ map.insert(1975, 248.19876);
+ map.insert(1976, 249.981018);
+ map.insert(1977, 301.986514);
+ map.insert(1978, 306.095082);
+ map.insert(1979, 420.051719);
+ map.insert(1992, 159.068414);
+ map.insert(1999, 55.989829);
+ map.insert(2000, 82.041865);
+ map.insert(2018, -18.010565);
+ map.insert(2020, -2.01565);
+ map.insert(2026, -17.026549);
+ map.insert(2028, 2861.000054);
+ map.insert(2029, 2352.846);
+ map
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181
use regex::Regex;
+use crate::chemistry::unimod::unimod_modifications_mass;
+
+/// Convert a peptide sequence with UNIMOD annotations to a list of tokens
+///
+/// # Arguments
+///
+/// * `sequence` - a string slice of the peptide sequence
+/// * `group_modifications` - a boolean indicating whether to group the amino acid before the UNIMOD with the UNIMOD
+///
+/// # Returns
+///
+/// * `Vec<String>` - a vector of strings representing the tokens
+///
+/// # Example
+///
+/// ```
+/// use mscore::chemistry::utility::unimod_sequence_to_tokens;
+///
+/// let sequence = "PEPTIDE[UNIMOD:1]H";
+/// let tokens = unimod_sequence_to_tokens(sequence, false);
+/// assert_eq!(tokens, vec!["P", "E", "P", "T", "I", "D", "E", "[UNIMOD:1]", "H"]);
+/// let tokens = unimod_sequence_to_tokens(sequence, true);
+/// assert_eq!(tokens, vec!["P", "E", "P", "T", "I", "D", "E[UNIMOD:1]", "H"]);
+/// ```
+pub fn unimod_sequence_to_tokens(sequence: &str, group_modifications: bool) -> Vec<String> {
+ let pattern = Regex::new(r"\[UNIMOD:\d+\]").unwrap();
+ let mut tokens = Vec::new();
+ let mut last_index = 0;
+
+ for mat in pattern.find_iter(sequence) {
+ if group_modifications {
+ // When grouping, include the amino acid before the UNIMOD in the token
+ let pre_mod_sequence = &sequence[last_index..mat.start()];
+ let aa_sequence = if pre_mod_sequence.is_empty() {
+ ""
+ } else {
+ &pre_mod_sequence[..pre_mod_sequence.len() - 1]
+ };
+ tokens.extend(aa_sequence.chars().map(|c| c.to_string()));
+
+ // Group the last amino acid with the UNIMOD as one token
+ let grouped_mod = format!("{}{}", pre_mod_sequence.chars().last().unwrap_or_default().to_string(), &sequence[mat.start()..mat.end()]);
+ tokens.push(grouped_mod);
+ } else {
+ // Extract the amino acids before the current UNIMOD and add them as individual tokens
+ let aa_sequence = &sequence[last_index..mat.start()];
+ tokens.extend(aa_sequence.chars().map(|c| c.to_string()));
+
+ // Add the UNIMOD as its own token
+ let unimod = &sequence[mat.start()..mat.end()];
+ tokens.push(unimod.to_string());
+ }
+
+ // Update last_index to the end of the current UNIMOD
+ last_index = mat.end();
+ }
+
+ if !group_modifications || last_index < sequence.len() {
+ // Add the remaining amino acids after the last UNIMOD as individual tokens
+ let remaining_aa_sequence = &sequence[last_index..];
+ tokens.extend(remaining_aa_sequence.chars().map(|c| c.to_string()));
+ }
+
+ tokens
+}
+
+/// Convert a peptide sequence with UNIMOD annotations to a tuple of plain sequence and for each
+/// position in the sequence, the mass of the modification at that position (0 if no modification),
+/// which is the representation of sequence nad modifications used by SAGE
+///
+/// # Arguments
+///
+/// * `input_string` - a string slice of the peptide sequence
+///
+/// # Returns
+///
+/// * `(String, Vec<f64>)` - a tuple of the plain sequence and a vector of f64 representing the mass
+/// of the modification at each position in the sequence
+///
+/// # Example
+///
+/// ```
+/// use mscore::chemistry::utility::find_unimod_patterns;
+///
+/// let sequence = "PEPTIDE[UNIMOD:1]H";
+/// let (stripped_sequence, mods) = find_unimod_patterns(sequence);
+/// assert_eq!(stripped_sequence, "PEPTIDEH");
+/// assert_eq!(mods, vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.010565, 0.0]);
+/// ```
+pub fn find_unimod_patterns(input_string: &str) -> (String, Vec<f64>) {
+ let results = extract_unimod_patterns(input_string);
+ let stripped_sequence = remove_unimod_annotation(input_string);
+ let index_list = generate_index_list(&results, input_string);
+ let mods = calculate_modifications(&index_list, &stripped_sequence);
+ (stripped_sequence, mods)
+}
+
+fn remove_unimod_annotation(sequence: &str) -> String {
+ let pattern = Regex::new(r"\[UNIMOD:\d+]").unwrap();
+ pattern.replace_all(sequence, "").to_string()
+}
+
+fn extract_unimod_patterns(input_string: &str) -> Vec<(usize, usize, String)> {
+ let pattern = Regex::new(r"\[UNIMOD:\d+]").unwrap();
+ pattern.find_iter(input_string)
+ .map(|mat| (mat.start(), mat.end(), mat.as_str().to_string()))
+ .collect()
+}
+
/// Map annotation spans of the annotated `sequence` onto positions of the stripped sequence.
///
/// # Arguments
///
/// * `results` - `(start, end, text)` spans of the annotations inside `sequence`, in order
/// * `sequence` - the annotated sequence the spans refer to
///
/// # Returns
///
/// * `Vec<(usize, String)>` - for each span, the index of the residue directly in front of
/// the annotation (counted in the sequence with all earlier annotations removed) together
/// with the annotation text sliced from `sequence`
///
/// Annotations that have no residue in front of them (at the very start of the sequence,
/// or stacked directly behind such an annotation) are assigned index 0.
fn generate_index_list(results: &[(usize, usize, String)], sequence: &str) -> Vec<(usize, String)> {
    let mut index_list = Vec::with_capacity(results.len());
    // Total length of the annotations already seen, i.e. how far later offsets shift left
    let mut chars_removed_counter: usize = 0;

    for (start, end, _) in results {
        // `start - 1 - removed`, clamped at 0: a plain subtraction underflows when the
        // annotation is not preceded by any residue (e.g. "[UNIMOD:1][UNIMOD:2]A")
        let residue_index = start.saturating_sub(chars_removed_counter + 1);

        index_list.push((residue_index, sequence[*start..*end].to_string()));
        chars_removed_counter += end - start;
    }

    index_list
}
+
+fn calculate_modifications(index_list: &[(usize, String)], stripped_sequence: &str) -> Vec<f64> {
+ let mut mods = vec![0.0; stripped_sequence.len()];
+ for (index, mod_str) in index_list {
+ if let Some(mass) = unimod_modifications_mass().get(mod_str.as_str()) {
+ mods[*index] += mass;
+ }
+ }
+ mods
+}
+
/// Rearrange a flat prosit array into nested vectors of shape (29, 2, 3)
///
/// The flat input is read as six consecutive runs of 29 values. For each index `c` of the
/// last axis (0, 1, 2) the first run fills `[row][0][c]` (Y ion values) and the second
/// run fills `[row][1][c]` (B ion values).
///
/// # Arguments
///
/// * `flat_array` - a vector of f64 representing the flat prosit array (at least 174 values;
/// shorter inputs panic on indexing)
///
/// # Returns
///
/// * `Vec<Vec<Vec<f64>>>` - a 3D array of shape (29, 2, 3)
///
/// # Example
///
/// ```
/// use mscore::chemistry::utility::reshape_prosit_array;
///
/// let flat_array = vec![0.0; 174];
/// let reshaped_array = reshape_prosit_array(flat_array);
/// assert_eq!(reshaped_array.len(), 29);
/// assert_eq!(reshaped_array[0].len(), 2);
/// assert_eq!(reshaped_array[0][0].len(), 3);
/// ```
pub fn reshape_prosit_array(flat_array: Vec<f64>) -> Vec<Vec<Vec<f64>>> {
    const ROWS: usize = 29;
    const ION_KINDS: usize = 2;
    const LAST_AXIS: usize = 3;

    let mut cube: Vec<Vec<Vec<f64>>> = vec![vec![vec![0.0; LAST_AXIS]; ION_KINDS]; ROWS];

    for c in 0..LAST_AXIS {
        // ion == 0 holds the Y ion run, ion == 1 the B ion run that follows it
        for ion in 0..ION_KINDS {
            let run_start = (c * ION_KINDS + ion) * ROWS;
            for row in 0..ROWS {
                cube[row][ion][c] = flat_array[run_start + row];
            }
        }
    }

    cube
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602
use std::collections::{HashMap};
+use bincode::{Decode, Encode};
+use itertools::Itertools;
+use regex::Regex;
+use serde::{Deserialize, Serialize};
+use crate::algorithm::peptide::{calculate_peptide_mono_isotopic_mass, calculate_peptide_product_ion_mono_isotopic_mass, peptide_sequence_to_atomic_composition};
+use crate::chemistry::amino_acid::{amino_acid_masses};
+use crate::chemistry::formulas::calculate_mz;
+use crate::chemistry::utility::{find_unimod_patterns, reshape_prosit_array, unimod_sequence_to_tokens};
+use crate::data::spectrum::MzSpectrum;
+use crate::simulation::annotation::{MzSpectrumAnnotated, ContributionSource, SignalAttributes, SourceType, PeakAnnotation};
+
+// helper types for easier reading
+type Mass = f64;
+type Abundance = f64;
+type IsotopeDistribution = Vec<(Mass, Abundance)>;
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PeptideIon {
+ pub sequence: PeptideSequence,
+ pub charge: i32,
+ pub intensity: f64,
+}
+
+impl PeptideIon {
+ pub fn new(sequence: String, charge: i32, intensity: f64, peptide_id: Option<i32>) -> Self {
+ PeptideIon {
+ sequence: PeptideSequence::new(sequence, peptide_id),
+ charge,
+ intensity,
+ }
+ }
+ pub fn mz(&self) -> f64 {
+ calculate_mz(self.sequence.mono_isotopic_mass(), self.charge)
+ }
+
+ pub fn calculate_isotope_distribution(
+ &self,
+ mass_tolerance: f64,
+ abundance_threshold: f64,
+ max_result: i32,
+ intensity_min: f64,
+ ) -> IsotopeDistribution {
+
+ let atomic_composition: HashMap<String, i32> = self.sequence.atomic_composition().iter().map(|(k, v)| (k.to_string(), *v)).collect();
+
+ let distribution: IsotopeDistribution = crate::algorithm::isotope::generate_isotope_distribution(&atomic_composition, mass_tolerance, abundance_threshold, max_result)
+ .into_iter().filter(|&(_, abundance)| abundance > intensity_min).collect();
+
+ let mz_distribution = distribution.iter().map(|(mass, _)| calculate_mz(*mass, self.charge))
+ .zip(distribution.iter().map(|&(_, abundance)| abundance)).collect();
+
+ mz_distribution
+ }
+
+ pub fn calculate_isotopic_spectrum(
+ &self,
+ mass_tolerance: f64,
+ abundance_threshold: f64,
+ max_result: i32,
+ intensity_min: f64,
+ ) -> MzSpectrum {
+ let isotopic_distribution = self.calculate_isotope_distribution(mass_tolerance, abundance_threshold, max_result, intensity_min);
+ MzSpectrum::new(isotopic_distribution.iter().map(|(mz, _)| *mz).collect(), isotopic_distribution.iter().map(|(_, abundance)| *abundance).collect()) * self.intensity
+ }
+
+ pub fn calculate_isotopic_spectrum_annotated(
+ &self,
+ mass_tolerance: f64,
+ abundance_threshold: f64,
+ max_result: i32,
+ intensity_min: f64,
+ ) -> MzSpectrumAnnotated {
+ let isotopic_distribution = self.calculate_isotope_distribution(mass_tolerance, abundance_threshold, max_result, intensity_min);
+ let mut annotations = Vec::new();
+ let mut isotope_counter = 0;
+ let mut previous_mz = isotopic_distribution[0].0;
+
+
+
+ for (mz, abundance) in isotopic_distribution.iter() {
+
+ let ppm_tolerance = (mz / 1e6) * 25.0;
+
+ if (mz - previous_mz).abs() > ppm_tolerance {
+ isotope_counter += 1;
+ previous_mz = *mz;
+ }
+
+ let signal_attributes = SignalAttributes {
+ charge_state: self.charge,
+ peptide_id: self.sequence.peptide_id.unwrap_or(-1),
+ isotope_peak: isotope_counter,
+ description: None,
+ };
+
+ let contribution_source = ContributionSource {
+ intensity_contribution: *abundance,
+ source_type: SourceType::Signal,
+ signal_attributes: Some(signal_attributes)
+ };
+
+ annotations.push(PeakAnnotation {
+ contributions: vec![contribution_source]
+ });
+ }
+
+ MzSpectrumAnnotated::new(isotopic_distribution.iter().map(|(mz, _)| *mz).collect(), isotopic_distribution.iter().map(|(_, abundance)| *abundance).collect(), annotations)
+ }
+}
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub enum FragmentType { A, B, C, X, Y, Z, }
+
+// implement to string for fragment type
+impl std::fmt::Display for FragmentType {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ FragmentType::A => write!(f, "a"),
+ FragmentType::B => write!(f, "b"),
+ FragmentType::C => write!(f, "c"),
+ FragmentType::X => write!(f, "x"),
+ FragmentType::Y => write!(f, "y"),
+ FragmentType::Z => write!(f, "z"),
+ }
+ }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PeptideProductIon {
+ pub kind: FragmentType,
+ pub ion: PeptideIon,
+}
+
+impl PeptideProductIon {
+ pub fn new(kind: FragmentType, sequence: String, charge: i32, intensity: f64, peptide_id: Option<i32>) -> Self {
+ PeptideProductIon {
+ kind,
+ ion: PeptideIon {
+ sequence: PeptideSequence::new(sequence, peptide_id),
+ charge,
+ intensity,
+ },
+ }
+ }
+
+ pub fn mono_isotopic_mass(&self) -> f64 {
+ calculate_peptide_product_ion_mono_isotopic_mass(self.ion.sequence.sequence.as_str(), self.kind)
+ }
+
+ pub fn atomic_composition(&self) -> HashMap<&str, i32> {
+
+ let mut composition = peptide_sequence_to_atomic_composition(&self.ion.sequence);
+
+ match self.kind {
+ FragmentType::A => {
+ *composition.entry("H").or_insert(0) -= 2;
+ *composition.entry("O").or_insert(0) -= 2;
+ *composition.entry("C").or_insert(0) -= 1;
+ },
+
+ FragmentType::B => {
+ // B: peptide_mass - Water
+ *composition.entry("H").or_insert(0) -= 2;
+ *composition.entry("O").or_insert(0) -= 1;
+ },
+
+ FragmentType::C => {
+ // C: peptide_mass + NH3 - Water
+ *composition.entry("H").or_insert(0) += 1;
+ *composition.entry("N").or_insert(0) += 1;
+ *composition.entry("O").or_insert(0) -= 1;
+ },
+
+ FragmentType::X => {
+ // X: peptide_mass + CO + 2*H - Water
+ *composition.entry("C").or_insert(0) += 1;
+ *composition.entry("O").or_insert(0) += 1;
+ },
+
+ FragmentType::Y => {
+ ()
+ },
+
+ FragmentType::Z => {
+ *composition.entry("H").or_insert(0) -= 1;
+ *composition.entry("N").or_insert(0) -= 3;
+ },
+ }
+ composition
+ }
+
+ pub fn mz(&self) -> f64 {
+ calculate_mz(self.mono_isotopic_mass(), self.ion.charge)
+ }
+
+    /// Isotope distribution of this product ion on the m/z axis.
+    ///
+    /// # Arguments
+    ///
+    /// * `mass_tolerance`, `abundance_threshold`, `max_result` - forwarded unchanged to
+    ///   `generate_isotope_distribution`
+    /// * `intensity_min` - only peaks with an abundance strictly greater than this are kept
+    ///
+    /// # Returns
+    ///
+    /// * `IsotopeDistribution` - `(m/z, abundance)` pairs, masses converted with the ion's charge
+    pub fn isotope_distribution(
+        &self,
+        mass_tolerance: f64,
+        abundance_threshold: f64,
+        max_result: i32,
+        intensity_min: f64,
+    ) -> IsotopeDistribution {
+        // the generator expects owned element names
+        let composition: HashMap<String, i32> = self
+            .atomic_composition()
+            .into_iter()
+            .map(|(element, count)| (element.to_string(), count))
+            .collect();
+        let charge = self.ion.charge;
+
+        crate::algorithm::isotope::generate_isotope_distribution(&composition, mass_tolerance, abundance_threshold, max_result)
+            .into_iter()
+            .filter(|&(_, abundance)| abundance > intensity_min)
+            .map(|(mass, abundance)| (calculate_mz(mass, charge), abundance))
+            .collect()
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
+pub struct PeptideSequence { // A peptide sequence string, optionally tagged with a peptide id.
+ pub sequence: String, // stored verbatim from the constructor input, i.e. `[UNIMOD:ID]` tags are kept
+ pub peptide_id: Option<i32>, // `None` when no id is attached; annotation code in this file reports `-1` in that case
+}
+
+impl PeptideSequence {
+    /// Creates a `PeptideSequence` from a raw sequence string.
+    ///
+    /// `[UNIMOD:<digits>]` tags are stripped for validation only; the raw string,
+    /// including its tags, is what gets stored.
+    ///
+    /// # Arguments
+    ///
+    /// * `raw_sequence` - amino acid letters with optional `[UNIMOD:ID]` tags
+    /// * `peptide_id` - optional id attached to the sequence
+    ///
+    /// # Panics
+    ///
+    /// Panics if, after removing the tags, any remaining character is not a key of
+    /// `amino_acid_masses()`.
+    pub fn new(raw_sequence: String, peptide_id: Option<i32>) -> Self {
+        let pattern = Regex::new(r"\[UNIMOD:(\d+)]").unwrap();
+
+        // remove the modifications from the sequence
+        let sequence = pattern.replace_all(&raw_sequence, "");
+
+        // look the table up once instead of once per character
+        let masses = amino_acid_masses();
+        let valid_amino_acids = sequence.chars().all(|c| masses.contains_key(&c.to_string()[..]));
+        if !valid_amino_acids {
+            panic!("Invalid amino acid sequence, use only valid amino acids: ARNDCQEGHILKMFPSTWYVU, and modifications in the format [UNIMOD:ID]");
+        }
+
+        PeptideSequence { sequence: raw_sequence, peptide_id }
+    }
+
+    /// Mono-isotopic mass of the peptide, as computed by `calculate_peptide_mono_isotopic_mass`.
+    pub fn mono_isotopic_mass(&self) -> f64 {
+        let mass: f64 = calculate_peptide_mono_isotopic_mass(self);
+        mass
+    }
+
+    /// Atomic composition of the peptide, as computed by `peptide_sequence_to_atomic_composition`.
+    ///
+    /// # Returns
+    ///
+    /// * `HashMap<&str, i32>` - element symbol mapped to its atom count
+    pub fn atomic_composition(&self) -> HashMap<&str, i32> {
+        let composition = peptide_sequence_to_atomic_composition(self);
+        composition
+    }
+
+    /// Splits the stored sequence into tokens via `unimod_sequence_to_tokens`.
+    ///
+    /// # Arguments
+    ///
+    /// * `group_modifications` - forwarded unchanged to `unimod_sequence_to_tokens`
+    pub fn to_tokens(&self, group_modifications: bool) -> Vec<String> {
+        unimod_sequence_to_tokens(self.sequence.as_str(), group_modifications)
+    }
+
+    /// Returns the `(String, Vec<f64>)` pair produced by `find_unimod_patterns` for the
+    /// stored sequence.
+    pub fn to_sage_representation(&self) -> (String, Vec<f64>) {
+        find_unimod_patterns(self.sequence.as_str())
+    }
+
+    /// Number of tokens returned by `to_tokens(true)`.
+    pub fn amino_acid_count(&self) -> usize {
+        let tokens = self.to_tokens(true);
+        tokens.len()
+    }
+
+    /// Mono-isotopic product ion spectrum for one charge state.
+    ///
+    /// Builds the product ion series for `charge` / `fragment_type` and converts it with
+    /// `generate_mono_isotopic_spectrum`.
+    pub fn calculate_mono_isotopic_product_ion_spectrum(&self, charge: i32, fragment_type: FragmentType) -> MzSpectrum {
+        self.calculate_product_ion_series(charge, fragment_type)
+            .generate_mono_isotopic_spectrum()
+    }
+
+    /// Annotated mono-isotopic product ion spectrum for one charge state.
+    ///
+    /// Builds the product ion series for `charge` / `fragment_type` and converts it with
+    /// `generate_mono_isotopic_spectrum_annotated`.
+    pub fn calculate_mono_isotopic_product_ion_spectrum_annotated(&self, charge: i32, fragment_type: FragmentType) -> MzSpectrumAnnotated {
+        self.calculate_product_ion_series(charge, fragment_type)
+            .generate_mono_isotopic_spectrum_annotated()
+    }
+
+    /// Isotopic product ion spectrum for one charge state.
+    ///
+    /// Builds the product ion series for `charge` / `fragment_type`; the remaining arguments
+    /// are forwarded unchanged to `generate_isotopic_spectrum`.
+    pub fn calculate_isotopic_product_ion_spectrum(&self, charge: i32, fragment_type: FragmentType, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrum {
+        self.calculate_product_ion_series(charge, fragment_type)
+            .generate_isotopic_spectrum(mass_tolerance, abundance_threshold, max_result, intensity_min)
+    }
+
+    /// Annotated isotopic product ion spectrum for one charge state.
+    ///
+    /// Builds the product ion series for `charge` / `fragment_type`; the remaining arguments
+    /// are forwarded unchanged to `generate_isotopic_spectrum_annotated`.
+    pub fn calculate_isotopic_product_ion_spectrum_annotated(&self, charge: i32, fragment_type: FragmentType, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrumAnnotated {
+        self.calculate_product_ion_series(charge, fragment_type)
+            .generate_isotopic_spectrum_annotated(mass_tolerance, abundance_threshold, max_result, intensity_min)
+    }
+
+    /// Builds the N-terminal and C-terminal product ion series of this peptide.
+    ///
+    /// The sequence is tokenized with grouped modifications. For every cut position an
+    /// N-terminal ion (leading tokens) and a C-terminal ion (trailing tokens) is created;
+    /// the full-length sequence itself is never emitted. All ions get `target_charge` and a
+    /// placeholder intensity of `1.0`.
+    ///
+    /// # Arguments
+    ///
+    /// * `target_charge` - charge assigned to every generated ion and to the series
+    /// * `fragment_type` - selects the ion pair: A or X gives a/x, B or Y gives b/y, C or Z gives c/z
+    pub fn calculate_product_ion_series(&self, target_charge: i32, fragment_type: FragmentType) -> PeptideProductIonSeries {
+        // TODO: check for n-terminal modifications
+        let tokens = unimod_sequence_to_tokens(&self.sequence, true);
+        let token_count = tokens.len();
+
+        // either member of a complementary pair selects the same (n-terminal, c-terminal) kinds
+        let (n_kind, c_kind) = match fragment_type {
+            FragmentType::A | FragmentType::X => (FragmentType::A, FragmentType::X),
+            FragmentType::B | FragmentType::Y => (FragmentType::B, FragmentType::Y),
+            FragmentType::C | FragmentType::Z => (FragmentType::C, FragmentType::Z),
+        };
+
+        let make_ion = |kind: FragmentType, sequence: String| PeptideProductIon {
+            kind,
+            ion: PeptideIon {
+                sequence: PeptideSequence {
+                    sequence,
+                    peptide_id: self.peptide_id,
+                },
+                charge: target_charge,
+                intensity: 1.0, // Placeholder intensity
+            },
+        };
+
+        // N-terminal ions: the first `len` tokens
+        let n_terminal_ions: Vec<PeptideProductIon> = (1..token_count)
+            .map(|len| make_ion(n_kind, tokens[..len].concat()))
+            .collect();
+
+        // C-terminal ions: the last `len` tokens
+        let c_terminal_ions: Vec<PeptideProductIon> = (1..token_count)
+            .map(|len| make_ion(c_kind, tokens[token_count - len..].concat()))
+            .collect();
+
+        PeptideProductIonSeries::new(target_charge, n_terminal_ions, c_terminal_ions)
+    }
+
+    /// Attaches predicted fragment intensities to the product ion series of this peptide.
+    ///
+    /// `flat_intensities` is reshaped with `reshape_prosit_array`; for fragment row `r` and
+    /// charge `z`, `row[1][z - 1]` is assigned to the N-terminal ion and `row[0][z - 1]` to
+    /// the C-terminal ion. One series is produced per charge from 1 up to `min(charge, 3)`
+    /// (at least 1).
+    ///
+    /// # Arguments
+    ///
+    /// * `charge` - upper bound for the generated fragment charges (capped at 3)
+    /// * `fragment_type` - forwarded to `calculate_product_ion_series`
+    /// * `flat_intensities` - flat intensity array, input of `reshape_prosit_array`
+    /// * `normalize` - if `true`, intensities are divided by the sum of all strictly positive
+    ///   intensities over the considered rows and charges; otherwise the divisor is `1.0`
+    /// * `half_charge_one` - if `true` and only charge 1 is generated, the divisor is doubled
+    ///
+    /// # Panics
+    ///
+    /// Panics if the reshaped array has fewer rows than the peptide has fragments.
+    pub fn associate_with_predicted_intensities(
+        &self,
+        // TODO: check docs of prosit if charge is meant as precursor charge or max charge of fragments to generate
+        charge: i32,
+        fragment_type: FragmentType,
+        flat_intensities: Vec<f64>,
+        normalize: bool,
+        half_charge_one: bool,
+    ) -> PeptideProductIonSeriesCollection {
+
+        let reshaped_intensities = reshape_prosit_array(flat_intensities);
+        let max_charge = std::cmp::min(charge, 3).max(1); // Ensure at least 1 for loop range
+
+        // The full sequence is not a fragment, hence -1. Saturating, so that an empty token
+        // list yields zero fragments instead of underflowing `usize`.
+        let num_tokens = self.amino_acid_count().saturating_sub(1);
+        let fragment_rows = &reshaped_intensities[..num_tokens];
+
+        let sum_intensity = if normalize {
+            let mut total = 0.0;
+            for z in 1..=max_charge {
+                let charge_index = z as usize - 1;
+                let sum_c: f64 = fragment_rows.iter().map(|row| row[0][charge_index]).filter(|&x| x > 0.0).sum();
+                let sum_n: f64 = fragment_rows.iter().map(|row| row[1][charge_index]).filter(|&x| x > 0.0).sum();
+                total += sum_n + sum_c;
+            }
+            total
+        } else {
+            1.0
+        };
+
+        // the divisor does not depend on the charge being processed
+        let adjusted_sum_intensity = if max_charge == 1 && half_charge_one { sum_intensity * 2.0 } else { sum_intensity };
+
+        let mut peptide_ion_collection = Vec::new();
+
+        for z in 1..=max_charge {
+            let charge_index = z as usize - 1;
+            let mut product_ions = self.calculate_product_ion_series(z, fragment_type);
+
+            for (ion, row) in product_ions.n_ions.iter_mut().zip(fragment_rows.iter()) {
+                ion.ion.intensity = row[1][charge_index] / adjusted_sum_intensity;
+            }
+            for (ion, row) in product_ions.c_ions.iter_mut().zip(fragment_rows.iter()) {
+                ion.ion.intensity = row[0][charge_index] / adjusted_sum_intensity;
+            }
+
+            peptide_ion_collection.push(PeptideProductIonSeries::new(z, product_ions.n_ions, product_ions.c_ions));
+        }
+
+        PeptideProductIonSeriesCollection::new(peptide_ion_collection)
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PeptideProductIonSeries { // N- and C-terminal product ions generated for one charge state.
+ pub charge: i32, // charge state the series was generated for
+ pub n_ions: Vec<PeptideProductIon>, // N-terminal fragments (a, b or c kind when built by `calculate_product_ion_series`)
+ pub c_ions: Vec<PeptideProductIon>, // C-terminal fragments (x, y or z kind when built by `calculate_product_ion_series`)
+}
+
+impl PeptideProductIonSeries {
+    /// Bundles a charge state with its N-terminal and C-terminal product ions.
+    pub fn new(charge: i32, n_ions: Vec<PeptideProductIon>, c_ions: Vec<PeptideProductIon>) -> Self {
+        Self { charge, n_ions, c_ions }
+    }
+
+    /// Mono-isotopic spectrum of all ions of the series.
+    ///
+    /// One `(m/z, intensity)` peak is produced per ion; the N-terminal and C-terminal peaks
+    /// are merged with `MzSpectrum::from_collection` and then restricted to
+    /// m/z in `[0, 5000]` and intensity in `[1e-6, 1e6]`.
+    pub fn generate_mono_isotopic_spectrum(&self) -> MzSpectrum {
+        let (n_mz, n_intensity): (Vec<f64>, Vec<f64>) = self.n_ions.iter()
+            .map(|ion| (ion.mz(), ion.ion.intensity))
+            .unzip();
+        let (c_mz, c_intensity): (Vec<f64>, Vec<f64>) = self.c_ions.iter()
+            .map(|ion| (ion.mz(), ion.ion.intensity))
+            .unzip();
+
+        let n_spectrum = MzSpectrum::new(n_mz, n_intensity);
+        let c_spectrum = MzSpectrum::new(c_mz, c_intensity);
+
+        MzSpectrum::from_collection(vec![n_spectrum, c_spectrum]).filter_ranged(0.0, 5_000.0, 1e-6, 1e6)
+    }
+
+    /// Annotated mono-isotopic spectrum of all ions of the series.
+    ///
+    /// Emits one peak per ion, N-terminal ions first, then C-terminal ions. Every peak
+    /// carries a single signal contribution described as `<kind>_<1-based index>_0`;
+    /// a missing peptide id is reported as `-1`.
+    pub fn generate_mono_isotopic_spectrum_annotated(&self) -> MzSpectrumAnnotated {
+        let peak_count = self.n_ions.len() + self.c_ions.len();
+        let mut annotations: Vec<PeakAnnotation> = Vec::with_capacity(peak_count);
+        let mut mz_values = Vec::with_capacity(peak_count);
+        let mut intensity_values = Vec::with_capacity(peak_count);
+
+        // each series is enumerated on its own, so indices restart at 0 for the c ions
+        let fragments = self.n_ions.iter().enumerate().chain(self.c_ions.iter().enumerate());
+
+        for (index, fragment) in fragments {
+            let intensity = fragment.ion.intensity;
+
+            let signal_attributes = SignalAttributes {
+                charge_state: fragment.ion.charge,
+                peptide_id: fragment.ion.sequence.peptide_id.unwrap_or(-1),
+                isotope_peak: 0,
+                description: Some(format!("{}_{}_{}", fragment.kind, index + 1, 0)),
+            };
+
+            annotations.push(PeakAnnotation {
+                contributions: vec![ContributionSource {
+                    intensity_contribution: intensity,
+                    source_type: SourceType::Signal,
+                    signal_attributes: Some(signal_attributes),
+                }],
+            });
+            mz_values.push(fragment.mz());
+            intensity_values.push(intensity);
+        }
+
+        MzSpectrumAnnotated::new(mz_values, intensity_values, annotations)
+    }
+
+    /// Isotopic spectrum of all ions of the series.
+    ///
+    /// For every ion (N-terminal first, then C-terminal) the isotope distribution is
+    /// computed and its abundances are scaled by the ion's intensity. The per-ion spectra
+    /// are merged with `MzSpectrum::from_collection` and restricted to m/z in `[0, 5000]`
+    /// and intensity in `[1e-6, 1e6]`.
+    ///
+    /// # Arguments
+    ///
+    /// All four arguments are forwarded unchanged to `PeptideProductIon::isotope_distribution`.
+    pub fn generate_isotopic_spectrum(&self, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrum {
+        let spectra: Vec<MzSpectrum> = self.n_ions.iter()
+            .chain(self.c_ions.iter())
+            .map(|ion| {
+                let isotopes = ion.isotope_distribution(mass_tolerance, abundance_threshold, max_result, intensity_min);
+                let (mz, intensity): (Vec<f64>, Vec<f64>) = isotopes.iter()
+                    .map(|&(mz, abundance)| (mz, abundance * ion.ion.intensity))
+                    .unzip();
+                MzSpectrum::new(mz, intensity)
+            })
+            .collect();
+
+        MzSpectrum::from_collection(spectra).filter_ranged(0.0, 5_000.0, 1e-6, 1e6)
+    }
+
+    /// Annotated isotopic spectrum of all ions of the series.
+    ///
+    /// For every ion (N-terminal first, then C-terminal) the isotope distribution is
+    /// computed and each of its peaks is emitted with the abundance scaled by the ion's
+    /// intensity. The isotope counter of a peak is advanced whenever its m/z differs from
+    /// the last counted m/z by more than 25 ppm. Every peak carries a single signal
+    /// contribution described as `<kind>_<1-based fragment index>_<isotope counter>`;
+    /// a missing peptide id is reported as `-1`.
+    ///
+    /// Ions whose isotope distribution is empty (for example because `intensity_min`
+    /// removed every peak) contribute nothing.
+    ///
+    /// # Arguments
+    ///
+    /// All four arguments are forwarded unchanged to `PeptideProductIon::isotope_distribution`.
+    pub fn generate_isotopic_spectrum_annotated(&self, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrumAnnotated {
+        let mut annotations: Vec<PeakAnnotation> = Vec::new();
+        let mut mz_values = Vec::new();
+        let mut intensity_values = Vec::new();
+
+        // each series is enumerated on its own, so indices restart at 0 for the c ions
+        let fragments = self.n_ions.iter().enumerate().chain(self.c_ions.iter().enumerate());
+
+        for (index, ion) in fragments {
+            let isotopes = ion.isotope_distribution(mass_tolerance, abundance_threshold, max_result, intensity_min);
+
+            // nothing to annotate when every isotope peak was filtered out
+            let mut previous_mz = match isotopes.iter().next() {
+                Some(&(first_mz, _)) => first_mz,
+                None => continue,
+            };
+            let mut isotope_counter = 0;
+
+            for &(mz, abundance) in isotopes.iter() {
+                let ppm_tolerance = (mz / 1e6) * 25.0;
+
+                if (mz - previous_mz).abs() > ppm_tolerance {
+                    isotope_counter += 1;
+                    previous_mz = mz;
+                }
+
+                let intensity = abundance * ion.ion.intensity;
+
+                let signal_attributes = SignalAttributes {
+                    charge_state: ion.ion.charge,
+                    peptide_id: ion.ion.sequence.peptide_id.unwrap_or(-1),
+                    isotope_peak: isotope_counter,
+                    // use convention of 1-based indexing for fragment ion enumeration
+                    description: Some(format!("{}_{}_{}", ion.kind, index + 1, isotope_counter)),
+                };
+
+                annotations.push(PeakAnnotation {
+                    contributions: vec![ContributionSource {
+                        intensity_contribution: intensity,
+                        source_type: SourceType::Signal,
+                        signal_attributes: Some(signal_attributes),
+                    }],
+                });
+                mz_values.push(mz);
+                intensity_values.push(intensity);
+            }
+        }
+
+        MzSpectrumAnnotated::new(mz_values, intensity_values, annotations)
+    }
+}
+
+/// A set of product ion series, one entry per charge state.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PeptideProductIonSeriesCollection {
+    pub peptide_ions: Vec<PeptideProductIonSeries>,
+}
+
+impl PeptideProductIonSeriesCollection {
+    /// Wraps the given series without modifying them.
+    pub fn new(peptide_ions: Vec<PeptideProductIonSeries>) -> Self {
+        Self { peptide_ions }
+    }
+
+    /// Returns the first series whose `charge` equals `charge`, if any.
+    pub fn find_ion_series(&self, charge: i32) -> Option<&PeptideProductIonSeries> {
+        for ion_series in &self.peptide_ions {
+            if ion_series.charge == charge {
+                return Some(ion_series);
+            }
+        }
+        None
+    }
+
+    /// Isotopic spectrum over all series.
+    ///
+    /// The per-series spectra are merged with `MzSpectrum::from_collection` and restricted
+    /// to m/z in `[0, 5000]` and intensity in `[1e-6, 1e6]`. All arguments are forwarded
+    /// unchanged to `PeptideProductIonSeries::generate_isotopic_spectrum`.
+    pub fn generate_isotopic_spectrum(&self, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrum {
+        let spectra: Vec<MzSpectrum> = self.peptide_ions.iter()
+            .map(|ion_series| ion_series.generate_isotopic_spectrum(mass_tolerance, abundance_threshold, max_result, intensity_min))
+            .collect();
+
+        MzSpectrum::from_collection(spectra).filter_ranged(0.0, 5_000.0, 1e-6, 1e6)
+    }
+
+    /// Annotated isotopic spectrum over all series.
+    ///
+    /// Peaks and annotations of the per-series annotated spectra are concatenated in series
+    /// order. All arguments are forwarded unchanged to
+    /// `PeptideProductIonSeries::generate_isotopic_spectrum_annotated`.
+    pub fn generate_isotopic_spectrum_annotated(&self, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrumAnnotated {
+        let mut annotations: Vec<PeakAnnotation> = Vec::new();
+        let mut mz_values = Vec::new();
+        let mut intensity_values = Vec::new();
+
+        for ion_series in &self.peptide_ions {
+            let annotated = ion_series.generate_isotopic_spectrum_annotated(mass_tolerance, abundance_threshold, max_result, intensity_min);
+
+            let peaks = annotated.mz.iter().zip(annotated.intensity.iter());
+            for (&mz, &intensity) in peaks {
+                mz_values.push(mz);
+                intensity_values.push(intensity);
+            }
+
+            annotations.extend(annotated.annotations.iter().cloned());
+        }
+
+        MzSpectrumAnnotated::new(mz_values, intensity_values, annotations)
+    }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657
use std::fmt;
+use std::collections::BTreeMap;
+use nalgebra::DVector;
+use std::fmt::{Display, Formatter};
+use bincode::{Decode, Encode};
+use serde::{Serialize, Deserialize};
+
+extern crate rand;
+
+use rand::distributions::{Uniform, Distribution};
+use rand::rngs::ThreadRng;
+use statrs::distribution::Normal;
+
+/// Re-binning of a value's m/z axis to a given resolution.
+pub trait ToResolution {
+ fn to_resolution(&self, resolution: i32) -> Self; // returns a new value of the same type; for `MzSpectrum`, `resolution` is the number of decimal places kept
+}
+
+/// Vectorized representation for structs holding m/z values and intensities.
+pub trait Vectorized<T> {
+ fn vectorized(&self, resolution: i32) -> T; // `T` is the vectorized output type, e.g. `MzSpectrumVectorized` for `MzSpectrum`
+}
+
+/// Represents the type of spectrum.
+///
+/// # Description
+///
+/// The `MsType` enum is used to distinguish between precursor and fragment spectra.
+///
+#[derive(Clone, PartialEq, Debug, Serialize, Deserialize, Encode, Decode)]
+pub enum MsType {
+ Precursor, // numeric code 0
+ FragmentDda, // numeric code 8
+ FragmentDia, // numeric code 9
+ Unknown, // any other numeric code; reported as -1 by `ms_type_numeric`
+}
+
+impl MsType {
+    /// Maps a numeric code to its `MsType`.
+    ///
+    /// # Arguments
+    ///
+    /// * `ms_type` - `0` (precursor), `8` (DDA fragment) or `9` (DIA fragment); every other
+    ///   value yields `MsType::Unknown`.
+    ///
+    pub fn new(ms_type: i32) -> MsType {
+        match ms_type {
+            0 => Self::Precursor,
+            8 => Self::FragmentDda,
+            9 => Self::FragmentDia,
+            _ => Self::Unknown,
+        }
+    }
+
+    /// Numeric code of this `MsType`; `Unknown` is reported as `-1`.
+    pub fn ms_type_numeric(&self) -> i32 {
+        match *self {
+            Self::Precursor => 0,
+            Self::FragmentDda => 8,
+            Self::FragmentDia => 9,
+            Self::Unknown => -1,
+        }
+    }
+}
+
+/// The default spectrum type is `Unknown`.
+impl Default for MsType {
+    fn default() -> Self {
+        Self::Unknown
+    }
+}
+
+/// Writes the variant name (`Precursor`, `FragmentDda`, `FragmentDia` or `Unknown`).
+impl Display for MsType {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let label = match self {
+            MsType::Precursor => "Precursor",
+            MsType::FragmentDda => "FragmentDda",
+            MsType::FragmentDia => "FragmentDia",
+            MsType::Unknown => "Unknown",
+        };
+        f.write_str(label)
+    }
+}
+
+/// Represents a mass spectrum with associated m/z values and intensities.
+#[derive(Clone, Debug, Serialize, Deserialize, Encode, Decode)]
+pub struct MzSpectrum {
+ pub mz: Vec<f64>, // m/z values, paired by position with `intensity`
+ pub intensity: Vec<f64>, // intensity values; `MzSpectrum::new` does not check that the lengths match
+}
+
+impl MzSpectrum {
+    /// Constructs a new `MzSpectrum` from parallel m/z and intensity vectors.
+    ///
+    /// # Arguments
+    ///
+    /// * `mz` - A vector of m/z values.
+    /// * `intensity` - A vector of intensity values corresponding to the m/z values.
+    ///
+    /// No validation is performed; in particular the two vectors are not checked for
+    /// equal length.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use mscore::data::spectrum::MzSpectrum;
+    /// let spectrum = MzSpectrum::new(vec![100.0, 200.0], vec![10.0, 20.0]);
+    /// assert_eq!(spectrum.mz, vec![100.0, 200.0]);
+    /// assert_eq!(spectrum.intensity, vec![10.0, 20.0]);
+    /// ```
+    pub fn new(mz: Vec<f64>, intensity: Vec<f64>) -> Self {
+        Self { mz, intensity }
+    }
+
+    /// Returns a copy of the spectrum restricted to an m/z and an intensity window.
+    ///
+    /// A peak is kept when `mz_min <= mz <= mz_max` and
+    /// `intensity_min <= intensity <= intensity_max` (all bounds inclusive).
+    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self {
+        let (mz, intensity): (Vec<f64>, Vec<f64>) = self.mz.iter()
+            .zip(self.intensity.iter())
+            .filter(|&(&mz, &intensity)| {
+                mz_min <= mz && mz <= mz_max && intensity >= intensity_min && intensity <= intensity_max
+            })
+            .map(|(&mz, &intensity)| (mz, intensity))
+            .unzip();
+
+        MzSpectrum { mz, intensity }
+    }
+
+    /// Splits the spectrum into a collection of windows based on m/z values.
+    ///
+    /// Every peak is assigned to the window `floor(mz / window_length)`. With `overlapping`
+    /// enabled, every peak is additionally assigned to a second set of windows shifted by
+    /// half a window length; those windows are stored under the negated index
+    /// `-floor((mz + window_length / 2) / window_length)`.
+    ///
+    /// # Arguments
+    ///
+    /// * `window_length`: The size (in terms of m/z values) of each window.
+    ///
+    /// * `overlapping`: If `true`, the half-shifted windows are added as well, so a peak may
+    /// belong to more than one window. If `false`, windows do not overlap.
+    ///
+    /// * `min_peaks`: The minimum number of peaks a window must have to be retained in the result.
+    ///
+    /// * `min_intensity`: The minimum intensity the highest peak of a window must reach for the
+    /// window to be retained in the result.
+    ///
+    /// # Returns
+    ///
+    /// A `BTreeMap` from window index to the `MzSpectrum` of that window. Windows with fewer
+    /// than `min_peaks` peaks or with a highest intensity below `min_intensity` are discarded.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use mscore::data::spectrum::MzSpectrum;
+    /// let spectrum = MzSpectrum::new(vec![100.0, 101.0, 102.5, 103.0], vec![10.0, 20.0, 30.0, 40.0]);
+    /// let windowed_spectrum = spectrum.to_windows(1.0, false, 1, 10.0);
+    /// assert!(windowed_spectrum.contains_key(&100));
+    /// assert!(windowed_spectrum.contains_key(&102));
+    /// ```
+    pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> BTreeMap<i32, MzSpectrum> {
+        // appends one peak to the window stored under `key`, creating the window on demand
+        fn push_peak(windows: &mut BTreeMap<i32, MzSpectrum>, key: i32, mz: f64, intensity: f64) {
+            let window = windows.entry(key).or_insert_with(|| MzSpectrum::new(Vec::new(), Vec::new()));
+            window.mz.push(mz);
+            window.intensity.push(intensity);
+        }
+
+        let mut splits: BTreeMap<i32, MzSpectrum> = BTreeMap::new();
+
+        for (i, &mz) in self.mz.iter().enumerate() {
+            let key = (mz / window_length).floor() as i32;
+            push_peak(&mut splits, key, mz, self.intensity[i]);
+        }
+
+        if overlapping {
+            // second pass: half-shifted windows, appended after all regular assignments
+            for (i, &mz) in self.mz.iter().enumerate() {
+                let key = -((mz + window_length / 2.0) / window_length).floor() as i32;
+                push_peak(&mut splits, key, mz, self.intensity[i]);
+            }
+        }
+
+        splits.retain(|_, spectrum| {
+            if spectrum.mz.len() < min_peaks {
+                return false;
+            }
+            let max_intensity = spectrum.intensity.iter().cloned()
+                .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
+                .unwrap_or(0.0);
+            max_intensity >= min_intensity
+        });
+
+        splits
+    }
+
+ pub fn to_centroid(&self, baseline_noise_level: i32, sigma: f64, normalize: bool) -> MzSpectrum {
+
+ let filtered = self.filter_ranged(0.0, 1e9, baseline_noise_level as f64, 1e9);
+
+ let mut cent_mz = Vec::new();
+ let mut cent_i: Vec<f64> = Vec::new();
+
+ let mut last_mz = 0.0;
+ let mut mean_mz = 0.0;
+ let mut sum_i = 0.0;
+
+ for (i, ¤t_mz) in filtered.mz.iter().enumerate() {
+ let current_intensity = filtered.intensity[i];
+
+ // If peak is too far away from last peak, push centroid
+ if current_mz - last_mz > sigma && mean_mz > 0.0 {
+ mean_mz /= sum_i;
+ cent_mz.push(mean_mz);
+ cent_i.push(sum_i);
+
+ // Start new centroid
+ sum_i = 0.0;
+ mean_mz = 0.0;
+ }
+
+ mean_mz += current_mz * current_intensity as f64;
+ sum_i += current_intensity;
+ last_mz = current_mz;
+ }
+
+ // Push back last remaining centroid
+ if mean_mz > 0.0 {
+ mean_mz /= sum_i;
+ cent_mz.push(mean_mz);
+ cent_i.push(sum_i);
+ }
+
+ if normalize {
+ let sum_i: f64 = cent_i.iter().sum();
+ cent_i = cent_i.iter().map(|&i| i / sum_i).collect();
+ }
+ MzSpectrum::new(cent_mz, cent_i)
+ }
+
+    /// Merges several spectra into one.
+    ///
+    /// m/z values are quantized to six decimal places; intensities of peaks that share a
+    /// quantized m/z are summed. The result is ordered by ascending m/z.
+    pub fn from_collection(collection: Vec<MzSpectrum>) -> MzSpectrum {
+        let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
+
+        for spectrum in &collection {
+            for (&mz, &intensity) in spectrum.mz.iter().zip(spectrum.intensity.iter()) {
+                let key = (mz * 1_000_000.0).round() as i64;
+                *combined_map.entry(key).or_insert(0.0) += intensity;
+            }
+        }
+
+        let (mz, intensity): (Vec<f64>, Vec<f64>) = combined_map.into_iter()
+            .map(|(key, intensity)| (key as f64 / 1_000_000.0, intensity))
+            .unzip();
+
+        MzSpectrum { mz, intensity }
+    }
+
+    /// Returns a copy of the spectrum with uniformly distributed m/z noise.
+    ///
+    /// # Arguments
+    ///
+    /// * `ppm` - noise width in parts per million of each m/z value
+    /// * `right_drag` - if `false`, each m/z is drawn from `[mz - w, mz + w]` with
+    ///   `w = mz * ppm / 1e6`; if `true`, `w` is halved and the interval becomes
+    ///   `[mz - w / 3, mz + w]`
+    pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
+        let mut rng = rand::thread_rng();
+        self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
+            let (lower, upper) = if right_drag {
+                let ppm_mz = mz * ppm / 1e6 / 2.0;
+                (mz - (ppm_mz / 3.0), mz + ppm_mz)
+            } else {
+                let ppm_mz = mz * ppm / 1e6;
+                (mz - ppm_mz, mz + ppm_mz)
+            };
+
+            Uniform::from(lower..=upper).sample(rng)
+        })
+    }
+
+    /// Returns a copy of the spectrum with normally distributed m/z noise.
+    ///
+    /// Each m/z is replaced by a draw from a normal distribution centred on it with a
+    /// standard deviation of `(mz * ppm / 1e6) / 3`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `Normal::new` rejects the mean / standard deviation pair.
+    pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
+        let mut rng = rand::thread_rng();
+        self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
+            let ppm_mz = mz * ppm / 1e6;
+            let std_dev = ppm_mz / 3.0;
+            Normal::new(mz, std_dev).unwrap().sample(rng)
+        })
+    }
+
+    /// Applies `noise_fn(rng, mz, ppm)` to every m/z value and re-bins the result.
+    ///
+    /// Intensities are copied unchanged; the noisy spectrum is passed through
+    /// `to_resolution(6)`, which orders it by m/z and sums intensities that fall into the
+    /// same bin.
+    fn add_mz_noise<F>(&self, ppm: f64, rng: &mut ThreadRng, noise_fn: F) -> Self
+        where
+            F: Fn(&mut ThreadRng, f64, f64) -> f64,
+    {
+        let mut noisy_mz: Vec<f64> = Vec::with_capacity(self.mz.len());
+        for &mz_value in &self.mz {
+            noisy_mz.push(noise_fn(rng, mz_value, ppm));
+        }
+
+        let noisy = MzSpectrum { mz: noisy_mz, intensity: self.intensity.clone() };
+        noisy.to_resolution(6)
+    }
+}
+
+impl ToResolution for MzSpectrum {
+    /// Bins the spectrum's m/z values to a given resolution and sums the intensities.
+    ///
+    /// # Arguments
+    ///
+    /// * `resolution` - The desired resolution in terms of decimal places. For instance, a resolution of 2
+    /// would bin m/z values to two decimal places.
+    ///
+    /// # Returns
+    ///
+    /// A new `MzSpectrum` where m/z values are binned according to the given resolution,
+    /// ordered by ascending m/z.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use mscore::data::spectrum::MzSpectrum;
+    /// # use mscore::data::spectrum::ToResolution;
+    /// let spectrum = MzSpectrum::new(vec![100.123, 100.121, 100.131], vec![10.0, 20.0, 30.0]);
+    /// let binned_spectrum_1 = spectrum.to_resolution(1);
+    /// let binned_spectrum_2 = spectrum.to_resolution(2);
+    /// assert_eq!(binned_spectrum_1.mz, vec![100.1]);
+    /// assert_eq!(binned_spectrum_1.intensity, vec![60.0]);
+    /// assert_eq!(binned_spectrum_2.mz, vec![100.12, 100.13]);
+    /// assert_eq!(binned_spectrum_2.intensity, vec![30.0, 30.0]);
+    /// ```
+    fn to_resolution(&self, resolution: i32) -> Self {
+        let mut binned: BTreeMap<i64, f64> = BTreeMap::new();
+        // scale factor shared by quantization and the conversion back to m/z
+        let factor = 10f64.powi(resolution);
+
+        for (mz, inten) in self.mz.iter().zip(self.intensity.iter()) {
+            let key = (mz * factor).round() as i64;
+            *binned.entry(key).or_insert(0.0) += *inten;
+        }
+
+        let mz: Vec<f64> = binned.keys().map(|&key| key as f64 / factor).collect();
+        let intensity: Vec<f64> = binned.values().cloned().collect();
+
+        MzSpectrum { mz, intensity }
+    }
+}
+
+impl Vectorized<MzSpectrumVectorized> for MzSpectrum {
+    /// Converts the `MzSpectrum` into a `MzSpectrumVectorized`.
+    ///
+    /// The spectrum is binned with `to_resolution(resolution)`; each binned m/z is then
+    /// scaled by `10^resolution` and rounded to obtain its integer index. The binned
+    /// intensities become the values.
+    fn vectorized(&self, resolution: i32) -> MzSpectrumVectorized {
+        let binned = self.to_resolution(resolution);
+        let scale = 10f64.powi(resolution);
+
+        // Translate the m/z values into integer indices
+        let indices: Vec<i32> = binned.mz.iter()
+            .map(|&mz| (mz * scale).round() as i32)
+            .collect();
+
+        MzSpectrumVectorized {
+            resolution,
+            indices,
+            values: binned.intensity,
+        }
+    }
+}
+
+/// Formats the `MzSpectrum` for display.
+///
+/// Shows the number of data points and the peak with the highest intensity (m/z with three
+/// decimals). A spectrum without peaks is shown with its data point count only instead of
+/// panicking.
+impl Display for MzSpectrum {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let strongest = self.mz.iter()
+            .zip(&self.intensity)
+            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
+
+        match strongest {
+            Some((mz, i)) => write!(f, "MzSpectrum(data points: {}, max by intensity:({:.3}, {}))", self.mz.len(), mz, i),
+            // no (m/z, intensity) pair to report
+            None => write!(f, "MzSpectrum(data points: {})", self.mz.len()),
+        }
+    }
+}
+
+impl std::ops::Add for MzSpectrum {
+    type Output = Self;
+    /// Combines two `MzSpectrum` instances by summing up the intensities of matching m/z values.
+    ///
+    /// # Description
+    /// Each m/z value is quantized to six decimals. All peaks that quantize to the same
+    /// integer key have their intensities summed, whether they come from the same operand
+    /// or from different operands. The result is ordered by ascending m/z.
+    ///
+    /// # Example
+    /// ```
+    /// # use mscore::data::spectrum::MzSpectrum;
+    /// let spectrum1 = MzSpectrum { mz: vec![100.523, 101.923], intensity: vec![10.0, 20.0] };
+    /// let spectrum2 = MzSpectrum { mz: vec![101.235, 105.112], intensity: vec![15.0, 30.0] };
+    ///
+    /// let combined = spectrum1 + spectrum2;
+    ///
+    /// assert_eq!(combined.mz, vec![100.523, 101.235, 101.923, 105.112]);
+    /// assert_eq!(combined.intensity, vec![10.0, 15.0, 20.0, 30.0]);
+    /// ```
+    fn add(self, other: Self) -> MzSpectrum {
+        let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
+
+        // Helper to quantize mz to an integer key
+        let quantize = |mz: f64| -> i64 {
+            (mz * 1_000_000.0).round() as i64
+        };
+
+        // Accumulate both operands the same way, so that duplicate keys inside the first
+        // spectrum are summed rather than overwritten
+        let peaks = self.mz.iter().zip(self.intensity.iter())
+            .chain(other.mz.iter().zip(other.intensity.iter()));
+        for (mz, intensity) in peaks {
+            *combined_map.entry(quantize(*mz)).or_insert(0.0) += *intensity;
+        }
+
+        // Convert the combined map back into two Vec<f64>
+        let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
+        let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
+
+        MzSpectrum { mz: mz_combined, intensity: intensity_combined }
+    }
+}
+
+/// Scales every intensity by a constant factor; m/z values are left untouched.
+impl std::ops::Mul<f64> for MzSpectrum {
+    type Output = Self;
+    fn mul(self, scale: f64) -> Self::Output {
+        let intensity: Vec<f64> = self.intensity.iter().map(|&value| scale * value).collect();
+        Self { mz: self.mz, intensity }
+    }
+}
+
+impl std::ops::Sub for MzSpectrum {
+    type Output = Self;
+    /// Subtracts the intensities of `other` from those of `self` at matching m/z values.
+    ///
+    /// Each m/z value is quantized to six decimals. Peaks of `self` that share a key are
+    /// summed; peaks of `other` are subtracted from the key's total. A key that only occurs
+    /// in `other` ends up with a negative intensity. The result is ordered by ascending m/z.
+    fn sub(self, other: Self) -> Self::Output {
+        let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
+
+        // Helper to quantize mz to an integer key
+        let quantize = |mz: f64| -> i64 {
+            (mz * 1_000_000.0).round() as i64
+        };
+
+        // Accumulate the first spectrum, so that duplicate keys are summed rather than overwritten
+        for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
+            *combined_map.entry(quantize(*mz)).or_insert(0.0) += *intensity;
+        }
+
+        // Subtract the second spectrum
+        for (mz, intensity) in other.mz.iter().zip(other.intensity.iter()) {
+            *combined_map.entry(quantize(*mz)).or_insert(0.0) -= *intensity;
+        }
+
+        // Convert the combined map back into two Vec<f64>
+        let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
+        let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
+
+        MzSpectrum { mz: mz_combined, intensity: intensity_combined }
+    }
+}
+
+/// Represents a mass spectrum with an associated m/z index per peak in addition to the m/z values and intensities.
+#[derive(Clone, Debug)]
+pub struct IndexedMzSpectrum {
+ pub index: Vec<i32>, // one index value per peak, e.g. a time-of-flight value; parallel to the vectors in `mz_spectrum`
+ pub mz_spectrum: MzSpectrum, // m/z values and intensities of the peaks
+}
+
+impl IndexedMzSpectrum {
+    /// Creates a new `IndexedMzSpectrum` from three parallel vectors.
+    ///
+    /// The vectors are stored as given; their lengths are not checked here.
+    ///
+    /// # Arguments
+    ///
+    /// * `index` - A vector containing the mz index, e.g., time-of-flight values.
+    /// * `mz` - A vector containing the m/z values.
+    /// * `intensity` - A vector containing the intensity values.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use mscore::data::spectrum::IndexedMzSpectrum;
+    /// use mscore::data::spectrum::MzSpectrum;
+    ///
+    /// let spectrum = IndexedMzSpectrum::new(vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+    /// ```
+    pub fn new(index: Vec<i32>, mz: Vec<f64>, intensity: Vec<f64>) -> Self {
+        let mz_spectrum = MzSpectrum { mz, intensity };
+        IndexedMzSpectrum { index, mz_spectrum }
+    }
+
+    /// Bins the spectrum to `resolution` decimal places of m/z.
+    ///
+    /// Peaks whose m/z rounds to the same bin are merged: their intensities are
+    /// summed and their index values are averaged (rounded to the nearest integer).
+    /// The result is sorted by ascending m/z.
+    ///
+    /// # Arguments
+    ///
+    /// * `resolution` - The desired m/z resolution for binning; bins are keyed by
+    ///   `round(mz * 10^resolution)`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use mscore::data::spectrum::IndexedMzSpectrum;
+    ///
+    /// let spectrum = IndexedMzSpectrum::new(vec![1000, 2000], vec![100.42, 100.43], vec![50.0, 60.0]);
+    /// let binned_spectrum = spectrum.to_resolution(1);
+    ///
+    /// assert_eq!(binned_spectrum.mz_spectrum.mz, vec![100.4]);
+    /// assert_eq!(binned_spectrum.mz_spectrum.intensity, vec![110.0]);
+    /// assert_eq!(binned_spectrum.index, vec![1500]);
+    /// ```
+    pub fn to_resolution(&self, resolution: i32) -> IndexedMzSpectrum {
+        let factor = 10f64.powi(resolution);
+
+        // bin key -> (summed intensity, index values of all peaks in the bin)
+        let mut mz_bins: BTreeMap<i64, (f64, Vec<i64>)> = BTreeMap::new();
+        let peaks = self
+            .mz_spectrum
+            .mz
+            .iter()
+            .zip(&self.mz_spectrum.intensity)
+            .zip(&self.index);
+        for ((&mz, &intensity), &tof_val) in peaks {
+            let key = (mz * factor).round() as i64;
+            let bin = mz_bins.entry(key).or_insert_with(|| (0.0, Vec::new()));
+            bin.0 += intensity;
+            bin.1.push(i64::from(tof_val));
+        }
+
+        let mut index: Vec<i32> = Vec::with_capacity(mz_bins.len());
+        let mut mz: Vec<f64> = Vec::with_capacity(mz_bins.len());
+        let mut intensity: Vec<f64> = Vec::with_capacity(mz_bins.len());
+        for (key, (summed, tof_vals)) in &mz_bins {
+            // Mean of the collected index values, rounded to the nearest integer.
+            let sum: i64 = tof_vals.iter().sum();
+            let count: i32 = tof_vals.len() as i32;
+            index.push((sum as f64 / count as f64).round() as i32);
+            mz.push(*key as f64 / factor);
+            intensity.push(*summed);
+        }
+
+        IndexedMzSpectrum { index, mz_spectrum: MzSpectrum { mz, intensity } }
+    }
+
+    /// Converts the spectrum into an `IndexedMzSpectrumVectorized` using the given resolution for binning.
+    ///
+    /// The spectrum is first binned with [`to_resolution`](Self::to_resolution); the binned
+    /// m/z values are then translated into integer indices `round(mz * 10^resolution)`.
+    ///
+    /// # Arguments
+    ///
+    /// * `resolution` - The desired m/z resolution for binning.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use mscore::data::spectrum::IndexedMzSpectrum;
+    ///
+    /// let spectrum = IndexedMzSpectrum::new(vec![1000, 2000], vec![100.42, 100.43], vec![50.0, 60.0]);
+    /// let vectorized = spectrum.vectorized(1);
+    ///
+    /// assert_eq!(vectorized.mz_vector.indices, vec![1004]);
+    /// assert_eq!(vectorized.mz_vector.values, vec![110.0]);
+    /// assert_eq!(vectorized.index, vec![1500]);
+    /// ```
+    pub fn vectorized(&self, resolution: i32) -> IndexedMzSpectrumVectorized {
+        let IndexedMzSpectrum { index, mz_spectrum } = self.to_resolution(resolution);
+
+        // Translate the binned m/z values into integer indices.
+        let factor = 10f64.powi(resolution);
+        let indices: Vec<i32> = mz_spectrum
+            .mz
+            .iter()
+            .map(|&mz| (mz * factor).round() as i32)
+            .collect();
+
+        IndexedMzSpectrumVectorized {
+            index,
+            mz_vector: MzSpectrumVectorized {
+                resolution,
+                indices,
+                values: mz_spectrum.intensity,
+            },
+        }
+    }
+
+    /// Returns a copy that keeps only the peaks with `mz_min <= mz <= mz_max` and
+    /// `intensity_min <= intensity <= intensity_max` (all bounds inclusive).
+    /// The relative order of the surviving peaks is unchanged.
+    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self {
+        let mut kept_index: Vec<i32> = Vec::new();
+        let mut kept_mz: Vec<f64> = Vec::new();
+        let mut kept_intensity: Vec<f64> = Vec::new();
+
+        let peaks = self
+            .mz_spectrum
+            .mz
+            .iter()
+            .zip(self.mz_spectrum.intensity.iter())
+            .zip(self.index.iter());
+        for ((&mz, &intensity), &index) in peaks {
+            let mz_in_range = mz_min <= mz && mz <= mz_max;
+            let intensity_in_range = intensity_min <= intensity && intensity <= intensity_max;
+            if !(mz_in_range && intensity_in_range) {
+                continue;
+            }
+            kept_mz.push(mz);
+            kept_intensity.push(intensity);
+            kept_index.push(index);
+        }
+
+        IndexedMzSpectrum {
+            index: kept_index,
+            mz_spectrum: MzSpectrum { mz: kept_mz, intensity: kept_intensity },
+        }
+    }
+}
+
+impl Display for IndexedMzSpectrum {
+    /// Writes a one-line summary: the number of data points and the most intense peak
+    /// (m/z with three decimals, intensity as is).
+    ///
+    /// An empty spectrum has no most intense peak; it is reported as `(none)` instead of
+    /// panicking, so that formatting a spectrum can never abort the program.
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let data_points = self.mz_spectrum.mz.len();
+
+        // Incomparable intensities (NaN) are treated as equal.
+        let most_intense = self.mz_spectrum.mz.iter()
+            .zip(&self.mz_spectrum.intensity)
+            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
+
+        match most_intense {
+            Some((mz, i)) => write!(f, "IndexedMzSpectrum(data points: {}, max by intensity:({:.3}, {}))", data_points, mz, i),
+            None => write!(f, "IndexedMzSpectrum(data points: {}, max by intensity:(none))", data_points),
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct MzSpectrumVectorized { // sparse, index-based form of a spectrum: `indices` and `values` are parallel vectors
+ pub resolution: i32, // decimal resolution used for binning; indices are round(mz * 10^resolution)
+ pub indices: Vec<i32>, // integer bin index of each entry
+ pub values: Vec<f64>, // value (binned intensity) stored for the index at the same position
+}
+
+impl MzSpectrumVectorized {
+    /// Default maximum index of a dense vector: an m/z of 2000 at the stored
+    /// resolution, i.e. `2000 * 10^resolution`.
+    fn get_max_index(&self) -> usize {
+        (2000_i32 * 10_i32.pow(self.resolution as u32)) as usize
+    }
+
+    /// Convert the sparse vector to a dense vector with a specified maximum index.
+    ///
+    /// The resulting vector has length equal to `max_index + 1` and its values
+    /// are the intensities corresponding to each index. Indices with no associated
+    /// intensity will have a value of 0; entries whose index lies beyond `max_index`
+    /// are skipped.
+    ///
+    /// # Arguments
+    ///
+    /// * `max_index` - The maximum index for the dense vector. `None` selects the
+    ///   default derived from the resolution (`2000 * 10^resolution`).
+    pub fn to_dense(&self, max_index: Option<usize>) -> DVector<f64> {
+        let max_index = max_index.unwrap_or_else(|| self.get_max_index());
+
+        let mut dense: DVector<f64> = DVector::<f64>::zeros(max_index + 1);
+        for (&index, &intensity) in self.indices.iter().zip(self.values.iter()) {
+            let slot = index as usize;
+            if slot <= max_index {
+                dense[slot] = intensity;
+            }
+        }
+        dense
+    }
+
+    /// Same as [`to_dense`](Self::to_dense), but returned as an `MzSpectrumVectorized`
+    /// that lists every index `0..=max_index` explicitly together with its value.
+    pub fn to_dense_spectrum(&self, max_index: Option<usize>) -> MzSpectrumVectorized{
+        let max_index = max_index.unwrap_or_else(|| self.get_max_index());
+
+        let values: Vec<f64> = self.to_dense(Some(max_index)).data.into();
+        let indices: Vec<i32> = (0..=max_index).map(|i| i as i32).collect();
+
+        MzSpectrumVectorized { resolution: self.resolution, indices, values }
+    }
+}
+
+#[derive(Clone)]
+pub struct IndexedMzSpectrumVectorized { // vectorized counterpart of `IndexedMzSpectrum`, as produced by `IndexedMzSpectrum::vectorized`
+ pub index: Vec<i32>, // per-bin index values taken over from the binned spectrum
+ pub mz_vector: MzSpectrumVectorized, // integer m/z indices and their values
+}
+
+
+
+
+
+
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658
use std::collections::BTreeMap;
+use std::fmt::Display;
+use itertools::izip;
+use rand::distributions::{Uniform, Distribution};
+use rand::rngs::ThreadRng;
+use statrs::distribution::Normal;
+use crate::data::spectrum::{MsType, ToResolution, Vectorized};
+
+#[derive(Clone, Debug)]
+pub struct PeakAnnotation { // annotation attached to a single peak of an annotated spectrum
+ pub contributions: Vec<ContributionSource>, // sources contributing to the peak; extended when peaks are merged
+}
+
+impl PeakAnnotation {
+    /// Builds an annotation consisting of a single `SourceType::RandomNoise`
+    /// contribution with the given intensity and no signal attributes.
+    pub fn new_random_noise(intensity: f64) -> Self {
+        PeakAnnotation {
+            contributions: vec![ContributionSource {
+                intensity_contribution: intensity,
+                source_type: SourceType::RandomNoise,
+                signal_attributes: None,
+            }],
+        }
+    }
+}
+
+
+#[derive(Clone, Debug)]
+pub struct ContributionSource { // one source that contributed intensity to a peak
+ pub intensity_contribution: f64, // intensity attributed to this source
+ pub source_type: SourceType, // category of the source (signal, chemical noise, random noise, unknown)
+ pub signal_attributes: Option<SignalAttributes>, // optional extra attributes; `None` for contributions built by `new_random_noise`
+}
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum SourceType { // category of an intensity contribution; integer codes are those accepted by `SourceType::new`
+ Signal, // code 0
+ ChemicalNoise, // code 1
+ RandomNoise, // code 2
+ Unknown, // code 3
+}
+
+impl SourceType {
+    /// Converts an integer code into a `SourceType`.
+    ///
+    /// Codes: `0` = `Signal`, `1` = `ChemicalNoise`, `2` = `RandomNoise`, `3` = `Unknown`.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Invalid source type" for any other code.
+    pub fn new(source_type: i32) -> Self {
+        match source_type {
+            0 => Self::Signal,
+            1 => Self::ChemicalNoise,
+            2 => Self::RandomNoise,
+            3 => Self::Unknown,
+            _ => panic!("Invalid source type"),
+        }
+    }
+}
+
+impl Display for SourceType {
+    /// Writes the variant name (`Signal`, `ChemicalNoise`, `RandomNoise` or `Unknown`).
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let label = match self {
+            SourceType::Signal => "Signal",
+            SourceType::ChemicalNoise => "ChemicalNoise",
+            SourceType::RandomNoise => "RandomNoise",
+            SourceType::Unknown => "Unknown",
+        };
+        f.write_str(label)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct SignalAttributes { // optional attributes carried by a `ContributionSource` (see its `signal_attributes` field)
+ pub charge_state: i32, // presumably the charge state of the ion behind the contribution - TODO confirm
+ pub peptide_id: i32, // presumably the identifier of the originating peptide - TODO confirm
+ pub isotope_peak: i32, // presumably the position of the peak in the isotope pattern - TODO confirm
+ pub description: Option<String>, // optional free-text description
+}
+
+#[derive(Clone, Debug)]
+pub struct MzSpectrumAnnotated { // spectrum whose peaks each carry an annotation; the three vectors are parallel (`new` asserts equal lengths and sorts by m/z)
+ pub mz: Vec<f64>, // m/z value of each peak
+ pub intensity: Vec<f64>, // intensity of each peak
+ pub annotations: Vec<PeakAnnotation>, // annotation of each peak
+}
+
+impl MzSpectrumAnnotated {
+    /// Creates an annotated spectrum from three parallel vectors and sorts the peaks by ascending m/z.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the vectors differ in length, or if an m/z value cannot be ordered (NaN).
+    pub fn new(mz: Vec<f64>, intensity: Vec<f64>, annotations: Vec<PeakAnnotation>) -> Self {
+        assert!(mz.len() == intensity.len() && intensity.len() == annotations.len());
+
+        // Zip the peaks together so that all three components are sorted as one unit.
+        let mut peaks: Vec<(f64, f64, PeakAnnotation)> = izip!(mz, intensity, annotations).collect();
+        peaks.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
+
+        let mut sorted = MzSpectrumAnnotated {
+            mz: Vec::with_capacity(peaks.len()),
+            intensity: Vec::with_capacity(peaks.len()),
+            annotations: Vec::with_capacity(peaks.len()),
+        };
+        for (peak_mz, peak_intensity, peak_annotation) in peaks {
+            sorted.mz.push(peak_mz);
+            sorted.intensity.push(peak_intensity);
+            sorted.annotations.push(peak_annotation);
+        }
+        sorted
+    }
+
+    /// Returns a copy that keeps only the peaks with `mz_min <= mz <= mz_max` and
+    /// `intensity_min <= intensity <= intensity_max` (all bounds inclusive).
+    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> Self {
+        let mut kept = MzSpectrumAnnotated {
+            mz: Vec::new(),
+            intensity: Vec::new(),
+            annotations: Vec::new(),
+        };
+
+        for (&mz, &intensity, annotation) in izip!(&self.mz, &self.intensity, &self.annotations) {
+            let mz_in_range = mz_min <= mz && mz <= mz_max;
+            let intensity_in_range = intensity_min <= intensity && intensity <= intensity_max;
+            if mz_in_range && intensity_in_range {
+                kept.mz.push(mz);
+                kept.intensity.push(intensity);
+                kept.annotations.push(annotation.clone());
+            }
+        }
+
+        // after filtering, the length of the mz, intensity and annotations vectors should be the same
+        assert!(kept.mz.len() == kept.intensity.len() && kept.intensity.len() == kept.annotations.len());
+
+        kept
+    }
+
+    /// Perturbs every m/z value with uniformly distributed noise and re-bins the result
+    /// to 6 decimal places.
+    ///
+    /// With `right_drag == false` the noisy value is drawn from `mz ± mz * ppm / 1e6`.
+    /// With `right_drag == true` the half width is `w = mz * ppm / 1e6 / 2` and the value
+    /// is drawn from the asymmetric interval `[mz - w / 3, mz + w]`.
+    pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
+        let mut rng = rand::thread_rng();
+        self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
+            let dist = if right_drag {
+                let ppm_mz = mz * ppm / 1e6 / 2.0;
+                Uniform::from(mz - (ppm_mz / 3.0)..=mz + ppm_mz)
+            } else {
+                let ppm_mz = mz * ppm / 1e6;
+                Uniform::from(mz - ppm_mz..=mz + ppm_mz)
+            };
+
+            dist.sample(rng)
+        })
+    }
+
+    /// Perturbs every m/z value with normally distributed noise and re-bins the result
+    /// to 6 decimal places.
+    ///
+    /// The noise is centered on the original m/z; its standard deviation is one third of
+    /// `mz * ppm / 1e6`, so the ppm window corresponds to three sigma.
+    pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
+        let mut rng = rand::thread_rng();
+        self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
+            let sigma = mz * ppm / 1e6 / 3.0;
+            Normal::new(mz, sigma).unwrap().sample(rng)
+        })
+    }
+
+    /// Shared implementation of the noise functions: replaces each m/z by
+    /// `noise_fn(rng, mz, ppm)`, keeps intensities and annotations, and bins the result
+    /// with `to_resolution(6)`, which sorts by m/z and merges peaks that collide.
+    fn add_mz_noise<F>(&self, ppm: f64, rng: &mut ThreadRng, noise_fn: F) -> Self
+        where
+            F: Fn(&mut ThreadRng, f64, f64) -> f64,
+    {
+        let mut noisy_mz: Vec<f64> = Vec::with_capacity(self.mz.len());
+        for &mz_value in &self.mz {
+            noisy_mz.push(noise_fn(rng, mz_value, ppm));
+        }
+
+        let noisy = MzSpectrumAnnotated {
+            mz: noisy_mz,
+            intensity: self.intensity.clone(),
+            annotations: self.annotations.clone(),
+        };
+
+        noisy.to_resolution(6)
+    }
+
+    /// Splits the spectrum into windows of `window_length` along the m/z axis.
+    ///
+    /// Each peak goes into the window with key `floor(mz / window_length)`. If `overlapping`
+    /// is set, a second set of windows shifted by half a window length is built as well and
+    /// stored under the negated key `-floor((mz + window_length / 2) / window_length)`.
+    /// Windows with fewer than `min_peaks` peaks or a maximum intensity below
+    /// `min_intensity` are dropped.
+    pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> BTreeMap<i32, MzSpectrumAnnotated> {
+        // Appends one peak to the window stored under `key`, creating the window on first use.
+        fn push_peak(windows: &mut BTreeMap<i32, MzSpectrumAnnotated>, key: i32, mz: f64, intensity: f64, annotation: PeakAnnotation) {
+            let window = windows
+                .entry(key)
+                .or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new()));
+            window.mz.push(mz);
+            window.intensity.push(intensity);
+            window.annotations.push(annotation);
+        }
+
+        let mut splits: BTreeMap<i32, MzSpectrumAnnotated> = BTreeMap::new();
+
+        for (i, &mz) in self.mz.iter().enumerate() {
+            let key = (mz / window_length).floor() as i32;
+            push_peak(&mut splits, key, mz, self.intensity[i], self.annotations[i].clone());
+        }
+
+        if overlapping {
+            let mut splits_offset: BTreeMap<i32, MzSpectrumAnnotated> = BTreeMap::new();
+
+            for (i, &mz) in self.mz.iter().enumerate() {
+                // Unary minus binds tighter than `as`: the floored value is negated, then cast.
+                let shifted = (mz + window_length / 2.0) / window_length;
+                let key = (-shifted.floor()) as i32;
+                push_peak(&mut splits_offset, key, mz, self.intensity[i], self.annotations[i].clone());
+            }
+
+            // Fold the shifted windows into the result map.
+            for (key, window) in splits_offset {
+                let target = splits
+                    .entry(key)
+                    .or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new()));
+                target.mz.extend(window.mz);
+                target.intensity.extend(window.intensity);
+                target.annotations.extend(window.annotations);
+            }
+        }
+
+        splits.retain(|_, spectrum| {
+            if spectrum.mz.len() < min_peaks {
+                return false;
+            }
+            // Incomparable intensities (NaN) are treated as equal; an empty window counts as 0.0.
+            let max_intensity = spectrum
+                .intensity
+                .iter()
+                .cloned()
+                .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
+                .unwrap_or(0.0);
+            max_intensity >= min_intensity
+        });
+
+        splits
+    }
+}
+
+impl std::ops::Add for MzSpectrumAnnotated {
+    type Output = Self;
+
+    /// Merges two annotated spectra.
+    ///
+    /// m/z values are quantized to 1e-6 and used as keys. Peaks sharing a key, whether
+    /// they come from the same operand or from different ones, are merged: intensities
+    /// are summed and the contribution lists are concatenated. The result is sorted by
+    /// ascending m/z and carries the quantized m/z values.
+    fn add(self, other: Self) -> Self {
+        let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
+        let mut spec_map: BTreeMap<i64, (f64, PeakAnnotation)> = BTreeMap::new();
+
+        // Both operands go through the same merge path. A plain `insert` for `self`
+        // would overwrite an earlier peak with the same key and silently lose its
+        // intensity and annotations.
+        let own_peaks = izip!(self.mz, self.intensity, self.annotations);
+        let other_peaks = izip!(other.mz, other.intensity, other.annotations);
+        for (mz, intensity, mut annotation) in own_peaks.chain(other_peaks) {
+            let key = quantize(mz);
+            if let Some((summed, merged)) = spec_map.get_mut(&key) {
+                *summed += intensity;
+                merged.contributions.append(&mut annotation.contributions);
+            } else {
+                spec_map.insert(key, (intensity, annotation));
+            }
+        }
+
+        // Ascending key order of the map yields an m/z-sorted spectrum.
+        let mut sum = MzSpectrumAnnotated {
+            mz: Vec::with_capacity(spec_map.len()),
+            intensity: Vec::with_capacity(spec_map.len()),
+            annotations: Vec::with_capacity(spec_map.len()),
+        };
+        for (key, (intensity, annotation)) in spec_map {
+            sum.mz.push(key as f64 / 1_000_000.0);
+            sum.intensity.push(intensity);
+            sum.annotations.push(annotation);
+        }
+
+        sum
+    }
+}
+
+impl ToResolution for MzSpectrumAnnotated {
+    /// Bins the spectrum to `resolution` decimal places of m/z.
+    ///
+    /// Peaks whose m/z rounds to the same bin (`round(mz * 10^resolution)`) are merged:
+    /// intensities are summed and contribution lists are concatenated. The result is
+    /// sorted by ascending m/z.
+    fn to_resolution(&self, resolution: i32) -> Self {
+        let factor = 10.0_f64.powi(resolution);
+        let mut spec_map: BTreeMap<i64, (f64, PeakAnnotation)> = BTreeMap::new();
+
+        for (&mz, &intensity, annotation) in izip!(&self.mz, &self.intensity, &self.annotations) {
+            let key = (mz * factor).round() as i64;
+            spec_map
+                .entry(key)
+                .and_modify(|bin| {
+                    bin.0 += intensity;
+                    bin.1.contributions.extend(annotation.contributions.clone());
+                })
+                .or_insert_with(|| (intensity, annotation.clone()));
+        }
+
+        let mut binned = MzSpectrumAnnotated {
+            mz: Vec::with_capacity(spec_map.len()),
+            intensity: Vec::with_capacity(spec_map.len()),
+            annotations: Vec::with_capacity(spec_map.len()),
+        };
+        for (key, (intensity, annotation)) in spec_map {
+            binned.mz.push(key as f64 / factor);
+            binned.intensity.push(intensity);
+            binned.annotations.push(annotation);
+        }
+
+        assert!(binned.mz.len() == binned.intensity.len() && binned.intensity.len() == binned.annotations.len());
+
+        binned
+    }
+}
+
+impl std::ops::Mul<f64> for MzSpectrumAnnotated {
+    type Output = Self;
+
+    /// Scales the spectrum by `scale`.
+    ///
+    /// Peak intensities are multiplied by `scale`. The intensity of every contribution
+    /// inside the annotations is multiplied by `scale` as well and then rounded to the
+    /// nearest whole number. m/z values are unchanged.
+    ///
+    /// NOTE(review): contributions are rounded while peak intensities are not; confirm
+    /// that this asymmetry is intended.
+    fn mul(self, scale: f64) -> Self::Output {
+        let intensity: Vec<f64> = self.intensity.iter().map(|&value| scale * value).collect();
+
+        let annotations: Vec<PeakAnnotation> = self
+            .annotations
+            .iter()
+            .map(|annotation| PeakAnnotation {
+                contributions: annotation
+                    .contributions
+                    .iter()
+                    .map(|contribution| ContributionSource {
+                        intensity_contribution: (contribution.intensity_contribution * scale).round(),
+                        source_type: contribution.source_type.clone(),
+                        signal_attributes: contribution.signal_attributes.clone(),
+                    })
+                    .collect(),
+            })
+            .collect();
+
+        MzSpectrumAnnotated { mz: self.mz.clone(), intensity, annotations }
+    }
+}
+
+impl Vectorized<MzSpectrumAnnotatedVectorized> for MzSpectrumAnnotated {
+    /// Bins the spectrum with `to_resolution(resolution)` and replaces each binned m/z
+    /// by its integer index `round(mz * 10^resolution)`; intensities and annotations of
+    /// the binned peaks are carried over.
+    fn vectorized(&self, resolution: i32) -> MzSpectrumAnnotatedVectorized {
+        let factor = 10.0_f64.powi(resolution);
+        let binned_spec = self.to_resolution(resolution);
+
+        let mut indices: Vec<u32> = Vec::with_capacity(binned_spec.mz.len());
+        let mut values: Vec<f64> = Vec::with_capacity(binned_spec.mz.len());
+        let mut annotations: Vec<PeakAnnotation> = Vec::with_capacity(binned_spec.mz.len());
+
+        let peaks = izip!(&binned_spec.mz, &binned_spec.intensity, &binned_spec.annotations);
+        for (&mz, &intensity, annotation) in peaks {
+            let key = (mz * factor).round() as i64;
+            indices.push(key as u32);
+            values.push(intensity);
+            annotations.push(annotation.clone());
+        }
+
+        MzSpectrumAnnotatedVectorized { indices, values, annotations }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct MzSpectrumAnnotatedVectorized { // index-based form of an annotated spectrum, as produced by `vectorized`; the three vectors are parallel
+ pub indices: Vec<u32>, // integer m/z index of each entry: round(mz * 10^resolution)
+ pub values: Vec<f64>, // binned intensity of each entry
+ pub annotations: Vec<PeakAnnotation>, // annotation of each entry
+}
+
+#[derive(Clone, Debug)]
+pub struct TimsSpectrumAnnotated { // annotated spectrum of a single scan of a frame, plus per-peak tof values
+ pub frame_id: i32, // id of the frame the spectrum belongs to
+ pub scan: u32, // scan number; `Add` requires both operands to have the same scan
+ pub retention_time: f64, // retention time of the frame (unit not visible here)
+ pub mobility: f64, // mobility value of the scan; filled from `inv_mobility` when built from a frame
+ pub ms_type: MsType, // MS type of the frame
+ pub tof: Vec<u32>, // tof value per peak, parallel to the vectors in `spectrum` (`new` asserts equal lengths)
+ pub spectrum: MzSpectrumAnnotated, // m/z, intensity and annotation of each peak
+}
+
+impl TimsSpectrumAnnotated {
+    /// Creates a spectrum from its metadata, the per-peak tof values and an annotated
+    /// spectrum; the peaks (including their tof values) are sorted by ascending m/z.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `tof` and the vectors of `spectrum` differ in length, or if an m/z
+    /// value cannot be ordered (NaN).
+    pub fn new(frame_id: i32, scan: u32, retention_time: f64, mobility: f64, ms_type: MsType, tof: Vec<u32>, spectrum: MzSpectrumAnnotated) -> Self {
+        assert!(tof.len() == spectrum.mz.len() && spectrum.mz.len() == spectrum.intensity.len() && spectrum.intensity.len() == spectrum.annotations.len());
+
+        // Zip the peaks together so that all four components are sorted as one unit.
+        let mut peaks: Vec<(u32, f64, f64, PeakAnnotation)> =
+            izip!(tof, spectrum.mz, spectrum.intensity, spectrum.annotations).collect();
+        peaks.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
+
+        let mut tof: Vec<u32> = Vec::with_capacity(peaks.len());
+        let mut mz: Vec<f64> = Vec::with_capacity(peaks.len());
+        let mut intensity: Vec<f64> = Vec::with_capacity(peaks.len());
+        let mut annotations: Vec<PeakAnnotation> = Vec::with_capacity(peaks.len());
+        for (peak_tof, peak_mz, peak_intensity, peak_annotation) in peaks {
+            tof.push(peak_tof);
+            mz.push(peak_mz);
+            intensity.push(peak_intensity);
+            annotations.push(peak_annotation);
+        }
+
+        TimsSpectrumAnnotated {
+            frame_id,
+            scan,
+            retention_time,
+            mobility,
+            ms_type,
+            tof,
+            spectrum: MzSpectrumAnnotated { mz, intensity, annotations },
+        }
+    }
+
+    /// Returns a copy that keeps only the peaks with `mz_min <= mz <= mz_max` and
+    /// `intensity_min <= intensity <= intensity_max` (all bounds inclusive).
+    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> Self {
+        let mut tof_kept: Vec<u32> = Vec::new();
+        let mut mz_kept: Vec<f64> = Vec::new();
+        let mut intensity_kept: Vec<f64> = Vec::new();
+        let mut annotations_kept: Vec<PeakAnnotation> = Vec::new();
+
+        let peaks = izip!(&self.tof, &self.spectrum.mz, &self.spectrum.intensity, &self.spectrum.annotations);
+        for (&tof, &mz, &intensity, annotation) in peaks {
+            let mz_in_range = mz_min <= mz && mz <= mz_max;
+            let intensity_in_range = intensity_min <= intensity && intensity <= intensity_max;
+            if mz_in_range && intensity_in_range {
+                tof_kept.push(tof);
+                mz_kept.push(mz);
+                intensity_kept.push(intensity);
+                annotations_kept.push(annotation.clone());
+            }
+        }
+
+        assert!(tof_kept.len() == mz_kept.len() && mz_kept.len() == intensity_kept.len() && intensity_kept.len() == annotations_kept.len());
+
+        TimsSpectrumAnnotated {
+            frame_id: self.frame_id,
+            scan: self.scan,
+            retention_time: self.retention_time,
+            mobility: self.mobility,
+            ms_type: self.ms_type.clone(),
+            tof: tof_kept,
+            spectrum: MzSpectrumAnnotated::new(mz_kept, intensity_kept, annotations_kept),
+        }
+    }
+
+    /// Returns a copy whose m/z values carry uniform noise, see
+    /// `MzSpectrumAnnotated::add_mz_noise_uniform`. Metadata is copied unchanged.
+    pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
+        let spectrum = self.spectrum.add_mz_noise_uniform(ppm, right_drag);
+        TimsSpectrumAnnotated {
+            frame_id: self.frame_id,
+            scan: self.scan,
+            retention_time: self.retention_time,
+            mobility: self.mobility,
+            ms_type: self.ms_type.clone(),
+            // TODO: adding noise to mz means that TOF values need to be re-calculated
+            tof: self.tof.clone(),
+            spectrum,
+        }
+    }
+
+    /// Returns a copy whose m/z values carry normal noise, see
+    /// `MzSpectrumAnnotated::add_mz_noise_normal`. Metadata is copied unchanged.
+    pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
+        let spectrum = self.spectrum.add_mz_noise_normal(ppm);
+        TimsSpectrumAnnotated {
+            frame_id: self.frame_id,
+            scan: self.scan,
+            retention_time: self.retention_time,
+            mobility: self.mobility,
+            ms_type: self.ms_type.clone(),
+            // TODO: adding noise to mz means that TOF values need to be re-calculated
+            tof: self.tof.clone(),
+            spectrum,
+        }
+    }
+}
+
+impl std::ops::Add for TimsSpectrumAnnotated {
+    type Output = Self;
+
+    /// Merges two annotated spectra of the same scan.
+    ///
+    /// m/z values are quantized to 1e-6 and used as keys. Peaks sharing a key, whether
+    /// they come from the same operand or from different ones, are merged: contribution
+    /// lists are concatenated, and tof and intensity are averaged over the merged peaks.
+    /// Frame id, scan, retention time and mobility are taken from `self`; the MS type is
+    /// kept if both operands agree and is `MsType::Unknown` otherwise.
+    ///
+    /// NOTE(review): intensities of merged peaks are averaged here, whereas the other
+    /// `Add` implementations in this file sum them; confirm which is intended.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the two spectra have different scan numbers.
+    fn add(self, other: Self) -> Self {
+        assert_eq!(self.scan, other.scan);
+
+        let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
+        // key -> (summed tof, summed intensity, merged annotation, number of merged peaks)
+        let mut spec_map: BTreeMap<i64, (u32, f64, PeakAnnotation, i64)> = BTreeMap::new();
+
+        // Both operands go through the same merge path. A plain `insert` for `self`
+        // would overwrite an earlier peak with the same key and silently lose it.
+        let own_peaks = izip!(&self.tof, &self.spectrum.mz, &self.spectrum.intensity, &self.spectrum.annotations);
+        let other_peaks = izip!(&other.tof, &other.spectrum.mz, &other.spectrum.intensity, &other.spectrum.annotations);
+        for (tof, mz, intensity, annotation) in own_peaks.chain(other_peaks) {
+            spec_map
+                .entry(quantize(*mz))
+                .and_modify(|e| {
+                    e.0 += *tof;
+                    e.1 += *intensity;
+                    e.2.contributions.extend(annotation.contributions.clone());
+                    e.3 += 1;
+                })
+                .or_insert_with(|| (*tof, *intensity, annotation.clone(), 1));
+        }
+
+        let mut tof_vec: Vec<u32> = Vec::with_capacity(spec_map.len());
+        let mut mz_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
+        let mut intensity_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
+        let mut annotations_vec: Vec<PeakAnnotation> = Vec::with_capacity(spec_map.len());
+
+        for (key, (tof, intensity, annotation, count)) in spec_map {
+            // The mean tof is truncated (not rounded) when converted back to `u32`.
+            tof_vec.push((tof as f64 / count as f64) as u32);
+            mz_vec.push(key as f64 / 1_000_000.0);
+            intensity_vec.push(intensity / count as f64);
+            annotations_vec.push(annotation);
+        }
+
+        TimsSpectrumAnnotated {
+            frame_id: self.frame_id,
+            scan: self.scan,
+            retention_time: self.retention_time,
+            mobility: self.mobility,
+            ms_type: if self.ms_type == other.ms_type { self.ms_type.clone() } else { MsType::Unknown },
+            tof: tof_vec,
+            spectrum: MzSpectrumAnnotated::new(mz_vec, intensity_vec, annotations_vec),
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct TimsFrameAnnotated { // annotated frame: frame-level metadata plus six parallel per-peak vectors (`new` asserts equal lengths)
+ pub frame_id: i32, // id of the frame
+ pub retention_time: f64, // retention time of the frame (unit not visible here)
+ pub ms_type: MsType, // MS type of the frame
+ pub tof: Vec<u32>, // tof value of each peak
+ pub mz: Vec<f64>, // m/z value of each peak
+ pub scan: Vec<u32>, // scan number of each peak
+ pub inv_mobility: Vec<f64>, // inverse mobility value of each peak
+ pub intensity: Vec<f64>, // intensity of each peak
+ pub annotations: Vec<PeakAnnotation>, // annotation of each peak
+}
+
+impl TimsFrameAnnotated {
+    /// Creates a frame from its metadata and six parallel per-peak vectors.
+    /// The vectors are stored in the given order.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the per-peak vectors differ in length.
+    pub fn new(frame_id: i32, retention_time: f64, ms_type: MsType, tof: Vec<u32>, mz: Vec<f64>, scan: Vec<u32>, inv_mobility: Vec<f64>, intensity: Vec<f64>, annotations: Vec<PeakAnnotation>) -> Self {
+        assert!(tof.len() == mz.len() && mz.len() == scan.len() && scan.len() == inv_mobility.len() && inv_mobility.len() == intensity.len() && intensity.len() == annotations.len());
+
+        TimsFrameAnnotated { frame_id, retention_time, ms_type, tof, mz, scan, inv_mobility, intensity, annotations }
+    }
+
+    /// Returns a copy that keeps only the peaks lying inside all four inclusive ranges:
+    /// m/z, inverse mobility, scan and intensity.
+    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, inv_mobility_min: f64, inv_mobility_max: f64, scan_min: u32, scan_max: u32, intensity_min: f64, intensity_max: f64) -> Self {
+        let mut filtered = TimsFrameAnnotated {
+            frame_id: self.frame_id,
+            retention_time: self.retention_time,
+            ms_type: self.ms_type.clone(),
+            tof: Vec::new(),
+            mz: Vec::new(),
+            scan: Vec::new(),
+            inv_mobility: Vec::new(),
+            intensity: Vec::new(),
+            annotations: Vec::new(),
+        };
+
+        let peaks = izip!(&self.tof, &self.mz, &self.scan, &self.inv_mobility, &self.intensity, &self.annotations);
+        for (&tof, &mz, &scan, &inv_mobility, &intensity, annotation) in peaks {
+            let mz_ok = mz_min <= mz && mz <= mz_max;
+            let mobility_ok = inv_mobility_min <= inv_mobility && inv_mobility <= inv_mobility_max;
+            let scan_ok = scan_min <= scan && scan <= scan_max;
+            let intensity_ok = intensity_min <= intensity && intensity <= intensity_max;
+            if !(mz_ok && mobility_ok && scan_ok && intensity_ok) {
+                continue;
+            }
+            filtered.tof.push(tof);
+            filtered.mz.push(mz);
+            filtered.scan.push(scan);
+            filtered.inv_mobility.push(inv_mobility);
+            filtered.intensity.push(intensity);
+            filtered.annotations.push(annotation.clone());
+        }
+
+        assert!(filtered.tof.len() == filtered.mz.len() && filtered.mz.len() == filtered.scan.len() && filtered.scan.len() == filtered.inv_mobility.len() && filtered.inv_mobility.len() == filtered.intensity.len() && filtered.intensity.len() == filtered.annotations.len());
+
+        filtered
+    }
+
+    /// Splits the frame into one `TimsSpectrumAnnotated` per scan, ordered by ascending scan.
+    ///
+    /// The mobility of each spectrum is the inverse mobility of the first peak seen for
+    /// that scan; frame id, retention time and MS type are copied from the frame.
+    pub fn to_tims_spectra_annotated(&self) -> Vec<TimsSpectrumAnnotated> {
+        // Sorted map keyed by scan: scan -> (mobility, tof values, spectrum under construction).
+        let mut spectra = BTreeMap::<i32, (f64, Vec<u32>, MzSpectrumAnnotated)>::new();
+
+        let peaks = izip!(&self.scan, &self.inv_mobility, &self.tof, &self.mz, &self.intensity, &self.annotations);
+        for (&scan, &mobility, &tof, &mz, &intensity, annotation) in peaks {
+            let (_, tofs, spectrum) = spectra
+                .entry(scan as i32)
+                .or_insert_with(|| (mobility, Vec::new(), MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())));
+            tofs.push(tof);
+            spectrum.mz.push(mz);
+            spectrum.intensity.push(intensity);
+            spectrum.annotations.push(annotation.clone());
+        }
+
+        // Convert the map into a vector of spectra, in ascending scan order.
+        let mut tims_spectra: Vec<TimsSpectrumAnnotated> = Vec::new();
+        for (scan, (mobility, tof, spectrum)) in spectra {
+            let tims_spectrum = TimsSpectrumAnnotated::new(self.frame_id, scan as u32, self.retention_time, mobility, self.ms_type.clone(), tof, spectrum);
+            tims_spectra.push(tims_spectrum);
+        }
+
+        tims_spectra
+    }
+
+    /// Assembles a frame from a list of annotated spectra.
+    ///
+    /// Peaks are keyed by `(scan, m/z quantized to 1e-6)`. Peaks sharing a key are merged:
+    /// intensities are summed, contribution lists are concatenated, and tof and inverse
+    /// mobility are averaged. Frame id, retention time and MS type are taken from the
+    /// first spectrum. The result is ordered by scan, then m/z.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `spectra` is empty, or if a spectrum's `tof`, intensity or annotation
+    /// vector is shorter than its m/z vector.
+    pub fn from_tims_spectra_annotated(spectra: Vec<TimsSpectrumAnnotated>) -> TimsFrameAnnotated {
+        let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
+        // (scan, quantized mz) -> (summed intensity, summed tof, summed inverse mobility, merged annotation, count)
+        let mut spec_map: BTreeMap<(u32, i64), (f64, u32, f64, PeakAnnotation, i64)> = BTreeMap::new();
+        let mut capacity_count = 0;
+
+        for spectrum in &spectra {
+            let scan = spectrum.scan;
+            let inv_mobility = spectrum.mobility;
+            for (i, &mz) in spectrum.spectrum.mz.iter().enumerate() {
+                let tof = spectrum.tof[i];
+                let intensity = spectrum.spectrum.intensity[i];
+                let annotation = &spectrum.spectrum.annotations[i];
+                spec_map
+                    .entry((scan, quantize(mz)))
+                    .and_modify(|e| {
+                        e.0 += intensity;
+                        e.1 += tof;
+                        e.2 += inv_mobility;
+                        e.3.contributions.extend(annotation.contributions.clone());
+                        e.4 += 1;
+                    })
+                    .or_insert_with(|| (intensity, tof, inv_mobility, annotation.clone(), 1));
+                capacity_count += 1;
+            }
+        }
+
+        let mut scan_vec: Vec<u32> = Vec::with_capacity(capacity_count);
+        let mut inv_mobility_vec: Vec<f64> = Vec::with_capacity(capacity_count);
+        let mut tof_vec: Vec<u32> = Vec::with_capacity(capacity_count);
+        let mut mz_vec: Vec<f64> = Vec::with_capacity(capacity_count);
+        let mut intensity_vec: Vec<f64> = Vec::with_capacity(capacity_count);
+        let mut annotations_vec: Vec<PeakAnnotation> = Vec::with_capacity(capacity_count);
+
+        for (&(scan, mz_key), (intensity, tof, inv_mobility, annotation, count)) in spec_map.iter() {
+            let merged = *count as f64;
+            scan_vec.push(scan);
+            inv_mobility_vec.push(*inv_mobility / merged);
+            // The mean tof is truncated (not rounded) when converted back to `u32`.
+            tof_vec.push((*tof as f64 / merged) as u32);
+            mz_vec.push(mz_key as f64 / 1_000_000.0);
+            intensity_vec.push(*intensity);
+            annotations_vec.push(annotation.clone());
+        }
+
+        assert!(tof_vec.len() == mz_vec.len() && mz_vec.len() == scan_vec.len() && scan_vec.len() == inv_mobility_vec.len() && inv_mobility_vec.len() == intensity_vec.len() && intensity_vec.len() == annotations_vec.len());
+
+        // Frame-level metadata comes from the first spectrum.
+        let first = spectra.first().unwrap();
+
+        TimsFrameAnnotated {
+            frame_id: first.frame_id,
+            retention_time: first.retention_time,
+            ms_type: first.ms_type.clone(),
+            tof: tof_vec,
+            mz: mz_vec,
+            scan: scan_vec,
+            inv_mobility: inv_mobility_vec,
+            intensity: intensity_vec,
+            annotations: annotations_vec,
+        }
+    }
+}
+
+impl std::ops::Add for TimsFrameAnnotated {
+ type Output = Self;
+ fn add(self, other: Self) -> Self {
+
+ let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
+ let mut spec_map: BTreeMap<(u32, i64), (f64, u32, f64, PeakAnnotation, i64)> = BTreeMap::new();
+
+ for (scan, mz, tof, inv_mobility, intensity, annotation) in
+ izip!(self.scan.iter(), self.mz.iter(), self.tof.iter(), self.inv_mobility.iter(), self.intensity.iter(), self.annotations.iter()) {
+ let key = (*scan, quantize(*mz));
+ spec_map.insert(key, (*intensity, *tof, *inv_mobility, annotation.clone(), 1));
+ }
+
+ for (scan, mz, tof, inv_mobility, intensity, annotation) in
+ izip!(other.scan.iter(), other.mz.iter(), other.tof.iter(), other.inv_mobility.iter(), other.intensity.iter(), other.annotations.iter()) {
+ let key = (*scan, quantize(*mz));
+ spec_map.entry(key).and_modify(|e| {
+ e.0 += *intensity;
+ e.1 += *tof;
+ e.2 += *inv_mobility;
+ e.3.contributions.extend(annotation.contributions.clone());
+ e.4 += 1;
+ }).or_insert((*intensity, *tof, *inv_mobility, annotation.clone(), 1));
+ }
+
+ let mut tof_vec: Vec<u32> = Vec::with_capacity(spec_map.len());
+ let mut mz_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
+ let mut scan_vec: Vec<u32> = Vec::with_capacity(spec_map.len());
+ let mut inv_mobility_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
+ let mut intensity_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
+ let mut annotations_vec: Vec<PeakAnnotation> = Vec::with_capacity(spec_map.len());
+
+ for ((scan, mz), (intensity, tof, inv_mobility, annotation, count)) in spec_map.iter() {
+ scan_vec.push(*scan);
+ mz_vec.push(*mz as f64 / 1_000_000.0);
+ intensity_vec.push(*intensity);
+ tof_vec.push((*tof as f64 / *count as f64) as u32);
+ inv_mobility_vec.push(*inv_mobility / *count as f64);
+ annotations_vec.push(annotation.clone());
+ }
+
+ assert!(tof_vec.len() == mz_vec.len() && mz_vec.len() == scan_vec.len() && scan_vec.len() == inv_mobility_vec.len() && inv_mobility_vec.len() == intensity_vec.len() && intensity_vec.len() == annotations_vec.len());
+
+ TimsFrameAnnotated {
+ frame_id: self.frame_id,
+ retention_time: self.retention_time,
+ ms_type: if self.ms_type == other.ms_type { self.ms_type.clone() } else { MsType::Unknown },
+ tof: tof_vec,
+ mz: mz_vec,
+ scan: scan_vec,
+ inv_mobility: inv_mobility_vec,
+ intensity: intensity_vec,
+ annotations: annotations_vec,
+ }
+ }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57
use std::collections::HashMap;
+
/// Lookup of the collision energy applied to a given scan of a given frame.
pub trait TimsTofCollisionEnergy {
    /// Returns the collision energy for `scan_id` within frame `frame_id`.
    fn get_collision_energy(&self, frame_id: i32, scan_id: i32) -> f64;
}

/// Collision-energy table built from frame -> window-group assignments and
/// per-window-group scan ranges.
pub struct TimsTofCollisionEnergyDIA {
    /// frame id -> window group
    frame_to_window_group: HashMap<i32, i32>,
    /// (window group, scan) -> collision energy
    window_group_settings: HashMap<(i32, i32), f64>,
}

impl TimsTofCollisionEnergyDIA {
    /// Builds the lookup tables.
    ///
    /// # Arguments
    ///
    /// * `frame` / `frame_window_group` - parallel vectors assigning a window group to
    ///   each frame id; pairs are formed position by position.
    /// * `window_group`, `scan_start`, `scan_end`, `collision_energy` - parallel vectors;
    ///   entry `i` assigns `collision_energy[i]` to every scan in the inclusive range
    ///   `scan_start[i]..=scan_end[i]` of `window_group[i]`. When ranges of the same
    ///   window group overlap, the later entry wins.
    ///
    /// # Panics
    ///
    /// Panics if `scan_start`, `scan_end` or `collision_energy` is shorter than
    /// `window_group`.
    pub fn new(
        frame: Vec<i32>,
        frame_window_group: Vec<i32>,
        window_group: Vec<i32>,
        scan_start: Vec<i32>,
        scan_end: Vec<i32>,
        collision_energy: Vec<f64>,
    ) -> Self {
        let frame_to_window_group: HashMap<i32, i32> =
            frame.into_iter().zip(frame_window_group).collect();

        let mut window_group_settings: HashMap<(i32, i32), f64> = HashMap::new();
        for (index, &wg) in window_group.iter().enumerate() {
            let first_scan = scan_start[index];
            let last_scan = scan_end[index];
            let energy = collision_energy[index];
            // An inclusive range avoids the `scan_end + 1` overflow at i32::MAX.
            window_group_settings
                .extend((first_scan..=last_scan).map(|scan| ((wg, scan), energy)));
        }

        Self {
            frame_to_window_group,
            window_group_settings,
        }
    }
}

impl TimsTofCollisionEnergy for TimsTofCollisionEnergyDIA {
    /// Returns the configured collision energy, or `0.0` when the frame has no window
    /// group or the scan is not covered by any range of that window group.
    fn get_collision_energy(&self, frame_id: i32, scan_id: i32) -> f64 {
        self.frame_to_window_group
            .get(&frame_id)
            .and_then(|&wg| self.window_group_settings.get(&(wg, scan_id)))
            .copied()
            .unwrap_or(0.0)
    }
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 +698 +699 +700 +701 +702 +703 +704 +705 +706 +707 +708 +709 +710 +711 +712 +713 +714 +715 +716 +717 +718 +719 +720 +721 +722 +723 +724 +725 +726 +727 +728 +729 +730 +731 +732 +733 +734 +735 +736 +737 +738 +739 +740 +741 +742 +743 +744 +745
use std::fmt;
+use std::collections::BTreeMap;
+use std::fmt::{Formatter};
+use bincode::{Decode, Encode};
+use itertools;
+use itertools::izip;
+use ordered_float::OrderedFloat;
+use rand::Rng;
+use serde::{Deserialize, Serialize};
+use crate::timstof::spectrum::TimsSpectrum;
+use crate::data::spectrum::{MsType, MzSpectrum, IndexedMzSpectrum, Vectorized, ToResolution};
+use crate::simulation::annotation::{PeakAnnotation, TimsFrameAnnotated};
+use crate::timstof::vec_utils::{filter_with_mask, find_sparse_local_maxima_mask};
+
+#[derive(Clone)]
+pub struct RawTimsFrame {
+ pub frame_id: i32,
+ pub retention_time: f64,
+ pub ms_type: MsType,
+ pub scan: Vec<u32>,
+ pub tof: Vec<u32>,
+ pub intensity: Vec<f64>,
+}
+
+impl RawTimsFrame {
+ pub fn smooth(mut self, window: u32) -> Self {
+ let mut smooth_intensities: Vec<f64> = self.intensity.clone();
+ for (current_index, current_tof) in self.tof.iter().enumerate()
+ {
+ let current_intensity: f64 = self.intensity[current_index];
+ for (_next_index, next_tof) in
+ self.tof[current_index + 1..].iter().enumerate()
+ {
+ let next_index: usize = _next_index + current_index + 1;
+ let next_intensity: f64 = self.intensity[next_index];
+ if (next_tof - current_tof) <= window {
+ smooth_intensities[current_index] += next_intensity;
+ smooth_intensities[next_index] += current_intensity;
+ } else {
+ break;
+ }
+ }
+ }
+ self.intensity = smooth_intensities;
+
+ self
+ }
+ pub fn centroid(mut self, window: u32) -> Self {
+ let local_maxima: Vec<bool> = find_sparse_local_maxima_mask(
+ &self.tof,
+ &self.intensity,
+ window,
+ );
+ self.tof = filter_with_mask(&self.tof, &local_maxima);
+ self.intensity = filter_with_mask(&self.intensity, &local_maxima);
+ self.scan = filter_with_mask(&self.scan, &local_maxima);
+ self
+ }
+}
+
+#[derive(Clone, Debug, Default, Serialize, Deserialize, Encode, Decode)]
+pub struct ImsFrame {
+ pub retention_time: f64,
+ pub mobility: Vec<f64>,
+ pub mz: Vec<f64>,
+ pub intensity: Vec<f64>,
+}
+
+impl ImsFrame {
+ /// Creates a new `ImsFrame` instance.
+ ///
+ /// # Arguments
+ ///
+ /// * `retention_time` - The retention time in seconds.
+ /// * `mobility` - A vector of inverse ion mobilities.
+ /// * `mz` - A vector of m/z values.
+ /// * `intensity` - A vector of intensity values.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use mscore::timstof::frame::ImsFrame;
+ ///
+ /// let frame = ImsFrame::new(100.0, vec![0.1, 0.2], vec![100.5, 200.5], vec![50.0, 60.0]);
+ /// ```
+ pub fn new(retention_time: f64, mobility: Vec<f64>, mz: Vec<f64>, intensity: Vec<f64>) -> Self {
+ ImsFrame { retention_time, mobility, mz, intensity }
+ }
+}
+
+impl fmt::Display for ImsFrame {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ write!(f, "ImsFrame(rt: {}, data points: {})", self.retention_time, self.mobility.len())
+ }
+}
+
/// Sparse, binned counterpart of `ImsFrame`: instead of m/z and intensity it stores
/// integer bin `indices` with their `values`, plus the `resolution` used for binning.
#[derive(Clone)]
pub struct ImsFrameVectorized {
    pub retention_time: f64,
    /// Mobility value per entry.
    pub mobility: Vec<f64>,
    /// Integer bin index per entry.
    pub indices: Vec<i32>,
    /// Value stored at each bin index.
    pub values: Vec<f64>,
    /// Resolution the indices were computed with.
    pub resolution: i32,
}
+
+#[derive(Clone, Debug, Serialize, Deserialize, Encode, Decode)]
+pub struct TimsFrame {
+ pub frame_id: i32,
+ pub ms_type: MsType,
+ pub scan: Vec<i32>,
+ pub tof: Vec<i32>,
+ pub ims_frame: ImsFrame,
+}
+
+impl Default for TimsFrame {
+ fn default() -> Self {
+ TimsFrame {
+ frame_id: 0, // Replace with a suitable default value
+ ms_type: MsType::Unknown,
+ scan: Vec::new(),
+ tof: Vec::new(),
+ ims_frame: ImsFrame::default(), // Uses the default implementation for `ImsFrame`
+ }
+ }
+}
+
+impl TimsFrame {
+ /// Creates a new `TimsFrame` instance.
+ ///
+ /// # Arguments
+ ///
+ /// * `frame_id` - index of frame in TDF raw file.
+ /// * `ms_type` - The type of frame.
+ /// * `retention_time` - The retention time in seconds.
+ /// * `scan` - A vector of scan IDs.
+ /// * `mobility` - A vector of inverse ion mobilities.
+ /// * `tof` - A vector of time-of-flight values.
+ /// * `mz` - A vector of m/z values.
+ /// * `intensity` - A vector of intensity values.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use mscore::data::spectrum::MsType;
+ /// use mscore::timstof::frame::TimsFrame;
+ /// use mscore::timstof::frame::ImsFrame;
+ ///
+ /// let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+ /// ```
+ pub fn new(frame_id: i32, ms_type: MsType, retention_time: f64, scan: Vec<i32>, mobility: Vec<f64>, tof: Vec<i32>, mz: Vec<f64>, intensity: Vec<f64>) -> Self {
+ TimsFrame { frame_id, ms_type, scan, tof, ims_frame: ImsFrame { retention_time, mobility, mz, intensity } }
+ }
+
+ ///
+ /// Convert a given TimsFrame to an ImsFrame.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use mscore::data::spectrum::MsType;
+ /// use mscore::timstof::frame::TimsFrame;
+ ///
+ /// let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+ /// let ims_spectrum = frame.get_ims_frame();
+ /// ```
+ pub fn get_ims_frame(&self) -> ImsFrame { self.ims_frame.clone() }
+
+ ///
+ /// Convert a given TimsFrame to a vector of TimsSpectrum.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use mscore::data::spectrum::MsType;
+ /// use mscore::timstof::frame::TimsFrame;
+ ///
+ /// let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+ /// let tims_spectra = frame.to_tims_spectra();
+ /// ```
+ pub fn to_tims_spectra(&self) -> Vec<TimsSpectrum> {
+ // use a sorted map where scan is used as key
+ let mut spectra = BTreeMap::<i32, (f64, Vec<i32>, Vec<f64>, Vec<f64>)>::new();
+
+ // all indices and the intensity values are sorted by scan and stored in the map as a tuple (mobility, tof, mz, intensity)
+ for (scan, mobility, tof, mz, intensity) in itertools::multizip((
+ &self.scan,
+ &self.ims_frame.mobility,
+ &self.tof,
+ &self.ims_frame.mz,
+ &self.ims_frame.intensity)) {
+ let entry = spectra.entry(*scan).or_insert_with(|| (*mobility, Vec::new(), Vec::new(), Vec::new()));
+ entry.1.push(*tof);
+ entry.2.push(*mz);
+ entry.3.push(*intensity);
+ }
+
+ // convert the map to a vector of TimsSpectrum
+ let mut tims_spectra: Vec<TimsSpectrum> = Vec::new();
+
+ for (scan, (mobility, tof, mz, intensity)) in spectra {
+ let spectrum = IndexedMzSpectrum::new(tof, mz, intensity);
+ tims_spectra.push(TimsSpectrum::new(self.frame_id, scan, self.ims_frame.retention_time, mobility, self.ms_type.clone(), spectrum));
+ }
+
+ tims_spectra
+ }
+
+ ///
+ /// Filter a given TimsFrame by m/z, scan, and intensity.
+ ///
+ /// # Arguments
+ ///
+ /// * `mz_min` - The minimum m/z value.
+ /// * `mz_max` - The maximum m/z value.
+ /// * `scan_min` - The minimum scan value.
+ /// * `scan_max` - The maximum scan value.
+ /// *
+ /// * `intensity_min` - The minimum intensity value.
+ /// * `intensity_max` - The maximum intensity value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use mscore::data::spectrum::MsType;
+ /// use mscore::timstof::frame::TimsFrame;
+ ///
+ /// let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+ /// let filtered_frame = frame.filter_ranged(100.0, 200.0, 1, 2, 0.0, 1.6, 50.0, 60.0);
+ /// ```
+ pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, scan_min: i32, scan_max: i32, inv_mob_min: f64, inv_mob_max: f64, intensity_min: f64, intensity_max: f64) -> TimsFrame {
+
+ let mut scan_vec = Vec::new();
+ let mut mobility_vec = Vec::new();
+ let mut tof_vec = Vec::new();
+ let mut mz_vec = Vec::new();
+ let mut intensity_vec = Vec::new();
+
+ for (mz, intensity, scan, mobility, tof) in itertools::multizip((&self.ims_frame.mz, &self.ims_frame.intensity, &self.scan, &self.ims_frame.mobility, &self.tof)) {
+ if mz >= &mz_min && mz <= &mz_max && scan >= &scan_min && scan <= &scan_max && mobility >= &inv_mob_min && mobility <= &inv_mob_max && intensity >= &intensity_min && intensity <= &intensity_max {
+ scan_vec.push(*scan);
+ mobility_vec.push(*mobility);
+ tof_vec.push(*tof);
+ mz_vec.push(*mz);
+ intensity_vec.push(*intensity);
+ }
+ }
+
+ TimsFrame::new(self.frame_id, self.ms_type.clone(), self.ims_frame.retention_time, scan_vec, mobility_vec, tof_vec, mz_vec, intensity_vec)
+ }
+
+ pub fn to_windows_indexed(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> (Vec<i32>, Vec<i32>, Vec<TimsSpectrum>) {
+ // split by scan (ion mobility)
+ let spectra = self.to_tims_spectra();
+
+ let windows: Vec<_> = spectra.iter().map(|spectrum|
+ spectrum.to_windows(window_length, overlapping, min_peaks, min_intensity))
+ .collect();
+
+ let mut scan_indices = Vec::new();
+
+ for tree in windows.iter() {
+ for (_, window) in tree {
+ scan_indices.push(window.scan)
+ }
+ }
+
+ let mut spectra = Vec::new();
+ let mut window_indices = Vec::new();
+
+ for window in windows {
+ for (i, spectrum) in window {
+ spectra.push(spectrum);
+ window_indices.push(i);
+ }
+ }
+
+ (scan_indices, window_indices, spectra)
+ }
+
+ pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> Vec<TimsSpectrum> {
+ let (_, _, widows) = self.to_windows_indexed(window_length, overlapping, min_peaks, min_intensity);
+ widows
+ }
+
+ pub fn from_windows(windows: Vec<TimsSpectrum>) -> TimsFrame {
+
+ let first_window = windows.first().unwrap();
+
+ let mut scan = Vec::new();
+ let mut tof = Vec::new();
+ let mut mzs = Vec::new();
+ let mut intensity = Vec::new();
+ let mut mobility = Vec::new();
+
+ for window in windows.iter() {
+ for (i, mz) in window.spectrum.mz_spectrum.mz.iter().enumerate() {
+ scan.push(window.scan);
+ mobility.push(window.mobility);
+ tof.push(window.spectrum.index[i]);
+ mzs.push(*mz);
+ intensity.push(window.spectrum.mz_spectrum.intensity[i]);
+ }
+ }
+
+ TimsFrame::new(first_window.frame_id, first_window.ms_type.clone(), first_window.retention_time, scan, mobility, tof, mzs, intensity)
+ }
+
+ pub fn from_tims_spectra(spectra: Vec<TimsSpectrum>) -> TimsFrame {
+
+ // Helper to quantize mz to an integer key
+ let quantize = |mz: f64| -> i64 {
+ (mz * 1_000_000.0).round() as i64
+ };
+
+ // Step 1: Get frame coordinates
+ let first_spec = spectra.first();
+ let frame_id = match first_spec {
+ Some(first_spec) => first_spec.frame_id,
+ _ => 1
+ };
+
+ let ms_type = match first_spec {
+ Some(first_spec) => first_spec.ms_type.clone(),
+ _ => MsType::Unknown,
+ };
+
+ let retention_time = match first_spec {
+ Some(first_spec) => first_spec.retention_time,
+ _ => 0.0
+ };
+
+ let mut frame_map: BTreeMap<i32, (f64, BTreeMap<i64, (i32, f64)>)> = BTreeMap::new();
+ let mut capacity_count = 0;
+
+ // Step 2: Group by scan and unroll all spectra to a single vector per scan
+ for spectrum in &spectra {
+ let inv_mobility = spectrum.mobility;
+ let entry = frame_map.entry(spectrum.scan).or_insert_with(|| (inv_mobility, BTreeMap::new()));
+ for (i, mz) in spectrum.spectrum.mz_spectrum.mz.iter().enumerate() {
+ let tof = spectrum.spectrum.index[i];
+ let intensity = spectrum.spectrum.mz_spectrum.intensity[i];
+ entry.1.entry(quantize(*mz)).and_modify(|e| *e = (tof, e.1 + intensity)).or_insert((tof, intensity));
+ capacity_count += 1;
+ }
+ }
+
+ // Step 3: Unroll the map to vectors
+ let mut scan = Vec::with_capacity(capacity_count);
+ let mut mobility = Vec::with_capacity(capacity_count);
+ let mut tof = Vec::with_capacity(capacity_count);
+ let mut mzs = Vec::with_capacity(capacity_count);
+ let mut intensity = Vec::with_capacity(capacity_count);
+
+ for (scan_val, (mobility_val, mz_map)) in frame_map {
+ for (mz_val, (tof_val, intensity_val)) in mz_map {
+ scan.push(scan_val);
+ mobility.push(mobility_val);
+ tof.push(tof_val);
+ mzs.push(mz_val as f64 / 1_000_000.0);
+ intensity.push(intensity_val);
+ }
+ }
+
+ TimsFrame::new(frame_id, ms_type, retention_time, scan, mobility, tof, mzs, intensity)
+ }
+
+ pub fn to_dense_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, resolution: i32) -> (Vec<f64>, Vec<i32>, Vec<i32>, usize, usize) {
+ let factor = (10.0f64).powi(resolution);
+ let num_colums = ((window_length * factor).round() + 1.0) as usize;
+
+ let (scans, window_indices, spectra) = self.to_windows_indexed(window_length, overlapping, min_peaks, min_intensity);
+ let vectorized_spectra = spectra.iter().map(|spectrum| spectrum.vectorized(resolution)).collect::<Vec<_>>();
+
+ let mut flat_matrix: Vec<f64> = vec![0.0; spectra.len() * num_colums];
+
+ for (row_index, (window_index, spectrum)) in itertools::multizip((&window_indices, vectorized_spectra)).enumerate() {
+
+ let vectorized_window_index = match *window_index >= 0 {
+ true => (*window_index as f64 * window_length * factor).round() as i32,
+ false => (((-1.0 * (*window_index as f64)) * window_length - (0.5 * window_length)) * factor).round() as i32,
+ };
+
+ for (i, index) in spectrum.vector.mz_vector.indices.iter().enumerate() {
+ let zero_based_index = (index - vectorized_window_index) as usize;
+ let current_index = row_index * num_colums + zero_based_index;
+ flat_matrix[current_index] = spectrum.vector.mz_vector.values[i];
+ }
+
+ }
+ (flat_matrix, scans, window_indices, spectra.len(), num_colums)
+ }
+
+
+ pub fn to_indexed_mz_spectrum(&self) -> IndexedMzSpectrum {
+ let mut grouped_data: BTreeMap<i32, Vec<(f64, f64)>> = BTreeMap::new();
+
+ // Group by 'tof' with 'mz' and 'intensity'
+ for (&tof, (&mz, &intensity)) in self.tof.iter().zip(self.ims_frame.mz.iter().zip(self.ims_frame.intensity.iter())) {
+ grouped_data.entry(tof).or_insert_with(Vec::new).push((mz, intensity));
+ }
+
+ let mut index = Vec::new();
+ let mut mz = Vec::new();
+ let mut intensity = Vec::new();
+
+ for (&tof_val, values) in &grouped_data {
+ let sum_intensity: f64 = values.iter().map(|&(_, i)| i).sum();
+ let avg_mz: f64 = values.iter().map(|&(m, _)| m).sum::<f64>() / values.len() as f64;
+
+ index.push(tof_val);
+ mz.push(avg_mz);
+ intensity.push(sum_intensity);
+ }
+
+ IndexedMzSpectrum {
+ index,
+ mz_spectrum: MzSpectrum { mz, intensity },
+ }
+ }
+
+ pub fn generate_random_sample(&self, take_probability: f64) -> TimsFrame {
+ assert!(take_probability >= 0.0 && take_probability <= 1.0);
+
+ let mut rng = rand::thread_rng();
+ let mut scan = Vec::new();
+ let mut mobility = Vec::new();
+ let mut tof = Vec::new();
+ let mut mz = Vec::new();
+ let mut intensity = Vec::new();
+
+ for (s, m, t, mz_val, i) in itertools::multizip((&self.scan, &self.ims_frame.mobility, &self.tof, &self.ims_frame.mz, &self.ims_frame.intensity)) {
+ if rng.gen::<f64>() <= take_probability {
+ scan.push(*s);
+ mobility.push(*m);
+ tof.push(*t);
+ mz.push(*mz_val);
+ intensity.push(*i);
+ }
+ }
+
+ TimsFrame::new(self.frame_id, self.ms_type.clone(), self.ims_frame.retention_time, scan, mobility, tof, mz, intensity)
+ }
+
+ pub fn to_noise_annotated_tims_frame(&self) -> TimsFrameAnnotated {
+ let mut annotations = Vec::with_capacity(self.ims_frame.mz.len());
+ let tof_values = self.tof.clone();
+ let mz_values = self.ims_frame.mz.clone();
+ let scan_values = self.scan.clone();
+ let inv_mobility_values = self.ims_frame.mobility.clone();
+ let intensity_values = self.ims_frame.intensity.clone();
+
+ for intensity in &intensity_values {
+ annotations.push(PeakAnnotation::new_random_noise(*intensity));
+ }
+
+ TimsFrameAnnotated::new(
+ self.frame_id,
+ self.ims_frame.retention_time,
+ self.ms_type.clone(),
+ tof_values.iter().map(|&x| x as u32).collect(),
+ mz_values,
+ scan_values.iter().map(|&x| x as u32).collect(),
+ inv_mobility_values,
+ intensity_values,
+ annotations,
+ )
+ }
+
+ pub fn get_inverse_mobility_along_scan_marginal(&self) -> f64 {
+ let mut marginal_map: BTreeMap<i32, (f64, f64)> = BTreeMap::new();
+ // go over all data points of scan, inv_mob and intensity
+ for (scan, inv_mob, intensity) in izip!(&self.scan, &self.ims_frame.mobility, &self.ims_frame.intensity) {
+ // create a key for the map
+ let key = *scan;
+ // get the entry from the map or insert a new one
+ let entry = marginal_map.entry(key).or_insert((0.0, 0.0));
+ // update the entry with the current intensity adding it to the existing intensity
+ entry.0 += *intensity;
+ // update the entry with the current inverse mobility, overwriting the existing value
+ entry.1 = *inv_mob;
+ }
+
+ // get the inverse mobility with the highest intensity
+ let (_, max_inv_mob) = marginal_map.iter().max_by(|a, b| a.1.0.partial_cmp(&b.1.0).unwrap_or(std::cmp::Ordering::Equal)).unwrap_or((&0, &(0.0, 0.0))).1;
+
+ *max_inv_mob
+ }
+
+ /// Calculate the weighted mean and variance of `inv_mob` values based on their intensities.
+ pub fn get_mobility_mean_and_variance(&self) -> (f64, f64) {
+ let mut mobility_map: BTreeMap<OrderedFloat<f64>, f64> = BTreeMap::new();
+
+ // Aggregate intensity values for each `inv_mob`
+ for (inv_mob, intensity) in izip!(&self.ims_frame.mobility, &self.ims_frame.intensity) {
+ let entry = mobility_map.entry(OrderedFloat(*inv_mob)).or_insert(0.0);
+ *entry += *intensity;
+ }
+
+ // Calculate weighted mean
+ let mut total_weight = 0.0;
+ let mut weighted_sum = 0.0;
+ for (&inv_mob, &intensity) in &mobility_map {
+ total_weight += intensity;
+ weighted_sum += inv_mob.into_inner() * intensity;
+ }
+ let mean = weighted_sum / total_weight;
+
+ // Calculate weighted variance
+ let mut weighted_squared_diff_sum = 0.0;
+ for (&inv_mob, &intensity) in &mobility_map {
+ let diff = inv_mob.into_inner() - mean;
+ weighted_squared_diff_sum += intensity * diff * diff;
+ }
+ let variance = weighted_squared_diff_sum / total_weight;
+
+ (mean, variance)
+ }
+}
+
+struct AggregateData {
+ intensity_sum: f64,
+ ion_mobility_sum: f64,
+ tof_sum: i64,
+ count: i32,
+}
+
+impl std::ops::Add for TimsFrame {
+ type Output = Self;
+ fn add(self, other: Self) -> TimsFrame {
+ let mut combined_map: BTreeMap<(i32, i64), AggregateData> = BTreeMap::new();
+
+ let quantize = |mz: f64| -> i64 {
+ (mz * 1_000_000.0).round() as i64
+ };
+
+ let add_to_map = |map: &mut BTreeMap<(i32, i64), AggregateData>, mz, ion_mobility, tof, scan, intensity| {
+ let key = (scan, quantize(mz));
+ let entry = map.entry(key).or_insert(AggregateData { intensity_sum: 0.0, ion_mobility_sum: 0.0, tof_sum: 0, count: 0 });
+ entry.intensity_sum += intensity;
+ entry.ion_mobility_sum += ion_mobility;
+ entry.tof_sum += tof as i64;
+ entry.count += 1;
+ };
+
+ for (mz, tof, ion_mobility, scan, intensity) in izip!(&self.ims_frame.mz, &self.tof, &self.ims_frame.mobility, &self.scan, &self.ims_frame.intensity) {
+ add_to_map(&mut combined_map, *mz, *ion_mobility, *tof, *scan, *intensity);
+ }
+
+ for (mz, tof, ion_mobility, scan, intensity) in izip!(&other.ims_frame.mz, &other.tof, &other.ims_frame.mobility, &other.scan, &other.ims_frame.intensity) {
+ add_to_map(&mut combined_map, *mz, *ion_mobility, *tof, *scan, *intensity);
+ }
+
+ let mut mz_combined = Vec::new();
+ let mut tof_combined = Vec::new();
+ let mut ion_mobility_combined = Vec::new();
+ let mut scan_combined = Vec::new();
+ let mut intensity_combined = Vec::new();
+
+ for ((scan, quantized_mz), data) in combined_map {
+ mz_combined.push(quantized_mz as f64 / 1_000_000.0);
+ tof_combined.push(data.tof_sum / data.count as i64);
+ ion_mobility_combined.push(data.ion_mobility_sum / data.count as f64);
+ scan_combined.push(scan);
+ intensity_combined.push(data.intensity_sum);
+ }
+
+ let frame = TimsFrame {
+ frame_id: self.frame_id,
+ ms_type: if self.ms_type == other.ms_type { self.ms_type.clone() } else { MsType::Unknown },
+ scan: scan_combined,
+ tof: tof_combined.iter().map(|&x| x as i32).collect(),
+ ims_frame: ImsFrame {
+ retention_time: self.ims_frame.retention_time,
+ mobility: ion_mobility_combined,
+ mz: mz_combined,
+ intensity: intensity_combined,
+ },
+ };
+
+ return frame;
+ }
+}
+
+impl fmt::Display for TimsFrame {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+
+ let (mz, i) = self.ims_frame.mz.iter()
+ .zip(&self.ims_frame.intensity)
+ .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
+ .unwrap();
+
+ write!(f, "TimsFrame(id: {}, type: {}, rt: {}, data points: {}, max by intensity: (mz: {}, intensity: {}))",
+ self.frame_id, self.ms_type, self.ims_frame.retention_time, self.scan.len(), format!("{:.3}", mz), i)
+ }
+}
+
+impl Vectorized<TimsFrameVectorized> for TimsFrame {
+ fn vectorized(&self, resolution: i32) -> TimsFrameVectorized {
+ let binned_frame = self.to_resolution(resolution);
+ // Translate the m/z values into integer indices
+ let indices: Vec<i32> = binned_frame.ims_frame.mz.iter().map(|&mz| (mz * 10f64.powi(resolution)).round() as i32).collect();
+ // Create a vector of values
+ return TimsFrameVectorized {
+ frame_id: self.frame_id,
+ ms_type: self.ms_type.clone(),
+ scan: binned_frame.scan,
+ tof: binned_frame.tof,
+ ims_frame: ImsFrameVectorized {
+ retention_time: binned_frame.ims_frame.retention_time,
+ mobility: binned_frame.ims_frame.mobility,
+ indices,
+ values: binned_frame.ims_frame.intensity,
+ resolution,
+ },
+ };
+ }
+}
+
+///
+/// Convert a given TimsFrame to a vector of TimsSpectrum.
+///
+/// # Arguments
+///
+/// * `resolution` - The resolution to which the m/z values should be rounded.
+///
+/// # Examples
+///
+/// ```
+/// use mscore::data::spectrum::MsType;
+/// use mscore::timstof::frame::TimsFrame;
+/// use mscore::timstof::spectrum::TimsSpectrum;
+/// use mscore::data::spectrum::IndexedMzSpectrum;
+/// use mscore::data::spectrum::ToResolution;
+///
+/// let frame = TimsFrame::new(1, MsType::Precursor, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
+/// let low_res_frame = frame.to_resolution(1);
+/// ```
+impl ToResolution for TimsFrame {
+ fn to_resolution(&self, resolution: i32) -> TimsFrame {
+ let factor = (10.0f64).powi(resolution);
+
+ // Using a tuple of (scan, mz_bin) as a key
+ // Value will store sum of intensities, sum of tofs, sum of mobilities and their count for averaging
+ let mut bin_map: BTreeMap<(i32, i32), (f64, f64, f64, i32)> = BTreeMap::new();
+
+ for i in 0..self.ims_frame.mz.len() {
+ let rounded_mz = (self.ims_frame.mz[i] * factor).round() as i32;
+ let scan_val = self.scan[i];
+ let intensity_val = self.ims_frame.intensity[i] as f64;
+ let tof_val = self.tof[i] as f64;
+ let mobility_val = self.ims_frame.mobility[i] as f64;
+
+ let entry = bin_map.entry((scan_val, rounded_mz)).or_insert((0.0, 0.0, 0.0, 0));
+ entry.0 += intensity_val;
+ entry.1 += tof_val;
+ entry.2 += mobility_val;
+ entry.3 += 1;
+ }
+
+ let mut new_mz = Vec::with_capacity(bin_map.len());
+ let mut new_scan = Vec::with_capacity(bin_map.len());
+ let mut new_intensity = Vec::with_capacity(bin_map.len());
+ let mut new_tof = Vec::with_capacity(bin_map.len());
+ let mut new_mobility = Vec::with_capacity(bin_map.len());
+
+ for ((scan, mz_bin), (intensity_sum, tof_sum, mobility_sum, count)) in bin_map {
+ new_mz.push(mz_bin as f64 / factor);
+ new_scan.push(scan);
+ new_intensity.push(intensity_sum);
+ new_tof.push((tof_sum / count as f64) as i32);
+ new_mobility.push(mobility_sum / count as f64);
+ }
+
+ TimsFrame {
+ frame_id: self.frame_id,
+ ms_type: self.ms_type.clone(),
+ scan: new_scan,
+ tof: new_tof,
+ ims_frame: ImsFrame {
+ retention_time: self.ims_frame.retention_time,
+ mobility: new_mobility,
+ mz: new_mz,
+ intensity: new_intensity,
+ },
+ }
+ }
+}
+
+#[derive(Clone)]
+pub struct TimsFrameVectorized {
+ pub frame_id: i32,
+ pub ms_type: MsType,
+ pub scan: Vec<i32>,
+ pub tof: Vec<i32>,
+ pub ims_frame: ImsFrameVectorized,
+}
+
+impl TimsFrameVectorized {
+ pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, scan_min: i32, scan_max: i32, inv_mob_min: f64, inv_mob_max: f64, intensity_min: f64, intensity_max: f64) -> TimsFrameVectorized {
+ let mut scan_vec = Vec::new();
+ let mut mobility_vec = Vec::new();
+ let mut tof_vec = Vec::new();
+ let mut mz_vec = Vec::new();
+ let mut intensity_vec = Vec::new();
+ let mut indices_vec = Vec::new();
+
+ for (mz, intensity, scan, mobility, tof, index) in itertools::multizip((&self.ims_frame.values, &self.ims_frame.values, &self.scan, &self.ims_frame.mobility, &self.tof, &self.ims_frame.indices)) {
+ if mz >= &mz_min && mz <= &mz_max && scan >= &scan_min && scan <= &scan_max && mobility >= &inv_mob_min && mobility <= &inv_mob_max && intensity >= &intensity_min && intensity <= &intensity_max {
+ scan_vec.push(*scan);
+ mobility_vec.push(*mobility);
+ tof_vec.push(*tof);
+ mz_vec.push(*mz);
+ intensity_vec.push(*intensity);
+ indices_vec.push(*index);
+ }
+ }
+
+ TimsFrameVectorized {
+ frame_id: self.frame_id,
+ ms_type: self.ms_type.clone(),
+ scan: scan_vec,
+ tof: tof_vec,
+ ims_frame: ImsFrameVectorized {
+ retention_time: self.ims_frame.retention_time,
+ mobility: mobility_vec,
+ indices: indices_vec,
+ values: mz_vec,
+ resolution: self.ims_frame.resolution,
+ },
+ }
+ }
+}
+
+impl fmt::Display for TimsFrameVectorized {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+
+ let (mz, i) = self.ims_frame.values.iter()
+ .zip(&self.ims_frame.values)
+ .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
+ .unwrap();
+
+ write!(f, "TimsFrame(id: {}, type: {}, rt: {}, data points: {}, max by intensity: (mz: {}, intensity: {}))",
+ self.frame_id, self.ms_type, self.ims_frame.retention_time, self.scan.len(), format!("{:.3}", mz), i)
+ }
+}
+
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470
use std::collections::{HashMap, HashSet};
+use std::f64;
+use std::f64::consts::E;
+use itertools::izip;
+use crate::data::spectrum::MzSpectrum;
+use crate::simulation::annotation::{MzSpectrumAnnotated, TimsFrameAnnotated};
+use crate::timstof::frame::TimsFrame;
+
/// Sigmoid step function for quadrupole selection simulation
///
/// The step is a logistic curve centred on the midpoint of `up_start` and
/// `up_end`; only that midpoint and `k` determine the shape.
///
/// Arguments:
///
/// * `x` - mz values (any slice; a `&Vec<f64>` is accepted as before)
/// * `up_start` - start of the step
/// * `up_end` - end of the step
/// * `k` - steepness of the step
///
/// Returns:
///
/// * `Vec<f64>` - transmission probability for each mz value
///
/// # Examples
///
/// ```
/// use mscore::timstof::quadrupole::smooth_step;
///
/// let mz = vec![100.0, 200.0, 300.0];
/// let transmission = smooth_step(&mz, 150.0, 250.0, 0.5).iter().map(
///     |&x| (x * 100.0).round() / 100.0).collect::<Vec<f64>>();
/// assert_eq!(transmission, vec![0.0, 0.5, 1.0]);
/// ```
pub fn smooth_step(x: &[f64], up_start: f64, up_end: f64, k: f64) -> Vec<f64> {
    let midpoint = (up_start + up_end) / 2.0;
    x.iter()
        .map(|&xi| 1.0 / (1.0 + E.powf(-k * (xi - midpoint))))
        .collect()
}
+
/// Sigmoid step-up / step-down function for quadrupole selection simulation
///
/// The result is the difference of two logistic steps: one rising around the
/// midpoint of `up_start`/`up_end` and one rising around the midpoint of
/// `down_start`/`down_end`. Both use the same formula as `smooth_step`, evaluated
/// in a single pass without intermediate vectors.
///
/// Arguments:
///
/// * `x` - mz values (any slice; a `&Vec<f64>` is accepted as before)
/// * `up_start` - start of the step up
/// * `up_end` - end of the step up
/// * `down_start` - start of the step down
/// * `down_end` - end of the step down
/// * `k` - steepness of the step
///
/// Returns:
///
/// * `Vec<f64>` - transmission probability for each mz value
///
/// # Examples
///
/// ```
/// use mscore::timstof::quadrupole::smooth_step_up_down;
///
/// let mz = vec![100.0, 200.0, 300.0];
/// let transmission = smooth_step_up_down(&mz, 150.0, 200.0, 250.0, 300.0, 0.5).iter().map(
///     |&x| (x * 100.0).round() / 100.0).collect::<Vec<f64>>();
/// assert_eq!(transmission, vec![0.0, 1.0, 0.0]);
/// ```
pub fn smooth_step_up_down(x: &[f64], up_start: f64, up_end: f64, down_start: f64, down_end: f64, k: f64) -> Vec<f64> {
    let rise_mid = (up_start + up_end) / 2.0;
    let fall_mid = (down_start + down_end) / 2.0;

    // logistic step centred on `mid`
    let step = |xi: f64, mid: f64| 1.0 / (1.0 + E.powf(-k * (xi - mid)));

    x.iter()
        .map(|&xi| step(xi, rise_mid) - step(xi, fall_mid))
        .collect()
}
+
+/// Ion transmission function for quadrupole selection simulation
+///
+/// Arguments:
+///
+/// * `midpoint` - center of the step
+/// * `window_length` - length of the step
+/// * `k` - steepness of the step
+///
+/// Returns:
+///
+/// * `impl Fn(Vec<f64>) -> Vec<f64>` - ion transmission function
+///
+/// # Examples
+///
+/// ```
+/// use mscore::timstof::quadrupole::ion_transition_function_midpoint;
+///
+/// let ion_transmission = ion_transition_function_midpoint(150.0, 50.0, 1.0);
+/// let mz = vec![100.0, 150.0, 170.0];
+/// let transmission = ion_transmission(mz).iter().map(
+/// |&x| (x * 100.0).round() / 100.0).collect::<Vec<f64>>();
+/// assert_eq!(transmission, vec![0.0, 1.0, 1.0]);
+/// ```
+pub fn ion_transition_function_midpoint(midpoint: f64, window_length: f64, k: f64) -> impl Fn(Vec<f64>) -> Vec<f64> {
+ let half_window = window_length / 2.0;
+
+ let up_start = midpoint - half_window - 2.0;
+ let up_end = midpoint - half_window;
+ let down_start = midpoint + half_window;
+ let down_end = midpoint + half_window + 2.0;
+
+ // take a vector of mz values to their transmission probability
+ move |mz: Vec<f64>| -> Vec<f64> {
+ smooth_step_up_down(&mz, up_start, up_end, down_start, down_end, k)
+ }
+}
+
+/// Apply ion transmission function to mz values
+///
+/// Arguments:
+///
+/// * `midpoint` - center of the step
+/// * `window_length` - length of the step
+/// * `k` - steepness of the step
+/// * `mz` - mz values
+///
+/// Returns:
+///
+/// * `Vec<f64>` - transmission probability for each mz value
+///
+/// # Examples
+///
+/// ```
+/// use mscore::timstof::quadrupole::apply_transmission;
+///
+/// let mz = vec![100.0, 150.0, 170.0];
+/// let transmission = apply_transmission(150.0, 50.0, 1.0, mz).iter().map(
+/// |&x| (x * 100.0).round() / 100.0).collect::<Vec<f64>>();
+/// assert_eq!(transmission, vec![0.0, 1.0, 1.0]);
+/// ```
+pub fn apply_transmission(midpoint: f64, window_length: f64, k: f64, mz: Vec<f64>) -> Vec<f64> {
+ ion_transition_function_midpoint(midpoint, window_length, k)(mz)
+}
+
+pub trait IonTransmission {
+ fn apply_transmission(&self, frame_id: i32, scan_id: i32, mz: &Vec<f64>) -> Vec<f64>;
+
+ /// Transmit a spectrum given a frame id and scan id
+ ///
+ /// Arguments:
+ ///
+ /// * `frame_id` - frame id
+ /// * `scan_id` - scan id
+ /// * `spectrum` - MzSpectrum
+ /// * `min_probability` - minimum probability for transmission
+ ///
+ /// Returns:
+ ///
+ /// * `MzSpectrum` - transmitted spectrum
+ ///
+ fn transmit_spectrum(&self, frame_id: i32, scan_id: i32, spectrum: MzSpectrum, min_probability: Option<f64>) -> MzSpectrum {
+
+ let probability_cutoff = min_probability.unwrap_or(0.5);
+ let transmission_probability = self.apply_transmission(frame_id, scan_id, &spectrum.mz);
+
+ let mut filtered_mz = Vec::new();
+ let mut filtered_intensity = Vec::new();
+
+ // zip mz and intensity with transmission probability and filter out all mz values with transmission probability 0.001
+ for (i, (mz, intensity)) in spectrum.mz.iter().zip(spectrum.intensity.iter()).enumerate() {
+ if transmission_probability[i] > probability_cutoff {
+ filtered_mz.push(*mz);
+ filtered_intensity.push(*intensity* transmission_probability[i]);
+ }
+ }
+
+ MzSpectrum {
+ mz: filtered_mz,
+ intensity: filtered_intensity,
+ }
+ }
+
+ /// Transmit an annotated spectrum given a frame id and scan id
+ ///
+ /// Arguments:
+ ///
+ /// * `frame_id` - frame id
+ /// * `scan_id` - scan id
+ /// * `spectrum` - MzSpectrumAnnotated
+ /// * `min_probability` - minimum probability for transmission
+ ///
+ /// Returns:
+ ///
+ /// * `MzSpectrumAnnotated` - transmitted spectrum
+ ///
+ fn transmit_annotated_spectrum(&self, frame_id: i32, scan_id: i32, spectrum: MzSpectrumAnnotated, min_probability: Option<f64>) -> MzSpectrumAnnotated {
+ let probability_cutoff = min_probability.unwrap_or(0.5);
+ let transmission_probability = self.apply_transmission(frame_id, scan_id, &spectrum.mz);
+
+ let mut filtered_mz = Vec::new();
+ let mut filtered_intensity = Vec::new();
+ let mut filtered_annotation = Vec::new();
+
+ // zip mz and intensity with transmission probability and filter out all mz values with transmission probability 0.5
+ for (i, (mz, intensity, annotation)) in izip!(spectrum.mz.iter(), spectrum.intensity.iter(), spectrum.annotations.iter()).enumerate() {
+ if transmission_probability[i] > probability_cutoff {
+ filtered_mz.push(*mz);
+ filtered_intensity.push(*intensity* transmission_probability[i]);
+ filtered_annotation.push(annotation.clone());
+ }
+ }
+
+ MzSpectrumAnnotated {
+ mz: filtered_mz,
+ intensity: filtered_intensity,
+ annotations: filtered_annotation,
+ }
+ }
+
+ fn transmit_ion(&self, frame_ids: Vec<i32>, scan_ids: Vec<i32>, spec: MzSpectrum, min_proba: Option<f64>) -> Vec<Vec<MzSpectrum>> {
+
+ let mut result: Vec<Vec<MzSpectrum>> = Vec::new();
+
+ for frame_id in frame_ids.iter() {
+ let mut frame_result: Vec<MzSpectrum> = Vec::new();
+ for scan_id in scan_ids.iter() {
+ let transmitted_spectrum = self.transmit_spectrum(*frame_id, *scan_id, spec.clone(), min_proba);
+ frame_result.push(transmitted_spectrum);
+ }
+ result.push(frame_result);
+ }
+ result
+ }
+
+ /// Get all ions in a frame that are transmitted
+ ///
+ /// Arguments:
+ ///
+ /// * `frame_id` - frame id
+ /// * `scan_id` - scan id
+ /// * `mz` - mz values
+ /// * `min_proba` - minimum probability for transmission
+ ///
+ /// Returns:
+ ///
+ /// * `HashSet<usize>` - indices of transmitted mz values
+ ///
+ fn get_transmission_set(&self, frame_id: i32, scan_id: i32, mz: &Vec<f64>, min_proba: Option<f64>) -> HashSet<usize> {
+ // go over enumerated mz and push all indices with transmission probability > min_proba to a set
+ let probability_cutoff = min_proba.unwrap_or(0.5);
+ let transmission_probability = self.apply_transmission(frame_id, scan_id, mz);
+ mz.iter().enumerate().filter(|&(i, _)| transmission_probability[i] > probability_cutoff).map(|(i, _)| i).collect()
+ }
+
+ /// Check if all mz values in a given collection are transmitted
+ ///
+ /// Arguments:
+ ///
+ /// * `frame_id` - frame id
+ /// * `scan_id` - scan id
+ /// * `mz` - mz values
+ /// * `min_proba` - minimum probability for transmission
+ ///
+ /// Returns:
+ ///
+ /// * `bool` - true if all mz values are transmitted
+ ///
+ fn all_transmitted(&self, frame_id: i32, scan_id: i32, mz: &Vec<f64>, min_proba: Option<f64>) -> bool {
+ let probability_cutoff = min_proba.unwrap_or(0.5);
+ let transmission_probability = self.apply_transmission(frame_id, scan_id, mz);
+ transmission_probability.iter().all(|&p| p > probability_cutoff)
+ }
+
+ /// Check if a single mz value is transmitted
+ ///
+ /// Arguments:
+ ///
+ /// * `frame_id` - frame id
+ /// * `scan_id` - scan id
+ /// * `mz` - mz value
+ /// * `min_proba` - minimum probability for transmission
+ ///
+ /// Returns:
+ ///
+ /// * `bool` - true if mz value is transmitted
+ ///
+ fn is_transmitted(&self, frame_id: i32, scan_id: i32, mz: f64, min_proba: Option<f64>) -> bool {
+ let probability_cutoff = min_proba.unwrap_or(0.5);
+ let transmission_probability = self.apply_transmission(frame_id, scan_id, &vec![mz]);
+ transmission_probability[0] > probability_cutoff
+ }
+
+ /// Check if any mz value is transmitted, can be used to check if one peak of isotopic envelope is transmitted
+ ///
+ /// Arguments:
+ ///
+ /// * `frame_id` - frame id
+ /// * `scan_id` - scan id
+ /// * `mz` - mz values
+ /// * `min_proba` - minimum probability for transmission
+ ///
+ /// Returns:
+ ///
+ /// * `bool` - true if any mz value is transmitted
+ ///
+ fn any_transmitted(&self, frame_id: i32, scan_id: i32, mz: &Vec<f64>, min_proba: Option<f64>) -> bool {
+ let probability_cutoff = min_proba.unwrap_or(0.5);
+ let transmission_probability = self.apply_transmission(frame_id, scan_id, mz);
+ transmission_probability.iter().any(|&p| p > probability_cutoff)
+ }
+
+ /// Transmit a frame given a diaPASEF transmission layout
+ fn transmit_tims_frame(&self, frame: &TimsFrame, min_probability: Option<f64>) -> TimsFrame {
+ let spectra = frame.to_tims_spectra();
+ let mut filtered_spectra = Vec::new();
+
+ for mut spectrum in spectra {
+ let filtered_spectrum = self.transmit_spectrum(frame.frame_id, spectrum.scan, spectrum.spectrum.mz_spectrum, min_probability);
+ if filtered_spectrum.mz.len() > 0 {
+ spectrum.spectrum.mz_spectrum = filtered_spectrum;
+ filtered_spectra.push(spectrum);
+ }
+ }
+
+ if filtered_spectra.len() > 0 {
+ TimsFrame::from_tims_spectra(filtered_spectra)
+ } else {
+ TimsFrame::new(
+ frame.frame_id,
+ frame.ms_type.clone(),
+ 0.0,
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![]
+ )
+ }
+ }
+
+ /// Transmit a frame given a diaPASEF transmission layout with annotations
+ ///
+ /// Arguments:
+ ///
+ /// * `frame` - TimsFrameAnnotated
+ /// * `min_probability` - minimum probability for transmission
+ ///
+ /// Returns:
+ ///
+ /// * `TimsFrameAnnotated` - transmitted frame
+ ///
+ fn transmit_tims_frame_annotated(&self, frame: &TimsFrameAnnotated, min_probability: Option<f64>) -> TimsFrameAnnotated {
+ let spectra = frame.to_tims_spectra_annotated();
+ let mut filtered_spectra = Vec::new();
+
+ for mut spectrum in spectra {
+ let filtered_spectrum = self.transmit_annotated_spectrum(frame.frame_id, spectrum.scan as i32, spectrum.spectrum.clone(), min_probability);
+ if filtered_spectrum.mz.len() > 0 {
+ spectrum.spectrum = filtered_spectrum;
+ filtered_spectra.push(spectrum);
+ }
+ }
+
+ if filtered_spectra.len() > 0 {
+ TimsFrameAnnotated::from_tims_spectra_annotated(filtered_spectra)
+ } else {
+ TimsFrameAnnotated::new(
+ frame.frame_id,
+ frame.retention_time,
+ frame.ms_type.clone(),
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![]
+ )
+ }
+ }
+
+ fn isotopes_transmitted(&self, frame_id: i32, scan_id: i32, mz_mono: f64, isotopic_envelope: &Vec<f64>, min_probability: Option<f64>) -> (f64, Vec<(f64, f64)>) {
+
+ let probability_cutoff = min_probability.unwrap_or(0.5);
+ let transmission_probability = self.apply_transmission(frame_id, scan_id, &isotopic_envelope);
+ let mut result: Vec<(f64, f64)> = Vec::new();
+
+ for (mz, p) in isotopic_envelope.iter().zip(transmission_probability.iter()) {
+ if *p > probability_cutoff {
+ result.push((*mz - mz_mono, *p));
+ }
+ }
+
+ (mz_mono, result)
+ }
+}
+
/// Quadrupole isolation layout of a DIA acquisition.
#[derive(Clone, Debug)]
pub struct TimsTransmissionDIA {
    /// frame id -> window group; frames without an entry count as precursor frames
    frame_to_window_group: HashMap<i32, i32>,
    /// (window group, scan) -> (isolation mz, isolation width)
    window_group_settings: HashMap<(i32, i32), (f64, f64)>,
    /// steepness passed to the transmission function
    k: f64,
}

impl TimsTransmissionDIA {
    /// Build the layout from two flat tables.
    ///
    /// `frame` / `frame_window_group` are parallel and map frames to window
    /// groups. `window_group`, `scan_start`, `scan_end`, `isolation_mz` and
    /// `isolation_width` are parallel and describe one isolation window per row;
    /// the row applies to every scan in `scan_start..=scan_end`.
    ///
    /// # Arguments
    ///
    /// * `frame` - frame ids
    /// * `frame_window_group` - window group of each frame
    /// * `window_group` - window group of each isolation window
    /// * `scan_start` - first scan of each isolation window
    /// * `scan_end` - last scan (inclusive) of each isolation window
    /// * `isolation_mz` - center mz of each isolation window
    /// * `isolation_width` - width of each isolation window
    /// * `k` - steepness of the transmission flanks, `2.0` if `None`
    ///
    /// # Panics
    ///
    /// Panics if `scan_start`, `scan_end`, `isolation_mz` or `isolation_width`
    /// has fewer entries than `window_group`.
    pub fn new(
        frame: Vec<i32>,
        frame_window_group: Vec<i32>,

        window_group: Vec<i32>,
        scan_start: Vec<i32>,
        scan_end: Vec<i32>,
        isolation_mz: Vec<f64>,
        isolation_width: Vec<f64>,
        k: Option<f64>,
    ) -> Self {
        let frame_to_window_group: HashMap<i32, i32> =
            frame.into_iter().zip(frame_window_group).collect();

        let mut window_group_settings: HashMap<(i32, i32), (f64, f64)> = HashMap::new();

        for (row, &wg) in window_group.iter().enumerate() {
            let setting = (isolation_mz[row], isolation_width[row]);

            // a later row overwrites an earlier one for the same (group, scan)
            for scan in scan_start[row]..=scan_end[row] {
                window_group_settings.insert((wg, scan), setting);
            }
        }

        Self {
            frame_to_window_group,
            window_group_settings,
            k: k.unwrap_or(2.0),
        }
    }

    /// Window group of a frame, or `-1` if the frame has none.
    pub fn frame_to_window_group(&self, frame_id: i32) -> i32 {
        self.frame_to_window_group.get(&frame_id).copied().unwrap_or(-1)
    }

    /// `(isolation mz, isolation width)` for a window group and scan, if any.
    pub fn get_setting(&self, window_group: i32, scan_id: i32) -> Option<&(f64, f64)> {
        self.window_group_settings.get(&(window_group, scan_id))
    }

    /// Check if a frame is a precursor frame, i.e. has no window group assigned.
    pub fn is_precursor(&self, frame_id: i32) -> bool {
        !self.frame_to_window_group.contains_key(&frame_id)
    }
}
+
+impl IonTransmission for TimsTransmissionDIA {
+ fn apply_transmission(&self, frame_id: i32, scan_id: i32, mz: &Vec<f64>) -> Vec<f64> {
+
+ let setting = self.get_setting(self.frame_to_window_group(frame_id), scan_id);
+ let is_precursor = self.is_precursor(frame_id);
+
+ match setting {
+ Some((isolation_mz, isolation_width)) => {
+ apply_transmission(*isolation_mz, *isolation_width, self.k, mz.clone())
+ },
+ None => match is_precursor {
+ true => vec![1.0; mz.len()],
+ false => vec![0.0; mz.len()],
+ }
+ }
+ }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487
use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+use itertools::multizip;
+
+use crate::data::spectrum::{MsType, Vectorized, ToResolution};
+use crate::timstof::spectrum::{TimsSpectrum};
+use crate::timstof::frame::{ImsFrame, TimsFrame, TimsFrameVectorized};
+
+#[derive(Clone)]
+pub struct TimsSlice {
+ pub frames: Vec<TimsFrame>,
+}
+
+impl TimsSlice {
+
+ /// Create a new TimsSlice from a vector of TimsFrames
+ ///
+ /// # Arguments
+ ///
+ /// * `frames` - A vector of TimsFrames
+ ///
+ /// # Returns
+ ///
+ /// * `TimsSlice` - A TimsSlice containing the TimsFrames
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use mscore::timstof::slice::TimsSlice;
+ ///
+ /// let slice = TimsSlice::new(vec![]);
+ /// ```
+ pub fn new(frames: Vec<TimsFrame>) -> Self {
+ TimsSlice { frames }
+ }
+
+ /// Filter the TimsSlice by m/z, scan, and intensity
+ ///
+ /// # Arguments
+ ///
+ /// * `mz_min` - The minimum m/z value
+ /// * `mz_max` - The maximum m/z value
+ /// * `scan_min` - The minimum scan value
+ /// * `scan_max` - The maximum scan value
+ /// * `intensity_min` - The minimum intensity value
+ /// * `intensity_max` - The maximum intensity value
+ /// * `num_threads` - The number of threads to use
+ ///
+ /// # Returns
+ ///
+ /// * `TimsSlice` - A TimsSlice containing only the TimsFrames that pass the filter
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use mscore::timstof::slice::TimsSlice;
+ ///
+ /// let slice = TimsSlice::new(vec![]);
+ /// let filtered_slice = slice.filter_ranged(400.0, 2000.0, 0, 1000, 0.0, 100000.0, 0.0, 1.6, 4);
+ /// ```
+ pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, scan_min: i32, scan_max: i32, inv_mob_min: f64, inv_mob_max: f64, intensity_min: f64, intensity_max: f64, num_threads: usize) -> TimsSlice {
+
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
+
+ // Use the thread pool
+ let filtered_frames = pool.install(|| {
+ let result: Vec<_> = self.frames.par_iter()
+ .map(|f| f.filter_ranged(mz_min, mz_max, scan_min, scan_max, inv_mob_min, inv_mob_max, intensity_min, intensity_max))
+ .collect();
+ result
+ });
+
+ TimsSlice { frames: filtered_frames }
+ }
+
+ pub fn filter_ranged_ms_type_specific(&self,
+ mz_min_ms1: f64,
+ mz_max_ms1: f64,
+ scan_min_ms1: i32,
+ scan_max_ms1: i32,
+ inv_mob_min_ms1: f64,
+ inv_mob_max_ms1: f64,
+ intensity_min_ms1: f64,
+ intensity_max_ms1: f64,
+ mz_min_ms2: f64,
+ mz_max_ms2: f64,
+ scan_min_ms2: i32,
+ scan_max_ms2: i32,
+ inv_mob_min_ms2: f64,
+ inv_mob_max_ms2: f64,
+ intensity_min_ms2: f64,
+ intensity_max_ms2: f64,
+ num_threads: usize) -> TimsSlice {
+
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
+
+ // Use the thread pool
+ let filtered_frames = pool.install(|| {
+ let result: Vec<_> = self.frames.par_iter()
+ .map(|f| match f.ms_type {
+ MsType::Precursor => f.filter_ranged(mz_min_ms1, mz_max_ms1, scan_min_ms1, scan_max_ms1, inv_mob_min_ms1, inv_mob_max_ms1, intensity_min_ms1, intensity_max_ms1),
+ _ => f.filter_ranged(mz_min_ms2, mz_max_ms2, scan_min_ms2, scan_max_ms2, inv_mob_min_ms2, inv_mob_max_ms2, intensity_min_ms2, intensity_max_ms2),
+ })
+ .collect();
+ result
+ });
+
+ TimsSlice { frames: filtered_frames }
+ }
+
+ /// Get a vector of TimsFrames by MsType
+ ///
+ /// # Arguments
+ ///
+ /// * `t` - The MsType to filter by
+ ///
+ /// # Returns
+ ///
+ /// * `TimsSlice` - A TimsSlice containing only the TimsFrames of the specified MsType
+ pub fn get_slice_by_type(&self, t: MsType) -> TimsSlice {
+ let filtered_frames = self.frames.iter()
+ .filter(|f| f.ms_type == t)
+ .map(|f| f.clone())
+ .collect();
+ TimsSlice { frames: filtered_frames }
+ }
+
+ pub fn to_resolution(&self, resolution: i32, num_threads: usize) -> TimsSlice {
+
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
+
+ // Use the thread pool
+ let result_frames = pool.install(|| {
+ let result: Vec<_> = self.frames.par_iter()
+ .map(|f| f.to_resolution(resolution))
+ .collect();
+ result
+ });
+
+ TimsSlice { frames: result_frames }
+ }
+
+ pub fn vectorized(&self, resolution: i32, num_threads: usize) -> TimsSliceVectorized {
+
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+ // Use the thread pool
+ let result_frames = pool.install(|| {
+ let result: Vec<_> = self.frames.par_iter()
+ .map(|f| f.vectorized(resolution))
+ .collect();
+ result
+ });
+
+ let frame_map = get_index_map(&result_frames);
+
+ TimsSliceVectorized { frames: result_frames, frame_map }
+ }
+
+ pub fn from_flat_slice(frame_ids: Vec<i32>,
+ scans: Vec<i32>,
+ tofs: Vec<i32>,
+ retention_times: Vec<f64>,
+ mobilities: Vec<f64>,
+ mzs: Vec<f64>,
+ intensities: Vec<f64>) -> Self {
+
+ let mut frames = Vec::new();
+ let unique_frame_ids: BTreeSet<_> = frame_ids.iter().cloned().collect();
+
+ for frame_id in unique_frame_ids {
+ let indices: Vec<usize> = frame_ids.iter().enumerate().filter(|(_, &x)| x == frame_id).map(|(i, _)| i).collect();
+ let mut scan = Vec::new();
+ let mut tof = Vec::new();
+ let mut retention_time = Vec::new();
+ let mut mobility = Vec::new();
+ let mut mz = Vec::new();
+ let mut intensity = Vec::new();
+
+ for index in indices {
+ scan.push(scans[index]);
+ tof.push(tofs[index]);
+ retention_time.push(retention_times[index]);
+ mobility.push(mobilities[index]);
+ mz.push(mzs[index]);
+ intensity.push(intensities[index]);
+ }
+
+ let ims_frame = ImsFrame {
+ retention_time: retention_time[0],
+ mobility,
+ mz,
+ intensity,
+ };
+
+ let tims_frame = TimsFrame {
+ frame_id,
+ ms_type: MsType::Unknown,
+ scan,
+ tof,
+ ims_frame,
+ };
+
+ frames.push(tims_frame);
+ }
+
+ TimsSlice { frames }
+ }
+
+ pub fn flatten(&self) -> TimsSliceFlat {
+ let mut frame_ids = Vec::new();
+ let mut scans = Vec::new();
+ let mut tofs = Vec::new();
+ let mut retention_times = Vec::new();
+ let mut mobilities = Vec::new();
+ let mut mzs = Vec::new();
+ let mut intensities = Vec::new();
+
+ for frame in &self.frames {
+ let length = frame.scan.len();
+ frame_ids.extend(vec![frame.frame_id; length].into_iter());
+ scans.extend(frame.scan.clone());
+ tofs.extend(frame.tof.clone());
+ retention_times.extend(vec![frame.ims_frame.retention_time; length].into_iter());
+ mobilities.extend(&frame.ims_frame.mobility);
+ mzs.extend(&frame.ims_frame.mz);
+ intensities.extend(&frame.ims_frame.intensity);
+ }
+
+ TimsSliceFlat {
+ frame_ids,
+ scans,
+ tofs,
+ retention_times,
+ mobilities,
+ mzs,
+ intensities,
+ }
+ }
+
+ pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, num_threads: usize) -> Vec<TimsSpectrum> {
+ // Create a thread pool
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
+
+ // Use the thread pool
+ let windows = pool.install(|| {
+ let windows: Vec<_> = self.frames.par_iter()
+ .flat_map( | frame | frame.to_windows(window_length, overlapping, min_peaks, min_intensity))
+ .collect();
+ windows
+ });
+
+ windows
+ }
+
+ pub fn to_dense_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, resolution: i32, num_threads: usize) -> Vec<(Vec<f64>, Vec<i32>, Vec<i32>, usize, usize)> {
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+ let result = pool.install(|| {
+ let t = self.frames.par_iter().map(|f| f.to_dense_windows(window_length, overlapping, min_peaks, min_intensity, resolution)).collect::<Vec<_>>();
+ t
+ });
+
+ result
+ }
+
+ pub fn to_tims_planes(&self, tof_max_value: i32, num_chunks: i32, num_threads: usize) -> Vec<TimsPlane> {
+
+ let flat_slice = self.flatten();
+
+ let chunk_size = (tof_max_value as f64 / num_chunks as f64) as i32;
+
+ // Calculate range_and_width based on num_chunks and chunk_size
+ let range_and_width: Vec<(i32, i32)> = (1..=num_chunks)
+ .map(|i| (chunk_size * i, i + 2))
+ .collect();
+
+ let mut tof_map: BTreeMap<(i32, i32), (Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<f64>)> = BTreeMap::new();
+
+ // Iterate over the data points using multizip
+ for (id, rt, scan, mobility, tof, mz, intensity)
+
+ in multizip((flat_slice.frame_ids, flat_slice.retention_times, flat_slice.scans, flat_slice.mobilities, flat_slice.tofs, flat_slice.mzs, flat_slice.intensities)) {
+
+ for &(switch_point, width) in &range_and_width {
+ if tof < switch_point {
+
+ let key = (width, (tof as f64 / width as f64).floor() as i32);
+
+ tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).0.push(id);
+ tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).1.push(rt);
+ tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).2.push(scan);
+ tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).3.push(mobility);
+ tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).4.push(tof);
+ tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).5.push(mz);
+ tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).6.push(intensity);
+
+ break
+ }
+ }
+ }
+
+ // Create a thread pool with the desired number of threads
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+ let tims_planes: Vec<TimsPlane> = pool.install(|| {
+ tof_map.par_iter()
+ .map(|(key, values)| collapse_entry(key, values))
+ .collect()
+ });
+
+ tims_planes
+ }
+}
+
+#[derive(Clone)]
+pub struct TimsSliceVectorized {
+ pub frames: Vec<TimsFrameVectorized>,
+ pub frame_map: BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)>
+}
+
+impl TimsSliceVectorized {
+
+ pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, scan_min: i32, scan_max: i32, inv_mob_min: f64, inv_mob_max: f64, intensity_min: f64, intensity_max: f64, num_threads: usize) -> TimsSliceVectorized {
+
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
+
+ // Use the thread pool
+ let filtered_frames = pool.install(|| {
+ let result: Vec<_> = self.frames.par_iter()
+ .map(|f| f.filter_ranged(mz_min, mz_max, scan_min, scan_max, inv_mob_min, inv_mob_max, intensity_min, intensity_max))
+ .collect();
+ result
+ });
+
+ let frame_map = get_index_map(&filtered_frames);
+
+ TimsSliceVectorized { frames: filtered_frames, frame_map }
+ }
+
+ pub fn get_vectors_at_index(&self, index: u32) -> Option<(Vec<u32>, Vec<u32>, Vec<f32>)> {
+ self.frame_map.get(&index).cloned()
+ }
+
+ pub fn flatten(&self) -> TimsSliceVectorizedFlat {
+ let mut frame_ids = Vec::new();
+ let mut scans = Vec::new();
+ let mut tofs = Vec::new();
+ let mut retention_times = Vec::new();
+ let mut mobilities = Vec::new();
+ let mut indices = Vec::new();
+ let mut intensities = Vec::new();
+
+ for frame in &self.frames {
+ let length = frame.ims_frame.indices.len();
+ frame_ids.extend(vec![frame.frame_id; length].into_iter());
+ scans.extend(frame.scan.clone());
+ tofs.extend(frame.tof.clone());
+ retention_times.extend(vec![frame.ims_frame.retention_time; length].into_iter());
+ mobilities.extend(&frame.ims_frame.mobility);
+ indices.extend(&frame.ims_frame.indices);
+ intensities.extend(&frame.ims_frame.values);
+ }
+
+ TimsSliceVectorizedFlat {
+ frame_ids,
+ scans,
+ tofs,
+ retention_times,
+ mobilities,
+ indices,
+ intensities,
+ }
+ }
+}
+
+fn get_index_map(frames: &Vec<TimsFrameVectorized>) -> BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)> {
+ let mut index_map: BTreeMap<u32, Vec<(u32, u32, f32)>> = BTreeMap::new();
+
+ for frame in frames {
+ for (i, index) in frame.ims_frame.indices.iter().enumerate() {
+ let entry = index_map.entry(*index as u32).or_insert_with(|| vec![]);
+ entry.push((frame.frame_id as u32, frame.scan[i] as u32, frame.ims_frame.values[i] as f32));
+ }
+ }
+
+ let mut result_map: BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)> = BTreeMap::new();
+
+ for (index, values) in index_map {
+ for (frame_id, scan, intensity) in values {
+ let entry = result_map.entry(index).or_insert_with(|| (vec![], vec![], vec![]));
+ entry.0.push(frame_id);
+ entry.1.push(scan);
+ entry.2.push(intensity);
+ }
+ }
+
+ result_map
+}
+
/// Data points of one TOF bin collapsed to one entry per (frame, scan) pair,
/// together with summary statistics of the bin (see `collapse_entry`).
#[derive(Clone)]
pub struct TimsPlane {
    /// Mean TOF index of all points in the bin.
    pub tof_mean: f64,
    /// Population standard deviation of the TOF indices in the bin.
    pub tof_std: f64,
    /// Mean m/z of all points in the bin.
    pub mz_mean: f64,
    /// Population standard deviation of the m/z values in the bin.
    pub mz_std: f64,

    /// Frame id of each collapsed entry.
    pub frame_id: Vec<i32>,
    /// Retention time of each collapsed entry.
    pub retention_time: Vec<f64>,
    /// Scan of each collapsed entry.
    pub scan: Vec<i32>,
    /// Mobility of each collapsed entry.
    pub mobility: Vec<f64>,
    /// Summed intensity of each collapsed entry.
    pub intensity: Vec<f64>,
}
+
+fn collapse_entry(_key: &(i32, i32), values: &(Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<f64>)) -> TimsPlane {
+
+ let (frame_ids, retention_times, scans, mobilities, tofs, mzs, intensities) = values;
+
+ // 1. Calculate mean and std for tof and mz
+ let tof_mean: f64 = tofs.iter().map(|&x| x as f64).sum::<f64>() / tofs.len() as f64;
+ let tof_std: f64 = (tofs.iter().map(|&x| (x as f64 - tof_mean).powi(2)).sum::<f64>() / tofs.len() as f64).sqrt();
+ let mz_mean: f64 = mzs.iter().map(|&x| x as f64).sum::<f64>() / mzs.len() as f64;
+ let mz_std: f64 = (mzs.iter().map(|&x| (x as f64 - mz_mean).powi(2)).sum::<f64>() / mzs.len() as f64).sqrt();
+
+ // 2. Aggregate data by frame_id and scan using a BTreeMap for sorted order
+ let mut grouped_data: BTreeMap<(i32, i32), (f64, f64, f64)> = BTreeMap::new();
+
+ for (f, r, s, m, i) in multizip((frame_ids, retention_times, scans, mobilities, intensities)) {
+ let key = (*f, *s);
+ let entry = grouped_data.entry(key).or_insert((0.0, 0.0, 0.0)); // (intensity_sum, mobility, retention_time)
+ entry.0 += *i;
+ entry.1 = *m;
+ entry.2 = *r;
+ }
+
+ // Extract data from the grouped_data
+ let mut frame_id = vec![];
+ let mut retention_time = vec![];
+ let mut scan = vec![];
+ let mut mobility = vec![];
+ let mut intensity = vec![];
+
+ for ((f, s), (i, m, r)) in grouped_data {
+ frame_id.push(f);
+ retention_time.push(r);
+ scan.push(s);
+ mobility.push(m);
+ intensity.push(i);
+ }
+
+ TimsPlane {
+ tof_mean,
+ tof_std,
+ mz_mean,
+ mz_std,
+ frame_id,
+ retention_time,
+ scan,
+ mobility,
+ intensity,
+ }
+}
+
/// Column-wise (flattened) view of a slice with m/z values.
///
/// NOTE(review): the columns are iterated in lockstep elsewhere in this file, so
/// they are presumably parallel vectors with one entry per data point — confirm
/// against the code that constructs this struct.
#[derive(Debug, Clone)]
pub struct TimsSliceFlat {
    /// Frame id column.
    pub frame_ids: Vec<i32>,
    /// Scan column.
    pub scans: Vec<i32>,
    /// TOF index column.
    pub tofs: Vec<i32>,
    /// Retention time column.
    pub retention_times: Vec<f64>,
    /// Mobility column.
    pub mobilities: Vec<f64>,
    /// m/z column.
    pub mzs: Vec<f64>,
    /// Intensity column.
    pub intensities: Vec<f64>,
}
+
/// Column-wise (flattened) view of a vectorized slice, as produced by
/// `TimsSliceVectorized::flatten`.
#[derive(Debug, Clone)]
pub struct TimsSliceVectorizedFlat {
    /// Frame id column.
    pub frame_ids: Vec<i32>,
    /// Scan column.
    pub scans: Vec<i32>,
    /// TOF index column.
    pub tofs: Vec<i32>,
    /// Retention time column.
    pub retention_times: Vec<f64>,
    /// Mobility column.
    pub mobilities: Vec<f64>,
    /// Vector index column.
    pub indices: Vec<i32>,
    /// Intensity column.
    pub intensities: Vec<f64>,
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173
use std::collections::BTreeMap;
+use std::fmt;
+use std::fmt::{Display, Formatter};
+use crate::data::spectrum::{IndexedMzSpectrum, IndexedMzSpectrumVectorized, MsType, MzSpectrum};
+
+#[derive(Clone)]
+pub struct TimsSpectrumVectorized {
+ pub frame_id: i32,
+ pub scan: i32,
+ pub retention_time: f64,
+ pub mobility: f64,
+ pub ms_type: MsType,
+ pub vector: IndexedMzSpectrumVectorized,
+}
+
+#[derive(Clone, Debug)]
+pub struct TimsSpectrum {
+ pub frame_id: i32,
+ pub scan: i32,
+ pub retention_time: f64,
+ pub mobility: f64,
+ pub ms_type: MsType,
+ pub spectrum: IndexedMzSpectrum,
+}
+
+impl TimsSpectrum {
+ /// Creates a new `TimsSpectrum` instance.
+ ///
+ /// # Arguments
+ ///
+ /// * `frame_id` - index of frame in TDF raw file.
+ /// * `scan_id` - index of scan in TDF raw file.
+ /// * `retention_time` - The retention time in seconds.
+ /// * `mobility` - The inverse ion mobility.
+ /// * `spectrum` - A `TOFMzSpectrum` instance.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use mscore::data::spectrum::{IndexedMzSpectrum, MsType};
+ /// use mscore::timstof::spectrum::{TimsSpectrum};
+ ///
+ /// let spectrum = TimsSpectrum::new(1, 1, 100.0, 0.1, MsType::FragmentDda, IndexedMzSpectrum::new(vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]));
+ /// ```
+ pub fn new(frame_id: i32, scan_id: i32, retention_time: f64, mobility: f64, ms_type: MsType, spectrum: IndexedMzSpectrum) -> Self {
+ TimsSpectrum { frame_id, scan: scan_id, retention_time, mobility: mobility, ms_type, spectrum }
+ }
+
+ pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> Self {
+ let filtered = self.spectrum.filter_ranged(mz_min, mz_max, intensity_min, intensity_max);
+ TimsSpectrum { frame_id: self.frame_id, scan: self.scan, retention_time: self.retention_time, mobility: self.mobility, ms_type: self.ms_type.clone(), spectrum: filtered }
+ }
+
+ pub fn to_resolution(&self, resolution: i32) -> TimsSpectrum {
+ let spectrum = self.spectrum.to_resolution(resolution);
+ TimsSpectrum { frame_id: self.frame_id, scan: self.scan, retention_time: self.retention_time, mobility: self.mobility, ms_type: self.ms_type.clone(), spectrum }
+ }
+
+ pub fn vectorized(&self, resolution: i32) -> TimsSpectrumVectorized {
+ let vector = self.spectrum.vectorized(resolution);
+ TimsSpectrumVectorized { frame_id: self.frame_id, scan: self.scan, retention_time: self.retention_time, mobility: self.mobility, ms_type: self.ms_type.clone(), vector }
+ }
+
+ pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> BTreeMap<i32, TimsSpectrum> {
+
+ let mut splits: BTreeMap<i32, TimsSpectrum> = BTreeMap::new();
+
+ for (i, &mz) in self.spectrum.mz_spectrum.mz.iter().enumerate() {
+ let intensity = self.spectrum.mz_spectrum.intensity[i];
+ let tof = self.spectrum.index[i];
+
+ let tmp_key = (mz / window_length).floor() as i32;
+
+ splits.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new(
+ Vec::new(), Vec::new(), Vec::new()))
+ ).spectrum.mz_spectrum.mz.push(mz);
+
+ splits.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new(
+ Vec::new(), Vec::new(), Vec::new()))
+ ).spectrum.mz_spectrum.intensity.push(intensity);
+
+ splits.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new(
+ Vec::new(), Vec::new(), Vec::new()))
+ ).spectrum.index.push(tof);
+ }
+
+ if overlapping {
+ let mut splits_offset = BTreeMap::new();
+
+ for (i, &mmz) in self.spectrum.mz_spectrum.mz.iter().enumerate() {
+ let intensity = self.spectrum.mz_spectrum.intensity[i];
+ let tof = self.spectrum.index[i];
+
+ let tmp_key = -((mmz + window_length / 2.0) / window_length).floor() as i32;
+
+ splits_offset.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new(
+ Vec::new(), Vec::new(), Vec::new()))
+ ).spectrum.mz_spectrum.mz.push(mmz);
+
+ splits_offset.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new(
+ Vec::new(), Vec::new(), Vec::new()))
+ ).spectrum.mz_spectrum.intensity.push(intensity);
+
+ splits_offset.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new(
+ Vec::new(), Vec::new(), Vec::new()))
+ ).spectrum.index.push(tof);
+ }
+
+ for (key, val) in splits_offset {
+ splits.entry(key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new(
+ Vec::new(), Vec::new(), Vec::new()))
+ ).spectrum.mz_spectrum.mz.extend(val.spectrum.mz_spectrum.mz);
+
+ splits.entry(key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new(
+ Vec::new(), Vec::new(), Vec::new()))
+ ).spectrum.mz_spectrum.intensity.extend(val.spectrum.mz_spectrum.intensity);
+
+ splits.entry(key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new(
+ Vec::new(), Vec::new(), Vec::new()))
+ ).spectrum.index.extend(val.spectrum.index);
+ }
+ }
+
+ splits.retain(|_, spectrum| {
+ spectrum.spectrum.mz_spectrum.mz.len() >= min_peaks && spectrum.spectrum.mz_spectrum.intensity.iter().cloned().max_by(
+ |a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)).unwrap_or(0.0) >= min_intensity
+ });
+
+ splits
+ }
+}
+
+impl std::ops::Add for TimsSpectrum {
+ type Output = Self;
+
+ fn add(self, other: Self) -> TimsSpectrum {
+ assert_eq!(self.frame_id, other.frame_id);
+ assert_eq!(self.scan, other.scan);
+
+ let average_mobility = (self.mobility + other.mobility) / 2.0;
+ let average_retention_time = (self.retention_time + other.retention_time) / 2.0;
+
+ let mut combined_map: BTreeMap<i64, (f64, i32, i32)> = BTreeMap::new();
+ let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
+
+ for ((mz, intensity), index) in self.spectrum.mz_spectrum.mz.iter().zip(self.spectrum.mz_spectrum.intensity.iter()).zip(self.spectrum.index.iter()) {
+ let key = quantize(*mz);
+ combined_map.insert(key, (*intensity, *index, 1)); // Initialize count as 1
+ }
+
+ for ((mz, intensity), index) in other.spectrum.mz_spectrum.mz.iter().zip(other.spectrum.mz_spectrum.intensity.iter()).zip(other.spectrum.index.iter()) {
+ let key = quantize(*mz);
+ combined_map.entry(key).and_modify(|e| {
+ e.0 += *intensity; // Sum intensity
+ e.1 += *index; // Sum index
+ e.2 += 1; // Increment count
+ }).or_insert((*intensity, *index, 1));
+ }
+
+ let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
+ let intensity_combined: Vec<f64> = combined_map.values().map(|(intensity, _, _)| *intensity).collect();
+ let index_combined: Vec<i32> = combined_map.values().map(|(_, index, count)| index / count).collect(); // Average index
+
+ let spectrum = IndexedMzSpectrum { index: index_combined, mz_spectrum: MzSpectrum { mz: mz_combined, intensity: intensity_combined } };
+ TimsSpectrum { frame_id: self.frame_id, scan: self.scan, retention_time: average_retention_time, mobility: average_mobility, ms_type: self.ms_type.clone(), spectrum }
+ }
+}
+
+impl Display for TimsSpectrum {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ write!(f, "TimsSpectrum(frame_id: {}, scan_id: {}, retention_time: {}, mobility: {}, spectrum: {})", self.frame_id, self.scan, self.retention_time, self.mobility, self.spectrum)
+ }
+}
+
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33
/// Marks which entries of a sparse signal are local maxima.
///
/// `indices` are the sparse positions, expected in ascending order, and `values`
/// the corresponding intensities. Each entry is compared with the following
/// entries whose position lies at most `window` away; the smaller one of each
/// pair is unmarked (on a tie the later entry is unmarked).
///
/// A following position that is smaller than the current one (input not sorted)
/// is treated as being outside the window instead of overflowing the subtraction.
///
/// # Returns
///
/// A mask with one `bool` per entry of `indices`; `true` marks a local maximum.
///
/// # Panics
///
/// Panics if `values` is shorter than `indices`.
pub fn find_sparse_local_maxima_mask(
    indices: &[u32],
    values: &[f64],
    window: u32,
) -> Vec<bool> {
    let mut local_maxima: Vec<bool> = vec![true; indices.len()];

    for (index, &sparse_index) in indices.iter().enumerate() {
        let current_intensity = values[index];

        for (offset, &next_sparse_index) in indices[index + 1..].iter().enumerate() {
            let next_index = index + 1 + offset;
            let next_value = values[next_index];

            match next_sparse_index.checked_sub(sparse_index) {
                Some(gap) if gap <= window => {
                    if current_intensity < next_value {
                        local_maxima[index] = false;
                    } else {
                        local_maxima[next_index] = false;
                    }
                }
                // Positions are ascending, so nothing further can be in range.
                _ => break,
            }
        }
    }

    local_maxima
}
+
/// Returns copies of the elements of `vec` whose position is `true` in `mask`.
///
/// # Panics
///
/// Panics if `mask` is shorter than `vec`.
pub fn filter_with_mask<T: Copy>(vec: &Vec<T>, mask: &Vec<bool>) -> Vec<T> {
    vec.iter()
        .enumerate()
        .filter_map(|(position, &item)| if mask[position] { Some(item) } else { None })
        .collect()
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62
use std::fmt::Display;
+
/// Acquisition mode of a dataset.
#[derive(Debug, Clone)]
pub enum AcquisitionMode {
    PRECURSOR,
    DDA,
    DIA,
    Unknown,
}

impl AcquisitionMode {
    /// Numeric code of the mode: 0 (PRECURSOR), 8 (DDA), 9 (DIA), -1 (Unknown).
    pub fn to_i32(&self) -> i32 {
        match *self {
            AcquisitionMode::Unknown => -1,
            AcquisitionMode::PRECURSOR => 0,
            AcquisitionMode::DDA => 8,
            AcquisitionMode::DIA => 9,
        }
    }

    /// Upper-case name of the mode.
    pub fn to_str(&self) -> &str {
        match *self {
            AcquisitionMode::Unknown => "UNKNOWN",
            AcquisitionMode::PRECURSOR => "PRECURSOR",
            AcquisitionMode::DDA => "DDA",
            AcquisitionMode::DIA => "DIA",
        }
    }
}

impl Display for AcquisitionMode {
    /// Writes the same text that `to_str` returns.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.to_str())
    }
}

impl From<i32> for AcquisitionMode {
    /// Maps a numeric code back to a mode; unrecognised codes become `Unknown`.
    fn from(item: i32) -> Self {
        if item == 0 {
            AcquisitionMode::PRECURSOR
        } else if item == 8 {
            AcquisitionMode::DDA
        } else if item == 9 {
            AcquisitionMode::DIA
        } else {
            AcquisitionMode::Unknown
        }
    }
}

impl From<&str> for AcquisitionMode {
    /// Maps an upper-case name back to a mode (case-sensitive); anything else
    /// becomes `Unknown`.
    fn from(item: &str) -> Self {
        if item == "PRECURSOR" {
            AcquisitionMode::PRECURSOR
        } else if item == "DDA" {
            AcquisitionMode::DDA
        } else if item == "DIA" {
            AcquisitionMode::DIA
        } else {
            AcquisitionMode::Unknown
        }
    }
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78
use mscore::timstof::frame::{RawTimsFrame, TimsFrame};
+use mscore::timstof::slice::TimsSlice;
+use crate::data::acquisition::AcquisitionMode;
+use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader};
+use crate::data::meta::{read_global_meta_sql, read_meta_data_sql};
+
+pub struct TimsDataset {
+ pub loader: TimsDataLoader,
+}
+
+impl TimsDataset {
+ pub fn new(bruker_lib_path: &str, data_path: &str, in_memory: bool, use_bruker_sdk: bool) -> Self {
+
+ // TODO: error handling
+ let global_meta_data = read_global_meta_sql(data_path).unwrap();
+ let meta_data = read_meta_data_sql(data_path).unwrap();
+
+ let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
+ let im_lower = global_meta_data.one_over_k0_range_lower;
+ let im_upper = global_meta_data.one_over_k0_range_upper;
+
+ let tof_max_index = global_meta_data.tof_max_index;
+ let mz_lower = global_meta_data.mz_acquisition_range_lower;
+ let mz_upper = global_meta_data.mz_acquisition_range_upper;
+
+ let loader = match in_memory {
+ true => TimsDataLoader::new_in_memory(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper),
+ false => TimsDataLoader::new_lazy(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper),
+ };
+
+ TimsDataset { loader }
+ }
+}
+
+impl TimsData for TimsDataset {
+ // Get a frame by its id
+ fn get_frame(&self, frame_id: u32) -> TimsFrame {
+ self.loader.get_frame(frame_id)
+ }
+ // Get a raw frame by its id
+ fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
+ self.loader.get_raw_frame(frame_id)
+ }
+ // Get a collection of frames by their ids
+ fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
+ self.loader.get_slice(frame_ids, num_threads)
+ }
+ // Get the acquisition mode, DDA or DIA
+ fn get_acquisition_mode(&self) -> AcquisitionMode {
+ self.loader.get_acquisition_mode().clone()
+ }
+ // Get total number of frames in the dataset
+ fn get_frame_count(&self) -> i32 {
+ self.loader.get_frame_count()
+ }
+ // Get the path to the data
+ fn get_data_path(&self) -> &str {
+ &self.loader.get_data_path()
+ }
+}
+
+impl IndexConverter for TimsDataset {
+ fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
+ self.loader.get_index_converter().tof_to_mz(frame_id, tof_values)
+ }
+ // convert m/z values to TOF values given a valid data handle and frame id
+ fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
+ self.loader.get_index_converter().mz_to_tof(frame_id, mz_values)
+ }
+ // convert inverse mobility values to scan values given a valid data handle and frame id
+ fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
+ self.loader.get_index_converter().scan_to_inverse_mobility(frame_id, scan_values)
+ }
+ // convert scan values to inverse mobility values given a valid data handle and frame id
+ fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec<f64>) -> Vec<u32> {
+ self.loader.get_index_converter().inverse_mobility_to_scan(frame_id, inverse_mobility_values)
+ }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139
use mscore::timstof::frame::{RawTimsFrame, TimsFrame};
+use mscore::timstof::slice::TimsSlice;
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+use crate::data::acquisition::AcquisitionMode;
+use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader};
+use crate::data::meta::{DDAPrecursorMeta, PasefMsMsMeta, read_dda_precursor_meta, read_pasef_frame_ms_ms_info, read_global_meta_sql, read_meta_data_sql};
+
+#[derive(Clone)]
+pub struct PASEFDDAFragment {
+ pub frame_id: u32,
+ pub precursor_id: u32,
+ pub collision_energy: f64,
+ pub selected_fragment: TimsFrame,
+}
+
+pub struct TimsDatasetDDA {
+ pub loader: TimsDataLoader,
+}
+
+impl TimsDatasetDDA {
+
+ pub fn new(bruker_lib_path: &str, data_path: &str, in_memory: bool, use_bruker_sdk: bool) -> Self {
+
+ // TODO: error handling
+ let global_meta_data = read_global_meta_sql(data_path).unwrap();
+ let meta_data = read_meta_data_sql(data_path).unwrap();
+
+ let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
+ let im_lower = global_meta_data.one_over_k0_range_lower;
+ let im_upper = global_meta_data.one_over_k0_range_upper;
+
+ let tof_max_index = global_meta_data.tof_max_index;
+ let mz_lower = global_meta_data.mz_acquisition_range_lower;
+ let mz_upper = global_meta_data.mz_acquisition_range_upper;
+
+ let loader = match in_memory {
+ true => TimsDataLoader::new_in_memory(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper),
+ false => TimsDataLoader::new_lazy(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper),
+ };
+ TimsDatasetDDA { loader }
+ }
+
+ pub fn get_selected_precursors(&self) -> Vec<DDAPrecursorMeta> {
+ read_dda_precursor_meta(&self.loader.get_data_path()).unwrap()
+ }
+
+ pub fn get_pasef_frame_ms_ms_info(&self) -> Vec<PasefMsMsMeta> {
+ read_pasef_frame_ms_ms_info(&self.loader.get_data_path()).unwrap()
+ }
+
+ /// Get the fragment spectra for all PASEF selected precursors
+ pub fn get_pasef_fragments(&self, num_threads: usize) -> Vec<PASEFDDAFragment> {
+ // extract fragment spectra information
+ let pasef_info = self.get_pasef_frame_ms_ms_info();
+
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+ let filtered_frames = pool.install(|| {
+
+ let result: Vec<_> = pasef_info.par_iter().map(|pasef_info| {
+
+ // get the frame
+ let frame = self.loader.get_frame(pasef_info.frame_id as u32);
+
+ // get five percent of the scan range
+ let scan_margin = (pasef_info.scan_num_end - pasef_info.scan_num_begin) / 20;
+
+ // get the fragment spectrum by scan range
+ let filtered_frame = frame.filter_ranged(
+ 0.0,
+ 2000.0,
+ (pasef_info.scan_num_begin - scan_margin) as i32,
+ (pasef_info.scan_num_end + scan_margin) as i32,
+ 0.0,
+ 5.0,
+ 0.0,
+ 1e9,
+ );
+
+ PASEFDDAFragment {
+ frame_id: pasef_info.frame_id as u32,
+ precursor_id: pasef_info.precursor_id as u32,
+ collision_energy: pasef_info.collision_energy,
+ // flatten the spectrum
+ selected_fragment: filtered_frame,
+ }
+ }).collect();
+
+ result
+ });
+
+ filtered_frames
+ }
+}
+
+impl TimsData for TimsDatasetDDA {
+ fn get_frame(&self, frame_id: u32) -> TimsFrame {
+ self.loader.get_frame(frame_id)
+ }
+
+ fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
+ self.loader.get_raw_frame(frame_id)
+ }
+
+ fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
+ self.loader.get_slice(frame_ids, num_threads)
+ }
+
+ fn get_acquisition_mode(&self) -> AcquisitionMode {
+ self.loader.get_acquisition_mode().clone()
+ }
+
+ fn get_frame_count(&self) -> i32 {
+ self.loader.get_frame_count()
+ }
+
+ fn get_data_path(&self) -> &str {
+ &self.loader.get_data_path()
+ }
+}
+
+impl IndexConverter for TimsDatasetDDA {
+ fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
+ self.loader.get_index_converter().tof_to_mz(frame_id, tof_values)
+ }
+
+ fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
+ self.loader.get_index_converter().mz_to_tof(frame_id, mz_values)
+ }
+
+ fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
+ self.loader.get_index_converter().scan_to_inverse_mobility(frame_id, scan_values)
+ }
+
+ fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec<f64>) -> Vec<u32> {
+ self.loader.get_index_converter().inverse_mobility_to_scan(frame_id, inverse_mobility_values)
+ }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134
use rand::prelude::IteratorRandom;
+use mscore::timstof::frame::{RawTimsFrame, TimsFrame};
+use mscore::timstof::slice::TimsSlice;
+use crate::data::acquisition::AcquisitionMode;
+use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader};
+use crate::data::meta::{read_dia_ms_ms_info, read_dia_ms_ms_windows, read_global_meta_sql, read_meta_data_sql, DiaMsMisInfo, DiaMsMsWindow, FrameMeta, GlobalMetaData};
+
+pub struct TimsDatasetDIA {
+ pub loader: TimsDataLoader,
+ pub global_meta_data: GlobalMetaData,
+ pub meta_data: Vec<FrameMeta>,
+ pub dia_ms_mis_info: Vec<DiaMsMisInfo>,
+ pub dia_ms_ms_windows: Vec<DiaMsMsWindow>
+}
+
+impl TimsDatasetDIA {
+ pub fn new(bruker_lib_path: &str, data_path: &str, in_memory: bool, use_bruker_sdk: bool) -> Self {
+
+ // TODO: error handling
+ let global_meta_data = read_global_meta_sql(data_path).unwrap();
+ let meta_data = read_meta_data_sql(data_path).unwrap();
+ let dia_ms_mis_info = read_dia_ms_ms_info(data_path).unwrap();
+ let dia_ms_ms_windows = read_dia_ms_ms_windows(data_path).unwrap();
+
+ let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
+ let im_lower = global_meta_data.one_over_k0_range_lower;
+ let im_upper = global_meta_data.one_over_k0_range_upper;
+
+ let tof_max_index = global_meta_data.tof_max_index;
+ let mz_lower = global_meta_data.mz_acquisition_range_lower;
+ let mz_upper = global_meta_data.mz_acquisition_range_upper;
+
+ let loader = match in_memory {
+ true => TimsDataLoader::new_in_memory(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper),
+ false => TimsDataLoader::new_lazy(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper),
+ };
+
+ TimsDatasetDIA { loader, global_meta_data, meta_data, dia_ms_mis_info, dia_ms_ms_windows }
+ }
+
+ pub fn sample_precursor_signal(&self, num_frames: usize, max_intensity: f64, take_probability: f64) -> TimsFrame {
+ // get all precursor frames
+ let precursor_frames = self.meta_data.iter().filter(|x| x.ms_ms_type == 0);
+
+ // randomly sample num_frames
+ let mut rng = rand::thread_rng();
+ let mut sampled_frames: Vec<TimsFrame> = Vec::new();
+
+ // go through each frame and sample the data
+ for frame in precursor_frames.choose_multiple(&mut rng, num_frames) {
+ let frame_id = frame.id;
+ let frame_data = self.loader.get_frame(frame_id as u32).filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity).generate_random_sample(take_probability);
+ sampled_frames.push(frame_data);
+ }
+
+ // get the first frame
+ let mut sampled_frame = sampled_frames.remove(0);
+
+ // sum all the other frames to the first frame
+ for frame in sampled_frames {
+ sampled_frame = sampled_frame + frame;
+ }
+
+ sampled_frame
+ }
+
+ pub fn sample_fragment_signal(&self, num_frames: usize, window_group: u32, max_intensity: f64, take_probability: f64) -> TimsFrame {
+ // get all fragment frames, filter by window_group
+ let fragment_frames: Vec<u32> = self.dia_ms_mis_info.iter().filter(|x| x.window_group == window_group).map(|x| x.frame_id).collect();
+
+ // randomly sample num_frames
+ let mut rng = rand::thread_rng();
+ let mut sampled_frames: Vec<TimsFrame> = Vec::new();
+
+ // go through each frame and sample the data
+ for frame_id in fragment_frames.into_iter().choose_multiple(&mut rng, num_frames) {
+ let frame_data = self.loader.get_frame(frame_id).filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity).generate_random_sample(take_probability);
+ sampled_frames.push(frame_data);
+ }
+
+ // get the first frame
+ let mut sampled_frame = sampled_frames.remove(0);
+
+ // sum all the other frames to the first frame
+ for frame in sampled_frames {
+ sampled_frame = sampled_frame + frame;
+ }
+
+ sampled_frame
+ }
+}
+
+impl TimsData for TimsDatasetDIA {
+ fn get_frame(&self, frame_id: u32) -> TimsFrame {
+ self.loader.get_frame(frame_id)
+ }
+
+ fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
+ self.loader.get_raw_frame(frame_id)
+ }
+
+ fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
+ self.loader.get_slice(frame_ids, num_threads)
+ }
+ fn get_acquisition_mode(&self) -> AcquisitionMode {
+ self.loader.get_acquisition_mode().clone()
+ }
+
+ fn get_frame_count(&self) -> i32 {
+ self.loader.get_frame_count()
+ }
+
+ fn get_data_path(&self) -> &str {
+ &self.loader.get_data_path()
+ }
+}
+
+impl IndexConverter for TimsDatasetDIA {
+ fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
+ self.loader.get_index_converter().tof_to_mz(frame_id, tof_values)
+ }
+
+ fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
+ self.loader.get_index_converter().mz_to_tof(frame_id, mz_values)
+ }
+
+ fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
+ self.loader.get_index_converter().scan_to_inverse_mobility(frame_id, scan_values)
+ }
+
+ fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec<f64>) -> Vec<u32> {
+ self.loader.get_index_converter().inverse_mobility_to_scan(frame_id, inverse_mobility_values)
+ }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 +698 +699 +700 +701 +702 +703 +704 +705 +706 +707 +708 +709 +710 +711 +712 +713 +714 +715 +716 +717 +718 +719 +720 +721 +722 +723 +724 +725 +726 +727 +728 +729 +730 +731 +732 +733 +734 +735 +736 +737 +738 +739 +740 +741 +742 +743 +744 +745 +746 +747 +748 +749 +750 +751 +752 +753 +754 +755 +756 +757 +758 +759 +760 +761 +762 +763 +764 +765 +766 +767 +768 +769 +770 +771 +772 +773 +774 +775 +776 +777 +778 +779 +780 +781 +782 +783 +784 +785 +786 +787 +788 +789 +790 +791 +792 +793 +794 +795 +796 +797 +798 +799 +800 +801 +802 +803 +804 +805 +806 +807 +808 +809 +810 +811 +812 +813 +814 +815 +816
use std::fs::File;
+use std::io::{Read, Cursor, SeekFrom, Seek};
+use std::path::PathBuf;
+use byteorder::{LittleEndian, ReadBytesExt};
+use mscore::data::spectrum::MsType;
+use mscore::timstof::frame::{ImsFrame, RawTimsFrame, TimsFrame};
+use mscore::timstof::slice::TimsSlice;
+use crate::data::meta::{FrameMeta, GlobalMetaData, read_global_meta_sql, read_meta_data_sql};
+use crate::data::raw::BrukerTimsDataLibrary;
+use crate::data::utility::{flatten_scan_values, parse_decompressed_bruker_binary_data, zstd_decompress};
+
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+use crate::data::acquisition::AcquisitionMode;
+
+use std::error::Error;
+
+/// Decompresses an LZF-compressed byte slice with `lzf::decompress`.
+///
+/// `max_output_size` is forwarded unchanged to the decoder.
+///
+/// # Errors
+///
+/// Any decoder error is formatted into a message prefixed with
+/// `LZF decompression failed: ` and returned as a boxed error.
+fn lzf_decompress(data: &[u8], max_output_size: usize) -> Result<Vec<u8>, Box<dyn Error>> {
+    match lzf::decompress(data, max_output_size) {
+        Ok(bytes) => Ok(bytes),
+        Err(cause) => Err(format!("LZF decompression failed: {}", cause).into()),
+    }
+}
+
+/// Decodes the decompressed payload of one scan into TOF indices and intensities.
+///
+/// The payload is read as consecutive little-endian `i32` words; trailing bytes that do
+/// not form a complete word are ignored. Each word is interpreted as follows:
+///
+/// * a non-negative word is an intensity. It is written to `intensities` (truncated to
+///   `u16`) together with the current TOF index. If the previous word was also an
+///   intensity (or this is the first word), the TOF index is advanced by one first.
+/// * a negative word advances the TOF index by its absolute value and emits nothing.
+///
+/// # Arguments
+///
+/// * `decompressed_bytes` - decompressed payload of a single scan
+/// * `scan_indices` - receives the number of peaks of this scan at position `scan_index`
+/// * `tof_indices` - output buffer for TOF indices, written starting at `scan_start`
+/// * `intensities` - output buffer for intensities, written starting at `scan_start`
+/// * `scan_start` - first free position in `tof_indices` / `intensities`
+/// * `scan_index` - position in `scan_indices` that belongs to this scan
+///
+/// # Returns
+///
+/// The number of peaks written for this scan.
+///
+/// # Panics
+///
+/// Panics if the output buffers are too small for the decoded peaks or if
+/// `scan_index` is out of bounds for `scan_indices`.
+fn parse_decompressed_bruker_binary_type1(
+    decompressed_bytes: &[u8],
+    scan_indices: &mut [i64],
+    tof_indices: &mut [u32],
+    intensities: &mut [u16],
+    scan_start: usize,
+    scan_index: usize,
+) -> usize {
+    let mut tof_index = 0i32;
+    let mut previous_was_intensity = true;
+    let mut current_index = scan_start;
+
+    // Decode word by word instead of reinterpreting the byte pointer as `*const i32`:
+    // a `&[u8]` carries no alignment guarantee, so the cast was undefined behaviour for
+    // unaligned input, and it silently depended on the host byte order.
+    for word in decompressed_bytes.chunks_exact(4) {
+        let value = i32::from_le_bytes([word[0], word[1], word[2], word[3]]);
+
+        if value >= 0 {
+            // non-negative value => intensity
+            if previous_was_intensity {
+                tof_index += 1;
+            }
+            tof_indices[current_index] = tof_index as u32;
+            // NOTE: intensities above u16::MAX are truncated by this cast.
+            intensities[current_index] = value as u16;
+            previous_was_intensity = true;
+            current_index += 1;
+        } else {
+            // negative value => jump in tof_index; subtracting adds |value|
+            tof_index -= value;
+            previous_was_intensity = false;
+        }
+    }
+
+    let scan_size = current_index - scan_start;
+    scan_indices[scan_index] = scan_size as i64;
+    scan_size
+}
+
+/// Metadata and lookup tables for one raw data folder, as assembled by `TimsRawDataLayout::new`.
+pub struct TimsRawDataLayout {
+ /// Data folder path, copied from the `data_path` argument of `new`.
+ pub raw_data_path: String,
+ /// Result of `read_global_meta_sql` for the data folder.
+ pub global_meta_data: GlobalMetaData,
+ /// Result of `read_meta_data_sql` for the data folder, one entry per returned row.
+ pub frame_meta_data: Vec<FrameMeta>,
+ /// Largest `num_scans` value found in `frame_meta_data`.
+ pub max_scan_count: i64,
+ /// Running sum of `num_peaks` over `frame_meta_data` with a leading 0 (length is frame count + 1).
+ pub frame_id_ptr: Vec<i64>,
+ /// `tims_id` of every frame in `frame_meta_data` order; the loaders in this file use it as a byte offset into `analysis.tdf_bin`.
+ pub tims_offset_values: Vec<i64>,
+ /// Derived from `scan_mode` of the first frame: 8 maps to DDA, 9 to DIA, anything else to Unknown.
+ pub acquisition_mode: AcquisitionMode,
+}
+
+impl TimsRawDataLayout {
+    /// Reads global and per-frame metadata for `data_path` and derives the lookup tables.
+    ///
+    /// # Panics
+    ///
+    /// Panics if either metadata query fails or if no frame metadata rows were returned.
+    pub fn new(data_path: &str) -> Self {
+        let global_meta_data = read_global_meta_sql(data_path).unwrap();
+        let frame_meta_data = read_meta_data_sql(data_path).unwrap();
+
+        // largest scan count over all frames
+        let max_scan_count = frame_meta_data
+            .iter()
+            .map(|frame| frame.num_scans)
+            .max()
+            .unwrap();
+
+        // cumulative peak counts, seeded with 0 so that entry i + 1 covers frames 0..=i
+        let mut frame_id_ptr: Vec<i64> = Vec::with_capacity(frame_meta_data.len() + 1);
+        let mut running_total = 0i64;
+        frame_id_ptr.push(running_total);
+        for frame in &frame_meta_data {
+            running_total = frame.num_peaks + running_total;
+            frame_id_ptr.push(running_total);
+        }
+
+        // per-frame tims ids
+        let tims_offset_values: Vec<i64> =
+            frame_meta_data.iter().map(|frame| frame.tims_id).collect();
+
+        // the acquisition mode is taken from the scan mode of the first frame only
+        let first_scan_mode = frame_meta_data[0].scan_mode;
+        let acquisition_mode = if first_scan_mode == 8 {
+            AcquisitionMode::DDA
+        } else if first_scan_mode == 9 {
+            AcquisitionMode::DIA
+        } else {
+            AcquisitionMode::Unknown
+        };
+
+        Self {
+            raw_data_path: data_path.to_string(),
+            global_meta_data,
+            frame_meta_data,
+            max_scan_count,
+            frame_id_ptr,
+            tims_offset_values,
+            acquisition_mode,
+        }
+    }
+}
+
+/// Read access to the frames of a raw data set.
+///
+/// The implementations in this file index their per-frame tables with `frame_id - 1`,
+/// i.e. they treat frame ids as 1-based.
+pub trait TimsData {
+ /// Returns the frame with the given id, including converted m/z and inverse mobility values.
+ fn get_frame(&self, frame_id: u32) -> TimsFrame;
+ /// Returns the frame with the given id as a `RawTimsFrame` (scan, tof and intensity values without index conversion).
+ fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame;
+ /// Loads all frames listed in `frame_ids` into one `TimsSlice`.
+ /// In this file the in-memory loader uses `num_threads` to size a rayon thread pool; the lazy loader ignores it.
+ fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice;
+ /// Returns the acquisition mode of the data set.
+ fn get_acquisition_mode(&self) -> AcquisitionMode;
+ /// Returns the number of frames in the data set.
+ fn get_frame_count(&self) -> i32;
+ /// Returns the path of the data folder.
+ fn get_data_path(&self) -> &str;
+}
+
+/// Conversions between instrument indices (TOF index, scan number) and physical values
+/// (m/z, inverse ion mobility).
+///
+/// Every method maps element-wise and returns one output value per input value.
+/// `frame_id` is forwarded to the Bruker library by `BrukerLibTimsDataConverter` and
+/// ignored by `SimpleIndexConverter`.
+pub trait IndexConverter {
+ /// Converts TOF indices to m/z values.
+ fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64>;
+ /// Converts m/z values to TOF indices.
+ fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32>;
+ /// Converts scan numbers to inverse mobility values.
+ fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64>;
+ /// Converts inverse mobility values to scan numbers.
+ fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec<f64>) -> Vec<u32>;
+}
+
+/// Index converter that delegates every conversion to a `BrukerTimsDataLibrary` handle.
+pub struct BrukerLibTimsDataConverter {
+    /// Library handle used for all conversions.
+    pub bruker_lib: BrukerTimsDataLibrary,
+}
+
+impl BrukerLibTimsDataConverter {
+    /// Creates a converter from the library path and the data folder path.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `BrukerTimsDataLibrary::new` returns an error.
+    pub fn new(bruker_lib_path: &str, data_path: &str) -> Self {
+        let handle = BrukerTimsDataLibrary::new(bruker_lib_path, data_path);
+        Self {
+            bruker_lib: handle.unwrap(),
+        }
+    }
+}
+impl IndexConverter for BrukerLibTimsDataConverter {
+    /// Translates TOF indices to m/z values by calling the Bruker library.
+    ///
+    /// # Arguments
+    ///
+    /// * `frame_id` - id of the frame the values belong to
+    /// * `tof` - TOF indices (`u32`)
+    ///
+    /// # Returns
+    ///
+    /// One m/z value per input index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the library call reports an error.
+    fn tof_to_mz(&self, frame_id: u32, tof: &Vec<u32>) -> Vec<f64> {
+        // the library works on doubles, so widen the indices first
+        let dbl_tofs: Vec<f64> = tof.iter().map(|&index| index as f64).collect();
+        let mut mz_values: Vec<f64> = vec![0.0; tof.len()];
+
+        self.bruker_lib
+            .tims_index_to_mz(frame_id, &dbl_tofs, &mut mz_values)
+            .expect("Bruker binary call failed at: tims_index_to_mz;");
+
+        mz_values
+    }
+
+    /// Translates m/z values to TOF indices by calling the Bruker library.
+    ///
+    /// The library returns fractional indices; each is rounded to the nearest integer.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the library call reports an error.
+    fn mz_to_tof(&self, frame_id: u32, mz: &Vec<f64>) -> Vec<u32> {
+        let mut tof_values: Vec<f64> = vec![0.0; mz.len()];
+
+        self.bruker_lib
+            .tims_mz_to_index(frame_id, mz, &mut tof_values)
+            .expect("Bruker binary call failed at: tims_mz_to_index;");
+
+        tof_values
+            .into_iter()
+            .map(|index| index.round() as u32)
+            .collect()
+    }
+
+    /// Translates scan numbers to inverse mobility values by calling the Bruker library.
+    ///
+    /// # Arguments
+    ///
+    /// * `frame_id` - id of the frame the values belong to
+    /// * `scan` - scan numbers (`u32`)
+    ///
+    /// # Returns
+    ///
+    /// One inverse mobility value per input scan number.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the library call reports an error.
+    fn scan_to_inverse_mobility(&self, frame_id: u32, scan: &Vec<u32>) -> Vec<f64> {
+        let dbl_scans: Vec<f64> = scan.iter().map(|&number| number as f64).collect();
+        let mut inv_mob: Vec<f64> = vec![0.0; scan.len()];
+
+        self.bruker_lib
+            .tims_scan_to_inv_mob(frame_id, &dbl_scans, &mut inv_mob)
+            .expect("Bruker binary call failed at: tims_scannum_to_oneoverk0;");
+
+        inv_mob
+    }
+
+    /// Translates inverse mobility values to scan numbers by calling the Bruker library.
+    ///
+    /// The library returns fractional scan numbers; each is rounded to the nearest integer.
+    ///
+    /// # Arguments
+    ///
+    /// * `frame_id` - id of the frame the values belong to
+    /// * `inv_mob` - inverse mobility values
+    ///
+    /// # Returns
+    ///
+    /// One scan number (`u32`) per input value.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the library call reports an error.
+    fn inverse_mobility_to_scan(&self, frame_id: u32, inv_mob: &Vec<f64>) -> Vec<u32> {
+        let mut scan_values: Vec<f64> = vec![0.0; inv_mob.len()];
+
+        self.bruker_lib
+            .inv_mob_to_tims_scan(frame_id, inv_mob, &mut scan_values)
+            .expect("Bruker binary call failed at: tims_oneoverk0_to_scannum;");
+
+        scan_values
+            .into_iter()
+            .map(|number| number.round() as u32)
+            .collect()
+    }
+}
+
+/// The index converter implementations selectable at runtime.
+/// Its `IndexConverter` impl forwards every call to the wrapped converter.
+pub enum TimsIndexConverter {
+ /// Converter computed from acquisition boundaries (see `SimpleIndexConverter::from_boundaries`).
+ Simple(SimpleIndexConverter),
+ /// Converter that calls into the Bruker library.
+ BrukerLib(BrukerLibTimsDataConverter)
+}
+
+impl TimsIndexConverter {
+    /// Returns the wrapped converter as a trait object so the trait impl can forward to it.
+    fn wrapped_converter(&self) -> &dyn IndexConverter {
+        match self {
+            Self::Simple(inner) => inner,
+            Self::BrukerLib(inner) => inner,
+        }
+    }
+}
+
+/// Forwards every conversion to the converter held by the active variant.
+impl IndexConverter for TimsIndexConverter {
+    fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
+        self.wrapped_converter().tof_to_mz(frame_id, tof_values)
+    }
+
+    fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
+        self.wrapped_converter().mz_to_tof(frame_id, mz_values)
+    }
+
+    fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
+        self.wrapped_converter()
+            .scan_to_inverse_mobility(frame_id, scan_values)
+    }
+
+    fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec<f64>) -> Vec<u32> {
+        self.wrapped_converter()
+            .inverse_mobility_to_scan(frame_id, inverse_mobility_values)
+    }
+}
+
+
+/// Loader that opens `analysis.tdf_bin` and reads the requested frame on every call
+/// instead of keeping the binary data in memory.
+pub struct TimsLazyLoder {
+ /// Metadata and offset tables of the data folder.
+ pub raw_data_layout: TimsRawDataLayout,
+ /// Converter used to turn TOF indices and scan numbers into m/z and inverse mobility.
+ pub index_converter: TimsIndexConverter,
+}
+
+impl TimsData for TimsLazyLoder {
+ /// Reads one frame from `analysis.tdf_bin`, decompresses it and converts its indices.
+ ///
+ /// `frame_id` is 1-based: the per-frame tables are indexed with `frame_id - 1`.
+ /// A frame whose metadata reports zero peaks is returned empty with `MsType::Unknown`
+ /// without touching the binary file.
+ ///
+ /// Compression type 1 is decoded scan by scan via LZF, type 2 via zstd on the whole frame.
+ ///
+ /// # Panics
+ ///
+ /// Panics on any I/O, decompression or parsing error, on an out-of-range frame id and
+ /// when `tims_compression_type` is neither 1 nor 2.
+ fn get_frame(&self, frame_id: u32) -> TimsFrame {
+ let frame_index = (frame_id - 1) as usize;
+
+ // there can be empty frames in the data; check for that and, if so, return an empty frame
+ let num_peaks = self.raw_data_layout.frame_meta_data[frame_index].num_peaks;
+
+ if num_peaks == 0 {
+ return TimsFrame {
+ frame_id: frame_id as i32,
+ ms_type: MsType::Unknown,
+ scan: Vec::new(),
+ tof: Vec::new(),
+ ims_frame: ImsFrame { retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time, mobility: Vec::new(), mz: Vec::new(), intensity: Vec::new() }
+ };
+ }
+
+ // byte offset of this frame inside analysis.tdf_bin
+ let offset = self.raw_data_layout.tims_offset_values[frame_index] as u64;
+
+ // the binary file is opened anew on every call
+ let mut file_path = PathBuf::from(&self.raw_data_layout.raw_data_path);
+ file_path.push("analysis.tdf_bin");
+ let mut infile = File::open(&file_path).unwrap();
+
+ infile.seek(SeekFrom::Start(offset)).unwrap();
+
+ // first 4 bytes: little-endian i32 block size (used below as total size including the 8 header bytes)
+ let mut bin_buffer = [0u8; 4];
+ infile.read_exact(&mut bin_buffer).unwrap();
+ let bin_size = Cursor::new(bin_buffer).read_i32::<LittleEndian>().unwrap();
+
+ // next 4 bytes are consumed but never interpreted here
+ // NOTE(review): presumably the scan count of the frame; confirm against the format description
+ infile.read_exact(&mut bin_buffer).unwrap();
+
+ match self.raw_data_layout.global_meta_data.tims_compression_type {
+ 1 => {
+ let scan_count = self.raw_data_layout.frame_meta_data[frame_index].num_scans as usize;
+ let num_peaks = num_peaks as usize;
+ // number of bytes in front of the compressed scan data: 8 header bytes plus the offset table
+ let compression_offset = 8 + (scan_count + 1) * 4;
+
+ // offset table: scan_count + 1 little-endian i32 values
+ let mut scan_offsets_buffer = vec![0u8; (scan_count + 1) * 4];
+ infile.read_exact(&mut scan_offsets_buffer).unwrap();
+
+ let mut scan_offsets = Vec::with_capacity(scan_count + 1);
+ {
+ let mut rdr = Cursor::new(&scan_offsets_buffer);
+ for _ in 0..(scan_count + 1) {
+ scan_offsets.push(rdr.read_i32::<LittleEndian>().unwrap());
+ }
+ }
+
+ // make the offsets relative to the start of `compressed_data`
+ for offs in &mut scan_offsets {
+ *offs -= compression_offset as i32;
+ }
+
+ let remaining_size = (bin_size as usize - compression_offset) as usize;
+ let mut compressed_data = vec![0u8; remaining_size];
+ infile.read_exact(&mut compressed_data).unwrap();
+
+ // scan_indices_ holds the peak count per scan; tof/intensity are flat over all scans
+ let mut scan_indices_ = vec![0i64; scan_count];
+ let mut tof_indices_ = vec![0u32; num_peaks];
+ let mut intensities_ = vec![0u16; num_peaks];
+
+ // next free position in the flat output buffers
+ let mut scan_start = 0usize;
+
+ for scan_index in 0..scan_count {
+ let start = scan_offsets[scan_index] as usize;
+ let end = scan_offsets[scan_index + 1] as usize;
+
+ // equal offsets mean the scan has no data
+ if start == end {
+ continue;
+ }
+
+ // upper bound handed to the LZF decoder for a single scan
+ let max_output_size = num_peaks * 8;
+ let decompressed_bytes = lzf_decompress(&compressed_data[start..end], max_output_size)
+ .expect("LZF decompression failed.");
+
+ scan_start += parse_decompressed_bruker_binary_type1(
+ &decompressed_bytes,
+ &mut scan_indices_,
+ &mut tof_indices_,
+ &mut intensities_,
+ scan_start,
+ scan_index
+ );
+ }
+
+ // Expand the per-scan peak counts into one scan index per peak
+ let mut scan = Vec::with_capacity(num_peaks);
+ {
+ let mut current_scan_index = 0u32;
+ for &size in &scan_indices_ {
+ let sz = size as usize;
+ for _ in 0..sz {
+ scan.push(current_scan_index);
+ }
+ current_scan_index += 1;
+ }
+ }
+
+ let intensity_dbl = intensities_.iter().map(|&x| x as f64).collect::<Vec<f64>>();
+ let tof_i32 = tof_indices_.iter().map(|&x| x as i32).collect::<Vec<i32>>();
+
+ let mz = self.index_converter.tof_to_mz(frame_id, &tof_indices_);
+ let inv_mobility = self.index_converter.scan_to_inverse_mobility(frame_id, &scan);
+
+ let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
+ let ms_type = match ms_type_raw {
+ 0 => MsType::Precursor,
+ 8 => MsType::FragmentDda,
+ 9 => MsType::FragmentDia,
+ _ => MsType::Unknown,
+ };
+
+ TimsFrame {
+ frame_id: frame_id as i32,
+ ms_type,
+ scan: scan.iter().map(|&x| x as i32).collect(),
+ tof: tof_i32,
+ ims_frame: ImsFrame {
+ retention_time: self.raw_data_layout.frame_meta_data[frame_index].time,
+ mobility: inv_mobility,
+ mz,
+ intensity: intensity_dbl
+ }
+ }
+ },
+
+ // Type 2: the rest of the block (bin_size minus the 8 header bytes) is one zstd stream
+ 2 => {
+ let mut compressed_data = vec![0u8; bin_size as usize - 8];
+ infile.read_exact(&mut compressed_data).unwrap();
+
+ let decompressed_bytes = zstd_decompress(&compressed_data).unwrap();
+
+ let (scan, tof, intensity) = parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
+ let intensity_dbl = intensity.iter().map(|&x| x as f64).collect();
+ let tof_i32 = tof.iter().map(|&x| x as i32).collect();
+ let scan = flatten_scan_values(&scan, true);
+
+ let mz = self.index_converter.tof_to_mz(frame_id, &tof);
+ let inv_mobility = self.index_converter.scan_to_inverse_mobility(frame_id, &scan);
+
+ let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
+
+ let ms_type = match ms_type_raw {
+ 0 => MsType::Precursor,
+ 8 => MsType::FragmentDda,
+ 9 => MsType::FragmentDia,
+ _ => MsType::Unknown,
+ };
+
+ TimsFrame {
+ frame_id: frame_id as i32,
+ ms_type,
+ scan: scan.iter().map(|&x| x as i32).collect(),
+ tof: tof_i32,
+ ims_frame: ImsFrame {
+ retention_time: self.raw_data_layout.frame_meta_data[frame_index].time,
+ mobility: inv_mobility,
+ mz,
+ intensity: intensity_dbl
+ }
+ }
+ },
+
+ // any other compression type is unsupported
+ _ => {
+ panic!("TimsCompressionType is not 1 or 2.")
+ }
+ }
+ }
+
+    /// Reads one frame from `analysis.tdf_bin` and returns it without index conversion.
+    ///
+    /// `frame_id` is 1-based. A frame whose metadata reports zero peaks is returned
+    /// empty with `MsType::Unknown` before the binary file is opened.
+    ///
+    /// # Panics
+    ///
+    /// Panics on I/O, decompression or parsing errors, when the compression type is 1
+    /// (not implemented for raw frames) and when it is neither 1 nor 2.
+    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
+        let layout = &self.raw_data_layout;
+        let frame_index = (frame_id - 1) as usize;
+        let offset = layout.tims_offset_values[frame_index] as u64;
+        let frame_meta = &layout.frame_meta_data[frame_index];
+
+        // frames without peaks exist in real data; answer them from metadata alone
+        if frame_meta.num_peaks == 0 {
+            return RawTimsFrame {
+                frame_id: frame_id as i32,
+                retention_time: frame_meta.time,
+                ms_type: MsType::Unknown,
+                scan: Vec::new(),
+                tof: Vec::new(),
+                intensity: Vec::new(),
+            };
+        }
+
+        let file_path = PathBuf::from(&layout.raw_data_path).join("analysis.tdf_bin");
+        let mut infile = File::open(&file_path).unwrap();
+        infile.seek(SeekFrom::Start(offset)).unwrap();
+
+        // first header word: little-endian block size
+        let mut header_word = [0u8; 4];
+        infile.read_exact(&mut header_word).unwrap();
+        let bin_size = Cursor::new(header_word).read_i32::<LittleEndian>().unwrap();
+
+        // second header word is consumed but not interpreted
+        infile.read_exact(&mut header_word).unwrap();
+
+        let compression_type = layout.global_meta_data.tims_compression_type;
+        if compression_type == 1 {
+            panic!("Decompression Type1 not implemented.");
+        }
+        if compression_type != 2 {
+            // Error on unknown compression algorithm
+            panic!("TimsCompressionType is not 1 or 2.")
+        }
+
+        // Extract from ZSTD compressed binary: payload is the block minus the 8 header bytes
+        let mut compressed_data = vec![0u8; bin_size as usize - 8];
+        infile.read_exact(&mut compressed_data).unwrap();
+
+        let decompressed_bytes = zstd_decompress(&compressed_data).unwrap();
+        let (scan, tof, intensity) =
+            parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
+
+        let ms_type = match frame_meta.ms_ms_type {
+            0 => MsType::Precursor,
+            8 => MsType::FragmentDda,
+            9 => MsType::FragmentDia,
+            _ => MsType::Unknown,
+        };
+
+        RawTimsFrame {
+            frame_id: frame_id as i32,
+            retention_time: frame_meta.time,
+            ms_type,
+            scan,
+            tof,
+            intensity: intensity.iter().map(|&x| x as f64).collect(),
+        }
+    }
+
+    /// Loads the requested frames one after another, in the order given.
+    ///
+    /// The thread count is ignored by this loader.
+    fn get_slice(&self, frame_ids: Vec<u32>, _num_threads: usize) -> TimsSlice {
+        let mut frames: Vec<TimsFrame> = Vec::with_capacity(frame_ids.len());
+        for frame_id in frame_ids {
+            frames.push(self.get_frame(frame_id));
+        }
+
+        TimsSlice { frames }
+    }
+
+    /// Returns a copy of the acquisition mode stored in the data layout.
+    fn get_acquisition_mode(&self) -> AcquisitionMode {
+        let layout = &self.raw_data_layout;
+        layout.acquisition_mode.clone()
+    }
+
+    /// Returns the number of frame metadata entries, cast to `i32`.
+    fn get_frame_count(&self) -> i32 {
+        let frame_count = self.raw_data_layout.frame_meta_data.len();
+        frame_count as i32
+    }
+
+    /// Returns the data folder path stored in the data layout.
+    fn get_data_path(&self) -> &str {
+        self.raw_data_layout.raw_data_path.as_str()
+    }
+}
+
+/// Loader that serves frames from a byte buffer held in memory.
+/// `TimsDataLoader::new_in_memory` fills the buffer with the whole `analysis.tdf_bin` file.
+pub struct TimsInMemoryLoader {
+ /// Metadata and offset tables of the data folder.
+ pub raw_data_layout: TimsRawDataLayout,
+ /// Converter used to turn TOF indices and scan numbers into m/z and inverse mobility.
+ pub index_converter: TimsIndexConverter,
+ // binary frame data; frames are located in it via `raw_data_layout.tims_offset_values`
+ compressed_data: Vec<u8>
+}
+
+impl TimsData for TimsInMemoryLoader {
+    /// Builds a `TimsFrame` from the raw frame with the given id.
+    ///
+    /// Raw frames of type `MsType::FragmentDda` are passed through `smooth(1)` and
+    /// `centroid(1)` first. If the resulting raw frame has no scan values,
+    /// `TimsFrame::default()` is returned.
+    fn get_frame(&self, frame_id: u32) -> TimsFrame {
+        let fetched = self.get_raw_frame(frame_id);
+
+        let is_dda_fragment = matches!(fetched.ms_type, MsType::FragmentDda);
+        let raw_frame = if is_dda_fragment {
+            fetched.smooth(1).centroid(1)
+        } else {
+            fetched
+        };
+
+        // nothing to convert for an empty raw frame
+        if raw_frame.scan.is_empty() {
+            return TimsFrame::default();
+        }
+
+        let scan = flatten_scan_values(&raw_frame.scan, true);
+
+        let mz = self.index_converter.tof_to_mz(frame_id, &raw_frame.tof);
+        let mobility = self
+            .index_converter
+            .scan_to_inverse_mobility(frame_id, &scan);
+
+        TimsFrame {
+            frame_id: frame_id as i32,
+            ms_type: raw_frame.ms_type,
+            scan: scan.iter().map(|&x| x as i32).collect(),
+            tof: raw_frame.tof.iter().map(|&x| x as i32).collect(),
+            ims_frame: ImsFrame {
+                retention_time: raw_frame.retention_time,
+                mz,
+                intensity: raw_frame.intensity,
+                mobility,
+            },
+        }
+    }
+
+    /// Decodes one frame from the in-memory copy of the binary data.
+    ///
+    /// `frame_id` is 1-based. The frame block starts at the frame's tims offset with a
+    /// little-endian `i32` block size; the payload starts 8 bytes after the block start
+    /// and is `block size - 8` bytes long. The payload is always treated as zstd data.
+    ///
+    /// NOTE(review): unlike the lazy loader this method checks neither the compression
+    /// type nor for frames with zero peaks; confirm whether that is intended.
+    ///
+    /// # Panics
+    ///
+    /// Panics on an out-of-range frame id or offset and on decompression or parsing errors.
+    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
+        let frame_index = (frame_id - 1) as usize;
+        let block_start = self.raw_data_layout.tims_offset_values[frame_index] as usize;
+
+        // block size is stored in the first four bytes of the block
+        let size_end = block_start + 4;
+        let mut size_reader = Cursor::new(&self.compressed_data[block_start..size_end]);
+        let bin_size = size_reader.read_i32::<LittleEndian>().unwrap();
+
+        // skip the second header word; the payload follows it
+        let payload_start = size_end + 4;
+        let payload_end = payload_start + bin_size as usize - 8;
+        let frame_data = &self.compressed_data[payload_start..payload_end];
+
+        let decompressed_bytes = zstd_decompress(frame_data).unwrap();
+        let (scan, tof, intensity) =
+            parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
+
+        let frame_meta = &self.raw_data_layout.frame_meta_data[frame_index];
+        let ms_type = match frame_meta.ms_ms_type {
+            0 => MsType::Precursor,
+            8 => MsType::FragmentDda,
+            9 => MsType::FragmentDia,
+            _ => MsType::Unknown,
+        };
+
+        RawTimsFrame {
+            frame_id: frame_id as i32,
+            retention_time: frame_meta.time,
+            ms_type,
+            scan,
+            tof,
+            intensity: intensity.iter().map(|&x| x as f64).collect(),
+        }
+    }
+
+    /// Loads the requested frames in parallel on a dedicated rayon pool with
+    /// `num_threads` threads.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the thread pool cannot be built.
+    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
+        let pool = ThreadPoolBuilder::new()
+            .num_threads(num_threads)
+            .build()
+            .unwrap();
+
+        let frames: Vec<TimsFrame> = pool.install(|| {
+            frame_ids
+                .par_iter()
+                .map(|&frame_id| self.get_frame(frame_id))
+                .collect()
+        });
+
+        TimsSlice { frames }
+    }
+
+    /// Returns a copy of the acquisition mode stored in the data layout.
+    fn get_acquisition_mode(&self) -> AcquisitionMode {
+        let layout = &self.raw_data_layout;
+        layout.acquisition_mode.clone()
+    }
+
+    /// Returns the number of frame metadata entries, cast to `i32`.
+    fn get_frame_count(&self) -> i32 {
+        let frame_count = self.raw_data_layout.frame_meta_data.len();
+        frame_count as i32
+    }
+
+    /// Returns the data folder path stored in the data layout.
+    fn get_data_path(&self) -> &str {
+        self.raw_data_layout.raw_data_path.as_str()
+    }
+}
+
+/// The two loading strategies offered by this file.
+/// Its `TimsData` impl forwards every call to the wrapped loader.
+pub enum TimsDataLoader {
+ /// Loader holding the whole binary file in memory (built by `new_in_memory`).
+ InMemory(TimsInMemoryLoader),
+ /// Loader reading frames from disk on demand (built by `new_lazy`).
+ Lazy(TimsLazyLoder)
+}
+
+impl TimsDataLoader {
+    /// Builds the index converter used by both constructors: the Bruker library
+    /// converter when `use_bruker_sdk` is set, otherwise a `SimpleIndexConverter`
+    /// derived from the given boundaries.
+    fn build_index_converter(
+        bruker_lib_path: &str,
+        data_path: &str,
+        use_bruker_sdk: bool,
+        scan_max_index: u32,
+        im_lower: f64,
+        im_upper: f64,
+        tof_max_index: u32,
+        mz_lower: f64,
+        mz_upper: f64,
+    ) -> TimsIndexConverter {
+        if use_bruker_sdk {
+            TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new(bruker_lib_path, data_path))
+        } else {
+            TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries(
+                mz_lower,
+                mz_upper,
+                tof_max_index,
+                im_lower,
+                im_upper,
+                scan_max_index,
+            ))
+        }
+    }
+
+    /// Creates a loader that reads frames from disk on demand.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the metadata cannot be read or the Bruker library cannot be loaded.
+    pub fn new_lazy(bruker_lib_path: &str, data_path: &str, use_bruker_sdk: bool, scan_max_index: u32, im_lower: f64, im_upper: f64, tof_max_index: u32, mz_lower: f64, mz_upper: f64) -> Self {
+        let raw_data_layout = TimsRawDataLayout::new(data_path);
+        let index_converter = Self::build_index_converter(
+            bruker_lib_path,
+            data_path,
+            use_bruker_sdk,
+            scan_max_index,
+            im_lower,
+            im_upper,
+            tof_max_index,
+            mz_lower,
+            mz_upper,
+        );
+
+        Self::Lazy(TimsLazyLoder {
+            raw_data_layout,
+            index_converter,
+        })
+    }
+
+    /// Creates a loader that reads the complete `analysis.tdf_bin` file into memory.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the metadata cannot be read, the Bruker library cannot be loaded or
+    /// the binary file cannot be opened or read.
+    pub fn new_in_memory(bruker_lib_path: &str, data_path: &str, use_bruker_sdk: bool, scan_max_index: u32, im_lower: f64, im_upper: f64, tof_max_index: u32, mz_lower: f64, mz_upper: f64) -> Self {
+        let raw_data_layout = TimsRawDataLayout::new(data_path);
+        let index_converter = Self::build_index_converter(
+            bruker_lib_path,
+            data_path,
+            use_bruker_sdk,
+            scan_max_index,
+            im_lower,
+            im_upper,
+            tof_max_index,
+            mz_lower,
+            mz_upper,
+        );
+
+        let bin_path = PathBuf::from(data_path).join("analysis.tdf_bin");
+        let mut infile = File::open(bin_path).unwrap();
+        let mut compressed_data = Vec::new();
+        infile.read_to_end(&mut compressed_data).unwrap();
+
+        Self::InMemory(TimsInMemoryLoader {
+            raw_data_layout,
+            index_converter,
+            compressed_data,
+        })
+    }
+
+    /// Returns the index converter of the wrapped loader as a trait object.
+    pub fn get_index_converter(&self) -> &dyn IndexConverter {
+        let converter = match self {
+            Self::InMemory(loader) => &loader.index_converter,
+            Self::Lazy(loader) => &loader.index_converter,
+        };
+        converter
+    }
+}
+
+impl TimsDataLoader {
+    /// Returns the wrapped loader as a trait object so the trait impl can forward to it.
+    fn wrapped_loader(&self) -> &dyn TimsData {
+        match self {
+            Self::InMemory(loader) => loader,
+            Self::Lazy(loader) => loader,
+        }
+    }
+}
+
+/// Forwards every call to the loader held by the active variant.
+impl TimsData for TimsDataLoader {
+    fn get_frame(&self, frame_id: u32) -> TimsFrame {
+        self.wrapped_loader().get_frame(frame_id)
+    }
+
+    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
+        self.wrapped_loader().get_raw_frame(frame_id)
+    }
+
+    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
+        self.wrapped_loader().get_slice(frame_ids, num_threads)
+    }
+
+    fn get_acquisition_mode(&self) -> AcquisitionMode {
+        self.wrapped_loader().get_acquisition_mode()
+    }
+
+    fn get_frame_count(&self) -> i32 {
+        self.wrapped_loader().get_frame_count()
+    }
+
+    fn get_data_path(&self) -> &str {
+        self.wrapped_loader().get_data_path()
+    }
+}
+
+/// Index converter based on two linear models.
+///
+/// The square root of m/z is modelled as `tof_intercept + tof_slope * tof_index`, and
+/// inverse mobility as `scan_intercept + scan_slope * scan_number`.
+pub struct SimpleIndexConverter {
+    /// Square root of the lower m/z boundary.
+    pub tof_intercept: f64,
+    /// Increase of sqrt(m/z) per TOF index.
+    pub tof_slope: f64,
+    /// Upper inverse mobility boundary (value at scan 0).
+    pub scan_intercept: f64,
+    /// Change of inverse mobility per scan number.
+    pub scan_slope: f64,
+}
+
+impl SimpleIndexConverter {
+    /// Derives both linear models from the acquisition boundaries.
+    ///
+    /// TOF index 0 maps to `mz_min` and `tof_max_index` to `mz_max`; scan 0 maps to
+    /// `im_max` and `scan_max_index` to `im_min`.
+    pub fn from_boundaries(
+        mz_min: f64,
+        mz_max: f64,
+        tof_max_index: u32,
+        im_min: f64,
+        im_max: f64,
+        scan_max_index: u32,
+    ) -> Self {
+        let sqrt_mz_low = mz_min.sqrt();
+        let sqrt_mz_span = mz_max.sqrt() - sqrt_mz_low;
+        let mobility_span = im_min - im_max;
+
+        Self {
+            tof_intercept: sqrt_mz_low,
+            tof_slope: sqrt_mz_span / f64::from(tof_max_index),
+            scan_intercept: im_max,
+            scan_slope: mobility_span / f64::from(scan_max_index),
+        }
+    }
+}
+
+/// Conversions based on the linear models stored in `SimpleIndexConverter`.
+/// The frame id is ignored by every method.
+impl IndexConverter for SimpleIndexConverter {
+    /// Maps each TOF index to `(tof_intercept + tof_slope * index)^2`.
+    fn tof_to_mz(&self, _frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
+        tof_values
+            .iter()
+            .map(|&index| (self.tof_intercept + self.tof_slope * index as f64).powi(2))
+            .collect()
+    }
+
+    /// Maps each m/z value to the nearest TOF index.
+    ///
+    /// The fractional index is rounded rather than truncated so that a value produced
+    /// by `tof_to_mz` maps back to the index it came from even when floating-point
+    /// error leaves it marginally below the integer. This also matches the rounding
+    /// done by the Bruker library based converter. Negative and NaN results become 0.
+    fn mz_to_tof(&self, _frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
+        mz_values
+            .iter()
+            .map(|&mz| ((mz.sqrt() - self.tof_intercept) / self.tof_slope).round() as u32)
+            .collect()
+    }
+
+    /// Maps each scan number to `scan_intercept + scan_slope * scan`.
+    fn scan_to_inverse_mobility(&self, _frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
+        scan_values
+            .iter()
+            .map(|&scan| self.scan_intercept + self.scan_slope * scan as f64)
+            .collect()
+    }
+
+    /// Maps each inverse mobility value to the nearest scan number.
+    ///
+    /// Rounded for the same reason as in `mz_to_tof`. Negative and NaN results become 0.
+    fn inverse_mobility_to_scan(&self, _frame_id: u32, inverse_mobility_values: &Vec<f64>) -> Vec<u32> {
+        inverse_mobility_values
+            .iter()
+            .map(|&mobility| ((mobility - self.scan_intercept) / self.scan_slope).round() as u32)
+            .collect()
+    }
+}
+
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290
extern crate rusqlite;
+
+use rusqlite::{Connection, Result};
+use std::path::Path;
+
+/// Pairs a frame id with a window group id.
+// NOTE(review): no reader for this type is visible in this part of the file; presumably
+// it mirrors a row of a DIA frame-to-window-group table. Confirm against its caller.
+#[derive(Debug, Clone)]
+pub struct DiaMsMisInfo {
+ pub frame_id: u32,
+ pub window_group: u32,
+}
+
+/// Isolation window description belonging to a window group: scan range, isolation m/z
+/// and width, and collision energy.
+// NOTE(review): no reader for this type is visible in this part of the file; units and
+// the inclusive/exclusive meaning of the scan range need to be confirmed.
+#[derive(Debug, Clone)]
+pub struct DiaMsMsWindow {
+ pub window_group: u32,
+ pub scan_num_begin: u32,
+ pub scan_num_end: u32,
+ pub isolation_mz: f64,
+ pub isolation_width: f64,
+ pub collision_energy: f64,
+}
+
+/// One row of the `PasefFrameMsMsInfo` table as read by `read_pasef_frame_ms_ms_info`.
+#[derive(Debug, Clone)]
+pub struct PasefMsMsMeta {
+ /// Column `Frame`.
+ pub frame_id: i64,
+ /// Column `ScanNumBegin`.
+ pub scan_num_begin: i64,
+ /// Column `ScanNumEnd`.
+ pub scan_num_end: i64,
+ /// Column `IsolationMz`.
+ pub isolation_mz: f64,
+ /// Column `IsolationWidth`.
+ pub isolation_width: f64,
+ /// Column `CollisionEnergy`.
+ pub collision_energy: f64,
+ /// Column `Precursor`.
+ pub precursor_id: i64,
+}
+
+/// One row of the `Precursors` table as read by `read_dda_precursor_meta`.
+#[derive(Debug, Clone)]
+pub struct DDAPrecursorMeta {
+ /// Column `Id`.
+ pub precursor_id: i64,
+ /// Column `LargestPeakMz`.
+ pub precursor_mz_highest_intensity: f64,
+ /// Column `AverageMz`.
+ pub precursor_mz_average: f64,
+ /// Column `MonoisotopicMz`; `None` when the column is NULL.
+ pub precursor_mz_monoisotopic: Option<f64>,
+ /// Column `Charge`; `None` when the column is NULL.
+ pub precursor_charge: Option<i64>,
+ /// Column `ScanNumber`.
+ pub precursor_average_scan_number: f64,
+ /// Column `Intensity`.
+ pub precursor_total_intensity: f64,
+ /// Column `Parent`.
+ pub precursor_frame_id: i64,
+}
+
+/// Fragment selection info for a DDA frame: scan range, isolation window, collision
+/// energy and the id of the associated precursor.
+// NOTE(review): field set parallels `PasefMsMsMeta`, but no code that fills this type is
+// visible in this part of the file; confirm its source.
+pub struct DDAFragmentInfo {
+ pub frame_id: i64,
+ pub scan_begin: i64,
+ pub scan_end: i64,
+ pub isolation_mz: f64,
+ pub isolation_width: f64,
+ pub collision_energy: f64,
+ pub precursor_id: i64,
+}
+
+/// Empty placeholder type; it has no fields and no visible users in this part of the file.
+pub struct DIAFragmentFrameInfo {}
+
+/// Empty placeholder type; it has no fields and no visible users in this part of the file.
+pub struct DIAWindowGroupInfo {}
+
+/// Selected entries of the `GlobalMetadata` key/value table as read by `read_global_meta_sql`.
+///
+/// Entries whose key is missing from the table keep the initial value set by the reader:
+/// an empty string, `-1` / `-1.0`, or `0` for `tof_max_index`.
+#[derive(Debug)]
+pub struct GlobalMetaData {
+ /// Key `SchemaType`.
+ pub schema_type: String,
+ /// Key `SchemaVersionMajor`.
+ pub schema_version_major: i64,
+ /// Key `SchemaVersionMinor`.
+ pub schema_version_minor: i64,
+ /// Key `AcquisitionSoftwareVendor`.
+ pub acquisition_software_vendor: String,
+ /// Key `InstrumentVendor`.
+ pub instrument_vendor: String,
+ /// Key `ClosedProperly` (note the differing field name).
+ pub closed_property: i64,
+ /// Key `TimsCompressionType`; the loaders handle the values 1 and 2.
+ pub tims_compression_type: i64,
+ /// Key `MaxNumPeaksPerScan`.
+ pub max_num_peaks_per_scan: i64,
+ /// Key `MzAcqRangeLower`.
+ pub mz_acquisition_range_lower: f64,
+ /// Key `MzAcqRangeUpper`.
+ pub mz_acquisition_range_upper: f64,
+ /// Key `OneOverK0AcqRangeLower`.
+ pub one_over_k0_range_lower: f64,
+ /// Key `OneOverK0AcqRangeUpper`.
+ pub one_over_k0_range_upper: f64,
+ /// Value of key `DigitizerNumSamples` plus one.
+ pub tof_max_index: u32,
+}
+
+/// One row of the `Frames` table as read by `read_meta_data_sql`.
+///
+/// The reader selects the columns `Id`, `Time`, `ScanMode`, `Polarity`, `MsMsType`,
+/// `TimsId`, `MaxIntensity`, `SummedIntensities`, `NumScans`, `NumPeaks`,
+/// `MzCalibration`, `T1`, `T2`, `TimsCalibration`, `PropertyGroup`, `AccumulationTime`
+/// and `RampTime`.
+#[derive(Debug)]
+pub struct FrameMeta {
+ /// Column `Id`.
+ pub id: i64,
+ /// Column `Time`; the loaders use it as the retention time of the frame.
+ pub time: f64,
+ /// Column `Polarity`.
+ pub polarity: String,
+ /// Column `ScanMode`; 8 and 9 are mapped to DDA and DIA by `TimsRawDataLayout::new`.
+ pub scan_mode: i64,
+ /// Column `MsMsType`; the loaders map 0, 8 and 9 to precursor, DDA fragment and DIA fragment.
+ pub ms_ms_type: i64,
+ /// Column `TimsId`; the loaders use it as the byte offset of the frame in `analysis.tdf_bin`.
+ pub tims_id: i64,
+ /// Column `MaxIntensity`.
+ pub max_intensity: f64,
+ /// Column `SummedIntensities`.
+ pub sum_intensity: f64,
+ /// Column `NumScans`.
+ pub num_scans: i64,
+ /// Column `NumPeaks`; zero marks an empty frame.
+ pub num_peaks: i64,
+ /// Column `MzCalibration`.
+ pub mz_calibration: i64,
+ /// Column `T1`.
+ pub t_1: f64,
+ // NOTE(review): the remaining fields presumably map to the columns `T2`,
+ // `TimsCalibration`, `PropertyGroup`, `AccumulationTime` and `RampTime` in that
+ // order; the assignments are outside the visible part of the reader.
+ pub t_2: f64,
+ pub tims_calibration: i64,
+ pub property_group: i64,
+ pub accumulation_time: f64,
+ pub ramp_time: f64,
+}
+
+// One raw row of the `GlobalMetadata` table: the first column as key, the second as
+// value, both kept as strings until `read_global_meta_sql` parses them.
+struct GlobalMetaInternal {
+ key: String,
+ value: String,
+}
+
+/// Reads every row of the `Precursors` table from `analysis.tdf`.
+///
+/// # Arguments
+///
+/// * `bruker_d_folder_name` - folder that contains `analysis.tdf`
+///
+/// # Errors
+///
+/// Returns an error if the database cannot be opened, the query cannot be prepared or
+/// a column value cannot be converted to the field type.
+pub fn read_dda_precursor_meta(bruker_d_folder_name: &str) -> Result<Vec<DDAPrecursorMeta>, Box<dyn std::error::Error>> {
+    let conn = Connection::open(Path::new(bruker_d_folder_name).join("analysis.tdf"))?;
+
+    // column order must match the indices used in the row mapping below
+    let columns = [
+        "Id",
+        "LargestPeakMz",
+        "AverageMz",
+        "MonoisotopicMz",
+        "Charge",
+        "ScanNumber",
+        "Intensity",
+        "Parent",
+    ];
+    let query = format!("SELECT {} FROM Precursors", columns.join(", "));
+
+    let mut statement = conn.prepare(&query)?;
+    let mapped_rows = statement.query_map([], |row| {
+        Ok(DDAPrecursorMeta {
+            precursor_id: row.get(0)?,
+            precursor_mz_highest_intensity: row.get(1)?,
+            precursor_mz_average: row.get(2)?,
+            // NULL is allowed for the next two columns
+            precursor_mz_monoisotopic: row.get(3)?,
+            precursor_charge: row.get(4)?,
+            precursor_average_scan_number: row.get(5)?,
+            precursor_total_intensity: row.get(6)?,
+            precursor_frame_id: row.get(7)?,
+        })
+    })?;
+
+    // stop at the first row that fails to convert
+    let mut precursors = Vec::new();
+    for precursor in mapped_rows {
+        precursors.push(precursor?);
+    }
+    Ok(precursors)
+}
+
+/// Reads every row of the `PasefFrameMsMsInfo` table from `analysis.tdf`.
+///
+/// # Arguments
+///
+/// * `bruker_d_folder_name` - folder that contains `analysis.tdf`
+///
+/// # Errors
+///
+/// Returns an error if the database cannot be opened, the query cannot be prepared or
+/// a column value cannot be converted to the field type.
+pub fn read_pasef_frame_ms_ms_info(bruker_d_folder_name: &str) -> Result<Vec<PasefMsMsMeta>, Box<dyn std::error::Error>> {
+    let conn = Connection::open(Path::new(bruker_d_folder_name).join("analysis.tdf"))?;
+
+    // column order must match the indices used in the row mapping below
+    let columns = [
+        "Frame",
+        "ScanNumBegin",
+        "ScanNumEnd",
+        "IsolationMz",
+        "IsolationWidth",
+        "CollisionEnergy",
+        "Precursor",
+    ];
+    let query = format!("SELECT {} FROM PasefFrameMsMsInfo", columns.join(", "));
+
+    let mut statement = conn.prepare(&query)?;
+    let mapped_rows = statement.query_map([], |row| {
+        Ok(PasefMsMsMeta {
+            frame_id: row.get(0)?,
+            scan_num_begin: row.get(1)?,
+            scan_num_end: row.get(2)?,
+            isolation_mz: row.get(3)?,
+            isolation_width: row.get(4)?,
+            collision_energy: row.get(5)?,
+            precursor_id: row.get(6)?,
+        })
+    })?;
+
+    // stop at the first row that fails to convert
+    let mut ms_ms_info = Vec::new();
+    for entry in mapped_rows {
+        ms_ms_info.push(entry?);
+    }
+    Ok(ms_ms_info)
+}
+
+/// Reads the global metadata from the `GlobalMetadata` table of `analysis.tdf`.
+///
+/// The table is read as key/value string pairs (first and second column). Known keys
+/// are parsed into the matching `GlobalMetaData` field, unknown keys are ignored, and
+/// fields whose key is absent keep their initial value (empty string, `-1`, `-1.0`, or
+/// `0` for `tof_max_index`).
+///
+/// # Arguments
+///
+/// * `bruker_d_folder_name` - folder that contains `analysis.tdf`
+///
+/// # Errors
+///
+/// Returns an error if the database cannot be opened or queried, or if the value of a
+/// known numeric key cannot be parsed. Parse failures used to panic; they are now
+/// reported through the returned `Result` like every other failure of this function.
+pub fn read_global_meta_sql(bruker_d_folder_name: &str) -> Result<GlobalMetaData, Box<dyn std::error::Error>> {
+    // Connect to the database
+    let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
+    let conn = Connection::open(db_path)?;
+
+    // fetch all key/value pairs
+    let mut statement = conn.prepare("SELECT * FROM GlobalMetadata")?;
+    let entries = statement
+        .query_map([], |row| {
+            Ok(GlobalMetaInternal {
+                key: row.get(0)?,
+                value: row.get(1)?,
+            })
+        })?
+        .collect::<Result<Vec<_>, _>>()?;
+
+    let mut global_meta = GlobalMetaData {
+        schema_type: String::new(),
+        schema_version_major: -1,
+        schema_version_minor: -1,
+        acquisition_software_vendor: String::new(),
+        instrument_vendor: String::new(),
+        closed_property: -1,
+        tims_compression_type: -1,
+        max_num_peaks_per_scan: -1,
+        mz_acquisition_range_lower: -1.0,
+        mz_acquisition_range_upper: -1.0,
+        one_over_k0_range_lower: -1.0,
+        one_over_k0_range_upper: -1.0,
+        tof_max_index: 0,
+    };
+
+    // go over the keys and parse values for the global meta data
+    for row in entries {
+        match row.key.as_str() {
+            "SchemaType" => global_meta.schema_type = row.value,
+            "SchemaVersionMajor" => global_meta.schema_version_major = row.value.parse::<i64>()?,
+            "SchemaVersionMinor" => global_meta.schema_version_minor = row.value.parse::<i64>()?,
+            "AcquisitionSoftwareVendor" => global_meta.acquisition_software_vendor = row.value,
+            "InstrumentVendor" => global_meta.instrument_vendor = row.value,
+            "ClosedProperly" => global_meta.closed_property = row.value.parse::<i64>()?,
+            "TimsCompressionType" => global_meta.tims_compression_type = row.value.parse::<i64>()?,
+            "MaxNumPeaksPerScan" => global_meta.max_num_peaks_per_scan = row.value.parse::<i64>()?,
+            "MzAcqRangeLower" => global_meta.mz_acquisition_range_lower = row.value.parse::<f64>()?,
+            "MzAcqRangeUpper" => global_meta.mz_acquisition_range_upper = row.value.parse::<f64>()?,
+            "OneOverK0AcqRangeLower" => global_meta.one_over_k0_range_lower = row.value.parse::<f64>()?,
+            "OneOverK0AcqRangeUpper" => global_meta.one_over_k0_range_upper = row.value.parse::<f64>()?,
+            // the maximum TOF index is stored as the number of digitizer samples plus one
+            "DigitizerNumSamples" => global_meta.tof_max_index = (row.value.parse::<i64>()? + 1) as u32,
+            _ => (),
+        }
+    }
+
+    Ok(global_meta)
+}
+
+// Read the frame meta data from the analysis.tdf file
+pub fn read_meta_data_sql(bruker_d_folder_name: &str) -> Result<Vec<FrameMeta>, Box<dyn std::error::Error>> {
+ // Connect to the database
+ let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
+ let conn = Connection::open(db_path)?;
+
+ // prepare the query
+ let rows: Vec<&str> = vec!["Id", "Time", "ScanMode", "Polarity", "MsMsType", "TimsId", "MaxIntensity", "SummedIntensities",
+ "NumScans", "NumPeaks", "MzCalibration", "T1", "T2", "TimsCalibration", "PropertyGroup", "AccumulationTime", "RampTime"];
+ let query = format!("SELECT {} FROM Frames", rows.join(", "));
+
+ // execute the query
+ let frames_rows: Result<Vec<FrameMeta>, _> = conn.prepare(&query)?.query_map([], |row| {
+ Ok(FrameMeta {
+ id: row.get(0)?,
+ time: row.get(1)?,
+ scan_mode: row.get(2)?,
+ polarity: row.get(3)?,
+ ms_ms_type: row.get(4)?,
+ tims_id: row.get(5)?,
+ max_intensity: row.get(6)?,
+ sum_intensity: row.get(7)?,
+ num_scans: row.get(8)?,
+ num_peaks: row.get(9)?,
+ mz_calibration: row.get(10)?,
+ t_1: row.get(11)?,
+ t_2: row.get(12)?,
+ tims_calibration: row.get(13)?,
+ property_group: row.get(14)?,
+ accumulation_time: row.get(15)?,
+ ramp_time: row.get(16)?,
+ })
+ })?.collect();
+
+ // return the frames
+ Ok(frames_rows?)
+}
+
+pub fn read_dia_ms_ms_info(bruker_d_folder_name: &str) -> Result<Vec<DiaMsMisInfo>, Box<dyn std::error::Error>> {
+ // Connect to the database
+ let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
+ let conn = Connection::open(db_path)?;
+
+ // prepare the query
+ let rows: Vec<&str> = vec!["Frame", "WindowGroup"];
+ let query = format!("SELECT {} FROM DiaFrameMsMsInfo", rows.join(", "));
+
+ // execute the query
+ let frames_rows: Result<Vec<DiaMsMisInfo>, _> = conn.prepare(&query)?.query_map([], |row| {
+ Ok(DiaMsMisInfo {
+ frame_id: row.get(0)?,
+ window_group: row.get(1)?,
+ })
+ })?.collect();
+
+ // return the frames
+ Ok(frames_rows?)
+}
+
+pub fn read_dia_ms_ms_windows(bruker_d_folder_name: &str) -> Result<Vec<DiaMsMsWindow>, Box<dyn std::error::Error>> {
+ // Connect to the database
+ let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
+ let conn = Connection::open(db_path)?;
+
+ // prepare the query
+ let rows: Vec<&str> = vec!["WindowGroup", "ScanNumBegin", "ScanNumEnd", "IsolationMz", "IsolationWidth", "CollisionEnergy"];
+ let query = format!("SELECT {} FROM DiaFrameMsMsWindows", rows.join(", "));
+
+ // execute the query
+ let frames_rows: Result<Vec<DiaMsMsWindow>, _> = conn.prepare(&query)?.query_map([], |row| {
+ Ok(DiaMsMsWindow {
+ window_group: row.get(0)?,
+ scan_num_begin: row.get(1)?,
+ scan_num_end: row.get(2)?,
+ isolation_mz: row.get(3)?,
+ isolation_width: row.get(4)?,
+ collision_energy: row.get(5)?,
+ })
+ })?.collect();
+
+ // return the frames
+ Ok(frames_rows?)
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170
use libloading::{Library, Symbol};
+use std::os::raw::{c_char, c_double};
+
+//
+// A struct that holds a handle to the raw data
+//
+// # Example
+//
+// ```
+// let bruker_lib_path = "path/to/libtimsdata.so";
+// let data_path = "path/to/data.d";
+// let tims_data = BrukerTimsDataLibrary::new(bruker_lib_path, data_path);
+// ```
+pub struct BrukerTimsDataLibrary {
+ pub lib: Library,
+ pub handle: u64,
+}
+
+impl BrukerTimsDataLibrary {
+ //
+ // Create a new BrukerTimsDataLibrary struct
+ //
+ // # Arguments
+ //
+ // * `bruker_lib_path` - A string slice that holds the path to the bruker library
+ // * `data_path` - A string slice that holds the path to the data
+ //
+ // # Example
+ //
+ // ```
+ // let bruker_lib_path = "path/to/libtimsdata.so";
+ // let data_path = "path/to/data.d";
+ // let tims_data = BrukerTimsDataLibrary::new(bruker_lib_path, data_path);
+ // ```
+ pub fn new(bruker_lib_path: &str, data_path: &str) -> Result<BrukerTimsDataLibrary, Box<dyn std::error::Error>> {
+
+ // Load the library
+ let lib = unsafe {
+ Library::new(bruker_lib_path)?
+ };
+
+ // create a handle to the raw data
+ let handle = unsafe {
+ let func: Symbol<unsafe extern fn(*const c_char, u32) -> u64> = lib.get(b"tims_open")?;
+ let path = std::ffi::CString::new(data_path)?;
+ let handle = func(path.as_ptr(), 0);
+ handle
+ };
+
+ // return the BrukerTimsDataLibrary struct
+ Ok(BrukerTimsDataLibrary {
+ lib,
+ handle,
+ })
+ }
+
+ //
+ // Close the handle to the raw data
+ //
+ // # Example
+ //
+ // ```
+ // let close = tims_data.tims_close();
+ // match close {
+ // Ok(_) => println!("tims_data closed"),
+ // Err(e) => println!("error: {}", e),
+ // };
+ // ```
+ pub fn tims_close(&self) -> Result<(), Box<dyn std::error::Error>> {
+ unsafe {
+ let func: Symbol<unsafe extern fn(u64) -> ()> = self.lib.get(b"tims_close")?;
+ func(self.handle);
+ }
+ Ok(())
+ }
+
+ //
+ // Convert the given indices to mz values.
+ //
+ // # Example
+ //
+ // ```
+ // let indices = vec![...];
+ // let mz_values_result = tims_data.tims_index_to_mz(estimation, &mut indices, tof_max_index);
+ // match mz_values_result {
+ // Ok(mz_values) => println!("{:?}", mz_values),
+ // Err(e) => println!("error: {}", e),
+ // };
+ // ```
+ pub fn tims_index_to_mz(&self, frame_id: u32, dbl_tofs: &[c_double], mzs: &mut [c_double]) -> Result<(), Box<dyn std::error::Error>> {
+ unsafe {
+ let func: Symbol<unsafe extern "C" fn(u64, u32, *const c_double, *mut c_double, u32)> = self.lib.get(b"tims_index_to_mz")?;
+ func(self.handle, frame_id, dbl_tofs.as_ptr(), mzs.as_mut_ptr(), dbl_tofs.len() as u32);
+ }
+ Ok(())
+ }
+
+ //
+ // Convert the given mz values to indices.
+ //
+ // # Example
+ //
+ // ```
+ // let mzs = vec![...];
+ // let indices_result = tims_data.tims_mz_to_index(estimation, &mut mzs);
+ // match indices_result {
+ // Ok(indices) => println!("{:?}", indices),
+ // Err(e) => println!("error: {}", e),
+ // };
+ // ```
+ pub fn tims_mz_to_index(&self, frame_id: u32, mzs: &[c_double], indices: &mut [c_double]) -> Result<(), Box<dyn std::error::Error>> {
+ unsafe {
+ let func: Symbol<unsafe extern "C" fn(u64, u32, *const c_double, *mut c_double, u32)> = self.lib.get(b"tims_mz_to_index")?;
+ func(self.handle, frame_id, mzs.as_ptr(), indices.as_mut_ptr(), mzs.len() as u32);
+ }
+ Ok(())
+ }
+
+ //
+ // Convert the given indices to inverse mobility values.
+ //
+ // # Example
+ //
+ // ```
+ // let indices = vec![...];
+ // let scan_values_result = tims_data.tims_scan_to_inv_mob(estimation, &mut indices);
+ // match mz_values_result {
+ // Ok(mz_values) => println!("{:?}", mz_values),
+ // Err(e) => println!("error: {}", e),
+ // };
+ // ```
+ pub fn tims_scan_to_inv_mob(&self, frame_id: u32, dbl_scans: &[c_double], inv_mob: &mut [c_double]) -> Result<(), Box<dyn std::error::Error>> {
+ unsafe {
+ let func: Symbol<unsafe extern "C" fn(u64, u32, *const c_double, *mut c_double, u32)> = self.lib.get(b"tims_scannum_to_oneoverk0")?;
+ func(self.handle, frame_id, dbl_scans.as_ptr(), inv_mob.as_mut_ptr(), dbl_scans.len() as u32);
+ }
+ Ok(())
+ }
+
+ //
+ // Convert the given inverse mobility values to scan values.
+ //
+ // # Example
+ //
+ // ```
+ // let inv_mob = vec![...];
+ // let scan_values_result = tims_data.tims_inv_mob_to_scan(estimation, &mut inv_mob);
+ // match mz_values_result {
+ // Ok(mz_values) => println!("{:?}", mz_values),
+ // Err(e) => println!("error: {}", e),
+ // };
+ // ```
+ pub fn inv_mob_to_tims_scan(&self, frame_id: u32, inv_mob: &[c_double], scans: &mut [c_double]) -> Result<(), Box<dyn std::error::Error>> {
+ unsafe {
+ let func: Symbol<unsafe extern "C" fn(u64, u32, *const c_double, *mut c_double, u32)> = self.lib.get(b"tims_oneoverk0_to_scannum")?;
+ func(self.handle, frame_id, inv_mob.as_ptr(), scans.as_mut_ptr(), inv_mob.len() as u32);
+ }
+ Ok(())
+ }
+}
+
+impl Drop for BrukerTimsDataLibrary {
+ fn drop(&mut self) {
+ let close = self.tims_close();
+ match close {
+ Ok(_) => (),
+ Err(e) => println!("error: {}", e),
+ };
+ }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267
use std::io;
+use std::io::{Read, Write};
+use byteorder::{ByteOrder, LittleEndian};
+use mscore::timstof::frame::TimsFrame;
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+use rayon::iter::IntoParallelRefIterator;
+
+
+/// Decompresses a ZSTD compressed byte array
+///
+/// # Arguments
+///
+/// * `compressed_data` - A byte slice that holds the compressed data
+///
+/// # Returns
+///
+/// * `decompressed_data` - A vector of u8 that holds the decompressed data
+///
+pub fn zstd_decompress(compressed_data: &[u8]) -> io::Result<Vec<u8>> {
+ let mut decoder = zstd::Decoder::new(compressed_data)?;
+ let mut decompressed_data = Vec::new();
+ decoder.read_to_end(&mut decompressed_data)?;
+ Ok(decompressed_data)
+}
+
+/// Compresses a byte array using ZSTD
+///
+/// # Arguments
+///
+/// * `decompressed_data` - A byte slice that holds the decompressed data
+///
+/// # Returns
+///
+/// * `compressed_data` - A vector of u8 that holds the compressed data
+///
+pub fn zstd_compress(decompressed_data: &[u8], compression_level: i32) -> io::Result<Vec<u8>> {
+ let mut encoder = zstd::Encoder::new(Vec::new(), compression_level)?;
+ encoder.write_all(decompressed_data)?;
+ let compressed_data = encoder.finish()?;
+ Ok(compressed_data)
+}
+
+pub fn reconstruct_compressed_data(
+ scans: Vec<u32>,
+ mut tofs: Vec<u32>,
+ intensities: Vec<u32>,
+ total_scans: u32,
+ compression_level: i32,
+) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
+ // Ensuring all vectors have the same length
+ assert_eq!(scans.len(), tofs.len());
+ assert_eq!(scans.len(), intensities.len());
+
+ // Modify TOFs based on scans
+ modify_tofs(&mut tofs, &scans);
+
+ // Get peak counts from total scans and scans
+ let peak_cnts = get_peak_cnts(total_scans, &scans);
+
+ // Interleave TOFs and intensities
+ let mut interleaved = Vec::new();
+ for (&tof, &intensity) in tofs.iter().zip(intensities.iter()) {
+ interleaved.push(tof);
+ interleaved.push(intensity);
+ }
+
+ // Get real data using the custom loop logic
+ let real_data = get_realdata(&peak_cnts, &interleaved);
+
+ // Compress real_data using zstd_compress
+ let compressed_data = zstd_compress(&real_data, compression_level)?;
+
+ // Final data preparation with compressed data
+ let mut final_data = Vec::new();
+
+ // Include the length of the compressed data as a header (4 bytes)
+ final_data.extend_from_slice(&(compressed_data.len() as u32 + 8).to_le_bytes());
+
+ // Include total_scans as part of the header
+ final_data.extend_from_slice(&total_scans.to_le_bytes());
+
+ // Include the compressed data itself
+ final_data.extend_from_slice(&compressed_data);
+
+ Ok(final_data)
+}
+
+pub fn compress_collection(frames: Vec<TimsFrame>, max_scan_count: u32, compression_level: i32, num_threads: usize) -> Vec<Vec<u8>> {
+
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+ let result = pool.install(|| {
+ frames.par_iter().map(|frame| {
+ let compressed_data = reconstruct_compressed_data(
+ frame.scan.iter().map(|&x| x as u32).collect(),
+ frame.tof.iter().map(|&x| x as u32).collect(),
+ frame.ims_frame.intensity.iter().map(|&x| x as u32).collect(),
+ max_scan_count,
+ compression_level,
+ ).unwrap();
+ compressed_data
+ }).collect()
+ });
+ result
+}
+
/// Parses the decompressed bruker binary data
///
/// The input is read as `len / 4` little-endian `u32` words whose four bytes
/// are stored in four consecutive byte planes (byte 0 of every word first,
/// then byte 1, and so on). Word 0 is the scan count, the words up to the scan
/// count hold per-scan counts (stored doubled), and the remaining words
/// alternate between tof deltas and intensities.
///
/// # Arguments
///
/// * `decompressed_bytes` - A byte slice that holds the decompressed data
///
/// # Returns
///
/// * `scan_indices` - number of peaks in every scan; the last entry is derived
///   from the number of intensities that the other scans leave over
/// * `tof_indices` - tof indices, accumulated per scan from the stored deltas
///   and shifted to be zero-indexed
/// * `intensities` - A vector of u32 that holds the intensities
///
/// # Errors
///
/// Returns an error instead of panicking when the input holds no complete
/// word, when the scan count is zero or larger than the number of words, when
/// the per-scan counts exceed the number of stored peaks, or when a tof value
/// overflows or cannot be made zero-indexed.
///
pub fn parse_decompressed_bruker_binary_data(decompressed_bytes: &[u8]) -> Result<(Vec<u32>, Vec<u32>, Vec<u32>), Box<dyn std::error::Error>> {
    let total_bytes = decompressed_bytes.len();
    let word_count = total_bytes / 4;

    // Offsets of the byte planes that hold bytes 1, 2 and 3 of every word.
    let plane_1 = total_bytes / 4;
    let plane_2 = 2 * total_bytes / 4;
    let plane_3 = 3 * total_bytes / 4;

    let buffer_u32: Vec<u32> = (0..word_count)
        .map(|i| {
            u32::from_le_bytes([
                decompressed_bytes[i],
                decompressed_bytes[i + plane_1],
                decompressed_bytes[i + plane_2],
                decompressed_bytes[i + plane_3],
            ])
        })
        .collect();

    // get the number of scans
    let scan_count = match buffer_u32.first() {
        Some(&count) => count as usize,
        None => return Err("decompressed frame data holds no complete 32-bit word".into()),
    };
    if scan_count == 0 || scan_count > buffer_u32.len() {
        return Err(format!(
            "invalid scan count {} for a buffer of {} words",
            scan_count,
            buffer_u32.len()
        )
        .into());
    }

    // get the scan indices (stored doubled); slot 0 held the scan count itself
    let mut scan_indices: Vec<u32> = buffer_u32[..scan_count].iter().map(|&v| v / 2).collect();
    scan_indices[0] = 0;

    // tof deltas and intensities alternate after the scan section
    let mut tof_indices: Vec<u32> = buffer_u32.iter().skip(scan_count).step_by(2).cloned().collect();
    let intensities: Vec<u32> = buffer_u32.iter().skip(scan_count + 1).step_by(2).cloned().collect();

    // the last scan gets whatever the explicitly counted scans leave over
    let counted_peaks: u64 = scan_indices[1..].iter().map(|&c| u64::from(c)).sum();
    let stored_peaks = intensities.len() as u64;
    if counted_peaks > stored_peaks || stored_peaks - counted_peaks > u64::from(u32::MAX) {
        return Err(format!(
            "scan counts cover {} peaks but {} peaks are stored",
            counted_peaks, stored_peaks
        )
        .into());
    }
    let last_scan = (stored_peaks - counted_peaks) as u32;

    // shift the counts one slot to the left and append the derived last count
    scan_indices.rotate_left(1);
    scan_indices[scan_count - 1] = last_scan;

    // convert the tof deltas to cumulative sums, restarting for every scan
    let mut cursor = 0usize;
    for &peaks in &scan_indices {
        let end = cursor + peaks as usize;
        let mut running: u32 = 0;
        for tof in &mut tof_indices[cursor..end] {
            running = running
                .checked_add(*tof)
                .ok_or("tof index overflows 32 bits while accumulating deltas")?;
            *tof = running;
        }
        cursor = end;
    }

    // adjust the tof indices to be zero-indexed
    let adjusted_tof_indices = tof_indices
        .iter()
        .map(|&val| val.checked_sub(1).ok_or("tof index of 0 cannot be made zero-indexed"))
        .collect::<Result<Vec<u32>, _>>()?;

    Ok((scan_indices, adjusted_tof_indices, intensities))
}
+
/// Build the peak-count header for a frame.
///
/// The first entry is `total_scans`. For every `scan_id` in `1..total_scans`
/// one entry follows: twice the number of consecutive values of `scans`,
/// starting where the previous entry stopped, that are smaller than `scan_id`.
pub fn get_peak_cnts(total_scans: u32, scans: &[u32]) -> Vec<u32> {
    let mut counts = Vec::new();
    counts.push(total_scans);

    // Position of the first scan value that has not been counted yet.
    let mut cursor = 0usize;
    for boundary in 1..total_scans {
        let run = scans[cursor..].iter().take_while(|&&s| s < boundary).count() as u32;
        cursor += run as usize;
        counts.push(run * 2);
    }
    counts
}
+
/// Replace every tof value, in place, by its distance to the previous tof of
/// the same scan.
///
/// The "previous tof" starts at -1 and is reset to -1 whenever the scan value
/// at the current position differs from the one seen before, so the first
/// value of a scan becomes `tof + 1`.
///
/// # Panics
///
/// Panics when `scans` is shorter than `tofs`.
pub fn modify_tofs(tofs: &mut [u32], scans: &[u32]) {
    let mut previous_tof: i32 = -1; // signed so that -1 can mark "no previous tof"
    let mut current_scan: u32 = 0;

    for (position, tof) in tofs.iter_mut().enumerate() {
        let scan = scans[position];
        if scan != current_scan {
            current_scan = scan;
            previous_tof = -1;
        }
        let absolute = *tof as i32;
        *tof = (absolute - previous_tof) as u32;
        previous_tof = absolute;
    }
}
+
+pub fn get_realdata(peak_cnts: &[u32], interleaved: &[u32]) -> Vec<u8> {
+ let mut back_data = Vec::new();
+
+ // Convert peak counts to bytes and add to back_data
+ for &cnt in peak_cnts {
+ back_data.extend_from_slice(&cnt.to_le_bytes());
+ }
+
+ // Convert interleaved data to bytes and add to back_data
+ for &value in interleaved {
+ back_data.extend_from_slice(&value.to_le_bytes());
+ }
+
+ // Call get_realdata_loop for data rearrangement
+ get_realdata_loop(&back_data)
+}
+
/// Regroup the bytes of `back_data` by their position modulo 4.
///
/// The output lists every byte at index 0, 4, 8, ... first, then every byte
/// at index 1, 5, 9, ..., then those at 2, 6, ... and finally those at
/// 3, 7, ...; it has the same length as the input.
pub fn get_realdata_loop(back_data: &[u8]) -> Vec<u8> {
    let mut regrouped = Vec::with_capacity(back_data.len());
    for lane in 0..4usize {
        regrouped.extend(back_data.iter().skip(lane).step_by(4).cloned());
    }
    regrouped
}
+
+pub fn get_data_for_compression(tofs: &Vec<u32>, scans: &Vec<u32>, intensities: &Vec<u32>, max_scans: u32) -> Vec<u8> {
+ let mut tof_copy = tofs.clone();
+ modify_tofs(&mut tof_copy, &scans);
+ let peak_cnts = get_peak_cnts(max_scans, &scans);
+ let interleaved: Vec<u32> = tofs.iter().zip(intensities.iter()).flat_map(|(tof, intensity)| vec![*tof, *intensity]).collect();
+
+ get_realdata(&peak_cnts, &interleaved)
+}
+
+
+pub fn get_data_for_compression_par(tofs: Vec<Vec<u32>>, scans: Vec<Vec<u32>>, intensities: Vec<Vec<u32>>, max_scans: u32, num_threads: usize) -> Vec<Vec<u8>> {
+ let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+ let result = pool.install(|| {
+ tofs.par_iter().zip(scans.par_iter()).zip(intensities.par_iter()).map(|((tof, scan), intensity)| {
+ get_data_for_compression(tof, scan, intensity, max_scans)
+ }).collect()
+ });
+
+ result
+}
+
/// Expand per-position counts into a flat list of position labels.
///
/// Entry `i` of `scan` says how many times the label for position `i` is
/// emitted. The label is `i` when `zero_indexed` is true and `i + 1` otherwise.
pub fn flatten_scan_values(scan: &Vec<u32>, zero_indexed: bool) -> Vec<u32> {
    let offset: usize = if zero_indexed { 0 } else { 1 };

    let mut flattened = Vec::new();
    for (position, &repeats) in scan.iter().enumerate() {
        let label = (position + offset) as u32;
        flattened.extend(std::iter::repeat(label).take(repeats as usize));
    }
    flattened
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249
use mscore::data::peptide::{PeptideSequence};
+use mscore::data::spectrum::{MzSpectrum, MsType};
+use serde::{Serialize, Deserialize};
+use rand::distributions::{Distribution, Uniform};
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct SignalDistribution {
+ pub mean: f32,
+ pub variance: f32,
+ pub error: f32,
+ pub occurrence: Vec<u32>,
+ pub abundance: Vec<f32>,
+}
+
+impl SignalDistribution {
+ pub fn new(mean: f32, variance: f32, error: f32, occurrence: Vec<u32>, abundance: Vec<f32>) -> Self {
+ SignalDistribution { mean, variance, error, occurrence, abundance, }
+ }
+
+ pub fn add_noise(&self, noise_level: f32) -> Vec<f32> {
+ let mut rng = rand::thread_rng();
+ let noise_dist = Uniform::new(0.0, noise_level);
+
+ let noise: Vec<f32> = self.abundance.iter().map(|_| noise_dist.sample(&mut rng)).collect();
+ let noise_relative: Vec<f32> = self.abundance.iter().zip(noise.iter()).map(|(&abu, &noise)| abu * noise).collect();
+ let noised_signal: Vec<f32> = self.abundance.iter().zip(noise_relative.iter()).map(|(&abu, &noise_rel)| abu + noise_rel).collect();
+
+ let sum_noised_signal: f32 = noised_signal.iter().sum();
+ let sum_rt_abu: f32 = self.abundance.iter().sum();
+
+ noised_signal.iter().map(|&x| (x / sum_noised_signal) * sum_rt_abu).collect()
+ }
+}
+
+
+#[derive(Debug, Clone)]
+pub struct PeptidesSim {
+ pub protein_id: u32,
+ pub peptide_id: u32,
+ pub sequence: PeptideSequence,
+ pub proteins: String,
+ pub decoy: bool,
+ pub missed_cleavages: i8,
+ pub n_term : Option<bool>,
+ pub c_term : Option<bool>,
+ pub mono_isotopic_mass: f32,
+ pub retention_time: f32,
+ pub events: f32,
+ pub frame_start: u32,
+ pub frame_end: u32,
+ pub frame_distribution: SignalDistribution,
+}
+
+impl PeptidesSim {
+ pub fn new(
+ protein_id: u32,
+ peptide_id: u32,
+ sequence: String,
+ proteins: String,
+ decoy: bool,
+ missed_cleavages: i8,
+ n_term: Option<bool>,
+ c_term: Option<bool>,
+ mono_isotopic_mass: f32,
+ retention_time: f32,
+ events: f32,
+ frame_start: u32,
+ frame_end: u32,
+ frame_occurrence: Vec<u32>,
+ frame_abundance: Vec<f32>,
+ ) -> Self {
+ PeptidesSim {
+ protein_id,
+ peptide_id,
+ sequence: PeptideSequence::new(sequence, Some(peptide_id as i32)),
+ proteins,
+ decoy,
+ missed_cleavages,
+ n_term,
+ c_term,
+ mono_isotopic_mass,
+ retention_time,
+ events,
+ frame_start,
+ frame_end,
+ frame_distribution: SignalDistribution::new(
+ 0.0, 0.0, 0.0, frame_occurrence, frame_abundance),
+ }
+ }
+}
+
/// Settings of one window group: scan range, isolation window and collision energy.
#[derive(Debug, Clone)]
pub struct WindowGroupSettingsSim {
    pub window_group: u32,
    pub scan_start: u32,
    pub scan_end: u32,
    pub isolation_mz: f32,
    pub isolation_width: f32,
    pub collision_energy: f32,
}

impl WindowGroupSettingsSim {
    /// Store the given values unchanged.
    pub fn new(
        window_group: u32,
        scan_start: u32,
        scan_end: u32,
        isolation_mz: f32,
        isolation_width: f32,
        collision_energy: f32,
    ) -> Self {
        Self { window_group, scan_start, scan_end, isolation_mz, isolation_width, collision_energy }
    }
}
+
/// Association of a frame id with a window group.
#[derive(Debug, Clone)]
pub struct FrameToWindowGroupSim {
    pub frame_id: u32,
    pub window_group:u32,
}

impl FrameToWindowGroupSim {
    /// Store the given values unchanged.
    pub fn new(frame_id: u32, window_group: u32) -> Self {
        Self { frame_id, window_group }
    }
}
+
+#[derive(Debug, Clone)]
+pub struct IonSim {
+ pub ion_id: u32,
+ pub peptide_id: u32,
+ pub sequence: String,
+ pub charge: i8,
+ pub relative_abundance: f32,
+ pub mobility: f32,
+ pub simulated_spectrum: MzSpectrum,
+ pub scan_distribution: SignalDistribution,
+}
+
+impl IonSim {
+ pub fn new(
+ ion_id: u32,
+ peptide_id: u32,
+ sequence: String,
+ charge: i8,
+ relative_abundance: f32,
+ mobility: f32,
+ simulated_spectrum: MzSpectrum,
+ scan_occurrence: Vec<u32>,
+ scan_abundance: Vec<f32>,
+ ) -> Self {
+ IonSim {
+ ion_id,
+ peptide_id,
+ sequence,
+ charge,
+ relative_abundance,
+ mobility,
+ simulated_spectrum,
+ scan_distribution: SignalDistribution::new(
+ 0.0, 0.0, 0.0, scan_occurrence, scan_abundance),
+ }
+ }
+}
+
+
/// A scan number paired with a mobility value.
#[derive(Debug, Clone)]
pub struct ScansSim {
    pub scan: u32,
    pub mobility: f32,
}

impl ScansSim {
    /// Store the given values unchanged.
    pub fn new(scan: u32, mobility: f32) -> Self {
        Self {
            scan,
            mobility,
        }
    }
}
+
+#[derive(Debug, Clone)]
+pub struct FramesSim {
+ pub frame_id: u32,
+ pub time: f32,
+ pub ms_type: i64,
+}
+
+impl FramesSim {
+ pub fn new(frame_id: u32, time: f32, ms_type: i64) -> Self {
+ FramesSim {
+ frame_id,
+ time,
+ ms_type,
+ }
+ }
+ pub fn parse_ms_type(&self) -> MsType {
+ match self.ms_type {
+ 0 => MsType::Precursor,
+ 8 => MsType::FragmentDda,
+ 9 => MsType::FragmentDia,
+ _ => MsType::Unknown,
+ }
+
+ }
+}
+
/// Sparse values for one fragment ion: `values[i]` belongs to position
/// `indices[i]` of the dense vector produced by `to_dense`.
#[derive(Debug, Clone)]
pub struct FragmentIonSim {
    pub peptide_id: u32,
    pub ion_id: u32,
    pub collision_energy: f64,
    pub charge: i8,
    pub indices: Vec<u32>,
    pub values: Vec<f64>,
}

impl FragmentIonSim {
    /// Store the given values unchanged.
    pub fn new(
        peptide_id: u32,
        ion_id: u32,
        collision_energy: f64,
        charge: i8,
        indices: Vec<u32>,
        values: Vec<f64>,
    ) -> Self {
        FragmentIonSim {
            peptide_id,
            ion_id,
            charge,
            collision_energy,
            indices,
            values,
        }
    }

    /// Expand the sparse representation into a vector of `length` entries.
    ///
    /// Positions that do not occur in `indices` are 0.0; when a position
    /// occurs more than once, the last value wins.
    ///
    /// # Panics
    ///
    /// Panics when an index is not smaller than `length` or when `values` is
    /// shorter than `indices`.
    pub fn to_dense(&self, length: usize) -> Vec<f64> {
        let mut dense = vec![0.0; length];
        for (i, &idx) in self.indices.iter().enumerate() {
            dense[idx as usize] = self.values[i];
        }
        dense
    }
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547
use std::collections::{BTreeMap, HashSet};
+use std::path::Path;
+use mscore::data::peptide::{PeptideIon, PeptideProductIonSeriesCollection};
+use mscore::timstof::collision::{TimsTofCollisionEnergy, TimsTofCollisionEnergyDIA};
+use mscore::timstof::quadrupole::{IonTransmission, TimsTransmissionDIA};
+use mscore::data::spectrum::{IndexedMzSpectrum, MsType, MzSpectrum};
+use mscore::simulation::annotation::{MzSpectrumAnnotated, TimsFrameAnnotated, TimsSpectrumAnnotated};
+use mscore::timstof::frame::TimsFrame;
+use mscore::timstof::spectrum::TimsSpectrum;
+
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+
+use crate::sim::handle::TimsTofSyntheticsDataHandle;
+use crate::sim::precursor::{TimsTofSyntheticsPrecursorFrameBuilder};
+/// Frame builder for synthetic DIA experiments: bundles a precursor frame builder with DIA transmission settings, DIA collision energy settings and the fragment spectra used to assemble fragment frames.
+pub struct TimsTofSyntheticsFrameBuilderDIA {
+ pub path: String, // path handed to `new`, stored as a string
+ pub precursor_frame_builder: TimsTofSyntheticsPrecursorFrameBuilder, // builds precursor frames and provides the peptide / ion / frame lookup tables used by this builder
+ pub transmission_settings: TimsTransmissionDIA, // transmission settings obtained from the data handle in `new`
+ pub fragmentation_settings: TimsTofCollisionEnergyDIA, // collision energy settings obtained from the data handle in `new`
+ pub fragment_ions: Option<BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)>>, // looked up by (peptide id, charge state, quantized collision energy); `None` when built with annotations
+ pub fragment_ions_annotated: Option<BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrumAnnotated>)>>, // annotated counterpart with the same key; `None` when built without annotations
+}
+
+impl TimsTofSyntheticsFrameBuilderDIA {
+ /// Create a DIA frame builder from the database of a synthetic experiment.
+ ///
+ /// # Arguments
+ ///
+ /// * `path` - Path to the database of the synthetic experiment
+ /// * `with_annotations` - If true, only the annotated fragment spectra are built (`fragment_ions` stays `None`), otherwise only the plain ones (`fragment_ions_annotated` stays `None`)
+ /// * `num_threads` - Number of threads handed to the fragment ion construction
+ ///
+ /// # Returns
+ ///
+ /// The builder, or the `rusqlite` error raised while opening the database or reading the fragment ions
+ ///
+ /// # Panics
+ ///
+ /// The DIA window tables are read with `unwrap` inside the data handle, so a database without them panics.
+ ///
+ pub fn new(path: &Path, with_annotations: bool, num_threads: usize) -> rusqlite::Result<Self> {
+
+     let synthetics = TimsTofSyntheticsPrecursorFrameBuilder::new(path)?;
+     let handle = TimsTofSyntheticsDataHandle::new(path)?;
+
+     let fragment_ion_sims = handle.read_fragment_ions()?;
+
+     // get collision energy settings per window group
+     let fragmentation_settings = handle.get_collision_energy_dia();
+     // get ion transmission settings per window group
+     let transmission_settings = handle.get_transmission_dia();
+
+     // exactly one of the two fragment ion maps is populated
+     let (fragment_ions, fragment_ions_annotated) = if with_annotations {
+         let annotated = TimsTofSyntheticsDataHandle::build_fragment_ions_annotated(&synthetics.peptides, &fragment_ion_sims, num_threads);
+         (None, Some(annotated))
+     } else {
+         let plain = TimsTofSyntheticsDataHandle::build_fragment_ions(&synthetics.peptides, &fragment_ion_sims, num_threads);
+         (Some(plain), None)
+     };
+
+     Ok(Self {
+         // lossy conversion: a path that is not valid UTF-8 must not abort construction with a panic
+         path: path.to_string_lossy().into_owned(),
+         precursor_frame_builder: synthetics,
+         transmission_settings,
+         fragmentation_settings,
+         fragment_ions,
+         fragment_ions_annotated,
+     })
+ }
+
+ /// Build a single frame of the synthetic DIA experiment.
+ ///
+ /// Frame ids contained in the precursor frame id set are built as MS1 frames, every other id as an MS2 frame.
+ ///
+ /// # Arguments
+ ///
+ /// * `frame_id` - The frame id
+ /// * `fragmentation` - A boolean indicating if fragmentation is enabled, if false, the frame has same mz distribution as the precursor frame but will be quadrupole filtered
+ /// * `mz_noise_precursor` - Noise switch forwarded to the MS1 frame construction
+ /// * `uniform` - Noise model switch, forwarded to both frame types
+ /// * `precursor_noise_ppm` - Noise level in ppm forwarded to the MS1 frame construction
+ /// * `mz_noise_fragment` - Noise switch forwarded to the MS2 frame construction
+ /// * `fragment_noise_ppm` - Noise level in ppm forwarded to the MS2 frame construction
+ /// * `right_drag` - Forwarded to both frame types
+ ///
+ /// # Returns
+ ///
+ /// A TimsFrame
+ ///
+ pub fn build_frame(&self, frame_id: u32, fragmentation: bool, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, mz_noise_fragment: bool, fragment_noise_ppm: f64, right_drag: bool) -> TimsFrame {
+     // determine if the frame is a precursor frame
+     let is_precursor_frame = self.precursor_frame_builder.precursor_frame_id_set.contains(&frame_id);
+
+     if is_precursor_frame {
+         self.build_ms1_frame(frame_id, mz_noise_precursor, uniform, precursor_noise_ppm, right_drag)
+     } else {
+         self.build_ms2_frame(frame_id, fragmentation, mz_noise_fragment, uniform, fragment_noise_ppm, right_drag)
+     }
+ }
+
+ /// Build a single annotated frame of the synthetic DIA experiment.
+ ///
+ /// Takes the same arguments as `build_frame`; frame ids contained in the precursor frame id set are
+ /// built as annotated MS1 frames, every other id as an annotated MS2 frame.
+ pub fn build_frame_annotated(&self, frame_id: u32, fragmentation: bool, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, mz_noise_fragment: bool, fragment_noise_ppm: f64, right_drag: bool) -> TimsFrameAnnotated {
+     let is_precursor_frame = self.precursor_frame_builder.precursor_frame_id_set.contains(&frame_id);
+
+     if is_precursor_frame {
+         self.build_ms1_frame_annotated(frame_id, mz_noise_precursor, uniform, precursor_noise_ppm, right_drag)
+     } else {
+         self.build_ms2_frame_annotated(frame_id, fragmentation, mz_noise_fragment, uniform, fragment_noise_ppm, right_drag)
+     }
+ }
+
+ /// Collect the ids of all ions belonging to peptides that are present in the given frames.
+ ///
+ /// # Arguments
+ ///
+ /// * `precursor_frame_ids` - Frame ids to check; a peptide is selected if at least one of them lies within its `frame_start..=frame_end` interval
+ ///
+ /// # Returns
+ ///
+ /// The ion ids of the selected peptides, grouped by ascending peptide id. Peptides without an entry in the ion table contribute nothing.
+ ///
+ pub fn get_fragment_ion_ids(&self, precursor_frame_ids: Vec<u32>) -> Vec<u32> {
+     // get all peptide ids for the precursor frame ids; the ordered set makes the output order reproducible
+     let peptide_ids: std::collections::BTreeSet<u32> = self.precursor_frame_builder.peptides.iter()
+         .filter(|(_, peptide)| {
+             precursor_frame_ids.iter().any(|frame_id| peptide.frame_start <= *frame_id && peptide.frame_end >= *frame_id)
+         })
+         .map(|(peptide_id, _)| *peptide_id)
+         .collect();
+
+     // get all ion ids for the peptide ids, skipping peptides that have no ions instead of panicking
+     peptide_ids.iter()
+         .filter_map(|peptide_id| self.precursor_frame_builder.ions.get(peptide_id))
+         .flat_map(|ions| ions.iter().map(|ion| ion.ion_id))
+         .collect()
+ }
+
+ /// Build several frames in parallel on a dedicated thread pool of `num_threads` threads.
+ ///
+ /// All other arguments are handed unchanged to `build_frame` for every id in `frame_ids`.
+ /// The returned frames are sorted by ascending frame id.
+ pub fn build_frames(&self, frame_ids: Vec<u32>, fragmentation: bool, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, mz_noise_fragment: bool, fragment_noise_ppm: f64, right_drag: bool, num_threads: usize) -> Vec<TimsFrame> {
+     let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+     let mut frames: Vec<TimsFrame> = pool.install(|| {
+         frame_ids
+             .par_iter()
+             .map(|&id| self.build_frame(id, fragmentation, mz_noise_precursor, uniform, precursor_noise_ppm, mz_noise_fragment, fragment_noise_ppm, right_drag))
+             .collect()
+     });
+
+     frames.sort_by_key(|frame| frame.frame_id);
+     frames
+ }
+
+ /// Build several annotated frames in parallel on a dedicated thread pool of `num_threads` threads.
+ ///
+ /// All other arguments are handed unchanged to `build_frame_annotated` for every id in `frame_ids`.
+ /// The returned frames are sorted by ascending frame id.
+ pub fn build_frames_annotated(&self, frame_ids: Vec<u32>, fragmentation: bool, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, mz_noise_fragment: bool, fragment_noise_ppm: f64, right_drag: bool, num_threads: usize) -> Vec<TimsFrameAnnotated> {
+     let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+     let mut frames: Vec<TimsFrameAnnotated> = pool.install(|| {
+         frame_ids
+             .par_iter()
+             .map(|&id| self.build_frame_annotated(id, fragmentation, mz_noise_precursor, uniform, precursor_noise_ppm, mz_noise_fragment, fragment_noise_ppm, right_drag))
+             .collect()
+     });
+
+     frames.sort_by_key(|frame| frame.frame_id);
+     frames
+ }
+
+ /// Build the precursor frame for `frame_id` and round every intensity to the nearest integer value.
+ fn build_ms1_frame(&self, frame_id: u32, mz_noise_precursor: bool, uniform: bool, precursor_ppm: f64, right_drag: bool) -> TimsFrame {
+     let mut frame = self.precursor_frame_builder.build_precursor_frame(frame_id, mz_noise_precursor, uniform, precursor_ppm, right_drag);
+
+     for intensity in frame.ims_frame.intensity.iter_mut() {
+         *intensity = intensity.round();
+     }
+
+     frame
+ }
+
+ /// Build the annotated precursor frame for `frame_id` and round every intensity to the nearest integer value.
+ fn build_ms1_frame_annotated(&self, frame_id: u32, mz_noise_precursor: bool, uniform: bool, precursor_ppm: f64, right_drag: bool) -> TimsFrameAnnotated {
+     let mut frame = self.precursor_frame_builder.build_precursor_frame_annotated(frame_id, mz_noise_precursor, uniform, precursor_ppm, right_drag);
+
+     for intensity in frame.intensity.iter_mut() {
+         *intensity = intensity.round();
+     }
+
+     frame
+ }
+
+ /// Build the MS2 frame for `frame_id`.
+ ///
+ /// With `fragmentation` the frame is assembled from the stored fragment spectra (this unwraps
+ /// `fragment_ions`, so the builder must have been created without annotations). Without it, the
+ /// MS1 frame is built with the fragment noise settings, passed through the transmission settings
+ /// and tagged as `MsType::FragmentDia`. Intensities are rounded in both cases.
+ fn build_ms2_frame(&self, frame_id: u32, fragmentation: bool, mz_noise_fragment: bool, uniform: bool, fragment_ppm: f64, right_drag: bool) -> TimsFrame {
+     let mut frame = if fragmentation {
+         let fragment_ions = self.fragment_ions.as_ref().unwrap();
+         self.build_fragment_frame(frame_id, fragment_ions, mz_noise_fragment, uniform, fragment_ppm, None, None, None, Some(right_drag))
+     } else {
+         let precursor_frame = self.build_ms1_frame(frame_id, mz_noise_fragment, uniform, fragment_ppm, right_drag);
+         let mut transmitted = self.transmission_settings.transmit_tims_frame(&precursor_frame, None);
+         transmitted.ms_type = MsType::FragmentDia;
+         transmitted
+     };
+
+     for intensity in frame.ims_frame.intensity.iter_mut() {
+         *intensity = intensity.round();
+     }
+
+     frame
+ }
+
+ /// Build the annotated MS2 frame for `frame_id`.
+ ///
+ /// With `fragmentation` the frame is assembled from the stored annotated fragment spectra (this
+ /// unwraps `fragment_ions_annotated`, so the builder must have been created with annotations).
+ /// Without it, the annotated MS1 frame is built with the fragment noise settings, passed through
+ /// the transmission settings and tagged as `MsType::FragmentDia`. Intensities are rounded in both cases.
+ fn build_ms2_frame_annotated(&self, frame_id: u32, fragmentation: bool, mz_noise_fragment: bool, uniform: bool, fragment_ppm: f64, right_drag: bool) -> TimsFrameAnnotated {
+     let mut frame = if fragmentation {
+         let fragment_ions = self.fragment_ions_annotated.as_ref().unwrap();
+         self.build_fragment_frame_annotated(frame_id, fragment_ions, mz_noise_fragment, uniform, fragment_ppm, None, None, None, Some(right_drag))
+     } else {
+         let precursor_frame = self.build_ms1_frame_annotated(frame_id, mz_noise_fragment, uniform, fragment_ppm, right_drag);
+         let mut transmitted = self.transmission_settings.transmit_tims_frame_annotated(&precursor_frame, None);
+         transmitted.ms_type = MsType::FragmentDia;
+         transmitted
+     };
+
+     for intensity in frame.intensity.iter_mut() {
+         *intensity = intensity.round();
+     }
+
+     frame
+ }
+
+ /// Build the fragment frame for one frame id from the supplied fragment spectra.
+ ///
+ /// # Arguments
+ /// * `frame_id` - The frame id
+ /// * `fragment_ions` - Fragment spectra, looked up by (peptide id, charge state, quantized collision energy)
+ /// * `mz_noise_fragment`, `uniform`, `fragment_ppm` - If `mz_noise_fragment` is set, m/z noise with `fragment_ppm` is added to every scaled fragment spectrum (uniform if `uniform`, normal otherwise)
+ /// * `mz_min`, `mz_max` - The minimum / maximum m/z value passed to the final frame filter, defaulting to 100.0 and 1700.0
+ /// * `intensity_min` - The minimum intensity value passed to the final frame filter, defaulting to 1.0
+ /// * `right_drag` - Passed to the uniform noise model; `None` is treated as `false`
+ /// # Returns
+ ///
+ /// A TimsFrame; it is built from empty vectors if the frame has no abundance entry or no spectrum was generated
+ ///
+ fn build_fragment_frame(
+ &self,
+ frame_id: u32,
+ fragment_ions: &BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)>,
+ mz_noise_fragment: bool,
+ uniform: bool,
+ fragment_ppm: f64,
+ mz_min: Option<f64>,
+ mz_max: Option<f64>,
+ intensity_min: Option<f64>,
+ right_drag: Option<bool>,
+ ) -> TimsFrame {
+
+ // frame ids listed as precursor frames are tagged MsType::Unknown, all other frames MsType::FragmentDia
+ let ms_type = match self.precursor_frame_builder.precursor_frame_id_set.contains(&frame_id) {
+ false => MsType::FragmentDia,
+ true => MsType::Unknown,
+ };
+
+ let mut tims_spectra: Vec<TimsSpectrum> = Vec::new();
+
+ // Frame might not have any peptides: return a frame built from empty vectors (the retention time lookup is still unwrapped)
+ if !self.precursor_frame_builder.frame_to_abundances.contains_key(&frame_id) {
+ return TimsFrame::new(
+ frame_id as i32,
+ ms_type.clone(),
+ *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64,
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ );
+ }
+
+ // Get the peptide ids and abundances for the frame; it is now safe to unwrap since we checked that the frame is in the map
+ let (peptide_ids, frame_abundances) = self.precursor_frame_builder.frame_to_abundances.get(&frame_id).unwrap();
+
+ // Go over all peptides in the frame with their respective abundances
+ for (peptide_id, frame_abundance) in peptide_ids.iter().zip(frame_abundances.iter()) {
+
+ // jump to next peptide if the peptide_id is not in the peptide_to_ions map
+ if !self.precursor_frame_builder.peptide_to_ions.contains_key(&peptide_id) {
+ continue;
+ }
+
+ // get all the ions for the peptide (per-ion abundances, scan occurrences, scan abundances, charges and precursor spectra, all indexed alike)
+ let (ion_abundances, scan_occurrences, scan_abundances, charges, spectra) = self.precursor_frame_builder.peptide_to_ions.get(&peptide_id).unwrap();
+
+ for (index, ion_abundance) in ion_abundances.iter().enumerate() {
+ // occurrence and abundance of the ion in the scan
+ let all_scan_occurrence = scan_occurrences.get(index).unwrap();
+ let all_scan_abundance = scan_abundances.get(index).unwrap();
+
+ // get precursor spectrum for the ion
+ let spectrum = spectra.get(index).unwrap();
+
+ // go over occurrence and abundance of the ion in the scan
+ for (scan, scan_abundance) in all_scan_occurrence.iter().zip(all_scan_abundance.iter()) {
+
+ // first, check if precursor is transmitted; scans where it is not are skipped
+ if !self.transmission_settings.any_transmitted(frame_id as i32, *scan as i32, &spectrum.mz, None) {
+ continue;
+ }
+
+ // calculate abundance factor: frame, scan and ion abundance multiplied with the event count stored for the peptide
+ let total_events = self.precursor_frame_builder.peptide_to_events.get(&peptide_id).unwrap();
+ let fraction_events = frame_abundance * scan_abundance * ion_abundance * total_events;
+
+ // get collision energy for the ion
+ let collision_energy = self.fragmentation_settings.get_collision_energy(frame_id as i32, *scan as i32);
+ let collision_energy_quantized = (collision_energy * 1e3).round() as i8;
+ // NOTE(review): the quantized energy is part of the lookup key below, so it presumably has to match the quantization used when `fragment_ions` was built; the `as i8` cast saturates at the i8 bounds - confirm the value range
+ // get charge state for the ion
+ let charge_state = charges.get(index).unwrap();
+ // extract fragment ions for the peptide, charge state and collision energy
+ let maybe_value = fragment_ions.get(&(*peptide_id, *charge_state, collision_energy_quantized));
+
+ // skip this scan if there is no entry for the key (the `continue` targets the scan loop, not the peptide loop)
+ if maybe_value.is_none() {
+ continue;
+ }
+
+ // for each fragment ion series, create a spectrum and add it to the tims_spectra
+ for fragment_ion_series in maybe_value.unwrap().1.iter() {
+ let scaled_spec = fragment_ion_series.clone() * fraction_events as f64;
+ let right_drag = right_drag.unwrap_or(false);
+ // optional m/z noise: uniform (with right_drag) or normal, both parameterized by fragment_ppm
+ let mz_spectrum = if mz_noise_fragment {
+ match uniform {
+ true => scaled_spec.add_mz_noise_uniform(fragment_ppm, right_drag),
+ false => scaled_spec.add_mz_noise_normal(fragment_ppm),
+ }
+ } else {
+ scaled_spec
+ };
+ // NOTE(review): the spectrum-level filter below uses the fixed values 100.0, 1700.0, 1.0, 1e9 and ignores mz_min / mz_max / intensity_min
+ tims_spectra.push(
+ TimsSpectrum::new(
+ frame_id as i32,
+ *scan as i32,
+ *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64,
+ *self.precursor_frame_builder.scan_to_mobility.get(&scan).unwrap() as f64,
+ ms_type.clone(),
+ IndexedMzSpectrum::new(vec![0; mz_spectrum.mz.len()], mz_spectrum.mz, mz_spectrum.intensity).filter_ranged(
+ 100.0,
+ 1700.0,
+ 1.0,
+ 1e9,
+ ),
+ )
+ );
+ }
+ }
+ }
+ }
+ // no spectrum was generated for this frame: return a frame built from empty vectors
+ if tims_spectra.is_empty() {
+ return TimsFrame::new(
+ frame_id as i32,
+ ms_type.clone(),
+ *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64,
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ );
+ }
+ // merge the spectra into one frame and filter it; NOTE(review): 0, 1000 and 0.0, 10.0 are presumably scan and mobility bounds - confirm against TimsFrame::filter_ranged
+ let tims_frame = TimsFrame::from_tims_spectra(tims_spectra);
+ tims_frame.filter_ranged(
+ mz_min.unwrap_or(100.0),
+ mz_max.unwrap_or(1700.0),
+ 0,
+ 1000,
+ 0.0,
+ 10.0,
+ intensity_min.unwrap_or(1.0),
+ 1e9,
+ )
+ }
+ /// Build the annotated fragment frame for `frame_id` from annotated fragment spectra looked up by (peptide id, charge state, quantized collision energy); the frame is built from empty vectors if the frame has no abundance entry or no spectrum was generated.
+ pub fn build_fragment_frame_annotated(
+ &self,
+ frame_id: u32,
+ fragment_ions: &BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrumAnnotated>)>,
+ mz_noise_fragment: bool, // if set, m/z noise is added to every scaled fragment spectrum
+ uniform: bool, // selects uniform noise, otherwise normal noise is used
+ fragment_ppm: f64, // noise level handed to the noise functions
+ mz_min: Option<f64>, // final frame filter, defaults to 100.0
+ mz_max: Option<f64>, // final frame filter, defaults to 1700.0
+ intensity_min: Option<f64>, // final frame filter, defaults to 1.0
+ right_drag: Option<bool>, // passed to the uniform noise model, `None` is treated as `false`
+ ) -> TimsFrameAnnotated {
+ let ms_type = match self.precursor_frame_builder.precursor_frame_id_set.contains(&frame_id) {
+ false => MsType::FragmentDia,
+ true => MsType::Unknown,
+ };
+ // frame ids listed as precursor frames are tagged MsType::Unknown above, all other frames MsType::FragmentDia
+ let mut tims_spectra: Vec<TimsSpectrumAnnotated> = Vec::new();
+ // frame might not have any peptides: return a frame built from empty vectors (the retention time lookup is still unwrapped)
+ if !self.precursor_frame_builder.frame_to_abundances.contains_key(&frame_id) {
+ return TimsFrameAnnotated::new(
+ frame_id as i32,
+ *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64,
+ ms_type.clone(),
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ );
+ }
+ // safe to unwrap: the key was checked right above
+ let (peptide_ids, frame_abundances) = self.precursor_frame_builder.frame_to_abundances.get(&frame_id).unwrap();
+ // go over all peptides in the frame with their respective abundances, skipping peptides without ions
+ for (peptide_id, frame_abundance) in peptide_ids.iter().zip(frame_abundances.iter()) {
+ if !self.precursor_frame_builder.peptide_to_ions.contains_key(&peptide_id) {
+ continue;
+ }
+ // the stored precursor spectra (last tuple element) are not used here
+ let (ion_abundances, scan_occurrences, scan_abundances, charges, _) = self.precursor_frame_builder.peptide_to_ions.get(&peptide_id).unwrap();
+
+ for (index, ion_abundance) in ion_abundances.iter().enumerate() {
+ let all_scan_occurrence = scan_occurrences.get(index).unwrap();
+ let all_scan_abundance = scan_abundances.get(index).unwrap();
+ // the annotated precursor spectrum is computed from the peptide sequence and charge for every ion
+ let peptide = self.precursor_frame_builder.peptides.get(peptide_id).unwrap();
+ let ion = PeptideIon::new(peptide.sequence.sequence.clone(), charges[index] as i32, *ion_abundance as f64, Some(*peptide_id as i32));
+ // TODO: make this configurable
+ let spectrum = ion.calculate_isotopic_spectrum_annotated(1e-3, 1e-8, 200, 1e-4);
+ // go over occurrence and abundance of the ion in the scan
+ for (scan, scan_abundance) in all_scan_occurrence.iter().zip(all_scan_abundance.iter()) {
+ if !self.transmission_settings.any_transmitted(frame_id as i32, *scan as i32, &spectrum.mz, None) {
+ continue;
+ }
+ // abundance factor: frame, scan and ion abundance multiplied with the event count stored for the peptide
+ let total_events = self.precursor_frame_builder.peptide_to_events.get(&peptide_id).unwrap();
+ let fraction_events = frame_abundance * scan_abundance * ion_abundance * total_events;
+ // NOTE(review): the quantized energy is part of the lookup key, so it presumably has to match the quantization used when `fragment_ions` was built; the `as i8` cast saturates at the i8 bounds - confirm the value range
+ let collision_energy = self.fragmentation_settings.get_collision_energy(frame_id as i32, *scan as i32);
+ let collision_energy_quantized = (collision_energy * 1e3).round() as i8;
+
+ let charge_state = charges.get(index).unwrap();
+ let maybe_value = fragment_ions.get(&(*peptide_id, *charge_state, collision_energy_quantized));
+ // skip this scan if there is no entry for the key (the `continue` targets the scan loop)
+ if maybe_value.is_none() {
+ continue;
+ }
+ // every stored fragment spectrum is scaled by the abundance factor and optionally noised
+ for fragment_ion_series in maybe_value.unwrap().1.iter() {
+ let scaled_spec = fragment_ion_series.clone() * fraction_events as f64;
+ let right_drag = right_drag.unwrap_or(false);
+
+ let mz_spectrum = if mz_noise_fragment {
+ match uniform {
+ true => scaled_spec.add_mz_noise_uniform(fragment_ppm, right_drag),
+ false => scaled_spec.add_mz_noise_normal(fragment_ppm),
+ }
+ } else {
+ scaled_spec
+ };
+ // unlike the non-annotated builder, no spectrum-level filter is applied before the spectrum is stored
+ tims_spectra.push(
+ TimsSpectrumAnnotated::new(
+ frame_id as i32,
+ *scan,
+ *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64,
+ *self.precursor_frame_builder.scan_to_mobility.get(&scan).unwrap() as f64,
+ ms_type.clone(),
+ vec![0; mz_spectrum.mz.len()],
+ mz_spectrum)
+ );
+ }
+ }
+ }
+ }
+ // no spectrum was generated for this frame: return a frame built from empty vectors
+ if tims_spectra.is_empty() {
+ return TimsFrameAnnotated::new(
+ frame_id as i32,
+ *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64,
+ ms_type.clone(),
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ );
+ }
+ // merge the spectra into one annotated frame
+ let tims_frame = TimsFrameAnnotated::from_tims_spectra_annotated(tims_spectra);
+ // NOTE(review): 0.0, 10.0 and 0, 1000 are presumably mobility and scan bounds; the two pairs appear in the opposite order in the TimsFrame variant - confirm against TimsFrameAnnotated::filter_ranged
+ tims_frame.filter_ranged(
+ mz_min.unwrap_or(100.0),
+ mz_max.unwrap_or(1700.0),
+ 0.0,
+ 10.0,
+ 0,
+ 1000,
+ intensity_min.unwrap_or(1.0),
+ 1e9,
+ )
+ }
+
+ /// Compute, per scan and frame, the transmitted fraction of an ion's simulated spectrum.
+ ///
+ /// # Arguments
+ ///
+ /// * `peptide_id` - Peptide whose frame occurrences define the columns; an unknown peptide yields no columns
+ /// * `charge` - Charge of the ion to look at (the ion lookup is unwrapped and panics if it does not exist)
+ /// * `include_precursor_frames` - If false, frames contained in the precursor frame id set are dropped
+ ///
+ /// # Returns
+ ///
+ /// A matrix indexed as `[scan][frame]`: 1.0 where all peaks are transmitted, the transmitted
+ /// intensity fraction where only some are, and 0.0 otherwise
+ ///
+ pub fn get_ion_transmission_matrix(&self, peptide_id: u32, charge: i8, include_precursor_frames: bool) -> Vec<Vec<f32>> {
+     let builder = &self.precursor_frame_builder;
+
+     let mut frame_ids = builder.peptides.get(&peptide_id)
+         .map(|peptide| peptide.frame_distribution.occurrence.clone())
+         .unwrap_or_default();
+
+     if !include_precursor_frames {
+         frame_ids.retain(|frame_id| !builder.precursor_frame_id_set.contains(frame_id));
+     }
+
+     let ion = builder.ions.get(&peptide_id).unwrap().iter().find(|ion| ion.charge == charge).unwrap();
+     let spectrum = ion.simulated_spectrum.clone();
+     let scans = &ion.scan_distribution.occurrence;
+
+     let mut matrix = vec![vec![0.0; frame_ids.len()]; scans.len()];
+
+     for (column, frame_id) in frame_ids.iter().enumerate() {
+         let frame = *frame_id as i32;
+
+         for (row, scan_id) in scans.iter().enumerate() {
+             let scan = *scan_id as i32;
+
+             if self.transmission_settings.all_transmitted(frame, scan, &spectrum.mz, None) {
+                 matrix[row][column] = 1.0;
+             } else if self.transmission_settings.any_transmitted(frame, scan, &spectrum.mz, None) {
+                 // partially transmitted: ratio of transmitted to total intensity
+                 let transmitted = self.transmission_settings.transmit_spectrum(frame, scan, spectrum.clone(), None);
+                 let transmitted_intensity = transmitted.intensity.iter().sum::<f64>();
+                 let total_intensity = spectrum.intensity.iter().sum::<f64>();
+                 matrix[row][column] = (transmitted_intensity / total_intensity) as f32;
+             }
+         }
+     }
+
+     matrix
+ }
+
+ /// Count in how many fragment frames, and in how many frame/scan pairs, an ion is transmitted.
+ ///
+ /// Only frames of the peptide's frame distribution that are not in the precursor frame id set
+ /// are considered. The peptide and ion lookups are unwrapped and panic for unknown ids or charges.
+ ///
+ /// # Returns
+ ///
+ /// `(frame_count, scan_count)`: the number of frames with at least one transmitted scan, and the
+ /// total number of transmitted frame/scan pairs
+ ///
+ pub fn count_number_transmissions(&self, peptide_id: u32, charge: i8) -> (usize, usize) {
+     let builder = &self.precursor_frame_builder;
+
+     let peptide = builder.peptides.get(&peptide_id).unwrap();
+     let ion = builder.ions.get(&peptide_id).unwrap().iter().find(|ion| ion.charge == charge).unwrap();
+     let precursor_mz = &ion.simulated_spectrum.mz;
+     let scans = &ion.scan_distribution.occurrence;
+
+     let mut frame_count = 0;
+     let mut scan_count = 0;
+
+     let fragment_frames = peptide.frame_distribution.occurrence.iter()
+         .filter(|frame_id| !builder.precursor_frame_id_set.contains(*frame_id));
+
+     for frame in fragment_frames {
+         let transmitted_scans = scans.iter()
+             .filter(|scan| self.transmission_settings.any_transmitted(*frame as i32, **scan as i32, precursor_mz, None))
+             .count();
+
+         scan_count += transmitted_scans;
+         if transmitted_scans > 0 {
+             frame_count += 1;
+         }
+     }
+
+     (frame_count, scan_count)
+ }
+
+ /// Run `count_number_transmissions` for many (peptide id, charge) pairs on a thread pool of `num_threads` threads.
+ ///
+ /// `peptide_ids` and `charge` are zipped element-wise; the result holds one `(frame_count, scan_count)`
+ /// tuple per pair, in input order.
+ pub fn count_number_transmissions_parallel(&self, peptide_ids: Vec<u32>, charge: Vec<i8>, num_threads: usize) -> Vec<(usize, usize)> {
+     let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+     pool.install(|| {
+         peptide_ids
+             .par_iter()
+             .zip(charge.par_iter())
+             .map(|(&peptide_id, &charge_state)| self.count_number_transmissions(peptide_id, charge_state))
+             .collect()
+     })
+ }
+}
+
+impl TimsTofCollisionEnergy for TimsTofSyntheticsFrameBuilderDIA {
+    /// Collision energy for a frame/scan pair, answered by the builder's DIA fragmentation settings.
+    fn get_collision_energy(&self, frame_id: i32, scan_id: i32) -> f64 {
+        let settings = &self.fragmentation_settings;
+        settings.get_collision_energy(frame_id, scan_id)
+    }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 
+422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492
use std::collections::{BTreeMap, BTreeSet, HashSet};
+use std::path::Path;
+use mscore::data::peptide::{FragmentType, PeptideProductIonSeriesCollection, PeptideSequence};
+use mscore::timstof::collision::{TimsTofCollisionEnergy, TimsTofCollisionEnergyDIA};
+use mscore::timstof::quadrupole::{IonTransmission, TimsTransmissionDIA};
+use mscore::data::spectrum::{MsType, MzSpectrum};
+use mscore::simulation::annotation::MzSpectrumAnnotated;
+use rusqlite::Connection;
+use crate::sim::containers::{FragmentIonSim, FramesSim, FrameToWindowGroupSim, IonSim, PeptidesSim, ScansSim, SignalDistribution, WindowGroupSettingsSim};
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+/// Handle on the database of a synthetic experiment; owns the `rusqlite` connection that the `read_*` methods query.
+#[derive(Debug)]
+pub struct TimsTofSyntheticsDataHandle {
+ pub connection: Connection, // connection opened in `new`
+}
+
+impl TimsTofSyntheticsDataHandle {
+ /// Open the database at `path` and wrap the connection in a handle.
+ pub fn new(path: &Path) -> rusqlite::Result<Self> {
+     Connection::open(path).map(|connection| Self { connection })
+ }
+
+ /// Read every row of the `frames` table; the first three columns are handed to `FramesSim::new`.
+ ///
+ /// Returns the first `rusqlite` error encountered while preparing, querying or converting a row.
+ pub fn read_frames(&self) -> rusqlite::Result<Vec<FramesSim>> {
+     let mut statement = self.connection.prepare("SELECT * FROM frames")?;
+
+     let rows = statement.query_map([], |row| {
+         Ok(FramesSim::new(row.get(0)?, row.get(1)?, row.get(2)?))
+     })?;
+
+     // collecting into a Result stops at the first failing row
+     rows.collect()
+ }
+
+ /// Read every row of the `scans` table; the first two columns are handed to `ScansSim::new`.
+ ///
+ /// Returns the first `rusqlite` error encountered while preparing, querying or converting a row.
+ pub fn read_scans(&self) -> rusqlite::Result<Vec<ScansSim>> {
+     let mut statement = self.connection.prepare("SELECT * FROM scans")?;
+
+     let rows = statement.query_map([], |row| {
+         Ok(ScansSim::new(row.get(0)?, row.get(1)?))
+     })?;
+
+     // collecting into a Result stops at the first failing row
+     rows.collect()
+ }
+ /// Read every row of the `peptides` table.
+ ///
+ /// Columns 15 and 16 hold JSON text with the frame occurrences and frame abundances. An
+ /// occurrence column that cannot be decoded is reported as a conversion failure; an abundance
+ /// column that cannot be decoded is replaced by zeros, one per occurrence.
+ pub fn read_peptides(&self) -> rusqlite::Result<Vec<PeptidesSim>> {
+     let mut statement = self.connection.prepare("SELECT * FROM peptides")?;
+
+     let rows = statement.query_map([], |row| {
+         let occurrence_json: String = row.get(15)?;
+         let abundance_json: String = row.get(16)?;
+
+         let frame_occurrence: Vec<u32> = serde_json::from_str(&occurrence_json)
+             .map_err(|e| rusqlite::Error::FromSqlConversionFailure(15, rusqlite::types::Type::Text, Box::new(e)))?;
+
+         // if the frame abundance is not available, set it to 0
+         let frame_abundance: Vec<f32> = serde_json::from_str(&abundance_json)
+             .unwrap_or_else(|_| vec![0.0; frame_occurrence.len()]);
+
+         let frame_distribution = SignalDistribution::new(0.0, 0.0, 0.0, frame_occurrence, frame_abundance);
+
+         Ok(PeptidesSim {
+             protein_id: row.get(0)?,
+             peptide_id: row.get(1)?,
+             sequence: PeptideSequence::new(row.get(2)?, row.get(1)?),
+             proteins: row.get(3)?,
+             decoy: row.get(4)?,
+             missed_cleavages: row.get(5)?,
+             n_term: row.get(6)?,
+             c_term: row.get(7)?,
+             mono_isotopic_mass: row.get(8)?,
+             retention_time: row.get(9)?,
+             events: row.get(10)?,
+             frame_start: row.get(13)?,
+             frame_end: row.get(14)?,
+             frame_distribution,
+         })
+     })?;
+
+     // collecting into a Result stops at the first failing row
+     rows.collect()
+ }
+
+ /// Read every row of the `ions` table.
+ ///
+ /// Columns 6, 8 and 9 hold JSON text with the simulated spectrum, the scan occurrences and the
+ /// scan abundances; a column that cannot be decoded is reported as a conversion failure carrying
+ /// its column index.
+ pub fn read_ions(&self) -> rusqlite::Result<Vec<IonSim>> {
+     let mut statement = self.connection.prepare("SELECT * FROM ions")?;
+
+     let rows = statement.query_map([], |row| {
+         let spectrum_json: String = row.get(6)?;
+         let occurrence_json: String = row.get(8)?;
+         let abundance_json: String = row.get(9)?;
+
+         let simulated_spectrum: MzSpectrum = serde_json::from_str(&spectrum_json)
+             .map_err(|e| rusqlite::Error::FromSqlConversionFailure(6, rusqlite::types::Type::Text, Box::new(e)))?;
+
+         let scan_occurrence: Vec<u32> = serde_json::from_str(&occurrence_json)
+             .map_err(|e| rusqlite::Error::FromSqlConversionFailure(8, rusqlite::types::Type::Text, Box::new(e)))?;
+
+         let scan_abundance: Vec<f32> = serde_json::from_str(&abundance_json)
+             .map_err(|e| rusqlite::Error::FromSqlConversionFailure(9, rusqlite::types::Type::Text, Box::new(e)))?;
+
+         Ok(IonSim::new(
+             row.get(0)?,
+             row.get(1)?,
+             row.get(2)?,
+             row.get(3)?,
+             row.get(4)?,
+             row.get(5)?,
+             simulated_spectrum,
+             scan_occurrence,
+             scan_abundance,
+         ))
+     })?;
+
+     // collecting into a Result stops at the first failing row
+     rows.collect()
+ }
+
+ /// Read every row of the `dia_ms_ms_windows` table; the first six columns are handed to `WindowGroupSettingsSim::new`.
+ ///
+ /// Returns the first `rusqlite` error encountered while preparing, querying or converting a row.
+ pub fn read_window_group_settings(&self) -> rusqlite::Result<Vec<WindowGroupSettingsSim>> {
+     let mut statement = self.connection.prepare("SELECT * FROM dia_ms_ms_windows")?;
+
+     let rows = statement.query_map([], |row| {
+         Ok(WindowGroupSettingsSim::new(
+             row.get(0)?,
+             row.get(1)?,
+             row.get(2)?,
+             row.get(3)?,
+             row.get(4)?,
+             row.get(5)?,
+         ))
+     })?;
+
+     // collecting into a Result stops at the first failing row
+     rows.collect()
+ }
+
+ /// Read every row of the `dia_ms_ms_info` table; the first two columns are handed to `FrameToWindowGroupSim::new`.
+ ///
+ /// Returns the first `rusqlite` error encountered while preparing, querying or converting a row.
+ pub fn read_frame_to_window_group(&self) -> rusqlite::Result<Vec<FrameToWindowGroupSim>> {
+     let mut statement = self.connection.prepare("SELECT * FROM dia_ms_ms_info")?;
+
+     let rows = statement.query_map([], |row| {
+         Ok(FrameToWindowGroupSim::new(row.get(0)?, row.get(1)?))
+     })?;
+
+     // collecting into a Result stops at the first failing row
+     rows.collect()
+ }
+
+ /// Read every row of the `fragment_ions` table.
+ ///
+ /// Columns 4 and 5 hold JSON text with the index and value vectors; a column that cannot be
+ /// decoded is reported as a conversion failure carrying its column index.
+ pub fn read_fragment_ions(&self) -> rusqlite::Result<Vec<FragmentIonSim>> {
+     let mut statement = self.connection.prepare("SELECT * FROM fragment_ions")?;
+
+     let rows = statement.query_map([], |row| {
+         let indices_json: String = row.get(4)?;
+         let values_json: String = row.get(5)?;
+
+         let indices: Vec<u32> = serde_json::from_str(&indices_json)
+             .map_err(|e| rusqlite::Error::FromSqlConversionFailure(4, rusqlite::types::Type::Text, Box::new(e)))?;
+
+         let values: Vec<f64> = serde_json::from_str(&values_json)
+             .map_err(|e| rusqlite::Error::FromSqlConversionFailure(5, rusqlite::types::Type::Text, Box::new(e)))?;
+
+         Ok(FragmentIonSim::new(
+             row.get(0)?,
+             row.get(1)?,
+             row.get(2)?,
+             row.get(3)?,
+             indices,
+             values,
+         ))
+     })?;
+
+     // collecting into a Result stops at the first failing row
+     rows.collect()
+ }
+
+ /// Assemble the DIA transmission settings from the frame-to-window-group and window group tables.
+ ///
+ /// # Panics
+ ///
+ /// Panics if either table cannot be read.
+ pub fn get_transmission_dia(&self) -> TimsTransmissionDIA {
+     let frame_groups = self.read_frame_to_window_group().unwrap();
+     let group_settings = self.read_window_group_settings().unwrap();
+
+     // one column per constructor argument, in table order
+     let frame_ids = frame_groups.iter().map(|entry| entry.frame_id as i32).collect();
+     let frame_window_groups = frame_groups.iter().map(|entry| entry.window_group as i32).collect();
+     let window_groups = group_settings.iter().map(|setting| setting.window_group as i32).collect();
+     let scan_starts = group_settings.iter().map(|setting| setting.scan_start as i32).collect();
+     let scan_ends = group_settings.iter().map(|setting| setting.scan_end as i32).collect();
+     let isolation_mzs = group_settings.iter().map(|setting| setting.isolation_mz as f64).collect();
+     let isolation_widths = group_settings.iter().map(|setting| setting.isolation_width as f64).collect();
+
+     TimsTransmissionDIA::new(
+         frame_ids,
+         frame_window_groups,
+         window_groups,
+         scan_starts,
+         scan_ends,
+         isolation_mzs,
+         isolation_widths,
+         None,
+     )
+ }
+
+ /// Assemble the DIA collision energy settings from the frame-to-window-group and window group tables.
+ ///
+ /// # Panics
+ ///
+ /// Panics if either table cannot be read.
+ pub fn get_collision_energy_dia(&self) -> TimsTofCollisionEnergyDIA {
+     let frame_groups = self.read_frame_to_window_group().unwrap();
+     let group_settings = self.read_window_group_settings().unwrap();
+
+     // one column per constructor argument, in table order
+     let frame_ids = frame_groups.iter().map(|entry| entry.frame_id as i32).collect();
+     let frame_window_groups = frame_groups.iter().map(|entry| entry.window_group as i32).collect();
+     let window_groups = group_settings.iter().map(|setting| setting.window_group as i32).collect();
+     let scan_starts = group_settings.iter().map(|setting| setting.scan_start as i32).collect();
+     let scan_ends = group_settings.iter().map(|setting| setting.scan_end as i32).collect();
+     let collision_energies = group_settings.iter().map(|setting| setting.collision_energy as f64).collect();
+
+     TimsTofCollisionEnergyDIA::new(
+         frame_ids,
+         frame_window_groups,
+         window_groups,
+         scan_starts,
+         scan_ends,
+         collision_energies,
+     )
+ }
+
+ /// Collect the distinct fragmentation conditions under which one ion is transmitted.
+ ///
+ /// For every non-precursor frame of the ion's peptide and every scan of the ion, the precursor
+ /// spectrum is checked with `any_transmitted` (called with `Some(0.5)`). Each hit contributes a tuple
+ /// (peptide id, ion id, sequence, charge, collision energy * 100 rounded to an integer); the
+ /// set removes duplicates. Panics if the ion's peptide is missing from `peptide_map`.
+ fn ion_map_fn(
+     ion: IonSim,
+     peptide_map: &BTreeMap<u32, PeptidesSim>,
+     precursor_frames: &HashSet<u32>,
+     transmission: &TimsTransmissionDIA,
+     collision_energy: &TimsTofCollisionEnergyDIA) -> BTreeSet<(u32, u32, String, i8, i32)> {
+
+     let peptide = peptide_map.get(&ion.peptide_id).unwrap();
+     // m/z values of the precursor spectrum, checked for transmission below
+     let precursor_mz = &ion.simulated_spectrum.mz;
+     let mut transmitted: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new();
+
+     // only consider fragment frames among the frames the ion occurs in
+     let fragment_frames = peptide.frame_distribution.occurrence.iter()
+         .filter(|frame| !precursor_frames.contains(*frame));
+
+     for frame in fragment_frames {
+         // go over all scans the ion occurs in
+         for scan in ion.scan_distribution.occurrence.iter() {
+             if !transmission.any_transmitted(*frame as i32, *scan as i32, precursor_mz, Some(0.5)) {
+                 continue;
+             }
+
+             let energy = collision_energy.get_collision_energy(*frame as i32, *scan as i32);
+             let quantized_energy = (energy * 100.0).round() as i32;
+
+             transmitted.insert((ion.peptide_id, ion.ion_id, peptide.sequence.sequence.clone(), ion.charge, quantized_energy));
+         }
+     }
+
+     transmitted
+ }
+
+ // TODO: take isotopic envelope into account
+ pub fn get_transmitted_ions(&self, num_threads: usize) -> (Vec<i32>, Vec<i32>, Vec<String>, Vec<i8>, Vec<f32>) {
+
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // create a thread pool
+ let peptides = self.read_peptides().unwrap();
+ let peptide_map = TimsTofSyntheticsDataHandle::build_peptide_map(&peptides);
+ let precursor_frames = TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&self.read_frames().unwrap());
+ let transmission = self.get_transmission_dia();
+ let collision_energy = self.get_collision_energy_dia();
+
+ let ions = self.read_ions().unwrap();
+
+ let trees = thread_pool.install(|| { ions.par_iter().map(|ion| {
+ TimsTofSyntheticsDataHandle::ion_map_fn(ion.clone(), &peptide_map, &precursor_frames, &transmission, &collision_energy)
+ }).collect::<Vec<_>>()
+ });
+
+ let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new();
+ for tree in trees {
+ ret_tree.extend(tree);
+ }
+
+ let mut ret_peptide_id = Vec::new();
+ let mut ret_ion_id = Vec::new();
+ let mut ret_sequence = Vec::new();
+ let mut ret_charge = Vec::new();
+ let mut ret_energy = Vec::new();
+
+ for (peptide_id, ion_id, sequence, charge, energy) in ret_tree {
+ ret_peptide_id.push(peptide_id as i32);
+ ret_ion_id.push(ion_id as i32);
+ ret_sequence.push(sequence);
+ ret_charge.push(charge);
+ ret_energy.push(energy as f32 / 100.0);
+ }
+
+ (ret_peptide_id, ret_ion_id, ret_sequence, ret_charge, ret_energy)
+ }
+
+ /// Method to build a map from peptide id to ions
+ pub fn build_peptide_to_ion_map(ions: &Vec<IonSim>) -> BTreeMap<u32, Vec<IonSim>> {
+ let mut ion_map = BTreeMap::new();
+ for ion in ions.iter() {
+ let ions = ion_map.entry(ion.peptide_id).or_insert_with(Vec::new);
+ ions.push(ion.clone());
+ }
+ ion_map
+ }
+
+ /// Method to build a map from peptide id to events (absolute number of events in the simulation)
+ pub fn build_peptide_map(peptides: &Vec<PeptidesSim>) -> BTreeMap<u32, PeptidesSim> {
+ let mut peptide_map = BTreeMap::new();
+ for peptide in peptides.iter() {
+ peptide_map.insert(peptide.peptide_id, peptide.clone());
+ }
+ peptide_map
+ }
+
+ /// Method to build a set of precursor frame ids, can be used to check if a frame is a precursor frame
+ pub fn build_precursor_frame_id_set(frames: &Vec<FramesSim>) -> HashSet<u32> {
+ frames.iter().filter(|frame| frame.parse_ms_type() == MsType::Precursor)
+ .map(|frame| frame.frame_id)
+ .collect()
+ }
+
+ // Method to build a map from peptide id to events (absolute number of events in the simulation)
+ pub fn build_peptide_to_events(peptides: &Vec<PeptidesSim>) -> BTreeMap<u32, f32> {
+ let mut peptide_to_events = BTreeMap::new();
+ for peptide in peptides.iter() {
+ peptide_to_events.insert(peptide.peptide_id, peptide.events);
+ }
+ peptide_to_events
+ }
+
+ // Method to build a map from frame id to retention time
+ pub fn build_frame_to_rt(frames: &Vec<FramesSim>) -> BTreeMap<u32, f32> {
+ let mut frame_to_rt = BTreeMap::new();
+ for frame in frames.iter() {
+ frame_to_rt.insert(frame.frame_id, frame.time);
+ }
+ frame_to_rt
+ }
+
+ // Method to build a map from scan id to mobility
+ pub fn build_scan_to_mobility(scans: &Vec<ScansSim>) -> BTreeMap<u32, f32> {
+ let mut scan_to_mobility = BTreeMap::new();
+ for scan in scans.iter() {
+ scan_to_mobility.insert(scan.scan, scan.mobility);
+ }
+ scan_to_mobility
+ }
+ pub fn build_frame_to_abundances(peptides: &Vec<PeptidesSim>) -> BTreeMap<u32, (Vec<u32>, Vec<f32>)> {
+ let mut frame_to_abundances = BTreeMap::new();
+
+ for peptide in peptides.iter() {
+ let peptide_id = peptide.peptide_id;
+ let frame_occurrence = peptide.frame_distribution.occurrence.clone();
+ let frame_abundance = peptide.frame_distribution.abundance.clone();
+
+ for (frame_id, abundance) in frame_occurrence.iter().zip(frame_abundance.iter()) {
+ let (occurrences, abundances) = frame_to_abundances.entry(*frame_id).or_insert((vec![], vec![]));
+ occurrences.push(peptide_id);
+ abundances.push(*abundance);
+ }
+ }
+
+ frame_to_abundances
+ }
+ pub fn build_peptide_to_ions(ions: &Vec<IonSim>) -> BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)> {
+ let mut peptide_to_ions = BTreeMap::new();
+
+ for ion in ions.iter() {
+ let peptide_id = ion.peptide_id;
+ let abundance = ion.relative_abundance;
+ let scan_occurrence = ion.scan_distribution.occurrence.clone();
+ let scan_abundance = ion.scan_distribution.abundance.clone();
+ let charge = ion.charge;
+ let spectrum = ion.simulated_spectrum.clone();
+
+ let (abundances, scan_occurrences, scan_abundances, charges, spectra) = peptide_to_ions.entry(peptide_id).or_insert((vec![], vec![], vec![], vec![], vec![]));
+ abundances.push(abundance);
+ scan_occurrences.push(scan_occurrence);
+ scan_abundances.push(scan_abundance);
+ charges.push(charge);
+ spectra.push(spectrum);
+ }
+
+ peptide_to_ions
+ }
+ pub fn build_fragment_ions(peptides_sim: &BTreeMap<u32, PeptidesSim>, fragment_ions: &Vec<FragmentIonSim>, num_threads: usize) -> BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)> {
+
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+ let fragment_ion_map = thread_pool.install(|| {
+ fragment_ions.par_iter()
+ .map(|fragment_ion| {
+ let key = (fragment_ion.peptide_id, fragment_ion.charge, (fragment_ion.collision_energy * 1e3).round() as i8);
+
+ let value = peptides_sim.get(&fragment_ion.peptide_id).unwrap().sequence.associate_with_predicted_intensities(
+ fragment_ion.charge as i32,
+ FragmentType::B,
+ fragment_ion.to_dense(174),
+ true,
+ true,
+ );
+
+ let fragment_ions: Vec<MzSpectrum> = value.peptide_ions.par_iter().map(|ion_series| {
+ ion_series.generate_isotopic_spectrum(
+ 1e-2,
+ 1e-3,
+ 100,
+ 1e-5,
+ )
+ }).collect();
+ (key, (value, fragment_ions))
+ })
+ .collect::<BTreeMap<_, _>>() // Collect the results into a BTreeMap
+ });
+
+ fragment_ion_map
+ }
+
+ pub fn build_fragment_ions_annotated(peptides_sim: &BTreeMap<u32, PeptidesSim>, fragment_ions: &Vec<FragmentIonSim>, num_threads: usize) -> BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec<MzSpectrumAnnotated>)> {
+
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+ let fragment_ion_map = thread_pool.install(|| {
+ fragment_ions.par_iter()
+ .map(|fragment_ion| {
+ let key = (fragment_ion.peptide_id, fragment_ion.charge, (fragment_ion.collision_energy * 1e3).round() as i8);
+
+ let value = peptides_sim.get(&fragment_ion.peptide_id).unwrap().sequence.associate_with_predicted_intensities(
+ fragment_ion.charge as i32,
+ FragmentType::B,
+ fragment_ion.to_dense(174),
+ true,
+ true,
+ );
+
+ let fragment_ions: Vec<MzSpectrumAnnotated> = value.peptide_ions.par_iter().map(|ion_series| {
+ ion_series.generate_isotopic_spectrum_annotated(
+ 1e-2,
+ 1e-3,
+ 100,
+ 1e-5,
+ )
+ }).collect();
+ (key, (value, fragment_ions))
+ })
+ .collect::<BTreeMap<_, _>>() // Collect the results into a BTreeMap
+ });
+
+ fragment_ion_map
+ }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284
use std::collections::{BTreeMap, HashSet};
+use mscore::timstof::frame::TimsFrame;
+use mscore::timstof::spectrum::TimsSpectrum;
+use mscore::data::spectrum::{MzSpectrum, MsType, IndexedMzSpectrum};
+use rusqlite::{Result};
+use std::path::Path;
+use mscore::data::peptide::PeptideIon;
+use mscore::simulation::annotation::{MzSpectrumAnnotated, PeakAnnotation, TimsFrameAnnotated, TimsSpectrumAnnotated};
+
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+use crate::sim::containers::{FramesSim, IonSim, PeptidesSim, ScansSim};
+use crate::sim::handle::TimsTofSyntheticsDataHandle;
+
/// Builder that assembles synthetic precursor frames from the tables of a simulation database.
///
/// All fields are populated by `new`, which reads ions, peptides, scans and frames through a
/// `TimsTofSyntheticsDataHandle` and derives the lookup maps below from them.
pub struct TimsTofSyntheticsPrecursorFrameBuilder {
    /// Ions grouped by peptide id.
    pub ions: BTreeMap<u32, Vec<IonSim>>,
    /// Peptide records indexed by peptide id.
    pub peptides: BTreeMap<u32, PeptidesSim>,
    /// Scan records as read from the database.
    pub scans: Vec<ScansSim>,
    /// Frame records as read from the database.
    pub frames: Vec<FramesSim>,
    /// Ids of all frames whose MS type is precursor.
    pub precursor_frame_id_set: HashSet<u32>,
    /// Frame id -> (ids of peptides occurring in the frame, their abundances in the frame).
    pub frame_to_abundances: BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
    /// Peptide id -> per-ion columns: (relative abundances, scan occurrences, scan abundances,
    /// charges, simulated spectra).
    pub peptide_to_ions: BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)>,
    /// Frame id -> retention time.
    pub frame_to_rt: BTreeMap<u32, f32>,
    /// Scan id -> mobility.
    pub scan_to_mobility: BTreeMap<u32, f32>,
    /// Peptide id -> `events` value of the peptide.
    pub peptide_to_events: BTreeMap<u32, f32>,
}
+
+impl TimsTofSyntheticsPrecursorFrameBuilder {
+ /// Create a new instance of TimsTofSynthetics
+ ///
+ /// # Arguments
+ ///
+ /// * `path` - A reference to a Path
+ ///
+ /// # Returns
+ ///
+ /// * A Result containing the TimsTofSynthetics instance
+ ///
+ pub fn new(path: &Path) -> Result<Self> {
+ let handle = TimsTofSyntheticsDataHandle::new(path)?;
+ let ions = handle.read_ions()?;
+ let peptides = handle.read_peptides()?;
+ let scans = handle.read_scans()?;
+ let frames = handle.read_frames()?;
+ Ok(Self {
+ ions: TimsTofSyntheticsDataHandle::build_peptide_to_ion_map(&ions),
+ peptides: TimsTofSyntheticsDataHandle::build_peptide_map(&peptides),
+ scans: scans.clone(),
+ frames: frames.clone(),
+ precursor_frame_id_set: TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&frames),
+ frame_to_abundances: TimsTofSyntheticsDataHandle::build_frame_to_abundances(&peptides),
+ peptide_to_ions: TimsTofSyntheticsDataHandle::build_peptide_to_ions(&ions),
+ frame_to_rt: TimsTofSyntheticsDataHandle::build_frame_to_rt(&frames),
+ scan_to_mobility: TimsTofSyntheticsDataHandle::build_scan_to_mobility(&scans),
+ peptide_to_events: TimsTofSyntheticsDataHandle::build_peptide_to_events(&peptides),
+ })
+ }
+
+ /// Build a precursor frame
+ ///
+ /// # Arguments
+ ///
+ /// * `frame_id` - A u32 representing the frame id
+ ///
+ /// # Returns
+ ///
+ /// * A TimsFrame instance
+ pub fn build_precursor_frame(&self, frame_id: u32, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, right_drag: bool) -> TimsFrame {
+
+ let ms_type = match self.precursor_frame_id_set.contains(&frame_id) {
+ true => MsType::Precursor,
+ false => MsType::Unknown,
+ };
+
+ let mut tims_spectra: Vec<TimsSpectrum> = Vec::new();
+
+ // Frame might not have any peptides
+ if !self.frame_to_abundances.contains_key(&frame_id) {
+ return TimsFrame::new(
+ frame_id as i32,
+ ms_type.clone(),
+ *self.frame_to_rt.get(&frame_id).unwrap() as f64,
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ );
+ }
+ // Get the peptide ids and abundances for the frame, should now save to unwrap since we checked if the frame is in the map
+ let (peptide_ids, abundances) = self.frame_to_abundances.get(&frame_id).unwrap();
+
+ // go over all peptides and their abundances in the frame
+ for (peptide_id, abundance) in peptide_ids.iter().zip(abundances.iter()) {
+ // jump to next peptide if the peptide_id is not in the peptide_to_ions map
+ if !self.peptide_to_ions.contains_key(&peptide_id) {
+ continue;
+ }
+
+ // one peptide can have multiple ions, occurring in multiple scans
+ let (ion_abundances, scan_occurrences, scan_abundances, _, spectra) = self.peptide_to_ions.get(&peptide_id).unwrap();
+
+ for (index, ion_abundance) in ion_abundances.iter().enumerate() {
+ let scan_occurrence = scan_occurrences.get(index).unwrap();
+ let scan_abundance = scan_abundances.get(index).unwrap();
+ let spectrum = spectra.get(index).unwrap();
+
+ for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) {
+ let abundance_factor = abundance * ion_abundance * scan_abu * self.peptide_to_events.get(&peptide_id).unwrap();
+ let scan_id = *scan;
+ let scaled_spec: MzSpectrum = spectrum.clone() * abundance_factor as f64;
+
+ let mz_spectrum = if mz_noise_precursor {
+ match uniform {
+ true => scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag),
+ false => scaled_spec.add_mz_noise_normal(precursor_noise_ppm),
+ }
+ } else {
+ scaled_spec
+ };
+
+ let tims_spec = TimsSpectrum::new(
+ frame_id as i32,
+ *scan as i32,
+ *self.frame_to_rt.get(&frame_id).unwrap() as f64,
+ *self.scan_to_mobility.get(&scan_id).unwrap() as f64,
+ ms_type.clone(),
+ IndexedMzSpectrum::new(vec![0; mz_spectrum.mz.len()], mz_spectrum.mz, mz_spectrum.intensity),
+ );
+ tims_spectra.push(tims_spec);
+ }
+ }
+ }
+
+ let tims_frame = TimsFrame::from_tims_spectra(tims_spectra);
+
+ tims_frame.filter_ranged(
+ 0.0,
+ 10000.0,
+ 0,
+ 2000,
+ 0.0,
+ 10.0,
+ 1.0,
+ 1e9,
+ )
+ }
+
+ /// Build a collection of precursor frames in parallel
+ ///
+ /// # Arguments
+ ///
+ /// * `frame_ids` - A vector of u32 representing the frame ids
+ /// * `num_threads` - A usize representing the number of threads
+ ///
+ /// # Returns
+ ///
+ /// * A vector of TimsFrame instances
+ ///
+ pub fn build_precursor_frames(&self, frame_ids: Vec<u32>, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, right_drag: bool, num_threads: usize) -> Vec<TimsFrame> {
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+ let mut tims_frames: Vec<TimsFrame> = Vec::new();
+
+ thread_pool.install(|| {
+ tims_frames = frame_ids.par_iter().map(|frame_id| self.build_precursor_frame(*frame_id, mz_noise_precursor, uniform, precursor_noise_ppm, right_drag)).collect();
+ });
+
+ tims_frames.sort_by(|a, b| a.frame_id.cmp(&b.frame_id));
+
+ tims_frames
+ }
+
+ pub fn build_precursor_frame_annotated(&self, frame_id: u32, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, right_drag: bool) -> TimsFrameAnnotated {
+
+ let ms_type = match self.precursor_frame_id_set.contains(&frame_id) {
+ true => MsType::Precursor,
+ false => MsType::Unknown,
+ };
+
+ // no peptides in the frame
+ if !self.frame_to_abundances.contains_key(&frame_id) {
+ return TimsFrameAnnotated::new(
+ frame_id as i32,
+ 0.0,
+ ms_type.clone(),
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ vec![],
+ )
+ }
+
+ let (peptide_ids, abundances) = self.frame_to_abundances.get(&frame_id).unwrap();
+ let mut tims_spectra: Vec<TimsSpectrumAnnotated> = Vec::new();
+
+ for (peptide_id, abundance) in peptide_ids.iter().zip(abundances.iter()) {
+ // jump to next peptide if the peptide_id is not in the peptide_to_ions map
+ if !self.peptide_to_ions.contains_key(&peptide_id) {
+ continue;
+ }
+
+ let (ion_abundances, scan_occurrences, scan_abundances, charges, _) = self.peptide_to_ions.get(&peptide_id).unwrap();
+
+ for (index, ion_abundance) in ion_abundances.iter().enumerate() {
+ let scan_occurrence = scan_occurrences.get(index).unwrap();
+ let scan_abundance = scan_abundances.get(index).unwrap();
+ let charge = charges.get(index).unwrap();
+ let peptide = self.peptides.get(peptide_id).unwrap();
+ let ion = PeptideIon::new(peptide.sequence.sequence.clone(), *charge as i32, *ion_abundance as f64, Some(*peptide_id as i32));
+ // TODO: make this configurable
+ let spectrum = ion.calculate_isotopic_spectrum_annotated(1e-3, 1e-8, 200, 1e-4);
+
+ for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) {
+ let abundance_factor = abundance * ion_abundance * scan_abu * self.peptide_to_events.get(&peptide_id).unwrap();
+ let scan_id = *scan;
+ let scaled_spec: MzSpectrumAnnotated = spectrum.clone() * abundance_factor as f64;
+
+ let mz_spectrum = if mz_noise_precursor {
+ match uniform {
+ true => scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag),
+ false => scaled_spec.add_mz_noise_normal(precursor_noise_ppm),
+ }
+ } else {
+ scaled_spec
+ };
+
+ let tims_spec = TimsSpectrumAnnotated::new(
+ frame_id as i32,
+ *scan,
+ *self.frame_to_rt.get(&frame_id).unwrap() as f64,
+ *self.scan_to_mobility.get(&scan_id).unwrap() as f64,
+ ms_type.clone(),
+ vec![0; mz_spectrum.mz.len()],
+ mz_spectrum);
+ tims_spectra.push(tims_spec);
+ }
+ }
+ }
+
+ let tims_frame = TimsFrameAnnotated::from_tims_spectra_annotated(tims_spectra);
+
+ let filtered_frame = tims_frame.filter_ranged(
+ 0.0,
+ 2000.0,
+ 0.0,
+ 2.0,
+ 0,
+ 1000,
+ 1.0,
+ 1e9,
+ );
+
+ TimsFrameAnnotated {
+ frame_id: filtered_frame.frame_id,
+ retention_time: filtered_frame.retention_time,
+ ms_type: filtered_frame.ms_type,
+ tof: filtered_frame.tof,
+ mz: filtered_frame.mz,
+ scan: filtered_frame.scan,
+ inv_mobility: filtered_frame.inv_mobility,
+ intensity: filtered_frame.intensity,
+ annotations: filtered_frame.annotations.iter().map(|x| {
+ let mut contributions = x.contributions.clone();
+ contributions.sort_by(|a, b| a.intensity_contribution.partial_cmp(&b.intensity_contribution).unwrap());
+ PeakAnnotation { contributions, ..*x }
+ }).collect::<Vec<PeakAnnotation>>(),
+ }
+ }
+
+ pub fn build_precursor_frames_annotated(&self, frame_ids: Vec<u32>, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, right_drag: bool, num_threads: usize) -> Vec<TimsFrameAnnotated> {
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+ let mut tims_frames: Vec<TimsFrameAnnotated> = Vec::new();
+
+ thread_pool.install(|| {
+ tims_frames = frame_ids.par_iter().map(|frame_id| self.build_precursor_frame_annotated(*frame_id, mz_noise_precursor, uniform, precursor_noise_ppm, right_drag)).collect();
+ });
+
+ tims_frames.sort_by(|a, b| a.frame_id.cmp(&b.frame_id));
+
+ tims_frames
+ }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92
use mscore::data::peptide::{FragmentType, PeptideSequence};
+
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+use serde_json::to_string;
+
/// helper function to reshape the flat prosit predicted intensity array into a 3D array where:
/// 1st dimension: 29 rows for every potential ion since prosit allows precursor sequences up to 30 amino acids
/// 2nd dimension: 2 columns, index 0 for Y ions and index 1 for B ions
/// 3rd dimension: 3 channels for charge 1, 2, and 3
///
/// The flat array is read charge by charge; within one charge the first 29 values fill
/// column 0 and the next 29 values fill column 1, so 174 values are consumed in total.
///
/// # Arguments
///
/// * `array` - A vector of f64 representing the flat prosit array
///
/// # Returns
///
/// * A 3D vector of f64 representing the reshaped prosit array
///
/// # Panics
///
/// Panics if `array` holds fewer than 174 values.
///
pub fn reshape_prosit_array(array: Vec<f64>) -> Vec<Vec<Vec<f64>>> {
    const ROWS: usize = 29;
    const ION_KINDS: usize = 2;
    const CHARGES: usize = 3;

    let mut reshaped: Vec<Vec<Vec<f64>>> = vec![vec![vec![0.0; CHARGES]; ION_KINDS]; ROWS];

    for charge in 0..CHARGES {
        for kind in 0..ION_KINDS {
            // start of the 29-value run for this (charge, ion kind) pair in the flat array
            let offset = (charge * ION_KINDS + kind) * ROWS;
            for row in 0..ROWS {
                reshaped[row][kind][charge] = array[offset + row];
            }
        }
    }

    reshaped
}
+
+/// helper function to convert a peptide ion to all possible ions and serialize the result to a json string
+///
+/// # Arguments
+///
+/// * `sequence` - A string representing the peptide sequence
+/// * `charge` - An i32 representing the charge
+/// * `intensity_pred_flat` - A vector of f64 representing the flat prosit predicted intensity array
+/// * `normalize` - A bool indicating whether to normalize the intensity values
+/// * `half_charge_one` - A bool indicating whether to use half charge one
+///
+/// # Returns
+///
+/// * A json string representing the peptide ions ready to pe put into a database
+///
+pub fn sequence_to_all_ions(
+ sequence: &str,
+ charge: i32,
+ intensity_pred_flat: &Vec<f64>, // Assuming this is the reshaped intensity predictions array
+ normalize: bool,
+ half_charge_one: bool,
+ peptide_id: Option<i32>,
+) -> String {
+
+ let peptide_sequence = PeptideSequence::new(sequence.to_string(), peptide_id);
+ let fragments = peptide_sequence.associate_with_predicted_intensities(
+ charge,
+ FragmentType::B,
+ intensity_pred_flat.clone(),
+ normalize,
+ half_charge_one
+ );
+ to_string(&fragments).unwrap()
+}
+
+pub fn sequence_to_all_ions_par(
+ sequences: Vec<&str>,
+ charges: Vec<i32>,
+ intensities_pred_flat: Vec<Vec<f64>>,
+ normalize: bool,
+ half_charge_one: bool,
+ num_threads: usize,
+ peptide_ids: Vec<Option<i32>>
+) -> Vec<String> {
+ let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+
+ let result = thread_pool.install(|| {
+ sequences.par_iter().zip(charges.par_iter()).zip(intensities_pred_flat.par_iter()).zip(peptide_ids.par_iter())
+ .map(|(((seq, charge), intensities), peptide_id)| sequence_to_all_ions(seq, *charge, intensities, normalize, half_charge_one, *peptide_id))
+ .collect()
+ });
+
+ result
+}
+
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181
use std::collections::{BTreeMap, HashMap};
+use crate::chemistry::element::{atoms_isotopic_weights, isotopic_abundance};
+
/// convolve two distributions of masses and abundances
///
/// Every peak of `dist_a` is combined with every peak of `dist_b`: masses are added and
/// abundances multiplied. Combined peaks whose mass lies within `mass_tolerance` of an
/// already collected peak are merged into it (abundances are summed, the mass of the first
/// peak is kept). Only the `max_results` most abundant peaks are retained and the result is
/// returned sorted by ascending mass.
///
/// Sorting uses a total order on `f64`, so a NaN mass or abundance in the input no longer
/// causes a panic; NaN values are simply carried through.
///
/// Arguments:
///
/// * `dist_a` - first distribution of masses and abundances
/// * `dist_b` - second distribution of masses and abundances
/// * `mass_tolerance` - mass tolerance for combining peaks
/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
/// * `max_results` - maximum number of peaks to include in the result
///
/// Returns:
///
/// * `Vec<(f64, f64)>` - combined distribution of masses and abundances
///
/// # Examples
///
/// ```
/// use rustms::algorithm::isotope::convolve;
///
/// let dist_a = vec![(100.0, 0.5), (101.0, 0.5)];
/// let dist_b = vec![(100.0, 0.5), (101.0, 0.5)];
/// let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200);
/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
/// ```
pub fn convolve(dist_a: &Vec<(f64, f64)>, dist_b: &Vec<(f64, f64)>, mass_tolerance: f64, abundance_threshold: f64, max_results: usize) -> Vec<(f64, f64)> {

    let mut result: Vec<(f64, f64)> = Vec::new();

    for &(mass_a, abundance_a) in dist_a {
        for &(mass_b, abundance_b) in dist_b {
            let combined_mass = mass_a + mass_b;
            let combined_abundance = abundance_a * abundance_b;

            // Skip entries with combined abundance below the threshold
            if combined_abundance < abundance_threshold {
                continue;
            }

            // Insert or update the combined mass in the result distribution
            match result.iter_mut().find(|(m, _)| (*m - combined_mass).abs() < mass_tolerance) {
                Some(entry) => entry.1 += combined_abundance,
                None => result.push((combined_mass, combined_abundance)),
            }
        }
    }

    // Sort by abundance (descending) to prepare for trimming; `total_cmp` cannot fail on NaN
    result.sort_by(|a, b| b.1.total_cmp(&a.1));

    // Keep the most abundant peaks only (no-op when the vector is already short enough)
    result.truncate(max_results);

    // Return the surviving peaks ordered by mass
    result.sort_by(|a, b| a.0.total_cmp(&b.0));

    result
}
+
+/// convolve a distribution with itself n times
+///
+/// Arguments:
+///
+/// * `dist` - distribution of masses and abundances
+/// * `n` - number of times to convolve the distribution with itself
+///
+/// Returns:
+///
+/// * `Vec<(f64, f64)>` - distribution of masses and abundances
+///
+/// # Examples
+///
+/// ```
+/// use rustms::algorithm::isotope::convolve_pow;
+///
+/// let dist = vec![(100.0, 0.5), (101.0, 0.5)];
+/// let result = convolve_pow(&dist, 2);
+/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
+/// ```
+pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)> {
+ if n == 0 {
+ return vec![(0.0, 1.0)]; // Return the delta distribution
+ }
+ if n == 1 {
+ return dist.clone();
+ }
+
+ let mut result = dist.clone();
+ let mut power = 2;
+
+ while power <= n {
+ result = convolve(&result, &result, 1e-6, 1e-12, 200); // Square the result to get the next power of 2
+ power *= 2;
+ }
+
+ // If n is not a power of 2, recursively fill in the remainder
+ if power / 2 < n {
+ result = convolve(&result, &convolve_pow(dist, n - power / 2, ), 1e-6, 1e-12, 200);
+ }
+
+ result
+}
+
+/// generate the isotope distribution for a given atomic composition
+///
+/// Arguments:
+///
+/// * `atomic_composition` - atomic composition of the peptide
+/// * `mass_tolerance` - mass tolerance for combining peaks
+/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
+/// * `max_result` - maximum number of peaks to include in the result
+///
+/// Returns:
+///
+/// * `Vec<(f64, f64)>` - distribution of masses and abundances
+///
+/// # Examples
+///
+/// ```
+/// use std::collections::HashMap;
+/// use rustms::algorithm::isotope::generate_isotope_distribution;
+///
+/// let mut atomic_composition = HashMap::new();
+/// atomic_composition.insert("C".to_string(), 5);
+/// atomic_composition.insert("H".to_string(), 9);
+/// atomic_composition.insert("N".to_string(), 1);
+/// atomic_composition.insert("O".to_string(), 1);
+/// let result = generate_isotope_distribution(&atomic_composition, 1e-6, 1e-12, 200);
+/// ```
+pub fn generate_isotope_distribution(
+ atomic_composition: &HashMap<String, i32>,
+ mass_tolerance: f64,
+ abundance_threshold: f64,
+ max_result: i32
+) -> Vec<(f64, f64)> {
+
+ let mut cumulative_distribution: Option<Vec<(f64, f64)>> = None;
+ let atoms_isotopic_weights: HashMap<String, Vec<f64>> = atoms_isotopic_weights().iter().map(|(k, v)| (k.to_string(), v.clone())).collect();
+ let atomic_isotope_abundance: HashMap<String, Vec<f64>> = isotopic_abundance().iter().map(|(k, v)| (k.to_string(), v.clone())).collect();
+
+ for (element, &count) in atomic_composition.iter() {
+ let elemental_isotope_weights = atoms_isotopic_weights.get(element).expect("Element not found in isotopic weights table").clone();
+ let elemental_isotope_abundance = atomic_isotope_abundance.get(element).expect("Element not found in isotopic abundance table").clone();
+
+ let element_distribution: Vec<(f64, f64)> = elemental_isotope_weights.iter().zip(elemental_isotope_abundance.iter()).map(|(&mass, &abundance
+ )| (mass, abundance)).collect();
+
+ let element_power_distribution = if count > 1 {
+ convolve_pow(&element_distribution, count)
+ } else {
+ element_distribution
+ };
+
+ cumulative_distribution = match cumulative_distribution {
+ Some(cum_dist) => Some(convolve(&cum_dist, &element_power_distribution, mass_tolerance, abundance_threshold, max_result as usize)),
+ None => Some(element_power_distribution),
+ };
+ }
+
+ let final_distribution = cumulative_distribution.expect("Peptide has no elements");
+ // Normalize the distribution
+ let total_abundance: f64 = final_distribution.iter().map(|&(_, abundance)| abundance).sum();
+ let result: Vec<_> = final_distribution.into_iter().map(|(mass, abundance)| (mass, abundance / total_abundance)).collect();
+
+ let mut sort_map: BTreeMap<i64, f64> = BTreeMap::new();
+ let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
+
+ for (mz, intensity) in result {
+ let key = quantize(mz);
+ sort_map.entry(key).and_modify(|e| *e += intensity).or_insert(intensity);
+ }
+
+ let mz: Vec<f64> = sort_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
+ let intensity: Vec<f64> = sort_map.values().map(|&intensity| intensity).collect();
+ mz.iter().zip(intensity.iter()).map(|(&mz, &intensity)| (mz, intensity)).collect()
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305
use std::collections::HashMap;
+use regex::Regex;
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+use crate::chemistry::constants::{MASS_CO, MASS_NH3, MASS_PROTON, MASS_WATER};
+use crate::chemistry::formula::calculate_mz;
+use crate::chemistry::unimod::{modification_atomic_composition, unimod_modifications_mass_numerical};
+use crate::chemistry::utility::{find_unimod_patterns, unimod_sequence_to_tokens};
+use crate::proteomics::amino_acid::{amino_acid_composition, amino_acid_masses};
+use crate::proteomics::peptide::{FragmentType, PeptideProductIon, PeptideSequence};
+
+/// calculate the monoisotopic mass of a peptide sequence
+///
+/// Annotations of the form `[UNIMOD:<id>]` are removed from the sequence and their mass
+/// shifts are looked up by numeric id. Residues and modification ids that are missing
+/// from the lookup tables contribute a mass of 0.0. Masses are accumulated in sequence
+/// order, so repeated calls return bit-identical results.
+///
+/// Arguments:
+///
+/// * `peptide_sequence` - peptide sequence, optionally carrying `[UNIMOD:<id>]` annotations
+///
+/// Returns:
+///
+/// * `mass` - monoisotopic mass of the peptide (residues + modifications + one water)
+///
+/// # Examples
+///
+/// ```
+/// use rustms::algorithm::peptide::calculate_peptide_mono_isotopic_mass;
+/// use rustms::proteomics::peptide::PeptideSequence;
+///
+/// let peptide_sequence = PeptideSequence::new("PEPTIDEH".to_string(), Some(1));
+/// let mass = calculate_peptide_mono_isotopic_mass(&peptide_sequence);
+/// let mass_quantized = (mass * 1e6).round() as i32;
+/// assert_eq!(mass_quantized, 936418877);
+/// ```
+pub fn calculate_peptide_mono_isotopic_mass(peptide_sequence: &PeptideSequence) -> f64 {
+    let amino_acid_masses = amino_acid_masses();
+    let modifications_mz_numerical = unimod_modifications_mass_numerical();
+    let pattern = Regex::new(r"\[UNIMOD:(\d+)]").expect("UNIMOD pattern is a valid regex");
+
+    let sequence = peptide_sequence.sequence.as_str();
+
+    // Mass of all annotated modifications; capture group 1 holds the numeric UNIMOD id
+    let mass_modifications: f64 = pattern
+        .captures_iter(sequence)
+        .filter_map(|caps| caps[1].parse::<u32>().ok())
+        .map(|mod_id| *modifications_mz_numerical.get(&mod_id).unwrap_or(&0.0))
+        .sum();
+
+    // Remove the modifications from the sequence
+    let sequence = pattern.replace_all(sequence, "");
+
+    // Mass of the amino acids, summed residue by residue in sequence order. Summing via a
+    // HashMap of residue counts would make the floating point result depend on the map's
+    // unspecified iteration order.
+    let mass_sequence: f64 = sequence
+        .chars()
+        .map(|aa| *amino_acid_masses.get(aa.to_string().as_str()).unwrap_or(&0.0))
+        .sum();
+
+    mass_sequence + mass_modifications + MASS_WATER
+}
+
+/// calculate the monoisotopic mass of a peptide product ion for a given fragment type
+///
+/// The input is split by `find_unimod_patterns` into the bare sequence and a list of
+/// modification masses. Residues without an entry in the amino acid mass table
+/// contribute 0.0.
+///
+/// Arguments:
+///
+/// * `sequence` - peptide sequence of the fragment
+/// * `kind` - fragment type
+///
+/// Returns:
+///
+/// * `mass` - monoisotopic mass of the product ion, or 0.0 if the bare sequence is empty
+///
+/// # Examples
+/// ```
+/// use rustms::algorithm::peptide::calculate_peptide_product_ion_mono_isotopic_mass;
+/// use rustms::proteomics::peptide::FragmentType;
+/// let sequence = "PEPTIDEH";
+/// let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, FragmentType::Y);
+/// assert_eq!(mass, 936.4188766862999);
+/// ```
+pub fn calculate_peptide_product_ion_mono_isotopic_mass(sequence: &str, kind: FragmentType) -> f64 {
+    let (stripped, modification_masses) = find_unimod_patterns(sequence);
+
+    // Nothing left to weigh once the annotations are gone
+    if stripped.is_empty() {
+        return 0.0;
+    }
+
+    let residue_masses = amino_acid_masses();
+
+    // Residue masses in sequence order, then the modification shifts
+    let mass_sequence: f64 = stripped
+        .chars()
+        .map(|residue| residue_masses.get(&residue.to_string()[..]).unwrap_or(&0.0))
+        .sum();
+    let mass_modifications: f64 = modification_masses.iter().sum();
+
+    // Mass of the intact (water-terminated) fragment sequence
+    let neutral_mass = mass_sequence + mass_modifications + MASS_WATER;
+
+    // Apply the fragment-type specific offset
+    match kind {
+        FragmentType::A => neutral_mass - MASS_CO - MASS_WATER,
+        FragmentType::B => neutral_mass - MASS_WATER,
+        FragmentType::C => neutral_mass + MASS_NH3 - MASS_WATER,
+        FragmentType::X => neutral_mass + MASS_CO - 2.0 * MASS_PROTON,
+        FragmentType::Y => neutral_mass,
+        FragmentType::Z => neutral_mass - MASS_NH3,
+    }
+}
+
+/// calculate the monoisotopic m/z of a peptide product ion for a given fragment type and charge
+///
+/// Arguments:
+///
+/// * `sequence` - peptide sequence
+/// * `kind` - fragment type
+/// * `charge` - charge state; `None` is treated as a charge of 1
+///
+/// Returns:
+///
+/// * `mz` - monoisotopic mass-over-charge of the product ion
+///
+/// # Examples
+/// ```
+/// use rustms::algorithm::peptide::calculate_product_ion_mz;
+/// use rustms::chemistry::constants::MASS_PROTON;
+/// use rustms::proteomics::peptide::FragmentType;
+/// let sequence = "PEPTIDEH";
+/// let mz = calculate_product_ion_mz(sequence, FragmentType::Y, Some(1));
+/// assert_eq!(mz, 936.4188766862999 + MASS_PROTON);
+/// ```
+pub fn calculate_product_ion_mz(sequence: &str, kind: FragmentType, charge: Option<i32>) -> f64 {
+    // Fall back to a singly charged ion when no charge is given
+    let charge_state = match charge {
+        Some(z) => z,
+        None => 1,
+    };
+    calculate_mz(calculate_peptide_product_ion_mono_isotopic_mass(sequence, kind), charge_state)
+}
+
+/// count how often each character occurs in a peptide sequence
+///
+/// Every character of the input is counted as-is; no annotation handling is performed.
+///
+/// Arguments:
+///
+/// * `sequence` - peptide sequence
+///
+/// Returns:
+///
+/// * `composition` - map from single-character string to its number of occurrences
+///
+/// # Examples
+///
+/// ```
+/// use rustms::algorithm::peptide::calculate_amino_acid_composition;
+///
+/// let sequence = "PEPTIDEH";
+/// let composition = calculate_amino_acid_composition(sequence);
+/// assert_eq!(composition.get("P"), Some(&2));
+/// assert_eq!(composition.get("E"), Some(&2));
+/// assert_eq!(composition.get("T"), Some(&1));
+/// assert_eq!(composition.get("I"), Some(&1));
+/// assert_eq!(composition.get("D"), Some(&1));
+/// assert_eq!(composition.get("H"), Some(&1));
+/// ```
+pub fn calculate_amino_acid_composition(sequence: &str) -> HashMap<String, i32> {
+    sequence.chars().fold(HashMap::new(), |mut counts, residue| {
+        *counts.entry(residue.to_string()).or_insert(0) += 1;
+        counts
+    })
+}
+
+/// calculate the atomic composition of a peptide sequence
+///
+/// The sequence is tokenized with `unimod_sequence_to_tokens`. Tokens of length one are
+/// looked up in the amino acid composition table, all other tokens in the modification
+/// composition table; tokens without an entry are skipped. Finally one water (H2O) is
+/// added to the total.
+///
+/// Arguments:
+///
+/// * `peptide_sequence` - peptide sequence to resolve into atoms
+///
+/// Returns:
+///
+/// * `HashMap<&'static str, i32>` - map from element symbol to atom count
+pub fn peptide_sequence_to_atomic_composition(peptide_sequence: &PeptideSequence) -> HashMap<&'static str, i32> {
+    let token_sequence = unimod_sequence_to_tokens(peptide_sequence.sequence.as_str(), false);
+    let mut collection: HashMap<&'static str, i32> = HashMap::new();
+
+    // Lookup tables for residues and modifications
+    let aa_compositions = amino_acid_composition();
+    let mod_compositions = modification_atomic_composition();
+
+    for token in token_sequence {
+        match token.len() {
+            // Single-character token: an amino acid
+            1 => {
+                let residue = token.chars().next().unwrap();
+                if let Some(composition) = aa_compositions.get(&residue) {
+                    composition.iter().for_each(|(key, value)| {
+                        *collection.entry(key).or_insert(0) += *value;
+                    });
+                }
+            }
+            // Anything else is looked up as a modification token
+            _ => {
+                if let Some(composition) = mod_compositions.get(&token) {
+                    composition.iter().for_each(|(key, value)| {
+                        *collection.entry(key).or_insert(0) += *value;
+                    });
+                }
+            }
+        }
+    }
+
+    // Add water
+    *collection.entry("H").or_insert(0) += 2;
+    *collection.entry("O").or_insert(0) += 1;
+
+    collection
+}
+
+/// calculate the atomic composition of a product ion
+///
+/// Starts from the atomic composition of the ion's peptide sequence (which includes one
+/// water) and applies the element deltas that belong to the fragment type.
+///
+/// Arguments:
+///
+/// * `product_ion` - a PeptideProductIon instance
+///
+/// Returns:
+///
+/// * `Vec<(&str, i32)>` - a vector of tuples representing the atomic composition of the product ion;
+///   the order of the tuples follows the iteration order of the underlying hash map
+pub fn atomic_product_ion_composition(product_ion: &PeptideProductIon) -> Vec<(&str, i32)> {
+    let mut composition = peptide_sequence_to_atomic_composition(&product_ion.ion.sequence);
+
+    // Element deltas relative to the full peptide composition
+    let deltas: &[(&'static str, i32)] = match product_ion.kind {
+        // A: peptide_mass - CO - Water
+        FragmentType::A => &[("H", -2), ("O", -2), ("C", -1)],
+        // B: peptide_mass - Water
+        FragmentType::B => &[("H", -2), ("O", -1)],
+        // C: peptide_mass + NH3 - Water
+        FragmentType::C => &[("H", 1), ("N", 1), ("O", -1)],
+        // X: peptide_mass + CO, minus two hydrogens
+        FragmentType::X => &[("C", 1), ("O", 1), ("H", -2)],
+        // Y: unchanged
+        FragmentType::Y => &[],
+        // Z: peptide_mass - NH3
+        FragmentType::Z => &[("H", -3), ("N", -1)],
+    };
+
+    for &(element, delta) in deltas {
+        *composition.entry(element).or_insert(0) += delta;
+    }
+
+    composition.into_iter().collect()
+}
+
+/// calculate the atomic composition of a peptide product ion series
+///
+/// The ions are processed in parallel on a dedicated rayon thread pool.
+///
+/// Arguments:
+///
+/// * `product_ions` - a vector of PeptideProductIon instances
+/// * `num_threads` - number of threads requested for the thread pool
+///
+/// Returns:
+///
+/// * `Vec<Vec<(String, i32)>>` - for each product ion, in input order, a vector of
+///   (element symbol, atom count) tuples
+///
+/// Panics if the thread pool cannot be built.
+pub fn fragments_to_composition(product_ions: Vec<PeptideProductIon>, num_threads: usize) -> Vec<Vec<(String, i32)>> {
+    let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
+    pool.install(|| {
+        product_ions
+            .par_iter()
+            .map(|ion| {
+                // Convert the borrowed element symbols into owned strings
+                atomic_product_ion_composition(ion)
+                    .into_iter()
+                    .map(|(element, count)| (element.to_string(), count))
+                    .collect::<Vec<(String, i32)>>()
+            })
+            .collect()
+    })
+}
+
+/// count the number of protonizable sites in a peptide sequence
+///
+/// One site is always counted for the n-terminus; each `H`, `R` or `K` character in the
+/// sequence adds one more.
+///
+/// # Arguments
+///
+/// * `sequence` - a string representing the peptide sequence
+///
+/// # Returns
+///
+/// * `usize` - the number of protonizable sites in the peptide sequence
+///
+/// # Example
+///
+/// ```
+/// use rustms::algorithm::peptide::get_num_protonizable_sites;
+///
+/// let sequence = "PEPTIDEH";
+/// let num_protonizable_sites = get_num_protonizable_sites(sequence);
+/// assert_eq!(num_protonizable_sites, 2);
+/// ```
+pub fn get_num_protonizable_sites(sequence: &str) -> usize {
+    let basic_residues = sequence
+        .chars()
+        .filter(|&residue| matches!(residue, 'H' | 'R' | 'K'))
+        .count();
+    // The n-terminus always contributes one site
+    1 + basic_residues
+}
pub const MASS_PROTON: f64 = 1.007276466621; // Mass of a proton in unified atomic mass units (u)
+pub const MASS_NEUTRON: f64 = 1.00866491595; // Mass of a neutron in unified atomic mass units (u)
+pub const MASS_ELECTRON: f64 = 0.00054857990946; // Mass of an electron in unified atomic mass units (u)
+pub const MASS_WATER: f64 = 18.0105646863; // Mass of water (H2O) in unified atomic mass units (u)
+pub const MASS_CO: f64 = 27.994915; // Mass of CO (carbon monoxide), same unit as the masses above
+pub const MASS_NH3: f64 = 17.026549; // Mass of NH3 (ammonia), same unit as the masses above
+
+// IUPAC standard conditions and physical constants
+pub const STANDARD_TEMPERATURE: f64 = 273.15; // Kelvin
+pub const STANDARD_PRESSURE: f64 = 1e5; // Pascal
+pub const ELEMENTARY_CHARGE: f64 = 1.602176634e-19; // Coulombs
+pub const K_BOLTZMANN: f64 = 1.380649e-23; // J/K
+pub const AVOGADRO: f64 = 6.02214076e23; // mol^-1
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261
use std::collections::HashMap;
+
+/// Atomic Weights
+///
+/// Builds a fresh lookup table on every call.
+///
+/// # Arguments
+///
+/// None
+///
+/// # Returns
+///
+/// * `HashMap<&'static str, f64>` - a map of atomic symbols to their monoisotopic weights
+///
+/// # Example
+///
+/// ```
+/// use rustms::chemistry::element::atomic_weights_mono_isotopic;
+///
+/// let atomic_weights = atomic_weights_mono_isotopic();
+/// assert_eq!(atomic_weights.get("H"), Some(&1.00782503223));
+/// ```
+pub fn atomic_weights_mono_isotopic() -> HashMap<&'static str, f64> {
+    HashMap::from([
+        ("H", 1.00782503223),
+        ("He", 4.00260325415),
+        ("Li", 7.0160034366),
+        ("Be", 9.012183065),
+        ("B", 11.00930536),
+        ("C", 12.0000000),
+        ("N", 14.00307400443),
+        ("O", 15.99491461957),
+        ("F", 18.99840316273),
+        ("Ne", 19.9924401762),
+        ("Na", 22.9897692820),
+        ("Mg", 23.985041697),
+        ("Al", 26.98153853),
+        ("Si", 27.97692653465),
+        ("P", 30.97376199842),
+        ("S", 31.9720711744),
+        ("Cl", 34.968852682),
+        ("Ar", 39.9623831237),
+        ("K", 38.963706679),
+        ("Ca", 39.96259098),
+        ("Sc", 44.95590828),
+        ("Ti", 47.9479463),
+        ("V", 50.9439595),
+        ("Cr", 51.9405075),
+        ("Mn", 54.9380455),
+        ("Fe", 55.9349375),
+        ("Co", 58.9331955),
+        ("Ni", 57.9353429),
+        ("Cu", 62.9295975),
+        ("Zn", 63.9291422),
+        ("Ga", 68.9255735),
+        ("Ge", 73.9211778),
+        ("As", 74.9215965),
+        ("Se", 79.9165218),
+        ("Br", 78.9183376),
+        ("Kr", 83.911507),
+        ("Rb", 84.9117893),
+        ("Sr", 87.9056125),
+        ("Y", 88.905842),
+        ("Zr", 89.9046977),
+        ("Nb", 92.906373),
+        ("Mo", 97.905404),
+        ("Tc", 98.0),
+        ("Ru", 101.904349),
+        ("Rh", 102.905504),
+        ("Pd", 105.903485),
+        ("Ag", 106.905093),
+        ("Cd", 113.903358),
+        ("In", 114.903878),
+        ("Sn", 119.902199),
+        ("Sb", 120.903818),
+        ("Te", 129.906224),
+        ("I", 126.904473),
+        ("Xe", 131.904155),
+        ("Cs", 132.905447),
+        ("Ba", 137.905247),
+        ("La", 138.906355),
+        ("Ce", 139.905442),
+        ("Pr", 140.907662),
+        ("Nd", 141.907732),
+        ("Pm", 145.0),
+        ("Sm", 151.919728),
+        ("Eu", 152.921225),
+        ("Gd", 157.924103),
+        ("Tb", 158.925346),
+        ("Dy", 163.929171),
+        ("Ho", 164.930319),
+        ("Er", 165.930290),
+        ("Tm", 168.934211),
+        ("Yb", 173.938859),
+        ("Lu", 174.940770),
+        ("Hf", 179.946550),
+        ("Ta", 180.947992),
+        ("W", 183.950932),
+        ("Re", 186.955744),
+        ("Os", 191.961467),
+        ("Ir", 192.962917),
+        ("Pt", 194.964766),
+        ("Au", 196.966543),
+        ("Hg", 201.970617),
+        ("Tl", 204.974427),
+        ("Pb", 207.976627),
+        ("Bi", 208.980384),
+        ("Po", 209.0),
+        ("At", 210.0),
+        ("Rn", 222.0),
+        ("Fr", 223.0),
+        ("Ra", 226.0),
+        ("Ac", 227.0),
+        ("Th", 232.038054),
+        ("Pa", 231.035882),
+        ("U", 238.050786),
+        ("Np", 237.0),
+        ("Pu", 244.0),
+        ("Am", 243.0),
+        ("Cm", 247.0),
+        ("Bk", 247.0),
+        ("Cf", 251.0),
+        ("Es", 252.0),
+        ("Fm", 257.0),
+        ("Md", 258.0),
+        ("No", 259.0),
+        ("Lr", 262.0),
+        ("Rf", 267.0),
+        ("Db", 270.0),
+        ("Sg", 271.0),
+        ("Bh", 270.0),
+        ("Hs", 277.0),
+        ("Mt", 276.0),
+        ("Ds", 281.0),
+        ("Rg", 280.0),
+        ("Cn", 285.0),
+        ("Nh", 284.0),
+        ("Fl", 289.0),
+        ("Mc", 288.0),
+        ("Lv", 293.0),
+        ("Ts", 294.0),
+        ("Og", 294.0),
+    ])
+}
+
+/// Isotopic Weights
+///
+/// Covers the elements H through Se. Builds a fresh lookup table on every call.
+///
+/// NOTE(review): for several elements (e.g. `He`, `Li`, `Ne`, `Ar`) the number of weights
+/// listed here differs from the number of abundances in `isotopic_abundance` — confirm
+/// the intended pairing before relying on index-wise alignment of the two tables.
+///
+/// # Arguments
+///
+/// None
+///
+/// # Returns
+///
+/// * `HashMap<&'static str, Vec<f64>>` - a map of atomic symbols to their isotopic weights
+///
+/// # Example
+///
+/// ```
+/// use rustms::chemistry::element::atoms_isotopic_weights;
+///
+/// let isotopic_weights = atoms_isotopic_weights();
+/// assert_eq!(isotopic_weights.get("H"), Some(&vec![1.00782503223, 2.01410177812]));
+/// ```
+pub fn atoms_isotopic_weights() -> HashMap<&'static str, Vec<f64>> {
+    HashMap::from([
+        ("H", vec![1.00782503223, 2.01410177812]),
+        ("He", vec![4.00260325415]),
+        ("Li", vec![7.0160034366]),
+        ("Be", vec![9.012183065]),
+        ("B", vec![11.00930536]),
+        ("C", vec![12.0000000, 13.00335483507]),
+        ("N", vec![14.00307400443, 15.00010889888]),
+        ("O", vec![15.99491461957, 16.99913175650, 17.99915961286]),
+        ("F", vec![18.99840316273]),
+        ("Ne", vec![19.9924401762]),
+        ("Na", vec![22.9897692820]),
+        ("Mg", vec![23.985041697]),
+        ("Al", vec![26.98153853]),
+        ("Si", vec![27.97692653465]),
+        ("P", vec![30.97376199842]),
+        ("S", vec![31.9720711744, 32.9714589098, 33.967867004]),
+        ("Cl", vec![34.968852682, 36.965902602]),
+        ("Ar", vec![39.9623831237, 35.967545105]),
+        ("K", vec![38.963706679, 39.963998166, 40.961825257]),
+        ("Ca", vec![39.96259098, 41.95861783, 42.95876644, 43.95548156, 45.95369276]),
+        ("Sc", vec![44.95590828]),
+        ("Ti", vec![47.9479463, 45.95262772, 46.95175879, 47.94794198, 49.9447912]),
+        ("V", vec![50.9439595]),
+        ("Cr", vec![51.9405075, 49.94604183, 50.9439637, 51.94050623, 53.93887916]),
+        ("Mn", vec![54.9380455]),
+        ("Fe", vec![55.9349375, 53.93960899, 54.93804514, 55.93493739, 56.93539400, 57.93327443]),
+        ("Co", vec![58.9331955]),
+        ("Ni", vec![57.9353429, 58.9343467, 59.93078588, 60.93105557, 61.92834537, 63.92796682]),
+        ("Cu", vec![62.9295975, 61.92834537, 63.92914201]),
+        ("Zn", vec![63.9291422, 65.92603381, 66.92712775, 67.92484455, 69.9253192]),
+        ("Ga", vec![68.9255735]),
+        ("Ge", vec![73.9211778, 71.922075826, 72.923458956, 73.921177761, 75.921402726]),
+        ("As", vec![74.9215965]),
+        ("Se", vec![79.9165218, 73.9224764, 75.9192136, 76.9199140, 77.9173095, 79.9165218, 81.9166995]),
+    ])
+}
+
+/// Isotopic Abundance
+///
+/// Covers the elements H through Se. Builds a fresh lookup table on every call.
+///
+/// NOTE(review): for several elements (e.g. `He`, `Li`, `Ne`, `Ar`) the number of
+/// abundances listed here differs from the number of weights in `atoms_isotopic_weights`
+/// — confirm the intended pairing before relying on index-wise alignment of the two tables.
+///
+/// # Arguments
+///
+/// None
+///
+/// # Returns
+///
+/// * `HashMap<&'static str, Vec<f64>>` - a map of atomic symbols to their isotopic abundances
+///
+/// # Example
+///
+/// ```
+/// use rustms::chemistry::element::isotopic_abundance;
+///
+/// let isotopic_abundance = isotopic_abundance();
+/// assert_eq!(isotopic_abundance.get("H"), Some(&vec![0.999885, 0.000115]));
+/// ```
+pub fn isotopic_abundance() -> HashMap<&'static str, Vec<f64>> {
+    HashMap::from([
+        ("H", vec![0.999885, 0.000115]),
+        ("He", vec![0.99999866, 0.00000134]),
+        ("Li", vec![0.0759, 0.9241]),
+        ("Be", vec![1.0]),
+        ("B", vec![0.199, 0.801]),
+        ("C", vec![0.9893, 0.0107]),
+        ("N", vec![0.99632, 0.00368]),
+        ("O", vec![0.99757, 0.00038, 0.00205]),
+        ("F", vec![1.0]),
+        ("Ne", vec![0.9048, 0.0027, 0.0925]),
+        ("Na", vec![0.5429, 0.4571]),
+        ("Mg", vec![0.7899, 0.1000, 0.1101]),
+        ("Al", vec![1.0]),
+        ("Si", vec![0.9223, 0.0467, 0.0310]),
+        ("P", vec![1.0]),
+        ("S", vec![0.9493, 0.0076, 0.0429]),
+        ("Cl", vec![0.7578, 0.2422]),
+        ("Ar", vec![0.003365, 0.000632, 0.996003]),
+        ("K", vec![0.932581, 0.000117, 0.067302]),
+        ("Ca", vec![0.96941, 0.00647, 0.00135, 0.02086, 0.00187]),
+        ("Sc", vec![1.0]),
+        ("Ti", vec![0.0825, 0.0744, 0.7372, 0.0541, 0.0518]),
+        ("V", vec![0.9975, 0.0025]),
+        ("Cr", vec![0.04345, 0.83789, 0.09501, 0.02365, 0.0001]),
+        ("Mn", vec![1.0]),
+        ("Fe", vec![0.05845, 0.91754, 0.02119, 0.00282, 0.0002]),
+        ("Co", vec![1.0]),
+        ("Ni", vec![0.680769, 0.262231, 0.011399, 0.036345, 0.009256, 0.0011]),
+        ("Cu", vec![0.6915, 0.3085]),
+        ("Zn", vec![0.4917, 0.2773, 0.0404, 0.1845, 0.0061]),
+        ("Ga", vec![0.60108, 0.39892]),
+        ("Ge", vec![0.2052, 0.2745, 0.0775, 0.3652, 0.0775]),
+        ("As", vec![1.0]),
+        ("Se", vec![0.0089, 0.0937, 0.0763, 0.2377, 0.4961, 0.0873]),
+    ])
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98
use crate::chemistry::constants::MASS_PROTON;
+
+/// convert 1 over reduced ion mobility (1/k0) to CCS
+///
+/// Arguments:
+///
+/// * `one_over_k0` - 1 over reduced ion mobility (1/k0)
+/// * `mz` - mass-over-charge of the ion
+/// * `charge` - charge state of the ion
+/// * `mass_gas` - mass of drift gas (N2)
+/// * `temp` - temperature of the drift gas in C°
+/// * `t_diff` - offset added to `temp` to translate from C° to K
+///
+/// Returns:
+///
+/// * `ccs` - collision cross-section
+///
+/// # Examples
+///
+/// ```
+/// use rustms::chemistry::formula::one_over_reduced_mobility_to_ccs;
+///
+/// let ccs = one_over_reduced_mobility_to_ccs(0.5, 1000.0, 2, 28.013, 31.85, 273.15);
+/// assert_eq!(ccs, 201.64796734428452);
+/// ```
+pub fn one_over_reduced_mobility_to_ccs(
+    one_over_k0: f64,
+    mz: f64,
+    charge: u32,
+    mass_gas: f64,
+    temp: f64,
+    t_diff: f64,
+) -> f64 {
+    const SUMMARY_CONSTANT: f64 = 18509.8632163405;
+
+    let z = charge as f64;
+    // m/z times charge, combined with the gas mass into the reduced mass
+    let ion_mass = mz * z;
+    let reduced_mass = (ion_mass * mass_gas) / (ion_mass + mass_gas);
+    let k0 = 1.0 / one_over_k0;
+
+    SUMMARY_CONSTANT * z / (reduced_mass * (temp + t_diff)).sqrt() / k0
+}
+
+
+/// convert CCS to 1 over reduced ion mobility (1/k0)
+///
+/// Arguments:
+///
+/// * `ccs` - collision cross-section
+/// * `mz` - mass-over-charge of the ion
+/// * `charge` - charge state of the ion
+/// * `mass_gas` - mass of drift gas (N2)
+/// * `temp` - temperature of the drift gas in C°
+/// * `t_diff` - offset added to `temp` to translate from C° to K
+///
+/// Returns:
+///
+/// * `one_over_k0` - 1 over reduced ion mobility (1/k0)
+///
+/// # Examples
+///
+/// ```
+/// use rustms::chemistry::formula::ccs_to_one_over_reduced_mobility;
+///
+/// let k0 = ccs_to_one_over_reduced_mobility(806.5918693771381, 1000.0, 2, 28.013, 31.85, 273.15);
+/// assert_eq!(k0, 2.0);
+/// ```
+pub fn ccs_to_one_over_reduced_mobility(
+    ccs: f64,
+    mz: f64,
+    charge: u32,
+    mass_gas: f64,
+    temp: f64,
+    t_diff: f64,
+) -> f64 {
+    const SUMMARY_CONSTANT: f64 = 18509.8632163405;
+
+    let z = charge as f64;
+    // m/z times charge, combined with the gas mass into the reduced mass
+    let ion_mass = mz * z;
+    let reduced_mass = (ion_mass * mass_gas) / (ion_mass + mass_gas);
+
+    ((reduced_mass * (temp + t_diff)).sqrt() * ccs) / (SUMMARY_CONSTANT * z)
+}
+
+/// calculate the m/z of an ion
+///
+/// One proton mass is added per charge before dividing by the charge. A charge of 0
+/// divides by zero and therefore yields a non-finite value.
+///
+/// Arguments:
+///
+/// * `monoisotopic_mass` - monoisotopic mass of the ion
+/// * `charge` - charge state of the ion
+///
+/// Returns:
+///
+/// * `mz` - mass-over-charge of the ion
+///
+/// # Examples
+///
+/// ```
+/// use rustms::chemistry::formula::calculate_mz;
+///
+/// let mz = calculate_mz(1000.0, 2);
+/// assert_eq!(mz, 501.007276466621);
+/// ```
+pub fn calculate_mz(monoisotopic_mass: f64, charge: i32) -> f64 {
+    let z = charge as f64;
+    (monoisotopic_mass + z * MASS_PROTON) / z
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136
use std::collections::HashMap;
+use crate::algorithm::isotope::generate_isotope_distribution;
+use crate::chemistry::constants::MASS_PROTON;
+use crate::chemistry::element::atomic_weights_mono_isotopic;
+use crate::ms::spectrum::MzSpectrum;
+
+/// A chemical sum formula together with its parsed element counts.
+pub struct SumFormula {
+    /// the formula string exactly as it was passed to `SumFormula::new`
+    pub formula: String,
+    /// map from element symbol to number of atoms
+    pub elements: HashMap<String, i32>,
+}
+
+impl SumFormula {
+    /// Create a `SumFormula` by parsing `formula` with `parse_formula`.
+    ///
+    /// Arguments:
+    ///
+    /// * `formula` - chemical formula, e.g. `"C6H12O6"`
+    ///
+    /// # Panics
+    ///
+    /// Panics if `parse_formula` rejects the formula; the panic message names the
+    /// formula and the parse error.
+    pub fn new(formula: &str) -> Self {
+        let elements = parse_formula(formula)
+            .unwrap_or_else(|err| panic!("invalid sum formula '{}': {}", formula, err));
+        SumFormula {
+            formula: formula.to_string(),
+            elements,
+        }
+    }
+
+    /// Calculate the monoisotopic weight of the chemical formula.
+    ///
+    /// The per-element contributions are added in ascending order of the element symbol,
+    /// so the floating point result does not depend on hash map iteration order.
+    ///
+    /// Arguments:
+    ///
+    /// None
+    ///
+    /// Returns:
+    ///
+    /// * `f64` - The monoisotopic weight of the chemical formula.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `elements` contains a symbol that is missing from the atomic weight table.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use rustms::chemistry::sum_formula::SumFormula;
+    ///
+    /// let formula = "H2O";
+    /// let sum_formula = SumFormula::new(formula);
+    /// assert_eq!(sum_formula.monoisotopic_weight(), 18.01056468403);
+    /// ```
+    pub fn monoisotopic_weight(&self) -> f64 {
+        let atomic_weights = atomic_weights_mono_isotopic();
+
+        // Fix the summation order: HashMap iteration order is unspecified and floating
+        // point addition is not associative.
+        let mut terms: Vec<(&String, &i32)> = self.elements.iter().collect();
+        terms.sort_unstable_by(|a, b| a.0.cmp(b.0));
+
+        terms.into_iter().fold(0.0, |acc, (element, count)| {
+            acc + atomic_weights[element.as_str()] * *count as f64
+        })
+    }
+
+    /// Generate the isotope distribution of the chemical formula.
+    ///
+    /// The masses returned by `generate_isotope_distribution` are converted to m/z by
+    /// adding one proton mass per charge and dividing by the charge.
+    ///
+    /// Arguments:
+    ///
+    /// * `charge` - The charge state of the ion.
+    ///
+    /// Returns:
+    ///
+    /// * `MzSpectrum` - The isotope distribution of the chemical formula.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use rustms::chemistry::sum_formula::SumFormula;
+    /// use rustms::ms::spectrum::MzSpectrum;
+    ///
+    /// let formula = "C6H12O6";
+    /// let sum_formula = SumFormula::new(formula);
+    /// let isotope_distribution = sum_formula.isotope_distribution(1);
+    /// let mut first_mz = *isotope_distribution.mz.first().unwrap();
+    /// // round to first 5 decimal places
+    /// first_mz = (first_mz * 1e5).round() / 1e5;
+    /// assert_eq!(first_mz, 181.07066);
+    /// ```
+    pub fn isotope_distribution(&self, charge: i32) -> MzSpectrum {
+        let distribution = generate_isotope_distribution(&self.elements, 1e-3, 1e-9, 200);
+        let intensity = distribution.iter().map(|(_, i)| *i).collect();
+        let mz = distribution.iter().map(|(m, _)| (*m + charge as f64 * MASS_PROTON) / charge as f64).collect();
+        MzSpectrum::new(mz, intensity)
+    }
+}
+
+/// Parse a chemical formula into a map of elements and their counts.
+///
+/// The formula is a sequence of element symbols (one ASCII uppercase letter followed by
+/// optional ASCII lowercase letters), each optionally followed by a decimal atom count.
+/// A missing count means 1. Repeated symbols are summed. Whitespace is ignored.
+///
+/// Arguments:
+///
+/// * `formula` - The chemical formula to parse.
+///
+/// Returns:
+///
+/// * `Result<HashMap<String, i32>, String>` - A map of elements and their counts, or an
+///   error message if the formula contains an unknown element, an unsupported character
+///   (for example parentheses), or a count that does not fit into an `i32`.
+///
+/// # Example
+///
+/// ```
+/// use rustms::chemistry::sum_formula::parse_formula;
+///
+/// let formula = "H2O";
+/// let elements = parse_formula(formula).unwrap();
+/// assert_eq!(elements.get("H"), Some(&2));
+/// assert_eq!(elements.get("O"), Some(&1));
+/// ```
+pub fn parse_formula(formula: &str) -> Result<HashMap<String, i32>, String> {
+    let atomic_weights = atomic_weights_mono_isotopic();
+    let mut element_counts: HashMap<String, i32> = HashMap::new();
+    let mut chars = formula.chars().filter(|c| !c.is_whitespace()).peekable();
+
+    while let Some(c) = chars.next() {
+        // Every token has to start with the uppercase letter of an element symbol
+        if !c.is_ascii_uppercase() {
+            return Err(format!("Unexpected character '{}' in formula: {}", c, formula));
+        }
+
+        // Element symbol: uppercase letter plus any following lowercase letters
+        let mut symbol = String::from(c);
+        while let Some(lower) = chars.next_if(|next_c| next_c.is_ascii_lowercase()) {
+            symbol.push(lower);
+        }
+
+        // Optional atom count
+        let mut digits = String::new();
+        while let Some(digit) = chars.next_if(|next_c| next_c.is_ascii_digit()) {
+            digits.push(digit);
+        }
+
+        if !atomic_weights.contains_key(symbol.as_str()) {
+            return Err(format!("Unknown element: {}", symbol));
+        }
+
+        // A missing count means one atom; an unparsable count is an error instead of
+        // silently falling back to 1
+        let count = if digits.is_empty() {
+            1
+        } else {
+            digits
+                .parse::<i32>()
+                .map_err(|err| format!("Invalid count '{}' for element {}: {}", digits, symbol, err))?
+        };
+
+        let total = element_counts.entry(symbol.clone()).or_insert(0);
+        *total = total
+            .checked_add(count)
+            .ok_or_else(|| format!("Atom count overflow for element: {}", symbol))?;
+    }
+
+    Ok(element_counts)
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133
use std::collections::HashMap;
+
/// Atomic compositions of the supported UNIMOD modifications
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<String, HashMap<&'static str, i32>>` - a map from a UNIMOD tag such as
///   `"[UNIMOD:1]"` to the signed atom count per element symbol of that modification
///
/// # Example
///
/// ```
/// use rustms::chemistry::unimod::modification_atomic_composition;
/// use std::collections::HashMap;
///
/// let composition = modification_atomic_composition();
/// assert_eq!(composition.get("[UNIMOD:1]"), Some(&HashMap::from([("C", 2), ("H", 2), ("O", 1)])));
/// ```
pub fn modification_atomic_composition() -> HashMap<String, HashMap<&'static str, i32>> {
    // One row per modification: (UNIMOD tag, [(element symbol, signed atom count)]).
    const TABLE: &[(&str, &[(&str, i32)])] = &[
        ("[UNIMOD:1]", &[("C", 2), ("H", 2), ("O", 1)]), // Acetyl
        ("[UNIMOD:3]", &[("N", 2), ("C", 10), ("H", 14), ("O", 2), ("S", 1)]), // Biotin
        ("[UNIMOD:4]", &[("C", 2), ("H", 3), ("O", 1), ("N", 1)]), // Carbamidomethyl
        ("[UNIMOD:7]", &[("H", -1), ("N", -1), ("O", 1)]), // Deamidation
        ("[UNIMOD:21]", &[("H", 1), ("O", 3), ("P", 1)]), // Phosphorylation
        ("[UNIMOD:34]", &[("H", 2), ("C", 1)]), // Methylation
        ("[UNIMOD:35]", &[("O", 1)]), // Oxidation
        // ("[UNIMOD:43]", &[("C", 8), ("H", 15), ("N", 1), ("O", 6)]), // HexNAc ?? (left disabled, unverified)
        ("[UNIMOD:58]", &[("C", 3), ("H", 4), ("O", 1)]), // Propionyl
        ("[UNIMOD:121]", &[("C", 4), ("H", 6), ("O", 2), ("N", 2)]), // ubiquitinylation residue (GlyGly)
        ("[UNIMOD:122]", &[("C", 1), ("O", 1)]), // Formylation
        ("[UNIMOD:312]", &[("C", 3), ("H", 5), ("O", 2), ("N", 1), ("S", 1)]), // Cysteinyl
        ("[UNIMOD:354]", &[("H", -1), ("O", 2), ("N", 1)]), // Oxidation to nitro
        // ("[UNIMOD:408]", &[("C", -1), ("H", -2), ("N", 1), ("O", 2)]), // Glycosyl ?? (left disabled, unverified)
        ("[UNIMOD:747]", &[("C", 3), ("H", 2), ("O", 3)]), // Malonylation
        ("[UNIMOD:1289]", &[("C", 4), ("H", 6), ("O", 1)]), // Butyryl
        ("[UNIMOD:1363]", &[("C", 4), ("H", 4), ("O", 1)]), // Crotonylation
    ];

    TABLE
        .iter()
        .map(|&(tag, atoms)| {
            let atom_counts = atoms.iter().copied().collect::<HashMap<&'static str, i32>>();
            (tag.to_string(), atom_counts)
        })
        .collect()
}
+
/// Masses of the supported UNIMOD modifications, keyed by tag
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<&'static str, f64>` - a map from a UNIMOD tag such as `"[UNIMOD:1]"` to the mass
///   of that modification
///
/// # Example
///
/// ```
/// use rustms::chemistry::unimod::unimod_modifications_mass;
///
/// let mass = unimod_modifications_mass();
/// assert_eq!(mass.get("[UNIMOD:1]"), Some(&42.010565));
/// ```
pub fn unimod_modifications_mass() -> HashMap<&'static str, f64> {
    // Rows are listed in ascending UNIMOD accession order.
    HashMap::from([
        ("[UNIMOD:1]", 42.010565),
        ("[UNIMOD:3]", 226.077598),
        ("[UNIMOD:4]", 57.021464),
        ("[UNIMOD:7]", 0.984016),
        ("[UNIMOD:21]", 79.966331),
        ("[UNIMOD:34]", 14.015650),
        ("[UNIMOD:35]", 15.994915),
        ("[UNIMOD:36]", 28.031300),
        ("[UNIMOD:37]", 42.046950),
        ("[UNIMOD:43]", 203.079373),
        ("[UNIMOD:58]", 56.026215),
        ("[UNIMOD:64]", 100.016044),
        ("[UNIMOD:121]", 114.042927),
        ("[UNIMOD:122]", 27.994915),
        ("[UNIMOD:312]", 119.004099),
        ("[UNIMOD:354]", 44.985078),
        ("[UNIMOD:408]", 148.037173),
        ("[UNIMOD:747]", 86.000394),
        ("[UNIMOD:1289]", 70.041865),
        ("[UNIMOD:1363]", 68.026215),
        ("[UNIMOD:1848]", 114.031694),
        ("[UNIMOD:1849]", 86.036779),
    ])
}
+
/// Masses of the supported UNIMOD modifications, keyed by numerical id
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<u32, f64>` - a map from the numerical UNIMOD id (e.g. `58`) to the mass of that
///   modification
///
/// # Example
///
/// ```
/// use rustms::chemistry::unimod::unimod_modifications_mass_numerical;
///
/// let mass = unimod_modifications_mass_numerical();
/// assert_eq!(mass.get(&58), Some(&56.026215));
/// ```
pub fn unimod_modifications_mass_numerical() -> HashMap<u32, f64> {
    // Rows are listed in ascending id order.
    HashMap::from([
        (1, 42.010565),
        (3, 226.077598),
        (4, 57.021464),
        (7, 0.984016),
        (21, 79.966331),
        (34, 14.015650),
        (35, 15.994915),
        (36, 28.031300),
        (37, 42.046950),
        (43, 203.079373),
        (58, 56.026215),
        (64, 100.016044),
        (121, 114.042927),
        (122, 27.994915),
        (312, 119.004099),
        (354, 44.985078),
        (408, 148.037173),
        (747, 86.000394),
        (1289, 70.041865),
        (1363, 68.026215),
        (1848, 114.031694),
        (1849, 86.036779),
    ])
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181
use regex::Regex;
+use crate::chemistry::unimod::unimod_modifications_mass;
+
+/// Convert a peptide sequence with UNIMOD annotations to a list of tokens
+///
+/// # Arguments
+///
+/// * `sequence` - a string slice of the peptide sequence
+/// * `group_modifications` - a boolean indicating whether to group the amino acid before the UNIMOD with the UNIMOD
+///
+/// # Returns
+///
+/// * `Vec<String>` - a vector of strings representing the tokens
+///
+/// # Example
+///
+/// ```
+/// use rustms::chemistry::utility::unimod_sequence_to_tokens;
+///
+/// let sequence = "PEPTIDE[UNIMOD:1]H";
+/// let tokens = unimod_sequence_to_tokens(sequence, false);
+/// assert_eq!(tokens, vec!["P", "E", "P", "T", "I", "D", "E", "[UNIMOD:1]", "H"]);
+/// let tokens = unimod_sequence_to_tokens(sequence, true);
+/// assert_eq!(tokens, vec!["P", "E", "P", "T", "I", "D", "E[UNIMOD:1]", "H"]);
+/// ```
+pub fn unimod_sequence_to_tokens(sequence: &str, group_modifications: bool) -> Vec<String> {
+ let pattern = Regex::new(r"\[UNIMOD:\d+]").unwrap();
+ let mut tokens = Vec::new();
+ let mut last_index = 0;
+
+ for mat in pattern.find_iter(sequence) {
+ if group_modifications {
+ // When grouping, include the amino acid before the UNIMOD in the token
+ let pre_mod_sequence = &sequence[last_index..mat.start()];
+ let aa_sequence = if pre_mod_sequence.is_empty() {
+ ""
+ } else {
+ &pre_mod_sequence[..pre_mod_sequence.len() - 1]
+ };
+ tokens.extend(aa_sequence.chars().map(|c| c.to_string()));
+
+ // Group the last amino acid with the UNIMOD as one token
+ let grouped_mod = format!("{}{}", pre_mod_sequence.chars().last().unwrap_or_default().to_string(), &sequence[mat.start()..mat.end()]);
+ tokens.push(grouped_mod);
+ } else {
+ // Extract the amino acids before the current UNIMOD and add them as individual tokens
+ let aa_sequence = &sequence[last_index..mat.start()];
+ tokens.extend(aa_sequence.chars().map(|c| c.to_string()));
+
+ // Add the UNIMOD as its own token
+ let unimod = &sequence[mat.start()..mat.end()];
+ tokens.push(unimod.to_string());
+ }
+
+ // Update last_index to the end of the current UNIMOD
+ last_index = mat.end();
+ }
+
+ if !group_modifications || last_index < sequence.len() {
+ // Add the remaining amino acids after the last UNIMOD as individual tokens
+ let remaining_aa_sequence = &sequence[last_index..];
+ tokens.extend(remaining_aa_sequence.chars().map(|c| c.to_string()));
+ }
+
+ tokens
+}
+
+/// Convert a peptide sequence with UNIMOD annotations to a tuple of plain sequence and for each
+/// position in the sequence, the mass of the modification at that position (0 if no modification),
+/// which is the representation of sequence nad modifications used by SAGE
+///
+/// # Arguments
+///
+/// * `input_string` - a string slice of the peptide sequence
+///
+/// # Returns
+///
+/// * `(String, Vec<f64>)` - a tuple of the plain sequence and a vector of f64 representing the mass
+/// of the modification at each position in the sequence
+///
+/// # Example
+///
+/// ```
+/// use rustms::chemistry::utility::find_unimod_patterns;
+///
+/// let sequence = "PEPTIDE[UNIMOD:1]H";
+/// let (stripped_sequence, mods) = find_unimod_patterns(sequence);
+/// assert_eq!(stripped_sequence, "PEPTIDEH");
+/// assert_eq!(mods, vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 42.010565, 0.0]);
+/// ```
+pub fn find_unimod_patterns(input_string: &str) -> (String, Vec<f64>) {
+ let results = extract_unimod_patterns(input_string);
+ let stripped_sequence = remove_unimod_annotation(input_string);
+ let index_list = generate_index_list(&results, input_string);
+ let mods = calculate_modifications(&index_list, &stripped_sequence);
+ (stripped_sequence, mods)
+}
+
+fn remove_unimod_annotation(sequence: &str) -> String {
+ let pattern = Regex::new(r"\[UNIMOD:\d+]").unwrap();
+ pattern.replace_all(sequence, "").to_string()
+}
+
+fn extract_unimod_patterns(input_string: &str) -> Vec<(usize, usize, String)> {
+ let pattern = Regex::new(r"\[UNIMOD:\d+]").unwrap();
+ pattern.find_iter(input_string)
+ .map(|mat| (mat.start(), mat.end(), mat.as_str().to_string()))
+ .collect()
+}
+
/// Maps each tag match to the position it modifies in the tag-free sequence.
///
/// # Arguments
///
/// * `results` - `(start, end, _)` byte ranges of the tags inside `sequence`, in order of appearance
/// * `sequence` - the annotated sequence the ranges refer to
///
/// # Returns
///
/// * `Vec<(usize, String)>` - for every tag, the index of the character directly in front of it
///   after all earlier tags have been removed, together with the tag text. A tag that has no
///   character in front of it (at the start, or directly after leading tags) maps to index 0.
fn generate_index_list(results: &[(usize, usize, String)], sequence: &str) -> Vec<(usize, String)> {
    let mut index_list = Vec::with_capacity(results.len());
    // Number of tag bytes that precede the current tag and are gone in the stripped sequence.
    let mut chars_removed_counter = 0usize;

    for (start, end, _) in results {
        // Equals `start - 1 - chars_removed_counter` whenever a character precedes the tag;
        // saturating keeps tags without a preceding character at index 0 instead of underflowing.
        let later_aa_index = start.saturating_sub(chars_removed_counter + 1);

        index_list.push((later_aa_index, sequence[*start..*end].to_string()));
        chars_removed_counter += end - start;
    }

    index_list
}
+
+fn calculate_modifications(index_list: &[(usize, String)], stripped_sequence: &str) -> Vec<f64> {
+ let mut mods = vec![0.0; stripped_sequence.len()];
+ for (index, mod_str) in index_list {
+ if let Some(mass) = unimod_modifications_mass().get(mod_str.as_str()) {
+ mods[*index] += mass;
+ }
+ }
+ mods
+}
+
/// Reshape the flat prosit array into a 3D array of shape (29, 2, 3)
///
/// The flat input is read as three consecutive groups of 58 values. Inside each group the first
/// 29 values go to index 0 of the second axis (labelled "Y ion values" by the original author)
/// and the next 29 values go to index 1 ("B ion values"). The group number becomes the index on
/// the third axis.
///
/// # Arguments
///
/// * `flat_array` - a vector of f64 representing the flat prosit array
///
/// # Returns
///
/// * `Vec<Vec<Vec<f64>>>` - a 3D array of shape (29, 2, 3)
///
/// # Panics
///
/// Panics if `flat_array` holds fewer than 174 values; extra values are ignored.
///
/// # Example
///
/// ```
/// use rustms::chemistry::utility::reshape_prosit_array;
///
/// let flat_array = vec![0.0; 174];
/// let reshaped_array = reshape_prosit_array(flat_array);
/// assert_eq!(reshaped_array.len(), 29);
/// assert_eq!(reshaped_array[0].len(), 2);
/// assert_eq!(reshaped_array[0][0].len(), 3);
/// ```
pub fn reshape_prosit_array(flat_array: Vec<f64>) -> Vec<Vec<Vec<f64>>> {
    const ROWS: usize = 29;
    const ION_KINDS: usize = 2;
    const GROUPS: usize = 3;

    (0..ROWS)
        .map(|row| {
            (0..ION_KINDS)
                .map(|ion_kind| {
                    (0..GROUPS)
                        // Flat layout: group-major, then ion kind, then row.
                        .map(|group| flat_array[(group * ION_KINDS + ion_kind) * ROWS + row])
                        .collect()
                })
                .collect()
        })
        .collect()
}
+1
pub mod spectrum;
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186
use std::collections::BTreeMap;
+use serde::{Deserialize, Serialize};
+
+/// Represents a mass spectrum with associated m/z values and intensities.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct MzSpectrum {
+ pub mz: Vec<f64>,
+ pub intensity: Vec<f64>,
+}
+
+impl MzSpectrum {
+ /// Constructs a new `MzSpectrum`.
+ ///
+ /// # Arguments
+ ///
+ /// * `mz` - A vector of m/z values.
+ /// * `intensity` - A vector of intensity values corresponding to the m/z values.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the lengths of `mz` and `intensity` are not the same. (actually, it doesn't at the moment, planning on adding this later)
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use rustms::ms::spectrum::MzSpectrum;
+ /// let spectrum = MzSpectrum::new(vec![200.0, 100.0], vec![20.0, 10.0]);
+ /// assert_eq!(spectrum.mz, vec![100.0, 200.0]);
+ /// assert_eq!(spectrum.intensity, vec![10.0, 20.0]);
+ /// ```
+ pub fn new(mz: Vec<f64>, intensity: Vec<f64>) -> Self {
+ assert_eq!(mz.len(), intensity.len(), "mz and intensity vectors must have the same length");
+ // make sure mz and intensity are sorted by mz
+ let mut mz_intensity: Vec<(f64, f64)> = mz.iter().zip(intensity.iter()).map(|(m, i)| (*m, *i)).collect();
+ mz_intensity.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
+ MzSpectrum { mz: mz_intensity.iter().map(|(m, _)| *m).collect(), intensity: mz_intensity.iter().map(|(_, i)| *i).collect() }
+ }
+
+ /// Filters the m/z values and intensities based on a range of m/z values and intensities.
+ ///
+ /// # Arguments
+ ///
+ /// * `mz_min` - The minimum m/z value.
+ /// * `mz_max` - The maximum m/z value.
+ /// * `intensity_min` - The minimum intensity value.
+ /// * `intensity_max` - The maximum intensity value.
+ ///
+ /// # Returns
+ ///
+ /// * `MzSpectrum` - A new `MzSpectrum` with m/z values and intensities within the specified ranges.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use rustms::ms::spectrum::MzSpectrum;
+ /// let spectrum = MzSpectrum::new(vec![100.0, 200.0, 300.0], vec![10.0, 20.0, 30.0]);
+ /// let filtered_spectrum = spectrum.filter_ranged(150.0, 250.0, 15.0, 25.0);
+ /// assert_eq!(filtered_spectrum.mz, vec![200.0]);
+ /// assert_eq!(filtered_spectrum.intensity, vec![20.0]);
+ /// ```
+ pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> Self {
+ let mut mz_vec: Vec<f64> = Vec::new();
+ let mut intensity_vec: Vec<f64> = Vec::new();
+
+ for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
+ if mz_min <= *mz && *mz <= mz_max && *intensity >= intensity_min && *intensity <= intensity_max {
+ mz_vec.push(*mz);
+ intensity_vec.push(*intensity);
+ }
+ }
+ MzSpectrum { mz: mz_vec, intensity: intensity_vec }
+ }
+
+ pub fn from_collection(collection: Vec<MzSpectrum>) -> MzSpectrum {
+
+ let quantize = |mz: f64| -> i64 {
+ (mz * 1_000_000.0).round() as i64
+ };
+
+ let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
+
+ for spectrum in collection {
+ for (mz, intensity) in spectrum.mz.iter().zip(spectrum.intensity.iter()) {
+ let key = quantize(*mz);
+ let entry = combined_map.entry(key).or_insert(0.0);
+ *entry += *intensity;
+ }
+ }
+
+ let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
+ let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
+
+ MzSpectrum { mz: mz_combined, intensity: intensity_combined }
+ }
+}
+
+impl std::ops::Add for MzSpectrum {
+ type Output = Self;
+ /// Combines two `MzSpectrum` instances by summing up the intensities of matching m/z values.
+ ///
+ /// # Description
+ /// Each m/z value is quantized to retain at least 6 decimals. If two spectra have m/z values
+ /// that quantize to the same integer value, their intensities are summed.
+ ///
+ /// # Example
+ /// ```
+ /// # use rustms::ms::spectrum::MzSpectrum;
+ /// let spectrum1 = MzSpectrum { mz: vec![100.523, 101.923], intensity: vec![10.0, 20.0] };
+ /// let spectrum2 = MzSpectrum { mz: vec![101.235, 105.112], intensity: vec![15.0, 30.0] };
+ ///
+ /// let combined = spectrum1 + spectrum2;
+ ///
+ /// assert_eq!(combined.mz, vec![100.523, 101.235, 101.923, 105.112]);
+ /// assert_eq!(combined.intensity, vec![10.0, 15.0, 20.0, 30.0]);
+ /// ```
+ fn add(self, other: Self) -> MzSpectrum {
+ let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
+
+ // Helper to quantize mz to an integer key
+ let quantize = |mz: f64| -> i64 {
+ (mz * 1_000_000.0).round() as i64
+ };
+
+ // Add the m/z and intensities from the first spectrum to the map
+ for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
+ let key = quantize(*mz);
+ combined_map.insert(key, *intensity);
+ }
+
+ // Combine the second spectrum into the map
+ for (mz, intensity) in other.mz.iter().zip(other.intensity.iter()) {
+ let key = quantize(*mz);
+ let entry = combined_map.entry(key).or_insert(0.0);
+ *entry += *intensity;
+ }
+
+ // Convert the combined map back into two Vec<f64>
+ let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
+ let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
+
+ MzSpectrum { mz: mz_combined, intensity: intensity_combined }
+ }
+}
+
+impl std::ops::Mul<f64> for MzSpectrum {
+ type Output = Self;
+ fn mul(self, scale: f64) -> Self::Output{
+ let mut scaled_intensities: Vec<f64> = vec![0.0; self.intensity.len()];
+ for (idx,intensity) in self.intensity.iter().enumerate(){
+ scaled_intensities[idx] = scale*intensity;
+ }
+ Self{ mz: self.mz.clone(), intensity: scaled_intensities}
+
+ }
+}
+
+impl std::ops::Sub for MzSpectrum {
+ type Output = Self;
+ fn sub(self, other: Self) -> Self::Output {
+ let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
+
+ // Helper to quantize mz to an integer key
+ let quantize = |mz: f64| -> i64 {
+ (mz * 1_000_000.0).round() as i64
+ };
+
+ // Add the m/z and intensities from the first spectrum to the map
+ for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
+ let key = quantize(*mz);
+ combined_map.insert(key, *intensity);
+ }
+
+ // Combine the second spectrum into the map
+ for (mz, intensity) in other.mz.iter().zip(other.intensity.iter()) {
+ let key = quantize(*mz);
+ let entry = combined_map.entry(key).or_insert(0.0);
+ *entry -= *intensity;
+ }
+
+ // Convert the combined map back into two Vec<f64>
+ let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
+ let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
+
+ MzSpectrum { mz: mz_combined, intensity: intensity_combined }
+ }
+}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138
use std::collections::HashMap;
+
/// Amino acid names and their one-letter codes
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<&'static str, &'static str>` - a map of amino acid names to their one-letter codes
///
/// # Example
///
/// ```
/// use rustms::proteomics::amino_acid::amino_acids;
///
/// let amino_acids = amino_acids();
/// assert_eq!(amino_acids.get("Lysine"), Some(&"K"));
/// ```
pub fn amino_acids() -> HashMap<&'static str, &'static str> {
    // Rows are listed alphabetically by name.
    HashMap::from([
        ("Alanine", "A"),
        ("Arginine", "R"),
        ("Asparagine", "N"),
        ("Aspartic Acid", "D"),
        ("Cysteine", "C"),
        ("Glutamic Acid", "E"),
        ("Glutamine", "Q"),
        ("Glycine", "G"),
        ("Histidine", "H"),
        ("Isoleucine", "I"),
        ("Leucine", "L"),
        ("Lysine", "K"),
        ("Methionine", "M"),
        ("Phenylalanine", "F"),
        ("Proline", "P"),
        ("Selenocysteine", "U"),
        ("Serine", "S"),
        ("Threonine", "T"),
        ("Tryptophan", "W"),
        ("Tyrosine", "Y"),
        ("Valine", "V"),
    ])
}
+
+
/// Amino Acid Masses
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<&'static str, f64>` - a map of amino acid one-letter codes to their monoisotopic
///   residue masses (the masses that match the compositions of `amino_acid_composition`)
///
/// # Example
///
/// ```
/// use rustms::proteomics::amino_acid::amino_acid_masses;
///
/// let amino_acid_masses = amino_acid_masses();
/// assert_eq!(amino_acid_masses.get("K"), Some(&128.094963));
/// ```
pub fn amino_acid_masses() -> HashMap<&'static str, f64> {
    HashMap::from([
        ("A", 71.037114),
        ("R", 156.101111),
        ("N", 114.042927),
        ("D", 115.026943),
        ("C", 103.009185),
        ("E", 129.042593),
        ("Q", 128.058578),
        ("G", 57.021464),
        ("H", 137.058912),
        ("I", 113.084064),
        ("L", 113.084064),
        ("K", 128.094963),
        ("M", 131.040485),
        ("F", 147.068414),
        ("P", 97.052764),
        ("S", 87.032028),
        ("T", 101.047679),
        ("W", 186.079313),
        ("Y", 163.063329),
        ("V", 99.068414),
        // Monoisotopic residue mass of selenocysteine, C3H5NOSe with 80Se. The previous value
        // (168.053) was not a monoisotopic residue mass and disagreed with the other entries.
        ("U", 150.953636),
    ])
}
+
/// Atomic compositions of the amino acids
///
/// # Arguments
///
/// None
///
/// # Returns
///
/// * `HashMap<char, HashMap<&'static str, i32>>` - a map of amino acid one-letter codes to their atomic compositions
///
/// # Example
///
/// ```
/// use rustms::proteomics::amino_acid::amino_acid_composition;
/// use std::collections::HashMap;
///
/// let amino_acid_composition = amino_acid_composition();
/// assert_eq!(amino_acid_composition.get(&'K'), Some(&HashMap::from([("C", 6), ("H", 12), ("N", 2), ("O", 1)])));
/// ```
pub fn amino_acid_composition() -> HashMap<char, HashMap<&'static str, i32>> {
    // One row per amino acid: (one-letter code, [(element symbol, atom count)]).
    const TABLE: &[(char, &[(&str, i32)])] = &[
        ('G', &[("C", 2), ("H", 3), ("N", 1), ("O", 1)]), // Glycine
        ('A', &[("C", 3), ("H", 5), ("N", 1), ("O", 1)]), // Alanine
        ('S', &[("C", 3), ("H", 5), ("N", 1), ("O", 2)]), // Serine
        ('P', &[("C", 5), ("H", 7), ("N", 1), ("O", 1)]), // Proline
        ('V', &[("C", 5), ("H", 9), ("N", 1), ("O", 1)]), // Valine
        ('T', &[("C", 4), ("H", 7), ("N", 1), ("O", 2)]), // Threonine
        ('C', &[("C", 3), ("H", 5), ("N", 1), ("O", 1), ("S", 1)]), // Cysteine
        ('I', &[("C", 6), ("H", 11), ("N", 1), ("O", 1)]), // Isoleucine
        ('L', &[("C", 6), ("H", 11), ("N", 1), ("O", 1)]), // Leucine
        ('N', &[("C", 4), ("H", 6), ("N", 2), ("O", 2)]), // Asparagine
        ('D', &[("C", 4), ("H", 5), ("N", 1), ("O", 3)]), // Aspartic Acid
        ('Q', &[("C", 5), ("H", 8), ("N", 2), ("O", 2)]), // Glutamine
        ('K', &[("C", 6), ("H", 12), ("N", 2), ("O", 1)]), // Lysine
        ('E', &[("C", 5), ("H", 7), ("N", 1), ("O", 3)]), // Glutamic Acid
        ('M', &[("C", 5), ("H", 9), ("N", 1), ("O", 1), ("S", 1)]), // Methionine
        ('H', &[("C", 6), ("H", 7), ("N", 3), ("O", 1)]), // Histidine
        ('F', &[("C", 9), ("H", 9), ("N", 1), ("O", 1)]), // Phenylalanine
        ('R', &[("C", 6), ("H", 12), ("N", 4), ("O", 1)]), // Arginine
        ('Y', &[("C", 9), ("H", 9), ("N", 1), ("O", 2)]), // Tyrosine
        ('W', &[("C", 11), ("H", 10), ("N", 2), ("O", 1)]), // Tryptophan
        ('U', &[("C", 3), ("H", 5), ("N", 1), ("O", 1), ("Se", 1)]), // Selenocysteine
    ];

    TABLE
        .iter()
        .map(|&(code, atoms)| {
            let atom_counts = atoms.iter().copied().collect::<HashMap<&'static str, i32>>();
            (code, atom_counts)
        })
        .collect()
}
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401
use std::collections::HashMap;
+use regex::Regex;
+use itertools::Itertools;
+use serde::{Deserialize, Serialize};
+use crate::algorithm::peptide::{calculate_peptide_mono_isotopic_mass, calculate_peptide_product_ion_mono_isotopic_mass, peptide_sequence_to_atomic_composition};
+use crate::chemistry::formula::calculate_mz;
+use crate::chemistry::utility::{find_unimod_patterns, reshape_prosit_array, unimod_sequence_to_tokens};
+use crate::ms::spectrum::MzSpectrum;
+use crate::proteomics::amino_acid::amino_acid_masses;
+use bincode::{Encode, Decode};
+
+// Helper type aliases for readability: an isotope distribution is a list of (mass, abundance) pairs.
+type Mass = f64;
+type Abundance = f64;
+type IsotopeDistribution = Vec<(Mass, Abundance)>;
+
+/// A peptide ion: a peptide sequence combined with a charge state, an
+/// intensity and an ordinal.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PeptideIon {
+    /// Sequence of the ion, wrapped in a [`PeptideSequence`].
+    pub sequence: PeptideSequence,
+    /// Charge state handed to `calculate_mz`.
+    pub charge: i32,
+    /// Intensity of the ion; scales the spectrum in `calculate_isotopic_spectrum`.
+    pub intensity: f64,
+    /// Ordinal of the ion. `PeptideSequence::calculate_product_ion_series`
+    /// stores the fragment length (in tokens) here.
+    pub ordinal: u32,
+}
+
+impl PeptideIon {
+    /// Creates an ion from a raw sequence string.
+    ///
+    /// The sequence is passed through `PeptideSequence::new`, which panics
+    /// when the sequence contains characters that are not known amino acids.
+    pub fn new(sequence: String, charge: i32, intensity: f64, ordinal: u32, peptide_id: Option<i32>) -> Self {
+        let sequence = PeptideSequence::new(sequence, peptide_id);
+        Self { sequence, charge, intensity, ordinal }
+    }
+
+    /// Returns the m/z obtained from the mono-isotopic mass of the sequence
+    /// and the charge of this ion.
+    pub fn mz(&self) -> f64 {
+        let mono_isotopic_mass = self.sequence.mono_isotopic_mass();
+        calculate_mz(mono_isotopic_mass, self.charge)
+    }
+
+    /// Computes the isotope pattern of the ion as `(m/z, abundance)` pairs.
+    ///
+    /// `mass_tolerance`, `abundance_threshold` and `max_result` are forwarded
+    /// unchanged to `generate_isotope_distribution`. Peaks whose abundance is
+    /// not strictly greater than `intensity_min` are dropped, and the masses
+    /// of the remaining peaks are converted to m/z with the ion's charge.
+    pub fn calculate_isotope_distribution(
+        &self,
+        mass_tolerance: f64,
+        abundance_threshold: f64,
+        max_result: i32,
+        intensity_min: f64,
+    ) -> IsotopeDistribution {
+        // The isotope generator expects owned element names as keys.
+        let element_counts: HashMap<String, i32> = self
+            .sequence
+            .atomic_composition()
+            .into_iter()
+            .map(|(element, count)| (element.to_string(), count))
+            .collect();
+
+        crate::algorithm::isotope::generate_isotope_distribution(&element_counts, mass_tolerance, abundance_threshold, max_result)
+            .into_iter()
+            .filter(|peak| peak.1 > intensity_min)
+            .map(|(mass, abundance)| (calculate_mz(mass, self.charge), abundance))
+            .collect()
+    }
+
+    /// Builds an `MzSpectrum` from the isotope pattern of the ion and scales
+    /// it by the ion's intensity.
+    ///
+    /// The arguments have the same meaning as in
+    /// `calculate_isotope_distribution`.
+    pub fn calculate_isotopic_spectrum(
+        &self,
+        mass_tolerance: f64,
+        abundance_threshold: f64,
+        max_result: i32,
+        intensity_min: f64,
+    ) -> MzSpectrum {
+        let (mz_values, abundances): (Vec<f64>, Vec<f64>) = self
+            .calculate_isotope_distribution(mass_tolerance, abundance_threshold, max_result, intensity_min)
+            .into_iter()
+            .unzip();
+
+        MzSpectrum::new(mz_values, abundances) * self.intensity
+    }
+}
+
+/// Fragment ion types. `PeptideSequence::calculate_product_ion_series` uses
+/// `A`, `B`, `C` for the N-terminal series and `X`, `Y`, `Z` for the
+/// C-terminal series.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub enum FragmentType {
+    A,
+    B,
+    C,
+    X,
+    Y,
+    Z,
+}
+
+// Display renders a fragment type as its lower-case letter.
+impl std::fmt::Display for FragmentType {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let letter = match self {
+            FragmentType::A => "a",
+            FragmentType::B => "b",
+            FragmentType::C => "c",
+            FragmentType::X => "x",
+            FragmentType::Y => "y",
+            FragmentType::Z => "z",
+        };
+        f.write_str(letter)
+    }
+}
+
+/// A fragment (product) ion: a [`PeptideIon`] tagged with its fragment type.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PeptideProductIon {
+    /// Fragment type of the ion.
+    pub kind: FragmentType,
+    /// Sequence, charge, intensity and ordinal of the fragment.
+    pub ion: PeptideIon,
+}
+
+impl PeptideProductIon {
+    /// Creates a product ion from a raw fragment sequence string.
+    ///
+    /// The sequence is validated by `PeptideSequence::new` (via
+    /// `PeptideIon::new`), which panics on unknown amino acids.
+    pub fn new(kind: FragmentType, sequence: String, charge: i32, intensity: f64, ordinal: u32, peptide_id: Option<i32>) -> Self {
+        PeptideProductIon {
+            kind,
+            ion: PeptideIon::new(sequence, charge, intensity, ordinal, peptide_id),
+        }
+    }
+
+    /// Returns the mono-isotopic mass of the fragment, computed by
+    /// `calculate_peptide_product_ion_mono_isotopic_mass` from the raw
+    /// sequence string and the fragment type.
+    pub fn mono_isotopic_mass(&self) -> f64 {
+        calculate_peptide_product_ion_mono_isotopic_mass(self.ion.sequence.sequence.as_str(), self.kind)
+    }
+
+    /// Returns the atomic composition of the fragment.
+    ///
+    /// Starts from the composition of the fragment's peptide sequence and
+    /// applies a per-fragment-type correction to the element counts.
+    pub fn atomic_composition(&self) -> HashMap<&str, i32> {
+        let mut composition = peptide_sequence_to_atomic_composition(&self.ion.sequence);
+
+        // Element count corrections relative to the plain peptide composition.
+        let corrections: &[(&str, i32)] = match self.kind {
+            // A: peptide - Water - CO
+            FragmentType::A => &[("H", -2), ("O", -2), ("C", -1)],
+            // B: peptide - Water
+            FragmentType::B => &[("H", -2), ("O", -1)],
+            // C: peptide + NH3 - Water
+            FragmentType::C => &[("H", 1), ("N", 1), ("O", -1)],
+            // X: peptide + CO.
+            // NOTE(review): the conventional x-ion is y + CO - 2H; confirm against
+            // calculate_peptide_product_ion_mono_isotopic_mass whether two hydrogens
+            // should be removed here as well.
+            FragmentType::X => &[("C", 1), ("O", 1)],
+            // Y: identical to the peptide composition
+            FragmentType::Y => &[],
+            // Z: peptide - NH3, i.e. three hydrogens and one nitrogen are lost
+            // (the counts were previously swapped: -1 H, -3 N).
+            FragmentType::Z => &[("H", -3), ("N", -1)],
+        };
+
+        for &(element, delta) in corrections {
+            *composition.entry(element).or_insert(0) += delta;
+        }
+
+        composition
+    }
+
+    /// Returns the m/z of the fragment from its mono-isotopic mass and charge.
+    pub fn mz(&self) -> f64 {
+        calculate_mz(self.mono_isotopic_mass(), self.ion.charge)
+    }
+
+    /// Computes the isotope pattern of the fragment as `(m/z, abundance)` pairs.
+    ///
+    /// `mass_tolerance`, `abundance_threshold` and `max_result` are forwarded
+    /// unchanged to `generate_isotope_distribution`. Peaks whose abundance is
+    /// not strictly greater than `intensity_min` are dropped, and the masses
+    /// of the remaining peaks are converted to m/z with the fragment's charge.
+    pub fn isotope_distribution(
+        &self,
+        mass_tolerance: f64,
+        abundance_threshold: f64,
+        max_result: i32,
+        intensity_min: f64,
+    ) -> IsotopeDistribution {
+        // The isotope generator expects owned element names as keys.
+        let atomic_composition: HashMap<String, i32> = self
+            .atomic_composition()
+            .into_iter()
+            .map(|(element, count)| (element.to_string(), count))
+            .collect();
+
+        crate::algorithm::isotope::generate_isotope_distribution(&atomic_composition, mass_tolerance, abundance_threshold, max_result)
+            .into_iter()
+            .filter(|&(_, abundance)| abundance > intensity_min)
+            .map(|(mass, abundance)| (calculate_mz(mass, self.ion.charge), abundance))
+            .collect()
+    }
+}
+
+/// A peptide sequence, optionally annotated with `[UNIMOD:ID]` modifications,
+/// together with an optional peptide identifier.
+#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
+pub struct PeptideSequence {
+    /// Raw sequence string, including any `[UNIMOD:ID]` annotations.
+    pub sequence: String,
+    /// Optional identifier of the peptide; copied to every generated fragment.
+    pub peptide_id: Option<i32>,
+}
+
+impl PeptideSequence {
+    /// Creates a peptide sequence after validating `raw_sequence`.
+    ///
+    /// All `[UNIMOD:<digits>]` / `[UNIMOD:?]` annotations are stripped and the
+    /// remaining characters are checked against the keys of
+    /// `amino_acid_masses()`. The raw (annotated) string is what gets stored.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a character outside of the modification annotations is not a
+    /// known amino acid.
+    pub fn new(raw_sequence: String, peptide_id: Option<i32>) -> Self {
+
+        // pattern matching the modification annotations
+        let pattern = Regex::new(r"\[UNIMOD:(\d+|\?)]").unwrap();
+
+        // remove the modifications from the sequence
+        let sequence = pattern.replace_all(&raw_sequence, "");
+
+        // Build the lookup table once instead of once per character, and encode
+        // each character into a stack buffer instead of allocating a String.
+        let known_amino_acids = amino_acid_masses();
+        let mut utf8_buffer = [0u8; 4];
+        let valid_amino_acids = sequence
+            .chars()
+            .all(|c| known_amino_acids.contains_key(&*c.encode_utf8(&mut utf8_buffer)));
+
+        if !valid_amino_acids {
+            panic!("Invalid amino acid sequence: {}, use only valid amino acids: ARNDCQEGHILKMFPSTWYVU, and modifications in the format [UNIMOD:ID]", raw_sequence);
+        }
+
+        PeptideSequence { sequence: raw_sequence, peptide_id }
+    }
+
+    /// Returns the mono-isotopic mass as computed by
+    /// `calculate_peptide_mono_isotopic_mass`.
+    pub fn mono_isotopic_mass(&self) -> f64 {
+        calculate_peptide_mono_isotopic_mass(self)
+    }
+
+    /// Returns the atomic composition as computed by
+    /// `peptide_sequence_to_atomic_composition`.
+    pub fn atomic_composition(&self) -> HashMap<&str, i32> {
+        peptide_sequence_to_atomic_composition(self)
+    }
+
+    /// Splits the sequence into tokens with `unimod_sequence_to_tokens`;
+    /// `group_modifications` is forwarded unchanged.
+    pub fn to_tokens(&self, group_modifications: bool) -> Vec<String> {
+        unimod_sequence_to_tokens(self.sequence.as_str(), group_modifications)
+    }
+
+    /// Returns the result of `find_unimod_patterns` for the raw sequence.
+    // NOTE(review): presumably the stripped sequence plus per-position
+    // modification masses as used by sage - confirm against find_unimod_patterns.
+    pub fn to_sage_representation(&self) -> (String, Vec<f64>) {
+        find_unimod_patterns(self.sequence.as_str())
+    }
+
+    /// Returns the number of tokens of the sequence when modifications are
+    /// grouped (`to_tokens(true)`).
+    pub fn amino_acid_count(&self) -> usize {
+        self.to_tokens(true).len()
+    }
+
+    /// Generates the product ion series for `charge` / `fragment_type` and
+    /// returns its mono-isotopic spectrum.
+    pub fn calculate_mono_isotopic_product_ion_spectrum(&self, charge: i32, fragment_type: FragmentType) -> MzSpectrum {
+        self.calculate_product_ion_series(charge, fragment_type)
+            .generate_mono_isotopic_spectrum()
+    }
+
+    /// Generates the product ion series for `charge` / `fragment_type` and
+    /// returns its isotopic spectrum; the remaining arguments are forwarded to
+    /// `PeptideProductIonSeries::generate_isotopic_spectrum`.
+    pub fn calculate_isotopic_product_ion_spectrum(&self, charge: i32, fragment_type: FragmentType, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrum {
+        self.calculate_product_ion_series(charge, fragment_type)
+            .generate_isotopic_spectrum(mass_tolerance, abundance_threshold, max_result, intensity_min)
+    }
+
+    /// Builds the N-terminal and C-terminal fragment series of the peptide.
+    ///
+    /// For a sequence of `n` tokens (modifications grouped), fragments of
+    /// length `1..n` are produced from both ends. `fragment_type` selects the
+    /// pair of series: `A`/`X` yields a- and x-ions, `B`/`Y` yields b- and
+    /// y-ions, `C`/`Z` yields c- and z-ions. Every ion gets `target_charge`, a
+    /// placeholder intensity of `1.0` and its fragment length as ordinal.
+    pub fn calculate_product_ion_series(&self, target_charge: i32, fragment_type: FragmentType) -> PeptideProductIonSeries {
+        // TODO: check for n-terminal modifications
+        let tokens = unimod_sequence_to_tokens(self.sequence.as_str(), true);
+
+        // The kinds of both series only depend on the requested fragment type,
+        // so resolve them once instead of once per fragment.
+        let (n_kind, c_kind) = match fragment_type {
+            FragmentType::A | FragmentType::X => (FragmentType::A, FragmentType::X),
+            FragmentType::B | FragmentType::Y => (FragmentType::B, FragmentType::Y),
+            FragmentType::C | FragmentType::Z => (FragmentType::C, FragmentType::Z),
+        };
+
+        let make_ion = |kind: FragmentType, sequence: String, length: usize| PeptideProductIon {
+            kind,
+            ion: PeptideIon {
+                sequence: PeptideSequence {
+                    sequence,
+                    peptide_id: self.peptide_id,
+                },
+                charge: target_charge,
+                intensity: 1.0, // Placeholder intensity
+                ordinal: length as u32,
+            },
+        };
+
+        // Generate n ions: prefixes of increasing length
+        let n_terminal_ions: Vec<PeptideProductIon> = (1..tokens.len())
+            .map(|i| make_ion(n_kind, tokens[..i].join(""), i))
+            .collect();
+
+        // Generate c ions: suffixes of increasing length
+        let c_terminal_ions: Vec<PeptideProductIon> = (1..tokens.len())
+            .map(|i| make_ion(c_kind, tokens[tokens.len() - i..].join(""), i))
+            .collect();
+
+        PeptideProductIonSeries::new(target_charge, n_terminal_ions, c_terminal_ions)
+    }
+
+    /// Builds one product ion series per charge state and fills in predicted
+    /// fragment intensities.
+    ///
+    /// `flat_intensities` is reshaped with `reshape_prosit_array`; for fragment
+    /// row `r` and charge `z`, index `[r][1][z - 1]` is assigned to the
+    /// N-terminal ion and `[r][0][z - 1]` to the C-terminal ion. Charges
+    /// `1..=min(charge, 3)` (at least 1) are generated.
+    ///
+    /// With `normalize`, intensities are divided by the sum of all strictly
+    /// positive predicted intensities over the generated charges; if that sum
+    /// is zero the intensities are left unscaled instead of dividing by zero.
+    /// If only charge 1 is generated and `half_charge_one` is set, the divisor
+    /// is doubled.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the reshaped array has fewer rows than the peptide has
+    /// fragments, or fewer charge columns than generated charges.
+    pub fn associate_with_predicted_intensities(
+        &self,
+        // TODO: check docs of prosit if charge is meant as precursor charge or max charge of fragments to generate
+        charge: i32,
+        fragment_type: FragmentType,
+        flat_intensities: Vec<f64>,
+        normalize: bool,
+        half_charge_one: bool,
+    ) -> PeptideProductIonSeriesCollection {
+
+        let reshaped_intensities = reshape_prosit_array(flat_intensities);
+        let max_charge = std::cmp::min(charge, 3).max(1); // Ensure at least 1 for loop range
+
+        // Full sequence length is not counted as fragment, since nothing is cleaved off, therefore -1.
+        // Saturating so that an empty sequence yields zero fragments instead of underflowing.
+        let num_tokens = self.amino_acid_count().saturating_sub(1);
+        let fragment_rows = &reshaped_intensities[..num_tokens];
+
+        let mut sum_intensity = if normalize { 0.0 } else { 1.0 };
+
+        if normalize {
+            for z in 1..=max_charge {
+                let charge_index = z as usize - 1;
+                let positive_sum = |series: usize| -> f64 {
+                    fragment_rows
+                        .iter()
+                        .map(|row| row[series][charge_index])
+                        .filter(|&x| x > 0.0)
+                        .sum()
+                };
+
+                sum_intensity += positive_sum(1) + positive_sum(0);
+            }
+
+            // No positive prediction at all: avoid dividing by zero (NaN / infinite intensities).
+            if sum_intensity == 0.0 {
+                sum_intensity = 1.0;
+            }
+        }
+
+        // Independent of the charge being processed, so computed once.
+        let adjusted_sum_intensity = if max_charge == 1 && half_charge_one { sum_intensity * 2.0 } else { sum_intensity };
+
+        let mut peptide_ion_collection = Vec::with_capacity(max_charge as usize);
+
+        for z in 1..=max_charge {
+            let charge_index = z as usize - 1;
+            let mut product_ions = self.calculate_product_ion_series(z, fragment_type);
+
+            // Both series hold exactly one ion per fragment row.
+            for (ion, row) in product_ions.n_ions.iter_mut().zip(fragment_rows) {
+                ion.ion.intensity = row[1][charge_index] / adjusted_sum_intensity;
+            }
+            for (ion, row) in product_ions.c_ions.iter_mut().zip(fragment_rows) {
+                ion.ion.intensity = row[0][charge_index] / adjusted_sum_intensity;
+            }
+
+            peptide_ion_collection.push(product_ions);
+        }
+
+        PeptideProductIonSeriesCollection::new(peptide_ion_collection)
+    }
+}
+
+/// The N-terminal and C-terminal fragment ions of a peptide at one charge state.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PeptideProductIonSeries {
+    /// Charge state of the series.
+    pub charge: i32,
+    /// N-terminal fragment ions.
+    pub n_ions: Vec<PeptideProductIon>,
+    /// C-terminal fragment ions.
+    pub c_ions: Vec<PeptideProductIon>,
+}
+
+impl PeptideProductIonSeries {
+    /// Bundles the given fragment ions into a series for `charge`.
+    pub fn new(charge: i32, n_ions: Vec<PeptideProductIon>, c_ions: Vec<PeptideProductIon>) -> Self {
+        Self { charge, n_ions, c_ions }
+    }
+
+    /// Builds a spectrum with one peak per fragment ion (mono-isotopic m/z and
+    /// the ion's intensity), merges the N- and C-terminal spectra with
+    /// `MzSpectrum::from_collection` and applies
+    /// `filter_ranged(0.0, 5_000.0, 1e-6, 1e6)`.
+    pub fn generate_mono_isotopic_spectrum(&self) -> MzSpectrum {
+        let to_spectrum = |ions: &[PeptideProductIon]| {
+            let (mz_values, intensities): (Vec<f64>, Vec<f64>) = ions
+                .iter()
+                .map(|product_ion| (product_ion.mz(), product_ion.ion.intensity))
+                .unzip();
+            MzSpectrum::new(mz_values, intensities)
+        };
+
+        let n_spectrum = to_spectrum(&self.n_ions);
+        let c_spectrum = to_spectrum(&self.c_ions);
+
+        MzSpectrum::from_collection(vec![n_spectrum, c_spectrum]).filter_ranged(0.0, 5_000.0, 1e-6, 1e6)
+    }
+
+    /// Builds one isotopic spectrum per fragment ion (abundances scaled by the
+    /// ion's intensity), N-terminal ions first, merges them with
+    /// `MzSpectrum::from_collection` and applies
+    /// `filter_ranged(0.0, 5_000.0, 1e-6, 1e6)`.
+    ///
+    /// The arguments are forwarded to `PeptideProductIon::isotope_distribution`.
+    pub fn generate_isotopic_spectrum(&self, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrum {
+        let spectra: Vec<MzSpectrum> = self
+            .n_ions
+            .iter()
+            .chain(self.c_ions.iter())
+            .map(|product_ion| {
+                let scale = product_ion.ion.intensity;
+                let (mz_values, intensities): (Vec<f64>, Vec<f64>) = product_ion
+                    .isotope_distribution(mass_tolerance, abundance_threshold, max_result, intensity_min)
+                    .into_iter()
+                    .map(|(mz, abundance)| (mz, abundance * scale))
+                    .unzip();
+                MzSpectrum::new(mz_values, intensities)
+            })
+            .collect();
+
+        MzSpectrum::from_collection(spectra).filter_ranged(0.0, 5_000.0, 1e-6, 1e6)
+    }
+}
+
+/// A collection of product ion series, as produced per charge state by
+/// `PeptideSequence::associate_with_predicted_intensities`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PeptideProductIonSeriesCollection {
+    /// The contained product ion series.
+    pub peptide_ions: Vec<PeptideProductIonSeries>,
+}
+
+impl PeptideProductIonSeriesCollection {
+    /// Wraps the given series in a collection.
+    pub fn new(peptide_ions: Vec<PeptideProductIonSeries>) -> Self {
+        Self { peptide_ions }
+    }
+
+    /// Returns the first series whose charge equals `charge`, or `None` if
+    /// there is no such series.
+    pub fn find_ion_series(&self, charge: i32) -> Option<&PeptideProductIonSeries> {
+        for series in &self.peptide_ions {
+            if series.charge == charge {
+                return Some(series);
+            }
+        }
+        None
+    }
+
+    /// Generates the isotopic spectrum of every contained series, merges them
+    /// with `MzSpectrum::from_collection` and applies
+    /// `filter_ranged(0.0, 5_000.0, 1e-6, 1e6)`.
+    ///
+    /// The arguments are forwarded to
+    /// `PeptideProductIonSeries::generate_isotopic_spectrum`.
+    pub fn generate_isotopic_spectrum(&self, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrum {
+        let spectra: Vec<MzSpectrum> = self
+            .peptide_ions
+            .iter()
+            .map(|series| series.generate_isotopic_spectrum(mass_tolerance, abundance_threshold, max_result, intensity_min))
+            .collect();
+
+        MzSpectrum::from_collection(spectra).filter_ranged(0.0, 5_000.0, 1e-6, 1e6)
+    }
+}
fn:
) to \
+ restrict the search to a given item kind.","Accepted kinds are: fn
, mod
, struct
, \
+ enum
, trait
, type
, macro
, \
+ and const
.","Search functions by type signature (e.g., vec -> usize
or \
+ -> vec
or String, enum:Cow -> bool
)","You can look for items with an exact name by putting double quotes around \
+ your request: \"string\"
","Look for functions that accept or return \
+ slices and \
+ arrays by writing \
+ square brackets (e.g., -> [u8]
or [] -> Option
)","Look for items inside another one by searching for a path: vec::Vec
",].map(x=>""+x+"
").join("");const div_infos=document.createElement("div");addClass(div_infos,"infos");div_infos.innerHTML="${value.replaceAll(" ", " ")}
`}else{error[index]=value}});output+=`