Skip to content

Commit adf3be1

Browse files
authored
Merge pull request #232 from compomics/feature/correlate-single
Add correlate_single usage mode (API only)
2 parents ccc56c9 + cdefd2e commit adf3be1

File tree

2 files changed

+66
-15
lines changed

2 files changed

+66
-15
lines changed

docs/source/usage.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ For instance:
6868
ms2pip correlate --psm-filetype sage results.sage.tsv spectra.mgf
6969
7070
71+
``correlate-single``
72+
--------------------
73+
74+
Predict spectrum intensities for a single peptide and correlate them with observed intensities from
75+
an :py:class:`ObservedSpectrum` object. This mode is only available through the Python API, not
76+
through the command-line interface.
77+
78+
7179
``get-training-data``
7280
---------------------
7381

ms2pip/core.py

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,17 @@
1717
from rich.progress import track
1818

1919
import ms2pip.exceptions as exceptions
20-
from ms2pip import spectrum_output
2120
from ms2pip._cython_modules import ms2pip_pyx
2221
from ms2pip._utils.encoder import Encoder
2322
from ms2pip._utils.feature_names import get_feature_names
23+
from ms2pip._utils.ion_mobility import IonMobility
2424
from ms2pip._utils.psm_input import read_psms
2525
from ms2pip._utils.retention_time import RetentionTime
26-
from ms2pip._utils.ion_mobility import IonMobility
2726
from ms2pip._utils.xgb_models import get_predictions_xgb, validate_requested_xgb_model
2827
from ms2pip.constants import MODELS
2928
from ms2pip.result import ProcessingResult, calculate_correlations
3029
from ms2pip.search_space import ProteomeSearchSpace
30+
from ms2pip.spectrum import ObservedSpectrum
3131
from ms2pip.spectrum_input import read_spectrum_file
3232
from ms2pip.spectrum_output import SUPPORTED_FORMATS
3333

@@ -291,6 +291,62 @@ def correlate(
291291
return results
292292

293293

294+
def correlate_single(
    observed_spectrum: ObservedSpectrum,
    ms2_tolerance: float = 0.02,
    model: str = "HCD",
) -> ProcessingResult:
    """
    Correlate single observed spectrum with predicted intensities.\f

    Parameters
    ----------
    observed_spectrum
        ObservedSpectrum instance with observed m/z and intensity values and peptidoform.
    ms2_tolerance
        MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da.
    model
        Model to use for prediction. Default: "HCD".

    Returns
    -------
    result: ProcessingResult
        Result with theoretical m/z, predicted intensity, observed intensity, and correlation.

    Raises
    ------
    ValueError
        If ``observed_spectrum.peptidoform`` is not a ``Peptidoform`` instance.
    KeyError
        If ``model`` is not a key of ``MODELS``.

    """
    # Check peptidoform in observed spectrum
    if not isinstance(observed_spectrum.peptidoform, Peptidoform):
        raise ValueError("Peptidoform must be set in observed spectrum to correlate.")

    # Resolve the model configuration once, before any encoder work, so that an
    # unknown model fails immediately instead of after the encoder has been set up
    # and the Cython module has been initialized.
    model_config = MODELS[model]

    # Annotate spectrum and get target intensities
    with Encoder.from_peptidoform(observed_spectrum.peptidoform) as encoder:
        ms2pip_pyx.ms2pip_init(*encoder.encoder_files)
        enc_peptidoform = encoder.encode_peptidoform(observed_spectrum.peptidoform)
        targets = ms2pip_pyx.get_targets(
            enc_peptidoform,
            observed_spectrum.mz.astype(np.float32),
            observed_spectrum.intensity.astype(np.float32),
            float(ms2_tolerance),
            model_config["peaks_version"],
        )

    # Reshape to dict with intensities per ion type. Every value is floored at
    # log2(0.001); NOTE(review): presumably the targets are log2-transformed and
    # this is the value used for missing peaks -- confirm against get_targets.
    intensity_floor = np.log2(0.001)
    ion_types = [ion_type.lower() for ion_type in model_config["ion_types"]]
    observed_intensity = {
        ion_type: np.array(target, dtype=np.float32).clip(min=intensity_floor)
        for ion_type, target in zip(ion_types, targets)
    }

    # Predict spectrum and add target intensities
    result = predict_single(observed_spectrum.peptidoform, model=model)
    result.observed_intensity = observed_intensity

    # Add correlation (calculate_correlations takes a list of results and its
    # return value is not used here)
    calculate_correlations([result])

    return result
348+
349+
294350
def get_training_data(
295351
psms: Union[PSMList, str, Path],
296352
spectrum_file: Union[str, Path],
@@ -704,19 +760,6 @@ def _add_xgboost_predictions(self, results: List[ProcessingResult]) -> List[Proc
704760

705761
return results
706762

707-
# TODO IMPLEMENT
708-
def write_predictions(
709-
self, all_preds: pd.DataFrame, peptides: pd.DataFrame, output_filename: str
710-
):
711-
raise NotImplementedError
712-
spec_out = spectrum_output.SpectrumOutput(
713-
all_preds,
714-
peptides,
715-
self.params["ms2pip"],
716-
output_filename=output_filename,
717-
)
718-
spec_out.write_results(self.output_formats)
719-
720763

721764
def _process_peptidoform(
722765
psm_index: int,

0 commit comments

Comments
 (0)