|
17 | 17 | from rich.progress import track
|
18 | 18 |
|
19 | 19 | import ms2pip.exceptions as exceptions
|
20 |
| -from ms2pip import spectrum_output |
21 | 20 | from ms2pip._cython_modules import ms2pip_pyx
|
22 | 21 | from ms2pip._utils.encoder import Encoder
|
23 | 22 | from ms2pip._utils.feature_names import get_feature_names
|
| 23 | +from ms2pip._utils.ion_mobility import IonMobility |
24 | 24 | from ms2pip._utils.psm_input import read_psms
|
25 | 25 | from ms2pip._utils.retention_time import RetentionTime
|
26 |
| -from ms2pip._utils.ion_mobility import IonMobility |
27 | 26 | from ms2pip._utils.xgb_models import get_predictions_xgb, validate_requested_xgb_model
|
28 | 27 | from ms2pip.constants import MODELS
|
29 | 28 | from ms2pip.result import ProcessingResult, calculate_correlations
|
30 | 29 | from ms2pip.search_space import ProteomeSearchSpace
|
| 30 | +from ms2pip.spectrum import ObservedSpectrum |
31 | 31 | from ms2pip.spectrum_input import read_spectrum_file
|
32 | 32 | from ms2pip.spectrum_output import SUPPORTED_FORMATS
|
33 | 33 |
|
@@ -291,6 +291,62 @@ def correlate(
|
291 | 291 | return results
|
292 | 292 |
|
293 | 293 |
|
| 294 | +def correlate_single( |
| 295 | + observed_spectrum: ObservedSpectrum, |
| 296 | + ms2_tolerance: float = 0.02, |
| 297 | + model: str = "HCD", |
| 298 | +) -> ProcessingResult: |
| 299 | + """ |
| 300 | + Correlate single observed spectrum with predicted intensities.\f |
| 301 | +
|
| 302 | + Parameters |
| 303 | + ---------- |
| 304 | + observed_spectrum |
| 305 | + ObservedSpectrum instance with observed m/z and intensity values and peptidoform. |
| 306 | + ms2_tolerance |
| 307 | + MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da. |
| 308 | + model |
| 309 | + Model to use for prediction. Default: "HCD". |
| 310 | +
|
| 311 | + Returns |
| 312 | + ------- |
| 313 | + result: ProcessingResult |
| 314 | + Result with theoretical m/z, predicted intensity, observed intensity, and correlation. |
| 315 | +
|
| 316 | + """ |
| 317 | + # Check peptidoform in observed spectrum |
| 318 | + if not isinstance(observed_spectrum.peptidoform, Peptidoform): |
| 319 | + raise ValueError("Peptidoform must be set in observed spectrum to correlate.") |
| 320 | + |
| 321 | + # Annotate spectrum and get target intensities |
| 322 | + with Encoder.from_peptidoform(observed_spectrum.peptidoform) as encoder: |
| 323 | + ms2pip_pyx.ms2pip_init(*encoder.encoder_files) |
| 324 | + enc_peptidoform = encoder.encode_peptidoform(observed_spectrum.peptidoform) |
| 325 | + targets = ms2pip_pyx.get_targets( |
| 326 | + enc_peptidoform, |
| 327 | + observed_spectrum.mz.astype(np.float32), |
| 328 | + observed_spectrum.intensity.astype(np.float32), |
| 329 | + float(ms2_tolerance), |
| 330 | + MODELS[model]["peaks_version"], |
| 331 | + ) |
| 332 | + |
| 333 | + # Reshape to dict with intensities per ion type |
| 334 | + ion_types = [it.lower() for it in MODELS[model]["ion_types"]] |
| 335 | + observed_intensity = { |
| 336 | + i: np.array(p, dtype=np.float32).clip(min=np.log2(0.001)) # Clip negative intensities |
| 337 | + for i, p in zip(ion_types, targets) |
| 338 | + } |
| 339 | + |
| 340 | + # Predict spectrum and add target intensities |
| 341 | + result = predict_single(observed_spectrum.peptidoform, model=model) |
| 342 | + result.observed_intensity = observed_intensity |
| 343 | + |
| 344 | + # Add correlation |
| 345 | + calculate_correlations([result]) |
| 346 | + |
| 347 | + return result |
| 348 | + |
| 349 | + |
294 | 350 | def get_training_data(
|
295 | 351 | psms: Union[PSMList, str, Path],
|
296 | 352 | spectrum_file: Union[str, Path],
|
@@ -704,19 +760,6 @@ def _add_xgboost_predictions(self, results: List[ProcessingResult]) -> List[Proc
|
704 | 760 |
|
705 | 761 | return results
|
706 | 762 |
|
707 |
| - # TODO IMPLEMENT |
708 |
| - def write_predictions( |
709 |
| - self, all_preds: pd.DataFrame, peptides: pd.DataFrame, output_filename: str |
710 |
| - ): |
711 |
| - raise NotImplementedError |
712 |
| - spec_out = spectrum_output.SpectrumOutput( |
713 |
| - all_preds, |
714 |
| - peptides, |
715 |
| - self.params["ms2pip"], |
716 |
| - output_filename=output_filename, |
717 |
| - ) |
718 |
| - spec_out.write_results(self.output_formats) |
719 |
| - |
720 | 763 |
|
721 | 764 | def _process_peptidoform(
|
722 | 765 | psm_index: int,
|
|
0 commit comments