
Commit

Merge pull request #39 from mmzdouc/dev_mmz
fermo_core v.0.4.3
mmzdouc authored Jul 22, 2024
2 parents 56ed2a8 + 2164a84 commit 8498197
Showing 10 changed files with 241 additions and 107 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG.md
@@ -8,21 +8,30 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## Unreleased

N/A
## [0.4.3] 22-07-2024

### Fixed

- PhenotypeManager: prevented Pearson calculation on constant or NaN-containing arrays (see the sketch below)
- GeneralParser: fixed error handling on malformed input files.
- MS2DeepScoreNetworker: fixed MS2 spectrum filtering for the ms2deepscore algorithm
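A minimal editorial sketch (not part of the commit) of why the PhenotypeManager guard is needed: scipy.stats.zscore divides by the standard deviation, so a constant input array scales to all-NaN values, and a Pearson correlation on such data is meaningless. The values below are illustrative.

import numpy as np
from scipy.stats import pearsonr, zscore

areas = np.array([1000.0, 1000.0, 1000.0, 1000.0])  # constant feature areas across samples
activs = np.array([0.5, 1.0, 2.0, 4.0])  # illustrative phenotype values

areas_scaled = zscore(areas)  # standard deviation is 0, so every element becomes NaN
activs_scaled = zscore(activs)

if np.isnan(areas_scaled).any() or np.isnan(activs_scaled).any():
    print("constant input - skipping Pearson correlation")  # mirrors the new guard
else:
    score, p_val = pearsonr(areas_scaled, activs_scaled)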

## [0.4.2] 16-06-2024

### Fixed

- Fixed bug in SummaryWriter: a nonexistent function was referenced, leading to premature exit of the module.

## [0.4.1] 16-06-2024

### Fixed

- Versioning

## [0.4.0] 15-06-2024

### Removed

- [Breaking change] Removed MS2Query de novo annotation after observing process instability (unexpected process termination by the system with SIGKILL (9))

## [0.3.3] 06-06-2024
@@ -44,6 +53,7 @@ N/A
- Loosened typing restrictions for Feature and Sample object attributes: area and height (intensity) now accept float values.
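A hedged illustration of the loosened typing; the model below is a stand-in, not the repository's actual Feature class definition.

from typing import Optional

from pydantic import BaseModel


class Feature(BaseModel):  # hypothetical stand-in for the Feature object
    area: Optional[float] = None  # floats now accepted for area
    height: Optional[float] = None  # floats now accepted for height (intensity)


Feature(area=1234.56, height=789.01)  # validates without type errors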

### Removed

- [Breaking change] Removed toggle 'nonbiological' from 'FragmentAnnotator' and from parameters file; 'nonbiological' fragment annotation is now performed automatically

### Security
@@ -1,6 +1,6 @@
"""Runs the ms2deepscore library annotation module.
Copyright (c) 2024 Mitja Maximilian Zdouc, PhD
Copyright (c) 2024 to present Mitja Maximilian Zdouc, PhD
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -154,7 +154,7 @@ def calculate_scores_ms2deepscore(self: Self):
except func_timeout.FunctionTimedOut as e:
logger.warning(
f"'AnnotationManager/Ms2deepscoreAnnotator': timeout of "
f"MS2dDeepScore-based "
f"MS2DeepScore-based "
f"calculation: more than specified '{self.max_time}' seconds."
f"For unlimited runtime, set 'maximum_runtime' to 0 - SKIP"
)
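The timeout handled in the hunk above is typically produced by wrapping the scoring call with func_timeout; a hedged sketch of that pattern follows (the function name and the max_time value are placeholders, not fermo_core internals).

import func_timeout


def run_scoring():  # placeholder for the MS2DeepScore-based calculation
    ...


max_time = 600  # seconds; per the log message, 'maximum_runtime' of 0 means unlimited
try:
    if max_time == 0:
        run_scoring()
    else:
        func_timeout.func_timeout(timeout=max_time, func=run_scoring)
except func_timeout.FunctionTimedOut:
    # log a warning and skip the module, as in the except branch above
    pass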
45 changes: 36 additions & 9 deletions fermo_core/data_analysis/class_analysis_manager.py
@@ -112,7 +112,10 @@ def run_feature_filter(self: Self):
self.stats, self.features, self.samples = feature_filter.return_values()
self.params.FeatureFilteringParameters.module_passed = True
except Exception as e:
logger.warning(str(e))
logger.error(str(e))
logger.error(
"FeatureFilter: an error occurred and the module terminated prematurely - SKIP"
)
return

def run_blank_assignment(self: Self):
@@ -143,7 +146,10 @@ def run_blank_assignment(self: Self):
self.stats, self.features = blank_assigner.return_attrs()
self.params.BlankAssignmentParameters.module_passed = True
except Exception as e:
logger.warning(str(e))
logger.error(str(e))
logger.error(
"BlankAssigner: an error occurred and the module terminated prematurely - SKIP"
)
return

def run_group_assignment(self: Self):
@@ -160,7 +166,10 @@
group_assigner.run_analysis()
self.stats, self.features = group_assigner.return_attrs()
except Exception as e:
logger.warning(str(e))
logger.error(str(e))
logger.error(
"GroupAssigner: an error occurred and the module terminated prematurely - SKIP"
)
return

def run_group_factor_assignment(self: Self):
@@ -187,7 +196,10 @@
self.features = group_fact_ass.return_features()
self.params.GroupFactAssignmentParameters.module_passed = True
except Exception as e:
logger.warning(str(e))
logger.error(str(e))
logger.error(
"GroupFactorAssigner: an error occurred and the module terminated prematurely - SKIP"
)
return

def run_phenotype_manager(self: Self):
@@ -218,7 +230,10 @@
phenotype_manager.run_analysis()
self.stats, self.features, self.params = phenotype_manager.return_attrs()
except Exception as e:
logger.warning(str(e))
logger.error(str(e))
logger.error(
"PhenotypeManager: an error occurred and the module terminated prematurely - SKIP"
)
return

def run_sim_networks_manager(self: Self):
@@ -250,7 +265,10 @@
sim_networks_manager.return_attrs()
)
except Exception as e:
logger.warning(str(e))
logger.error(str(e))
logger.error(
"SimNetworksManager: an error occurred and the module terminated prematurely - SKIP"
)
return

def run_annotation_manager(self: Self):
@@ -268,7 +286,10 @@
annotation_manager.return_attrs()
)
except Exception as e:
logger.warning(str(e))
logger.error(str(e))
logger.error(
"AnnotationManager: an error occurred and the module terminated prematurely - SKIP"
)
return

def run_score_assignment(self: Self):
@@ -283,7 +304,10 @@
score_assigner.run_analysis()
self.features, self.samples = score_assigner.return_attributes()
except Exception as e:
logger.warning(str(e))
logger.error(str(e))
logger.error(
"ScoreAssigner: an error occurred and the module terminated prematurely - SKIP"
)
return

def run_chrom_trace_calculator(self: Self):
@@ -293,5 +317,8 @@
self.samples, self.stats
)
except Exception as e:
logger.warning(str(e))
logger.error(str(e))
logger.error(
"ChromTraceCalculator: an error occurred and the module terminated prematurely - SKIP"
)
return
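Every runner method in class_analysis_manager.py now follows the same escalation pattern; condensed into a generic, self-contained sketch (module and logger names are placeholders, not repository code):

import logging

logger = logging.getLogger(__name__)


def run_module(module) -> None:  # stand-in for the run_* methods above
    try:
        module.run_analysis()
    except Exception as e:
        # previously a single logger.warning(str(e)); now escalated to error level
        logger.error(str(e))
        logger.error(
            "Module: an error occurred and the module terminated prematurely - SKIP"
        )
        return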
@@ -24,6 +24,7 @@
import logging
from typing import Self

import numpy as np
from pydantic import BaseModel
from scipy.stats import pearsonr, zscore

@@ -89,7 +90,7 @@ def find_relevant_f_ids(self: Self):
else:
logger.debug(
f"'PhenQuantConcAssigner': feature id '{f_id}' only detected in "
f"'{len(feature.samples)}' samples: exclude from correlation "
f"'{len(feature.samples)}' samples: excluded from correlation "
f"analysis."
)

@@ -101,8 +102,8 @@ def calculate_correlation(self: Self):
"""
if len(self.relevant_f_ids) == 0:
raise RuntimeError(
"'PhenQuantConcAssigner': No relevant features (detected in >3 "
"samples) detected - SKIP."
"'PhenQuantConcAssigner': No relevant features detected"
"(i.e. found in >3 samples) - SKIP."
)

for f_id in self.relevant_f_ids:
@@ -130,6 +131,19 @@ def calculate_correlation(self: Self):
areas_scaled = zscore(areas)
activs_scaled = zscore(activs_reciprocal)

if np.isnan(areas_scaled).any():
logger.debug(
f"'PhenQuantConcAssigner': feature id '{f_id}' has constant "
f"area values ('{areas[0]}'). Cannot calculate Pearson correlation - SKIP."
)
continue
elif np.isnan(activs_scaled).any():
logger.debug(
f"'PhenQuantConcAssigner': feature id '{f_id}' has constant "
f"phenotype values ('{activs[0]}'). Cannot calculate Pearson correlation - SKIP."
)
continue

pearson_s, p_val = pearsonr(areas_scaled, activs_scaled)

p_val_cor = p_val * len(self.relevant_f_ids)
@@ -148,6 +162,7 @@ def calculate_correlation(self: Self):
score=pearson_s,
p_value=p_val,
p_value_corr=p_val_cor,
descr="Area/phenotype Pearson correlation",
)
)
self.stats.phenotypes[num].f_ids_positive.add(f_id)
Expand All @@ -163,6 +178,7 @@ def calculate_correlation(self: Self):
score=pearson_s,
p_value=p_val,
p_value_corr=p_val_cor,
descr="Area/phenotype Pearson correlation",
)
)
self.stats.phenotypes[num].f_ids_positive.add(f_id)
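The p_value_corr assigned in the hunks above is a Bonferroni-style adjustment: the raw p-value is multiplied by the number of features tested. A small worked example with illustrative numbers:

p_val = 0.004
n_tested = 10  # corresponds to len(self.relevant_f_ids)
p_val_cor = p_val * n_tested  # 0.04, still below a 0.05 significance threshold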
@@ -24,6 +24,7 @@
import logging
from typing import Self

import numpy as np
from pydantic import BaseModel
from scipy.stats import pearsonr, zscore

@@ -89,7 +90,7 @@ def find_relevant_f_ids(self: Self):
else:
logger.debug(
f"'PhenQuantPercAssigner': feature id '{f_id}' only detected in "
f"'{len(feature.samples)}' samples: exclude from correlation "
f"'{len(feature.samples)}' samples: excluded from correlation "
f"analysis."
)

@@ -101,8 +102,8 @@ def calculate_correlation(self: Self):
"""
if len(self.relevant_f_ids) == 0:
raise RuntimeError(
"'PhenQuantPercAssigner': No relevant features (detected in >3 "
"samples) detected - SKIP."
"'PhenQuantPercAssigner': No relevant features detected "
"(i.e. found in >3 samples) - SKIP."
)

for f_id in self.relevant_f_ids:
@@ -128,6 +129,19 @@ def calculate_correlation(self: Self):
areas_scaled = zscore(areas)
activs_scaled = zscore(activs)

if np.isnan(areas_scaled).any():
logger.debug(
f"'PhenQuantPercAssigner': feature id '{f_id}' has constant "
f"area values ('{areas[0]}'). Cannot calculate Pearson correlation - SKIP."
)
continue
elif np.isnan(activs_scaled).any():
logger.debug(
f"'PhenQuantPercAssigner': feature id '{f_id}' has constant "
f"phenotype values ('{activs[0]}'). Cannot calculate Pearson correlation - SKIP."
)
continue

pearson_s, p_val = pearsonr(areas_scaled, activs_scaled)

p_val_cor = p_val * len(self.relevant_f_ids)
@@ -146,6 +160,7 @@ def calculate_correlation(self: Self):
score=pearson_s,
p_value=p_val,
p_value_corr=p_val_cor,
descr="Area/phenotype Pearson correlation",
)
)
self.stats.phenotypes[num].f_ids_positive.add(f_id)
@@ -162,6 +177,7 @@ def calculate_correlation(self: Self):
score=pearson_s,
p_value=p_val,
p_value_corr=p_val_cor,
descr="Area/phenotype Pearson correlation",
)
)
self.stats.phenotypes[num].f_ids_positive.add(f_id)
@@ -129,7 +129,7 @@ def assign_sample_scores(self: Self):
except Exception as e:
logger.warning(str(e))
logger.warning(
"'ScoreAssigner': Could not assign sample score, possibly due "
"'ScoreAssigner': Could not assign sample score, possibly due to "
"lack of spectral networking information - SKIP"
)
return
@@ -27,6 +27,7 @@

import func_timeout
import networkx
import numpy as np
from pydantic import BaseModel

from fermo_core.data_analysis.sim_networks_manager.class_mod_cosine_networker import (
@@ -157,9 +158,10 @@ def run_modified_cosine_alg(self: Self):
logger.info("'SimNetworksManager/ModCosineNetworker': started calculation")

filtered_features = self.filter_input_spectra(
tuple(self.stats.active_features),
self.features,
self.params.SpecSimNetworkCosineParameters.msms_min_frag_nr,
features=tuple(self.stats.active_features),
feature_repo=self.features,
msms_min_frag_nr=self.params.SpecSimNetworkCosineParameters.msms_min_frag_nr,
algorithm="modified_cosine",
)

try:
@@ -205,9 +207,10 @@ def run_ms2deepscore_alg(self: Self):
return

filtered_features = self.filter_input_spectra(
tuple(self.stats.active_features),
self.features,
self.params.SpecSimNetworkDeepscoreParameters.msms_min_frag_nr,
features=tuple(self.stats.active_features),
feature_repo=self.features,
msms_min_frag_nr=self.params.SpecSimNetworkDeepscoreParameters.msms_min_frag_nr,
algorithm="ms2deepscore",
)

try:
@@ -246,18 +249,37 @@ def run_ms2deepscore_alg(self: Self):
self.params.SpecSimNetworkDeepscoreParameters.module_passed = True
logger.info("'SimNetworksManager/Ms2deepscoreNetworker': completed calculation")

@staticmethod
def filter_for_ms2deepscore(mz_array: np.ndarray) -> bool:
"""Filters features that have no peaks between 10 and 1000.
MS2DeepScore v0.5.0 has a function 'bin_number_array_fixed()' in file
'spectrum_binning_fixed.py' that raises an AssertionError if all peaks are
below 10 and over 1000 m/z
Arguments:
mz_array: Numpy array of peak m/z positions
"""
new_array = mz_array[(mz_array >= 10.0) & (mz_array <= 1000.0)]
if len(new_array) == 0:
return True
else:
return False
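An editorial usage sketch of the new filter (not part of the commit); the standalone function below copies the static method's logic so the example is self-contained.

import numpy as np


def filter_for_ms2deepscore(mz_array: np.ndarray) -> bool:  # copy of the static method above
    new_array = mz_array[(mz_array >= 10.0) & (mz_array <= 1000.0)]
    return len(new_array) == 0


filter_for_ms2deepscore(np.array([150.2, 420.7, 999.9]))  # False: at least one peak in 10-1000 m/z, keep
filter_for_ms2deepscore(np.array([5.3, 1050.0, 2300.4]))  # True: all peaks outside the window, exclude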

def filter_input_spectra(
self: Self,
features: tuple,
feature_repo: Repository,
msms_min_frag_nr: int,
algorithm: str,
) -> dict[str, set]:
"""Filter features for spectral similarity analysis based on given restrictions.
Arguments:
features: a tuple of feature IDs
feature_repo: containing GeneralFeature objects with feature info
msms_min_frag_nr: minimum number of fragments per spectrum to be considered
algorithm: a flag indicating the calling algorithm
Returns:
A dictionary with sets of included and excluded feature IDs.
@@ -275,6 +297,11 @@ def filter_input_spectra(
self.log_filtered_feature_nr_fragments(
f_id, len(feature.Spectrum.peaks.mz), msms_min_frag_nr
)
elif algorithm == "ms2deepscore":
if self.filter_for_ms2deepscore(feature.Spectrum.peaks.mz):
excluded.add(f_id)
else:
included.add(f_id)
else:
included.add(f_id)

