Skip to content

Commit d87872f

Browse files
authored
Merge pull request #226 from paretje/feature/gzip
support reading gzipped spectrum files
2 parents d2a4281 + 14e4330 commit d87872f

File tree

2 files changed

+6
-17
lines changed

2 files changed

+6
-17
lines changed

docs/source/usage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ For instance:
6565

6666
.. code-block:: sh
6767
68-
ms2pip correlate results.sage.tsv --spectrum-file spectra.mgf
68+
ms2pip correlate --psm-filetype sage results.sage.tsv spectra.mgf
6969
7070
7171
``get-training-data``

ms2pip/spectrum_input.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,12 @@ def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None,
2929
If the file extension is not supported.
3030
3131
"""
32-
file_extension = Path(spectrum_file).suffix.lower()
33-
if file_extension not in [".mgf", ".mzml", ".d"] and not _is_minitdf(spectrum_file):
34-
raise UnsupportedSpectrumFiletypeError(file_extension)
32+
try:
33+
spectra = get_ms2_spectra(str(spectrum_file))
34+
except ValueError:
35+
raise UnsupportedSpectrumFiletypeError(Path(spectrum_file).suffixes)
3536

36-
for spectrum in get_ms2_spectra(str(spectrum_file)):
37+
for spectrum in spectra:
3738
obs_spectrum = ObservedSpectrum(
3839
mz=np.array(spectrum.mz, dtype=np.float32),
3940
intensity=np.array(spectrum.intensity, dtype=np.float32),
@@ -50,15 +51,3 @@ def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None,
5051
):
5152
continue
5253
yield obs_spectrum
53-
54-
55-
def _is_minitdf(spectrum_file: str) -> bool:
56-
"""
57-
Check if the spectrum file is a Bruker miniTDF folder.
58-
59-
A Bruker miniTDF folder has no fixed name, but contains files matching the patterns
60-
``*ms2spectrum.bin`` and ``*ms2spectrum.parquet``.
61-
"""
62-
files = set(Path(spectrum_file).glob("*ms2spectrum.bin"))
63-
files.update(Path(spectrum_file).glob("*ms2spectrum.parquet"))
64-
return len(files) >= 2

0 commit comments

Comments
 (0)