From b67ba9f560effd52e4535a63a585c77a77b005ed Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Fri, 21 May 2021 13:03:38 +0200 Subject: [PATCH 01/13] Obo downloader downloads non-existent obo from tagged repository --- pymzml/obo.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pymzml/obo.py b/pymzml/obo.py index a3a9d07f..fc04ccb8 100755 --- a/pymzml/obo.py +++ b/pymzml/obo.py @@ -84,6 +84,7 @@ import os import re import gzip +import urllib class OboTranslator(object): @@ -146,6 +147,16 @@ def __normalize_version(version): return version + def download_obo(self, version, obo_file): + uri = f"https://raw.githubusercontent.com/pymzml/psi-ms-CV/v{self.version}/psi-ms.obo" + urllib.request.urlretrieve(uri, obo_file) + + with open(obo_file, "rb") as fin, gzip.open(obo_file + ".gz", "wb") as fout: + breakpoint() + fout.writelines(fin.readlines()) + os.remove(obo_file) + return + def parseOBO(self): self.__obo_parsed = True """ @@ -172,13 +183,14 @@ def parseOBO(self): "obo", "psi-ms{0}.obo".format("-" + self.version if self.version else ""), ) - + breakpoint() if os.path.exists(obo_file): pass elif os.path.exists(obo_file + ".gz"): obo_file = obo_file + ".gz" else: - raise IOError("Could not find obo file {0}".format(obo_file)) + self.download_obo(self.version, obo_file) + obo_file += ".gz" with open(obo_file, "rb") as fin: # never rely on file extensions! @@ -192,7 +204,7 @@ def parseOBO(self): "The file may be corrupted or not gzipped." ) - with open_func(obo_file, "rt", encoding='utf-8') as obo: + with open_func(obo_file, "rt", encoding="utf-8") as obo: collections = {} collect = False for line in obo: From 07fdcbe6fedfb958ac4f1e838417b66725a82ec2 Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Fri, 21 May 2021 13:06:11 +0200 Subject: [PATCH 02/13] remove breakpoints --- pymzml/obo.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pymzml/obo.py b/pymzml/obo.py index fc04ccb8..e8cdd50e 100755 --- a/pymzml/obo.py +++ b/pymzml/obo.py @@ -152,7 +152,6 @@ def download_obo(self, version, obo_file): urllib.request.urlretrieve(uri, obo_file) with open(obo_file, "rb") as fin, gzip.open(obo_file + ".gz", "wb") as fout: - breakpoint() fout.writelines(fin.readlines()) os.remove(obo_file) return @@ -183,7 +182,6 @@ def parseOBO(self): "obo", "psi-ms{0}.obo".format("-" + self.version if self.version else ""), ) - breakpoint() if os.path.exists(obo_file): pass elif os.path.exists(obo_file + ".gz"): From a1b34fa04c41f7e969ea3b9911a457d0d0b965d0 Mon Sep 17 00:00:00 2001 From: Alexander Kislukhin <47034358+liquidcarbon@users.noreply.github.com> Date: Tue, 5 Oct 2021 00:27:57 -0600 Subject: [PATCH 03/13] fixing precursors property otherwise error: ``` site-packages/pymzml/spec.py in precursors(self) 169 precursor(list): list of precursor ids for this spectrum. 170 """ --> 171 if self._precursors is None: 172 precursors = self.element.findall( 173 "./{ns}precursorList/{ns}precursor".format(ns=self.ns) AttributeError: 'Spectrum' object has no attribute '_precursors' ``` --- pymzml/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymzml/spec.py b/pymzml/spec.py index ce0da30e..3bfc6c1c 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -959,7 +959,7 @@ def precursors(self): precursor(list): list of precursor ids for this spectrum. """ self.deprecation_warning(sys._getframe().f_code.co_name) - if self._precursors is None: + if not hasattr(self, '_precursors'): precursors = self.element.findall( "./{ns}precursorList/{ns}precursor".format(ns=self.ns) ) From 205e96ca81fa170e1a28192efb5200a3463dce57 Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Thu, 28 Oct 2021 10:02:44 +0200 Subject: [PATCH 04/13] Add MS precision for MS level 3 - add 20e-6 as standarard precision for ms3 --- pymzml/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pymzml/run.py b/pymzml/run.py index 22fc15bb..976ac7e7 100755 --- a/pymzml/run.py +++ b/pymzml/run.py @@ -105,6 +105,7 @@ def __init__( 0: 0.0001, 1: 5e-6, 2: 20e-6, + 3: 20e-6, } self.ms_precisions.update(MS_precisions) From 1c071e918db594c291c7f4183844d3112205eaf6 Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Mon, 21 Feb 2022 13:07:44 +0100 Subject: [PATCH 05/13] Update run.py - Convert `Path` objects to strings internally - Consistently use `self.path_or_file` instead of `path_or_file` and `self.path_or_file` --- pymzml/run.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pymzml/run.py b/pymzml/run.py index 976ac7e7..a7ebc6f8 100755 --- a/pymzml/run.py +++ b/pymzml/run.py @@ -112,13 +112,15 @@ def __init__( # File info self.info = ddict() self.path_or_file = path_or_file - if isinstance(path_or_file, str): - self.info["file_name"] = path_or_file - self.info["encoding"] = self._determine_file_encoding(path_or_file) + if isinstance(self.path_or_file, Path): + self.path_or_file = str(self.path_or_file) + if isinstance(self.path_or_file, str): + self.info["file_name"] = self.path_or_file + self.info["encoding"] = self._determine_file_encoding(self.path_or_file) else: - self.info["encoding"] = self._guess_encoding(path_or_file) + self.info["encoding"] = self._guess_encoding(self.path_or_file) - self.info["file_object"] = self._open_file(path_or_file) + self.info["file_object"] = self._open_file(self.path_or_file) self.info["offset_dict"] = self.info["file_object"].offset_dict if obo_version: self.info["obo_version"] = self._obo_version_validator(obo_version) From 0de2e76d68d356a9494b46892dd44951591ded0c Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Tue, 22 Feb 2022 09:15:29 -0800 Subject: [PATCH 06/13] Adding support for millisecond --- pymzml/spec.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pymzml/spec.py b/pymzml/spec.py index 3bfc6c1c..1fc2e052 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -883,6 +883,8 @@ def scan_time_in_minutes(self): """ if self._scan_time_in_minutes is None: self._scan_time, time_unit = self.scan_time + if self._scan_time_unit.lower() == "millisecond": + self._scan_time_in_minutes = self._scan_time / 1000.0 / 50.0 if self._scan_time_unit.lower() == "second": self._scan_time_in_minutes = self._scan_time / 60.0 elif self._scan_time_unit.lower() == "minute": From 1a0f5fd3cc8d60b2009591e9c79325913ace9446 Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Fri, 8 Apr 2022 18:38:43 +0200 Subject: [PATCH 07/13] Fix issue with parsing multiple chromatograms --- pymzml/file_classes/standardMzml.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py index 7e413132..f158337c 100755 --- a/pymzml/file_classes/standardMzml.py +++ b/pymzml/file_classes/standardMzml.py @@ -593,14 +593,11 @@ def _read_to_spec_end(self, seeker, chunks_to_read=8): data_chunk += tag_end if regex_patterns.SPECTRUM_CLOSE_PATTERN.search(data_chunk): match = regex_patterns.SPECTRUM_CLOSE_PATTERN.search(data_chunk) - relative_pos_in_chunk = match.end() - end_pos = chunk_offset + relative_pos_in_chunk end_pos = match.end() end_found = True elif regex_patterns.CHROMATOGRAM_CLOSE_PATTERN.search(data_chunk): match = regex_patterns.CHROMATOGRAM_CLOSE_PATTERN.search(data_chunk) - relative_pos_in_chunk = match.end() - end_pos = chunk_offset + relative_pos_in_chunk + end_pos = match.end() end_found = True return (start_pos, end_pos) From e0c61c8215fcd0dbe088e0072eea51912e65cf24 Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Fri, 8 Apr 2022 18:49:45 +0200 Subject: [PATCH 08/13] Update standardMzml.py --- pymzml/file_classes/standardMzml.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py index f158337c..28544cde 100755 --- a/pymzml/file_classes/standardMzml.py +++ b/pymzml/file_classes/standardMzml.py @@ -551,9 +551,11 @@ def _interpol_search(self, target_index, chunk_size=8, fallback_cutoff=100): current_position = seeker.tell() elif len(data) == 0: - sorted_keys = sorted(self.offset_dict.keys()) + sorted_int_keys = { + k: v for k, v in self.offset_dict.items() if isinstance(k, int) + } pos = ( - bisect.bisect_left(sorted_keys, target_index) - 2 + bisect.bisect_left(sorted_int_keys, target_index) - 2 ) # dat magic number :) try: key = sorted_keys[pos] @@ -587,7 +589,6 @@ def _read_to_spec_end(self, seeker, chunks_to_read=8): start_pos = seeker.tell() data_chunk = seeker.read(chunk_size) while end_found is False: - chunk_offset = seeker.tell() data_chunk += seeker.read(chunk_size) tag_end, seeker = self._read_until_tag_end(seeker) data_chunk += tag_end From becb32ca601b2dc104181e621aac68b4da809d2a Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Fri, 8 Apr 2022 18:52:40 +0200 Subject: [PATCH 09/13] Update standardMzml.py remove unused function argument --- pymzml/file_classes/standardMzml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py index 28544cde..4c04b10b 100755 --- a/pymzml/file_classes/standardMzml.py +++ b/pymzml/file_classes/standardMzml.py @@ -741,7 +741,7 @@ def _search_string_identifier(self, search_string, chunk_size=8): file_pointer = seeker.tell() data = seeker.read(total_chunk_size) - string, seeker = self._read_until_tag_end(seeker, byte_mode=True) + string, seeker = self._read_until_tag_end(seeker) data += string spec_start = regex_string.search(data) chrom_start = regex_patterns.CHROMO_OPEN_PATTERN.search(data) @@ -767,7 +767,7 @@ def _search_string_identifier(self, search_string, chunk_size=8): elif len(data) == 0: raise Exception("cant find specified string") - def _read_until_tag_end(self, seeker, max_search_len=12, byte_mode=False): + def _read_until_tag_end(self, seeker, max_search_len=12): """ Help make sure no splitted text appear in chunked data, so regex always find From a5632c22adf92392ec9750c0fe7e89056cd767ef Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Mon, 11 Apr 2022 10:05:41 +0200 Subject: [PATCH 10/13] Update run.py --- pymzml/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pymzml/run.py b/pymzml/run.py index a7ebc6f8..2f76456a 100755 --- a/pymzml/run.py +++ b/pymzml/run.py @@ -43,6 +43,7 @@ import xml.etree.ElementTree as ElementTree from collections import defaultdict as ddict from io import BytesIO +from pathlib import Path from . import spec from . import obo From 1c4644b7eca112041ad4ef1c9633fa6882def600 Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Mon, 11 Apr 2022 12:04:23 +0200 Subject: [PATCH 11/13] Update spec.py --- pymzml/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymzml/spec.py b/pymzml/spec.py index 1fc2e052..d9337bc5 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -884,7 +884,7 @@ def scan_time_in_minutes(self): if self._scan_time_in_minutes is None: self._scan_time, time_unit = self.scan_time if self._scan_time_unit.lower() == "millisecond": - self._scan_time_in_minutes = self._scan_time / 1000.0 / 50.0 + self._scan_time_in_minutes = self._scan_time / 1000.0 / 60.0 if self._scan_time_unit.lower() == "second": self._scan_time_in_minutes = self._scan_time / 60.0 elif self._scan_time_unit.lower() == "minute": From c6b3a3b03113ea603665ae5543a444ac23182998 Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Mon, 11 Apr 2022 13:41:40 +0200 Subject: [PATCH 12/13] Update standardMzml.py --- pymzml/file_classes/standardMzml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py index 4c04b10b..a8115410 100755 --- a/pymzml/file_classes/standardMzml.py +++ b/pymzml/file_classes/standardMzml.py @@ -554,6 +554,7 @@ def _interpol_search(self, target_index, chunk_size=8, fallback_cutoff=100): sorted_int_keys = { k: v for k, v in self.offset_dict.items() if isinstance(k, int) } + sorted_keys = sorted(sorted_int_keys.keys()) pos = ( bisect.bisect_left(sorted_int_keys, target_index) - 2 ) # dat magic number :) From 8c8bd4455f530c3a1c9a1ebaf8b9a6d13ea7d119 Mon Sep 17 00:00:00 2001 From: Manuel <17874544+MKoesters@users.noreply.github.com> Date: Mon, 11 Apr 2022 14:22:16 +0200 Subject: [PATCH 13/13] Update spec.py --- pymzml/spec.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymzml/spec.py b/pymzml/spec.py index d9337bc5..8c36cbc2 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -426,6 +426,7 @@ def __init__(self, element=ElementTree.Element(""), measured_precision=5e-6): self._t_mass_set = None self._t_mz_set = None self._TIC = None + self._precursors = None self._transformed_mass_with_error = None self._transformed_mz_with_error = None self._transformed_peaks = None @@ -961,7 +962,7 @@ def precursors(self): precursor(list): list of precursor ids for this spectrum. """ self.deprecation_warning(sys._getframe().f_code.co_name) - if not hasattr(self, '_precursors'): + if not self._precursors: precursors = self.element.findall( "./{ns}precursorList/{ns}precursor".format(ns=self.ns) )