From b67ba9f560effd52e4535a63a585c77a77b005ed Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Fri, 21 May 2021 13:03:38 +0200
Subject: [PATCH 01/13] Obo downloader downloads non-existent obo from tagged
 repository

---
 pymzml/obo.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/pymzml/obo.py b/pymzml/obo.py
index a3a9d07f..fc04ccb8 100755
--- a/pymzml/obo.py
+++ b/pymzml/obo.py
@@ -84,6 +84,7 @@
 import os
 import re
 import gzip
+import urllib
 
 
 class OboTranslator(object):
@@ -146,6 +147,16 @@ def __normalize_version(version):
 
         return version
 
+    def download_obo(self, version, obo_file):
+        uri = f"https://raw.githubusercontent.com/pymzml/psi-ms-CV/v{self.version}/psi-ms.obo"
+        urllib.request.urlretrieve(uri, obo_file)
+
+        with open(obo_file, "rb") as fin, gzip.open(obo_file + ".gz", "wb") as fout:
+            breakpoint()
+            fout.writelines(fin.readlines())
+            os.remove(obo_file)
+        return
+
     def parseOBO(self):
         self.__obo_parsed = True
         """
@@ -172,13 +183,14 @@ def parseOBO(self):
             "obo",
             "psi-ms{0}.obo".format("-" + self.version if self.version else ""),
         )
-
+        breakpoint()
         if os.path.exists(obo_file):
             pass
         elif os.path.exists(obo_file + ".gz"):
             obo_file = obo_file + ".gz"
         else:
-            raise IOError("Could not find obo file {0}".format(obo_file))
+            self.download_obo(self.version, obo_file)
+            obo_file += ".gz"
 
         with open(obo_file, "rb") as fin:
             # never rely on file extensions!
@@ -192,7 +204,7 @@ def parseOBO(self):
                     "The file may be corrupted or not gzipped."
                 )
 
-        with open_func(obo_file, "rt", encoding='utf-8') as obo:
+        with open_func(obo_file, "rt", encoding="utf-8") as obo:
             collections = {}
             collect = False
             for line in obo:

From 07fdcbe6fedfb958ac4f1e838417b66725a82ec2 Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Fri, 21 May 2021 13:06:11 +0200
Subject: [PATCH 02/13] remove breakpoints

---
 pymzml/obo.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pymzml/obo.py b/pymzml/obo.py
index fc04ccb8..e8cdd50e 100755
--- a/pymzml/obo.py
+++ b/pymzml/obo.py
@@ -152,7 +152,6 @@ def download_obo(self, version, obo_file):
         urllib.request.urlretrieve(uri, obo_file)
 
         with open(obo_file, "rb") as fin, gzip.open(obo_file + ".gz", "wb") as fout:
-            breakpoint()
             fout.writelines(fin.readlines())
             os.remove(obo_file)
         return
@@ -183,7 +182,6 @@ def parseOBO(self):
             "obo",
             "psi-ms{0}.obo".format("-" + self.version if self.version else ""),
         )
-        breakpoint()
         if os.path.exists(obo_file):
             pass
         elif os.path.exists(obo_file + ".gz"):

From a1b34fa04c41f7e969ea3b9911a457d0d0b965d0 Mon Sep 17 00:00:00 2001
From: Alexander Kislukhin <47034358+liquidcarbon@users.noreply.github.com>
Date: Tue, 5 Oct 2021 00:27:57 -0600
Subject: [PATCH 03/13] fixing precursors property

otherwise error:
```
site-packages/pymzml/spec.py in precursors(self)
    169             precursor(list): list of precursor ids for this spectrum.
    170         """
--> 171         if self._precursors is None:
    172             precursors = self.element.findall(
    173                 "./{ns}precursorList/{ns}precursor".format(ns=self.ns)

AttributeError: 'Spectrum' object has no attribute '_precursors'
```
---
 pymzml/spec.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymzml/spec.py b/pymzml/spec.py
index ce0da30e..3bfc6c1c 100755
--- a/pymzml/spec.py
+++ b/pymzml/spec.py
@@ -959,7 +959,7 @@ def precursors(self):
             precursor(list): list of precursor ids for this spectrum.
         """
         self.deprecation_warning(sys._getframe().f_code.co_name)
-        if self._precursors is None:
+        if not hasattr(self, '_precursors'):
             precursors = self.element.findall(
                 "./{ns}precursorList/{ns}precursor".format(ns=self.ns)
             )

From 205e96ca81fa170e1a28192efb5200a3463dce57 Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Thu, 28 Oct 2021 10:02:44 +0200
Subject: [PATCH 04/13] Add MS precision for MS level 3

- add 20e-6 as standard precision for ms3
---
 pymzml/run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pymzml/run.py b/pymzml/run.py
index 22fc15bb..976ac7e7 100755
--- a/pymzml/run.py
+++ b/pymzml/run.py
@@ -105,6 +105,7 @@ def __init__(
             0: 0.0001,
             1: 5e-6,
             2: 20e-6,
+            3: 20e-6,
         }
         self.ms_precisions.update(MS_precisions)
 

From 1c071e918db594c291c7f4183844d3112205eaf6 Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Mon, 21 Feb 2022 13:07:44 +0100
Subject: [PATCH 05/13] Update run.py

- Convert `Path` objects to strings internally
- Consistently use `self.path_or_file` instead of `path_or_file` and `self.path_or_file`
---
 pymzml/run.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pymzml/run.py b/pymzml/run.py
index 976ac7e7..a7ebc6f8 100755
--- a/pymzml/run.py
+++ b/pymzml/run.py
@@ -112,13 +112,15 @@ def __init__(
         # File info
         self.info = ddict()
         self.path_or_file = path_or_file
-        if isinstance(path_or_file, str):
-            self.info["file_name"] = path_or_file
-            self.info["encoding"] = self._determine_file_encoding(path_or_file)
+        if isinstance(self.path_or_file, Path):
+            self.path_or_file = str(self.path_or_file)
+        if isinstance(self.path_or_file, str):
+            self.info["file_name"] = self.path_or_file
+            self.info["encoding"] = self._determine_file_encoding(self.path_or_file)
         else:
-            self.info["encoding"] = self._guess_encoding(path_or_file)
+            self.info["encoding"] = self._guess_encoding(self.path_or_file)
 
-        self.info["file_object"] = self._open_file(path_or_file)
+        self.info["file_object"] = self._open_file(self.path_or_file)
         self.info["offset_dict"] = self.info["file_object"].offset_dict
         if obo_version:
             self.info["obo_version"] = self._obo_version_validator(obo_version)

From 0de2e76d68d356a9494b46892dd44951591ded0c Mon Sep 17 00:00:00 2001
From: Ming Wang <mwang87@gmail.com>
Date: Tue, 22 Feb 2022 09:15:29 -0800
Subject: [PATCH 06/13] Adding support for millisecond

---
 pymzml/spec.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pymzml/spec.py b/pymzml/spec.py
index 3bfc6c1c..1fc2e052 100755
--- a/pymzml/spec.py
+++ b/pymzml/spec.py
@@ -883,6 +883,8 @@ def scan_time_in_minutes(self):
         """
         if self._scan_time_in_minutes is None:
             self._scan_time, time_unit = self.scan_time
+            if self._scan_time_unit.lower() == "millisecond":
+                self._scan_time_in_minutes = self._scan_time / 1000.0 / 50.0
             if self._scan_time_unit.lower() == "second":
                 self._scan_time_in_minutes = self._scan_time / 60.0
             elif self._scan_time_unit.lower() == "minute":

From 1a0f5fd3cc8d60b2009591e9c79325913ace9446 Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Fri, 8 Apr 2022 18:38:43 +0200
Subject: [PATCH 07/13] Fix issue with parsing multiple chromatograms

---
 pymzml/file_classes/standardMzml.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py
index 7e413132..f158337c 100755
--- a/pymzml/file_classes/standardMzml.py
+++ b/pymzml/file_classes/standardMzml.py
@@ -593,14 +593,11 @@ def _read_to_spec_end(self, seeker, chunks_to_read=8):
             data_chunk += tag_end
             if regex_patterns.SPECTRUM_CLOSE_PATTERN.search(data_chunk):
                 match = regex_patterns.SPECTRUM_CLOSE_PATTERN.search(data_chunk)
-                relative_pos_in_chunk = match.end()
-                end_pos = chunk_offset + relative_pos_in_chunk
                 end_pos = match.end()
                 end_found = True
             elif regex_patterns.CHROMATOGRAM_CLOSE_PATTERN.search(data_chunk):
                 match = regex_patterns.CHROMATOGRAM_CLOSE_PATTERN.search(data_chunk)
-                relative_pos_in_chunk = match.end()
-                end_pos = chunk_offset + relative_pos_in_chunk
+                end_pos = match.end()
                 end_found = True
         return (start_pos, end_pos)
 

From e0c61c8215fcd0dbe088e0072eea51912e65cf24 Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Fri, 8 Apr 2022 18:49:45 +0200
Subject: [PATCH 08/13] Update standardMzml.py

---
 pymzml/file_classes/standardMzml.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py
index f158337c..28544cde 100755
--- a/pymzml/file_classes/standardMzml.py
+++ b/pymzml/file_classes/standardMzml.py
@@ -551,9 +551,11 @@ def _interpol_search(self, target_index, chunk_size=8, fallback_cutoff=100):
                     current_position = seeker.tell()
 
             elif len(data) == 0:
-                sorted_keys = sorted(self.offset_dict.keys())
+                sorted_int_keys = {
+                    k: v for k, v in self.offset_dict.items() if isinstance(k, int)
+                }
                 pos = (
-                    bisect.bisect_left(sorted_keys, target_index) - 2
+                    bisect.bisect_left(sorted_int_keys, target_index) - 2
                 )  # dat magic number :)
                 try:
                     key = sorted_keys[pos]
@@ -587,7 +589,6 @@ def _read_to_spec_end(self, seeker, chunks_to_read=8):
         start_pos = seeker.tell()
         data_chunk = seeker.read(chunk_size)
         while end_found is False:
-            chunk_offset = seeker.tell()
             data_chunk += seeker.read(chunk_size)
             tag_end, seeker = self._read_until_tag_end(seeker)
             data_chunk += tag_end

From becb32ca601b2dc104181e621aac68b4da809d2a Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Fri, 8 Apr 2022 18:52:40 +0200
Subject: [PATCH 09/13] Update standardMzml.py

remove unused function argument
---
 pymzml/file_classes/standardMzml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py
index 28544cde..4c04b10b 100755
--- a/pymzml/file_classes/standardMzml.py
+++ b/pymzml/file_classes/standardMzml.py
@@ -741,7 +741,7 @@ def _search_string_identifier(self, search_string, chunk_size=8):
                 file_pointer = seeker.tell()
 
                 data = seeker.read(total_chunk_size)
-                string, seeker = self._read_until_tag_end(seeker, byte_mode=True)
+                string, seeker = self._read_until_tag_end(seeker)
                 data += string
                 spec_start = regex_string.search(data)
                 chrom_start = regex_patterns.CHROMO_OPEN_PATTERN.search(data)
@@ -767,7 +767,7 @@ def _search_string_identifier(self, search_string, chunk_size=8):
                 elif len(data) == 0:
                     raise Exception("cant find specified string")
 
-    def _read_until_tag_end(self, seeker, max_search_len=12, byte_mode=False):
+    def _read_until_tag_end(self, seeker, max_search_len=12):
         """
         Help make sure no splitted text appear in chunked data, so regex always find
         <spectrum ...>

From a5632c22adf92392ec9750c0fe7e89056cd767ef Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Mon, 11 Apr 2022 10:05:41 +0200
Subject: [PATCH 10/13] Update run.py

---
 pymzml/run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pymzml/run.py b/pymzml/run.py
index a7ebc6f8..2f76456a 100755
--- a/pymzml/run.py
+++ b/pymzml/run.py
@@ -43,6 +43,7 @@
 import xml.etree.ElementTree as ElementTree
 from collections import defaultdict as ddict
 from io import BytesIO
+from pathlib import Path
 
 from . import spec
 from . import obo

From 1c4644b7eca112041ad4ef1c9633fa6882def600 Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Mon, 11 Apr 2022 12:04:23 +0200
Subject: [PATCH 11/13] Update spec.py

---
 pymzml/spec.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymzml/spec.py b/pymzml/spec.py
index 1fc2e052..d9337bc5 100755
--- a/pymzml/spec.py
+++ b/pymzml/spec.py
@@ -884,7 +884,7 @@ def scan_time_in_minutes(self):
         if self._scan_time_in_minutes is None:
             self._scan_time, time_unit = self.scan_time
             if self._scan_time_unit.lower() == "millisecond":
-                self._scan_time_in_minutes = self._scan_time / 1000.0 / 50.0
+                self._scan_time_in_minutes = self._scan_time / 1000.0 / 60.0
             if self._scan_time_unit.lower() == "second":
                 self._scan_time_in_minutes = self._scan_time / 60.0
             elif self._scan_time_unit.lower() == "minute":

From c6b3a3b03113ea603665ae5543a444ac23182998 Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Mon, 11 Apr 2022 13:41:40 +0200
Subject: [PATCH 12/13] Update standardMzml.py

---
 pymzml/file_classes/standardMzml.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py
index 4c04b10b..a8115410 100755
--- a/pymzml/file_classes/standardMzml.py
+++ b/pymzml/file_classes/standardMzml.py
@@ -554,6 +554,7 @@ def _interpol_search(self, target_index, chunk_size=8, fallback_cutoff=100):
                 sorted_int_keys = {
                     k: v for k, v in self.offset_dict.items() if isinstance(k, int)
                 }
+                sorted_keys = sorted(sorted_int_keys.keys())
                 pos = (
                     bisect.bisect_left(sorted_int_keys, target_index) - 2
                 )  # dat magic number :)

From 8c8bd4455f530c3a1c9a1ebaf8b9a6d13ea7d119 Mon Sep 17 00:00:00 2001
From: Manuel <17874544+MKoesters@users.noreply.github.com>
Date: Mon, 11 Apr 2022 14:22:16 +0200
Subject: [PATCH 13/13] Update spec.py

---
 pymzml/spec.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pymzml/spec.py b/pymzml/spec.py
index d9337bc5..8c36cbc2 100755
--- a/pymzml/spec.py
+++ b/pymzml/spec.py
@@ -426,6 +426,7 @@ def __init__(self, element=ElementTree.Element(""), measured_precision=5e-6):
         self._t_mass_set = None
         self._t_mz_set = None
         self._TIC = None
+        self._precursors = None
         self._transformed_mass_with_error = None
         self._transformed_mz_with_error = None
         self._transformed_peaks = None
@@ -961,7 +962,7 @@ def precursors(self):
             precursor(list): list of precursor ids for this spectrum.
         """
         self.deprecation_warning(sys._getframe().f_code.co_name)
-        if not hasattr(self, '_precursors'):
+        if not self._precursors:
             precursors = self.element.findall(
                 "./{ns}precursorList/{ns}precursor".format(ns=self.ns)
             )