From 73f331a4b808785dddcfb8a2dc746225afe12a25 Mon Sep 17 00:00:00 2001
From: "Zeng, Wen-Feng" <jalew188@gmail.com>
Date: Thu, 19 Jan 2023 11:56:14 +0100
Subject: [PATCH 1/9] update some raw access

---
 .github/workflows/publish_and_release.yml    |  4 ++--
 HISTORY.md                                   |  0
 alpharaw/raw_access/pysciexwifffilereader.py |  2 ++
 alpharaw/thermo.py                           | 16 ++++++++++++----
 alpharaw/wrappers/alphatims_wrapper.py       | 13 +++++++------
 5 files changed, 23 insertions(+), 12 deletions(-)
 delete mode 100644 HISTORY.md

diff --git a/.github/workflows/publish_and_release.yml b/.github/workflows/publish_and_release.yml
index dbe7c66..357e846 100644
--- a/.github/workflows/publish_and_release.yml
+++ b/.github/workflows/publish_and_release.yml
@@ -44,7 +44,7 @@ jobs:
           cd release/pypi
           . ./prepare_pypi_wheel.sh
       - name: Publish distribution to Test PyPI
-        uses: pypa/gh-action-pypi-publish@master
+        uses: pypa/gh-action-pypi-publish@release/v1
         with:
           password: ${{ secrets.TEST_PYPI_ALPHARAW_TOKEN }}
           repository_url: https://test.pypi.org/legacy/
@@ -54,7 +54,7 @@ jobs:
           cd release/pypi
           . ./install_test_pypi_wheel.sh
       - name: Publish distribution to PyPI
-        uses: pypa/gh-action-pypi-publish@master
+        uses: pypa/gh-action-pypi-publish@release/v1
         with:
           password: ${{ secrets.PYPI_ALPHARAW_TOKEN }}
   Test_PyPi_Release:
diff --git a/HISTORY.md b/HISTORY.md
deleted file mode 100644
index e69de29..0000000
diff --git a/alpharaw/raw_access/pysciexwifffilereader.py b/alpharaw/raw_access/pysciexwifffilereader.py
index 62c8108..71dd759 100644
--- a/alpharaw/raw_access/pysciexwifffilereader.py
+++ b/alpharaw/raw_access/pysciexwifffilereader.py
@@ -119,6 +119,8 @@ def load_sample(self, sample_id:int,
                     if details.IsSwath and details.MassRangeInfo.Length > 0:
                         center_mz = DotNetWiffOps.get_center_mz(details)
                         isolation_window = DotNetWiffOps.get_isolation_window(details)
+                    if isolation_window <= 0:
+                        isolation_window = 2.0
                     precursor_mz_list.append(massSpectrumInfo.ParentMZ)
                     precursor_charge_list.append(massSpectrumInfo.ParentChargeState)
                     ce_list.append(float(massSpectrumInfo.CollisionEnergy))
diff --git a/alpharaw/thermo.py b/alpharaw/thermo.py
index 08614a5..31d6783 100644
--- a/alpharaw/thermo.py
+++ b/alpharaw/thermo.py
@@ -61,10 +61,18 @@ def _import(self,
 
                 mono_mz, charge = rawfile.GetMS2MonoMzAndChargeFromScanNum(i)
 
-                precursor_mz_values.append(mono_mz)
-                precursor_charges.append(charge)
-                isolation_mz_lowers.append(isolation_center - isolation_width / 2)
-                isolation_mz_uppers.append(isolation_center + isolation_width / 2)
+                # In thermo_tof, ms1 = ms_order==2&NCE==0?
+                if isolation_center <= 0 and mono_mz <= 0:
+                    precursor_mz_values.append(-1.0)
+                    isolation_mz_lowers.append(-1.0)
+                    isolation_mz_uppers.append(-1.0)
+                    precursor_charges.append(0)
+                    ms_order_list[-1] = 1
+                else:
+                    precursor_mz_values.append(isolation_center)
+                    isolation_mz_lowers.append(isolation_center - isolation_width / 2)
+                    isolation_mz_uppers.append(isolation_center + isolation_width / 2)
+                    precursor_charges.append(charge)
         rawfile.Close()
         peak_indices = np.empty(rawfile.LastSpectrumNumber + 1, np.int64)
         peak_indices[0] = 0
diff --git a/alpharaw/wrappers/alphatims_wrapper.py b/alpharaw/wrappers/alphatims_wrapper.py
index 950f173..eadcf69 100644
--- a/alpharaw/wrappers/alphatims_wrapper.py
+++ b/alpharaw/wrappers/alphatims_wrapper.py
@@ -102,12 +102,13 @@ def _import_alpharaw_object(
         self._intensity_min_value = float(np.min(self._intensity_values))
         self._intensity_max_value = float(np.max(self._intensity_values))
         self._intensity_corrections = np.ones(self._frame_max_index)
-        self._quad_min_mz_value = float(
-            np.min(
-                self._quad_mz_values[self._quad_mz_values != -1]
-            )
-        )
-        self._quad_max_mz_value = float(np.max(self._quad_mz_values))
+        _q_mzs = self._quad_mz_values[self._quad_mz_values != -1]
+        if len(_q_mzs) > 0:
+            self._quad_min_mz_value = float(np.min(_q_mzs))
+            self._quad_max_mz_value = float(np.max(_q_mzs))
+        else:
+            self._quad_min_mz_value = 0
+            self._quad_max_mz_value = 0
         self._precursor_max_index = int(np.max(self._precursor_indices)) + 1
         self._acquisition_mode = msdata.file_type + ' ' + (
             "DDA" if dda else "DIA"

From 56acb646078ea45a025677b71f709ba00918cb41 Mon Sep 17 00:00:00 2001
From: "Zeng, Wen-Feng" <jalew188@gmail.com>
Date: Thu, 19 Jan 2023 21:15:19 +0100
Subject: [PATCH 2/9] update precursor mz assignment

---
 alpharaw/raw_access/pysciexwifffilereader.py | 7 +++++--
 alpharaw/thermo.py                           | 6 +++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/alpharaw/raw_access/pysciexwifffilereader.py b/alpharaw/raw_access/pysciexwifffilereader.py
index 71dd759..10586b0 100644
--- a/alpharaw/raw_access/pysciexwifffilereader.py
+++ b/alpharaw/raw_access/pysciexwifffilereader.py
@@ -120,8 +120,10 @@ def load_sample(self, sample_id:int,
                         center_mz = DotNetWiffOps.get_center_mz(details)
                         isolation_window = DotNetWiffOps.get_isolation_window(details)
                     if isolation_window <= 0:
-                        isolation_window = 2.0
-                    precursor_mz_list.append(massSpectrumInfo.ParentMZ)
+                        isolation_window = 3.0
+                    if center_mz <= 0:
+                        center_mz = massSpectrumInfo.ParentMZ
+                    precursor_mz_list.append(center_mz)
                     precursor_charge_list.append(massSpectrumInfo.ParentChargeState)
                     ce_list.append(float(massSpectrumInfo.CollisionEnergy))
                     isolation_lower_mz_list.append(center_mz-isolation_window/2)
@@ -147,4 +149,5 @@ def load_sample(self, sample_id:int,
             "precursor_charge": np.array(precursor_charge_list, dtype=np.int8), 
             'isolation_mz_lower': np.array(isolation_lower_mz_list),
             'isolation_mz_upper': np.array(isolation_upper_mz_list),
+            'nce': np.array(ce_list, dtype=np.float32),
         }
\ No newline at end of file
diff --git a/alpharaw/thermo.py b/alpharaw/thermo.py
index 31d6783..242404e 100644
--- a/alpharaw/thermo.py
+++ b/alpharaw/thermo.py
@@ -35,6 +35,7 @@ def _import(self,
         isolation_mz_uppers = []
         precursor_charges = []
         ms_order_list = []
+        ce_list = []
         for i in range(
             rawfile.FirstSpectrumNumber,
             rawfile.LastSpectrumNumber + 1
@@ -55,13 +56,15 @@ def _import(self,
                 isolation_mz_lowers.append(-1.0)
                 isolation_mz_uppers.append(-1.0)
                 precursor_charges.append(0)
+                ce_list.append(0)
             else:
                 isolation_center = rawfile.GetPrecursorMassForScanNum(i)
                 isolation_width = rawfile.GetIsolationWidthForScanNum(i)
 
                 mono_mz, charge = rawfile.GetMS2MonoMzAndChargeFromScanNum(i)
+                ce_list.append(rawfile.GetCollisionEnergyForScanNum(i))
 
-                # In thermo_tof, ms1 = ms_order==2&NCE==0?
+                # In case that: ms1 = ms_order==2&NCE==0?
                 if isolation_center <= 0 and mono_mz <= 0:
                     precursor_mz_values.append(-1.0)
                     isolation_mz_lowers.append(-1.0)
@@ -87,6 +90,7 @@ def _import(self,
             'isolation_mz_lower': np.array(isolation_mz_lowers),
             'isolation_mz_upper': np.array(isolation_mz_uppers),
             'ms_level': np.array(ms_order_list, dtype=np.int8),
+            'nce': np.array(ce_list, dtype=np.float32),
         }
 
     def _set_dataframes(self, raw_data:dict):

From 7a7f5355e7e2e097bf4998503c5886a60d9aa308 Mon Sep 17 00:00:00 2001
From: "Zeng, Wen-Feng" <jalew188@gmail.com>
Date: Mon, 23 Jan 2023 00:04:15 +0100
Subject: [PATCH 3/9] mono mz determine

---
 alpharaw/thermo.py                | 11 +++++++----
 nbdev_nbs/match/match_utils.ipynb | 14 +++++++-------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/alpharaw/thermo.py b/alpharaw/thermo.py
index 242404e..e37945e 100644
--- a/alpharaw/thermo.py
+++ b/alpharaw/thermo.py
@@ -52,27 +52,30 @@ def _import(self,
             ms_order = rawfile.GetMSOrderForScanNum(i)
             ms_order_list.append(ms_order)
             if ms_order == 1:
+                ce_list.append(0)
                 precursor_mz_values.append(-1.0)
                 isolation_mz_lowers.append(-1.0)
                 isolation_mz_uppers.append(-1.0)
                 precursor_charges.append(0)
-                ce_list.append(0)
             else:
+                ce_list.append(rawfile.GetCollisionEnergyForScanNum(i))
+
                 isolation_center = rawfile.GetPrecursorMassForScanNum(i)
                 isolation_width = rawfile.GetIsolationWidthForScanNum(i)
 
                 mono_mz, charge = rawfile.GetMS2MonoMzAndChargeFromScanNum(i)
-                ce_list.append(rawfile.GetCollisionEnergyForScanNum(i))
+                if mono_mz <= 0:
+                    mono_mz = isolation_center
 
                 # In case that: ms1 = ms_order==2&NCE==0?
-                if isolation_center <= 0 and mono_mz <= 0:
+                if mono_mz <= 0:
                     precursor_mz_values.append(-1.0)
                     isolation_mz_lowers.append(-1.0)
                     isolation_mz_uppers.append(-1.0)
                     precursor_charges.append(0)
                     ms_order_list[-1] = 1
                 else:
-                    precursor_mz_values.append(isolation_center)
+                    precursor_mz_values.append(mono_mz)
                     isolation_mz_lowers.append(isolation_center - isolation_width / 2)
                     isolation_mz_uppers.append(isolation_center + isolation_width / 2)
                     precursor_charges.append(charge)
diff --git a/nbdev_nbs/match/match_utils.ipynb b/nbdev_nbs/match/match_utils.ipynb
index 4c6dfea..05ad149 100644
--- a/nbdev_nbs/match/match_utils.ipynb
+++ b/nbdev_nbs/match/match_utils.ipynb
@@ -79,12 +79,12 @@
     "#| hide\n",
     "#unittests\n",
     "spec_masses = np.arange(10, dtype=float)*100\n",
-    "query_masses = spec_masses\n",
+    "query_masses = spec_masses[:5]\n",
     "Da_tols = query_masses*20*1e-6\n",
     "spec_intens = np.ones_like(spec_masses)\n",
     "idxes = match_highest_peaks(spec_masses, spec_intens, query_masses, Da_tols)\n",
     "assert np.all(\n",
-    "    np.arange(10, dtype=np.int32)==\n",
+    "    np.arange(5, dtype=np.int32)==\n",
     "    idxes\n",
     ")"
    ]
@@ -98,7 +98,7 @@
     "#| hide\n",
     "#unittests\n",
     "spec_masses = np.arange(10, dtype=float)*100\n",
-    "query_masses = spec_masses.copy()\n",
+    "query_masses = spec_masses.copy()[:5]\n",
     "query_masses[1]+=0.01 # -1\n",
     "query_masses[3]+=0.003 # spec_masses[3]\n",
     "query_masses[5]-=0.0099 # spec_masses[5]\n",
@@ -108,7 +108,7 @@
     "Da_tols = query_masses*20*1e-6\n",
     "assert np.allclose(\n",
     "    match_closest_peaks(spec_masses, 0, query_masses, Da_tols),\n",
-    "    [0, -1,  2,  3,  4,  5, -1, -1,  7,  9]\n",
+    "    [0, -1,  2,  3,  4]\n",
     ")"
    ]
   },
@@ -121,12 +121,12 @@
     "#| hide\n",
     "#unittests\n",
     "spec_masses = np.arange(10, dtype=float)*100\n",
-    "query_masses = np.arange(20).reshape((10,2))*100\n",
+    "query_masses = np.arange(20).reshape((10,2))[:5]*100\n",
     "Da_tols = query_masses*20*1e-6\n",
-    "target = np.arange(20, dtype=np.int32)\n",
+    "target = np.arange(20, dtype=np.int32).reshape((10,2))[:5]\n",
     "target[10:] = -1\n",
     "assert np.allclose(\n",
-    "    target.reshape((10,2)),\n",
+    "    target,\n",
     "    match_closest_peaks(spec_masses, 0, query_masses, Da_tols)\n",
     ")"
    ]

From 6a4ba818b787a6bea26a407a7eeee87907a13bbf Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Sun, 29 Jan 2023 21:42:39 +0100
Subject: [PATCH 4/9] FIX a few bugs

---
 alpharaw/__init__.py                   | 13 ++--
 alpharaw/legacy_msdata/mzml.py         |  1 -
 alpharaw/ms_data_base.py               | 27 ++++++-
 alpharaw/mzml.py                       | 97 ++++++++++++++++++++++++++
 alpharaw/sciex.py                      |  2 +-
 alpharaw/thermo.py                     |  2 +-
 alpharaw/wrappers/alphatims_wrapper.py | 34 +++++++++
 7 files changed, 166 insertions(+), 10 deletions(-)
 delete mode 100644 alpharaw/legacy_msdata/mzml.py
 create mode 100644 alpharaw/mzml.py

diff --git a/alpharaw/__init__.py b/alpharaw/__init__.py
index 544e92d..dc3c315 100644
--- a/alpharaw/__init__.py
+++ b/alpharaw/__init__.py
@@ -1,14 +1,15 @@
 #!python
 
-try:
+def register_all_readers():
     from .ms_data_base import ms_reader_provider
-    from .sciex import SciexWiffData
-    from .thermo import ThermoRawData
     from .legacy_msdata import mgf
-    from .legacy_msdata import mzml
+    from . import mzml
     from .wrappers import alphapept_wrapper
-except ImportError:
-    pass
+    try:
+        from .sciex import SciexWiffData
+        from .thermo import ThermoRawData
+    except (RuntimeError, ImportError):
+        return "[WARN] pythonnet is not installed"
 
 __project__ = "alpharaw"
 __version__ = "0.1.0"
diff --git a/alpharaw/legacy_msdata/mzml.py b/alpharaw/legacy_msdata/mzml.py
deleted file mode 100644
index 56f74e6..0000000
--- a/alpharaw/legacy_msdata/mzml.py
+++ /dev/null
@@ -1 +0,0 @@
-from ..ms_data_base import MSData_Base
diff --git a/alpharaw/ms_data_base.py b/alpharaw/ms_data_base.py
index d175734..66cd204 100644
--- a/alpharaw/ms_data_base.py
+++ b/alpharaw/ms_data_base.py
@@ -6,7 +6,32 @@ class MSData_Base:
     """
     The base data structure for MS Data, other MSData loader inherit
     """
-    def __init__(self, centroided:bool=True):
+
+    spectrum_df: pd.DataFrame
+    """
+    Spectrum dataframe containing the following columns:
+
+    - `rt` (float64): in minutes
+    - `precursor_mz` (float64): mono_mz (DDA) or isolation center mz
+    - `isolation_lower_mz` (float64): left of the isolation window
+    - `isolation_upper_mz` (float64): right of the isolation window
+    - `spec_idx` (int64): spectrum index. For thermo, it is `scan_num - 1`
+    - `peak_start_idx` (int64): peak start position pointing to `self.peak_df`
+    - `peak_stop_idx` (int64): peak stop position pointing to `self._peak_df`
+    - `ms_level` (int8): =1 for MS1, =2 for MS2, ...
+    - [`scan_num`] (int64): Thermo scan number
+    - [`mobility`] (float64): Bruker timsTOF mobility
+
+    Other columns depends on different vendors and file formats
+    """
+
+    peak_df: pd.DataFrame
+    """
+    Peak list dataframe containing the follow columns:
+    - `mz` (float64): m/z values of the peak
+    - `intensity` (float32): intensity values of the peak
+    """
+    def __init__(self, centroided:bool=True, **kwargs):
         """
         Parameters
         ----------
diff --git a/alpharaw/mzml.py b/alpharaw/mzml.py
new file mode 100644
index 0000000..bbea474
--- /dev/null
+++ b/alpharaw/mzml.py
@@ -0,0 +1,97 @@
+import numpy as np
+import re
+import datetime
+import pathlib
+
+from pyteomics import mzml
+
+from .ms_data_base import MSData_Base
+
+# def parse_mzml_item(item_dict: dict) -> tuple:
+#     rt = float(item_dict.get('scanList').get('scan')[0].get('scan start time'))
+#     masses = item_dict.get('m/z array')
+#     intensities = item_dict.get('intensity array')
+#     ms_level = item_dict.get('ms level')
+#     prec_mz = -1.0
+#     isolation_lower_mz = -1.0
+#     isolation_upper_mz = -1.0
+#     charge = 0
+#     if ms_level == 2:
+#         try:
+#             charge = int(item_dict.get('precursorList').get('precursor')[0].get('selectedIonList').get('selectedIon')[0].get(
+#                 'charge state'))
+#         except TypeError:
+#             charge = 0
+#         try:
+#             charge = int(item_dict.get('precursorList').get('precursor')[0].get('selectedIonList').get('selectedIon')[0].get(
+#                 'charge state'))
+#         except TypeError:
+#             charge = 0
+
+#         prec_mz = item_dict.get('precursorList').get('precursor')[0].get('selectedIonList').get('selectedIon')[0].get(
+#                 'selected ion m/z')
+#         try:
+#             iso_lower = int(item_dict.get('precursorList').get('precursor')[0].get('isolation window lower offset').get(
+#                 'charge state'))
+#             iso_upper = int(item_dict.get('precursorList').get('precursor')[0].get('isolation window upper offset').get(
+#                 'charge state'))
+#             isolation_upper_mz = prec_mz + iso_upper
+#             isolation_lower_mz = prec_mz - iso_lower
+#         except TypeError:
+#             isolation_upper_mz = prec_mz + 1.5
+#             isolation_lower_mz = prec_mz - 1.5
+        
+#     return (
+#         rt, prec_mz, isolation_lower_mz, isolation_upper_mz, 
+#         ms_level, charge, masses, intensities
+#     )
+
+# class MSFraggerMzML_Data(MSData_Base):
+#     def _import(self,
+#         filename: str,
+#     ):
+#         reader = mzml.read(filename, use_index=True)
+#         spec_indices = np.array(range(1, len(reader) + 1))
+
+#         scan_list = []
+#         rt_list = []
+#         mzs_list = []
+#         intens_list = []
+#         ms_level_list = []
+#         prec_mz_list = []
+#         charge_list = []
+
+#         self.file_type = 'unknown'
+        
+#         for idx, i in enumerate(spec_indices):
+#             spec = next(reader)
+#             if idx == 0:
+#                 ext = re.findall(r"File:\".+\.(\w+)\"", spec['spectrum title'])[0]
+#                 if ext.lower() == 'raw':
+#                     self.file_type = "thermo"
+
+#             scan_list.append(i)
+#             rt, prec_mz, ms_level, charge, masses, intensities = parse_mzml_item(spec)
+            
+#             sortindex = np.argsort(masses)
+            
+#             masses = masses[sortindex]
+#             intensities = intensities[sortindex]
+            
+#             rt_list.append(rt)
+            
+#             #Remove zero intensities
+#             to_keep = intensities>0
+#             masses = masses[to_keep]
+#             intensities = intensities[to_keep]
+            
+#             mzs_list.append(masses)
+#             intens_list.append(intensities)
+#             ms_level_list.append(ms_level)
+#             prec_mz_list.append(prec_mz)
+#             charge_list.append(charge)
+
+#         fname = pathlib.Path(filename)
+#         acquisition_date_time = datetime.datetime.fromtimestamp(fname.stat().st_mtime).strftime('%Y-%m-%dT%H:%M:%S')
+
+#         return acquisition_date_time
\ No newline at end of file
diff --git a/alpharaw/sciex.py b/alpharaw/sciex.py
index 6df4916..a68f006 100644
--- a/alpharaw/sciex.py
+++ b/alpharaw/sciex.py
@@ -10,7 +10,7 @@ class SciexWiffData(MSData_Base):
     """
     Loading Sciex Wiff data as MSData_Base data structure.
     """
-    def __init__(self, centroided:bool=True):
+    def __init__(self, centroided:bool=True, **kwargs):
         """
         Parameters
         ----------
diff --git a/alpharaw/thermo.py b/alpharaw/thermo.py
index e37945e..87d4f13 100644
--- a/alpharaw/thermo.py
+++ b/alpharaw/thermo.py
@@ -9,7 +9,7 @@ class ThermoRawData(MSData_Base):
     """
     Loading Thermo Raw data as MSData_Base data structure.
     """
-    def __init__(self, centroided:bool=True):
+    def __init__(self, centroided:bool=True, **kwargs):
         """
         Parameters
         ----------
diff --git a/alpharaw/wrappers/alphatims_wrapper.py b/alpharaw/wrappers/alphatims_wrapper.py
index eadcf69..8ade13e 100644
--- a/alpharaw/wrappers/alphatims_wrapper.py
+++ b/alpharaw/wrappers/alphatims_wrapper.py
@@ -6,6 +6,40 @@
 
 from ..ms_data_base import MSData_Base
 
+class AlphaTimsReader(MSData_Base):
+    """
+    > TimsTOF data are too large, do not use this class
+    """
+    def import_raw(self, burker_d_folder:str):
+        tims = TimsTOF(
+            burker_d_folder,
+        )
+
+        self.raw_file_path = burker_d_folder
+        self.file_type = 'bruker'
+
+        self.spectrum_df
+
+        self.spectrum_df['precursor_mz'] = tims.fragment_frames.IsolationMz.values
+        isolations = tims.fragment_frames.IsolationWidth.values/2
+        self.spectrum_df['isolation_lower_mz'] = self.spectrum_df.precursor_mz - isolations
+        self.spectrum_df['isolation_upper_mz'] = self.spectrum_df.precursor_mz + isolations
+        self.spectrum_df['peak_start_idx'] = tims._push_indptr[:-1]
+        self.spectrum_df['peak_stop_idx'] = tims._push_indptr[1:]
+        self.spectrum_df['rt'] = tims.rt_values/60
+        self.spectrum_df['mobility'] = tims.mobility_values
+        self.spectrum_df['ms_level'] = (
+            tims.precursor_indices>0
+        ).astype(np.int8)+1
+        self.spectrum_df['spec_idx'] = self.spectrum_df.index.values
+        self.spectrum_df['tims_frame'] = self.spectrum_df.spec_idx // tims.scan_max_index
+        self.spectrum_df['tims_scan'] = self.spectrum_df.spec_idx % tims.scan_max_index
+        
+        self.peak_df['mz'] = tims.mz_values[tims.tof_indices]
+        self.peak_df['intensity'] = tims.intensity_values.astype(np.float32)
+        
+
+
 class AlphaTimsWrapper(TimsTOF):
     """Create a AlphaTims object that contains 
     all data in-memory (or memory mapping).

From 095bbda5792e262252a29a814b0a50d326c48028 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Sun, 29 Jan 2023 22:21:10 +0100
Subject: [PATCH 5/9] add AlphaPept bruker FF (not done)

---
 alpharaw/bruker/ap_ff.py      | 165 ++++++++++++++++++++++++++++++++++
 alpharaw/mzml.py              |  12 +--
 requirements/requirements.txt |   1 +
 3 files changed, 172 insertions(+), 6 deletions(-)
 create mode 100644 alpharaw/bruker/ap_ff.py

diff --git a/alpharaw/bruker/ap_ff.py b/alpharaw/bruker/ap_ff.py
new file mode 100644
index 0000000..a2e0379
--- /dev/null
+++ b/alpharaw/bruker/ap_ff.py
@@ -0,0 +1,165 @@
+import pandas as pd
+import numpy as np
+import sqlalchemy as db
+
+import subprocess
+import os
+import platform
+
+from tqdm import tqdm
+
+
+def extract_bruker(file:str, base_dir:str = "ext/bruker/FF", config:str = "proteomics_4d.config"):
+    """Call Bruker Feautre Finder via subprocess.
+
+    Args:
+        file (str): Filename for feature finding.
+        base_dir (str, optional): Base dir where the feature finder is stored.. Defaults to "ext/bruker/FF".
+        config (str, optional): Config file for feature finder. Defaults to "proteomics_4d.config".
+
+    Raises:
+        NotImplementedError: Unsupported operating system.
+        FileNotFoundError: Feature finder not found.
+        FileNotFoundError: Config file not found.
+        FileNotFoundError: Feature file not found.
+    """    
+    feature_path = file + '/'+ os.path.split(file)[-1] + '.features'
+
+    base_dir = os.path.join(os.path.dirname(__file__), base_dir)
+
+    operating_system = platform.system()
+
+    if operating_system == 'Linux':
+        ff_dir = os.path.join(base_dir, 'linux64','uff-cmdline2')
+        print('Using Linux FF')
+    elif operating_system == 'Windows':
+        ff_dir = os.path.join(base_dir, 'win64','uff-cmdline2.exe')
+        print('Using Windows FF')
+    else:
+        raise NotImplementedError(f"System {operating_system} not supported.")
+
+    if os.path.exists(feature_path):
+        return feature_path
+    else:
+        if not os.path.isfile(ff_dir):
+            raise FileNotFoundError(f'Bruker feature finder cmd not found here {ff_dir}.')
+
+        config_path = base_dir + '/'+ config
+
+        if not os.path.isfile(config_path):
+            raise FileNotFoundError(f'Config file not found here {config_path}.')
+
+        if operating_system == 'Windows':
+            FF_parameters = [ff_dir,'--ff 4d',f'--readconfig "{config_path}"', f'--analysisDirectory "{file}"']
+
+            process = subprocess.Popen(' '.join(FF_parameters), stdout=subprocess.PIPE)
+            for line in iter(process.stdout.readline, b''):
+                logtxt = line.decode('utf8')
+                print(logtxt[48:].rstrip()) #Remove logging info from FF
+        elif operating_system == 'Linux':
+            FF_parameters = [
+                ff_dir,
+                '--ff',
+                '4d',
+                '--readconfig',
+                config_path,
+                '--analysisDirectory',
+                file
+            ]
+            process = subprocess.run(FF_parameters, stdout=subprocess.PIPE)
+
+        if os.path.exists(feature_path):
+            return feature_path
+        else:
+            raise FileNotFoundError(f"Feature file {feature_path} does not exist.")
+
+def convert_bruker(feature_path:str)->pd.DataFrame:
+    """Reads feature table and converts to feature table to be used with AlphaPept.
+
+    Args:
+        feature_path (str): Path to the feature file from Bruker FF (.features-file).
+
+    Returns:
+        pd.DataFrame: DataFrame containing features information.
+    """
+    engine_featurefile = db.create_engine('sqlite:///{}'.format(feature_path))
+    feature_table = pd.read_sql_table('LcTimsMsFeature', engine_featurefile)
+    feature_cluster_mapping = pd.read_sql_table('FeatureClusterMapping', engine_featurefile)
+
+    # feature_table['Mass'] = feature_table['MZ'].values * feature_table['Charge'].values - feature_table['Charge'].values*M_PROTON
+    feature_table = feature_table.rename(columns={
+        "MZ": "mz","Mass": "mass", "RT": "rt_apex", 
+        "RT_lower":"rt_start", "RT_upper":"rt_end", 
+        "Mobility": "mobility", "Mobility_lower": "mobility_lower", 
+        "Mobility_upper": "mobility_upper", "Charge":"charge",
+        "Intensity":'ms1_int_sum_apex',"ClusterCount":'n_isotopes'
+    })
+    feature_table['rt_apex'] = feature_table['rt_apex']/60
+    feature_table['rt_start'] = feature_table['rt_start']/60
+    feature_table['rt_end'] = feature_table['rt_end']/60
+    
+    feature_cluster_mapping = feature_cluster_mapping.rename(columns={
+        "FeatureId": "feature_id", "ClusterId": "cluster_id", 
+        "Monoisotopic": "monoisotopic", "Intensity": "ms1_int_sum_apex"
+    })
+    
+    return feature_table, feature_cluster_mapping
+
+
+def map_bruker(feature_path:str, feature_table:pd.DataFrame, query_data:dict)->pd.DataFrame:
+    """Map Ms1 to Ms2 via Table FeaturePrecursorMapping from Bruker FF.
+
+    Args:
+        feature_path (str): Path to the feature file from Bruker FF (.features-file).
+        feature_table (pd.DataFrame): Pandas DataFrame containing the features.
+        query_data (dict): Data structure containing the query data.
+
+    Returns:
+        pd.DataFrame: DataFrame containing features information.
+    """
+    engine_featurefile = db.create_engine('sqlite:///{}'.format(feature_path))
+
+    mapping = pd.read_sql_table('FeaturePrecursorMapping', engine_featurefile)
+    mapping = mapping.set_index('PrecursorId')
+    feature_table= feature_table.set_index('Id')
+
+
+    query_prec_id = query_data['prec_id']
+
+    #Now look up the feature for each precursor
+
+    mass_matched = []
+    mz_matched = []
+    rt_matched = []
+    query_idx = []
+    f_idx = []
+
+    for idx, prec_id in tqdm(enumerate(query_prec_id)):
+        try:
+            f_id = mapping.loc[prec_id]['FeatureId']
+            all_matches = feature_table.loc[f_id]
+            if type(f_id) == np.int64:
+                match = all_matches
+                mz_matched.append(match['mz'])
+                rt_matched.append(match['rt_apex'])
+                mass_matched.append(match['mass'])
+                query_idx.append(idx)
+                f_idx.append(match['FeatureId'])
+
+            else:
+                for k in range(len(all_matches)):
+                    match = all_matches.iloc[k]
+                    mz_matched.append(match['mz'])
+                    rt_matched.append(match['rt_apex'])
+                    mass_matched.append(match['mass'])
+                    query_idx.append(idx)
+                    f_idx.append(match['FeatureId'])
+
+        except KeyError:
+            pass
+
+    features = pd.DataFrame(np.array([mass_matched, mz_matched, rt_matched, query_idx, f_idx]).T, columns = ['mass_matched', 'mz_matched', 'rt_matched', 'query_idx', 'feature_idx'])
+
+    features['query_idx'] = features['query_idx'].astype('int')
+
+    return features
\ No newline at end of file
diff --git a/alpharaw/mzml.py b/alpharaw/mzml.py
index bbea474..05ee253 100644
--- a/alpharaw/mzml.py
+++ b/alpharaw/mzml.py
@@ -1,11 +1,11 @@
-import numpy as np
-import re
-import datetime
-import pathlib
+# import numpy as np
+# import re
+# import datetime
+# import pathlib
 
-from pyteomics import mzml
+# from pyteomics import mzml
 
-from .ms_data_base import MSData_Base
+# from .ms_data_base import MSData_Base
 
 # def parse_mzml_item(item_dict: dict) -> tuple:
 #     rt = float(item_dict.get('scanList').get('scan')[0].get('scan start time'))
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 942f33f..bfa3c4e 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -5,5 +5,6 @@ h5py
 numba
 pandas
 numpy
+sqlalchemy
 
 alphabase
\ No newline at end of file

From 7588c533cab5363870c556f723089581cec95814 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Thu, 13 Apr 2023 14:13:58 +0200
Subject: [PATCH 6/9] CHROE

---
 alpharaw/__init__.py                          |  4 +-
 alpharaw/utils/centroiding.py                 | 39 ++++---------------
 .../development.txt                           |  0
 .../requirements.txt => requirements.txt      |  0
 setup.py                                      |  6 +--
 5 files changed, 10 insertions(+), 39 deletions(-)
 rename requirements/requirements_development.txt => extra_requirements/development.txt (100%)
 rename requirements/requirements.txt => requirements.txt (100%)

diff --git a/alpharaw/__init__.py b/alpharaw/__init__.py
index dc3c315..8ba963d 100644
--- a/alpharaw/__init__.py
+++ b/alpharaw/__init__.py
@@ -1,6 +1,6 @@
 #!python
 
-def register_all_readers():
+def register_readers():
     from .ms_data_base import ms_reader_provider
     from .legacy_msdata import mgf
     from . import mzml
@@ -50,5 +50,5 @@ def register_all_readers():
     # "Scientific paper": None,
 }
 __extra_requirements__ = {
-    "development": "requirements_development.txt",
+    "development": "extra_requirements/development.txt",
 }
diff --git a/alpharaw/utils/centroiding.py b/alpharaw/utils/centroiding.py
index 039bcf6..f075dad 100644
--- a/alpharaw/utils/centroiding.py
+++ b/alpharaw/utils/centroiding.py
@@ -3,57 +3,32 @@
 import numpy as np
 from numba import njit
 from numba.typed import List
-import numba
 
 @njit
-def get_a_centroid_group_all_adjacent(
-    start_list, end_list,
+def get_neighbors_start_stop(
+    peak_mzs:np.ndarray,
+    start_list, stop_list,
     start_idx:int,
     mz_tol:float,
     mz_diffs:np.ndarray,
-    int_grads:np.ndarray,
 ):
     for i in range(start_idx, len(mz_diffs)):
         if mz_diffs[i] > mz_tol:
             start_list.append(start_idx)
-            end_list.append(i)
+            stop_list.append(i+1)
             return i + 1
     start_list.append(start_idx)
-    end_list.append(len(mz_diffs))
+    stop_list.append(len(mz_diffs))
     return len(mz_diffs)+1
 
-@njit
-def get_a_centroid_group(
-    start_list, end_list,
-    start_idx:int,
-    mz_tol:float,
-    mz_diffs:np.ndarray,
-    int_grads:np.ndarray,
-):
-    go_up = True
-    for i in range(start_idx, len(int_grads)):
-        if mz_diffs[i] > mz_tol:
-            start_list.append(start_idx)
-            end_list.append(i)
-            return i + 1
-        elif int_grads[i] > 0 and not go_up:
-            start_list.append(start_idx)
-            end_list.append(i)
-            return i
-        elif go_up:
-            go_up = False
-    start_list.append(start_idx)
-    end_list.append(len(int_grads))
-    return len(int_grads)+1
-
 @njit
 def get_peaks(
     mz_array:np.ndarray,
     int_array: np.ndarray,
     mz_tol:float,
 ) -> list:
-    start_list = List.empty_list(numba.int64)
-    end_list = List.empty_list(numba.int64)
+    start_list = List()
+    end_list = List()
     gradient = np.diff(int_array)
     mz_diffs = np.diff(mz_array)
     start = 0
diff --git a/requirements/requirements_development.txt b/extra_requirements/development.txt
similarity index 100%
rename from requirements/requirements_development.txt
rename to extra_requirements/development.txt
diff --git a/requirements/requirements.txt b/requirements.txt
similarity index 100%
rename from requirements/requirements.txt
rename to requirements.txt
diff --git a/setup.py b/setup.py
index 1558820..81d1871 100644
--- a/setup.py
+++ b/setup.py
@@ -24,11 +24,7 @@ def get_requirements():
     requirement_file_names = package2install.__extra_requirements__
     requirement_file_names[""] = "requirements.txt"
     for extra, requirement_file_name in requirement_file_names.items():
-        full_requirement_file_name = os.path.join(
-            "requirements",
-            requirement_file_name,
-        )
-        with open(full_requirement_file_name) as requirements_file:
+        with open(requirement_file_name) as requirements_file:
             if extra != "":
                 extra_stable = f"{extra}-stable"
             else:

From 1f3573bee372197f5514a08f16b7dcc33fa4935a Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Fri, 16 Jun 2023 11:22:03 +0200
Subject: [PATCH 7/9] many updates

---
 alpharaw/__init__.py                   |   2 +-
 alpharaw/legacy_msdata/mgf.py          |  11 +-
 alpharaw/match/match_utils.py          |  45 +-
 alpharaw/match/psm_match.py            |  40 +-
 alpharaw/match/psm_match_alphatims.py  |  60 +-
 alpharaw/ms_data_base.py               |  67 +-
 alpharaw/mzml.py                       | 235 ++++---
 alpharaw/sciex.py                      |   4 +-
 alpharaw/thermo.py                     |  16 +-
 alpharaw/wrappers/alphapept_wrapper.py |   2 +-
 nbdev_nbs/match/match_utils.ipynb      |  16 +-
 nbdev_nbs/match/psm_match.ipynb        |  30 +-
 nbdev_nbs/test_mzml.ipynb              | 894 +++++++++++++++++++++++++
 nbs/test_tims_match.ipynb              |   2 +-
 14 files changed, 1233 insertions(+), 191 deletions(-)
 create mode 100644 nbdev_nbs/test_mzml.ipynb

diff --git a/alpharaw/__init__.py b/alpharaw/__init__.py
index 8ba963d..3257380 100644
--- a/alpharaw/__init__.py
+++ b/alpharaw/__init__.py
@@ -3,7 +3,7 @@
 def register_readers():
     from .ms_data_base import ms_reader_provider
     from .legacy_msdata import mgf
-    from . import mzml
+    from .mzml import MzMLReader
     from .wrappers import alphapept_wrapper
     try:
         from .sciex import SciexWiffData
diff --git a/alpharaw/legacy_msdata/mgf.py b/alpharaw/legacy_msdata/mgf.py
index e2e770e..732a5b5 100644
--- a/alpharaw/legacy_msdata/mgf.py
+++ b/alpharaw/legacy_msdata/mgf.py
@@ -2,7 +2,8 @@
 
 from alpharaw.ms_data_base import (
     index_ragged_list, MSData_Base,
-    ms_reader_provider
+    ms_reader_provider,
+    PEAK_MZ_DTYPE, PEAK_INTENSITY_DTYPE
 )
 
 def read_until(file, until):
@@ -73,8 +74,8 @@ def _import(self, _path:str):
                 rt_list.append(RT)
                 precursor_mz_list.append(precursor_mz)
                 charge_list.append(charge)
-                masses_list.append(np.array(masses))
-                intens_list.append(np.array(intens))
+                masses_list.append(np.array(masses, dtype=PEAK_MZ_DTYPE))
+                intens_list.append(np.array(intens, dtype=PEAK_INTENSITY_DTYPE))
         if isinstance(_path, str): 
             f.close()
 
@@ -111,7 +112,7 @@ def _set_dataframes(self, raw_data:dict):
         charges = np.zeros(spec_num, np.int8)
         charges[spec_idxes] = raw_data['precursor_charge']
 
-        self.set_peaks_by_cat_array(
+        self.set_peak_df_by_indexed_array(
             raw_data['peak_mz'],
             raw_data['peak_intensity'],
             start_idxes,end_idxes
@@ -122,7 +123,7 @@ def _set_dataframes(self, raw_data:dict):
         self.set_precursor_mz(
             precursor_mzs
         )
-        self.set_precursor_mz_windows(
+        self.set_isolation_mz_windows(
             mz_lowers,mz_uppers
         )
 
diff --git a/alpharaw/match/match_utils.py b/alpharaw/match/match_utils.py
index 22a6217..c828483 100644
--- a/alpharaw/match/match_utils.py
+++ b/alpharaw/match/match_utils.py
@@ -36,11 +36,13 @@ def match_closest_peaks(
         Matched indices of spec_mzs, -1 means no peaks were matched.
         Same shape as query_mzs 
     """
-    query_left_mzs = query_mzs-query_mz_tols
-    query_right_mzs = query_mzs+query_mz_tols
+    mzs = query_mzs.reshape(-1)
+    query_mz_tols = query_mz_tols.reshape(-1)
+    query_left_mzs = mzs-query_mz_tols
+    query_right_mzs = mzs+query_mz_tols
     idxes = np.searchsorted(spec_mzs, query_left_mzs)
-    ret_indices = np.empty_like(query_mzs, dtype=np.int32)
-    for i,idx in np.ndenumerate(idxes):
+    ret_indices = np.empty_like(mzs, dtype=np.int32)
+    for i,idx in enumerate(idxes):
         min_merr = 1000000
         closest_idx = -1
         for _idx in range(idx, len(spec_mzs)):
@@ -48,11 +50,11 @@ def match_closest_peaks(
                 break
             elif spec_mzs[_idx]<query_left_mzs[i]:
                 continue
-            elif min_merr > abs(spec_mzs[_idx]-query_mzs[i]):
-                min_merr = abs(spec_mzs[_idx]-query_mzs[i])
+            elif min_merr > abs(spec_mzs[_idx]-mzs[i]):
+                min_merr = abs(spec_mzs[_idx]-mzs[i])
                 closest_idx = _idx
         ret_indices[i] = closest_idx
-    return ret_indices
+    return ret_indices.reshape(query_mzs.shape)
 
 
 @numba.njit
@@ -85,11 +87,13 @@ def match_highest_peaks(
         Matched indices of spec_mzs, -1 means no peaks were matched.
         Same shape as query_mzs 
     """
-    query_left_mzs = query_mzs-query_mz_tols
-    query_right_mzs = query_mzs+query_mz_tols
+    mzs = query_mzs.reshape(-1)
+    query_mz_tols = query_mz_tols.reshape(-1)
+    query_left_mzs = mzs-query_mz_tols
+    query_right_mzs = mzs+query_mz_tols
     idxes = np.searchsorted(spec_mzs, query_left_mzs)
-    ret_indices = np.empty_like(query_mzs, dtype=np.int32)
-    for i,idx in np.ndenumerate(idxes):
+    ret_indices = np.empty_like(mzs, dtype=np.int32)
+    for i,idx in enumerate(idxes):
         highest = 0
         highest_idx = -1
         for _idx in range(idx, len(spec_mzs)):
@@ -101,7 +105,7 @@ def match_highest_peaks(
                 highest = spec_intens[_idx]
                 highest_idx = _idx
         ret_indices[i] = highest_idx
-    return ret_indices
+    return ret_indices.reshape(query_mzs.shape)
 
 @numba.njit
 def match_profile_peaks(
@@ -137,16 +141,18 @@ def match_profile_peaks(
         np.ndarray: matched last (right-most) indices, the shape is the same as query_mzs.
         -1 means no peaks are matched for the query mz.
     """
-    query_left_mzs = query_mzs-query_mz_tols
-    query_right_mzs = query_mzs+query_mz_tols
+    mzs = query_mzs.reshape(-1)
+    query_mz_tols = query_mz_tols.reshape(-1)
+    query_left_mzs = mzs-query_mz_tols
+    query_right_mzs = mzs+query_mz_tols
     idxes = np.searchsorted(spec_mzs, query_left_mzs)
     first_indices = np.full_like(
-        query_mzs, -1, dtype=np.int32
+        mzs, -1, dtype=np.int32
     )
     last_indices = np.full_like(
-        query_mzs, -1, dtype=np.int32
+        mzs, -1, dtype=np.int32
     )
-    for i,idx in np.ndenumerate(idxes):
+    for i,idx in enumerate(idxes):
         for first_idx in range(idx, len(spec_mzs)):
             if spec_mzs[first_idx]<query_left_mzs[i]:
                 continue
@@ -162,5 +168,8 @@ def match_profile_peaks(
                             break
                     last_indices[i] = last_idx-1
                     break
-    return first_indices, last_indices
+    return (
+        first_indices.reshape(query_mzs.shape), 
+        last_indices.reshape(query_mzs.shape),
+    )
 
diff --git a/alpharaw/match/psm_match.py b/alpharaw/match/psm_match.py
index 7de0b68..ce9c2d3 100644
--- a/alpharaw/match/psm_match.py
+++ b/alpharaw/match/psm_match.py
@@ -10,7 +10,8 @@
 )
 
 from alpharaw.ms_data_base import (
-    MSData_Base, ms_reader_provider
+    MSData_Base, ms_reader_provider,
+    PEAK_MZ_DTYPE, PEAK_INTENSITY_DTYPE
 )
 
 from alpharaw.match.match_utils import (
@@ -21,11 +22,11 @@
 @numba.njit
 def match_one_raw_with_numba(
     spec_idxes, frag_start_idxes, frag_stop_idxes,
-    all_frag_mzs,
+    all_frag_mzs, all_frag_mz_tols,
     all_spec_mzs, all_spec_intensities, 
     peak_start_idxes, peak_stop_idxes,
-    matched_intensities, matched_mz_errs,
-    use_ppm, tol, matched_closest=True,
+    matched_intensities, matched_mz_errs, 
+    matched_closest=True,
 ):
     """ 
     Internel function to match fragment mz values to spectrum mz values.
@@ -44,11 +45,7 @@ def match_one_raw_with_numba(
         spec_intens = all_spec_intensities[peak_start:peak_stop]
 
         frag_mzs = all_frag_mzs[frag_start:frag_end,:].copy()
-        
-        if use_ppm:
-            frag_mz_tols = frag_mzs*tol*1e-6
-        else:
-            frag_mz_tols = np.full_like(frag_mzs, tol)
+        frag_mz_tols = all_frag_mz_tols[frag_start:frag_end,:].copy()
         
         if matched_closest:
             matched_idxes = match_closest_peaks(
@@ -112,7 +109,7 @@ class PepSpecMatch:
     match_closest:bool = True
     use_ppm:bool = True
     #: matching mass tolerance
-    tolerance:float = 20
+    tolerance:PEAK_MZ_DTYPE = 20.0
     def __init__(self,
         charged_frag_types:list = get_charged_frag_types(
             ['b','y','b_modloss','y_modloss'], 2
@@ -150,7 +147,8 @@ def _preprocess_psms(self, psm_df):
 
     def get_fragment_mz_df(self, psm_df):
         return create_fragment_mz_dataframe(
-            psm_df, self.charged_frag_types
+            psm_df, self.charged_frag_types,
+            dtype=PEAK_MZ_DTYPE,
         )
 
     def _add_missing_columns_to_psm_df(self,
@@ -189,10 +187,11 @@ def _add_missing_columns_to_psm_df(self,
     def _prepare_matching_dfs(self, psm_df):
 
         fragment_mz_df = self.get_fragment_mz_df(psm_df)
-        
+
         matched_intensity_df = pd.DataFrame(
             np.zeros_like(
-                fragment_mz_df.values, dtype=np.float64
+                fragment_mz_df.values, 
+                dtype=PEAK_INTENSITY_DTYPE
             ), 
             columns=fragment_mz_df.columns
         )
@@ -200,7 +199,7 @@ def _prepare_matching_dfs(self, psm_df):
         matched_mz_err_df = pd.DataFrame(
             np.full_like(
                 fragment_mz_df.values, np.inf, 
-                dtype=np.float64
+                dtype=PEAK_MZ_DTYPE
             ), 
             columns=fragment_mz_df.columns
         )
@@ -243,6 +242,8 @@ def _match_one_psm(self,
     ):
         if len(spec_mzs)==0: return
 
+        spec_mzs = spec_mzs.astype(PEAK_MZ_DTYPE)
+
         frag_mzs = fragment_mz_df.values[
             frag_start_idx:frag_stop_idx,:
         ]
@@ -250,7 +251,9 @@ def _match_one_psm(self,
         if self.use_ppm:
             mz_tols = frag_mzs*self.tolerance*1e-6
         else:
-            mz_tols = np.full_like(frag_mzs, self.tolerance)
+            mz_tols = np.full_like(
+                frag_mzs, self.tolerance
+            )
 
         if self.match_closest:
             matched_idxes = match_closest_peaks(
@@ -353,18 +356,23 @@ def _match_ms2_one_raw_numba(self, raw_name, df_group):
                 df_group, raw_data
             )
 
+            if self.use_ppm:
+                all_frag_mz_tols = self.fragment_mz_df.values*self.tolerance*1e-6
+            else:
+                all_frag_mz_tols = np.full_like(self.fragment_mz_df.values, self.tolerance)
+
             match_one_raw_with_numba(
                 df_group.spec_idx.values,
                 df_group.frag_start_idx.values,
                 df_group.frag_stop_idx.values,
                 self.fragment_mz_df.values,
+                all_frag_mz_tols, 
                 raw_data.peak_df.mz.values, 
                 raw_data.peak_df.intensity.values,
                 raw_data.spectrum_df.peak_start_idx.values,
                 raw_data.spectrum_df.peak_stop_idx.values,
                 self.matched_intensity_df.values,
                 self.matched_mz_err_df.values,
-                self.use_ppm, self.tolerance, 
                 self.match_closest
             )
     
diff --git a/alpharaw/match/psm_match_alphatims.py b/alpharaw/match/psm_match_alphatims.py
index 7e981b4..ca1563a 100644
--- a/alpharaw/match/psm_match_alphatims.py
+++ b/alpharaw/match/psm_match_alphatims.py
@@ -83,16 +83,18 @@ class PepSpecMatch_AlphaTims(PepSpecMatch):
     """
 
     #: RT win to get a MS2 spectrum by slicing
-    rt_sec_win_to_slice_ms2 = 10.0
+    rt_sec_tol_to_slice_ms2 = 3.0
 
     #: IM win to get a MS2 spectrum by slicing
-    im_win_to_slice_ms2 = 0.2
+    im_tol_to_slice_ms2 = 0.03
 
     #: find closest MS2 for the given RT when slicing
-    find_closest_ms2_by_rt = True
+    find_k_nearest_ms2_by_rt = True
+    k_rt_nearest = 1
 
     # : find closest MS2 for the given RT when slicing
-    find_closest_ms2_by_im = True
+    find_k_nearest_ms2_by_im = False
+    k_im_nearest = 11
 
     def get_peak_df(self,
         precursor_mz:float,
@@ -116,51 +118,60 @@ def get_peak_df(self,
         """
         rt_sec = rt*60
         rt_slice = slice(
-            rt_sec-self.rt_sec_win_to_slice_ms2/2,
-            rt_sec+self.rt_sec_win_to_slice_ms2/2,
+            rt_sec-self.rt_sec_tol_to_slice_ms2,
+            rt_sec+self.rt_sec_tol_to_slice_ms2,
         )
 
         if im == 0 or self.tims_data.scan_max_index == 1:
             im_slice = slice(None)
-        elif self.find_closest_ms2_by_im and self.tims_data.scan_max_index > 1:
+        elif self.find_k_nearest_ms2_by_im and self.tims_data.scan_max_index > 1:
             # AlphaTims without AlphaRaw for .d files
             im_slice = self.tims_data.scan_max_index-np.searchsorted(
                 self.tims_data.mobility_values[::-1], im
             )
         else:
             im_slice = slice(
-                im-self.im_win_to_slice_ms2/2,
-                im+self.im_win_to_slice_ms2/2
+                im-self.im_tol_to_slice_ms2,
+                im+self.im_tol_to_slice_ms2
             )
 
         spec_df = self.tims_data[
             rt_slice, im_slice, precursor_mz:precursor_mz
         ]
 
-        def find_nearest(array, val):
-            return np.argmin(np.abs(array-val))
+        def find_k_nearest(array, val, k=3):
+            nearest = np.argmin(np.abs(array-val))
+            if nearest <= k//2:
+                return slice(k)
+            elif nearest >= len(array)-k//2-1:
+                return slice(-k, None)
+            else:
+                return slice(nearest-k//2, nearest+k//2+1)
 
         if (
-            self.find_closest_ms2_by_im and 
-            im>0 and self.tims_data.scan_max_index==1
+            self.find_k_nearest_ms2_by_im and 
+            im>0 and self.tims_data.scan_max_index>1
         ):
             # RAW from AlphaRaw, mobility===0 in AlphaTims wrapper obj
-            spec_idxes = spec_df.frame_indices.unique() 
-            if len(spec_idxes) > 1: # im from psm
-                spec_idx = spec_idxes[
-                    find_nearest(
-                        self.raw_data.spectrum_df.mobility.values[spec_idxes], im
+            scan_idxes = np.sort(spec_df.scan_indices.unique())
+            if len(scan_idxes) > 1: # im from psm
+                scan_idxes = scan_idxes[
+                    find_k_nearest(
+                        self.raw_data.spectrum_df.mobility.values[scan_idxes], 
+                        im, self.k_im_nearest
                     )
                 ]
-                spec_df = spec_df[spec_df.frame_indices==spec_idx]
+                spec_df = spec_df[spec_df.scan_indices.isin(scan_idxes)]
 
-        if self.find_closest_ms2_by_rt:
-            rt_values = spec_df.rt_values.unique()
+        if self.find_k_nearest_ms2_by_rt:
+            rt_values = np.sort(spec_df.rt_values.unique())
             if len(rt_values) > 1:
-                closest_rt = rt_values[find_nearest(rt_values, rt_sec)]
-                spec_df = spec_df[spec_df.rt_values==closest_rt]
+                closest_rts = rt_values[
+                    find_k_nearest(rt_values, rt_sec, self.k_rt_nearest)
+                ]
+                spec_df = spec_df[spec_df.rt_values.isin(closest_rts)]
         
-        return spec_df.sort_values('mz_values').reset_index(drop=True)
+        return spec_df
 
     def get_peaks(
         self,
@@ -185,6 +196,7 @@ def get_peaks(
             np.ndarray: peak intensity values
         """
         spec_df = self.get_peak_df(precursor_mz, rt, im)
+        spec_df = spec_df.sort_values('mz_values').reset_index(drop=True)
         return (
             spec_df.mz_values.values, 
             spec_df.intensity_values.values
diff --git a/alpharaw/ms_data_base.py b/alpharaw/ms_data_base.py
index 66cd204..132270b 100644
--- a/alpharaw/ms_data_base.py
+++ b/alpharaw/ms_data_base.py
@@ -1,6 +1,9 @@
 import pandas as pd
 import numpy as np
 from alphabase.io.hdf import HDF_File
+from alphabase.constants._const import (
+    PEAK_MZ_DTYPE, PEAK_INTENSITY_DTYPE
+)
 
 class MSData_Base:
     """
@@ -28,9 +31,15 @@ class MSData_Base:
     peak_df: pd.DataFrame
     """
     Peak list dataframe containing the follow columns:
-    - `mz` (float64): m/z values of the peak
-    - `intensity` (float32): intensity values of the peak
+    - `mz` (PEAK_MZ_DTYPE in alphabase, float32 by default): m/z values of the peak
+    - `intensity` (PEAK_INTENSITY_DTYPE in alphabase, float32 by default): intensity values of the peak
     """
+
+    vocab: list = [
+        "CID", "HCD", "ETD", "ECD", "EAD", "EXD", "UVPD",
+        "ETHCD", "ETCID", "EXCID", "NETD",
+        "IT", "FT", "TOF", 
+    ]
     def __init__(self, centroided:bool=True, **kwargs):
         """
         Parameters
@@ -47,6 +56,16 @@ def __init__(self, centroided:bool=True, **kwargs):
         self.centroided = centroided
         self.creation_time = ''
         self.file_type = ''
+        self.instrument = 'none'
+
+    def _get_term_id(self, terminology:str):
+        """
+        Get terminology id from :data:`self.vocab`, -1 if not exist.
+        """
+        try:
+            return self.vocab.index(terminology)
+        except ValueError:
+            return -1
 
     @property
     def raw_file_path(self)->str:
@@ -66,16 +85,20 @@ def load_raw(self, _path:str):
         self.import_raw(_path)
 
     def _save_meta_to_hdf(self, hdf:HDF_File):
-        hdf.ms_data.creation_time = self.creation_time
-        hdf.ms_data.raw_file_path = self.raw_file_path
-        hdf.ms_data.file_type = self.file_type
-        hdf.ms_data.centroided = self.centroided
+        hdf.ms_data.meta = {
+            "creation_time": self.creation_time,
+            "raw_file_path": self.raw_file_path,
+            "file_type": self.file_type,
+            "centroided": self.centroided,
+            "instrument": self.instrument,
+        }
 
     def _load_meta_from_hdf(self, hdf:HDF_File):
-        self.creation_time = hdf.ms_data.creation_time
-        self.raw_file_path = hdf.ms_data.raw_file_path
-        self.file_type = hdf.ms_data.file_type
-        self.centroided = hdf.ms_data.centroided
+        self.creation_time = hdf.ms_data.meta.creation_time
+        self.raw_file_path = hdf.ms_data.meta.raw_file_path
+        self.file_type = hdf.ms_data.meta.file_type
+        self.centroided = hdf.ms_data.meta.centroided
+        self.instrument = hdf.ms_data.meta.instrument
 
     def save_hdf(self, _path:str):
         hdf = HDF_File(
@@ -123,6 +146,13 @@ def _check_df(self):
         self._check_rt()
         # self._check_mobility()
         self._check_precursor_mz()
+        self._check_peak_dtypes()
+
+    def _check_peak_dtypes(self):
+        if self.peak_df.mz.dtype != PEAK_MZ_DTYPE:
+            self.peak_df.mz = self.peak_df.mz.astype(PEAK_MZ_DTYPE)
+        if self.peak_df.intensity.dtype != PEAK_INTENSITY_DTYPE:
+            self.peak_df.intensity = self.peak_df.intensity.astype(PEAK_INTENSITY_DTYPE)
     
     def _check_rt(self):
         assert 'rt' in self.spectrum_df.columns
@@ -147,25 +177,25 @@ def create_spectrum_df(self,
         )
         self.spectrum_df['spec_idx'] = self.spectrum_df.index.values
 
-    def set_peaks_by_cat_array(self, 
+    def set_peak_df_by_indexed_array(self, 
         mz_array:np.ndarray, 
         intensity_array:np.ndarray,
         peak_start_indices:np.ndarray,
         peak_stop_indices:np.ndarray,
     ):
         self.peak_df = pd.DataFrame({
-            'mz': mz_array,
-            'intensity': intensity_array
+            'mz': mz_array.astype(PEAK_MZ_DTYPE),
+            'intensity': intensity_array.astype(PEAK_INTENSITY_DTYPE)
         })
         self.spectrum_df['peak_start_idx'] = peak_start_indices
         self.spectrum_df['peak_stop_idx'] = peak_stop_indices
 
-    def set_peaks_by_array_list(self,
+    def set_peak_df_by_array_list(self,
         mz_array_list:list,
         intensity_array_list:list,
     ):
         indices = index_ragged_list(mz_array_list)
-        self.set_peaks_by_cat_array(
+        self.set_peak_df_by_indexed_array(
             np.concatenate(mz_array_list),
             np.concatenate(intensity_array_list),
             indices[:-1],
@@ -193,13 +223,16 @@ def add_column_in_spec_df(self,
                 column_name
             ] = self.spectrum_df[column_name].astype(dtype)
 
-    def add_column_in_df_by_thermo_scan_num(self, 
+    def add_column_in_df_by_scan_num(self, 
         column_name:str, 
         values:np.ndarray, 
         scan_nums:np.ndarray,
         dtype:np.dtype=np.float64, 
         na_value=np.nan,
     ):
+        """
+        scan num starts from 1 not 0
+        """
         self.add_column_in_spec_df(
             column_name, values,
             scan_nums-1, 
@@ -225,7 +258,7 @@ def set_precursor_mz(self,
             spec_idxes, np.float64, -1.0
         )
     
-    def set_precursor_mz_windows(self,
+    def set_isolation_mz_windows(self,
         precursor_lower_mz_values:np.ndarray,
         precursor_upper_mz_values:np.ndarray,
         spec_idxes:np.ndarray=None, 
diff --git a/alpharaw/mzml.py b/alpharaw/mzml.py
index 05ee253..97e2d49 100644
--- a/alpharaw/mzml.py
+++ b/alpharaw/mzml.py
@@ -1,97 +1,166 @@
-# import numpy as np
-# import re
-# import datetime
-# import pathlib
+import numpy as np
 
-# from pyteomics import mzml
+from pyteomics import mzml
 
-# from .ms_data_base import MSData_Base
+from .ms_data_base import (
+    MSData_Base, PEAK_MZ_DTYPE, PEAK_INTENSITY_DTYPE
+)
 
-# def parse_mzml_item(item_dict: dict) -> tuple:
-#     rt = float(item_dict.get('scanList').get('scan')[0].get('scan start time'))
-#     masses = item_dict.get('m/z array')
-#     intensities = item_dict.get('intensity array')
-#     ms_level = item_dict.get('ms level')
-#     prec_mz = -1.0
-#     isolation_lower_mz = -1.0
-#     isolation_upper_mz = -1.0
-#     charge = 0
-#     if ms_level == 2:
-#         try:
-#             charge = int(item_dict.get('precursorList').get('precursor')[0].get('selectedIonList').get('selectedIon')[0].get(
-#                 'charge state'))
-#         except TypeError:
-#             charge = 0
-#         try:
-#             charge = int(item_dict.get('precursorList').get('precursor')[0].get('selectedIonList').get('selectedIon')[0].get(
-#                 'charge state'))
-#         except TypeError:
-#             charge = 0
+class MzMLReader(MSData_Base):
+    def _import(self,
+        filename: str,
+    ):
+        self.file_type = 'mzml'
+        if isinstance(filename, str):
+            reader = mzml.read(filename, use_index=True)
+        else:
+            reader = filename
+        spec_indices = np.arange(len(reader),dtype=int)
 
-#         prec_mz = item_dict.get('precursorList').get('precursor')[0].get('selectedIonList').get('selectedIon')[0].get(
-#                 'selected ion m/z')
-#         try:
-#             iso_lower = int(item_dict.get('precursorList').get('precursor')[0].get('isolation window lower offset').get(
-#                 'charge state'))
-#             iso_upper = int(item_dict.get('precursorList').get('precursor')[0].get('isolation window upper offset').get(
-#                 'charge state'))
-#             isolation_upper_mz = prec_mz + iso_upper
-#             isolation_lower_mz = prec_mz - iso_lower
-#         except TypeError:
-#             isolation_upper_mz = prec_mz + 1.5
-#             isolation_lower_mz = prec_mz - 1.5
+        rt_list = []
+        mzs_list = []
+        intens_list = []
+        ms_level_list = []
+        prec_mz_list = []
+        charge_list = []
+        _peak_indices = []
+        isolation_lower_mz_list = []
+        isolation_upper_mz_list = []
+        nce_list = []
         
-#     return (
-#         rt, prec_mz, isolation_lower_mz, isolation_upper_mz, 
-#         ms_level, charge, masses, intensities
-#     )
+        for i in spec_indices:
+            spec = next(reader)
 
-# class MSFraggerMzML_Data(MSData_Base):
-#     def _import(self,
-#         filename: str,
-#     ):
-#         reader = mzml.read(filename, use_index=True)
-#         spec_indices = np.array(range(1, len(reader) + 1))
+            (
+                rt, prec_mz, isolation_lower_mz, isolation_upper_mz, 
+                ms_level, nce, charge, masses, intensities
+            ) = parse_mzml_entry(spec)
 
-#         scan_list = []
-#         rt_list = []
-#         mzs_list = []
-#         intens_list = []
-#         ms_level_list = []
-#         prec_mz_list = []
-#         charge_list = []
-
-#         self.file_type = 'unknown'
-        
-#         for idx, i in enumerate(spec_indices):
-#             spec = next(reader)
-#             if idx == 0:
-#                 ext = re.findall(r"File:\".+\.(\w+)\"", spec['spectrum title'])[0]
-#                 if ext.lower() == 'raw':
-#                     self.file_type = "thermo"
-
-#             scan_list.append(i)
-#             rt, prec_mz, ms_level, charge, masses, intensities = parse_mzml_item(spec)
+            nce_list.append(nce)
             
-#             sortindex = np.argsort(masses)
+            sortindex = np.argsort(masses)
             
-#             masses = masses[sortindex]
-#             intensities = intensities[sortindex]
+            masses = masses[sortindex]
+            intensities = intensities[sortindex]
             
-#             rt_list.append(rt)
+            rt_list.append(rt)
             
-#             #Remove zero intensities
-#             to_keep = intensities>0
-#             masses = masses[to_keep]
-#             intensities = intensities[to_keep]
+            #Remove zero intensities
+            to_keep = intensities>0
+            masses = masses[to_keep]
+            intensities = intensities[to_keep]
+
+            _peak_indices.append(len(masses))
             
-#             mzs_list.append(masses)
-#             intens_list.append(intensities)
-#             ms_level_list.append(ms_level)
-#             prec_mz_list.append(prec_mz)
-#             charge_list.append(charge)
+            mzs_list.append(masses.astype(PEAK_MZ_DTYPE))
+            intens_list.append(intensities.astype(PEAK_INTENSITY_DTYPE))
+            ms_level_list.append(ms_level)
+            prec_mz_list.append(prec_mz)
+            charge_list.append(charge)
+            isolation_lower_mz_list.append(isolation_lower_mz)
+            isolation_upper_mz_list.append(isolation_upper_mz)
 
-#         fname = pathlib.Path(filename)
-#         acquisition_date_time = datetime.datetime.fromtimestamp(fname.stat().st_mtime).strftime('%Y-%m-%dT%H:%M:%S')
+        if isinstance(filename, str):
+            reader.close()
 
-#         return acquisition_date_time
\ No newline at end of file
+        peak_indices = np.empty(len(spec_indices)+1, np.int64)
+        peak_indices[0] = 0
+        peak_indices[1:] = np.cumsum(_peak_indices)
+        ret_dict = {
+            'peak_indices': peak_indices,
+            'peak_mz': np.concatenate(mzs_list),
+            'peak_intensity': np.concatenate(intens_list),
+            'rt': np.array(rt_list),
+            'precursor_mz': np.array(prec_mz_list),
+            'precursor_charge': np.array(charge_list, dtype=np.int8),
+            'isolation_mz_lower': np.array(isolation_lower_mz_list),
+            'isolation_mz_upper': np.array(isolation_upper_mz_list),
+            'ms_level': np.array(ms_level_list, dtype=np.int8),
+        }
+        nce_list = np.array(nce_list, dtype=np.float32)
+        if np.any(np.isnan(nce_list)):
+            return ret_dict
+        ret_dict["nce"] = nce_list
+        return ret_dict
+
+    def _set_dataframes(self, raw_data:dict):
+        self.create_spectrum_df(len(raw_data['rt']))
+        self.set_peak_df_by_indexed_array(
+            raw_data['peak_mz'],
+            raw_data['peak_intensity'],
+            raw_data['peak_indices'][:-1],
+            raw_data['peak_indices'][1:],
+        )
+        self.add_column_in_spec_df(
+            'rt', raw_data['rt']
+        )
+        self.add_column_in_spec_df(
+            'ms_level', raw_data['ms_level'],
+            dtype=np.int8
+        )
+        self.set_precursor_mz(
+            raw_data['precursor_mz']
+        )
+        self.add_column_in_spec_df(
+            'charge', raw_data['precursor_charge'],
+            dtype=np.int8
+        )
+        self.set_isolation_mz_windows(
+            raw_data['isolation_mz_lower'],
+            raw_data['isolation_mz_upper'],
+        )
+        if "nce" in raw_data:
+            self.add_column_in_spec_df(
+                "nce", raw_data["nce"],
+                dtype=np.float32,
+            )
+
+def parse_mzml_entry(item_dict: dict) -> tuple:
+    rt = float(item_dict.get('scanList').get('scan')[0].get('scan start time'))
+    masses = item_dict.get('m/z array')
+    intensities = item_dict.get('intensity array')
+    ms_level = item_dict.get('ms level')
+    prec_mz = -1.0
+    isolation_lower_mz = -1.0
+    isolation_upper_mz = -1.0
+    charge = 0
+    nce = 0.0
+    if ms_level == 2:
+        try:
+            charge = int(item_dict.get('precursorList').get('precursor')[0].get('selectedIonList').get('selectedIon')[0].get(
+                'charge state'))
+        except TypeError:
+            charge = 0
+        try:
+            charge = int(item_dict.get('precursorList').get('precursor')[0].get('selectedIonList').get('selectedIon')[0].get(
+                'charge state'))
+        except TypeError:
+            charge = 0
+
+        prec_mz = item_dict.get('precursorList').get('precursor')[0].get('selectedIonList').get('selectedIon')[0].get(
+                'selected ion m/z')
+        try:
+            iso_window = item_dict.get('precursorList').get('precursor')[0].get('isolationWindow')
+            iso_lower = float(iso_window.get('isolation window lower offset'))
+            iso_upper = float(iso_window.get('isolation window upper offset'))
+            isolation_upper_mz = prec_mz + iso_upper
+            isolation_lower_mz = prec_mz - iso_lower
+        except TypeError:
+            isolation_upper_mz = prec_mz + 1.5
+            isolation_lower_mz = prec_mz - 1.5
+        
+        nce = np.nan
+        try:
+            filter_string = item_dict.get("scanList").get("scan")[0].get["filter string"]
+            if "@hcd" in filter_string:
+                nce = float(filter_string.split("@hcd")[1].split(" ")[0])
+            elif "@cid" in filter_string:
+                nce = float(filter_string.split("@cid")[1].split(" ")[0])
+            else:
+                nce = np.nan
+        except:
+            nce = np.nan
+    return (
+        rt, prec_mz, isolation_lower_mz, isolation_upper_mz, 
+        ms_level, nce, charge, masses, intensities
+    )
\ No newline at end of file
diff --git a/alpharaw/sciex.py b/alpharaw/sciex.py
index a68f006..bdc80a8 100644
--- a/alpharaw/sciex.py
+++ b/alpharaw/sciex.py
@@ -46,7 +46,7 @@ def _import(self,
     
     def _set_dataframes(self, raw_data:dict):
         self.create_spectrum_df(len(raw_data['rt']))
-        self.set_peaks_by_cat_array(
+        self.set_peak_df_by_indexed_array(
             raw_data['peak_mz'],
             raw_data['peak_intensity'],
             raw_data['peak_indices'][:-1],
@@ -66,7 +66,7 @@ def _set_dataframes(self, raw_data:dict):
             'charge', raw_data['precursor_charge'],
             dtype=np.int8
         )
-        self.set_precursor_mz_windows(
+        self.set_isolation_mz_windows(
             raw_data['isolation_mz_lower'],
             raw_data['isolation_mz_upper'],
         )
diff --git a/alpharaw/thermo.py b/alpharaw/thermo.py
index 87d4f13..6206dcf 100644
--- a/alpharaw/thermo.py
+++ b/alpharaw/thermo.py
@@ -2,7 +2,9 @@
 import pandas as pd
 import os
 import alpharaw.raw_access.pythermorawfilereader as pyrawfilereader
-from .ms_data_base import MSData_Base
+from .ms_data_base import (
+    MSData_Base, PEAK_MZ_DTYPE, PEAK_INTENSITY_DTYPE
+)
 from .ms_data_base import ms_reader_provider
 
 class ThermoRawData(MSData_Base):
@@ -44,8 +46,8 @@ def _import(self,
                 masses, intensities = rawfile.GetProfileMassListFromScanNum(i)
             else:
                 masses, intensities = rawfile.GetCentroidMassListFromScanNum(i)
-            mz_values.append(masses)
-            intensity_values.append(intensities.astype(np.float32))
+            mz_values.append(masses.astype(PEAK_MZ_DTYPE))
+            intensity_values.append(intensities.astype(PEAK_INTENSITY_DTYPE))
             _peak_indices.append(len(masses))
             rt = rawfile.RTFromScanNum(i)
             rt_values.append(rt)
@@ -98,7 +100,7 @@ def _import(self,
 
     def _set_dataframes(self, raw_data:dict):
         self.create_spectrum_df(len(raw_data['rt']))
-        self.set_peaks_by_cat_array(
+        self.set_peak_df_by_indexed_array(
             raw_data['peak_mz'],
             raw_data['peak_intensity'],
             raw_data['peak_indices'][:-1],
@@ -118,10 +120,14 @@ def _set_dataframes(self, raw_data:dict):
             'charge', raw_data['precursor_charge'],
             dtype=np.int8
         )
-        self.set_precursor_mz_windows(
+        self.set_isolation_mz_windows(
             raw_data['isolation_mz_lower'],
             raw_data['isolation_mz_upper'],
         )
+        self.add_column_in_spec_df(
+            "nce", raw_data["nce"],
+            dtype=np.float32,
+        )
 
 ms_reader_provider.register_reader('thermo', ThermoRawData)
 ms_reader_provider.register_reader('thermo_raw', ThermoRawData)
diff --git a/alpharaw/wrappers/alphapept_wrapper.py b/alpharaw/wrappers/alphapept_wrapper.py
index a792601..76f6254 100644
--- a/alpharaw/wrappers/alphapept_wrapper.py
+++ b/alpharaw/wrappers/alphapept_wrapper.py
@@ -88,7 +88,7 @@ def _set_dataframes(self, _path):
         rt_values = np.zeros(spec_num)
         rt_values[spec_idxes] = hdf.Raw.MS2_scans.rt_list_ms2.values
 
-        self.set_peaks_by_cat_array(
+        self.set_peak_df_by_indexed_array(
             hdf.Raw.MS2_scans.mass_list_ms2.values,
             hdf.Raw.MS2_scans.int_list_ms2.values,
             start_idxes, end_idxes
diff --git a/nbdev_nbs/match/match_utils.ipynb b/nbdev_nbs/match/match_utils.ipynb
index 05ad149..35b4b8b 100644
--- a/nbdev_nbs/match/match_utils.ipynb
+++ b/nbdev_nbs/match/match_utils.ipynb
@@ -41,8 +41,8 @@
    ],
    "source": [
     "#| hide\n",
-    "spec_masses = np.array([100, 100.1, 199.9, 200.1, 300])\n",
-    "query_masses = np.array([10,100.0, 200, 300, 400])\n",
+    "spec_masses = np.array([100, 100.1, 199.9, 200.1, 300], dtype=np.float32)\n",
+    "query_masses = np.array([10,100.0, 200, 300, 400], dtype=np.float32)\n",
     "Da_tols = np.ones_like(query_masses)*0.2\n",
     "first_indices, last_indices = match_profile_peaks(\n",
     "    spec_masses, 0, query_masses, Da_tols,\n",
@@ -60,7 +60,7 @@
    "source": [
     "#| hide\n",
     "#unittests\n",
-    "spec_masses = np.arange(10, dtype=float)*100\n",
+    "spec_masses = np.arange(10, dtype=np.float32)*100\n",
     "query_masses = spec_masses\n",
     "Da_tols = query_masses*20*1e-6\n",
     "idxes = match_closest_peaks(spec_masses, 0, query_masses, Da_tols)\n",
@@ -78,7 +78,7 @@
    "source": [
     "#| hide\n",
     "#unittests\n",
-    "spec_masses = np.arange(10, dtype=float)*100\n",
+    "spec_masses = np.arange(10, dtype=np.float32)*100\n",
     "query_masses = spec_masses[:5]\n",
     "Da_tols = query_masses*20*1e-6\n",
     "spec_intens = np.ones_like(spec_masses)\n",
@@ -97,12 +97,10 @@
    "source": [
     "#| hide\n",
     "#unittests\n",
-    "spec_masses = np.arange(10, dtype=float)*100\n",
+    "spec_masses = np.arange(10, dtype=np.float32)*100\n",
     "query_masses = spec_masses.copy()[:5]\n",
     "query_masses[1]+=0.01 # -1\n",
     "query_masses[3]+=0.003 # spec_masses[3]\n",
-    "query_masses[5]-=0.0099 # spec_masses[5]\n",
-    "query_masses[6]+=0.02 #-1\n",
     "spec_masses[7] = spec_masses[8]-0.007 # matched[7] = -1\n",
     "spec_masses[8] += 0.01 # matched[8] = 7 as 7 is closer\n",
     "Da_tols = query_masses*20*1e-6\n",
@@ -120,8 +118,8 @@
    "source": [
     "#| hide\n",
     "#unittests\n",
-    "spec_masses = np.arange(10, dtype=float)*100\n",
-    "query_masses = np.arange(20).reshape((10,2))[:5]*100\n",
+    "spec_masses = np.arange(10, dtype=np.float32)*100\n",
+    "query_masses = np.arange(20, dtype=np.float32).reshape((10,2))[:5]*100\n",
     "Da_tols = query_masses*20*1e-6\n",
     "target = np.arange(20, dtype=np.int32).reshape((10,2))[:5]\n",
     "target[10:] = -1\n",
diff --git a/nbdev_nbs/match/psm_match.ipynb b/nbdev_nbs/match/psm_match.ipynb
index 67229f1..6288708 100644
--- a/nbdev_nbs/match/psm_match.ipynb
+++ b/nbdev_nbs/match/psm_match.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -25,7 +25,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,7 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,14 +50,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 2/2 [00:03<00:00,  1.66s/it]\n"
+      "100%|██████████| 2/2 [00:01<00:00,  1.41it/s]\n"
      ]
     }
    ],
@@ -251,14 +251,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 2/2 [00:00<00:00, 101.41it/s]\n"
+      "100%|██████████| 2/2 [00:00<00:00, 174.90it/s]\n"
      ]
     }
    ],
@@ -278,14 +278,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 1/1 [00:00<00:00, 71.69it/s]\n"
+      "100%|██████████| 1/1 [00:00<00:00, 111.73it/s]\n"
      ]
     }
    ],
@@ -322,6 +322,18 @@
    "display_name": "Python 3.8.3 ('base')",
    "language": "python",
    "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,
diff --git a/nbdev_nbs/test_mzml.ipynb b/nbdev_nbs/test_mzml.ipynb
new file mode 100644
index 0000000..2b76529
--- /dev/null
+++ b/nbdev_nbs/test_mzml.ipynb
@@ -0,0 +1,894 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from io import StringIO\n",
+    "\n",
+    "mzml = \"/Users/wenfengzeng/Downloads/small.pwiz.1.1.mzML\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>spec_idx</th>\n",
+       "      <th>peak_start_idx</th>\n",
+       "      <th>peak_stop_idx</th>\n",
+       "      <th>rt</th>\n",
+       "      <th>ms_level</th>\n",
+       "      <th>precursor_mz</th>\n",
+       "      <th>charge</th>\n",
+       "      <th>isolation_lower_mz</th>\n",
+       "      <th>isolation_upper_mz</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>10739</td>\n",
+       "      <td>0.004935</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>10739</td>\n",
+       "      <td>25554</td>\n",
+       "      <td>0.007897</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>25554</td>\n",
+       "      <td>26039</td>\n",
+       "      <td>0.011218</td>\n",
+       "      <td>2</td>\n",
+       "      <td>810.79</td>\n",
+       "      <td>0</td>\n",
+       "      <td>810.29</td>\n",
+       "      <td>811.29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>26039</td>\n",
+       "      <td>27045</td>\n",
+       "      <td>0.022838</td>\n",
+       "      <td>2</td>\n",
+       "      <td>837.34</td>\n",
+       "      <td>0</td>\n",
+       "      <td>836.84</td>\n",
+       "      <td>837.84</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>27045</td>\n",
+       "      <td>27882</td>\n",
+       "      <td>0.034925</td>\n",
+       "      <td>2</td>\n",
+       "      <td>725.36</td>\n",
+       "      <td>0</td>\n",
+       "      <td>724.86</td>\n",
+       "      <td>725.86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>5</td>\n",
+       "      <td>27882</td>\n",
+       "      <td>28532</td>\n",
+       "      <td>0.048620</td>\n",
+       "      <td>2</td>\n",
+       "      <td>558.87</td>\n",
+       "      <td>0</td>\n",
+       "      <td>558.37</td>\n",
+       "      <td>559.37</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>6</td>\n",
+       "      <td>28532</td>\n",
+       "      <td>29294</td>\n",
+       "      <td>0.061923</td>\n",
+       "      <td>2</td>\n",
+       "      <td>812.33</td>\n",
+       "      <td>0</td>\n",
+       "      <td>811.83</td>\n",
+       "      <td>812.83</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>7</td>\n",
+       "      <td>29294</td>\n",
+       "      <td>37374</td>\n",
+       "      <td>0.075015</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>8</td>\n",
+       "      <td>37374</td>\n",
+       "      <td>54285</td>\n",
+       "      <td>0.077788</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>9</td>\n",
+       "      <td>54285</td>\n",
+       "      <td>54837</td>\n",
+       "      <td>0.081203</td>\n",
+       "      <td>2</td>\n",
+       "      <td>810.75</td>\n",
+       "      <td>0</td>\n",
+       "      <td>810.25</td>\n",
+       "      <td>811.25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>10</td>\n",
+       "      <td>54837</td>\n",
+       "      <td>55778</td>\n",
+       "      <td>0.092903</td>\n",
+       "      <td>2</td>\n",
+       "      <td>837.96</td>\n",
+       "      <td>0</td>\n",
+       "      <td>837.46</td>\n",
+       "      <td>838.46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>11</td>\n",
+       "      <td>55778</td>\n",
+       "      <td>56413</td>\n",
+       "      <td>0.104803</td>\n",
+       "      <td>2</td>\n",
+       "      <td>644.06</td>\n",
+       "      <td>0</td>\n",
+       "      <td>643.56</td>\n",
+       "      <td>644.56</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>12</td>\n",
+       "      <td>56413</td>\n",
+       "      <td>57205</td>\n",
+       "      <td>0.117215</td>\n",
+       "      <td>2</td>\n",
+       "      <td>725.23</td>\n",
+       "      <td>0</td>\n",
+       "      <td>724.73</td>\n",
+       "      <td>725.73</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>13</td>\n",
+       "      <td>57205</td>\n",
+       "      <td>57874</td>\n",
+       "      <td>0.130022</td>\n",
+       "      <td>2</td>\n",
+       "      <td>559.19</td>\n",
+       "      <td>0</td>\n",
+       "      <td>558.69</td>\n",
+       "      <td>559.69</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>14</td>\n",
+       "      <td>57874</td>\n",
+       "      <td>66994</td>\n",
+       "      <td>0.143452</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>15</td>\n",
+       "      <td>66994</td>\n",
+       "      <td>81922</td>\n",
+       "      <td>0.146408</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>16</td>\n",
+       "      <td>81922</td>\n",
+       "      <td>82501</td>\n",
+       "      <td>0.149755</td>\n",
+       "      <td>2</td>\n",
+       "      <td>811.41</td>\n",
+       "      <td>0</td>\n",
+       "      <td>810.91</td>\n",
+       "      <td>811.91</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>17</td>\n",
+       "      <td>82501</td>\n",
+       "      <td>83417</td>\n",
+       "      <td>0.161442</td>\n",
+       "      <td>2</td>\n",
+       "      <td>837.36</td>\n",
+       "      <td>0</td>\n",
+       "      <td>836.86</td>\n",
+       "      <td>837.86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>18</td>\n",
+       "      <td>83417</td>\n",
+       "      <td>84087</td>\n",
+       "      <td>0.173370</td>\n",
+       "      <td>2</td>\n",
+       "      <td>643.80</td>\n",
+       "      <td>0</td>\n",
+       "      <td>643.30</td>\n",
+       "      <td>644.30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>19</td>\n",
+       "      <td>84087</td>\n",
+       "      <td>84761</td>\n",
+       "      <td>0.186658</td>\n",
+       "      <td>2</td>\n",
+       "      <td>558.94</td>\n",
+       "      <td>0</td>\n",
+       "      <td>558.44</td>\n",
+       "      <td>559.44</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>20</td>\n",
+       "      <td>84761</td>\n",
+       "      <td>85652</td>\n",
+       "      <td>0.200695</td>\n",
+       "      <td>2</td>\n",
+       "      <td>725.14</td>\n",
+       "      <td>0</td>\n",
+       "      <td>724.64</td>\n",
+       "      <td>725.64</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>21</td>\n",
+       "      <td>85652</td>\n",
+       "      <td>94665</td>\n",
+       "      <td>0.213673</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>22</td>\n",
+       "      <td>94665</td>\n",
+       "      <td>105242</td>\n",
+       "      <td>0.216747</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>23</td>\n",
+       "      <td>105242</td>\n",
+       "      <td>105821</td>\n",
+       "      <td>0.220073</td>\n",
+       "      <td>2</td>\n",
+       "      <td>810.84</td>\n",
+       "      <td>0</td>\n",
+       "      <td>810.34</td>\n",
+       "      <td>811.34</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>24</td>\n",
+       "      <td>105821</td>\n",
+       "      <td>106759</td>\n",
+       "      <td>0.232923</td>\n",
+       "      <td>2</td>\n",
+       "      <td>837.42</td>\n",
+       "      <td>0</td>\n",
+       "      <td>836.92</td>\n",
+       "      <td>837.92</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>25</td>\n",
+       "      <td>106759</td>\n",
+       "      <td>107548</td>\n",
+       "      <td>0.244745</td>\n",
+       "      <td>2</td>\n",
+       "      <td>674.64</td>\n",
+       "      <td>0</td>\n",
+       "      <td>674.14</td>\n",
+       "      <td>675.14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>26</td>\n",
+       "      <td>107548</td>\n",
+       "      <td>108235</td>\n",
+       "      <td>0.259172</td>\n",
+       "      <td>2</td>\n",
+       "      <td>643.74</td>\n",
+       "      <td>0</td>\n",
+       "      <td>643.24</td>\n",
+       "      <td>644.24</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>27</td>\n",
+       "      <td>108235</td>\n",
+       "      <td>109100</td>\n",
+       "      <td>0.272663</td>\n",
+       "      <td>2</td>\n",
+       "      <td>725.36</td>\n",
+       "      <td>0</td>\n",
+       "      <td>724.86</td>\n",
+       "      <td>725.86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>28</td>\n",
+       "      <td>109100</td>\n",
+       "      <td>119764</td>\n",
+       "      <td>0.285483</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>29</td>\n",
+       "      <td>119764</td>\n",
+       "      <td>133663</td>\n",
+       "      <td>0.288898</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>30</td>\n",
+       "      <td>133663</td>\n",
+       "      <td>134601</td>\n",
+       "      <td>0.303703</td>\n",
+       "      <td>2</td>\n",
+       "      <td>837.39</td>\n",
+       "      <td>0</td>\n",
+       "      <td>836.89</td>\n",
+       "      <td>837.89</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>31</td>\n",
+       "      <td>134601</td>\n",
+       "      <td>135254</td>\n",
+       "      <td>0.315650</td>\n",
+       "      <td>2</td>\n",
+       "      <td>643.80</td>\n",
+       "      <td>0</td>\n",
+       "      <td>643.30</td>\n",
+       "      <td>644.30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>32</td>\n",
+       "      <td>135254</td>\n",
+       "      <td>135956</td>\n",
+       "      <td>0.328527</td>\n",
+       "      <td>2</td>\n",
+       "      <td>558.75</td>\n",
+       "      <td>0</td>\n",
+       "      <td>558.25</td>\n",
+       "      <td>559.25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>33</td>\n",
+       "      <td>135956</td>\n",
+       "      <td>136543</td>\n",
+       "      <td>0.342915</td>\n",
+       "      <td>2</td>\n",
+       "      <td>882.45</td>\n",
+       "      <td>0</td>\n",
+       "      <td>881.95</td>\n",
+       "      <td>882.95</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>34</td>\n",
+       "      <td>136543</td>\n",
+       "      <td>145312</td>\n",
+       "      <td>0.358558</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>35</td>\n",
+       "      <td>145312</td>\n",
+       "      <td>156612</td>\n",
+       "      <td>0.361428</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>36</td>\n",
+       "      <td>156612</td>\n",
+       "      <td>157184</td>\n",
+       "      <td>0.364755</td>\n",
+       "      <td>2</td>\n",
+       "      <td>810.73</td>\n",
+       "      <td>0</td>\n",
+       "      <td>810.23</td>\n",
+       "      <td>811.23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>37</td>\n",
+       "      <td>157184</td>\n",
+       "      <td>158248</td>\n",
+       "      <td>0.376578</td>\n",
+       "      <td>2</td>\n",
+       "      <td>837.35</td>\n",
+       "      <td>0</td>\n",
+       "      <td>836.85</td>\n",
+       "      <td>837.85</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>38</td>\n",
+       "      <td>158248</td>\n",
+       "      <td>158901</td>\n",
+       "      <td>0.388673</td>\n",
+       "      <td>2</td>\n",
+       "      <td>643.73</td>\n",
+       "      <td>0</td>\n",
+       "      <td>643.23</td>\n",
+       "      <td>644.23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>39</td>\n",
+       "      <td>158901</td>\n",
+       "      <td>159776</td>\n",
+       "      <td>0.401962</td>\n",
+       "      <td>2</td>\n",
+       "      <td>725.68</td>\n",
+       "      <td>0</td>\n",
+       "      <td>725.18</td>\n",
+       "      <td>726.18</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>40</td>\n",
+       "      <td>159776</td>\n",
+       "      <td>160530</td>\n",
+       "      <td>0.415132</td>\n",
+       "      <td>2</td>\n",
+       "      <td>674.70</td>\n",
+       "      <td>0</td>\n",
+       "      <td>674.20</td>\n",
+       "      <td>675.20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>41</td>\n",
+       "      <td>160530</td>\n",
+       "      <td>178264</td>\n",
+       "      <td>0.428483</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>42</td>\n",
+       "      <td>178264</td>\n",
+       "      <td>193614</td>\n",
+       "      <td>0.433222</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-1.00</td>\n",
+       "      <td>-1.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>43</td>\n",
+       "      <td>193614</td>\n",
+       "      <td>194225</td>\n",
+       "      <td>0.436567</td>\n",
+       "      <td>2</td>\n",
+       "      <td>810.82</td>\n",
+       "      <td>0</td>\n",
+       "      <td>810.32</td>\n",
+       "      <td>811.32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>44</td>\n",
+       "      <td>194225</td>\n",
+       "      <td>195235</td>\n",
+       "      <td>0.448320</td>\n",
+       "      <td>2</td>\n",
+       "      <td>837.78</td>\n",
+       "      <td>0</td>\n",
+       "      <td>837.28</td>\n",
+       "      <td>838.28</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>45</th>\n",
+       "      <td>45</td>\n",
+       "      <td>195235</td>\n",
+       "      <td>195948</td>\n",
+       "      <td>0.460565</td>\n",
+       "      <td>2</td>\n",
+       "      <td>674.84</td>\n",
+       "      <td>0</td>\n",
+       "      <td>674.34</td>\n",
+       "      <td>675.34</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>46</th>\n",
+       "      <td>46</td>\n",
+       "      <td>195948</td>\n",
+       "      <td>196607</td>\n",
+       "      <td>0.473103</td>\n",
+       "      <td>2</td>\n",
+       "      <td>558.90</td>\n",
+       "      <td>0</td>\n",
+       "      <td>558.40</td>\n",
+       "      <td>559.40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>47</th>\n",
+       "      <td>47</td>\n",
+       "      <td>196607</td>\n",
+       "      <td>197243</td>\n",
+       "      <td>0.487237</td>\n",
+       "      <td>2</td>\n",
+       "      <td>882.54</td>\n",
+       "      <td>0</td>\n",
+       "      <td>882.04</td>\n",
+       "      <td>883.04</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    spec_idx  peak_start_idx  peak_stop_idx        rt  ms_level  precursor_mz  \\\n",
+       "0          0               0          10739  0.004935         1         -1.00   \n",
+       "1          1           10739          25554  0.007897         1         -1.00   \n",
+       "2          2           25554          26039  0.011218         2        810.79   \n",
+       "3          3           26039          27045  0.022838         2        837.34   \n",
+       "4          4           27045          27882  0.034925         2        725.36   \n",
+       "5          5           27882          28532  0.048620         2        558.87   \n",
+       "6          6           28532          29294  0.061923         2        812.33   \n",
+       "7          7           29294          37374  0.075015         1         -1.00   \n",
+       "8          8           37374          54285  0.077788         1         -1.00   \n",
+       "9          9           54285          54837  0.081203         2        810.75   \n",
+       "10        10           54837          55778  0.092903         2        837.96   \n",
+       "11        11           55778          56413  0.104803         2        644.06   \n",
+       "12        12           56413          57205  0.117215         2        725.23   \n",
+       "13        13           57205          57874  0.130022         2        559.19   \n",
+       "14        14           57874          66994  0.143452         1         -1.00   \n",
+       "15        15           66994          81922  0.146408         1         -1.00   \n",
+       "16        16           81922          82501  0.149755         2        811.41   \n",
+       "17        17           82501          83417  0.161442         2        837.36   \n",
+       "18        18           83417          84087  0.173370         2        643.80   \n",
+       "19        19           84087          84761  0.186658         2        558.94   \n",
+       "20        20           84761          85652  0.200695         2        725.14   \n",
+       "21        21           85652          94665  0.213673         1         -1.00   \n",
+       "22        22           94665         105242  0.216747         1         -1.00   \n",
+       "23        23          105242         105821  0.220073         2        810.84   \n",
+       "24        24          105821         106759  0.232923         2        837.42   \n",
+       "25        25          106759         107548  0.244745         2        674.64   \n",
+       "26        26          107548         108235  0.259172         2        643.74   \n",
+       "27        27          108235         109100  0.272663         2        725.36   \n",
+       "28        28          109100         119764  0.285483         1         -1.00   \n",
+       "29        29          119764         133663  0.288898         1         -1.00   \n",
+       "30        30          133663         134601  0.303703         2        837.39   \n",
+       "31        31          134601         135254  0.315650         2        643.80   \n",
+       "32        32          135254         135956  0.328527         2        558.75   \n",
+       "33        33          135956         136543  0.342915         2        882.45   \n",
+       "34        34          136543         145312  0.358558         1         -1.00   \n",
+       "35        35          145312         156612  0.361428         1         -1.00   \n",
+       "36        36          156612         157184  0.364755         2        810.73   \n",
+       "37        37          157184         158248  0.376578         2        837.35   \n",
+       "38        38          158248         158901  0.388673         2        643.73   \n",
+       "39        39          158901         159776  0.401962         2        725.68   \n",
+       "40        40          159776         160530  0.415132         2        674.70   \n",
+       "41        41          160530         178264  0.428483         1         -1.00   \n",
+       "42        42          178264         193614  0.433222         1         -1.00   \n",
+       "43        43          193614         194225  0.436567         2        810.82   \n",
+       "44        44          194225         195235  0.448320         2        837.78   \n",
+       "45        45          195235         195948  0.460565         2        674.84   \n",
+       "46        46          195948         196607  0.473103         2        558.90   \n",
+       "47        47          196607         197243  0.487237         2        882.54   \n",
+       "\n",
+       "    charge  isolation_lower_mz  isolation_upper_mz  \n",
+       "0        0               -1.00               -1.00  \n",
+       "1        0               -1.00               -1.00  \n",
+       "2        0              810.29              811.29  \n",
+       "3        0              836.84              837.84  \n",
+       "4        0              724.86              725.86  \n",
+       "5        0              558.37              559.37  \n",
+       "6        0              811.83              812.83  \n",
+       "7        0               -1.00               -1.00  \n",
+       "8        0               -1.00               -1.00  \n",
+       "9        0              810.25              811.25  \n",
+       "10       0              837.46              838.46  \n",
+       "11       0              643.56              644.56  \n",
+       "12       0              724.73              725.73  \n",
+       "13       0              558.69              559.69  \n",
+       "14       0               -1.00               -1.00  \n",
+       "15       0               -1.00               -1.00  \n",
+       "16       0              810.91              811.91  \n",
+       "17       0              836.86              837.86  \n",
+       "18       0              643.30              644.30  \n",
+       "19       0              558.44              559.44  \n",
+       "20       0              724.64              725.64  \n",
+       "21       0               -1.00               -1.00  \n",
+       "22       0               -1.00               -1.00  \n",
+       "23       0              810.34              811.34  \n",
+       "24       0              836.92              837.92  \n",
+       "25       0              674.14              675.14  \n",
+       "26       0              643.24              644.24  \n",
+       "27       0              724.86              725.86  \n",
+       "28       0               -1.00               -1.00  \n",
+       "29       0               -1.00               -1.00  \n",
+       "30       0              836.89              837.89  \n",
+       "31       0              643.30              644.30  \n",
+       "32       0              558.25              559.25  \n",
+       "33       0              881.95              882.95  \n",
+       "34       0               -1.00               -1.00  \n",
+       "35       0               -1.00               -1.00  \n",
+       "36       0              810.23              811.23  \n",
+       "37       0              836.85              837.85  \n",
+       "38       0              643.23              644.23  \n",
+       "39       0              725.18              726.18  \n",
+       "40       0              674.20              675.20  \n",
+       "41       0               -1.00               -1.00  \n",
+       "42       0               -1.00               -1.00  \n",
+       "43       0              810.32              811.32  \n",
+       "44       0              837.28              838.28  \n",
+       "45       0              674.34              675.34  \n",
+       "46       0              558.40              559.40  \n",
+       "47       0              882.04              883.04  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from alpharaw.mzml import MzMLReader\n",
+    "\n",
+    "reader = MzMLReader()\n",
+    "reader.import_raw(mzml)\n",
+    "reader.spectrum_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mz</th>\n",
+       "      <th>intensity</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>204.759583</td>\n",
+       "      <td>1422.173584</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>204.759842</td>\n",
+       "      <td>3215.493164</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>204.760101</td>\n",
+       "      <td>3887.355957</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>204.760345</td>\n",
+       "      <td>2843.165527</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>204.760605</td>\n",
+       "      <td>582.906738</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>197238</th>\n",
+       "      <td>1547.776733</td>\n",
+       "      <td>1.261027</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>197239</th>\n",
+       "      <td>1723.519287</td>\n",
+       "      <td>1.640921</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>197240</th>\n",
+       "      <td>1724.323975</td>\n",
+       "      <td>1.251971</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>197241</th>\n",
+       "      <td>1724.991333</td>\n",
+       "      <td>5.156138</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>197242</th>\n",
+       "      <td>1743.361206</td>\n",
+       "      <td>3.192361</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>197243 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 mz    intensity\n",
+       "0        204.759583  1422.173584\n",
+       "1        204.759842  3215.493164\n",
+       "2        204.760101  3887.355957\n",
+       "3        204.760345  2843.165527\n",
+       "4        204.760605   582.906738\n",
+       "...             ...          ...\n",
+       "197238  1547.776733     1.261027\n",
+       "197239  1723.519287     1.640921\n",
+       "197240  1724.323975     1.251971\n",
+       "197241  1724.991333     5.156138\n",
+       "197242  1743.361206     3.192361\n",
+       "\n",
+       "[197243 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "reader.peak_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/nbs/test_tims_match.ipynb b/nbs/test_tims_match.ipynb
index ee03bf0..1a3ccac 100644
--- a/nbs/test_tims_match.ipynb
+++ b/nbs/test_tims_match.ipynb
@@ -952,7 +952,7 @@
    ],
    "source": [
     "tims_match.tolerance = 50\n",
-    "tims_match.rt_sec_win_to_slice_ms2 = 120\n",
+    "tims_match.rt_sec_tol_to_slice_ms2 = 60\n",
     "tims_psm_df, frag_mz_df, frag_int_df, frag_tol_df = tims_match.match_ms2_one_raw(tims_psm_df, True)\n",
     "tims_psm_df['tims_match'] = 0\n",
     "for i, (start,stop) in enumerate(tims_psm_df[['frag_start_idx','frag_stop_idx']].values):\n",

From 527ebafc147515313a318e01eeefa11e4861f048 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Fri, 18 Aug 2023 13:14:23 +0200
Subject: [PATCH 8/9] updates from multiple OSs

---
 alpharaw/legacy_msdata/mgf.py             |  60 ++++++----
 alpharaw/ms_data_base.py                  |  87 +++++++++-----
 alpharaw/mzml.py                          |  32 -----
 alpharaw/sciex.py                         |  27 -----
 alpharaw/thermo.py                        |  31 -----
 alpharaw/wrappers/alphapept_wrapper.py    |  26 ++---
 nbdev_nbs/legacy_msdata/mgf.ipynb         | 136 +++++++++++-----------
 nbdev_nbs/match/psm_match.ipynb           |  24 +---
 nbdev_nbs/match/psm_match_alphatims.ipynb |  16 ++-
 nbdev_nbs/test_mzml.ipynb                 |  25 +---
 10 files changed, 201 insertions(+), 263 deletions(-)

diff --git a/alpharaw/legacy_msdata/mgf.py b/alpharaw/legacy_msdata/mgf.py
index 732a5b5..ef4dd48 100644
--- a/alpharaw/legacy_msdata/mgf.py
+++ b/alpharaw/legacy_msdata/mgf.py
@@ -34,13 +34,16 @@ def _import(self, _path:str):
             f = open(_path)
         else:
             f = _path
-        scanset = set()
+        scan_mz_dict = {}
+        scan_charge_dict = {}
         masses_list = []
         intens_list = []
         spec_idx_list = []
+        scan_list = []
         rt_list = []
         precursor_mz_list = []
         charge_list = []
+        self._has_chimeras = False
         while True:
             line = f.readline()
             if not line: break
@@ -64,12 +67,18 @@ def _import(self, _path:str):
                     elif line.startswith('PEPMASS='):
                         precursor_mz = float(line.split('=')[1])
                     elif line.startswith('CHARGE='):
-                        charge = float(line.split('=')[1].strip()[:-1])
+                        charge = int(line.split('=')[1].strip()[:-1])
                 if not scan:
                     title = find_line(lines, 'TITLE=')
                     scan = parse_scan_from_TITLE(title)
-                if scan in scanset: continue
-                scanset.add(scan)
+                if scan in scan_mz_dict: 
+                    scan_mz_dict[scan].append(precursor_mz)
+                    scan_charge_dict[scan].append(charge)
+                    self._has_chimeras = True
+                    continue
+                scan_mz_dict[scan] = [precursor_mz]
+                scan_charge_dict[scan] = [charge]
+                scan_list.append(scan)
                 spec_idx_list.append(scan-1)
                 rt_list.append(RT)
                 precursor_mz_list.append(precursor_mz)
@@ -79,7 +88,9 @@ def _import(self, _path:str):
         if isinstance(_path, str): 
             f.close()
 
-        precursor_mz_list = np.array(precursor_mz_list)
+        if self._has_chimeras:
+            precursor_mz_list = [scan_mz_dict[scan] for scan in scan_list]
+            charge_list = [scan_charge_dict[scan] for scan in scan_list]
 
         return {
             'peak_indices': index_ragged_list(masses_list),
@@ -87,10 +98,9 @@ def _import(self, _path:str):
             'peak_intensity': np.concatenate(intens_list),
             'rt': np.array(rt_list),
             'precursor_mz': precursor_mz_list,
-            'isolation_mz_lower': precursor_mz_list-2,
-            'isolation_mz_upper': precursor_mz_list+2,
             'spec_idx': np.array(spec_idx_list, dtype=np.int64),
-            'precursor_charge': np.array(charge_list, dtype=np.int8),
+            'scan': np.array(scan_list, dtype=np.int64),
+            'precursor_charge': charge_list,
         }
 
     def _set_dataframes(self, raw_data:dict):
@@ -103,28 +113,28 @@ def _set_dataframes(self, raw_data:dict):
         end_idxes[spec_idxes] = raw_data['peak_indices'][1:]
         rt_values = np.zeros(spec_num)
         rt_values[spec_idxes] = raw_data['rt']
-        precursor_mzs = np.zeros(spec_num)
-        precursor_mzs[spec_idxes] = raw_data['precursor_mz']
-        mz_lowers = np.zeros(spec_num)
-        mz_lowers[spec_idxes] = raw_data['isolation_mz_lower']
-        mz_uppers = np.zeros(spec_num)
-        mz_uppers[spec_idxes] = raw_data['isolation_mz_upper']
-        charges = np.zeros(spec_num, np.int8)
-        charges[spec_idxes] = raw_data['precursor_charge']
+        if self._has_chimeras:
+            precursor_mzs = [[]]*spec_num
+            charges = [[]]*spec_num
+            mz_vals = raw_data['precursor_mz']
+            ch_vals = raw_data["precursor_charge"]
+            for i,idx in enumerate(spec_idxes):
+                precursor_mzs[idx] = mz_vals[i]
+                charges[idx] = ch_vals[i]
+        else:
+            precursor_mzs = np.zeros(spec_num)
+            precursor_mzs[spec_idxes] = raw_data['precursor_mz']
+            charges = np.zeros(spec_num, np.int8)
+            charges[spec_idxes] = raw_data['precursor_charge']
+
+        self.spectrum_df["charge"] = charges
+        self.spectrum_df["precursor_mz"] = precursor_mzs
 
         self.set_peak_df_by_indexed_array(
             raw_data['peak_mz'],
             raw_data['peak_intensity'],
             start_idxes,end_idxes
         )
-        self.add_column_in_spec_df('rt', rt_values)
-        self.add_column_in_spec_df('charge', charges)
+        self.add_column_in_spec_df_by_spec_idxes('rt', raw_data['rt'], spec_idxes, na_value=0)
         self.spectrum_df['ms_level'] = 2
-        self.set_precursor_mz(
-            precursor_mzs
-        )
-        self.set_isolation_mz_windows(
-            mz_lowers,mz_uppers
-        )
-
 ms_reader_provider.register_reader('mgf', MGFReader)
diff --git a/alpharaw/ms_data_base.py b/alpharaw/ms_data_base.py
index 132270b..50c98de 100644
--- a/alpharaw/ms_data_base.py
+++ b/alpharaw/ms_data_base.py
@@ -134,10 +134,45 @@ def _import(self, _path):
             f"{self.__class__} must implement `_import()`"
         )
     
-    def _set_dataframes(self, raw_data):
-        raise NotImplementedError(
-            f"{self.__class__} must implement `_set_dataframes()`"
+    def _set_dataframes(self, raw_data:dict):
+        self.create_spectrum_df(len(raw_data['rt']))
+        self.set_peak_df_by_indexed_array(
+            raw_data['peak_mz'],
+            raw_data['peak_intensity'],
+            raw_data['peak_indices'][:-1],
+            raw_data['peak_indices'][1:],
+        )
+        self.spectrum_df["rt"] = raw_data['rt']
+        self.spectrum_df["ms_level"] = np.array(
+            raw_data['ms_level'], dtype=np.int8
+        )
+        self.spectrum_df["precursor_mz"] = np.array(
+            raw_data['precursor_mz'], dtype=np.float64
+        )
+        
+        self.spectrum_df["charge"] = np.array(
+            raw_data['precursor_charge'],
+            dtype=np.int8
+        )
+        self.spectrum_df["isolation_lower_mz"] = np.array(
+            raw_data['isolation_mz_lower'], dtype=np.float64
+        )
+        self.spectrum_df["isolation_mz_upper"] = np.array(
+            raw_data['isolation_mz_upper'], dtype=np.float64
         )
+        if "nce" in raw_data:
+            self.spectrum_df["nce"] = np.array(
+                raw_data["nce"],
+                dtype=np.float32,
+            )
+        if "fragmentation" in raw_data:
+            self.spectrum_df["fragmentation"] = np.array(
+                raw_data["fragmentation"]
+            )
+        if "detector" in raw_data:
+            self.spectrum_df["detector"] = np.array(
+                raw_data["detector"]
+            )
 
     def _read_creation_time(self, raw_data):
         pass
@@ -202,26 +237,22 @@ def set_peak_df_by_array_list(self,
             indices[1:]
         )
 
-    def add_column_in_spec_df(self, 
+    def add_column_in_spec_df_by_spec_idxes(self, 
         column_name:str, 
         values:np.ndarray, 
-        spec_idxes:np.ndarray = None,
-        dtype:np.dtype=np.float64, na_value=np.nan,
+        spec_idxes:np.ndarray,
+        dtype:np.dtype=np.float64, 
+        na_value=np.nan,
     ):
-        if spec_idxes is None:
-            self.spectrum_df[
-                column_name
-            ] = np.array(values, dtype=dtype)
-        else:
-            self.spectrum_df.loc[
-                spec_idxes, column_name
-            ] = values
-            self.spectrum_df[column_name].fillna(
-                na_value, inplace=True
-            )
-            self.spectrum_df[
-                column_name
-            ] = self.spectrum_df[column_name].astype(dtype)
+        self.spectrum_df.loc[
+            spec_idxes, column_name
+        ] = values
+        self.spectrum_df[column_name].fillna(
+            na_value, inplace=True
+        )
+        self.spectrum_df[
+            column_name
+        ] = self.spectrum_df[column_name].astype(dtype)
 
     def add_column_in_df_by_scan_num(self, 
         column_name:str, 
@@ -233,7 +264,7 @@ def add_column_in_df_by_scan_num(self,
         """
         scan num starts from 1 not 0
         """
-        self.add_column_in_spec_df(
+        self.add_column_in_spec_df_by_spec_idxes(
             column_name, values,
             scan_nums-1, 
             dtype, na_value
@@ -248,27 +279,27 @@ def get_peaks(self, spec_idx):
             self.peak_df.intensity.values[start:end],
         )
 
-    def set_precursor_mz(self, 
+    def set_precursor_mz_by_spec_idxes(self, 
         precursor_mz_values:np.ndarray,
-        spec_idxes:np.ndarray=None, 
+        spec_idxes:np.ndarray, 
     ):
-        self.add_column_in_spec_df(
+        self.add_column_in_spec_df_by_spec_idxes(
             'precursor_mz', 
             precursor_mz_values, 
             spec_idxes, np.float64, -1.0
         )
     
-    def set_isolation_mz_windows(self,
+    def set_isolation_mz_windows_by_spec_idxes(self,
         precursor_lower_mz_values:np.ndarray,
         precursor_upper_mz_values:np.ndarray,
-        spec_idxes:np.ndarray=None, 
+        spec_idxes:np.ndarray, 
     ):
-        self.add_column_in_spec_df(
+        self.add_column_in_spec_df_by_spec_idxes(
             'isolation_lower_mz',
             precursor_lower_mz_values, 
             spec_idxes, np.float64, -1.0
         )
-        self.add_column_in_spec_df(
+        self.add_column_in_spec_df_by_spec_idxes(
             'isolation_upper_mz',
             precursor_upper_mz_values, 
             spec_idxes, np.float64, -1.0
diff --git a/alpharaw/mzml.py b/alpharaw/mzml.py
index 97e2d49..d258ca5 100644
--- a/alpharaw/mzml.py
+++ b/alpharaw/mzml.py
@@ -83,38 +83,6 @@ def _import(self,
         ret_dict["nce"] = nce_list
         return ret_dict
 
-    def _set_dataframes(self, raw_data:dict):
-        self.create_spectrum_df(len(raw_data['rt']))
-        self.set_peak_df_by_indexed_array(
-            raw_data['peak_mz'],
-            raw_data['peak_intensity'],
-            raw_data['peak_indices'][:-1],
-            raw_data['peak_indices'][1:],
-        )
-        self.add_column_in_spec_df(
-            'rt', raw_data['rt']
-        )
-        self.add_column_in_spec_df(
-            'ms_level', raw_data['ms_level'],
-            dtype=np.int8
-        )
-        self.set_precursor_mz(
-            raw_data['precursor_mz']
-        )
-        self.add_column_in_spec_df(
-            'charge', raw_data['precursor_charge'],
-            dtype=np.int8
-        )
-        self.set_isolation_mz_windows(
-            raw_data['isolation_mz_lower'],
-            raw_data['isolation_mz_upper'],
-        )
-        if "nce" in raw_data:
-            self.add_column_in_spec_df(
-                "nce", raw_data["nce"],
-                dtype=np.float32,
-            )
-
 def parse_mzml_entry(item_dict: dict) -> tuple:
     rt = float(item_dict.get('scanList').get('scan')[0].get('scan start time'))
     masses = item_dict.get('m/z array')
diff --git a/alpharaw/sciex.py b/alpharaw/sciex.py
index bdc80a8..096a199 100644
--- a/alpharaw/sciex.py
+++ b/alpharaw/sciex.py
@@ -43,33 +43,6 @@ def _import(self,
         self.creation_time = wiff_reader.wiffSample.Details.AcquisitionDateTime.ToString("O")
         wiff_reader.close()
         return data_dict
-    
-    def _set_dataframes(self, raw_data:dict):
-        self.create_spectrum_df(len(raw_data['rt']))
-        self.set_peak_df_by_indexed_array(
-            raw_data['peak_mz'],
-            raw_data['peak_intensity'],
-            raw_data['peak_indices'][:-1],
-            raw_data['peak_indices'][1:],
-        )
-        self.add_column_in_spec_df(
-            'rt', raw_data['rt']
-        )
-        self.add_column_in_spec_df(
-            'ms_level', raw_data['ms_level'],
-            dtype=np.int8
-        )
-        self.set_precursor_mz(
-            raw_data['precursor_mz']
-        )
-        self.add_column_in_spec_df(
-            'charge', raw_data['precursor_charge'],
-            dtype=np.int8
-        )
-        self.set_isolation_mz_windows(
-            raw_data['isolation_mz_lower'],
-            raw_data['isolation_mz_upper'],
-        )
 
 ms_reader_provider.register_reader('sciex', SciexWiffData)
 ms_reader_provider.register_reader('sciex_wiff', SciexWiffData)
diff --git a/alpharaw/thermo.py b/alpharaw/thermo.py
index 6206dcf..f8f774d 100644
--- a/alpharaw/thermo.py
+++ b/alpharaw/thermo.py
@@ -98,36 +98,5 @@ def _import(self,
             'nce': np.array(ce_list, dtype=np.float32),
         }
 
-    def _set_dataframes(self, raw_data:dict):
-        self.create_spectrum_df(len(raw_data['rt']))
-        self.set_peak_df_by_indexed_array(
-            raw_data['peak_mz'],
-            raw_data['peak_intensity'],
-            raw_data['peak_indices'][:-1],
-            raw_data['peak_indices'][1:],
-        )
-        self.add_column_in_spec_df(
-            'rt', raw_data['rt']
-        )
-        self.add_column_in_spec_df(
-            'ms_level', raw_data['ms_level'],
-            dtype=np.int8
-        )
-        self.set_precursor_mz(
-            raw_data['precursor_mz']
-        )
-        self.add_column_in_spec_df(
-            'charge', raw_data['precursor_charge'],
-            dtype=np.int8
-        )
-        self.set_isolation_mz_windows(
-            raw_data['isolation_mz_lower'],
-            raw_data['isolation_mz_upper'],
-        )
-        self.add_column_in_spec_df(
-            "nce", raw_data["nce"],
-            dtype=np.float32,
-        )
-
 ms_reader_provider.register_reader('thermo', ThermoRawData)
 ms_reader_provider.register_reader('thermo_raw', ThermoRawData)
diff --git a/alpharaw/wrappers/alphapept_wrapper.py b/alpharaw/wrappers/alphapept_wrapper.py
index 76f6254..80c18d2 100644
--- a/alpharaw/wrappers/alphapept_wrapper.py
+++ b/alpharaw/wrappers/alphapept_wrapper.py
@@ -85,8 +85,6 @@ def _set_dataframes(self, _path):
         start_idxes[spec_idxes] = peak_indices[:-1]
         end_idxes = np.full(spec_num, -1, dtype=np.int64)
         end_idxes[spec_idxes] = peak_indices[1:]
-        rt_values = np.zeros(spec_num)
-        rt_values[spec_idxes] = hdf.Raw.MS2_scans.rt_list_ms2.values
 
         self.set_peak_df_by_indexed_array(
             hdf.Raw.MS2_scans.mass_list_ms2.values,
@@ -94,25 +92,27 @@ def _set_dataframes(self, _path):
             start_idxes, end_idxes
         )
 
-        self.add_column_in_spec_df(
-            'rt', rt_values
+        self.add_column_in_spec_df_by_spec_idxes(
+            "rt", hdf.Raw.MS2_scans.rt_list_ms2.values, 
+            spec_idxes, dtype=np.float64, na_value=0
         )
         self.spectrum_df['ms_level'] = 2
 
         if hasattr(hdf.Raw.MS2_scans, 'mobility2'):
-            self.add_column_in_spec_df(
-                'mobility', hdf.Raw.MS2_scans.mobility2.values
+            self.add_column_in_spec_df_by_spec_idxes(
+                'mobility', 
+                hdf.Raw.MS2_scans.mobility2.values,
+                spec_idxes,
             )
 
         if hasattr(hdf.Raw.MS2_scans, 'mono_mzs2'):
-            precursor_mzs = np.zeros(spec_num)
-            precursor_mzs[spec_idxes] = hdf.Raw.MS2_scans.mono_mzs2.values
-            self.set_precursor_mz(
-                precursor_mzs
-            )
-            self.set_precursor_mz_windows(
-                precursor_mzs-1.5, precursor_mzs+1.5
+            self.add_column_in_spec_df_by_spec_idxes(
+                "precursor_mz", hdf.Raw.MS2_scans.mono_mzs2.values,
+                spec_idxes
             )
+            # self.set_isolation_mz_windows(
+            #     precursor_mzs-1.5, precursor_mzs+1.5
+            # )
 
 ms_reader_provider.register_reader('alphapept', AlphaPept_HDF_MS2_Reader)
 ms_reader_provider.register_reader('alphapept_hdf', AlphaPept_HDF_MS2_Reader)
diff --git a/nbdev_nbs/legacy_msdata/mgf.ipynb b/nbdev_nbs/legacy_msdata/mgf.ipynb
index ff40da5..adf706d 100644
--- a/nbdev_nbs/legacy_msdata/mgf.ipynb
+++ b/nbdev_nbs/legacy_msdata/mgf.ipynb
@@ -62,161 +62,151 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>spec_idx</th>\n",
+       "      <th>charge</th>\n",
+       "      <th>precursor_mz</th>\n",
        "      <th>peak_start_idx</th>\n",
        "      <th>peak_stop_idx</th>\n",
        "      <th>rt</th>\n",
-       "      <th>charge</th>\n",
        "      <th>ms_level</th>\n",
-       "      <th>precursor_mz</th>\n",
        "      <th>isolation_lower_mz</th>\n",
        "      <th>isolation_upper_mz</th>\n",
-       "      <th>rt_sec</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>0</td>\n",
+       "      <td>[3]</td>\n",
+       "      <td>[272.276336]</td>\n",
        "      <td>0</td>\n",
        "      <td>62</td>\n",
        "      <td>0.016667</td>\n",
-       "      <td>3.0</td>\n",
        "      <td>2</td>\n",
-       "      <td>272.276336</td>\n",
-       "      <td>270.276336</td>\n",
-       "      <td>274.276336</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>-1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>1</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>[]</td>\n",
        "      <td>-1</td>\n",
        "      <td>-1</td>\n",
        "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
        "      <td>2</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>-1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>2</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>[]</td>\n",
        "      <td>-1</td>\n",
        "      <td>-1</td>\n",
        "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
        "      <td>2</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>-1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>3</td>\n",
+       "      <td>[2]</td>\n",
+       "      <td>[287.427959]</td>\n",
        "      <td>62</td>\n",
        "      <td>119</td>\n",
        "      <td>0.033333</td>\n",
-       "      <td>2.0</td>\n",
        "      <td>2</td>\n",
-       "      <td>287.427959</td>\n",
-       "      <td>285.427959</td>\n",
-       "      <td>289.427959</td>\n",
-       "      <td>2.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>-1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>4</td>\n",
+       "      <td>[2]</td>\n",
+       "      <td>[287.427959]</td>\n",
        "      <td>119</td>\n",
        "      <td>121</td>\n",
        "      <td>0.050000</td>\n",
-       "      <td>2.0</td>\n",
        "      <td>2</td>\n",
-       "      <td>287.427959</td>\n",
-       "      <td>285.427959</td>\n",
-       "      <td>289.427959</td>\n",
-       "      <td>3.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>-1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td>5</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>[]</td>\n",
        "      <td>-1</td>\n",
        "      <td>-1</td>\n",
        "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
        "      <td>2</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>-1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td>6</td>\n",
+       "      <td>[2, 2]</td>\n",
+       "      <td>[287.427959, 288.427959]</td>\n",
        "      <td>121</td>\n",
        "      <td>124</td>\n",
        "      <td>0.066667</td>\n",
-       "      <td>2.0</td>\n",
        "      <td>2</td>\n",
-       "      <td>287.427959</td>\n",
-       "      <td>285.427959</td>\n",
-       "      <td>289.427959</td>\n",
-       "      <td>4.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>-1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td>7</td>\n",
+       "      <td>[]</td>\n",
+       "      <td>[]</td>\n",
        "      <td>-1</td>\n",
        "      <td>-1</td>\n",
        "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
        "      <td>2</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>-1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td>8</td>\n",
+       "      <td>[2]</td>\n",
+       "      <td>[287.427959]</td>\n",
        "      <td>124</td>\n",
        "      <td>125</td>\n",
        "      <td>0.083333</td>\n",
-       "      <td>2.0</td>\n",
        "      <td>2</td>\n",
-       "      <td>287.427959</td>\n",
-       "      <td>285.427959</td>\n",
-       "      <td>289.427959</td>\n",
-       "      <td>5.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>-1.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   spec_idx  peak_start_idx  peak_stop_idx        rt  charge  ms_level  \\\n",
-       "0         0               0            62  0.016667     3.0         2   \n",
-       "1         1              -1            -1  0.000000     0.0         2   \n",
-       "2         2              -1            -1  0.000000     0.0         2   \n",
-       "3         3              62           119  0.033333     2.0         2   \n",
-       "4         4             119           121  0.050000     2.0         2   \n",
-       "5         5              -1            -1  0.000000     0.0         2   \n",
-       "6         6             121           124  0.066667     2.0         2   \n",
-       "7         7              -1            -1  0.000000     0.0         2   \n",
-       "8         8             124           125  0.083333     2.0         2   \n",
+       "   spec_idx  charge              precursor_mz  peak_start_idx  peak_stop_idx  \\\n",
+       "0         0     [3]              [272.276336]               0             62   \n",
+       "1         1      []                        []              -1             -1   \n",
+       "2         2      []                        []              -1             -1   \n",
+       "3         3     [2]              [287.427959]              62            119   \n",
+       "4         4     [2]              [287.427959]             119            121   \n",
+       "5         5      []                        []              -1             -1   \n",
+       "6         6  [2, 2]  [287.427959, 288.427959]             121            124   \n",
+       "7         7      []                        []              -1             -1   \n",
+       "8         8     [2]              [287.427959]             124            125   \n",
        "\n",
-       "   precursor_mz  isolation_lower_mz  isolation_upper_mz  rt_sec  \n",
-       "0    272.276336          270.276336          274.276336     1.0  \n",
-       "1      0.000000            0.000000            0.000000     0.0  \n",
-       "2      0.000000            0.000000            0.000000     0.0  \n",
-       "3    287.427959          285.427959          289.427959     2.0  \n",
-       "4    287.427959          285.427959          289.427959     3.0  \n",
-       "5      0.000000            0.000000            0.000000     0.0  \n",
-       "6    287.427959          285.427959          289.427959     4.0  \n",
-       "7      0.000000            0.000000            0.000000     0.0  \n",
-       "8    287.427959          285.427959          289.427959     5.0  "
+       "         rt  ms_level  isolation_lower_mz  isolation_upper_mz  \n",
+       "0  0.016667         2                -1.0                -1.0  \n",
+       "1  0.000000         2                -1.0                -1.0  \n",
+       "2  0.000000         2                -1.0                -1.0  \n",
+       "3  0.033333         2                -1.0                -1.0  \n",
+       "4  0.050000         2                -1.0                -1.0  \n",
+       "5  0.000000         2                -1.0                -1.0  \n",
+       "6  0.066667         2                -1.0                -1.0  \n",
+       "7  0.000000         2                -1.0                -1.0  \n",
+       "8  0.083333         2                -1.0                -1.0  "
       ]
      },
      "execution_count": null,
@@ -380,6 +370,16 @@
     "402.705571 402.705571\n",
     "END IONS\n",
     "BEGIN IONS\n",
+    "TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.32733.32733.2.1.dta\n",
+    "CHARGE=2+\n",
+    "RTINSECONDS=4.0\n",
+    "SCAN=7\n",
+    "PEPMASS=288.427959\n",
+    "103.34669 5304.0\n",
+    "104.66884 5639.7\n",
+    "402.705571 402.705571\n",
+    "END IONS\n",
+    "BEGIN IONS\n",
     "TITLE=02445a_BA7-TUM_HLA_7_01_01-DDA-1h-R1.23669.23669.2.0.dta\n",
     "CHARGE=2+\n",
     "SCAN=9\n",
diff --git a/nbdev_nbs/match/psm_match.ipynb b/nbdev_nbs/match/psm_match.ipynb
index 6288708..fcd8c03 100644
--- a/nbdev_nbs/match/psm_match.ipynb
+++ b/nbdev_nbs/match/psm_match.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -25,7 +25,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,7 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -251,7 +251,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -278,7 +278,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -322,18 +322,6 @@
    "display_name": "Python 3.8.3 ('base')",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.12"
   }
  },
  "nbformat": 4,
diff --git a/nbdev_nbs/match/psm_match_alphatims.ipynb b/nbdev_nbs/match/psm_match_alphatims.ipynb
index 563d8fb..c271d2b 100644
--- a/nbdev_nbs/match/psm_match_alphatims.ipynb
+++ b/nbdev_nbs/match/psm_match_alphatims.ipynb
@@ -18,7 +18,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "WARNING:root:WARNING: Temp mmap arrays are written to /var/folders/hn/0f5pbk7j55sd4r5bxjp25_rm0000gn/T/temp_mmap_gb9j_x96. Cleanup of this folder is OS dependant, and might need to be triggered manually! Current space: 586,177,458,176\n",
+      "WARNING:root:WARNING: Temp mmap arrays are written to /var/folders/fh/hf8t3l1x02d42ggk3b304_rh0000gn/T/temp_mmap_v85nc51s. Cleanup of this folder is OS dependant, and might need to be triggered manually! Current space: 706,796,924,928\n",
       "WARNING:root:WARNING: No Bruker libraries are available for this operating system. Mobility and m/z values need to be estimated. While this estimation often returns acceptable results with errors < 0.02 Th, huge errors (e.g. offsets of 6 Th) have already been observed for some samples!\n"
      ]
     }
@@ -47,7 +47,19 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "AssertionError",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[4], line 185\u001b[0m\n\u001b[1;32m    183\u001b[0m psm_df, mz_df, inten_df, merr_df \u001b[39m=\u001b[39m matching\u001b[39m.\u001b[39mmatch_ms2_one_raw(psm_df)\n\u001b[1;32m    184\u001b[0m \u001b[39m#np.sum(matching.matched_intensity_df.values!=0,axis=1)\u001b[39;00m\n\u001b[0;32m--> 185\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mlen\u001b[39m(merr_df\u001b[39m.\u001b[39mvalues[\u001b[39m~\u001b[39mnp\u001b[39m.\u001b[39misinf(merr_df\u001b[39m.\u001b[39mvalues)])\u001b[39m==\u001b[39m\u001b[39m3\u001b[39m\n\u001b[1;32m    186\u001b[0m \u001b[39massert\u001b[39;00m np\u001b[39m.\u001b[39mcount_nonzero(inten_df\u001b[39m.\u001b[39mvalues)\u001b[39m==\u001b[39m\u001b[39m3\u001b[39m\n",
+      "\u001b[0;31mAssertionError\u001b[0m: "
+     ]
+    }
+   ],
    "source": [
     "#| hide\n",
     "#unittest\n",
diff --git a/nbdev_nbs/test_mzml.ipynb b/nbdev_nbs/test_mzml.ipynb
index 2b76529..e6881b1 100644
--- a/nbdev_nbs/test_mzml.ipynb
+++ b/nbdev_nbs/test_mzml.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -13,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -731,7 +731,7 @@
        "47       0              882.04              883.04  "
       ]
      },
-     "execution_count": 2,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -746,7 +746,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -852,7 +852,7 @@
        "[197243 rows x 2 columns]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -874,20 +874,7 @@
    "display_name": "base",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.12"
-  },
-  "orig_nbformat": 4
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 2

From c1a2ee27dec1ff550088a5178d92092a46a93fe8 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Fri, 18 Aug 2023 13:14:43 +0200
Subject: [PATCH 9/9] =?UTF-8?q?Bump=20version:=200.1.0=20=E2=86=92=200.1.1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg     | 2 +-
 alpharaw/__init__.py | 2 +-
 docs/conf.py         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index b791d6e..1498aba 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.0
+current_version = 0.1.1
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
diff --git a/alpharaw/__init__.py b/alpharaw/__init__.py
index 3257380..ad9f9ef 100644
--- a/alpharaw/__init__.py
+++ b/alpharaw/__init__.py
@@ -12,7 +12,7 @@ def register_readers():
         return "[WARN] pythonnet is not installed"
 
 __project__ = "alpharaw"
-__version__ = "0.1.0"
+__version__ = "0.1.1"
 __license__ = "Apache"
 __description__ = "An open-source Python package to unify raw MS data accession and storage."
 __author__ = "Mann Labs"
diff --git a/docs/conf.py b/docs/conf.py
index 55b9f56..1298339 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -23,7 +23,7 @@
 copyright = '2022, Mann Labs, MPIB'
 author = 'Mann Labs, MPIB'
 
-release = "0.1.0"
+release = "0.1.1"
 
 # -- General configuration ---------------------------------------------------