diff --git a/src/nplinker/metabolomics/__init__.py b/src/nplinker/metabolomics/__init__.py
index 6f39b4e9..cf34b44c 100644
--- a/src/nplinker/metabolomics/__init__.py
+++ b/src/nplinker/metabolomics/__init__.py
@@ -1,9 +1,8 @@
 import logging
 from .molecular_family import MolecularFamily
-from .spectrum import GNPS_KEY
 from .spectrum import Spectrum
 
 
 logging.getLogger(__name__).addHandler(logging.NullHandler())
 
-__all__ = ["MolecularFamily", "GNPS_KEY", "Spectrum"]
+__all__ = ["MolecularFamily", "Spectrum"]
diff --git a/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py b/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py
index 29b8df38..7860ea5d 100644
--- a/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py
+++ b/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py
@@ -67,7 +67,6 @@ def _validate(self):
 
     def _load(self):
         """Load the MGF file into Spectrum objects."""
-        i = 0
         for spec in mgf.MGF(self._file):
             # Skip if m/z array is empty, as this is an invalid spectrum.
             # The invalid spectrum does not exist in other GNPS files, e.g.
@@ -77,20 +76,22 @@ def _load(self):
                 continue
 
             # Load the spectrum
-            peaks: list[tuple[float, float]] = list(zip(spec["m/z array"], spec["intensity array"]))
             spectrum_id: str = spec["params"]["scans"]
             # calculate precursor m/z from precursor mass and charge
             precursor_mass = spec["params"]["pepmass"][0]
             precursor_charge = self._get_precursor_charge(spec["params"]["charge"])
             precursor_mz: float = precursor_mass / abs(precursor_charge)
-            rt: float | None = spec["params"].get("rtinseconds", None)
+            rt = spec["params"].get("rtinseconds", 0)
 
             spectrum = Spectrum(
-                id=i, peaks=peaks, spectrum_id=spectrum_id, precursor_mz=precursor_mz, rt=rt
+                spectrum_id=spectrum_id,
+                mz=list(spec["m/z array"]),
+                intensity=list(spec["intensity array"]),
+                precursor_mz=precursor_mz,
+                rt=rt,
+                metadata=spec["params"],
             )
-            spectrum.metadata = spec["params"]
             self._spectra.append(spectrum)
-            i += 1
 
     def _get_precursor_charge(self, charges: list) -> int:
         """Get the precursor charge from the charge list.
diff --git a/src/nplinker/metabolomics/spectrum.py b/src/nplinker/metabolomics/spectrum.py
index 0e0e97bf..218b3e04 100644
--- a/src/nplinker/metabolomics/spectrum.py
+++ b/src/nplinker/metabolomics/spectrum.py
@@ -1,214 +1,88 @@
 from __future__ import annotations
+from functools import cached_property
 from typing import TYPE_CHECKING
+import numpy as np
 from nplinker.strain import Strain
 from nplinker.strain_collection import StrainCollection
-from nplinker.utils import sqrt_normalise
 
 
 if TYPE_CHECKING:
     from .molecular_family import MolecularFamily
 
-GNPS_KEY = "gnps"
-
-JCAMP = (
-    "##TITLE={}\\n"
-    + "##JCAMP-DX=nplinker vTODO\\n"
-    + "##DATA TYPE=Spectrum\\n"
-    + "##DATA CLASS=PEAKTABLE\\n"
-    + "##ORIGIN=TODO_DATASET_ID\\n"
-    + "##OWNER=nobody\\n"
-    + "##XUNITS=M/Z\\n"
-    + "##YUNITS=RELATIVE ABUNDANCE\\n"
-    + "##NPOINTS={}\\n"
-    + "##PEAK TABLE=(XY..XY)\\n"
-    + "{}\\n"
-    + "##END=\\n"
-)
-
 
 class Spectrum:
-    def __init__(self, id, peaks, spectrum_id: str, precursor_mz, parent_mz=None, rt=None):
-        self.id = id
-        self.peaks = sorted(peaks, key=lambda x: x[0])  # ensure sorted by mz
-        self.normalised_peaks = sqrt_normalise(self.peaks)  # useful later
-        self.n_peaks = len(self.peaks)
-        self.max_ms2_intensity = max(intensity for mz, intensity in self.peaks)
-        self.total_ms2_intensity = sum(intensity for mz, intensity in self.peaks)
-        self.spectrum_id = spectrum_id  # MS1.name
-        self.rt = rt
-        # TODO CG: should include precursor mass and charge to calculate precursor_mz
-        # parent_mz can be calculate from precursor_mass and charge mass
+    def __init__(
+        self,
+        spectrum_id: str,
+        mz: list[float],
+        intensity: list[float],
+        precursor_mz: float,
+        rt: float = 0,
+        metadata: dict | None = None,
+    ) -> None:
+        """Class to model MS/MS Spectrum.
+
+        Args:
+            spectrum_id (str): the spectrum ID.
+            mz (list[float]): the list of m/z values.
+            intensity (list[float]): the list of intensity values.
+            precursor_mz (float): the precursor m/z.
+            rt (float): the retention time in seconds. Defaults to 0.
+            metadata (dict, optional): the metadata of the spectrum, i.e. the header information
+                in the MGF file.
+
+        Attributes:
+            spectrum_id (str): the spectrum ID.
+            mz (list[float]): the list of m/z values.
+            intensity (list[float]): the list of intensity values.
+            precursor_mz (float): the m/z value of the precursor.
+            rt (float): the retention time in seconds.
+            metadata (dict): the metadata of the spectrum, i.e. the header information in the MGF
+                file.
+            gnps_annotations (dict): the GNPS annotations of the spectrum.
+            gnps_id (str | None): the GNPS ID of the spectrum.
+            strains (StrainCollection): the strains that this spectrum belongs to.
+            family (MolecularFamily): the molecular family that this spectrum belongs to.
+            peaks (np.ndarray): 2D array of peaks, each row is a peak of (m/z, intensity) values.
+        """
+        self.spectrum_id = spectrum_id
+        self.mz = mz
+        self.intensity = intensity
         self.precursor_mz = precursor_mz
-        self.parent_mz = parent_mz
-        self.gnps_id = None  # CCMSLIB...
-        # TODO should add intensity here too
-        self.metadata = {}
-        self.edges = []
-        self.strains = StrainCollection()
-        # this is a dict indexed by Strain objects (the strains found in this Spectrum), with
-        # the values being dicts of the form {growth_medium: peak intensity} for the parent strain
-        self.growth_media = {}
-        self.family: MolecularFamily | None = None
-        # a dict indexed by filename, or "gnps"
-        self.annotations = {}
-        self._losses = None
-        self._jcamp = None
-
-    def add_strain(self, strain, growth_medium, peak_intensity):
-        # adds the strain to the StrainCollection if not already there
-        self.strains.add(strain)
-
-        if strain not in self.growth_media:
-            self.growth_media[strain] = {}
-
-        if growth_medium is None:
-            self.growth_media[strain].update(
-                {f"unknown_medium_{len(self.growth_media[strain])}": peak_intensity}
-            )
-            return
-
-        if strain in self.growth_media and growth_medium in self.growth_media[strain]:
-            raise Exception("Growth medium clash: {} / {} {}".format(self, strain, growth_medium))
-
-        self.growth_media[strain].update({growth_medium: peak_intensity})
-
-    @property
-    def is_library(self):
-        return GNPS_KEY in self.annotations
-
-    def set_annotations(self, key, data):
-        self.annotations[key] = data
-
-    @property
-    def gnps_annotations(self):
-        if GNPS_KEY not in self.annotations:
-            return None
-
-        return self.annotations[GNPS_KEY][0]
-
-    def has_annotations(self):
-        return len(self.annotations) > 0
-
-    def get_metadata_value(self, key):
-        val = self.metadata.get(key, None)
-        return val
-
-    def has_strain(self, strain: Strain):
-        return strain in self.strains
-
-    def get_growth_medium(self, strain):
-        if strain not in self.strains:
-            return None
-
-        gms = self.growth_media[strain]
-        return list(gms.keys())[0]
-
-    def to_jcamp_str(self, force_refresh=False):
-        if self._jcamp is not None and not force_refresh:
-            return self._jcamp
+        self.rt = rt
+        self.metadata = metadata or {}
 
-        peakdata = "\\n".join("{}, {}".format(*p) for p in self.peaks)
-        self._jcamp = JCAMP.format(str(self), self.n_peaks, peakdata)
-        return self._jcamp
+        self.gnps_annotations: dict = {}
+        self.gnps_id: str | None = None
+        self.strains: StrainCollection = StrainCollection()
+        self.family: MolecularFamily | None = None
 
-    def __str__(self):
-        return "Spectrum(id={}, spectrum_id={}, strains={})".format(
-            self.id, self.spectrum_id, len(self.strains)
-        )
+    def __str__(self) -> str:
+        return f"Spectrum(spectrum_id={self.spectrum_id}, #strains={len(self.strains)})"
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return str(self)
 
     def __eq__(self, other) -> bool:
         if isinstance(other, Spectrum):
-            return (
-                self.id == other.id
-                and self.spectrum_id == other.spectrum_id
-                and self.precursor_mz == other.precursor_mz
-                and self.parent_mz == other.parent_mz
-            )
+            return self.spectrum_id == other.spectrum_id and self.precursor_mz == other.precursor_mz
         return NotImplemented
 
     def __hash__(self) -> int:
-        return hash((self.id, self.spectrum_id, self.precursor_mz, self.parent_mz))
-
-    def __cmp__(self, other):
-        if self.parent_mz >= other.parent_mz:
-            return 1
-        else:
-            return -1
-
-    def __lt__(self, other):
-        if self.parent_mz <= other.parent_mz:
-            return 1
-        else:
-            return 0
-
-    # from molnet repo
-    def keep_top_k(self, k=6, mz_range=50):
-        # only keep peaks that are in the top k in += mz_range
-        start_pos = 0
-        new_peaks = []
-        for mz, intensity in self.peaks:
-            while self.peaks[start_pos][0] < mz - mz_range:
-                start_pos += 1
-            end_pos = start_pos
+        return hash((self.spectrum_id, self.precursor_mz))
 
-            n_bigger = 0
-            while end_pos < len(self.peaks) and self.peaks[end_pos][0] <= mz + mz_range:
-                if self.peaks[end_pos][1] > intensity:
-                    n_bigger += 1
-                end_pos += 1
+    @cached_property
+    def peaks(self) -> np.ndarray:
+        """Array of (m/z, intensity) rows, cached: reassigning mz/intensity won't refresh it."""
+        return np.array(list(zip(self.mz, self.intensity)))
 
-            if n_bigger < k:
-                new_peaks.append((mz, intensity))
-
-        self.peaks = new_peaks
-        self.n_peaks = len(self.peaks)
-        if self.n_peaks > 0:
-            self.normalised_peaks = sqrt_normalise(self.peaks)
-            self.max_ms2_intensity = max(intensity for mz, intensity in self.peaks)
-            self.total_ms2_intensity = sum(intensity for mz, intensity in self.peaks)
-        else:
-            self.normalised_peaks = []
-            self.max_ms2_intensity = 0.0
-            self.total_ms2_intensity = 0.0
-
-    @property
-    def losses(self):
-        """All mass shifts in the spectrum, and the indices of the peaks."""
-        if self._losses is None:
-            # populate loss table
-            losses = []
-            for i in range(len(self.peaks)):
-                loss = self.precursor_mz - self.peaks[i][0]
-                losses.append((loss, self.id, i))
-
-            # THIS SEEMED TO ME LIKE IT WOULD TAKE THE WRONG DIFFERENCES AS LOSSES:
-            # TODO: please check!
-            #                for j in range(i):
-            #                    loss = self.peaks[i][0] - self.peaks[j][0]
-            #                    losses.append((loss, i, j))
-
-            # Sort by loss
-            losses.sort(key=lambda x: x[0])
-            self._losses = losses
-        return self._losses
-
-    def has_loss(self, mass, tol):
-        """Check if the scan has the specified loss (within tolerance)."""
-        matched_losses = []
-
-        idx = 0
-        # Check losses in range [0, mass]
-        while idx < len(self.losses) and self.losses[idx][0] <= mass:
-            if mass - self.losses[idx][0] < tol:
-                matched_losses.append(self.losses[idx])
-            idx += 1
+    def has_strain(self, strain: Strain):
+        """Check if the given strain exists in the spectrum.
 
-        # Add all losses in range [mass, mass+tol(
-        while idx < len(self.losses) and self.losses[idx][0] < mass + tol:
-            matched_losses.append(self.losses[idx])
-            idx += 1
+        Args:
+            strain (Strain): `Strain` object.
 
-        return matched_losses
+        Returns:
+            bool: True when the given strain exists in the spectrum.
+        """
+        return strain in self.strains
diff --git a/src/nplinker/pickler.py b/src/nplinker/pickler.py
index 265cd1ee..8f566898 100644
--- a/src/nplinker/pickler.py
+++ b/src/nplinker/pickler.py
@@ -44,9 +44,9 @@ def persistent_id(self, obj):
         elif isinstance(obj, GCF):
             return ("GCF", obj.gcf_id)
         elif isinstance(obj, Spectrum):
-            return ("Spectrum", obj.id)
+            return ("Spectrum", obj.spectrum_id)
         elif isinstance(obj, MolecularFamily):
-            return ("MolecularFamily", obj.id)
+            return ("MolecularFamily", obj.family_id)
         else:
             # TODO: ideally should use isinstance(obj, ScoringMethod) here
             # but it's currently a problem because it creates a circular
diff --git a/src/nplinker/scoring/iokr/nplinker_iokr.py b/src/nplinker/scoring/iokr/nplinker_iokr.py
index 8cd17f55..468e2240 100644
--- a/src/nplinker/scoring/iokr/nplinker_iokr.py
+++ b/src/nplinker/scoring/iokr/nplinker_iokr.py
@@ -137,7 +137,7 @@ def score_smiles(self, ms_list, candidate_smiles):
         candidates = iokr_opt.preprocess_candidates(candidate_fps, latent, latent_basis, gamma)
 
         for ms_index, ms in enumerate(ms_list):
-            logger.debug("Rank spectrum {} ({}/{})".format(ms.id, ms_index, len(ms_list)))
+            logger.debug("Rank spectrum {} ({}/{})".format(ms.spectrum_id, ms_index, len(ms_list)))
             ms.filter = spectrum_filters.filter_by_frozen_dag
             logger.debug("kernel vector")
             t0 = time.time()
diff --git a/src/nplinker/scoring/iokr/spectrum.py b/src/nplinker/scoring/iokr/spectrum.py
index 82ba1d80..07a27f5a 100644
--- a/src/nplinker/scoring/iokr/spectrum.py
+++ b/src/nplinker/scoring/iokr/spectrum.py
@@ -44,8 +44,9 @@ def __init__(self, mgf_dict=None, spec=None):
 
     def init_from_spec(self, spec):
-        self.id = spec.id
-        self.raw_parentmass = spec.parent_mz
-        self.raw_spectrum = numpy.array(spec.peaks)
+        # Spectrum no longer carries a numeric ``id``; ``spectrum_id`` is its only identifier.
+        self.id = spec.spectrum_id
+        self.raw_parentmass = spec.precursor_mz
+        self.raw_spectrum = spec.peaks
         # TODO this is a temporary default for the Crusemann data
         # should check for it in the mgf in metabolomics.py and store
         # in the Spectrum object if found
diff --git a/src/nplinker/scoring/rosetta/rosetta.py b/src/nplinker/scoring/rosetta/rosetta.py
index f2f18304..c9937a66 100644
--- a/src/nplinker/scoring/rosetta/rosetta.py
+++ b/src/nplinker/scoring/rosetta/rosetta.py
@@ -548,7 +548,6 @@ def export_to_csv(self, filename):
             for hit in self._rosetta_hits:
                 csvwriter.writerow(
                     [
-                        hit.spec.id,
                         hit.spec.spectrum_id,
                         hit.gnps_id,
                         hit.spec_match_score,
diff --git a/src/nplinker/scoring/rosetta/rosetta_functions.py b/src/nplinker/scoring/rosetta/rosetta_functions.py
index 26ff1561..4d138e4a 100644
--- a/src/nplinker/scoring/rosetta/rosetta_functions.py
+++ b/src/nplinker/scoring/rosetta/rosetta_functions.py
@@ -12,17 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import math
+
 
 def fast_cosine_shift(spectrum1, spectrum2, tol, min_match):
-    if spectrum1.n_peaks == 0 or spectrum2.n_peaks == 0:
+    if len(spectrum1.peaks) == 0 or len(spectrum2.peaks) == 0:
         return 0.0, []
 
-    spec1 = spectrum1.normalised_peaks
-    spec2 = spectrum2.normalised_peaks
+    spec1 = sqrt_normalise(spectrum1.peaks)
+    spec2 = sqrt_normalise(spectrum2.peaks)
 
     zero_pairs = find_pairs(spec1, spec2, tol, shift=0.0)
 
-    shift = spectrum1.parent_mz - spectrum2.parent_mz
+    shift = spectrum1.precursor_mz - spectrum2.precursor_mz
 
     nonzero_pairs = find_pairs(spec1, spec2, tol, shift=shift)
 
@@ -66,12 +68,12 @@ def find_pairs(spec1, spec2, tol, shift=0):
 
 def fast_cosine(spectrum1, spectrum2, tol, min_match):
     # spec 1 and spec 2 have to be sorted by mz
-    if spectrum1.n_peaks == 0 or spectrum2.n_peaks == 0:
+    if len(spectrum1.peaks) == 0 or len(spectrum2.peaks) == 0:
         return 0.0, []
     # find all the matching pairs
 
-    spec1 = spectrum1.normalised_peaks
-    spec2 = spectrum2.normalised_peaks
+    spec1 = sqrt_normalise(spectrum1.peaks)
+    spec2 = sqrt_normalise(spectrum2.peaks)
 
     matching_pairs = find_pairs(spec1, spec2, tol, shift=0.0)
 
@@ -107,3 +109,16 @@ def comp_scores(spectra, file_scan, similarity_function, similarity_tolerance, m
             spec2 = specs[j]
             sc, _ = similarity_function(spec, spec2, similarity_tolerance, min_match)
             print(f"{f},{s} <-> {f2},{s2} = {sc}")
+
+
+def sqrt_normalise(peaks):
+    """Return (mz, sqrt(intensity) / sqrt(total intensity)) pairs; zeros if total is 0."""
+    peaks = list(peaks)
+    total = 0.0
+    for _, intensity in peaks:
+        total += intensity
+    norm_facc = math.sqrt(total)
+    if norm_facc == 0:
+        # All-zero intensities: nothing to scale by, avoid ZeroDivisionError.
+        return [(mz, 0.0) for mz, _ in peaks]
+    return [(mz, math.sqrt(intensity) / norm_facc) for mz, intensity in peaks]
diff --git a/src/nplinker/scoring/rosetta/spec_lib.py b/src/nplinker/scoring/rosetta/spec_lib.py
index 56baa42a..88cc2bba 100644
--- a/src/nplinker/scoring/rosetta/spec_lib.py
+++ b/src/nplinker/scoring/rosetta/spec_lib.py
@@ -43,13 +43,13 @@ def get_ids(self):
         return list(s.spectrum_id for s in self.spectra)
 
     def get_n_peaks(self):
-        return [s.n_peaks for s in self.spectra]
+        return [len(s.peaks) for s in self.spectra]
 
     def filter(self):
         # top_k_filter
         n_done = 0
         for spec in self.spectra:
-            spec.keep_top_k()
+            self._keep_top_k(spec)
             n_done += 1
             if n_done % 100 == 0:
                 logger.info(
@@ -82,3 +82,27 @@ def _candidates(self, mz_list, query_mz, ms1_tol):
         start = pmz_list.bisect(lower)
         end = pmz_list.bisect(upper)
         return mz_list[start:end]
+
+    # from molnet repo
+    def _keep_top_k(self, spec, k=6, mz_range=50):
+        """Keep peaks in the top k by intensity within +/- mz_range, updating spec in place."""
+        peaks = spec.peaks
+        start_pos = 0
+        new_mz = []
+        new_intensities = []
+        for mz, intensity in peaks:
+            while peaks[start_pos][0] < mz - mz_range:
+                start_pos += 1
+            end_pos = start_pos
+            n_bigger = 0
+            while end_pos < len(peaks) and peaks[end_pos][0] <= mz + mz_range:
+                if peaks[end_pos][1] > intensity:
+                    n_bigger += 1
+                end_pos += 1
+            if n_bigger < k:
+                new_mz.append(mz)
+                new_intensities.append(intensity)
+        spec.mz = new_mz
+        spec.intensity = new_intensities
+        # ``peaks`` is a cached_property on Spectrum: drop the stale array so it is rebuilt.
+        spec.__dict__.pop("peaks", None)
diff --git a/src/nplinker/scoring/rosetta_scoring.py b/src/nplinker/scoring/rosetta_scoring.py
index 9c03d657..d5fbcff6 100644
--- a/src/nplinker/scoring/rosetta_scoring.py
+++ b/src/nplinker/scoring/rosetta_scoring.py
@@ -121,7 +121,7 @@ def get_links(self, objects, link_collection):
     def _collect_results_spectra(self, objects, ro_hits, results):
         for spec in objects:
             for hit in ro_hits:
-                if spec.id == hit.spec.id:
+                if spec.spectrum_id == hit.spec.spectrum_id:
                     if not self.bgc_to_gcf:
                         # can use the BGC directly
                         results = self._insert_result_met(results, spec, hit.bgc, hit)
diff --git a/src/nplinker/utils.py b/src/nplinker/utils.py
index df481c68..23a00c0e 100644
--- a/src/nplinker/utils.py
+++ b/src/nplinker/utils.py
@@ -18,7 +18,6 @@
 import gzip
 import hashlib
 import lzma
-import math
 import os
 import os.path
 import sys
@@ -32,21 +31,6 @@
 from tqdm import tqdm
 
 
-# CG: it's only used by metabolomics, should move it there
-# code to normalise peaks for spectral matching ("rosetta stone" stuff)
-def sqrt_normalise(peaks):
-    temp = []
-    total = 0.0
-    for mz, intensity in peaks:
-        temp.append((mz, math.sqrt(intensity)))
-        total += intensity
-    norm_facc = math.sqrt(total)
-    normalised_peaks = []
-    for mz, intensity in temp:
-        normalised_peaks.append((mz, intensity / norm_facc))
-    return normalised_peaks
-
-
 def find_delimiter(file: str | PathLike) -> str:
     """Detect the delimiter for the given tabular file.
 
diff --git a/tests/metabolomics/test_molecular_family.py b/tests/metabolomics/test_molecular_family.py
index 6842e6e7..e5731320 100644
--- a/tests/metabolomics/test_molecular_family.py
+++ b/tests/metabolomics/test_molecular_family.py
@@ -8,7 +8,7 @@
 @pytest.fixture()
 def spectrum1():
     """Return a Spectrum object."""
-    spec = Spectrum(id=1, peaks=[(1.0, 1.0)], spectrum_id="spec001", precursor_mz=100.0)
+    spec = Spectrum(spectrum_id="spec001", mz=[1.0], intensity=[1.0], precursor_mz=100.0)
     spec.strains = StrainCollection()
     spec.strains.add(Strain("strain001"))
     yield spec
@@ -17,7 +17,7 @@ def spectrum1():
 @pytest.fixture()
 def spectrum2():
     """Return a Spectrum object."""
-    spec = Spectrum(id=2, peaks=[(1.0, 1.0)], spectrum_id="spec002", precursor_mz=100.0)
+    spec = Spectrum(spectrum_id="spec002", mz=[1.0], intensity=[1.0], precursor_mz=100.0)
     spec.strains = StrainCollection()
     spec.strains.add(Strain("strain002"))
     yield spec
diff --git a/tests/metabolomics/test_spectrum.py b/tests/metabolomics/test_spectrum.py
index cbb34667..689f9c25 100644
--- a/tests/metabolomics/test_spectrum.py
+++ b/tests/metabolomics/test_spectrum.py
@@ -1,14 +1,70 @@
+import numpy as np
 import pytest
 from nplinker.metabolomics import Spectrum
+from nplinker.strain import Strain
+from nplinker.strain_collection import StrainCollection
 
 
-@pytest.fixture
-def spectrum() -> Spectrum:
-    spec = Spectrum(
-        1, peaks=[[10, 100], [20, 150]], spectrum_id="2", precursor_mz=30, parent_mz=50, rt=100
-    )
-    return spec
+@pytest.mark.parametrize(
+    "rt, metadata, expected_metadata",
+    [
+        [0, None, {}],
+        [1, {"info": "test"}, {"info": "test"}],
+    ],
+)
+def test_init(rt, metadata, expected_metadata):
+    """Test the initialization of the Spectrum class."""
+    spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, rt, metadata)
 
+    assert spec.spectrum_id == "spec1"
+    assert spec.mz == [100, 200]
+    assert spec.intensity == [0.1, 0.2]
+    assert spec.precursor_mz == 150
+    assert spec.rt == rt
+    assert spec.metadata == expected_metadata
 
-def test_constructor(spectrum):
-    assert spectrum is not None
+    # test the default values of the attributes
+    assert spec.gnps_annotations == {}
+    assert spec.gnps_id is None
+    assert spec.strains == StrainCollection()
+    assert spec.family is None
+
+
+def test_str_repr():
+    """Check that __str__ and __repr__ render the same summary string."""
+    expected = "Spectrum(spectrum_id=spec1, #strains=0)"
+    spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150)
+    assert str(spec) == repr(spec) == expected
+
+
+def test_eq():
+    """Same spectrum_id and precursor_mz compare equal; a different spectrum_id does not."""
+    shared = ([100, 200], [0.1, 0.2], 150, 0, {"info": "test"})
+    first = Spectrum("spec1", *shared)
+    same_id = Spectrum("spec1", *shared)
+    other_id = Spectrum("spec2", *shared)
+    assert first == same_id
+    assert first != other_id
+
+
+def test_hash():
+    """The hash is derived from the (spectrum_id, precursor_mz) tuple."""
+    expected = hash(("spec1", 150))
+    assert hash(Spectrum("spec1", [100, 200], [0.1, 0.2], 150)) == expected
+
+
+def test_peaks():
+    """peaks pairs each m/z with its intensity, one row per peak."""
+    expected = np.array([[100, 0.1], [200, 0.2]])
+    assert np.array_equal(Spectrum("spec1", [100, 200], [0.1, 0.2], 150).peaks, expected)
+
+
+def test_has_strain():
+    """has_strain reports True only for a strain that was added to the spectrum."""
+    spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150)
+    added, missing = Strain("strain1"), Strain("strain2")
+    spec.strains.add(added)
+
+    found = [spec.has_strain(s) for s in (added, missing)]
+    assert found[0]
+    assert not found[1]
diff --git a/tests/scoring/conftest.py b/tests/scoring/conftest.py
index dac52274..d4fec198 100644
--- a/tests/scoring/conftest.py
+++ b/tests/scoring/conftest.py
@@ -38,11 +38,11 @@ def gcfs(strains_list) -> tuple[GCF, GCF, GCF]:
 
 @fixture(scope="session")
 def spectra(strains_list) -> tuple[Spectrum, Spectrum, Spectrum]:
-    spectrum1 = Spectrum(1, [(1, 1)], "spectrum1", None)
+    spectrum1 = Spectrum("spectrum1", [1], [1], 10.0)
     spectrum1.strains.add(strains_list[0])
-    spectrum2 = Spectrum(2, [(1, 1)], "spectrum2", None)
+    spectrum2 = Spectrum("spectrum2", [1], [1], 10.0)
     spectrum2.strains.add(strains_list[1])
-    spectrum3 = Spectrum(3, [(1, 1)], "spectrum3", None)
+    spectrum3 = Spectrum("spectrum3", [1], [1], 10.0)
     spectrum3.strains.add(strains_list[0])
     spectrum3.strains.add(strains_list[1])
     return spectrum1, spectrum2, spectrum3