Skip to content

Commit

Permalink
Update the logic for validating unique IDs
Browse files Browse the repository at this point in the history
  • Loading branch information
CunliangGeng committed Aug 29, 2023
1 parent ac60f24 commit 69b73ae
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 21 deletions.
16 changes: 7 additions & 9 deletions src/nplinker/metabolomics/gnps/gnps_annotation_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,13 @@ def _validate(self) -> None:
# validate that "#Scan#" must be unique
with open(self._file, mode='rt') as f:
reader = csv.DictReader(f, delimiter='\t')
ids = []
for row in reader:
_id = row["#Scan#"]
if _id in ids:
raise ValueError(
f"Invalid GNPS annotation file '{self._file}'. "
f"Expected unique '#Scan#', but found duplicate '{_id}'."
)
ids.append(_id)
scans = [row["#Scan#"] for row in reader]
duplicates = {x for x in scans if scans.count(x) > 1}
if len(duplicates) > 0:
raise ValueError(
f"Invalid GNPS annotation file '{self._file}'. "
f"Expected unique '#Scan#', but found duplicates '{duplicates}'."
)

def _load(self) -> None:
"""Load the annotations from the file."""
Expand Down
20 changes: 8 additions & 12 deletions src/nplinker/metabolomics/gnps/gnps_file_mapping_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,20 +113,16 @@ def _validate(self) -> None:
with open(self._file, mode='rt') as f:
if self._gnps_format is GNPSFormat.FBMN:
reader = csv.DictReader(f, delimiter=',')
ids = [row["row ID"] for row in reader]
else:
reader = csv.DictReader(f, delimiter='\t')
ids = []
for row in reader:
if self._gnps_format is GNPSFormat.FBMN:
_id = row["row ID"]
else:
_id = row["cluster index"]
if _id in ids:
raise ValueError(
f"Invalid GNPS file mappings file '{self._file}'. "
f"Expected unique 'cluster index' or 'row ID', "
f"but found duplicate '{_id}'.")
ids.append(_id)
ids = [row["cluster index"] for row in reader]
duplicates = {x for x in ids if ids.count(x) > 1}
if len(duplicates) > 0:
raise ValueError(
f"Invalid GNPS file mappings file '{self._file}'. "
f"Expected unique 'cluster index' or 'row ID', "
f"but found duplicates '{duplicates}'.")

def _load(self) -> None:
"""Load file mapping from the file based on the GNPS workflow type.
Expand Down

0 comments on commit 69b73ae

Please sign in to comment.