diff --git a/src/nplinker/metabolomics/gnps/gnps_format.py b/src/nplinker/metabolomics/gnps/gnps_format.py index 96fa083e..b0cc3de5 100644 --- a/src/nplinker/metabolomics/gnps/gnps_format.py +++ b/src/nplinker/metabolomics/gnps/gnps_format.py @@ -1,4 +1,5 @@ from __future__ import annotations +import re import zipfile from enum import Enum from enum import unique @@ -6,7 +7,6 @@ from pathlib import Path import httpx from bs4 import BeautifulSoup -from nplinker.utils import get_headers GNPS_TASK_URL = "https://gnps.ucsd.edu/ProteoSAFe/status.jsp?task={}" @@ -118,11 +118,13 @@ def gnps_format_from_file_mapping(file: str | PathLike) -> GNPSFormat: Returns: GNPS format identified in the file. """ - headers = get_headers(file) - if "AllFiles" in headers: + with open(file, "r") as f: + header = f.readline().strip() + + if re.search(r"\bAllFiles\b", header): return GNPSFormat.SNETS - if "UniqueFileSources" in headers: + if re.search(r"\bUniqueFileSources\b", header): return GNPSFormat.SNETSV2 - if "row ID" in headers: + if re.search(r"\b{}\b".format(re.escape("row ID")), header): return GNPSFormat.FBMN return GNPSFormat.Unknown diff --git a/src/nplinker/utils.py b/src/nplinker/utils.py index 039904b6..e1c737a4 100644 --- a/src/nplinker/utils.py +++ b/src/nplinker/utils.py @@ -59,39 +59,6 @@ def wrapper_check_disk_space(*args, **kwargs): # -def find_delimiter(file: str | PathLike) -> str: - """Detect the delimiter for the given tabular file. - - Args: - file: Path to tabular file. - - Returns: - Detected delimiter character. - - Examples: - >>> delim = find_delimiter("~/table.csv") - """ - sniffer = csv.Sniffer() - with open(file, mode="rt", encoding="utf-8") as fp: - delimiter = sniffer.sniff(fp.read(5000)).delimiter - return delimiter - - -def get_headers(file: str | PathLike) -> list[str]: - """Read headers from the given tabular file. - - Args: - file: Path to the file to read the header from. - - Returns: - A list of column names from the header. - """ - with open(file) as f: - headers = f.readline().strip() - dl = find_delimiter(file) - return headers.split(dl) - - def is_file_format(file: str | PathLike, format: str = "tsv") -> bool: """Check if the file is in the given format. diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 29eaf056..0d44978a 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -3,17 +3,6 @@ from tempfile import mkdtemp import pytest from nplinker import utils -from nplinker.utils import find_delimiter -from . import GNPS_DATA_DIR - - -@pytest.mark.parametrize( - "filename, expected", - [[GNPS_DATA_DIR / "nodes.tsv", "\t"], [GNPS_DATA_DIR / "nodes_mwe.csv", ","]], -) -def test_find_delimiter(filename, expected): - actual = find_delimiter(filename) - assert actual == expected BGC_GBK_URL = "https://mibig.secondarymetabolites.org/repository/BGC0000001/BGC0000001.gbk"