Skip to content

Commit

Permalink
update integration tests
Browse files Browse the repository at this point in the history
- simplify the `conftest.py`
- remove unused functions
- add tests for `get_links`
  • Loading branch information
CunliangGeng committed Jun 10, 2024
1 parent 9258499 commit 5e214d6
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 61 deletions.
55 changes: 18 additions & 37 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations
import os
import shutil
import tempfile
import zipfile
from os import PathLike
from pathlib import Path
import httpx
import pytest
from rich.progress import Progress
from . import DATA_DIR

Expand All @@ -18,46 +18,27 @@
f"https://zenodo.org/records/{dataset_doi.split('.')[-1]}/files/nplinker_local_mode_example.zip"
)

# Base directory used for the test session: the system temporary directory
# reported by `tempfile.gettempdir()`.
temp_dir = tempfile.gettempdir()
# Path of the `nplinker_local_mode_example` folder inside `temp_dir`.
nplinker_root_dir = os.path.join(temp_dir, "nplinker_local_mode_example")

@pytest.fixture(scope="module")
def root_dir(tmp_path_factory):
"""Set up the NPLinker root directory for the local mode example dataset."""
temp_dir = tmp_path_factory.mktemp("nplinker_integration_test")
nplinker_root_dir = temp_dir / "nplinker_local_mode_example"

def pytest_sessionstart(session):
"""Pytest hook to run before the entire test session starts.
This hook makes sure the temporary directory `nplinker_root_dir` is created before any test
starts. When running tests in parallel, the creation operation is done by the master process,
and worker processes are not allowed to do it.
For more about this hook, see:
1. https://docs.pytest.org/en/stable/reference.html#_pytest.hookspec.pytest_sessionstart
2. https://github.com/pytest-dev/pytest-xdist/issues/271#issuecomment-826396320
"""
# Look up `workerinput` defensively: the attribute may be missing from the config.
workerinput = getattr(session.config, "workerinput", None)
# It's the master process, or tests are not running in parallel, when `workerinput` is None.
if workerinput is None:
# Start from a clean state: remove any directory left over from a previous run.
if os.path.exists(nplinker_root_dir):
shutil.rmtree(nplinker_root_dir)
# Download the example archive into DATA_DIR only when it is not already there.
dataset = DATA_DIR / "nplinker_local_mode_example.zip"
if not dataset.exists():
download_archive(dataset_url, DATA_DIR)
# Unpack the archive into `temp_dir`.
with zipfile.ZipFile(dataset, "r") as zip_ref:
zip_ref.extractall(temp_dir)
# NPLinker setting `root_dir` must be a path that exists, so setting it to a temporary directory.
os.environ["NPLINKER_ROOT_DIR"] = nplinker_root_dir

# Download the dataset and extract it
if os.path.exists(nplinker_root_dir):
shutil.rmtree(nplinker_root_dir)
dataset = DATA_DIR / "nplinker_local_mode_example.zip"
if not dataset.exists():
download_archive(dataset_url, DATA_DIR)
# the extracted directory is named "nplinker_local_mode_example"
with zipfile.ZipFile(dataset, "r") as zip_ref:
zip_ref.extractall(temp_dir)

def pytest_sessionfinish(session):
"""Pytest hook to run after the entire test session finishes.
# Return the root directory
yield str(nplinker_root_dir)

This hook makes sure that temporary directory `nplinker_root_dir` is only removed after all
tests finish. When running tests in parallel, the deletion operation is done by the master
process, and worker processes are not allowed to do it.
"""
workerinput = getattr(session.config, "workerinput", None)
if workerinput is None:
shutil.rmtree(nplinker_root_dir)
shutil.rmtree(nplinker_root_dir)


def download_archive(
Expand Down
55 changes: 31 additions & 24 deletions tests/integration/test_nplinker_local.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,25 @@
import hashlib
from pathlib import Path
import os
import pytest
from nplinker.nplinker import NPLinker
from . import DATA_DIR


# Only tests related to data arranging and loading should be put here.
# For tests on scoring/links, add them to `scoring/test_nplinker_scoring.py`.


def get_file_hash(file_path, chunk_size=65536):
    """Return the SHA-256 hex digest of the file at ``file_path``.

    The file is streamed in fixed-size chunks, so it is never loaded into
    memory as a whole.

    Args:
        file_path: Path of the file to hash; anything accepted by ``open``.
        chunk_size: Number of bytes requested per read. Must be positive.
            The resulting digest does not depend on this value.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    # A zero-size read would look like end-of-file and silently hash nothing.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    h = hashlib.sha256()
    with open(file_path, "rb") as file:
        # `iter(callable, sentinel)` keeps reading until `read` returns b"" (EOF).
        for chunk in iter(lambda: file.read(chunk_size), b""):
            h.update(chunk)

    return h.hexdigest()


@pytest.fixture(scope="module")
def npl() -> NPLinker:
def npl(root_dir) -> NPLinker:
# Export the dataset location before the NPLinker object is created from the
# TOML file (presumably the config reads NPLINKER_ROOT_DIR — confirm against
# `nplinker_local_mode.toml`).
os.environ["NPLINKER_ROOT_DIR"] = root_dir
npl = NPLinker(DATA_DIR / "nplinker_local_mode.toml")
npl.load_data()
# remove cached score results before running tests
root_dir = Path(npl.root_dir)
score_cache = root_dir / "output" / "cache_metcalf_scoring.pckl"
# `missing_ok=True`: a missing cache file is not an error.
score_cache.unlink(missing_ok=True)
return npl


def test_init(npl, root_dir):
    """Check the root directory, mode and log level exposed by the `npl` fixture."""
    config = npl.config
    assert str(config.root_dir) == root_dir
    assert config.mode == "local"
    assert config.log.level == "DEBUG"

    # The NPLinker object itself must report the same root directory.
    assert npl.root_dir == root_dir


# ---------------------------------------------------------------------------------------------------
# After manually checking data files for PODP MSV000079284, we have the following numbers:
# 370 BGCs from antismash files
Expand Down Expand Up @@ -63,3 +50,23 @@ def test_load_data(npl: NPLinker):
assert len(npl.spectra) == 24652
assert len(npl.mfs) == 29
assert len(npl.strains) == 46


def test_get_links(npl):
    """Check that metcalf link scores are non-negative for each kind of query object.

    Links are requested for the first three entries of `npl.gcfs`, the first
    entry of `npl.spectra` and the first entry of `npl.mfs`, in that order.
    Default scoring parameters are used (cutoff=0, standardised=False), so
    every score value should be >= 0.
    """
    scoring_method = "metcalf"
    # (attribute on `npl`, number of leading objects to query), in query order.
    queries = (("gcfs", 3), ("spectra", 1), ("mfs", 1))

    for attr_name, count in queries:
        # The attribute is read lazily, right before its own `get_links` call.
        link_graph = npl.get_links(getattr(npl, attr_name)[:count], scoring_method)
        for _, _, link_scores in link_graph.links:
            assert link_scores[scoring_method].value >= 0

0 comments on commit 5e214d6

Please sign in to comment.