From 779465dde275be4cfe513446980a7cf7d551c416 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Wed, 5 Jul 2023 14:37:47 +0200 Subject: [PATCH] enable `to_json` return string when output file is None --- .../pairedomics/podp_antismash_downloader.py | 32 +++++++++++-------- .../test_podp_antismash_downloader.py | 20 ++++++++++-- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/src/nplinker/pairedomics/podp_antismash_downloader.py b/src/nplinker/pairedomics/podp_antismash_downloader.py index 94944373..cf096fb4 100644 --- a/src/nplinker/pairedomics/podp_antismash_downloader.py +++ b/src/nplinker/pairedomics/podp_antismash_downloader.py @@ -72,25 +72,29 @@ def read_json(file: str | PathLike) -> dict[str, 'GenomeStatus']: @staticmethod def to_json(genome_status_dict: dict[str, 'GenomeStatus'], - file: str | PathLike) -> None: - """Save the given genome status dictionary to a JSON file. + file: str | PathLike | None = None) -> str | None: + """Convert the genome status dictionary to a JSON string. - The JSON file will be saved to the given output directory with the name - defined in variable `GENOME_STATUS_FILENAME`. The file will be overwritten - if it already exists. + If a file path is provided, the JSON string is written to the file. If + the file already exists, it is overwritten. Args: genome_status_dict (dict[str, 'GenomeStatus']): A dictionary of genome - status objects to be saved to a JSON file. - file(str | PathLike): The path to the output JSON file. + status objects. The keys are the original genome IDs and the values + are GenomeStatus objects. + file(str | PathLike | None): The path to the output JSON file. + If None, the JSON string is returned but not written to a file. + + Returns: + str | None: The JSON string if `file` is None, otherwise None. """ - json_data = { - "genome_status": - [gs._to_dict() for gs in genome_status_dict.values()], - "version": "1.0" - } - with open(file, "w") as f: - json.dump(json_data, f) + gs_list = [gs._to_dict() for gs in genome_status_dict.values()] + json_data = {"genome_status": gs_list, "version": "1.0"} + if file is not None: + with open(file, "w") as f: + json.dump(json_data, f) + return None + return json.dumps(json_data) def _to_dict(self) -> dict: """Convert the GenomeStatus object to a dict.""" diff --git a/tests/pairedomics/test_podp_antismash_downloader.py b/tests/pairedomics/test_podp_antismash_downloader.py index 0f08e90c..ea110f6a 100644 --- a/tests/pairedomics/test_podp_antismash_downloader.py +++ b/tests/pairedomics/test_podp_antismash_downloader.py @@ -1,6 +1,5 @@ import json from pathlib import Path -import tempfile import pytest from nplinker.pairedomics import GENOME_STATUS_FILENAME from nplinker.pairedomics import GenomeStatus @@ -74,10 +73,12 @@ def test_genome_status_to_json(tmp_path): "genome1": GenomeStatus("genome1", "refseq1", True, "/path/to/bgc1"), "genome2": GenomeStatus("genome2", "", False, "") } - GenomeStatus.to_json(genome_status_dict, tmp_path / GENOME_STATUS_FILENAME) + result = GenomeStatus.to_json(genome_status_dict, + tmp_path / GENOME_STATUS_FILENAME) with open(tmp_path / GENOME_STATUS_FILENAME, "r") as f: loaded_data = json.load(f) + assert result is None assert loaded_data["version"] == "1.0" assert len(loaded_data["genome_status"]) == 2 assert loaded_data["genome_status"][0]["original_id"] == "genome1" @@ -90,6 +91,21 @@ def test_genome_status_to_json(tmp_path): assert loaded_data["genome_status"][1]["bgc_path"] == "" +def test_genome_status_to_json_nofile(): + genome_status_dict = { + "genome1": GenomeStatus("genome1", "refseq1", True, "/path/to/bgc1"), + "genome2": GenomeStatus("genome2", "", False, "") + } + result = GenomeStatus.to_json(genome_status_dict) + + assert isinstance(result, str) + assert result == '{"genome_status": ' \ + '[{"original_id": "genome1", "resolved_refseq_id": "refseq1", ' \ + '"resolve_attempted": true, "bgc_path": "/path/to/bgc1"}, ' \ + '{"original_id": "genome2", "resolved_refseq_id": "", ' \ + '"resolve_attempted": false, "bgc_path": ""}], "version": "1.0"}' + + # Test `podp_download_and_extract_antismash_data` function # with multiple records containing three types of genome IDs def test_multiple_records(download_root, extract_root, genome_status_file):