From 779465dde275be4cfe513446980a7cf7d551c416 Mon Sep 17 00:00:00 2001
From: Cunliang Geng <c.geng@esciencecenter.nl>
Date: Wed, 5 Jul 2023 14:37:47 +0200
Subject: [PATCH] enable `to_json` to return a string when output file is None

---
 .../pairedomics/podp_antismash_downloader.py  | 32 +++++++++++--------
 .../test_podp_antismash_downloader.py         | 20 ++++++++++--
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/src/nplinker/pairedomics/podp_antismash_downloader.py b/src/nplinker/pairedomics/podp_antismash_downloader.py
index 94944373..cf096fb4 100644
--- a/src/nplinker/pairedomics/podp_antismash_downloader.py
+++ b/src/nplinker/pairedomics/podp_antismash_downloader.py
@@ -72,25 +72,29 @@ def read_json(file: str | PathLike) -> dict[str, 'GenomeStatus']:
 
     @staticmethod
     def to_json(genome_status_dict: dict[str, 'GenomeStatus'],
-                file: str | PathLike) -> None:
-        """Save the given genome status dictionary to a JSON file.
+                file: str | PathLike | None = None) -> str | None:
+        """Convert the genome status dictionary to a JSON string.
 
-        The JSON file will be saved to the given output directory with the name
-        defined in variable `GENOME_STATUS_FILENAME`. The file will be overwritten
-        if it already exists.
+        If a file path is provided, the JSON string is written to the file. If
+        the file already exists, it is overwritten.
 
         Args:
             genome_status_dict (dict[str, 'GenomeStatus']): A dictionary of genome
-                status objects to be saved to a JSON file.
-            file(str | PathLike): The path to the output JSON file.
+                status objects. The keys are the original genome IDs and the values
+                are GenomeStatus objects.
+            file (str | PathLike | None): The path to the output JSON file.
+                If None, the JSON string is returned but not written to a file.
+
+        Returns:
+            str | None: The JSON string if `file` is None, otherwise None.
         """
-        json_data = {
-            "genome_status":
-            [gs._to_dict() for gs in genome_status_dict.values()],
-            "version": "1.0"
-        }
-        with open(file, "w") as f:
-            json.dump(json_data, f)
+        gs_list = [gs._to_dict() for gs in genome_status_dict.values()]
+        json_data = {"genome_status": gs_list, "version": "1.0"}
+        if file is not None:
+            with open(file, "w") as f:
+                json.dump(json_data, f)
+            return None
+        return json.dumps(json_data)
 
     def _to_dict(self) -> dict:
         """Convert the GenomeStatus object to a dict."""
diff --git a/tests/pairedomics/test_podp_antismash_downloader.py b/tests/pairedomics/test_podp_antismash_downloader.py
index 0f08e90c..ea110f6a 100644
--- a/tests/pairedomics/test_podp_antismash_downloader.py
+++ b/tests/pairedomics/test_podp_antismash_downloader.py
@@ -1,6 +1,5 @@
 import json
 from pathlib import Path
-import tempfile
 import pytest
 from nplinker.pairedomics import GENOME_STATUS_FILENAME
 from nplinker.pairedomics import GenomeStatus
@@ -74,10 +73,12 @@ def test_genome_status_to_json(tmp_path):
         "genome1": GenomeStatus("genome1", "refseq1", True, "/path/to/bgc1"),
         "genome2": GenomeStatus("genome2", "", False, "")
     }
-    GenomeStatus.to_json(genome_status_dict, tmp_path / GENOME_STATUS_FILENAME)
+    result = GenomeStatus.to_json(genome_status_dict,
+                                  tmp_path / GENOME_STATUS_FILENAME)
     with open(tmp_path / GENOME_STATUS_FILENAME, "r") as f:
         loaded_data = json.load(f)
 
+    assert result is None
     assert loaded_data["version"] == "1.0"
     assert len(loaded_data["genome_status"]) == 2
     assert loaded_data["genome_status"][0]["original_id"] == "genome1"
@@ -90,6 +91,21 @@ def test_genome_status_to_json(tmp_path):
     assert loaded_data["genome_status"][1]["bgc_path"] == ""
 
 
+def test_genome_status_to_json_nofile():
+    genome_status_dict = {
+        "genome1": GenomeStatus("genome1", "refseq1", True, "/path/to/bgc1"),
+        "genome2": GenomeStatus("genome2", "", False, "")
+    }
+    result = GenomeStatus.to_json(genome_status_dict)
+
+    assert isinstance(result, str)
+    assert result == '{"genome_status": ' \
+        '[{"original_id": "genome1", "resolved_refseq_id": "refseq1", ' \
+        '"resolve_attempted": true, "bgc_path": "/path/to/bgc1"}, ' \
+        '{"original_id": "genome2", "resolved_refseq_id": "", ' \
+        '"resolve_attempted": false, "bgc_path": ""}], "version": "1.0"}'
+
+
 # Test `podp_download_and_extract_antismash_data` function
 # with multiple records containing three types of genome IDs
 def test_multiple_records(download_root, extract_root, genome_status_file):