461 add support to load decathalon datalist (Project-MONAI#465)

* [DLMED] add load_decathalon_datalist * [DLMED] add unit tests * [DLMED] fix windows CI issue * [MONAI] python code formatting * [DLMED] update according to the comments * [MONAI] python code formatting * [DLMED] update doc-string * [DLMED] update unit tests * [DLMED] add code-block to doc-string Co-authored-by: monai-bot <monai.miccai2019@gmail.com>
mbrzus · Jun 1, 2020 · da80041 · da80041
1 parent 718d11a
commit da80041
Showing 4 changed files with 184 additions and 0 deletions.
diff --git a/docs/source/data.rst b/docs/source/data.rst
@@ -87,3 +87,7 @@ Utilities
 .. automodule:: monai.data.utils
   :members:
 
+
+Decathlon Datalist
+~~~~~~~~~~~~~~~~~~
+.. autofunction:: monai.data.load_decathalon_datalist
diff --git a/monai/data/__init__.py b/monai/data/__init__.py
@@ -19,3 +19,4 @@
 from .utils import *
 from .png_saver import PNGSaver
 from .png_writer import write_png
+from .decathalon_dataloader import load_decathalon_datalist
diff --git a/monai/data/decathalon_dataloader.py b/monai/data/decathalon_dataloader.py
@@ -0,0 +1,75 @@
+# Copyright 2020 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import json
+
+
+def _compute_path(base_dir, element):
+    """Join one datalist entry onto ``base_dir`` and normalize the result.
+
+    Args:
+        base_dir (str): directory that the path(s) are joined onto.
+        element (str or list of str): a single file path or a list of file paths.
+
+    Returns:
+        The normalized joined path for a string input, or a list of such paths
+        (in the same order) for a list input.
+
+    Raises:
+        ValueError: if ``element`` is neither a string nor a list, or if any
+            entry of a list is not a string.
+
+    """
+
+    def _resolve(path):
+        return os.path.normpath(os.path.join(base_dir, path))
+
+    if isinstance(element, str):
+        return _resolve(element)
+    if not isinstance(element, list):
+        raise ValueError("file path must be a string or a list of string.")
+    if not all(isinstance(entry, str) for entry in element):
+        raise ValueError("file path must be a string.")
+    return [_resolve(entry) for entry in element]
+
+
+def _append_paths(base_dir, is_segmentation, items):
+    """Resolve the file paths stored in every data item against ``base_dir``.
+
+    The dicts in ``items`` are updated in place.
+
+    Args:
+        base_dir (str): directory that the stored paths are joined onto.
+        is_segmentation (bool): if True, the "label" value is treated as a file
+            path and resolved like "image"; if False it is left unchanged.
+        items (list of dict): the data items to update.
+
+    Returns:
+        The same ``items`` object that was passed in.
+
+    Raises:
+        ValueError: if an item is not a dict, or if a value that must be a
+            path is neither a string nor a list of strings.
+
+    """
+    path_keys = ("image", "label") if is_segmentation else ("image",)
+    for entry in items:
+        if not isinstance(entry, dict):
+            raise ValueError("data item must be dict.")
+        # only existing keys are reassigned, so the dict keeps its size while iterating
+        for key in entry:
+            if key in path_keys:
+                entry[key] = _compute_path(base_dir, entry[key])
+    return items
+
+
+def load_decathalon_datalist(data_list_file_path, is_segmentation=True, data_list_key="training", base_dir=None):
+    """Load image/label paths of a decathlon challenge datalist from a JSON file.
+
+    The JSON file is expected to be similar to the ``dataset.json`` files
+    that come with the datasets from http://medicaldecathlon.com/
+
+    Args:
+        data_list_file_path (str): the path to the json file of datalist
+        is_segmentation (bool): whether the datalist is for segmentation task, default is True.
+            If True, the "label" values are resolved as file paths like the "image" values;
+            if False, the "label" values are returned unchanged
+        data_list_key (str): the key to get a list of dictionary to be used, default is "training".
+            For the key "test", entries that are not dicts are wrapped as ``{"image": entry}``
+        base_dir (str): the base directory of the dataset, if None, use the datalist directory
+
+    Raises:
+        ValueError: if the datalist file does not exist, if ``data_list_key`` is not in the file,
+            if a data item is not a dict, or if a path is neither a string nor a list of strings
+
+    Returns a list of data items, each of which is a dict keyed by element names, for example:
+
+    .. code-block:: python
+
+        [
+            {'image': '/workspace/data/chest_19.nii.gz',  'label': 0},
+            {'image': '/workspace/data/chest_31.nii.gz',  'label': 1}
+        ]
+
+    """
+    if not os.path.isfile(data_list_file_path):
+        raise ValueError(f"data list file {data_list_file_path} does not exist.")
+    # JSON is UTF-8 by specification; do not depend on the platform default encoding
+    with open(data_list_file_path, encoding="utf-8") as json_file:
+        json_data = json.load(json_file)
+    if data_list_key not in json_data:
+        raise ValueError(f"data list {data_list_key} not specified in '{data_list_file_path}'.")
+    expected_data = json_data[data_list_key]
+    if data_list_key == "test":
+        # "test" lists usually hold bare image paths; wrap those into dicts,
+        # but keep entries that are already dicts instead of double-wrapping them
+        expected_data = [i if isinstance(i, dict) else {"image": i} for i in expected_data]
+
+    if base_dir is None:
+        base_dir = os.path.dirname(data_list_file_path)
+
+    return _append_paths(base_dir, is_segmentation, expected_data)
diff --git a/tests/test_load_decathalon_datalist.py b/tests/test_load_decathalon_datalist.py
@@ -0,0 +1,104 @@
+# Copyright 2020 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import os
+import json
+import shutil
+import tempfile
+from monai.data import load_decathalon_datalist
+
+
+class TestLoadDecathalonDatalist(unittest.TestCase):
+    """Tests for ``load_decathalon_datalist``.
+
+    Every test writes a small datalist JSON into a temporary directory. The
+    directory is managed by ``tempfile.TemporaryDirectory`` so that it is
+    removed even when an assertion fails.
+    """
+
+    @staticmethod
+    def _write_datalist(folder, content):
+        """Dump ``content`` as ``test_data.json`` inside ``folder`` and return the file path."""
+        file_path = os.path.join(folder, "test_data.json")
+        with open(file_path, "w") as json_file:
+            json.dump(content, json_file)
+        return file_path
+
+    def test_seg_values(self):
+        """Segmentation: both "image" and "label" are resolved against ``base_dir``."""
+        with tempfile.TemporaryDirectory() as tempdir:
+            test_data = {
+                "name": "Spleen",
+                "description": "Spleen Segmentation",
+                "labels": {"0": "background", "1": "spleen"},
+                "training": [
+                    {"image": "spleen_19.nii.gz", "label": "spleen_19.nii.gz"},
+                    {"image": "spleen_31.nii.gz", "label": "spleen_31.nii.gz"},
+                ],
+                "test": ["spleen_15.nii.gz", "spleen_23.nii.gz"],
+            }
+            file_path = self._write_datalist(tempdir, test_data)
+            result = load_decathalon_datalist(file_path, True, "training", tempdir)
+            self.assertEqual(result[0]["image"], os.path.join(tempdir, "spleen_19.nii.gz"))
+            self.assertEqual(result[0]["label"], os.path.join(tempdir, "spleen_19.nii.gz"))
+
+    def test_cls_values(self):
+        """Classification: "image" is resolved, the numeric "label" is returned unchanged."""
+        with tempfile.TemporaryDirectory() as tempdir:
+            test_data = {
+                "name": "ChestXRay",
+                "description": "Chest X-ray classification",
+                "labels": {"0": "background", "1": "chest"},
+                "training": [{"image": "chest_19.nii.gz", "label": 0}, {"image": "chest_31.nii.gz", "label": 1}],
+                "test": ["chest_15.nii.gz", "chest_23.nii.gz"],
+            }
+            file_path = self._write_datalist(tempdir, test_data)
+            result = load_decathalon_datalist(file_path, False, "training", tempdir)
+            self.assertEqual(result[0]["image"], os.path.join(tempdir, "chest_19.nii.gz"))
+            self.assertEqual(result[0]["label"], 0)
+
+    def test_seg_no_basedir(self):
+        """Without ``base_dir`` the directory of the datalist file is used as the base."""
+        with tempfile.TemporaryDirectory() as tempdir:
+            test_data = {
+                "name": "Spleen",
+                "description": "Spleen Segmentation",
+                "labels": {"0": "background", "1": "spleen"},
+                "training": [
+                    {
+                        "image": os.path.join(tempdir, "spleen_19.nii.gz"),
+                        "label": os.path.join(tempdir, "spleen_19.nii.gz"),
+                    },
+                    {
+                        "image": os.path.join(tempdir, "spleen_31.nii.gz"),
+                        "label": os.path.join(tempdir, "spleen_31.nii.gz"),
+                    },
+                ],
+                "test": [os.path.join(tempdir, "spleen_15.nii.gz"), os.path.join(tempdir, "spleen_23.nii.gz")],
+            }
+            file_path = self._write_datalist(tempdir, test_data)
+            result = load_decathalon_datalist(file_path, True, "training", None)
+            self.assertEqual(result[0]["image"], os.path.join(tempdir, "spleen_19.nii.gz"))
+            self.assertEqual(result[0]["label"], os.path.join(tempdir, "spleen_19.nii.gz"))
+
+    def test_seg_no_labels(self):
+        """The "test" list of bare file names is turned into dicts with an "image" key."""
+        with tempfile.TemporaryDirectory() as tempdir:
+            test_data = {
+                "name": "Spleen",
+                "description": "Spleen Segmentation",
+                "labels": {"0": "background", "1": "spleen"},
+                "test": ["spleen_15.nii.gz", "spleen_23.nii.gz"],
+            }
+            file_path = self._write_datalist(tempdir, test_data)
+            result = load_decathalon_datalist(file_path, True, "test", tempdir)
+            self.assertEqual(result[0]["image"], os.path.join(tempdir, "spleen_15.nii.gz"))
+
+
+# Run the tests of this module when the file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()