Skip to content

Commit

Permalink
461 add support to load decathalon datalist (Project-MONAI#465)
Browse files Browse the repository at this point in the history
* [DLMED] add load_decathalon_datalist

* [DLMED] add unit tests

* [DLMED] fix windows CI issue

* [MONAI] python code formatting

* [DLMED] update according to the comments

* [MONAI] python code formatting

* [DLMED] update doc-string

* [DLMED] update unit tests

* [DLMED] add code-block to doc-string

Co-authored-by: monai-bot <monai.miccai2019@gmail.com>
Nic-Ma and monai-bot authored Jun 1, 2020

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 718d11a commit da80041
Showing 4 changed files with 184 additions and 0 deletions.
4 changes: 4 additions & 0 deletions docs/source/data.rst
Original file line number Diff line number Diff line change
@@ -87,3 +87,7 @@ Utilities
.. automodule:: monai.data.utils
:members:


Decathalon DataLoader
~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: monai.data.load_decathalon_datalist
1 change: 1 addition & 0 deletions monai/data/__init__.py
Original file line number Diff line number Diff line change
@@ -19,3 +19,4 @@
from .utils import *
from .png_saver import PNGSaver
from .png_writer import write_png
from .decathalon_dataloader import load_decathalon_datalist
75 changes: 75 additions & 0 deletions monai/data/decathalon_dataloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright 2020 MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import json


def _compute_path(base_dir, element):
    """Resolve one datalist entry against ``base_dir``.

    Args:
        base_dir (str): directory that the entry is joined onto.
        element (str or list of str): a single file path, or a list of file paths.

    Returns:
        The normalized joined path for a string input, or a list of normalized
        joined paths (same order) for a list input.

    Raises:
        ValueError: if ``element`` is neither a string nor a list, or if a list
            contains a non-string entry.

    """

    def _resolve(path):
        return os.path.normpath(os.path.join(base_dir, path))

    if isinstance(element, str):
        return _resolve(element)
    if not isinstance(element, list):
        raise ValueError("file path must be a string or a list of string.")
    # validate the whole list before resolving anything
    if any(not isinstance(entry, str) for entry in element):
        raise ValueError("file path must be a string.")
    return [_resolve(entry) for entry in element]


def _append_paths(base_dir, is_segmentation, items):
    """Rewrite the path-valued entries of every data item relative to ``base_dir``.

    The "image" value is always treated as a path; the "label" value is treated
    as a path only when ``is_segmentation`` is true. The items are updated in
    place and the same list is returned.

    Args:
        base_dir (str): directory that the paths are joined onto.
        is_segmentation (bool): whether "label" values are file paths.
        items (list of dict): the data items to update.

    Returns:
        The ``items`` list, with path entries resolved.

    Raises:
        ValueError: if an item is not a dict, or a path entry has an invalid type.

    """
    path_keys = ("image", "label") if is_segmentation else ("image",)
    for entry in items:
        if not isinstance(entry, dict):
            raise ValueError("data item must be dict.")
        # only existing keys are reassigned, so the dict size never changes mid-iteration
        for key, value in entry.items():
            if key in path_keys:
                entry[key] = _compute_path(base_dir, value)
    return items


def load_decathalon_datalist(data_list_file_path, is_segmentation=True, data_list_key="training", base_dir=None):
    """Load image/label paths of a Decathlon challenge datalist from a JSON file.

    The JSON file is expected to be similar to the ``dataset.json`` files
    available from http://medicaldecathlon.com/.

    Args:
        data_list_file_path (str): the path to the json file of datalist.
        is_segmentation (bool): whether the datalist is for segmentation task, default is True.
            When True, "label" values are resolved as file paths as well.
        data_list_key (str): the key to get a list of dictionary to be used, default is "training".
            For the "test" key, entries given as bare paths are wrapped as ``{"image": path}``;
            entries that are already dicts are used as they are.
        base_dir (str): the base directory of the dataset, if None, use the datalist directory.

    Raises:
        ValueError: if the datalist file does not exist, if ``data_list_key`` is not
            present in the file, or if a data item or file path has an invalid type.

    Returns:
        A list of data items, each of which is a dict keyed by element names, for example:

    .. code-block::

        [
            {'image': '/workspace/data/chest_19.nii.gz', 'label': 0},
            {'image': '/workspace/data/chest_31.nii.gz', 'label': 1}
        ]

    """
    if not os.path.isfile(data_list_file_path):
        raise ValueError(f"data list file {data_list_file_path} does not exist.")
    with open(data_list_file_path) as json_file:
        json_data = json.load(json_file)
    if data_list_key not in json_data:
        raise ValueError(f"data list {data_list_key} not specified in '{data_list_file_path}'.")
    expected_data = json_data[data_list_key]
    if data_list_key == "test":
        # Decathlon files store test images as bare paths; wrap only those, so a
        # test list that already uses dict items is not turned into {"image": {...}}.
        expected_data = [i if isinstance(i, dict) else {"image": i} for i in expected_data]

    if base_dir is None:
        base_dir = os.path.dirname(data_list_file_path)

    return _append_paths(base_dir, is_segmentation, expected_data)
104 changes: 104 additions & 0 deletions tests/test_load_decathalon_datalist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright 2020 MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import os
import json
import shutil
import tempfile
from monai.data import load_decathalon_datalist


class TestLoadDecathalonDatalist(unittest.TestCase):
    """Check that ``load_decathalon_datalist`` resolves datalist entries to full paths."""

    def setUp(self):
        # A fresh directory per test; the registered cleanup runs even when an
        # assertion fails, so no temporary directory is left behind.
        self.tempdir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.tempdir)

    def _write_datalist(self, test_data):
        """Dump ``test_data`` as JSON into the temporary directory and return the file path."""
        file_path = os.path.join(self.tempdir, "test_data.json")
        with open(file_path, "w") as json_file:
            json.dump(test_data, json_file)
        return file_path

    def test_seg_values(self):
        tempdir = self.tempdir
        test_data = {
            "name": "Spleen",
            "description": "Spleen Segmentation",
            "labels": {"0": "background", "1": "spleen"},
            "training": [
                {"image": "spleen_19.nii.gz", "label": "spleen_19.nii.gz"},
                {"image": "spleen_31.nii.gz", "label": "spleen_31.nii.gz"},
            ],
            "test": ["spleen_15.nii.gz", "spleen_23.nii.gz"],
        }
        file_path = self._write_datalist(test_data)
        result = load_decathalon_datalist(file_path, True, "training", tempdir)
        self.assertEqual(result[0]["image"], os.path.join(tempdir, "spleen_19.nii.gz"))
        self.assertEqual(result[0]["label"], os.path.join(tempdir, "spleen_19.nii.gz"))

    def test_cls_values(self):
        tempdir = self.tempdir
        test_data = {
            "name": "ChestXRay",
            "description": "Chest X-ray classification",
            "labels": {"0": "background", "1": "chest"},
            "training": [{"image": "chest_19.nii.gz", "label": 0}, {"image": "chest_31.nii.gz", "label": 1}],
            "test": ["chest_15.nii.gz", "chest_23.nii.gz"],
        }
        file_path = self._write_datalist(test_data)
        result = load_decathalon_datalist(file_path, False, "training", tempdir)
        self.assertEqual(result[0]["image"], os.path.join(tempdir, "chest_19.nii.gz"))
        # for classification the label is a class index and must be left untouched
        self.assertEqual(result[0]["label"], 0)

    def test_seg_no_basedir(self):
        tempdir = self.tempdir
        test_data = {
            "name": "Spleen",
            "description": "Spleen Segmentation",
            "labels": {"0": "background", "1": "spleen"},
            "training": [
                {
                    "image": os.path.join(tempdir, "spleen_19.nii.gz"),
                    "label": os.path.join(tempdir, "spleen_19.nii.gz"),
                },
                {
                    "image": os.path.join(tempdir, "spleen_31.nii.gz"),
                    "label": os.path.join(tempdir, "spleen_31.nii.gz"),
                },
            ],
            "test": [os.path.join(tempdir, "spleen_15.nii.gz"), os.path.join(tempdir, "spleen_23.nii.gz")],
        }
        file_path = self._write_datalist(test_data)
        result = load_decathalon_datalist(file_path, True, "training", None)
        self.assertEqual(result[0]["image"], os.path.join(tempdir, "spleen_19.nii.gz"))
        self.assertEqual(result[0]["label"], os.path.join(tempdir, "spleen_19.nii.gz"))

    def test_seg_no_labels(self):
        tempdir = self.tempdir
        test_data = {
            "name": "Spleen",
            "description": "Spleen Segmentation",
            "labels": {"0": "background", "1": "spleen"},
            "test": ["spleen_15.nii.gz", "spleen_23.nii.gz"],
        }
        file_path = self._write_datalist(test_data)
        result = load_decathalon_datalist(file_path, True, "test", tempdir)
        self.assertEqual(result[0]["image"], os.path.join(tempdir, "spleen_15.nii.gz"))


# Run the test cases in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()

0 comments on commit da80041

Please sign in to comment.