
Commit b83e14d

Merge branch 'main' into joss
2 parents d7ffea7 + 6a2ff66 commit b83e14d

24 files changed: +2837 additions, -128 deletions

.github/workflows/continuous-integration.yml

Lines changed: 10 additions & 3 deletions
@@ -8,6 +8,11 @@ on:
     branches:
       - main
 
+  pull_request:
+    branches:
+      - main
+
+
 jobs:
   py:
     runs-on: ${{ matrix.os }}
@@ -26,14 +31,15 @@ jobs:
     - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
-        if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
        pip install .
+        pip install .[test]
    - name: Lint with flake8
      run: |
        make lint
    - name: Test with pytest
+      working-directory: ${{ runner.temp }}
      run: |
-        pytest
+        pytest --pyargs MRdataset
 
  py312:
    runs-on: ${{ matrix.os }}
@@ -61,5 +67,6 @@ jobs:
      run: |
        make lint
    - name: Test with pytest
+      working-directory: ${{ runner.temp }}
      run: |
-        pytest
+        pytest --pyargs MRdataset
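Net effect of the workflow change: the suite now also runs on pull requests against main, test dependencies come from the package's own test extras (pip install .[test]) instead of requirements_dev.txt, and pytest is invoked from ${{ runner.temp }} with --pyargs MRdataset, so tests are collected from the installed package rather than the checked-out source tree.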

MRdataset/__init__.py

Lines changed: 11 additions & 2 deletions
@@ -5,6 +5,7 @@
 
 # __version__ = '0.1.0'
 import logging
+import sys
 
 from MRdataset.config import configure_logger
 
@@ -16,10 +17,18 @@
 from MRdataset.dicom_utils import is_dicom_file
 from MRdataset.utils import valid_dirs
 from MRdataset.base import BaseDataset
+from MRdataset.bids import BidsDataset
 from MRdataset.dicom import DicomDataset
 
 try:
     from MRdataset._version import __version__
 except ImportError:
-    raise ImportError('It seems MRdataset is not installed correctly. Use pip '
-                      ' to install it first.')
+    if sys.version_info < (3, 8):
+        from importlib_metadata import version
+    else:
+        from importlib.metadata import version
+
+    try:
+        __version__ = version('MRdataset')
+    except Exception:
+        __version__ = "unknown"
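The new fallback follows the usual setuptools-scm style pattern: if the generated _version.py is missing, the installed distribution metadata is queried via importlib.metadata (or the importlib_metadata backport on Python < 3.8), with "unknown" as the last resort. A quick check, assuming MRdataset is installed in the current environment:

import MRdataset
# version from _version.py if present, else package metadata, else "unknown"
print(MRdataset.__version__)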

MRdataset/bids.py

Lines changed: 158 additions & 13 deletions
@@ -1,13 +1,158 @@
-# from MRdataset.base import BaseDataset
-# from abc import ABC
-#
-#
-# class BidsDataset(BaseDataset, ABC):
-#     """Class to represent a BIDS dataset"""
-#
-#     def __init__(self):
-#         super().__init__()
-#         pass
-#
-#     def load(self):
-#         pass
+from abc import ABC
+from pathlib import Path
+from re import search
+
+from MRdataset import logger
+from MRdataset.base import BaseDataset
+from MRdataset.config import VALID_BIDS_DATATYPES, SUPPORTED_BIDS_DATATYPES
+from MRdataset.dicom_utils import is_bids_file
+from MRdataset.utils import folders_with_min_files, valid_dirs, read_json
+from protocol import BidsImagingSequence
+
+
+class BidsDataset(BaseDataset, ABC):
+    """
+    Class to represent a BIDS dataset. It is a subclass of BaseDataset.
+    It gathers data from JSON files.
+
+    Parameters
+    ----------
+    data_source : str or List[str]
+        The path to the dataset.
+    pattern : str
+        The pattern to match for JSON files.
+    name : str
+        The name of the dataset.
+    config_path : str
+        The path to the config file.
+    verbose : bool
+        Whether to print verbose output on console.
+    ds_format : str
+        The format of the dataset. One of ['dicom', 'bids'].
+    """
+
+    def __init__(self, data_source, pattern="*.json",
+                 name='BidsDataset',
+                 config_path=None,
+                 verbose=False,
+                 output_dir=None,
+                 min_count=1,
+                 **kwargs):
+
+        super().__init__(data_source=data_source, name=name, ds_format='bids')
+        self.data_source = valid_dirs(data_source)
+        self.pattern = pattern
+        self.config_path = config_path
+        self.verbose = verbose
+        self.config_dict = None
+        self.min_count = min_count
+
+        try:
+            self.output_dir = Path(output_dir)
+        except TypeError as exc:
+            logger.error(f'Output directory not valid. Got: {output_dir}')
+            raise exc
+
+        self.output_dir.mkdir(exist_ok=True, parents=True)
+
+        # read the config file
+        try:
+            self.config_dict = read_json(Path(self.config_path))
+        except (FileNotFoundError, ValueError) as e:
+            logger.error(f'Unable to read config file {self.config_path}')
+            raise e
+
+        self.includes = self.config_dict.get('include_sequence', {})
+        self.include_nifti_headers = self.includes.get('nifti_header', False)
+
+    def load(self):
+        """
+        Default method to load the dataset. It iterates over all the folders
+        in the data_source and finds subfolders with at least min_count files
+        matching the pattern. It then processes each subfolder and adds the
+        sequence to the dataset.
+        """
+
+        for directory in self.data_source:
+            # find all sub-folders with at least min_count files matching the
+            # pattern
+            subfolders = folders_with_min_files(directory, self.pattern,
+                                                self.min_count)
+            for folder in subfolders:
+                # process each folder
+                sequences = self._process(folder)
+                for seq in sequences:
+                    self.add(subject_id=seq.subject_id,
+                             session_id=seq.session_id,
+                             run_id=seq.run_id,
+                             seq_id=seq.name, seq=seq)
+
+    def _filter_json_files(self, folder):
+        """Filters the JSON files from the folder."""
+        json_files = sorted(folder.glob(self.pattern))
+        valid_bids_files = list(filter(is_bids_file, json_files))
+        if not valid_bids_files:
+            logger.info(f'No valid BIDS files found in {folder}')
+            return []
+        return valid_bids_files
+
+    def _process(self, folder):
+        """Processes the folder and returns a list of sequences."""
+        json_files = self._filter_json_files(folder)
+        sequences = []
+        last_id = 0
+        for i, file in enumerate(json_files):
+            try:
+                seq = BidsImagingSequence(bidsfile=file, path=folder)
+            except (ValueError, IOError) as exc:
+                logger.error(f'Error processing {file}. Skipping it. Got {exc}')
+                continue
+
+            name = file.parent.name
+            if name not in VALID_BIDS_DATATYPES:
+                logger.error(f'Invalid datatype found: {name}. Skipping it')
+                return sequences
+
+            subject_id = file.parents[2].name
+            session_id = file.parents[1].name
+            if 'sub' in session_id:
+                logger.info(f"Sessions don't exist: {session_id}.")
+                subject_id = session_id
+                session_id = 'ses-01'
+
+            # None of the datasets we processed (over 20) had run information,
+            # even though BIDS allows it. So we just use run-0x for all of them.
+            run_id, last_id = self.get_run_id(file, last_id)
+            seq.set_session_info(subject_id=subject_id,
+                                 session_id=session_id,
+                                 run_id=run_id,
+                                 name=name)
+            if seq.is_valid():
+                sequences.append(seq)
+            else:
+                if name not in SUPPORTED_BIDS_DATATYPES:
+                    logger.error(f'MRdataset primarily supports '
+                                 f'{SUPPORTED_BIDS_DATATYPES}. '
+                                 f'It seems the parameters in '
+                                 f'this sequence are invalid or '
+                                 f'not supported yet. Skipping it.')
+        return sequences
+
+    @staticmethod
+    def get_run_id(filename, last_id):
+        """
+        Use regex to extract run id from filename.
+        Example filename: sub-01_ses-imagery01_task-imagery_run-01_bold.json
+        """
+        # Regular expression pattern
+        pattern = r'run-[^_]+'
+        # Extracting substring using regex
+        match = search(pattern, str(filename))
+
+        if match:
+            run_id = match.group(0)
+            new_id_num = int(run_id.split('-')[-1])
+        else:
+            new_id_num = last_id + 1
+            run_id = f'run-{str(new_id_num).zfill(2)}'
+        return run_id, new_id_num
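A minimal usage sketch of the new loader, based only on the constructor and get_run_id shown above; the data source, config file, and output directory below are placeholder paths, not part of this commit:

from MRdataset.bids import BidsDataset

# run-id extraction is a pure function of the filename (see get_run_id above)
run_id, last_id = BidsDataset.get_run_id(
    'sub-01_ses-imagery01_task-imagery_run-01_bold.json', last_id=0)
# run_id == 'run-01', last_id == 1

ds = BidsDataset(data_source='/data/my_bids_study',   # placeholder path
                 config_path='mri-config.json',       # placeholder config file
                 output_dir='/tmp/mrdataset_out')     # created if missing
ds.load()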

MRdataset/cli.py

Lines changed: 8 additions & 0 deletions
@@ -50,6 +50,14 @@ def parse_args():
                     f'Using default config file - {str(args.config)}')
 
     if args.output_dir:
+        output_dir = Path(args.output_dir)
+        if not output_dir.is_dir():
+            try:
+                output_dir.mkdir(parents=True, exist_ok=True)
+            except OSError as exc:
+                logger.error(f'Unable to create folder: {output_dir}')
+                raise exc
+
         if not is_writable(args.output_dir):
             raise OSError('Expected a writable directory for --output_dir '
                           f'argument, Got {args.output_dir}')

MRdataset/common.py

Lines changed: 3 additions & 0 deletions
@@ -4,6 +4,7 @@
 
 from MRdataset import logger
 from MRdataset.base import BaseDataset
+from MRdataset.bids import BidsDataset
 from MRdataset.config import VALID_DATASET_FORMATS
 from MRdataset.dicom import DicomDataset
 from MRdataset.utils import random_name, check_mrds_extension
@@ -126,6 +127,8 @@ def find_dataset_using_ds_format(dataset_ds_format: str):
     # Import the module "{ds_format}_dataset.py"
     if dataset_ds_format == 'dicom':
         dataset_class = DicomDataset
+    elif dataset_ds_format == 'bids':
+        dataset_class = BidsDataset
     else:
         raise NotImplementedError(
             f'Dataset ds_format {dataset_ds_format} is not implemented. '
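With this change the format dispatcher covers both readers. A small sketch of the selection logic shown in the hunk above, assuming the function returns the selected class as its name suggests; only 'dicom' and 'bids' are handled, anything else raises:

from MRdataset.common import find_dataset_using_ds_format

assert find_dataset_using_ds_format('bids').__name__ == 'BidsDataset'
assert find_dataset_using_ds_format('dicom').__name__ == 'DicomDataset'
# find_dataset_using_ds_format('xnat')  # would raise NotImplementedError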

MRdataset/config.py

Lines changed: 3 additions & 3 deletions
@@ -16,7 +16,7 @@
 VALID_BIDS_EXTENSIONS = ['.json', '.nii', '.nii.gz']
 
 
-VALID_DATATYPES = [
+VALID_BIDS_DATATYPES = [
     'anat',
     'beh',
     'dwi',
@@ -30,8 +30,8 @@
     'pet'
 ]
 
+SUPPORTED_BIDS_DATATYPES = ['func', 'anat', 'dwi', 'fmap']
 
-# Suppress duplicated warnings
 
 class MRException(Exception):
     """
@@ -59,7 +59,7 @@ def __init__(self):
                            '--data_source. Got 0 DICOM/JSON files.')
 
 
-def configure_logger(log, output_dir, mode='w', level='WARNING'):
+def configure_logger(log, output_dir, mode='w', level='ERROR'):
     """
     Initiate log files.
 
MRdataset/dicom.py

Lines changed: 8 additions & 8 deletions
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import Tuple, List
 
-from protocol import ImagingSequence
+from protocol import DicomImagingSequence
 from pydicom import dcmread
 from pydicom.errors import InvalidDicomError
 
@@ -58,17 +58,17 @@ def __init__(self,
                  name='DicomDataset',
                  config_path=None,
                  verbose=False,
-                 ds_format='dicom',
                  output_dir=None,
+                 min_count=1,
                  **kwargs):
         """constructor"""
 
         super().__init__(data_source=data_source, name=name,
-                         ds_format=ds_format)
+                         ds_format='dicom')
         self.data_source = valid_dirs(data_source)
         self.pattern = pattern
         # TODO: Add option to change min_count passing it as an argument
-        self.min_count = 1  # min slice count to be considered a volume
+        self.min_count = min_count  # min slice count to be considered a volume
         self.verbose = verbose
         self.config_path = config_path
         self.config_dict = None
@@ -215,7 +215,7 @@ def _process_slice_collection(self, folder):
         example, EchoTime and Echonumber for multi-echo sequences.
 
         It then processes the divergent slices to find the varying parameters
-        and updates the protocol.ImagingSequence object.
+        and updates the protocol.DicomImagingSequence object.
 
         Parameters
         ----------
@@ -262,14 +262,14 @@ def _process_slice_collection(self, folder):
             # Note that we cannot use enumerate and idx ==0 here, because we
             # may have to skip some slices
             if len(divergent_slices) == 0:
-                first_slice = ImagingSequence(dicom=dicom, path=folder)
+                first_slice = DicomImagingSequence(dicom=dicom, path=folder)
                 # We collect the first slice as a reference to compare
                 # other slices with, although it is not divergent in
                 # its true sense
                 divergent_slices.append(first_slice)
 
             else:
-                cur_slice = ImagingSequence(dicom=dicom, path=folder)
+                cur_slice = DicomImagingSequence(dicom=dicom, path=folder)
 
                 # check if the session info is same
                 # Session info includes subject_id, session_id, run_id
@@ -329,7 +329,7 @@ def _process_echo_times(self, divergent_slices: List) -> Tuple:
         Parameters
         ----------
         divergent_slices : list
-            ImagingSequence objects with divergent parameters
+            DicomImagingSequence objects with divergent parameters
 
         Returns
         -------