Skip to content

Commit 01cd307

Browse files
fedorovvkt1414
authored and committed
enh: simplify by using class variable in place of string
Use class variable for defining the default download hierarchy template. This simplifies updating the default template, improves readability and will allow for specifying alternative preset templates more easily.
1 parent 96b7bd6 commit 01cd307

File tree

1 file changed

+15
-13
lines changed

1 file changed

+15
-13
lines changed

idc_index/index.py

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44

55
import logging
66
import os
7-
import re
87
import shutil
98
import subprocess
109
import tempfile
@@ -27,6 +26,10 @@
2726

2827

2928
class IDCClient:
29+
DOWNLOAD_HIERARCHY_DEFAULT = (
30+
"%collection_id/%PatientID/%StudyInstanceUID/%Modality_%SeriesInstanceUID"
31+
)
32+
3033
def __init__(self):
3134
file_path = idc_index_data.IDC_INDEX_PARQUET_FILEPATH
3235

@@ -487,7 +490,7 @@ def _validate_update_manifest_and_get_download_size(
487490
validate_manifest (bool, optional): If True, validates the manifest for any errors. Defaults to True.
488491
show_progress_bar (bool, optional): If True, tracks the progress of download
489492
use_s5cmd_sync (bool, optional): If True, will use s5cmd sync operation instead of cp when downloadDirectory is not empty; this can significantly improve the download speed if the content is partially downloaded
490-
dirTemplate (str): A template string for the directory path. Must start with %. Defaults to %collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID. It can contain attributes (PatientID, collection_id, Modality, StudyInstanceUID, SeriesInstanceUID) wrapped in '%'. Special characters can be used as connectors: '-' (hyphen), '/' (slash for subdirectories), '_' (underscore). Can be disabled by None.
493+
dirTemplate (str): A template string for the directory path. Must start with %. Defaults to index.DOWNLOAD_HIERARCHY_DEFAULT. It can contain attributes (PatientID, collection_id, Modality, StudyInstanceUID, SeriesInstanceUID) wrapped in '%'. Special characters can be used as connectors: '-' (hyphen), '/' (slash for subdirectories), '_' (underscore). Can be disabled by None.
491494
492495
Returns:
493496
total_size (float): The total size of all series in the manifest file.
@@ -709,7 +712,7 @@ def _generate_sql_concat_for_building_directory(dirTemplate, downloadDir):
709712
# CONCAT command may contain empty strings, and they are not harmless -
710713
# duckdb does not like them!
711714
# NB: double-quotes are not allowed by duckdb!
712-
concat_command = "CONCAT('" + concat_command + "')"
715+
concat_command = f"CONCAT('{downloadDir}/','" + concat_command + "')"
713716
concat_command = concat_command.replace(",''", "")
714717
concat_command = concat_command.replace("'',", "")
715718
concat_command = concat_command.replace(",'',", "")
@@ -791,7 +794,7 @@ def _parse_s5cmd_sync_output_and_generate_synced_manifest(
791794
Args:
792795
output (str): The output of s5cmd sync --dry-run command.
793796
downloadDir (str): The directory to download the files to.
794-
dirTemplate (str): A template string for the directory path. Defaults to %collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID. It can contain attributes (PatientID, collection_id, Modality, StudyInstanceUID, SeriesInstanceUID) wrapped in '%'. Special characters can be used as connectors: '-' (hyphen), '/' (slash for subdirectories), '_' (underscore). Can be disabled by None.
797+
dirTemplate (str): A template string for the directory path.
795798
796799
Returns:
797800
Path: The path to the generated synced manifest file.
@@ -904,7 +907,7 @@ def _s5cmd_run(
904907
quiet (bool, optional): If True, suppresses the stdout and stderr of the s5cmd command.
905908
show_progress_bar (bool, optional): If True, tracks the progress of download
906909
use_s5cmd_sync (bool, optional): If True, will use s5cmd sync operation instead of cp when downloadDirectory is not empty; this can significantly improve the download speed if the content is partially downloaded
907-
dirTemplate (str): A template string for the directory path. Defaults to %collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID. It can contain attributes (PatientID, collection_id, Modality, StudyInstanceUID, SeriesInstanceUID) wrapped in '%'. Special characters can be used as connectors: '-' (hyphen), '/' (slash for subdirectories), '_' (underscore). Can be disabled by None.
910+
dirTemplate (str): A template string for the directory path.
908911
909912
Raises:
910913
subprocess.CalledProcessError: If the s5cmd command fails.
@@ -1079,7 +1082,7 @@ def download_from_manifest(
10791082
validate_manifest: bool = True,
10801083
show_progress_bar: bool = True,
10811084
use_s5cmd_sync: bool = False,
1082-
dirTemplate="%collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID",
1085+
dirTemplate=DOWNLOAD_HIERARCHY_DEFAULT,
10831086
) -> None:
10841087
"""
10851088
Download the manifest file. In a series of steps, the manifest file
@@ -1094,7 +1097,7 @@ def download_from_manifest(
10941097
validate_manifest (bool, optional): If True, validates the manifest for any errors. Defaults to True.
10951098
show_progress_bar (bool, optional): If True, tracks the progress of download
10961099
use_s5cmd_sync (bool, optional): If True, will use s5cmd sync operation instead of cp when downloadDirectory is not empty; this can significantly improve the download speed if the content is partially downloaded
1097-
dirTemplate (str): A template string for the directory path. Defaults to %collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID. It can contain attributes (PatientID, collection_id, Modality, StudyInstanceUID, SeriesInstanceUID) wrapped in '%'. Special characters can be used as connectors: '-' (hyphen), '/' (slash for subdirectories), '_' (underscore). Can be disabled by None.
1100+
dirTemplate (str): A template string for the directory path. Defaults to index.DOWNLOAD_HIERARCHY_DEFAULT set to %collection_id/%PatientID/%StudyInstanceUID/%Modality_%SeriesInstanceUID. It can contain attributes (PatientID, collection_id, Modality, StudyInstanceUID, SeriesInstanceUID) prefixed by '%'. The following special characters can be used as connectors: '-' (hyphen), '/' (slash for subdirectories), '_' (underscore). When set to None all files will be downloaded to the download directory with no subdirectories.
10981101
10991102
Raises:
11001103
ValueError: If the download directory does not exist.
@@ -1144,8 +1147,7 @@ def download_from_selection(
11441147
quiet=True,
11451148
show_progress_bar=True,
11461149
use_s5cmd_sync=False,
1147-
# TODO: replace with class variables, have more than one preset
1148-
dirTemplate="%collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID",
1150+
dirTemplate=DOWNLOAD_HIERARCHY_DEFAULT,
11491151
):
11501152
"""Download the files corresponding to the selection. The filtering will be applied in sequence (but does it matter?) by first selecting the collection(s), followed by
11511153
patient(s), study(studies) and series. If no filtering is applied, all the files will be downloaded.
@@ -1290,7 +1292,7 @@ def download_dicom_series(
12901292
quiet=True,
12911293
show_progress_bar=True,
12921294
use_s5cmd_sync=False,
1293-
dirTemplate="%collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID",
1295+
dirTemplate=DOWNLOAD_HIERARCHY_DEFAULT,
12941296
) -> None:
12951297
"""
12961298
Download the files corresponding to the seriesInstanceUID to the specified directory.
@@ -1328,7 +1330,7 @@ def download_dicom_studies(
13281330
quiet=True,
13291331
show_progress_bar=True,
13301332
use_s5cmd_sync=False,
1331-
dirTemplate="%collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID",
1333+
dirTemplate=DOWNLOAD_HIERARCHY_DEFAULT,
13321334
) -> None:
13331335
"""
13341336
Download the files corresponding to the studyInstanceUID to the specified directory.
@@ -1366,7 +1368,7 @@ def download_dicom_patients(
13661368
quiet=True,
13671369
show_progress_bar=True,
13681370
use_s5cmd_sync=False,
1369-
dirTemplate="%collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID",
1371+
dirTemplate=DOWNLOAD_HIERARCHY_DEFAULT,
13701372
) -> None:
13711373
"""
13721374
Download the files corresponding to the studyInstanceUID to the specified directory.
@@ -1404,7 +1406,7 @@ def download_collection(
14041406
quiet=True,
14051407
show_progress_bar=True,
14061408
use_s5cmd_sync=False,
1407-
dirTemplate="%collection_id/%PatientID/%Modality/%StudyInstanceUID/%SeriesInstanceUID",
1409+
dirTemplate=DOWNLOAD_HIERARCHY_DEFAULT,
14081410
) -> None:
14091411
"""
14101412
Download the files corresponding to the studyInstanceUID to the specified directory.

0 commit comments

Comments
 (0)