From b16d887975460d4a77701e15d393b3e97ffc4815 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 22 Jul 2024 22:47:50 +1000 Subject: [PATCH] added spaces-to-underscores option for associated file globs --- xnat_ingest/cli/stage.py | 11 ++++++++- xnat_ingest/session.py | 13 +++++++++-- xnat_ingest/tests/test_session.py | 37 ++++++++++++++++++++++++++----- xnat_ingest/utils.py | 5 +++++ 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/xnat_ingest/cli/stage.py b/xnat_ingest/cli/stage.py index 67f4c9f..c6ac8ae 100644 --- a/xnat_ingest/cli/stage.py +++ b/xnat_ingest/cli/stage.py @@ -84,7 +84,7 @@ 'The "id-pattern" arg is a regular expression that is used to extract the scan ID & ' "type/resource from the associated filename. Should be a regular-expression " "(Python syntax) with named groups called 'id' and 'type', e.g. " - r"--assoc-id-pattern '[^\.]+\.[^\.]+\.(?P<id>\d+)\.(?P<type>\w+)\..*'" + r"'[^\.]+\.[^\.]+\.(?P<id>\d+)\.(?P<type>\w+)\..*'" ), ) @click.option( @@ -173,6 +173,13 @@ help="The XNAT server to upload to plus the user and password to use", envvar="XNAT_INGEST_TRANSFER_XNAT_LOGIN", ) +@click.option( + "--spaces-to-underscores/--no-spaces-to-underscores", + default=False, + help="Whether to replace spaces with underscores in the filenames of associated files", + envvar="XNAT_INGEST_STAGE_SPACES_TO_UNDERSCORES", + type=bool, +) def stage( dicoms_path: str, staging_dir: Path, @@ -190,6 +197,7 @@ def stage( raise_errors: bool, deidentify: bool, xnat_login: XnatLogin, + spaces_to_underscores: bool, ): set_logger_handling( log_level=log_level, @@ -247,6 +255,7 @@ def stage( remove_original=delete, deidentify=deidentify, project_list=project_list, + spaces_to_underscores=spaces_to_underscores, ) except Exception as e: if not raise_errors: diff --git a/xnat_ingest/session.py b/xnat_ingest/session.py index 9fbffca..df8d4e9 100644 --- a/xnat_ingest/session.py +++ b/xnat_ingest/session.py @@ -507,6 +507,7 @@ def stage( remove_original: bool = False, deidentify: bool = 
True, project_list: ty.Optional[ty.List[str]] = None, + spaces_to_underscores: bool = False, ) -> "ImagingSession": r"""Stages and deidentifies files by removing the fields listed `FIELDS_TO_ANONYMISE` and replacing birth date with 01/01/ and returning new imaging session @@ -540,6 +541,9 @@ def stage( project_list : list[str], optional list of available projects in the store, used to check whether the project ID is valid + spaces_to_underscores : bool, optional + when building associated file globs, convert spaces to underscores in fields + extracted from source file metadata, false by default Returns ------- @@ -599,10 +603,14 @@ def stage( # with current session associated_fspaths: ty.Set[Path] = set() for dicom_dir in self.dicom_dirs: - assoc_glob = dicom_dir / associated_files.glob.format(**self.metadata) + assoc_glob = str( + dicom_dir / associated_files.glob.format(**self.metadata) + ) + if spaces_to_underscores: + assoc_glob = assoc_glob.replace(" ", "_") # Select files using the constructed glob pattern associated_fspaths.update( - Path(p) for p in glob(str(assoc_glob), recursive=True) + Path(p) for p in glob(assoc_glob, recursive=True) ) logger.info( @@ -630,6 +638,7 @@ def stage( assoc_glob_pattern, self.metadata, staged_metadata, + spaces_to_underscores=spaces_to_underscores, ) staged_associated_fspaths = [] diff --git a/xnat_ingest/tests/test_session.py b/xnat_ingest/tests/test_session.py index bbfddd8..578620a 100644 --- a/xnat_ingest/tests/test_session.py +++ b/xnat_ingest/tests/test_session.py @@ -10,17 +10,22 @@ ) from arcana.core.data.set import Dataset from arcana.common import DirTree +from medimages4tests.dummy.dicom.base import default_dicom_dir from medimages4tests.dummy.dicom.pet.wholebody.siemens.biograph_vision.vr20b import ( get_image as get_pet_image, + __file__ as pet_src_file, ) from medimages4tests.dummy.dicom.ct.ac.siemens.biograph_vision.vr20b import ( get_image as get_ac_image, + __file__ as ac_src_file, ) from 
medimages4tests.dummy.dicom.pet.topogram.siemens.biograph_vision.vr20b import ( get_image as get_topogram_image, + __file__ as topogram_src_file, ) from medimages4tests.dummy.dicom.pet.statistics.siemens.biograph_vision.vr20b import ( get_image as get_statistics_image, + __file__ as statistics_src_file, ) from medimages4tests.dummy.raw.pet.siemens.biograph_vision.vr20b import ( get_files as get_raw_data_files, @@ -29,20 +34,37 @@ from xnat_ingest.utils import AssociatedFiles -FIRST_NAME = "GivenName" +FIRST_NAME = "Given Name" LAST_NAME = "FamilyName" @pytest.fixture def imaging_session() -> ImagingSession: PatientName = f"{FIRST_NAME}^{LAST_NAME}" + default_dicom_dir dicoms = [ DicomSeries(d.iterdir()) for d in ( - get_pet_image(PatientName=PatientName), - get_ac_image(PatientName=PatientName), - get_topogram_image(PatientName=PatientName), - get_statistics_image(PatientName=PatientName), + get_pet_image( + out_dir=default_dicom_dir(pet_src_file).with_suffix(".with-spaces"), + PatientName=PatientName, + ), + get_ac_image( + out_dir=default_dicom_dir(ac_src_file).with_suffix(".with-spaces"), + PatientName=PatientName, + ), + get_topogram_image( + out_dir=default_dicom_dir(topogram_src_file).with_suffix( + ".with-spaces" + ), + PatientName=PatientName, + ), + get_statistics_image( + out_dir=default_dicom_dir(statistics_src_file).with_suffix( + ".with-spaces" + ), + PatientName=PatientName, + ), ) ] scans = [ @@ -123,7 +145,9 @@ def test_session_select_resources( assoc_dir = tmp_path / "assoc" assoc_dir.mkdir() - for fspath in get_raw_data_files(first_name=FIRST_NAME, last_name=LAST_NAME): + for fspath in get_raw_data_files( + first_name=FIRST_NAME.replace(" ", "_"), last_name=LAST_NAME + ): fspath.rename(assoc_dir / fspath.name) staging_dir = tmp_path / "staging" @@ -135,6 +159,7 @@ def test_session_select_resources( str(assoc_dir) + "/{PatientName.given_name}_{PatientName.family_name}*.ptd", r".*/[^\.]+.[^\.]+.[^\.]+.(?P\d+)\.[A-Z]+_(?P[^\.]+).*", ), + 
spaces_to_underscores=True, ) resources = list(staged_session.select_resources(dataset)) diff --git a/xnat_ingest/utils.py b/xnat_ingest/utils.py index 8430179..a4bc749 100644 --- a/xnat_ingest/utils.py +++ b/xnat_ingest/utils.py @@ -303,6 +303,7 @@ def transform_paths( glob_pattern: str, old_values: dict[str, str], new_values: dict[str, str], + spaces_to_underscores: bool = False, ) -> list[Path]: """Applys the transforms FS paths matching `glob_pattern` by replacing the template values found in the `old_values` dict to the values in `new_values`. Used to strip any identifying @@ -319,6 +320,8 @@ def transform_paths( the values used to parameterise the existing file paths new_values : dict[str, str] the new values to parameterise the transformed file paths + spaces_to_underscores : bool, optional + whether to replace spaces with underscores in the old values before matching Returns ------- @@ -347,6 +350,8 @@ def str_templ_to_regex_group(match) -> str: if attr_name: groupname += "__" + attr_name old_val = getattr(old_val, attr_name) + if spaces_to_underscores: + old_val = old_val.replace(" ", "_") groupname += "__" + str(group_count[fieldname]) group_str = f"(?P<{groupname}>{old_val})" group_count[fieldname] += 1