Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sarc dev fixing crawl function #90

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
11 changes: 8 additions & 3 deletions imgtools/utils/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def crawl_one(folder):
database = {}
for path, _, _ in os.walk(folder):
# find dicoms
dicoms = glob.glob(pathlib.Path(path, "**", "*.dcm").as_posix(), recursive=True)
dicoms = glob.glob(pathlib.Path(path, "**", "*.[Dd]cm").as_posix(), recursive=True)
# print('\n', folder, dicoms)
# instance (slice) information
for dcm in dicoms:
Expand Down Expand Up @@ -182,13 +182,18 @@ def crawl(top,
database_list = []
folders = glob.glob(pathlib.Path(top, "*").as_posix())

# This is a list of dictionaries; each dictionary describes one directory containing image dirs
database_list = Parallel(n_jobs=n_jobs)(delayed(crawl_one)(pathlib.Path(top, folder).as_posix()) for folder in tqdm(folders))

# convert list to dictionary
# convert list of dictionaries to single dictionary with each key being a patient ID
database_dict = {}
for db in database_list:
for key in db:
database_dict[key] = db[key]
# If multiple directories have the same patient ID, merge their information together
if key in database_dict:
database_dict[key] = database_dict[key] | db[key]
skim2257 marked this conversation as resolved.
Show resolved Hide resolved
else:
database_dict[key] = db[key]

# save one level above imaging folders
parent, dataset = os.path.split(top)
Expand Down