Skip to content

Commit

Permalink
Also fix logic for archiving datasets
Browse files Browse the repository at this point in the history
Change the logic so that archiving is not triggered if the study cannot be
found (i.e. `study.custom_ria_url is None`)
  • Loading branch information
kaitj committed Jan 3, 2024
1 parent 328e628 commit dfe3b98
Showing 1 changed file with 34 additions and 13 deletions.
47 changes: 34 additions & 13 deletions autobidsportal/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,9 @@ def wrapped_task(*args, **kwargs):
try:
task(*args, **kwargs)
finally:
if (job := get_current_job()) and (not Task.query.get(job.id).complete):
if (job := get_current_job()) and (
not Task.query.get(job.id).complete
):
app.logger.error(error_log)
_set_task_error("Unknown uncaught exception")

Expand Down Expand Up @@ -495,7 +497,9 @@ def run_cfmm2tar(
study = Study.query.get(study_id)
app.logger.info(
"Running cfmm2tar for patients %s in study %i",
[record["PatientName"] for record in studies_to_download], # pyright: ignore
[
record["PatientName"] for record in studies_to_download
], # pyright: ignore
study.id,
)

Expand Down Expand Up @@ -563,7 +567,9 @@ def find_unprocessed_tar_files(study_id: int):
dataset_type=DatasetType.RAW_DATA,
).one_or_none()
existing_tar_file_ids = (
set() if dataset is None else {out.id for out in dataset.cfmm2tar_outputs}
set()
if dataset is None
else {out.id for out in dataset.cfmm2tar_outputs}
)
new_tar_file_ids = {
tar_file.id for tar_file in study.cfmm2tar_outputs
Expand Down Expand Up @@ -848,7 +854,7 @@ def archive_raw_data(study_id: int):
study = Study.query.get(study_id)

# If study cannot be found or no content
if (study.custom_ria_url is not None) or (study.dataset_content is None):
if (study.custom_ria_url is None) or (study.dataset_content is None):
_set_task_progress(100)
return

Expand All @@ -870,7 +876,9 @@ def archive_raw_data(study_id: int):
repo = GitRepo(str(path_dataset_raw))

# If archive is up-to-date
if (latest_archive) and (latest_archive.dataset_hexsha == repo.get_hexsha()):
if (latest_archive) and (
latest_archive.dataset_hexsha == repo.get_hexsha()
):
app.logger.info("Archive for study %s up to date", study_id)
_set_task_progress(100)
return
Expand Down Expand Up @@ -903,7 +911,8 @@ def archive_raw_data(study_id: int):
)
make_remote_dir(
app.config["ARCHIVE_BASE_URL"].split(":")[0],
app.config["ARCHIVE_BASE_URL"].split(":")[1] + f"/{dataset_raw.ria_alias}",
app.config["ARCHIVE_BASE_URL"].split(":")[1]
+ f"/{dataset_raw.ria_alias}",
)
copy_file(
app.config["ARCHIVE_BASE_URL"],
Expand Down Expand Up @@ -1061,7 +1070,9 @@ def run_gradcorrect(
subject_ids
List of subject ids to process (optional)
"""
participant_label = ["--participant_label", *subject_ids] if subject_ids else []
participant_label = (
["--participant_label", *subject_ids] if subject_ids else []
)
apptainer_exec(
[
"/gradcorrect/run.sh",
Expand Down Expand Up @@ -1093,7 +1104,10 @@ def gradcorrect_study(
List of subject ids to run gradcorrect on (Optional)
"""
_set_task_progress(0)
if not ((study := Study.query.get(study_id)) or (study.custom_ria_url is not None)):
if not (
(study := Study.query.get(study_id))
or (study.custom_ria_url is not None)
):
_set_task_progress(100)
return
dataset_bids = ensure_dataset_exists(study_id, DatasetType.RAW_DATA)
Expand Down Expand Up @@ -1132,10 +1146,15 @@ def gradcorrect_study(
)
# Remove intermediate data
rmtree(
path_dataset_derivatives / "gradcorrect" / "sourcedata" / "scratch",
path_dataset_derivatives
/ "gradcorrect"
/ "sourcedata"
/ "scratch",
)

sub_string = ",".join(subject_labels) if subject_labels else "all subjects"
sub_string = (
",".join(subject_labels) if subject_labels else "all subjects"
)
finalize_dataset_changes(
str(path_dataset_derivatives),
f"Run gradcorrect on subjects {sub_string}",
Expand All @@ -1157,8 +1176,8 @@ def archive_derivative_data(study_id: int):
_set_task_progress(0)
study = Study.query.get(study_id)

# If RIA already exists
if study.custom_ria_url is not None:
# If study cannot be found
if study.custom_ria_url is None:
_set_task_progress(100)
return

Expand All @@ -1179,7 +1198,9 @@ def archive_derivative_data(study_id: int):
)
repo = GitRepo(str(path_dataset_derived))
# If archive is already up-to-date
if (latest_archive) and (latest_archive.dataset_hexsha == repo.get_hexsha()):
if (latest_archive) and (
latest_archive.dataset_hexsha == repo.get_hexsha()
):
app.logger.info("Archive for study %s up to date", study_id)
_set_task_progress(100)
return
Expand Down

0 comments on commit dfe3b98

Please sign in to comment.