From 453b6894b33e4fe1af186ff42d5710c6d2c66843 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Mon, 10 Feb 2025 11:04:55 -0500 Subject: [PATCH 1/2] Fix primary condition check --- src/app.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/app.py b/src/app.py index c62773e5..98ac0407 100644 --- a/src/app.py +++ b/src/app.py @@ -3056,10 +3056,12 @@ def update_datasets_datastatus(): globus_url = get_globus_url(dataset.get('data_access_level'), dataset.get('group_name'), dataset.get('uuid')) dataset['globus_url'] = globus_url dataset['last_touch'] = dataset['last_touch'] if dataset['published_timestamp'] is None else dataset['published_timestamp'] - if dataset.get('activity_creation_action').lower().endswith("process"): - dataset['is_primary'] = "False" - else: - dataset['is_primary'] = "True" + + # Identify primary dataset based on `Activity.creation_action == "Create Dataset Activity"` + # Component datasets generated by `Multi-Assay Split` and + # Processed datasets from `Central Process|ExternalProcess|Lab Process` are NOT primary - Zhou 2/10/2025 + dataset['is_primary'] = dataset_is_primary(dataset.get('uuid')) + has_data = files_exist(dataset.get('uuid'), dataset.get('data_access_level'), dataset.get('group_name')) has_dataset_metadata = files_exist(dataset.get('uuid'), dataset.get('data_access_level'), dataset.get('group_name'), metadata=True) dataset['has_data'] = has_data From 31e2ba2a72633efd97971455df4d5b925a3da4ea Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Mon, 10 Feb 2025 11:29:12 -0500 Subject: [PATCH 2/2] Minimize neo4j query to determine is_primary --- src/app.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/app.py b/src/app.py index 98ac0407..c40977e1 100644 --- a/src/app.py +++ b/src/app.py @@ -3059,9 +3059,10 @@ def update_datasets_datastatus(): # Identify primary dataset based on `Activity.creation_action == "Create Dataset Activity"` # Component datasets generated by `Multi-Assay 
Split` and - # Processed datasets from `Central Process|ExternalProcess|Lab Process` are NOT primary - Zhou 2/10/2025 - dataset['is_primary'] = dataset_is_primary(dataset.get('uuid')) - + # Processed datasets from `Central Process|ExternalProcess|Lab Process` are NOT primary + # For performance, don't call `dataset_is_primary()` since it issues separate Neo4j query on each dataset - Zhou 2/10/2025 + dataset['is_primary'] = "True" if dataset.get('activity_creation_action').lower() == "create dataset activity" else "False" + has_data = files_exist(dataset.get('uuid'), dataset.get('data_access_level'), dataset.get('group_name')) has_dataset_metadata = files_exist(dataset.get('uuid'), dataset.get('data_access_level'), dataset.get('group_name'), metadata=True) dataset['has_data'] = has_data