From 829018663b9f304d7867fa5a13d72cf6413c8038 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 17 Sep 2024 14:58:40 -0400 Subject: [PATCH 01/22] add dandi upload to YAML spec --- .../_yaml_conversion_specification.py | 42 +++++++++++- ..._conversion_specification_dandi_upload.yml | 66 +++++++++++++++++++ .../test_yaml_conversion_specification.py | 33 ++++++++++ 3 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index 10e33cbc8..ec9e96fa5 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -1,3 +1,5 @@ +import json +import os import sys from importlib import import_module from pathlib import Path @@ -7,6 +9,7 @@ from jsonschema import RefResolver, validate from pydantic import DirectoryPath, FilePath +from ..data_transfers import automatic_dandi_upload from ...nwbconverter import NWBConverter from ...utils import dict_deep_update, load_dict_from_file @@ -50,7 +53,7 @@ def run_conversion_from_yaml( data_folder_path: Optional[DirectoryPath] = None, output_folder_path: Optional[DirectoryPath] = None, overwrite: bool = False, -): +) -> None: """ Run conversion to NWB given a yaml specification file. @@ -87,6 +90,14 @@ def run_conversion_from_yaml( resolver=RefResolver(base_uri=sys_uri_base + str(schema_folder) + "/", referrer=specification_schema), ) + upload_to_dandiset = "upload_to_dandiset" in specification + if upload_to_dandiset and "DANDI_API_KEY" in os.environ: + message = ( + "The 'upload_to_dandiset' prompt was found in the YAML specification, " + "but the environment variable 'DANDI_API_KEY' was not set." + ) + raise ValueError(message) + global_metadata = specification.get("metadata", dict()) global_conversion_options = specification.get("conversion_options", dict()) data_interfaces_spec = specification.get("data_interfaces") @@ -102,6 +113,7 @@ def run_conversion_from_yaml( experiment_metadata = experiment.get("metadata", dict()) for session in experiment["sessions"]: file_counter += 1 + source_data = session["source_data"] for interface_name, interface_source_data in session["source_data"].items(): for key, value in interface_source_data.items(): @@ -109,21 +121,47 @@ def run_conversion_from_yaml( source_data[interface_name].update({key: [str(Path(data_folder_path) / x) for x in value]}) elif key in ("file_path", "folder_path"): source_data[interface_name].update({key: str(Path(data_folder_path) / value)}) + converter = CustomNWBConverter(source_data=source_data) + metadata = converter.get_metadata() for metadata_source in [global_metadata, experiment_metadata, session.get("metadata", dict())]: metadata = dict_deep_update(metadata, metadata_source) - nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb") + + session_id = session.get("metadata", dict()).get("NWBFile", dict()).get("session_id", None) + if upload_to_dandiset and session_id is None: + message = ( + "The 'upload_to_dandiset' prompt was found in the YAML specification, " + "but the 'session_id' was not found for session with info block: " + f"\n\n {json.dumps(obj=session, indent=2)}\n\n" + "File intended for DANDI upload must include a session ID." + ) + raise ValueError(message) + session_conversion_options = session.get("conversion_options", dict()) conversion_options = dict() for key in converter.data_interface_objects: conversion_options[key] = dict(session_conversion_options.get(key, dict()), **global_conversion_options) + + nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb") converter.run_conversion( nwbfile_path=output_folder_path / f"{nwbfile_name}.nwb", metadata=metadata, overwrite=overwrite, conversion_options=conversion_options, ) + + if upload_to_dandiset: + dandiset_id = specification["upload_to_dandiset"] + staging = int(dandiset_id) >= 200_000 + automatic_dandi_upload( + dandiset_id=dandiset_id, + nwb_folder_path=output_folder_path, + staging=staging, + ) + + return None # We can early return since organization below will occur within the upload step + # To properly mimic a true dandi organization, the full directory must be populated with NWBFiles. all_nwbfile_paths = [nwbfile_path for nwbfile_path in output_folder_path.iterdir() if nwbfile_path.suffix == ".nwb"] nwbfile_paths_to_set = [ diff --git a/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml b/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml new file mode 100644 index 000000000..d02ee204e --- /dev/null +++ b/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml @@ -0,0 +1,66 @@ +metadata: + NWBFile: + lab: My Lab + institution: My Institution + +conversion_options: + stub_test: True + +data_interfaces: + ap: SpikeGLXRecordingInterface + lf: SpikeGLXRecordingInterface + phy: PhySortingInterface + +upload_to_dandiset: 200560 + +experiments: + ymaze: + metadata: + NWBFile: + session_description: Subject navigating a Y-shaped maze. + + sessions: + - nwbfile_name: example_converter_spec_1 + source_data: + ap: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin + metadata: + NWBFile: + session_start_time: "2020-10-09T21:19:09+00:00" + session_id: "test-yaml-1" + Subject: + subject_id: "yaml-1" + sex: F + age: P35D + species: Mus musculus + - nwbfile_name: example_converter_spec_2.nwb + metadata: + NWBFile: + session_start_time: "2020-10-10T21:19:09+00:00" + session_id: "test-yaml-2" + Subject: + subject_id: "yaml-002" + sex: F + age: P35D + species: Mus musculus + source_data: + lf: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin + + open_explore: + sessions: + - nwbfile_name: example_converter_spec_3 + source_data: + lf: + file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin + phy: + folder_path: phy/phy_example_0/ + metadata: + NWBFile: + session_start_time: "2020-10-11T21:19:09+00:00" + session_id: test YAML 3 + Subject: + subject_id: YAML Subject Name + sex: F + age: P35D + species: Mus musculus diff --git a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py index 61c71cf86..b8c99b775 100644 --- a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py +++ b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py @@ -1,8 +1,10 @@ import sys +import time import unittest from datetime import datetime from pathlib import Path +import dandi.dandiapi import pytest from hdmf.testing import TestCase from jsonschema import RefResolver, validate @@ -19,6 +21,7 @@ "fname", [ "GIN_conversion_specification.yml", + "GIN_conversion_specification_dandi_upload.yml", "GIN_conversion_specification_missing_nwbfile_names.yml", "GIN_conversion_specification_no_nwbfile_name_or_other_metadata.yml", "GIN_conversion_specification_videos.yml", @@ -178,5 +181,35 @@ def test_run_conversion_from_yaml_on_behavior(self): ) +def test_run_conversion_from_yaml_with_dandi_upload(): + path_to_test_yml_files = Path(__file__).parent / "conversion_specifications" + yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml" + run_conversion_from_yaml( + specification_file_path=yaml_file_path, + data_folder_path=DATA_PATH, + output_folder_path=OUTPUT_PATH, + overwrite=True, + ) + + time.sleep(20) # Give some buffer room for server to process before making assertions against DANDI API + + client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api") + dandiset = client.get_dandiset("200560") + + expected_asset_paths = [ + "sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb", + "sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb", + "sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-yaml-3_ecephys.nwb", + ] + for asset_path in expected_asset_paths: + test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found + test_asset_metadata = test_asset.get_raw_metadata() + + # Past uploads may have created the same apparent file, so look at the modification time to ensure + # this test is actually testing the most recent upload + date_modified = datetime.fromisoformat(test_asset_metadata["dateModified"]) + assert datetime.now() - date_modified < timedelta(minutes=10) + + if __name__ == "__main__": unittest.main() From 6869bd8128015e9f6ac14457eb2bd20a401034d6 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 17 Sep 2024 15:09:21 -0400 Subject: [PATCH 02/22] fix import test --- tests/imports.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/imports.py b/tests/imports.py index 5f8b65e72..7ac95713b 100644 --- a/tests/imports.py +++ b/tests/imports.py @@ -68,6 +68,7 @@ def test_tools(self): "get_package_version", "is_package_installed", "deploy_process", + "data_transfers", "LocalPathExpander", "get_module", ] From 8a2fe05fce23fb2754162c5efd34a4199fdf058c Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 12:26:05 -0400 Subject: [PATCH 03/22] move test --- .../_yaml_conversion_specification.py | 1 + .../test_tools/dandi_transfer_tools.py | 32 +++++++++++++++++++ .../test_yaml_conversion_specification.py | 32 ------------------- 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index ec9e96fa5..3a9ad7ea8 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -80,6 +80,7 @@ def run_conversion_from_yaml( output_folder_path = Path(specification_file_path).parent else: output_folder_path = Path(output_folder_path) + specification = load_dict_from_file(file_path=specification_file_path) schema_folder = Path(__file__).parent.parent.parent / "schemas" specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json") diff --git a/tests/test_minimal/test_tools/dandi_transfer_tools.py b/tests/test_minimal/test_tools/dandi_transfer_tools.py index df4226d10..e97c79c5f 100644 --- a/tests/test_minimal/test_tools/dandi_transfer_tools.py +++ b/tests/test_minimal/test_tools/dandi_transfer_tools.py @@ -6,10 +6,12 @@ from shutil import rmtree from tempfile import mkdtemp +import dandi.dandiapi import pytest from hdmf.testing import TestCase from pynwb import NWBHDF5IO +from neuroconv import run_conversion_from_yaml from neuroconv.tools.data_transfers import automatic_dandi_upload from neuroconv.tools.nwb_helpers import ( get_default_nwbfile_metadata, @@ -101,3 +103,33 @@ def test_automatic_dandi_upload_non_parallel_non_threaded(self): number_of_jobs=1, number_of_threads=1, ) + + +def test_run_conversion_from_yaml_with_dandi_upload(): + path_to_test_yml_files = Path(__file__).parent / "conversion_specifications" + yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml" + run_conversion_from_yaml( + specification_file_path=yaml_file_path, + data_folder_path=DATA_PATH, + output_folder_path=OUTPUT_PATH, + overwrite=True, + ) + + time.sleep(20) # Give some buffer room for server to process before making assertions against DANDI API + + client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api") + dandiset = client.get_dandiset("200560") + + expected_asset_paths = [ + "sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb", + "sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb", + "sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-yaml-3_ecephys.nwb", + ] + for asset_path in expected_asset_paths: + test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found + test_asset_metadata = test_asset.get_raw_metadata() + + # Past uploads may have created the same apparent file, so look at the modification time to ensure + # this test is actually testing the most recent upload + date_modified = datetime.fromisoformat(test_asset_metadata["dateModified"]) + assert datetime.now() - date_modified < timedelta(minutes=10) diff --git a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py index b8c99b775..ceaba0b0a 100644 --- a/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py +++ b/tests/test_on_data/test_yaml/test_yaml_conversion_specification.py @@ -1,10 +1,8 @@ import sys -import time import unittest from datetime import datetime from pathlib import Path -import dandi.dandiapi import pytest from hdmf.testing import TestCase from jsonschema import RefResolver, validate @@ -181,35 +179,5 @@ def test_run_conversion_from_yaml_on_behavior(self): ) -def test_run_conversion_from_yaml_with_dandi_upload(): - path_to_test_yml_files = Path(__file__).parent / "conversion_specifications" - yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml" - run_conversion_from_yaml( - specification_file_path=yaml_file_path, - data_folder_path=DATA_PATH, - output_folder_path=OUTPUT_PATH, - overwrite=True, - ) - - time.sleep(20) # Give some buffer room for server to process before making assertions against DANDI API - - client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api") - dandiset = client.get_dandiset("200560") - - expected_asset_paths = [ - "sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb", - "sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb", - "sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-yaml-3_ecephys.nwb", - ] - for asset_path in expected_asset_paths: - test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found - test_asset_metadata = test_asset.get_raw_metadata() - - # Past uploads may have created the same apparent file, so look at the modification time to ensure - # this test is actually testing the most recent upload - date_modified = datetime.fromisoformat(test_asset_metadata["dateModified"]) - assert datetime.now() - date_modified < timedelta(minutes=10) - - if __name__ == "__main__": unittest.main() From 3d36868ff0a506abe4428d04dfe78f15c93114df Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 12:28:32 -0400 Subject: [PATCH 04/22] fix path --- tests/test_minimal/test_tools/dandi_transfer_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_minimal/test_tools/dandi_transfer_tools.py b/tests/test_minimal/test_tools/dandi_transfer_tools.py index e97c79c5f..53594a0b5 100644 --- a/tests/test_minimal/test_tools/dandi_transfer_tools.py +++ b/tests/test_minimal/test_tools/dandi_transfer_tools.py @@ -106,7 +106,8 @@ def test_automatic_dandi_upload_non_parallel_non_threaded(self): def test_run_conversion_from_yaml_with_dandi_upload(): - path_to_test_yml_files = Path(__file__).parent / "conversion_specifications" + data_tests_base_folder_path = Path(__file__).parent.parent.parent / "test_on_data" + path_to_test_yml_files = data_tests_base_folder_path / "test_yaml" / "conversion_specifications" yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml" run_conversion_from_yaml( specification_file_path=yaml_file_path, From 9fee7757cdd620caa73e31529aca3fe6bc710def Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 12:37:53 -0400 Subject: [PATCH 05/22] fix path --- tests/test_minimal/test_tools/dandi_transfer_tools.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_minimal/test_tools/dandi_transfer_tools.py b/tests/test_minimal/test_tools/dandi_transfer_tools.py index 53594a0b5..295031f0c 100644 --- a/tests/test_minimal/test_tools/dandi_transfer_tools.py +++ b/tests/test_minimal/test_tools/dandi_transfer_tools.py @@ -18,6 +18,9 @@ make_nwbfile_from_metadata, ) +from ...test_on_data.setup_paths import ECEPHY_DATA_PATH as DATA_PATH +from ...test_on_data.setup_paths import OUTPUT_PATH + DANDI_API_KEY = os.getenv("DANDI_API_KEY") HAVE_DANDI_KEY = DANDI_API_KEY is not None and DANDI_API_KEY != "" # can be "" from external forks From 6a6297d0f8d3b57355bb2d286406eaeca1e4a4be Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 12:45:25 -0400 Subject: [PATCH 06/22] just move entire test --- .github/workflows/live-service-testing.yml | 2 + .../test_tools/dandi_transfer_tools.py | 36 -------------- .../test_yaml/yaml_dandi_transfer_tools.py | 48 +++++++++++++++++++ 3 files changed, 50 insertions(+), 36 deletions(-) create mode 100644 tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py diff --git a/.github/workflows/live-service-testing.yml b/.github/workflows/live-service-testing.yml index b9a425a8d..de5baf018 100644 --- a/.github/workflows/live-service-testing.yml +++ b/.github/workflows/live-service-testing.yml @@ -49,5 +49,7 @@ jobs: - name: Run subset of tests that use DANDI live services run: pytest -rsx -n auto tests/test_minimal/test_tools/dandi_transfer_tools.py + - name: Run subset of tests that use DANDI live services with YAML + run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py - name: Run subset of tests that use Globus live services run: pytest -rsx -n auto tests/test_minimal/test_tools/globus_transfer_tools.py diff --git a/tests/test_minimal/test_tools/dandi_transfer_tools.py b/tests/test_minimal/test_tools/dandi_transfer_tools.py index 295031f0c..df4226d10 100644 --- a/tests/test_minimal/test_tools/dandi_transfer_tools.py +++ b/tests/test_minimal/test_tools/dandi_transfer_tools.py @@ -6,21 +6,16 @@ from shutil import rmtree from tempfile import mkdtemp -import dandi.dandiapi import pytest from hdmf.testing import TestCase from pynwb import NWBHDF5IO -from neuroconv import run_conversion_from_yaml from neuroconv.tools.data_transfers import automatic_dandi_upload from neuroconv.tools.nwb_helpers import ( get_default_nwbfile_metadata, make_nwbfile_from_metadata, ) -from ...test_on_data.setup_paths import ECEPHY_DATA_PATH as DATA_PATH -from ...test_on_data.setup_paths import OUTPUT_PATH - DANDI_API_KEY = os.getenv("DANDI_API_KEY") HAVE_DANDI_KEY = DANDI_API_KEY is not None and DANDI_API_KEY != "" # can be "" from external forks @@ -106,34 +101,3 @@ def test_automatic_dandi_upload_non_parallel_non_threaded(self): number_of_jobs=1, number_of_threads=1, ) - - -def test_run_conversion_from_yaml_with_dandi_upload(): - data_tests_base_folder_path = Path(__file__).parent.parent.parent / "test_on_data" - path_to_test_yml_files = data_tests_base_folder_path / "test_yaml" / "conversion_specifications" - yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml" - run_conversion_from_yaml( - specification_file_path=yaml_file_path, - data_folder_path=DATA_PATH, - output_folder_path=OUTPUT_PATH, - overwrite=True, - ) - - time.sleep(20) # Give some buffer room for server to process before making assertions against DANDI API - - client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api") - dandiset = client.get_dandiset("200560") - - expected_asset_paths = [ - "sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb", - "sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb", - "sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-yaml-3_ecephys.nwb", - ] - for asset_path in expected_asset_paths: - test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found - test_asset_metadata = test_asset.get_raw_metadata() - - # Past uploads may have created the same apparent file, so look at the modification time to ensure - # this test is actually testing the most recent upload - date_modified = datetime.fromisoformat(test_asset_metadata["dateModified"]) - assert datetime.now() - date_modified < timedelta(minutes=10) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py new file mode 100644 index 000000000..3ae6d6a62 --- /dev/null +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -0,0 +1,48 @@ +import os +from datetime import datetime +from pathlib import Path + +import dandi.dandiapi +import pytest + +from neuroconv import run_conversion_from_yaml + +from ..setup_paths import ECEPHY_DATA_PATH as DATA_PATH +from ..setup_paths import OUTPUT_PATH + +DANDI_API_KEY = os.getenv("DANDI_API_KEY") +HAVE_DANDI_KEY = DANDI_API_KEY is not None and DANDI_API_KEY != "" # can be "" from external forks + + +@pytest.mark.skipif( + not HAVE_DANDI_KEY, + reason="You must set your DANDI_API_KEY to run this test!", +) +def test_run_conversion_from_yaml_with_dandi_upload(): + path_to_test_yml_files = Path(__file__).parent / "conversion_specifications" + yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml" + run_conversion_from_yaml( + specification_file_path=yaml_file_path, + data_folder_path=DATA_PATH, + output_folder_path=OUTPUT_PATH, + overwrite=True, + ) + + time.sleep(20) # Give some buffer room for server to process before making assertions against DANDI API + + client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api") + dandiset = client.get_dandiset("200560") + + expected_asset_paths = [ + "sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb", + "sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb", + "sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-yaml-3_ecephys.nwb", + ] + for asset_path in expected_asset_paths: + test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found + test_asset_metadata = test_asset.get_raw_metadata() + + # Past uploads may have created the same apparent file, so look at the modification time to ensure + # this test is actually testing the most recent upload + date_modified = datetime.fromisoformat(test_asset_metadata["dateModified"]) + assert datetime.now() - date_modified < timedelta(minutes=10) From 044d70c66a6ff2ac0469edc537265c18e0faa955 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 17:35:43 -0400 Subject: [PATCH 07/22] fix validation --- src/neuroconv/schemas/yaml_conversion_specification_schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/src/neuroconv/schemas/yaml_conversion_specification_schema.json b/src/neuroconv/schemas/yaml_conversion_specification_schema.json index c6526803b..039a1cf48 100644 --- a/src/neuroconv/schemas/yaml_conversion_specification_schema.json +++ b/src/neuroconv/schemas/yaml_conversion_specification_schema.json @@ -8,6 +8,7 @@ "required": ["experiments"], "additionalProperties": false, "properties": { + "upload_to_dandiset": {"type": "string"}, "metadata": {"$ref": "./metadata_schema.json#"}, "conversion_options": {"type": "object"}, "data_interfaces": { From e725aa031622a0bc292a152112e553f35b9e4c70 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 17:40:34 -0400 Subject: [PATCH 08/22] fix type --- .../GIN_conversion_specification_dandi_upload.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml b/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml index d02ee204e..adf590d3a 100644 --- a/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml +++ b/tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml @@ -11,7 +11,7 @@ data_interfaces: lf: SpikeGLXRecordingInterface phy: PhySortingInterface -upload_to_dandiset: 200560 +upload_to_dandiset: "200560" experiments: ymaze: From c1387c44855e43a2d60d01053b99f5c9b5e557bb Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 17:47:41 -0400 Subject: [PATCH 09/22] fix logic --- .../_yaml_conversion_specification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py index 3a9ad7ea8..8b6c86c3f 100644 --- a/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py +++ b/src/neuroconv/tools/yaml_conversion_specification/_yaml_conversion_specification.py @@ -92,7 +92,7 @@ def run_conversion_from_yaml( ) upload_to_dandiset = "upload_to_dandiset" in specification - if upload_to_dandiset and "DANDI_API_KEY" in os.environ: + if upload_to_dandiset and "DANDI_API_KEY" not in os.environ: message = ( "The 'upload_to_dandiset' prompt was found in the YAML specification, " "but the environment variable 'DANDI_API_KEY' was not set." From 32f6a885dd7eec3f94555cb9312ca5de53f7dcde Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 18:04:33 -0400 Subject: [PATCH 10/22] add data loading to live services --- .github/workflows/live-service-testing.yml | 8 ++++++++ tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 7 +++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/live-service-testing.yml b/.github/workflows/live-service-testing.yml index de5baf018..b4a765c43 100644 --- a/.github/workflows/live-service-testing.yml +++ b/.github/workflows/live-service-testing.yml @@ -47,6 +47,14 @@ jobs: - name: Install full requirements run: pip install .[test,full] + - name: Prepare data for tests + uses: ./.github/actions/load-data + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + s3-gin-bucket: ${{ secrets.S3_GIN_BUCKET }} + os: ${{ matrix.os }} + - name: Run subset of tests that use DANDI live services run: pytest -rsx -n auto tests/test_minimal/test_tools/dandi_transfer_tools.py - name: Run subset of tests that use DANDI live services with YAML diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index 3ae6d6a62..a0bae52f2 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -7,8 +7,7 @@ from neuroconv import run_conversion_from_yaml -from ..setup_paths import ECEPHY_DATA_PATH as DATA_PATH -from ..setup_paths import OUTPUT_PATH +from ..setup_paths import ECEPHY_DATA_PATH, OUTPUT_PATH DANDI_API_KEY = os.getenv("DANDI_API_KEY") HAVE_DANDI_KEY = DANDI_API_KEY is not None and DANDI_API_KEY != "" # can be "" from external forks @@ -23,12 +22,12 @@ def test_run_conversion_from_yaml_with_dandi_upload(): yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml" run_conversion_from_yaml( specification_file_path=yaml_file_path, - data_folder_path=DATA_PATH, + data_folder_path=ECEPHY_DATA_PATH, output_folder_path=OUTPUT_PATH, overwrite=True, ) - time.sleep(20) # Give some buffer room for server to process before making assertions against DANDI API + time.sleep(60) # Give some buffer room for server to process before making assertions against DANDI API client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api") dandiset = client.get_dandiset("200560") From 735b6ba1c4557fd8a988829b63e8bb780a1ed3ed Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 18:18:14 -0400 Subject: [PATCH 11/22] propagate secrets --- .github/workflows/live-service-testing.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/live-service-testing.yml b/.github/workflows/live-service-testing.yml index b4a765c43..ae6b10851 100644 --- a/.github/workflows/live-service-testing.yml +++ b/.github/workflows/live-service-testing.yml @@ -15,6 +15,12 @@ on: default: '["ubuntu-latest", "macos-latest", "windows-latest"]' secrets: + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true + S3_GIN_BUCKET: + required: true DANDI_API_KEY: required: true From 6517379b609a691b1a5eb1830e9678f90166e133 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 20:23:10 -0400 Subject: [PATCH 12/22] propagate secrets --- .github/workflows/deploy-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/deploy-tests.yml b/.github/workflows/deploy-tests.yml index 4f67d15de..5ff918634 100644 --- a/.github/workflows/deploy-tests.yml +++ b/.github/workflows/deploy-tests.yml @@ -58,6 +58,9 @@ jobs: if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }} uses: ./.github/workflows/live-service-testing.yml secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + S3_GIN_BUCKET: ${{ secrets.S3_GIN_BUCKET }} DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }} with: # Ternary operator: condition && value_if_true || value_if_false python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || '["3.9", "3.10", "3.11", "3.12"]' }} From db0c3568aeba431a5de96562ef5bf113dbb67c12 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 27 Sep 2024 20:32:00 -0400 Subject: [PATCH 13/22] fix import --- tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index a0bae52f2..1f3740de8 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -1,4 +1,5 @@ import os +import time from datetime import datetime from pathlib import Path From 74ef6b4d2bdbdc7d5c418828cd95b561722e6464 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 21:00:58 -0400 Subject: [PATCH 14/22] fix datetime parser --- tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index 1f3740de8..65fc7371c 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -44,5 +44,7 @@ def test_run_conversion_from_yaml_with_dandi_upload(): # Past uploads may have created the same apparent file, so look at the modification time to ensure # this test is actually testing the most recent upload - date_modified = datetime.fromisoformat(test_asset_metadata["dateModified"]) + date_modified = datetime.fromisoformat( + test_asset_metadata["dateModified"].split("Z")[0] # Timezones look a little messy + ) assert datetime.now() - date_modified < timedelta(minutes=10) From 6ebbd449699c295021be73de3f2bf3c3c77960ce Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 21:06:15 -0400 Subject: [PATCH 15/22] fix test --- tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index 65fc7371c..401660796 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -47,4 +47,4 @@ def test_run_conversion_from_yaml_with_dandi_upload(): date_modified = datetime.fromisoformat( test_asset_metadata["dateModified"].split("Z")[0] # Timezones look a little messy ) - assert datetime.now() - date_modified < timedelta(minutes=10) + assert datetime.now() - date_modified < datetime.timedelta(minutes=10) From b3aba175611665b4a7756077c06d7276201b3655 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Fri, 27 Sep 2024 21:13:12 -0400 Subject: [PATCH 16/22] fix test --- tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index 401660796..dde9dabee 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -1,6 +1,6 @@ import os import time -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path import dandi.dandiapi @@ -47,4 +47,4 @@ def test_run_conversion_from_yaml_with_dandi_upload(): date_modified = datetime.fromisoformat( test_asset_metadata["dateModified"].split("Z")[0] # Timezones look a little messy ) - assert datetime.now() - date_modified < datetime.timedelta(minutes=10) + assert datetime.now() - date_modified < timedelta(minutes=10) From 3f58129e476a08edf95e3da5cffb28fff8d7ac6a Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 27 Sep 2024 21:30:11 -0400 Subject: [PATCH 17/22] fix expected path on DANDI --- tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index dde9dabee..d0bd96b66 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -36,7 +36,7 @@ def test_run_conversion_from_yaml_with_dandi_upload(): expected_asset_paths = [ "sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb", "sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb", - "sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-yaml-3_ecephys.nwb", + "sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-YAML-3_ecephys.nwb", ] for asset_path in expected_asset_paths: test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found From dde9df93122b3f3d3c6f0f33a0e1864fa75b967b Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 27 Sep 2024 21:42:07 -0400 Subject: [PATCH 18/22] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c7bc13be..ff05e7bc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ## Features * Using in-house `GenericDataChunkIterator` [PR #1068](https://github.com/catalystneuro/neuroconv/pull/1068) * Data interfaces now perform source (argument inputs) validation with the json schema [PR #1020](https://github.com/catalystneuro/neuroconv/pull/1020) +* YAML specification files now accept an outer keyword `upload_to_dandiset="< six-digit ID >"` to automatically upload the produced NWB files to the DANDI archive [PR #1089](https://github.com/catalystneuro/neuroconv/pull/1089) ## Improvements * Remove dev test from PR [PR #1092](https://github.com/catalystneuro/neuroconv/pull/1092) From 860d444969a664c16de003d59d84866ac941a5cc Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 27 Sep 2024 21:57:32 -0400 Subject: [PATCH 19/22] only run on 3.12 to avoid race conditions --- tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index d0bd96b66..e3e784dda 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -1,5 +1,6 @@ import os import time +import platform from datetime import datetime, timedelta from pathlib import Path @@ -12,10 +13,10 @@ DANDI_API_KEY = os.getenv("DANDI_API_KEY") HAVE_DANDI_KEY = DANDI_API_KEY is not None and DANDI_API_KEY != "" # can be "" from external forks - +_PYTHON_VERSION = platform.python_version() @pytest.mark.skipif( - not HAVE_DANDI_KEY, + not HAVE_DANDI_KEY or Version(".".join(_PYTHON_VERSION.split(".")[:2])) != Version("3.12") , reason="You must set your DANDI_API_KEY to run this test!", ) def test_run_conversion_from_yaml_with_dandi_upload(): From 851c2626cc30471f66960a721cc4eba13d8fa547 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 28 Sep 2024 01:59:12 +0000 Subject: [PATCH 20/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index e3e784dda..9d9a2ad71 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -1,6 +1,6 @@ import os -import time import platform +import time from datetime import datetime, timedelta from pathlib import Path @@ -15,8 +15,9 @@ HAVE_DANDI_KEY = DANDI_API_KEY is not None and DANDI_API_KEY != "" # can be "" from external forks _PYTHON_VERSION = platform.python_version() + @pytest.mark.skipif( - not HAVE_DANDI_KEY or Version(".".join(_PYTHON_VERSION.split(".")[:2])) != Version("3.12") , + not HAVE_DANDI_KEY or Version(".".join(_PYTHON_VERSION.split(".")[:2])) != Version("3.12"), reason="You must set your DANDI_API_KEY to run this test!", ) def test_run_conversion_from_yaml_with_dandi_upload(): From 490da53d11dc0ba740f5d5ff01c9a6f85a9d6b8e Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Fri, 27 Sep 2024 22:03:27 -0400 Subject: [PATCH 21/22] fix import --- tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index 9d9a2ad71..911f92b9d 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -2,6 +2,7 @@ import platform import time from datetime import datetime, timedelta +from packaging.version import Version from pathlib import Path import dandi.dandiapi From 738f734624c5f23a3c0ee6e2daa8a4bee6078c4b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 28 Sep 2024 02:03:39 +0000 Subject: [PATCH 22/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py index 911f92b9d..c36d072e7 100644 --- a/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py +++ b/tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py @@ -2,11 +2,11 @@ import platform import time from datetime import datetime, timedelta -from packaging.version import Version from pathlib import Path import dandi.dandiapi import pytest +from packaging.version import Version from neuroconv import run_conversion_from_yaml