Add DANDI upload to YAML spec #1089

Open · wants to merge 24 commits into main
3 changes: 3 additions & 0 deletions .github/workflows/deploy-tests.yml
@@ -58,6 +58,9 @@ jobs:
if: ${{ needs.assess-file-changes.outputs.SOURCE_CHANGED == 'true' }}
uses: ./.github/workflows/live-service-testing.yml
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
S3_GIN_BUCKET: ${{ secrets.S3_GIN_BUCKET }}
DANDI_API_KEY: ${{ secrets.DANDI_API_KEY }}
with: # Ternary operator: condition && value_if_true || value_if_false
python-versions: ${{ github.event.pull_request.draft == true && '["3.9"]' || '["3.9", "3.10", "3.11", "3.12"]' }}
16 changes: 16 additions & 0 deletions .github/workflows/live-service-testing.yml
@@ -15,6 +15,12 @@ on:
default: '["ubuntu-latest", "macos-latest", "windows-latest"]'

secrets:
AWS_ACCESS_KEY_ID:
required: true
AWS_SECRET_ACCESS_KEY:
required: true
S3_GIN_BUCKET:
required: true
DANDI_API_KEY:
required: true

@@ -47,7 +53,17 @@ jobs:
- name: Install full requirements
run: pip install .[test,full]

- name: Prepare data for tests
uses: ./.github/actions/load-data
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
s3-gin-bucket: ${{ secrets.S3_GIN_BUCKET }}
os: ${{ matrix.os }}

- name: Run subset of tests that use DANDI live services
run: pytest -rsx -n auto tests/test_minimal/test_tools/dandi_transfer_tools.py
- name: Run subset of tests that use DANDI live services with YAML
run: pytest -rsx -n auto tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py
- name: Run subset of tests that use Globus live services
run: pytest -rsx -n auto tests/test_minimal/test_tools/globus_transfer_tools.py
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@
## Features
* Using in-house `GenericDataChunkIterator` [PR #1068](https://github.com/catalystneuro/neuroconv/pull/1068)
* Data interfaces now perform source (argument inputs) validation with the json schema [PR #1020](https://github.com/catalystneuro/neuroconv/pull/1020)
* YAML specification files now accept an outer keyword `upload_to_dandiset="< six-digit ID >"` to automatically upload the produced NWB files to the DANDI archive [PR #1089](https://github.com/catalystneuro/neuroconv/pull/1089)

## Improvements
* Remove dev test from PR [PR #1092](https://github.com/catalystneuro/neuroconv/pull/1092)
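To make the changelog entry concrete, here is a minimal sketch of how the new top-level keyword might look in a specification file; the dandiset ID, interface name, and file path are placeholders, and sessions destined for DANDI must also define a `session_id`:

```yaml
# Minimal sketch of the new top-level keyword; the dandiset ID, interface name,
# and file path are placeholders. Dandiset IDs of 200000 or above are treated
# as staging uploads.
upload_to_dandiset: "000123"

data_interfaces:
  ap: SpikeGLXRecordingInterface

experiments:
  my_experiment:
    sessions:
      - nwbfile_name: example_session_1
        source_data:
          ap:
            file_path: relative/path/to/recording.ap.bin
        metadata:
          NWBFile:
            session_start_time: "2020-10-09T21:19:09+00:00"
            session_id: "session-1"  # required when uploading to DANDI
          Subject:
            subject_id: "subject-1"
            sex: F
            age: P35D
            species: Mus musculus
```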
@@ -8,6 +8,7 @@
"required": ["experiments"],
"additionalProperties": false,
"properties": {
"upload_to_dandiset": {"type": "string"},
"metadata": {"$ref": "./metadata_schema.json#"},
"conversion_options": {"type": "object"},
"data_interfaces": {
@@ -1,3 +1,5 @@
import json
import os
import sys
from importlib import import_module
from pathlib import Path
@@ -7,6 +9,7 @@
from jsonschema import RefResolver, validate
from pydantic import DirectoryPath, FilePath

from ..data_transfers import automatic_dandi_upload
from ...nwbconverter import NWBConverter
from ...utils import dict_deep_update, load_dict_from_file

@@ -50,7 +53,7 @@ def run_conversion_from_yaml(
data_folder_path: Optional[DirectoryPath] = None,
output_folder_path: Optional[DirectoryPath] = None,
overwrite: bool = False,
):
) -> None:
"""
Run conversion to NWB given a yaml specification file.

@@ -77,6 +80,7 @@
output_folder_path = Path(specification_file_path).parent
else:
output_folder_path = Path(output_folder_path)

specification = load_dict_from_file(file_path=specification_file_path)
schema_folder = Path(__file__).parent.parent.parent / "schemas"
specification_schema = load_dict_from_file(file_path=schema_folder / "yaml_conversion_specification_schema.json")
@@ -87,6 +91,14 @@
resolver=RefResolver(base_uri=sys_uri_base + str(schema_folder) + "/", referrer=specification_schema),
)

upload_to_dandiset = "upload_to_dandiset" in specification
if upload_to_dandiset and "DANDI_API_KEY" not in os.environ:
message = (
"The 'upload_to_dandiset' prompt was found in the YAML specification, "
"but the environment variable 'DANDI_API_KEY' was not set."
)
raise ValueError(message)

global_metadata = specification.get("metadata", dict())
global_conversion_options = specification.get("conversion_options", dict())
data_interfaces_spec = specification.get("data_interfaces")
@@ -102,28 +114,55 @@
experiment_metadata = experiment.get("metadata", dict())
for session in experiment["sessions"]:
file_counter += 1

source_data = session["source_data"]
for interface_name, interface_source_data in session["source_data"].items():
for key, value in interface_source_data.items():
if key == "file_paths":
source_data[interface_name].update({key: [str(Path(data_folder_path) / x) for x in value]})
elif key in ("file_path", "folder_path"):
source_data[interface_name].update({key: str(Path(data_folder_path) / value)})

converter = CustomNWBConverter(source_data=source_data)

metadata = converter.get_metadata()
for metadata_source in [global_metadata, experiment_metadata, session.get("metadata", dict())]:
metadata = dict_deep_update(metadata, metadata_source)
nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb")

session_id = session.get("metadata", dict()).get("NWBFile", dict()).get("session_id", None)
if upload_to_dandiset and session_id is None:
message = (
"The 'upload_to_dandiset' prompt was found in the YAML specification, "
"but the 'session_id' was not found for session with info block: "
f"\n\n {json.dumps(obj=session, indent=2)}\n\n"
"File intended for DANDI upload must include a session ID."
)
raise ValueError(message)

session_conversion_options = session.get("conversion_options", dict())
conversion_options = dict()
for key in converter.data_interface_objects:
conversion_options[key] = dict(session_conversion_options.get(key, dict()), **global_conversion_options)

nwbfile_name = session.get("nwbfile_name", f"temp_nwbfile_name_{file_counter}").strip(".nwb")
converter.run_conversion(
nwbfile_path=output_folder_path / f"{nwbfile_name}.nwb",
metadata=metadata,
overwrite=overwrite,
conversion_options=conversion_options,
)

if upload_to_dandiset:
dandiset_id = specification["upload_to_dandiset"]
staging = int(dandiset_id) >= 200_000
automatic_dandi_upload(
dandiset_id=dandiset_id,
nwb_folder_path=output_folder_path,
staging=staging,
)

return None # We can early return since organization below will occur within the upload step

# To properly mimic a true dandi organization, the full directory must be populated with NWBFiles.
all_nwbfile_paths = [nwbfile_path for nwbfile_path in output_folder_path.iterdir() if nwbfile_path.suffix == ".nwb"]
nwbfile_paths_to_set = [
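For reference, a minimal usage sketch of the updated entry point; the specification and folder paths below are placeholders, and the `DANDI_API_KEY` environment variable is assumed to hold a valid token:

```python
import os
from pathlib import Path

from neuroconv import run_conversion_from_yaml

# A specification containing 'upload_to_dandiset' requires the API token to be
# set in the environment; otherwise a ValueError is raised before conversion.
assert "DANDI_API_KEY" in os.environ

run_conversion_from_yaml(
    specification_file_path=Path("conversion_specification.yml"),  # placeholder
    data_folder_path=Path("raw_data"),  # placeholder
    output_folder_path=Path("nwb_output"),  # placeholder
    overwrite=True,
)
# With 'upload_to_dandiset' set, the produced NWB files are uploaded to the given
# dandiset and the function returns early, skipping the local organization step.
```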
1 change: 1 addition & 0 deletions tests/imports.py
@@ -68,6 +68,7 @@ def test_tools(self):
"get_package_version",
"is_package_installed",
"deploy_process",
"data_transfers",
"LocalPathExpander",
"get_module",
]
66 changes: 66 additions & 0 deletions tests/test_on_data/test_yaml/conversion_specifications/GIN_conversion_specification_dandi_upload.yml
@@ -0,0 +1,66 @@
metadata:
NWBFile:
lab: My Lab
institution: My Institution

conversion_options:
stub_test: True

data_interfaces:
ap: SpikeGLXRecordingInterface
lf: SpikeGLXRecordingInterface
phy: PhySortingInterface

upload_to_dandiset: "200560"

experiments:
ymaze:
metadata:
NWBFile:
session_description: Subject navigating a Y-shaped maze.

sessions:
- nwbfile_name: example_converter_spec_1
source_data:
ap:
file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.ap.bin
metadata:
NWBFile:
session_start_time: "2020-10-09T21:19:09+00:00"
session_id: "test-yaml-1"
Subject:
subject_id: "yaml-1"
sex: F
age: P35D
species: Mus musculus
- nwbfile_name: example_converter_spec_2.nwb
metadata:
NWBFile:
session_start_time: "2020-10-10T21:19:09+00:00"
session_id: "test-yaml-2"
Subject:
subject_id: "yaml-002"
sex: F
age: P35D
species: Mus musculus
source_data:
lf:
file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin

open_explore:
sessions:
- nwbfile_name: example_converter_spec_3
source_data:
lf:
file_path: spikeglx/Noise4Sam_g0/Noise4Sam_g0_imec0/Noise4Sam_g0_t0.imec0.lf.bin
phy:
folder_path: phy/phy_example_0/
metadata:
NWBFile:
session_start_time: "2020-10-11T21:19:09+00:00"
session_id: test YAML 3
Subject:
subject_id: YAML Subject Name
sex: F
age: P35D
species: Mus musculus
@@ -19,6 +19,7 @@
"fname",
[
"GIN_conversion_specification.yml",
"GIN_conversion_specification_dandi_upload.yml",
"GIN_conversion_specification_missing_nwbfile_names.yml",
"GIN_conversion_specification_no_nwbfile_name_or_other_metadata.yml",
"GIN_conversion_specification_videos.yml",
53 changes: 53 additions & 0 deletions tests/test_on_data/test_yaml/yaml_dandi_transfer_tools.py
@@ -0,0 +1,53 @@
import os
import platform
import time
from datetime import datetime, timedelta
from pathlib import Path

import dandi.dandiapi
import pytest
from packaging.version import Version

from neuroconv import run_conversion_from_yaml

from ..setup_paths import ECEPHY_DATA_PATH, OUTPUT_PATH

DANDI_API_KEY = os.getenv("DANDI_API_KEY")
HAVE_DANDI_KEY = DANDI_API_KEY is not None and DANDI_API_KEY != "" # can be "" from external forks
_PYTHON_VERSION = platform.python_version()


@pytest.mark.skipif(
not HAVE_DANDI_KEY or Version(".".join(_PYTHON_VERSION.split(".")[:2])) != Version("3.12"),
reason="You must set your DANDI_API_KEY to run this test!",
)
def test_run_conversion_from_yaml_with_dandi_upload():
path_to_test_yml_files = Path(__file__).parent / "conversion_specifications"
yaml_file_path = path_to_test_yml_files / "GIN_conversion_specification_dandi_upload.yml"
run_conversion_from_yaml(
specification_file_path=yaml_file_path,
data_folder_path=ECEPHY_DATA_PATH,
output_folder_path=OUTPUT_PATH,
overwrite=True,
)

time.sleep(60) # Give some buffer room for server to process before making assertions against DANDI API

client = dandi.dandiapi.DandiAPIClient(api_url="https://api-staging.dandiarchive.org/api")
dandiset = client.get_dandiset("200560")

expected_asset_paths = [
"sub-yaml-1/sub-yaml-1_ses-test-yaml-1_ecephys.nwb",
"sub-yaml-002/sub-yaml-002_ses-test-yaml-2_ecephys.nwb",
"sub-YAML-Subject-Name/sub-YAML-Subject-Name_ses-test-YAML-3_ecephys.nwb",
]
for asset_path in expected_asset_paths:
test_asset = dandiset.get_asset_by_path(path=asset_path) # Will error if not found
test_asset_metadata = test_asset.get_raw_metadata()

# Past uploads may have created the same apparent file, so look at the modification time to ensure
# this test is actually testing the most recent upload
date_modified = datetime.fromisoformat(
test_asset_metadata["dateModified"].split("Z")[0] # Timezones look a little messy
)
assert datetime.now() - date_modified < timedelta(minutes=10)