diff --git a/CHANGELOG.md b/CHANGELOG.md index 8515f40f8..106bed87e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ dj.FreeTable(dj.conn(), "common_session.session_group").drop() - Enforce match between ingested nwb probe geometry and existing table entry #1074 - Update DataJoint install and password instructions #1131 +- Fix dandi upload process for nwb's with video or linked objects #1095 - Minor docs fixes #1145 ### Pipelines diff --git a/src/spyglass/common/common_dandi.py b/src/spyglass/common/common_dandi.py index 6dfbb56e3..8a9be9677 100644 --- a/src/spyglass/common/common_dandi.py +++ b/src/spyglass/common/common_dandi.py @@ -1,6 +1,7 @@ import os import shutil from pathlib import Path +from typing import Optional import datajoint as dj import fsspec @@ -9,7 +10,7 @@ from fsspec.implementations.cached import CachingFileSystem from spyglass.common.common_usage import Export, ExportSelection -from spyglass.settings import export_dir +from spyglass.settings import export_dir, raw_dir from spyglass.utils import SpyglassMixin, logger from spyglass.utils.sql_helper_fn import SQLDumpHelper @@ -21,7 +22,8 @@ from dandi.consts import known_instances from dandi.dandiapi import DandiAPIClient from dandi.metadata.nwb import get_metadata - from dandi.organize import OrganizeInvalid + from dandi.organize import CopyMode, FileOperationMode, OrganizeInvalid + from dandi.pynwb_utils import nwb_has_external_links from dandi.validate_types import Severity except (ImportError, ModuleNotFoundError) as e: @@ -31,8 +33,11 @@ DandiAPIClient, get_metadata, OrganizeInvalid, + CopyMode, + FileOperationMode, Severity, - ) = [None] * 6 + nwb_has_external_links, + ) = [None] * 9 logger.warning(e) @@ -87,6 +92,7 @@ def compile_dandiset( dandiset_id: str, dandi_api_key: str = None, dandi_instance: str = "dandi", + skip_raw_files: bool = False, ): """Compile a Dandiset from the export. Parameters @@ -100,6 +106,8 @@ def compile_dandiset( DANDI_API_KEY is set. 
dandi_instance : str, optional What instance of Dandi the dandiset is on. Defaults to dev server. + skip_raw_files : bool, optional + Dev tool to skip raw files in the export. Defaults to False. """ key = (Export & key).fetch1("KEY") paper_id = (Export & key).fetch1("paper_id") @@ -137,9 +145,14 @@ def compile_dandiset( os.makedirs(destination_dir, exist_ok=False) for file in source_files: - if not os.path.exists( - f"{destination_dir}/{os.path.basename(file)}" - ): + if os.path.exists(f"{destination_dir}/{os.path.basename(file)}"): + continue + if skip_raw_files and raw_dir in file: + continue + # copy the file if it has external links so can be safely edited + if nwb_has_external_links(file): + shutil.copy(file, f"{destination_dir}/{os.path.basename(file)}") + else: os.symlink(file, f"{destination_dir}/{os.path.basename(file)}") # validate the dandiset @@ -154,11 +167,16 @@ def compile_dandiset( # organize the files in the dandiset directory dandi.organize.organize( - destination_dir, dandiset_dir, invalid=OrganizeInvalid.WARN + destination_dir, + dandiset_dir, + update_external_file_paths=True, + invalid=OrganizeInvalid.FAIL, + media_files_mode=CopyMode.SYMLINK, + files_mode=FileOperationMode.COPY, ) # get the dandi name translations - translations = translate_name_to_dandi(destination_dir) + translations = lookup_dandi_translation(destination_dir, dandiset_dir) # upload the dandiset to the dandi server if dandi_api_key: @@ -200,7 +218,7 @@ def write_mysqldump(self, export_key: dict): docker_id=None, spyglass_version=spyglass_version, ) - sql_dump.write_mysqldump(self & key, file_suffix="_dandi") + sql_dump.write_mysqldump([self & key], file_suffix="_dandi") def _get_metadata(path): @@ -229,6 +247,7 @@ def translate_name_to_dandi(folder): dict dictionary of filename to dandi_path translations """ + files = Path(folder).glob("*") metadata = list(map(_get_metadata, files)) metadata, skip_invalid = dandi.organize.filter_invalid_metadata_rows( @@ -243,6 +262,39 @@ 
def translate_name_to_dandi(folder): ] +def lookup_dandi_translation(source_dir: str, dandiset_dir: str): + """Get the dandi_path for each nwb file in the source_dir from + the organized dandi directory + + Parameters + ---------- + source_dir : str + location of the source files + dandiset_dir : str + location of the organized dandiset directory + + Returns + ------- + dict + dictionary of filename to dandi_path translations + """ + # get the obj_id and dandipath for each nwb file in the dandiset + dandi_name_dict = {} + for dandi_file in Path(dandiset_dir).rglob("*.nwb"): + dandi_path = dandi_file.relative_to(dandiset_dir).as_posix() + with pynwb.NWBHDF5IO(dandi_file, "r") as io: + nwb = io.read() + dandi_name_dict[nwb.object_id] = dandi_path + # for each file in the source_dir, lookup the dandipath based on the obj_id + name_translation = {} + for file in Path(source_dir).glob("*"): + with pynwb.NWBHDF5IO(file, "r") as io: + nwb = io.read() + dandi_path = dandi_name_dict[nwb.object_id] + name_translation[file.name] = dandi_path + return name_translation + + def validate_dandiset( folder, min_severity="ERROR", ignore_external_files=False ): diff --git a/src/spyglass/common/common_usage.py b/src/spyglass/common/common_usage.py index ad49d82a1..23d9d04be 100644 --- a/src/spyglass/common/common_usage.py +++ b/src/spyglass/common/common_usage.py @@ -22,6 +22,7 @@ unique_dicts, update_analysis_for_dandi_standard, ) +from spyglass.utils.nwb_helper_fn import get_linked_nwbs from spyglass.utils.sql_helper_fn import SQLDumpHelper schema = dj.schema("common_usage") @@ -236,6 +237,7 @@ class File(SpyglassMixin, dj.Part): def populate_paper(self, paper_id: Union[str, dict]): """Populate Export for a given paper_id.""" + self.load_shared_schemas() if isinstance(paper_id, dict): paper_id = paper_id.get("paper_id") self.populate(ExportSelection().paper_export_id(paper_id)) @@ -272,6 +274,21 @@ def make(self, key): query.list_file_paths(paper_key) + restr_graph.file_paths ) + # 
Check for linked nwb objects and add them to the export + unlinked_files = set() + for file in file_paths: + if not (links := get_linked_nwbs(file["file_path"])): + unlinked_files.add(file["file_path"]) + continue + logger.warning( + "Dandi not yet supported for linked nwb objects " + + f"excluding {file['file_path']} from export " + + f" and including {links} instead" + ) + for link in links: + unlinked_files.add(link) + file_paths = [{"file_path": link} for link in unlinked_files] + table_inserts = [ {**key, **rd, "table_id": i} for i, rd in enumerate(restr_graph.as_dict) diff --git a/src/spyglass/utils/dj_helper_fn.py b/src/spyglass/utils/dj_helper_fn.py index 35d77ef3c..ca8b99f2b 100644 --- a/src/spyglass/utils/dj_helper_fn.py +++ b/src/spyglass/utils/dj_helper_fn.py @@ -355,6 +355,7 @@ def get_child_tables(table): def update_analysis_for_dandi_standard( filepath: str, age: str = "P4M/P8M", + resolve_external_table: bool = True, ): """Function to resolve common nwb file format errors within the database @@ -364,6 +365,9 @@ def update_analysis_for_dandi_standard( abs path to the file to edit age : str, optional age to assign animal if missing, by default "P4M/P8M" + resolve_external_table : bool, optional + whether to update the external table.
Set False if editing file + outside the database, by default True """ from spyglass.common import LabMember @@ -394,7 +398,7 @@ def update_analysis_for_dandi_standard( ) file["/general/subject/species"][()] = new_species_value - if not ( + elif not ( len(species_value.split(" ")) == 2 or "NCBITaxon" in species_value ): raise ValueError( @@ -427,7 +431,9 @@ def update_analysis_for_dandi_standard( file["/general/experimenter"][:] = new_experimenter_value # update the datajoint external store table to reflect the changes - _resolve_external_table(filepath, file_name) + if resolve_external_table: + location = "raw" if filepath.endswith("_.nwb") else "analysis" + _resolve_external_table(filepath, file_name, location) def dandi_format_names(experimenter: List) -> List: @@ -510,7 +516,10 @@ def make_file_obj_id_unique(nwb_path: str): new_id = str(uuid4()) with h5py.File(nwb_path, "a") as f: f.attrs["object_id"] = new_id - _resolve_external_table(nwb_path, nwb_path.split("/")[-1]) + location = "raw" if nwb_path.endswith("_.nwb") else "analysis" + _resolve_external_table( + nwb_path, nwb_path.split("/")[-1], location=location + ) return new_id diff --git a/src/spyglass/utils/nwb_helper_fn.py b/src/spyglass/utils/nwb_helper_fn.py index af25ec987..7e930de04 100644 --- a/src/spyglass/utils/nwb_helper_fn.py +++ b/src/spyglass/utils/nwb_helper_fn.py @@ -69,6 +69,14 @@ def get_nwb_file(nwb_file_path): from ..common.common_dandi import DandiPath dandi_key = {"filename": os.path.basename(nwb_file_path)} + if not DandiPath() & dandi_key: + # Check if non-copied raw file is in Dandi + dandi_key = { + "filename": Path(nwb_file_path).name.replace( + "_.nwb", ".nwb" + ) + } + if not DandiPath & dandi_key: # If not in Dandi, then we can't find the file raise FileNotFoundError( @@ -101,6 +109,16 @@ def file_from_dandi(filepath): return False +def get_linked_nwbs(path): + """Return a list of paths to NWB files that are linked by objects in + the file at the given path.""" + with 
pynwb.NWBHDF5IO(path, "r") as io: + # open the nwb file (opens externally linked files as well) + nwb = io.read() + # get the linked files + return [x for x in io._HDF5IO__built if x != path] + + def get_config(nwb_file_path, calling_table=None): """Return a dictionary of config settings for the given NWB file. If the file does not exist, return an empty dict.