From 7b9e6356b202baa94e57265c6e959f72fa6bbeab Mon Sep 17 00:00:00 2001 From: CBroz1 Date: Thu, 3 Oct 2024 12:11:58 -0500 Subject: [PATCH 1/2] Fix dict/list handling in Export --- src/spyglass/common/common_usage.py | 33 +++++++++++++++++++---------- src/spyglass/utils/dj_graph.py | 2 +- src/spyglass/utils/nwb_helper_fn.py | 11 ++++++---- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/spyglass/common/common_usage.py b/src/spyglass/common/common_usage.py index 23d9d04be..ced518521 100644 --- a/src/spyglass/common/common_usage.py +++ b/src/spyglass/common/common_usage.py @@ -144,11 +144,19 @@ def stop_export(self, **kwargs) -> None: # before actually exporting anything, which is more associated with # Selection - def list_file_paths(self, key: dict) -> list[str]: + def list_file_paths(self, key: dict, as_dict=True) -> list[str]: """Return a list of unique file paths for a given restriction/key. Note: This list reflects files fetched during the export process. For upstream files, use RestrGraph.file_paths. + + Parameters + ---------- + key : dict + Any valid restriction key for ExportSelection.Table + as_dict : bool, optional + Return as a list of dicts: [{'file_path': x}]. Default True. + If False, returns a list of strings without key. """ file_table = self * self.File & key analysis_fp = [ @@ -159,7 +167,8 @@ def list_file_paths(self, key: dict) -> list[str]: Nwbfile().get_abs_path(fname) for fname in (AnalysisNwbfile * file_table).fetch("nwb_file_name") ] - return [{"file_path": p} for p in list({*analysis_fp, *nwbfile_fp})] + unique_ft = list({*analysis_fp, *nwbfile_fp}) + return [{"file_path": p} for p in unique_ft] if as_dict else unique_ft def get_restr_graph(self, key: dict, verbose=False) -> RestrGraph: """Return a RestrGraph for a restriction/key's tables/restrictions. 
@@ -270,31 +279,33 @@ def make(self, key): (self.Table & id_dict).delete_quick() restr_graph = ExportSelection().get_restr_graph(paper_key) - file_paths = unique_dicts( # Original plus upstream files - query.list_file_paths(paper_key) + restr_graph.file_paths - ) + # Original plus upstream files + file_paths = { + *query.list_file_paths(paper_key, as_dict=False), + *restr_graph.file_paths, + } # Check for linked nwb objects and add them to the export unlinked_files = set() for file in file_paths: - if not (links := get_linked_nwbs(file["file_path"])): + if not (links := get_linked_nwbs(file)): unlinked_files.add(file) continue logger.warning( "Dandi not yet supported for linked nwb objects " - + f"excluding {file['file_path']} from export " + + f"excluding {file} from export " + f" and including {links} instead" ) - for link in links: - unlinked_files.add(link) - file_paths = {"file_path": link for link in unlinked_files} + unlinked_files.update(links) + file_paths = unlinked_files # TODO: what if linked items have links? 
table_inserts = [ {**key, **rd, "table_id": i} for i, rd in enumerate(restr_graph.as_dict) ] file_inserts = [ - {**key, **fp, "file_id": i} for i, fp in enumerate(file_paths) + {**key, "file_path": fp, "file_id": i} + for i, fp in enumerate(file_paths) ] version_ids = query.fetch("spyglass_version") diff --git a/src/spyglass/utils/dj_graph.py b/src/spyglass/utils/dj_graph.py index 48847f61b..a66a6ac5d 100644 --- a/src/spyglass/utils/dj_graph.py +++ b/src/spyglass/utils/dj_graph.py @@ -783,7 +783,7 @@ def file_paths(self) -> List[str]: """ self.cascade() return [ - {"file_path": self.analysis_file_tbl.get_abs_path(file)} + self.analysis_file_tbl.get_abs_path(file) for file in set( [f for files in self.file_dict.values() for f in files] ) diff --git a/src/spyglass/utils/nwb_helper_fn.py b/src/spyglass/utils/nwb_helper_fn.py index 7e930de04..5d5fdaca4 100644 --- a/src/spyglass/utils/nwb_helper_fn.py +++ b/src/spyglass/utils/nwb_helper_fn.py @@ -4,6 +4,7 @@ import os.path from itertools import groupby from pathlib import Path +from typing import List import numpy as np import pynwb @@ -109,12 +110,14 @@ def file_from_dandi(filepath): return False -def get_linked_nwbs(path): - """Return a list of paths to NWB files that are linked by objects in - the file at the given path.""" +def get_linked_nwbs(path: str) -> List[str]: + """Return the paths linked in the given NWB file. + + Given an NWB file path, open & read the file to find any linked NWB objects. 
+ """ with pynwb.NWBHDF5IO(path, "r") as io: # open the nwb file (opens externally linked files as well) - nwb = io.read() + _ = io.read() # get the linked files return [x for x in io._HDF5IO__built if x != path] From 37780444c567a6685c7e97aa30f61153f90130dd Mon Sep 17 00:00:00 2001 From: CBroz1 Date: Thu, 3 Oct 2024 12:19:50 -0500 Subject: [PATCH 2/2] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11ef7e09d..0657f993b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ dj.FreeTable(dj.conn(), "common_session.session_group").drop() - Enforce match between ingested nwb probe geometry and existing table entry #1074 - Update DataJoint install and password instructions #1131 -- Fix dandi upload process for nwb's with video or linked objects #1095 +- Fix dandi upload process for nwb's with video or linked objects #1095, #1151 - Minor docs fixes #1145 ### Pipelines