From e4a3d925fc60c2b229da7ba868ef8423a7e51bdc Mon Sep 17 00:00:00 2001 From: Anna Grim <108307071+anna-grim@users.noreply.github.com> Date: Fri, 10 May 2024 12:48:23 -0700 Subject: [PATCH] feat: heterogeneous feature generation, img_utils (#135) Co-authored-by: anna-grim --- src/deep_neurographs/geometry.py | 6 +- src/deep_neurographs/img_utils.py | 202 ++++++++++++++++++ .../machine_learning/feature_generation.py | 177 +++++---------- .../heterogeneous_feature_generation.py | 191 +++++++++++++++++ src/deep_neurographs/neurograph.py | 14 +- src/deep_neurographs/utils.py | 179 ++-------------- 6 files changed, 467 insertions(+), 302 deletions(-) create mode 100644 src/deep_neurographs/img_utils.py create mode 100644 src/deep_neurographs/machine_learning/heterogeneous_feature_generation.py diff --git a/src/deep_neurographs/geometry.py b/src/deep_neurographs/geometry.py index b353819..3e53e37 100644 --- a/src/deep_neurographs/geometry.py +++ b/src/deep_neurographs/geometry.py @@ -291,7 +291,7 @@ def fill_path(img, path, val=-1): """ for xyz in path: x, y, z = tuple(np.floor(xyz).astype(int)) - img[x - 1 : x + 2, y - 1 : y + 2, z - 1 : z + 2] = val + img[x - 1: x + 2, y - 1: y + 2, z - 1: z + 2] = val return img @@ -425,9 +425,9 @@ def align(neurograph, img, branch_1, branch_2, depth): best_d2 = None best_score = 0 for d1 in range(min(depth, len(branch_1) - 1)): - xyz_1 = neurograph.to_img(branch_1[d1], shift=True) + xyz_1 = neurograph.to_voxels(branch_1[d1], shift=True) for d2 in range(min(depth, len(branch_2) - 1)): - xyz_2 = neurograph.to_img(branch_2[d2], shift=True) + xyz_2 = neurograph.to_voxels(branch_2[d2], shift=True) line = make_line(xyz_1, xyz_2, 10) score = np.mean(get_profile(img, line, window=[3, 3, 3])) if score > best_score: diff --git a/src/deep_neurographs/img_utils.py b/src/deep_neurographs/img_utils.py new file mode 100644 index 0000000..0aeca15 --- /dev/null +++ b/src/deep_neurographs/img_utils.py @@ -0,0 +1,202 @@ +""" +Created on Sat May 9 11:00:00 2024 + +@author: Anna Grim +@email: anna.grim@alleninstitute.org + +Helper routines for working with images. + +""" + +from concurrent.futures import ThreadPoolExecutor +from copy import deepcopy + +import numpy as np +import tensorstore as ts +import zarr +from skimage.color import label2rgb + +SUPPORTED_DRIVERS = ["neuroglancer_precomputed", "n5", "zarr"] + + +# --- io utils --- +def open_tensorstore(path, driver): + """ + Uploads segmentation mask stored as a directory of shard files. + + Parameters + ---------- + path : str + Path to directory containing shard files. + driver : str + Storage driver needed to read data at "path". + + Returns + ------- + sparse_volume : dict + Sparse image volume. + + """ + assert driver in SUPPORTED_DRIVERS, "Error! Driver is not supported!" + arr = ts.open( + { + "driver": driver, + "kvstore": { + "driver": "gcs", + "bucket": "allen-nd-goog", + "path": path, + }, + "context": { + "cache_pool": {"total_bytes_limit": 1000000000}, + "cache_pool#remote": {"total_bytes_limit": 1000000000}, + "data_copy_concurrency": {"limit": 8}, + }, + "recheck_cached_data": "open", + } + ).result() + if driver == "neuroglancer_precomputed": + return arr[ts.d["channel"][0]] + elif driver == "zarr": + arr = arr[0, 0, :, :, :] + arr = arr[ts.d[0].transpose[2]] + arr = arr[ts.d[0].transpose[1]] + return arr + + +def open_zarr(path): + """ + Opens zarr file at "path". + + Parameters + ---------- + path : str + Path to zarr file to be opened. + + Returns + ------- + np.ndarray + Contents of zarr file at "path". + + """ + n5store = zarr.N5FSStore(path, "r") + if "653980" in path: + return zarr.open(n5store).ch488.s0 + elif "653158" in path: + return zarr.open(n5store).s0 + + +def read_tensorstore(arr, xyz, shape, from_center=True): + """ + Reads a chunk of data from the specified tensorstore array, given the + coordinates and shape of the chunk. + + Parameters + ---------- + arr : tensorstore.ndarray Array + Array from which data is to be read. + xyz : tuple + xyz coordinates of chunk to be read from the tensorstore array. + shape : tuple + Shape (dimensions) of the chunk to be read. + from_center : bool, optional + Indication of whether the provided coordinates represent the center of + the chunk or the starting point. If True, coordinates are the center; + if False, coordinates are the starting point of the chunk. The default + is True. + + Returns + ------- + numpy.ndarray + Chunk of data read from the tensorstore array. + + """ + chunk = read_chunk(arr, xyz, shape, from_center=from_center) + return chunk.read().result() + + +def read_tensorstore_with_bbox(img, bbox): + start = bbox["min"] + end = bbox["max"] + return ( + img[start[0]: end[0], start[1]: end[1], start[2]: end[2]] + .read() + .result() + ) + + +def read_chunk(arr, xyz, shape, from_center=True): + start, end = get_start_end(xyz, shape, from_center=from_center) + return deepcopy( + arr[start[0]: end[0], start[1]: end[1], start[2]: end[2]] + ) + + +def get_start_end(xyz, shape, from_center=True): + if from_center: + start = [xyz[i] - shape[i] // 2 for i in range(3)] + end = [xyz[i] + shape[i] // 2 for i in range(3)] + else: + start = xyz + end = [xyz[i] + shape[i] for i in range(3)] + return start, end + + +def read_superchunks(img_path, labels_path, xyz, shape, from_center=True): + with ThreadPoolExecutor() as executor: + img_job = executor.submit( + get_superchunk, + img_path, + "n5" if ".n5" in img_path else "zarr", + xyz, + shape, + from_center=from_center, + ) + labels_job = executor.submit( + get_superchunk, + labels_path, + "neuroglancer_precomputed", + xyz, + shape, + from_center=from_center, + ) + img = img_job.result().astype(np.int16) + labels = labels_job.result().astype(np.int64) + assert img.shape == labels.shape, "img.shape != labels.shape" + return img, labels + + +def get_superchunk(path, driver, xyz, shape, from_center=True): + arr = open_tensorstore(path, driver) + return read_tensorstore(arr, xyz, shape, from_center=from_center) + + +# -- Image Operations -- +def normalize_img(img): + img -= np.min(img) + return img / np.max(img) + + +def get_mip(img, axis=0): + """ + Compute the maximum intensity projection (MIP) of "img" along "axis". + + Parameters + ---------- + img : numpy.ndarray + Image to compute MIP of. + axis : int, optional + Projection axis. The default is 0. + + Returns + ------- + numpy.ndarray + MIP of "img". + + """ + return np.max(img, axis=axis) + + +def get_labels_mip(img, axis=0): + mip = np.max(img, axis=axis) + mip = label2rgb(mip) + return (255 * mip).astype(np.uint8) diff --git a/src/deep_neurographs/machine_learning/feature_generation.py b/src/deep_neurographs/machine_learning/feature_generation.py index eaae184..dccc35e 100644 --- a/src/deep_neurographs/machine_learning/feature_generation.py +++ b/src/deep_neurographs/machine_learning/feature_generation.py @@ -22,7 +22,7 @@ import numpy as np import tensorstore as ts -from deep_neurographs import geometry, utils +from deep_neurographs import geometry, img_utils, utils CHUNK_SIZE = [64, 64, 64] WINDOW = [5, 5, 5] @@ -49,36 +49,38 @@ def run( labels_path=None, proposals=None, ): + # Initializations + features = dict() + img_driver = "n5" if ".n5" in img_path else "zarr" + img = img_utils.open_tensorstore(img_path, img_driver) + proposals = neurograph.get_proposals() if proposals is None else proposals + + # Generate features if "Graph" in model_type: features = dict() features["branches"] = run_on_branches(neurograph) features["proposals"] = run_on_proposals( neurograph, + img, model_type, + proposals, search_radius, - img_path, labels_path=labels_path, - proposals=proposals, ) else: features = run_on_proposals( neurograph, + img, model_type, + proposals, search_radius, - img_path, labels_path=labels_path, - proposals=proposals, ) return features def run_on_proposals( - neurograph, - model_type, - search_radius, - img_path, - labels_path=None, - proposals=None, + neurograph, img, model_type, proposals, search_radius, labels_path=None ): """ Generates feature vectors for every proposal in a neurograph. @@ -88,19 +90,18 @@ def run_on_proposals( neurograph : NeuroGraph NeuroGraph generated from a directory of swcs generated from a predicted segmentation. + img : tensorstore.Tensorstore + Image stored in a GCS bucket. model_type : str Type of model to be trained. Options include: AdaBoost, RandomForest, FeedForwardNet, ConvNet, MultiModalNet. + proposals : list[frozenset] + List of proposals for which features will be generated. search_radius : float Search radius used to generate proposals. - img_path : str - Path to raw image stored in a GCS bucket. labels_path : str, optional Path to predicted segmentation stored in a GCS bucket. The default is None. - proposals : list[frozenset], optional - List of proposals for which features will be generated. The - default is None. Returns ------- @@ -110,26 +111,20 @@ def run_on_proposals( """ # Initializations - img_driver = "n5" if ".n5" in img_path else "zarr" - img = utils.open_tensorstore(img_path, img_driver) + features = dict() if labels_path: labels_driver = "neuroglancer_precomputed" labels = utils.open_tensorstore(labels_path, labels_driver) # Generate features - proposals = neurograph.get_proposals() if proposals is None else proposals - features = { - "skel": generate_skel_features(neurograph, proposals, search_radius) - } + features["skel"] = proposal_skeletal(neurograph, proposals, search_radius) if model_type in ["ConvNet", "MultiModalNet"]: assert labels_path, "Must provide label_path for model_type!" - features["img_chunks"], features["img_profile"] = generate_img_chunks( + features["chunks"], features["profiles"] = generate_chunks( neurograph, proposals, img, labels ) else: - features["img_profile"] = generate_proposal_profiles( - neurograph, proposals, img - ) + features["profiles"] = proposal_profiles(neurograph, proposals, img) return features @@ -154,7 +149,7 @@ def run_on_branches(neurograph): # -- Proposal Feature Extraction -- -def generate_img_chunks(neurograph, proposals, img, labels): +def generate_chunks(neurograph, proposals, img, labels): """ Generates an image chunk for each proposal such that the centroid of the image chunk is the midpoint of the proposal. Image chunks contain two @@ -184,10 +179,10 @@ def generate_img_chunks(neurograph, proposals, img, labels): threads = [None] * len(proposals) for t, proposal in enumerate(proposals): xyz_0, xyz_1 = neurograph.proposal_xyz(proposal) - coord_0 = utils.to_img(xyz_0) - coord_1 = utils.to_img(xyz_1) + coord_0 = utils.to_voxels(xyz_0) + coord_1 = utils.to_voxels(xyz_1) threads[t] = executor.submit( - get_img_chunk, img, labels, coord_0, coord_1, proposal + get_chunk, img, labels, coord_0, coord_1, proposal ) # Save result @@ -200,27 +195,27 @@ def generate_img_chunks(neurograph, proposals, img, labels): return chunks, profiles -def get_img_chunk(img, labels, coord_0, coord_1, thread_id=None): +def get_chunk(img, labels, coord_0, coord_1, thread_id=None): # Extract chunks midpoint = geometry.get_midpoint(coord_0, coord_1).astype(int) if type(img) == ts.TensorStore: - img_chunk = utils.read_tensorstore(img, midpoint, CHUNK_SIZE) + chunk = utils.read_tensorstore(img, midpoint, CHUNK_SIZE) labels_chunk = utils.read_tensorstore(labels, midpoint, CHUNK_SIZE) else: - img_chunk = utils.get_chunk(img, midpoint, CHUNK_SIZE) - labels_chunk = utils.get_chunk(labels, midpoint, CHUNK_SIZE) + chunk = img_utils.read_chunk(img, midpoint, CHUNK_SIZE) + labels_chunk = img_utils.read_chunk(labels, midpoint, CHUNK_SIZE) # Coordinate transform - img_chunk = utils.normalize_img(img_chunk) + chunk = utils.normalize_img(chunk) patch_coord_0 = utils.img_to_patch(coord_0, midpoint, CHUNK_SIZE) patch_coord_1 = utils.img_to_patch(coord_1, midpoint, CHUNK_SIZE) # Generate features path = geometry.make_line(patch_coord_0, patch_coord_1, N_PROFILE_PTS) - profile = geometry.get_profile(img_chunk, path, window=WINDOW) + profile = geometry.get_profile(chunk, path, window=WINDOW) labels_chunk[labels_chunk > 0] = 1 labels_chunk = geometry.fill_path(labels_chunk, path, val=2) - chunk = np.stack([img_chunk, labels_chunk], axis=0) + chunk = np.stack([chunk, labels_chunk], axis=0) # Output if thread_id: @@ -229,7 +224,7 @@ def get_img_chunk(img, labels, coord_0, coord_1, thread_id=None): return chunk, profile -def generate_proposal_profiles(neurograph, proposals, img): +def proposal_profiles(neurograph, proposals, img): """ Generates an image intensity profile along each proposal by reading from an image on the cloud. @@ -276,34 +271,6 @@ def generate_edge_profiles(neurograph, img): pass -def generate_node_profiles(neurograph, img): - # Generate coordinates - coords = dict() - for i in neurograph.nodes: - if neurograph.degree[i] == 1: - path = get_leaf_profile_coords(neurograph, i) - else: - path = get_junction_profile_coords(neurograph, i) - - coords[i] = { - "bbox": get_node_bbox(neurograph, path), - "path": geometry.sample_curve(path, N_PROFILE_PTS), - } - - # Generate profiles - with ThreadPoolExecutor() as executor: - threads = [] - for i, coords_i in coords.items(): - threads.append(executor.submit(get_profile, img, coords_i, i)) - - # Process results - profiles = dict() - for thread in as_completed(threads): - i, profile = thread.result() - profiles[i] = profile - return profiles - - def get_profile(img, coords, thread_id): """ Gets the image intensity profile for a given proposal. @@ -325,7 +292,7 @@ def get_profile(img, coords, thread_id): Image intensity profile. """ - chunk = utils.read_tensorstore_bbox(img, coords["bbox"]) + chunk = img_utils.read_tensorstore_with_bbox(img, coords["bbox"]) return thread_id, [chunk[tuple(xyz)] for xyz in coords["path"]] @@ -349,8 +316,8 @@ def get_proposal_profile_coords(neurograph, proposal): """ # Compute coordinates xyz_0, xyz_1 = neurograph.proposal_xyz(proposal) - coord_0 = utils.to_img(xyz_0) - coord_1 = utils.to_img(xyz_1) + coord_0 = utils.to_voxels(xyz_0) + coord_1 = utils.to_voxels(xyz_1) # Store coordinates bbox = utils.get_minimal_bbox(coord_0, coord_1) @@ -362,43 +329,8 @@ def get_proposal_profile_coords(neurograph, proposal): } return coords - -def get_leaf_profile_coords(neurograph, i): - j = list(neurograph.neighbors(i))[0] - return get_profile_path(neurograph.orient_edge((i, j), i, key="xyz")) - - -def get_junction_profile_coords(neurograph, i): - # Get branches - nbs = list(neurograph.neighbors(i)) - xyz_list_1 = neurograph.orient_edge((i, nbs[0]), i, key="xyz") - xyz_list_2 = neurograph.orient_edge((i, nbs[1]), i, key="xyz") - # Get profile paths - path_1 = get_profile_path(xyz_list_1) - path_2 = get_profile_path(xyz_list_2) - return np.vstack([np.flip(path_1, axis=0), path_2]) - - -def get_profile_path(xyz_list): - path_length = 0 - for i in range(1, len(xyz_list)): - if i > 0: - path_length += geometry.dist(xyz_list[i - 1], xyz_list[i]) - if path_length >= NODE_PROFILE_DEPTH: - break - return xyz_list[0:i, :] - - -def get_node_bbox(neurograph, coords): - bbox = { - "start": np.floor(np.min(coords, axis=0)).astype(int), - "end": np.ceil(np.max(coords, axis=0)).astype(int), - } - return bbox - - -def generate_skel_features(neurograph, proposals, search_radius): +def proposal_skeletal(neurograph, proposals, search_radius): features = dict() for proposal in proposals: i, j = tuple(proposal) @@ -483,9 +415,9 @@ def avg_branch_radii(neurograph, edge): return np.array([np.mean(neurograph.edges[edge]["radius"])]) -def n_nearby_leafs(neurograph, proposal, r): +def n_nearby_leafs(neurograph, proposal, radius): xyz = neurograph.proposal_midpoint(proposal) - leafs = neurograph.query_kdtree(xyz, 24, node_type="leaf") + leafs = neurograph.query_kdtree(xyz, radius, node_type="leaf") return len(leafs) - 1 @@ -512,24 +444,11 @@ def generate_branch_features(neurograph): for edge in neurograph.edges: i, j = tuple(edge) features[frozenset(edge)] = np.concatenate( - (-1, np.zeros((32))), axis=None # edge type + (-1, np.zeros((32))), axis=None ) return features -""" - 0, - neurograph.degree[i], - neurograph.degree[j], - 0, - get_radii(neurograph, edge), - np.mean(neurograph.edges[i, j]["radius"]), - np.mean(neurograph.edges[i, j]["radius"]), - np.zeros(12), - np.zeros((N_PROFILE_PTS + 2)), -""" - - def compute_curvature(neurograph, edge): kappa = curvature(neurograph.edges[edge]["xyz"]) n_pts = len(kappa) @@ -580,7 +499,7 @@ def __multiblock_feature_matrix(neurographs, features, blocks, model_type): neurographs[block_id], features[block_id], shift=idx_shift ) elif model_type == "ConvNet": - X_i, y_i, idx_transforms_i = stack_img_chunks( + X_i, y_i, idx_transforms_i = stack_chunks( neurographs[block_id], features[block_id], shift=idx_shift ) else: @@ -616,7 +535,7 @@ def __feature_matrix(neurographs, features, model_type): if model_type == "MultiModalNet": return get_multimodal_features(neurographs, features) elif model_type == "ConvNet": - return stack_img_chunks(neurographs, features) + return stack_chunks(neurographs, features) else: return get_feature_vectors(neurographs, features) @@ -647,10 +566,10 @@ def get_multimodal_features(neurograph, features, shift=0): idx_transforms = {"block_to_idxs": set(), "idx_to_edge": dict()} # Build - for i, edge in enumerate(features["img_chunks"].keys()): - X[i, :] = features["img_chunks"][edge] + for i, edge in enumerate(features["chunks"].keys()): + X[i, :] = features["chunks"][edge] x[i, :] = np.concatenate( - (features["skel"][edge], features["img_profile"][edge]) + (features["skel"][edge], features["profiles"][edge]) ) y[i] = 1 if edge in neurograph.target_edges else 0 idx_transforms["block_to_idxs"].add(i + shift) @@ -658,15 +577,15 @@ def get_multimodal_features(neurograph, features, shift=0): return X, x, y, idx_transforms -def stack_img_chunks(neurograph, features, shift=0): +def stack_chunks(neurograph, features, shift=0): # Initialize X = np.zeros(((neurograph.n_proposals(), 2) + tuple(CHUNK_SIZE))) y = np.zeros((neurograph.n_proposals())) idx_transforms = {"block_to_idxs": set(), "idx_to_edge": dict()} # Build - for i, edge in enumerate(features["img_chunks"].keys()): - X[i, :] = features["img_chunks"][edge] + for i, edge in enumerate(features["chunks"].keys()): + X[i, :] = features["chunks"][edge] y[i] = 1 if edge in neurograph.target_edges else 0 idx_transforms["block_to_idxs"].add(i + shift) idx_transforms["idx_to_edge"][i + shift] = edge diff --git a/src/deep_neurographs/machine_learning/heterogeneous_feature_generation.py b/src/deep_neurographs/machine_learning/heterogeneous_feature_generation.py new file mode 100644 index 0000000..0476af8 --- /dev/null +++ b/src/deep_neurographs/machine_learning/heterogeneous_feature_generation.py @@ -0,0 +1,191 @@ +""" +Created on Sat May 9 11:00:00 2024 + +@author: Anna Grim +@email: anna.grim@alleninstitute.org + +Generates features for training and performing inference with a heterogenous +graph neural network. + +""" +from concurrent.futures import ThreadPoolExecutor, as_completed + +import numpy as np + +from deep_neurographs import geometry, img_utils, utils +from deep_neurographs.machine_learning import feature_generation as fg + +WINDOW = [5, 5, 5] +N_PROFILE_PTS = 10 +NODE_PROFILE_DEPTH = 15 + + +# -- Wrappers -- +def run(neurograph, search_radius, img_path, proposals=None): + # Initializations + features = dict() + img_driver = "n5" if ".n5" in img_path else "zarr" + img = img_utils.open_tensorstore(img_path, img_driver) + proposals = neurograph.get_proposals() if proposals is None else proposals + + # Generate features + features["nodes"] = run_on_nodes(neurograph, img) + features["branches"] = run_on_branches(neurograph, img) + features["proposals"] = run_on_proposals( + neurograph, search_radius, img, proposals + ) + + +def run_on_nodes(neurograph, img): + features = dict() + features["skel"] = node_skeletal(neurograph) + features["profiles"] = node_profiles(neurograph, img) + return features + + +def run_on_branches(neurograph, img): + return {"skel": branch_skeletal(neurograph)} + + +def run_on_proposals(neurograph, search_radius, img, proposals): + """ + Generates feature vectors for every proposal in a neurograph. + + Parameters + ---------- + neurograph : NeuroGraph + NeuroGraph generated from a directory of swcs generated from a + predicted segmentation. + search_radius : float + Search radius used to generate proposals. + img : str + Image stored in a GCS bucket. + proposals : list[frozenset] + List of proposals for which features will be generated. + + Returns + ------- + features : dict + Dictionary where each key-value pair corresponds to a type of feature + vector and the numerical vector. + + """ + features = dict() + features["skel"] = proposal_skeletal(neurograph, proposals, search_radius) + features["profiles"] = fg.proposal_profiles(neurograph, proposals, img) + return features + + +# -- Skeletal Features -- +def node_skeletal(neurograph): + features = dict() + for i in neurograph.nodes: + features[i] = np.concatenate( + ( + neurograph.nodes[i]["radius"], + neurograph.degree[i], + len(neurograph.nodes[i]["proposals"]), + ), + axis=None, + ) + return features + + +def branch_skeletal(neurograph): + features = dict() + for edge in neurograph.edges: + features[edge] = np.concatenate( + ( + np.mean(neurograph.edges[edge]["radius"]), + neurograph.edge_length(edge), + ), + axis=None, + ) + + +def proposal_skeletal(neurograph, proposals, search_radius): + features = dict() + for proposal in proposals: + i, j = tuple(proposal) + features[proposal] = np.concatenate( + ( + neurograph.proposal_length(proposal), + fg.n_nearby_leafs(neurograph, proposal, search_radius), + fg.get_radii(neurograph, proposal), + fg.get_directionals(neurograph, proposal, 8), + fg.get_directionals(neurograph, proposal, 16), + fg.get_directionals(neurograph, proposal, 32), + fg.get_directionals(neurograph, proposal, 64), + ), + axis=None, + ) + return features + + +# -- Image features -- +def node_profiles(neurograph, img): + # Generate coordinates + coords = dict() + for i in neurograph.nodes: + if neurograph.degree[i] == 1: + path = get_leaf_profile_path(neurograph, i) + else: + path = get_junction_profile_path(neurograph, i) + coords[i] = get_node_profile_coords(neurograph, path) + + # Generate profiles + with ThreadPoolExecutor() as executor: + threads = [] + for i, coords_i in coords.items(): + threads.append(executor.submit(fg.get_profile, img, coords_i, i)) + + # Process results + profiles = dict() + for thread in as_completed(threads): + i, profile = thread.result() + profiles[i] = profile + return profiles + + +def get_leaf_profile_path(neurograph, i): + j = list(neurograph.neighbors(i))[0] + return get_profile_path(neurograph.orient_edge((i, j), i, key="xyz")) + + +def get_junction_profile_path(neurograph, i): + # Get branches + nbs = list(neurograph.neighbors(i)) + xyz_list_1 = neurograph.orient_edge((i, nbs[0]), i, key="xyz") + xyz_list_2 = neurograph.orient_edge((i, nbs[1]), i, key="xyz") + + # Get profile paths + path_1 = get_profile_path(xyz_list_1) + path_2 = get_profile_path(xyz_list_2) + return np.vstack([np.flip(path_1, axis=0), path_2]) + + +def get_profile_path(xyz_list): + path_length = 0 + for i in range(1, len(xyz_list)): + if i > 0: + path_length += geometry.dist(xyz_list[i - 1], xyz_list[i]) + if path_length >= NODE_PROFILE_DEPTH: + break + return xyz_list[0:i, :] + + +def get_node_profile_coords(neurograph, path): + path = np.array([utils.to_voxels(xyz) for xyz in path]) + bbox = get_node_bbox(neurograph, path) + coords = { + "bbox": bbox, + "path": geometry.sample_curve(path - bbox["min"], N_PROFILE_PTS), + } + return coords + + +def get_node_bbox(neurograph, coords): + return { + "min": np.floor(np.min(coords, axis=0)).astype(int) - 1, + "max": np.ceil(np.max(coords, axis=0)).astype(int) + 1, + } diff --git a/src/deep_neurographs/neurograph.py b/src/deep_neurographs/neurograph.py index f72e046..1ccc18b 100644 --- a/src/deep_neurographs/neurograph.py +++ b/src/deep_neurographs/neurograph.py @@ -572,7 +572,7 @@ def edge_length(self, edge): def is_contained(self, node_or_xyz, buffer=0): if self.bbox: - img_coord = self.to_img(node_or_xyz) + img_coord = self.to_voxels(node_or_xyz) return utils.is_contained(self.bbox, img_coord, buffer=buffer) else: return True @@ -585,12 +585,12 @@ def branch_contained(self, xyz_list): else: return True - def to_img(self, node_or_xyz, shift=False): + def to_voxels(self, node_or_xyz, shift=False): shift = self.origin if shift else np.zeros((3)) if type(node_or_xyz) == int: - img_coord = utils.to_img(self.nodes[node_or_xyz]["xyz"]) + img_coord = utils.to_voxels(self.nodes[node_or_xyz]["xyz"]) else: - img_coord = utils.to_img(node_or_xyz) + img_coord = utils.to_voxels(node_or_xyz) return img_coord - shift def is_leaf(self, i): @@ -616,9 +616,9 @@ def get_edge_attr(self, edge, key): def to_patch_coords(self, edge, midpoint, chunk_size): patch_coords = [] for xyz in self.edges[edge]["xyz"]: - img_coord = self.to_img(xyz) - coord = utils.img_to_patch(img_coord, midpoint, chunk_size) - patch_coords.append(coord) + coord = self.to_voxels(xyz) + local_coord = utils.img_to_patch(coord, midpoint, chunk_size) + patch_coords.append(local_coord) return patch_coords def get_reconstruction(self, proposals, upd_self=False): diff --git a/src/deep_neurographs/utils.py b/src/deep_neurographs/utils.py index 32847df..447f38a 100644 --- a/src/deep_neurographs/utils.py +++ b/src/deep_neurographs/utils.py @@ -12,8 +12,6 @@ import math import os import shutil -from concurrent.futures import ThreadPoolExecutor -from copy import deepcopy from io import BytesIO from random import sample from time import time @@ -21,13 +19,9 @@ import numpy as np import psutil -import tensorstore as ts import torch -import zarr -from skimage.color import label2rgb ANISOTROPY = np.array([0.748, 0.748, 1.0]) -SUPPORTED_DRIVERS = ["neuroglancer_precomputed", "n5", "zarr"] # --- dictionary utils --- @@ -318,133 +312,7 @@ def list_gcs_filenames(bucket, cloud_path, extension): return [blob.name for blob in blobs if extension in blob.name] -# --- io utils --- -def open_zarr(path): - """ - Opens zarr file at "path". - - Parameters - ---------- - path : str - Path to zarr file to be opened. - - Returns - ------- - np.ndarray - Contents of zarr file at "path". - - """ - n5store = zarr.N5FSStore(path, "r") - if "653980" in path: - return zarr.open(n5store).ch488.s0 - elif "653158" in path: - return zarr.open(n5store).s0 - - -def open_tensorstore(path, driver): - """ - Uploads segmentation mask stored as a directory of shard files. - - Parameters - ---------- - path : str - Path to directory containing shard files. - driver : str - Storage driver needed to read data at "path". - - Returns - ------- - sparse_volume : dict - Sparse image volume. - - """ - assert driver in SUPPORTED_DRIVERS, "Error! Driver is not supported!" - arr = ts.open( - { - "driver": driver, - "kvstore": { - "driver": "gcs", - "bucket": "allen-nd-goog", - "path": path, - }, - "context": { - "cache_pool": {"total_bytes_limit": 1000000000}, - "cache_pool#remote": {"total_bytes_limit": 1000000000}, - "data_copy_concurrency": {"limit": 8}, - }, - "recheck_cached_data": "open", - } - ).result() - if driver == "neuroglancer_precomputed": - return arr[ts.d["channel"][0]] - elif driver == "zarr": - arr = arr[0, 0, :, :, :] - arr = arr[ts.d[0].transpose[2]] - arr = arr[ts.d[0].transpose[1]] - return arr - - -def read_tensorstore(arr, xyz, shape, from_center=True): - chunk = get_chunk(arr, xyz, shape, from_center=from_center) - return chunk.read().result() - - -def read_tensorstore_bbox(img, bbox): - start = bbox["min"] - end = bbox["max"] - return ( - img[start[0] : end[0], start[1] : end[1], start[2] : end[2]] - .read() - .result() - ) - - -def get_chunk(arr, xyz, shape, from_center=True): - start, end = get_start_end(xyz, shape, from_center=from_center) - return deepcopy( - arr[start[0] : end[0], start[1] : end[1], start[2] : end[2]] - ) - - -def get_start_end(xyz, shape, from_center=True): - if from_center: - start = [xyz[i] - shape[i] // 2 for i in range(3)] - end = [xyz[i] + shape[i] // 2 for i in range(3)] - else: - start = xyz - end = [xyz[i] + shape[i] for i in range(3)] - return start, end - - -def get_superchunks(img_path, labels_path, xyz, shape, from_center=True): - with ThreadPoolExecutor() as executor: - img_job = executor.submit( - get_superchunk, - img_path, - "n5" if ".n5" in img_path else "zarr", - xyz, - shape, - from_center=from_center, - ) - labels_job = executor.submit( - get_superchunk, - labels_path, - "neuroglancer_precomputed", - xyz, - shape, - from_center=from_center, - ) - img = img_job.result().astype(np.int16) - labels = labels_job.result().astype(np.int64) - assert img.shape == labels.shape, "img.shape != labels.shape" - return img, labels - - -def get_superchunk(path, driver, xyz, shape, from_center=True): - arr = open_tensorstore(path, driver) - return read_tensorstore(arr, xyz, shape, from_center=from_center) - - +# -- io utils -- def read_json(path): """ Reads json file stored at "path". @@ -487,8 +355,8 @@ def read_txt(path): def parse_metadata(path, anisotropy=[1.0, 1.0, 1.0]): metadata = read_json(path) origin = metadata["chunk_origin"] - chunk_origin = to_img(origin, anisotropy=anisotropy).tolist() - return chunk_origin, metadata["chunk_shape"] + chunk_origin = to_voxels(origin, anisotropy=anisotropy) + return chunk_origin.tolist(), metadata["chunk_shape"] def write_json(path, contents): @@ -532,16 +400,6 @@ def write_txt(path, contents): f.close() -def set_path(dir_name, filename, ext): - cnt = 0 - ext = ext.replace(".", "") - path = os.path.join(dir_name, f"{filename}.{ext}") - while os.path.exists(path): - path = os.path.join(dir_name, f"{filename}.{cnt}.{ext}") - cnt += 1 - return path - - # --- coordinate conversions --- def img_to_patch(xyz, patch_centroid, patch_dims): half_patch_dims = [patch_dims[i] // 2 for i in range(3)] @@ -558,7 +416,7 @@ def to_world(xyz, shift=[0, 0, 0]): return tuple([xyz[i] * ANISOTROPY[i] - shift[i] for i in range(3)]) -def to_img(xyz, anisotropy=ANISOTROPY): +def to_voxels(xyz, anisotropy=ANISOTROPY): return (xyz / np.array(anisotropy)).astype(int) @@ -576,7 +434,7 @@ def is_contained(bbox, xyz, buffer=0): def is_list_contained(bbox, xyz_list): - return any([is_contained(bbox, to_img(xyz)) for xyz in xyz_list]) + return any([is_contained(bbox, to_voxels(xyz)) for xyz in xyz_list]) def sample_singleton(my_container): @@ -683,21 +541,6 @@ def get_swc_id(path): return filename.split(".")[0] -def get_img_mip(img, axis=0): - return np.max(img, axis=axis) - - -def get_labels_mip(img, axis=0): - mip = np.max(img, axis=axis) - mip = label2rgb(mip) - return (255 * mip).astype(np.uint8) - - -def normalize_img(img): - img -= np.min(img) - return img / np.max(img) - - def numpy_to_hashable(arr): return [tuple(item) for item in arr.tolist()] @@ -712,4 +555,14 @@ def get_memory_usage(): def get_batches(iterable, batch_size): for start in range(0, len(iterable), batch_size): - yield iterable[start : min(start + batch_size, len(iterable))] + yield iterable[start: min(start + batch_size, len(iterable))] + + +def set_path(dir_name, filename, ext): + cnt = 0 + ext = ext.replace(".", "") + path = os.path.join(dir_name, f"{filename}.{ext}") + while os.path.exists(path): + path = os.path.join(dir_name, f"{filename}.{cnt}.{ext}") + cnt += 1 + return path