From e4a3d925fc60c2b229da7ba868ef8423a7e51bdc Mon Sep 17 00:00:00 2001
From: Anna Grim <108307071+anna-grim@users.noreply.github.com>
Date: Fri, 10 May 2024 12:48:23 -0700
Subject: [PATCH] feat: heterogeneous feature generation, img_utils (#135)

Co-authored-by: anna-grim <anna.grim@alleninstitute.org>
---
 src/deep_neurographs/geometry.py              |   6 +-
 src/deep_neurographs/img_utils.py             | 202 ++++++++++++++++++
 .../machine_learning/feature_generation.py    | 177 +++++----------
 .../heterogeneous_feature_generation.py       | 191 +++++++++++++++++
 src/deep_neurographs/neurograph.py            |  14 +-
 src/deep_neurographs/utils.py                 | 179 ++--------------
 6 files changed, 467 insertions(+), 302 deletions(-)
 create mode 100644 src/deep_neurographs/img_utils.py
 create mode 100644 src/deep_neurographs/machine_learning/heterogeneous_feature_generation.py

diff --git a/src/deep_neurographs/geometry.py b/src/deep_neurographs/geometry.py
index b353819..3e53e37 100644
--- a/src/deep_neurographs/geometry.py
+++ b/src/deep_neurographs/geometry.py
@@ -291,7 +291,7 @@ def fill_path(img, path, val=-1):
     """
     for xyz in path:
         x, y, z = tuple(np.floor(xyz).astype(int))
-        img[x - 1 : x + 2, y - 1 : y + 2, z - 1 : z + 2] = val
+        img[x - 1: x + 2, y - 1: y + 2, z - 1: z + 2] = val
     return img
 
 
@@ -425,9 +425,9 @@ def align(neurograph, img, branch_1, branch_2, depth):
     best_d2 = None
     best_score = 0
     for d1 in range(min(depth, len(branch_1) - 1)):
-        xyz_1 = neurograph.to_img(branch_1[d1], shift=True)
+        xyz_1 = neurograph.to_voxels(branch_1[d1], shift=True)
         for d2 in range(min(depth, len(branch_2) - 1)):
-            xyz_2 = neurograph.to_img(branch_2[d2], shift=True)
+            xyz_2 = neurograph.to_voxels(branch_2[d2], shift=True)
             line = make_line(xyz_1, xyz_2, 10)
             score = np.mean(get_profile(img, line, window=[3, 3, 3]))
             if score > best_score:
diff --git a/src/deep_neurographs/img_utils.py b/src/deep_neurographs/img_utils.py
new file mode 100644
index 0000000..0aeca15
--- /dev/null
+++ b/src/deep_neurographs/img_utils.py
@@ -0,0 +1,202 @@
+"""
+Created on Sat May 9 11:00:00 2024
+
+@author: Anna Grim
+@email: anna.grim@alleninstitute.org
+
+Helper routines for working with images.
+
+"""
+
+from concurrent.futures import ThreadPoolExecutor
+from copy import deepcopy
+
+import numpy as np
+import tensorstore as ts
+import zarr
+from skimage.color import label2rgb
+
+SUPPORTED_DRIVERS = ["neuroglancer_precomputed", "n5", "zarr"]
+
+
+# --- io utils ---
+def open_tensorstore(path, driver):
+    """
+    Uploads segmentation mask stored as a directory of shard files.
+
+    Parameters
+    ----------
+    path : str
+        Path to directory containing shard files.
+    driver : str
+        Storage driver needed to read data at "path".
+
+    Returns
+    -------
+    sparse_volume : dict
+        Sparse image volume.
+
+    """
+    assert driver in SUPPORTED_DRIVERS, "Error! Driver is not supported!"
+    arr = ts.open(
+        {
+            "driver": driver,
+            "kvstore": {
+                "driver": "gcs",
+                "bucket": "allen-nd-goog",
+                "path": path,
+            },
+            "context": {
+                "cache_pool": {"total_bytes_limit": 1000000000},
+                "cache_pool#remote": {"total_bytes_limit": 1000000000},
+                "data_copy_concurrency": {"limit": 8},
+            },
+            "recheck_cached_data": "open",
+        }
+    ).result()
+    if driver == "neuroglancer_precomputed":
+        return arr[ts.d["channel"][0]]
+    elif driver == "zarr":
+        arr = arr[0, 0, :, :, :]
+        arr = arr[ts.d[0].transpose[2]]
+        arr = arr[ts.d[0].transpose[1]]
+    return arr
+
+
+def open_zarr(path):
+    """
+    Opens zarr file at "path".
+
+    Parameters
+    ----------
+    path : str
+        Path to zarr file to be opened.
+
+    Returns
+    -------
+    np.ndarray
+        Contents of zarr file at "path".
+
+    """
+    n5store = zarr.N5FSStore(path, "r")
+    if "653980" in path:
+        return zarr.open(n5store).ch488.s0
+    elif "653158" in path:
+        return zarr.open(n5store).s0
+
+
+def read_tensorstore(arr, xyz, shape, from_center=True):
+    """
+    Reads a chunk of data from the specified tensorstore array, given the
+    coordinates and shape of the chunk.
+
+    Parameters
+    ----------
+    arr : tensorstore.ndarray Array
+        Array from which data is to be read.
+    xyz : tuple
+        xyz coordinates of chunk to be read from the tensorstore array.
+    shape : tuple
+        Shape (dimensions) of the chunk to be read.
+    from_center : bool, optional
+        Indication of whether the provided coordinates represent the center of
+        the chunk or the starting point. If True, coordinates are the center;
+        if False, coordinates are the starting point of the chunk. The default
+        is True.
+
+    Returns
+    -------
+    numpy.ndarray
+        Chunk of data read from the tensorstore array.
+
+    """
+    chunk = read_chunk(arr, xyz, shape, from_center=from_center)
+    return chunk.read().result()
+
+
+def read_tensorstore_with_bbox(img, bbox):
+    start = bbox["min"]
+    end = bbox["max"]
+    return (
+        img[start[0]: end[0], start[1]: end[1], start[2]: end[2]]
+        .read()
+        .result()
+    )
+
+
+def read_chunk(arr, xyz, shape, from_center=True):
+    start, end = get_start_end(xyz, shape, from_center=from_center)
+    return deepcopy(
+        arr[start[0]: end[0], start[1]: end[1], start[2]: end[2]]
+    )
+
+
+def get_start_end(xyz, shape, from_center=True):
+    if from_center:
+        start = [xyz[i] - shape[i] // 2 for i in range(3)]
+        end = [xyz[i] + shape[i] // 2 for i in range(3)]
+    else:
+        start = xyz
+        end = [xyz[i] + shape[i] for i in range(3)]
+    return start, end
+
+
+def read_superchunks(img_path, labels_path, xyz, shape, from_center=True):
+    with ThreadPoolExecutor() as executor:
+        img_job = executor.submit(
+            get_superchunk,
+            img_path,
+            "n5" if ".n5" in img_path else "zarr",
+            xyz,
+            shape,
+            from_center=from_center,
+        )
+        labels_job = executor.submit(
+            get_superchunk,
+            labels_path,
+            "neuroglancer_precomputed",
+            xyz,
+            shape,
+            from_center=from_center,
+        )
+    img = img_job.result().astype(np.int16)
+    labels = labels_job.result().astype(np.int64)
+    assert img.shape == labels.shape, "img.shape != labels.shape"
+    return img, labels
+
+
+def get_superchunk(path, driver, xyz, shape, from_center=True):
+    arr = open_tensorstore(path, driver)
+    return read_tensorstore(arr, xyz, shape, from_center=from_center)
+
+
+# -- Image Operations --
+def normalize_img(img):
+    img -= np.min(img)
+    return img / np.max(img)
+
+
+def get_mip(img, axis=0):
+    """
+    Compute the maximum intensity projection (MIP) of "img" along "axis".
+
+    Parameters
+    ----------
+    img : numpy.ndarray
+        Image to compute MIP of.
+    axis : int, optional
+        Projection axis. The default is 0.
+
+    Returns
+    -------
+    numpy.ndarray
+        MIP of "img".
+
+    """
+    return np.max(img, axis=axis)
+
+
+def get_labels_mip(img, axis=0):
+    mip = np.max(img, axis=axis)
+    mip = label2rgb(mip)
+    return (255 * mip).astype(np.uint8)
diff --git a/src/deep_neurographs/machine_learning/feature_generation.py b/src/deep_neurographs/machine_learning/feature_generation.py
index eaae184..dccc35e 100644
--- a/src/deep_neurographs/machine_learning/feature_generation.py
+++ b/src/deep_neurographs/machine_learning/feature_generation.py
@@ -22,7 +22,7 @@
 import numpy as np
 import tensorstore as ts
 
-from deep_neurographs import geometry, utils
+from deep_neurographs import geometry, img_utils, utils
 
 CHUNK_SIZE = [64, 64, 64]
 WINDOW = [5, 5, 5]
@@ -49,36 +49,38 @@ def run(
     labels_path=None,
     proposals=None,
 ):
+    # Initializations
+    features = dict()
+    img_driver = "n5" if ".n5" in img_path else "zarr"
+    img = img_utils.open_tensorstore(img_path, img_driver)
+    proposals = neurograph.get_proposals() if proposals is None else proposals
+
+    # Generate features
     if "Graph" in model_type:
         features = dict()
         features["branches"] = run_on_branches(neurograph)
         features["proposals"] = run_on_proposals(
             neurograph,
+            img,
             model_type,
+            proposals,
             search_radius,
-            img_path,
             labels_path=labels_path,
-            proposals=proposals,
         )
     else:
         features = run_on_proposals(
             neurograph,
+            img,
             model_type,
+            proposals,
             search_radius,
-            img_path,
             labels_path=labels_path,
-            proposals=proposals,
         )
     return features
 
 
 def run_on_proposals(
-    neurograph,
-    model_type,
-    search_radius,
-    img_path,
-    labels_path=None,
-    proposals=None,
+    neurograph, img, model_type, proposals, search_radius, labels_path=None
 ):
     """
     Generates feature vectors for every proposal in a neurograph.
@@ -88,19 +90,18 @@ def run_on_proposals(
     neurograph : NeuroGraph
         NeuroGraph generated from a directory of swcs generated from a
         predicted segmentation.
+    img : tensorstore.Tensorstore
+        Image stored in a GCS bucket.
     model_type : str
         Type of model to be trained. Options include: AdaBoost, RandomForest,
         FeedForwardNet, ConvNet, MultiModalNet.
+    proposals : list[frozenset]
+        List of proposals for which features will be generated.
     search_radius : float
         Search radius used to generate proposals.
-    img_path : str
-        Path to raw image stored in a GCS bucket.
     labels_path : str, optional
         Path to predicted segmentation stored in a GCS bucket. The default is
         None.
-    proposals : list[frozenset], optional
-        List of proposals for which features will be generated. The
-        default is None.
 
     Returns
     -------
@@ -110,26 +111,20 @@ def run_on_proposals(
 
     """
     # Initializations
-    img_driver = "n5" if ".n5" in img_path else "zarr"
-    img = utils.open_tensorstore(img_path, img_driver)
+    features = dict()
     if labels_path:
         labels_driver = "neuroglancer_precomputed"
         labels = utils.open_tensorstore(labels_path, labels_driver)
 
     # Generate features
-    proposals = neurograph.get_proposals() if proposals is None else proposals
-    features = {
-        "skel": generate_skel_features(neurograph, proposals, search_radius)
-    }
+    features["skel"] = proposal_skeletal(neurograph, proposals, search_radius)
     if model_type in ["ConvNet", "MultiModalNet"]:
         assert labels_path, "Must provide label_path for model_type!"
-        features["img_chunks"], features["img_profile"] = generate_img_chunks(
+        features["chunks"], features["profiles"] = generate_chunks(
             neurograph, proposals, img, labels
         )
     else:
-        features["img_profile"] = generate_proposal_profiles(
-            neurograph, proposals, img
-        )
+        features["profiles"] = proposal_profiles(neurograph, proposals, img)
     return features
 
 
@@ -154,7 +149,7 @@ def run_on_branches(neurograph):
 
 
 # -- Proposal Feature Extraction --
-def generate_img_chunks(neurograph, proposals, img, labels):
+def generate_chunks(neurograph, proposals, img, labels):
     """
     Generates an image chunk for each proposal such that the centroid of the
     image chunk is the midpoint of the proposal. Image chunks contain two
@@ -184,10 +179,10 @@ def generate_img_chunks(neurograph, proposals, img, labels):
         threads = [None] * len(proposals)
         for t, proposal in enumerate(proposals):
             xyz_0, xyz_1 = neurograph.proposal_xyz(proposal)
-            coord_0 = utils.to_img(xyz_0)
-            coord_1 = utils.to_img(xyz_1)
+            coord_0 = utils.to_voxels(xyz_0)
+            coord_1 = utils.to_voxels(xyz_1)
             threads[t] = executor.submit(
-                get_img_chunk, img, labels, coord_0, coord_1, proposal
+                get_chunk, img, labels, coord_0, coord_1, proposal
             )
 
         # Save result
@@ -200,27 +195,27 @@ def generate_img_chunks(neurograph, proposals, img, labels):
     return chunks, profiles
 
 
-def get_img_chunk(img, labels, coord_0, coord_1, thread_id=None):
+def get_chunk(img, labels, coord_0, coord_1, thread_id=None):
     # Extract chunks
     midpoint = geometry.get_midpoint(coord_0, coord_1).astype(int)
     if type(img) == ts.TensorStore:
-        img_chunk = utils.read_tensorstore(img, midpoint, CHUNK_SIZE)
+        chunk = utils.read_tensorstore(img, midpoint, CHUNK_SIZE)
         labels_chunk = utils.read_tensorstore(labels, midpoint, CHUNK_SIZE)
     else:
-        img_chunk = utils.get_chunk(img, midpoint, CHUNK_SIZE)
-        labels_chunk = utils.get_chunk(labels, midpoint, CHUNK_SIZE)
+        chunk = img_utils.read_chunk(img, midpoint, CHUNK_SIZE)
+        labels_chunk = img_utils.read_chunk(labels, midpoint, CHUNK_SIZE)
 
     # Coordinate transform
-    img_chunk = utils.normalize_img(img_chunk)
+    chunk = utils.normalize_img(chunk)
     patch_coord_0 = utils.img_to_patch(coord_0, midpoint, CHUNK_SIZE)
     patch_coord_1 = utils.img_to_patch(coord_1, midpoint, CHUNK_SIZE)
 
     # Generate features
     path = geometry.make_line(patch_coord_0, patch_coord_1, N_PROFILE_PTS)
-    profile = geometry.get_profile(img_chunk, path, window=WINDOW)
+    profile = geometry.get_profile(chunk, path, window=WINDOW)
     labels_chunk[labels_chunk > 0] = 1
     labels_chunk = geometry.fill_path(labels_chunk, path, val=2)
-    chunk = np.stack([img_chunk, labels_chunk], axis=0)
+    chunk = np.stack([chunk, labels_chunk], axis=0)
 
     # Output
     if thread_id:
@@ -229,7 +224,7 @@ def get_img_chunk(img, labels, coord_0, coord_1, thread_id=None):
         return chunk, profile
 
 
-def generate_proposal_profiles(neurograph, proposals, img):
+def proposal_profiles(neurograph, proposals, img):
     """
     Generates an image intensity profile along each proposal by reading from
     an image on the cloud.
@@ -276,34 +271,6 @@ def generate_edge_profiles(neurograph, img):
     pass
 
 
-def generate_node_profiles(neurograph, img):
-    # Generate coordinates
-    coords = dict()
-    for i in neurograph.nodes:
-        if neurograph.degree[i] == 1:
-            path = get_leaf_profile_coords(neurograph, i)
-        else:
-            path = get_junction_profile_coords(neurograph, i)
-
-        coords[i] = {
-            "bbox": get_node_bbox(neurograph, path),
-            "path": geometry.sample_curve(path, N_PROFILE_PTS),
-        }
-
-    # Generate profiles
-    with ThreadPoolExecutor() as executor:
-        threads = []
-        for i, coords_i in coords.items():
-            threads.append(executor.submit(get_profile, img, coords_i, i))
-
-    # Process results
-    profiles = dict()
-    for thread in as_completed(threads):
-        i, profile = thread.result()
-        profiles[i] = profile
-    return profiles
-
-
 def get_profile(img, coords, thread_id):
     """
     Gets the image intensity profile for a given proposal.
@@ -325,7 +292,7 @@ def get_profile(img, coords, thread_id):
         Image intensity profile.
 
     """
-    chunk = utils.read_tensorstore_bbox(img, coords["bbox"])
+    chunk = img_utils.read_tensorstore_with_bbox(img, coords["bbox"])
     return thread_id, [chunk[tuple(xyz)] for xyz in coords["path"]]
 
 
@@ -349,8 +316,8 @@ def get_proposal_profile_coords(neurograph, proposal):
     """
     # Compute coordinates
     xyz_0, xyz_1 = neurograph.proposal_xyz(proposal)
-    coord_0 = utils.to_img(xyz_0)
-    coord_1 = utils.to_img(xyz_1)
+    coord_0 = utils.to_voxels(xyz_0)
+    coord_1 = utils.to_voxels(xyz_1)
 
     # Store coordinates
     bbox = utils.get_minimal_bbox(coord_0, coord_1)
@@ -362,43 +329,8 @@ def get_proposal_profile_coords(neurograph, proposal):
     }
     return coords
 
-  
-def get_leaf_profile_coords(neurograph, i):
-    j = list(neurograph.neighbors(i))[0]
-    return get_profile_path(neurograph.orient_edge((i, j), i, key="xyz"))
-
-
-def get_junction_profile_coords(neurograph, i):
-    # Get branches
-    nbs = list(neurograph.neighbors(i))
-    xyz_list_1 = neurograph.orient_edge((i, nbs[0]), i, key="xyz")
-    xyz_list_2 = neurograph.orient_edge((i, nbs[1]), i, key="xyz")
 
-    # Get profile paths
-    path_1 = get_profile_path(xyz_list_1)
-    path_2 = get_profile_path(xyz_list_2)
-    return np.vstack([np.flip(path_1, axis=0), path_2])
-
-
-def get_profile_path(xyz_list):
-    path_length = 0
-    for i in range(1, len(xyz_list)):
-        if i > 0:
-            path_length += geometry.dist(xyz_list[i - 1], xyz_list[i])
-        if path_length >= NODE_PROFILE_DEPTH:
-            break
-    return xyz_list[0:i, :]
-
-
-def get_node_bbox(neurograph, coords):
-    bbox = {
-        "start": np.floor(np.min(coords, axis=0)).astype(int),
-        "end": np.ceil(np.max(coords, axis=0)).astype(int),
-    }
-    return bbox
-
-
-def generate_skel_features(neurograph, proposals, search_radius):
+def proposal_skeletal(neurograph, proposals, search_radius):
     features = dict()
     for proposal in proposals:
         i, j = tuple(proposal)
@@ -483,9 +415,9 @@ def avg_branch_radii(neurograph, edge):
     return np.array([np.mean(neurograph.edges[edge]["radius"])])
 
 
-def n_nearby_leafs(neurograph, proposal, r):
+def n_nearby_leafs(neurograph, proposal, radius):
     xyz = neurograph.proposal_midpoint(proposal)
-    leafs = neurograph.query_kdtree(xyz, 24, node_type="leaf")
+    leafs = neurograph.query_kdtree(xyz, radius, node_type="leaf")
     return len(leafs) - 1
 
 
@@ -512,24 +444,11 @@ def generate_branch_features(neurograph):
     for edge in neurograph.edges:
         i, j = tuple(edge)
         features[frozenset(edge)] = np.concatenate(
-            (-1, np.zeros((32))), axis=None  # edge type
+            (-1, np.zeros((32))), axis=None
         )
     return features
 
 
-"""
-                0,
-                neurograph.degree[i],
-                neurograph.degree[j],
-                0,
-                get_radii(neurograph, edge),
-                np.mean(neurograph.edges[i, j]["radius"]),
-                np.mean(neurograph.edges[i, j]["radius"]),
-                np.zeros(12),
-                np.zeros((N_PROFILE_PTS + 2)),
-"""
-
-
 def compute_curvature(neurograph, edge):
     kappa = curvature(neurograph.edges[edge]["xyz"])
     n_pts = len(kappa)
@@ -580,7 +499,7 @@ def __multiblock_feature_matrix(neurographs, features, blocks, model_type):
                 neurographs[block_id], features[block_id], shift=idx_shift
             )
         elif model_type == "ConvNet":
-            X_i, y_i, idx_transforms_i = stack_img_chunks(
+            X_i, y_i, idx_transforms_i = stack_chunks(
                 neurographs[block_id], features[block_id], shift=idx_shift
             )
         else:
@@ -616,7 +535,7 @@ def __feature_matrix(neurographs, features, model_type):
     if model_type == "MultiModalNet":
         return get_multimodal_features(neurographs, features)
     elif model_type == "ConvNet":
-        return stack_img_chunks(neurographs, features)
+        return stack_chunks(neurographs, features)
     else:
         return get_feature_vectors(neurographs, features)
 
@@ -647,10 +566,10 @@ def get_multimodal_features(neurograph, features, shift=0):
     idx_transforms = {"block_to_idxs": set(), "idx_to_edge": dict()}
 
     # Build
-    for i, edge in enumerate(features["img_chunks"].keys()):
-        X[i, :] = features["img_chunks"][edge]
+    for i, edge in enumerate(features["chunks"].keys()):
+        X[i, :] = features["chunks"][edge]
         x[i, :] = np.concatenate(
-            (features["skel"][edge], features["img_profile"][edge])
+            (features["skel"][edge], features["profiles"][edge])
         )
         y[i] = 1 if edge in neurograph.target_edges else 0
         idx_transforms["block_to_idxs"].add(i + shift)
@@ -658,15 +577,15 @@ def get_multimodal_features(neurograph, features, shift=0):
     return X, x, y, idx_transforms
 
 
-def stack_img_chunks(neurograph, features, shift=0):
+def stack_chunks(neurograph, features, shift=0):
     # Initialize
     X = np.zeros(((neurograph.n_proposals(), 2) + tuple(CHUNK_SIZE)))
     y = np.zeros((neurograph.n_proposals()))
     idx_transforms = {"block_to_idxs": set(), "idx_to_edge": dict()}
 
     # Build
-    for i, edge in enumerate(features["img_chunks"].keys()):
-        X[i, :] = features["img_chunks"][edge]
+    for i, edge in enumerate(features["chunks"].keys()):
+        X[i, :] = features["chunks"][edge]
         y[i] = 1 if edge in neurograph.target_edges else 0
         idx_transforms["block_to_idxs"].add(i + shift)
         idx_transforms["idx_to_edge"][i + shift] = edge
diff --git a/src/deep_neurographs/machine_learning/heterogeneous_feature_generation.py b/src/deep_neurographs/machine_learning/heterogeneous_feature_generation.py
new file mode 100644
index 0000000..0476af8
--- /dev/null
+++ b/src/deep_neurographs/machine_learning/heterogeneous_feature_generation.py
@@ -0,0 +1,191 @@
+"""
+Created on Sat May 9 11:00:00 2024
+
+@author: Anna Grim
+@email: anna.grim@alleninstitute.org
+
+Generates features for training and performing inference with a heterogenous
+graph neural network.
+
+"""
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import numpy as np
+
+from deep_neurographs import geometry, img_utils, utils
+from deep_neurographs.machine_learning import feature_generation as fg
+
+WINDOW = [5, 5, 5]
+N_PROFILE_PTS = 10
+NODE_PROFILE_DEPTH = 15
+
+
+# -- Wrappers --
+def run(neurograph, search_radius, img_path, proposals=None):
+    # Initializations
+    features = dict()
+    img_driver = "n5" if ".n5" in img_path else "zarr"
+    img = img_utils.open_tensorstore(img_path, img_driver)
+    proposals = neurograph.get_proposals() if proposals is None else proposals
+
+    # Generate features
+    features["nodes"] = run_on_nodes(neurograph, img)
+    features["branches"] = run_on_branches(neurograph, img)
+    features["proposals"] = run_on_proposals(
+        neurograph, search_radius, img, proposals
+    )
+
+
+def run_on_nodes(neurograph, img):
+    features = dict()
+    features["skel"] = node_skeletal(neurograph)
+    features["profiles"] = node_profiles(neurograph, img)
+    return features
+
+
+def run_on_branches(neurograph, img):
+    return {"skel": branch_skeletal(neurograph)}
+
+
+def run_on_proposals(neurograph, search_radius, img, proposals):
+    """
+    Generates feature vectors for every proposal in a neurograph.
+
+    Parameters
+    ----------
+    neurograph : NeuroGraph
+        NeuroGraph generated from a directory of swcs generated from a
+        predicted segmentation.
+    search_radius : float
+        Search radius used to generate proposals.
+    img : str
+        Image stored in a GCS bucket.
+    proposals : list[frozenset]
+        List of proposals for which features will be generated.
+
+    Returns
+    -------
+    features : dict
+        Dictionary where each key-value pair corresponds to a type of feature
+        vector and the numerical vector.
+
+    """
+    features = dict()
+    features["skel"] = proposal_skeletal(neurograph, proposals, search_radius)
+    features["profiles"] = fg.proposal_profiles(neurograph, proposals, img)
+    return features
+
+
+# -- Skeletal Features --
+def node_skeletal(neurograph):
+    features = dict()
+    for i in neurograph.nodes:
+        features[i] = np.concatenate(
+            (
+                neurograph.nodes[i]["radius"],
+                neurograph.degree[i],
+                len(neurograph.nodes[i]["proposals"]),
+            ),
+            axis=None,
+        )
+    return features
+
+
+def branch_skeletal(neurograph):
+    features = dict()
+    for edge in neurograph.edges:
+        features[edge] = np.concatenate(
+            (
+                np.mean(neurograph.edges[edge]["radius"]),
+                neurograph.edge_length(edge),
+            ),
+            axis=None,
+        )
+
+
+def proposal_skeletal(neurograph, proposals, search_radius):
+    features = dict()
+    for proposal in proposals:
+        i, j = tuple(proposal)
+        features[proposal] = np.concatenate(
+            (
+                neurograph.proposal_length(proposal),
+                fg.n_nearby_leafs(neurograph, proposal, search_radius),
+                fg.get_radii(neurograph, proposal),
+                fg.get_directionals(neurograph, proposal, 8),
+                fg.get_directionals(neurograph, proposal, 16),
+                fg.get_directionals(neurograph, proposal, 32),
+                fg.get_directionals(neurograph, proposal, 64),
+            ),
+            axis=None,
+        )
+    return features
+
+
+# -- Image features --
+def node_profiles(neurograph, img):
+    # Generate coordinates
+    coords = dict()
+    for i in neurograph.nodes:
+        if neurograph.degree[i] == 1:
+            path = get_leaf_profile_path(neurograph, i)
+        else:
+            path = get_junction_profile_path(neurograph, i)
+        coords[i] = get_node_profile_coords(neurograph, path)
+
+    # Generate profiles
+    with ThreadPoolExecutor() as executor:
+        threads = []
+        for i, coords_i in coords.items():
+            threads.append(executor.submit(fg.get_profile, img, coords_i, i))
+
+    # Process results
+    profiles = dict()
+    for thread in as_completed(threads):
+        i, profile = thread.result()
+        profiles[i] = profile
+    return profiles
+
+
+def get_leaf_profile_path(neurograph, i):
+    j = list(neurograph.neighbors(i))[0]
+    return get_profile_path(neurograph.orient_edge((i, j), i, key="xyz"))
+
+
+def get_junction_profile_path(neurograph, i):
+    # Get branches
+    nbs = list(neurograph.neighbors(i))
+    xyz_list_1 = neurograph.orient_edge((i, nbs[0]), i, key="xyz")
+    xyz_list_2 = neurograph.orient_edge((i, nbs[1]), i, key="xyz")
+
+    # Get profile paths
+    path_1 = get_profile_path(xyz_list_1)
+    path_2 = get_profile_path(xyz_list_2)
+    return np.vstack([np.flip(path_1, axis=0), path_2])
+
+
+def get_profile_path(xyz_list):
+    path_length = 0
+    for i in range(1, len(xyz_list)):
+        if i > 0:
+            path_length += geometry.dist(xyz_list[i - 1], xyz_list[i])
+        if path_length >= NODE_PROFILE_DEPTH:
+            break
+    return xyz_list[0:i, :]
+
+
+def get_node_profile_coords(neurograph, path):
+    path = np.array([utils.to_voxels(xyz) for xyz in path])
+    bbox = get_node_bbox(neurograph, path)
+    coords = {
+        "bbox": bbox,
+        "path": geometry.sample_curve(path - bbox["min"], N_PROFILE_PTS),
+    }
+    return coords
+
+
+def get_node_bbox(neurograph, coords):
+    return {
+        "min": np.floor(np.min(coords, axis=0)).astype(int) - 1,
+        "max": np.ceil(np.max(coords, axis=0)).astype(int) + 1,
+    }
diff --git a/src/deep_neurographs/neurograph.py b/src/deep_neurographs/neurograph.py
index f72e046..1ccc18b 100644
--- a/src/deep_neurographs/neurograph.py
+++ b/src/deep_neurographs/neurograph.py
@@ -572,7 +572,7 @@ def edge_length(self, edge):
 
     def is_contained(self, node_or_xyz, buffer=0):
         if self.bbox:
-            img_coord = self.to_img(node_or_xyz)
+            img_coord = self.to_voxels(node_or_xyz)
             return utils.is_contained(self.bbox, img_coord, buffer=buffer)
         else:
             return True
@@ -585,12 +585,12 @@ def branch_contained(self, xyz_list):
         else:
             return True
 
-    def to_img(self, node_or_xyz, shift=False):
+    def to_voxels(self, node_or_xyz, shift=False):
         shift = self.origin if shift else np.zeros((3))
         if type(node_or_xyz) == int:
-            img_coord = utils.to_img(self.nodes[node_or_xyz]["xyz"])
+            img_coord = utils.to_voxels(self.nodes[node_or_xyz]["xyz"])
         else:
-            img_coord = utils.to_img(node_or_xyz)
+            img_coord = utils.to_voxels(node_or_xyz)
         return img_coord - shift
 
     def is_leaf(self, i):
@@ -616,9 +616,9 @@ def get_edge_attr(self, edge, key):
     def to_patch_coords(self, edge, midpoint, chunk_size):
         patch_coords = []
         for xyz in self.edges[edge]["xyz"]:
-            img_coord = self.to_img(xyz)
-            coord = utils.img_to_patch(img_coord, midpoint, chunk_size)
-            patch_coords.append(coord)
+            coord = self.to_voxels(xyz)
+            local_coord = utils.img_to_patch(coord, midpoint, chunk_size)
+            patch_coords.append(local_coord)
         return patch_coords
 
     def get_reconstruction(self, proposals, upd_self=False):
diff --git a/src/deep_neurographs/utils.py b/src/deep_neurographs/utils.py
index 32847df..447f38a 100644
--- a/src/deep_neurographs/utils.py
+++ b/src/deep_neurographs/utils.py
@@ -12,8 +12,6 @@
 import math
 import os
 import shutil
-from concurrent.futures import ThreadPoolExecutor
-from copy import deepcopy
 from io import BytesIO
 from random import sample
 from time import time
@@ -21,13 +19,9 @@
 
 import numpy as np
 import psutil
-import tensorstore as ts
 import torch
-import zarr
-from skimage.color import label2rgb
 
 ANISOTROPY = np.array([0.748, 0.748, 1.0])
-SUPPORTED_DRIVERS = ["neuroglancer_precomputed", "n5", "zarr"]
 
 
 # --- dictionary utils ---
@@ -318,133 +312,7 @@ def list_gcs_filenames(bucket, cloud_path, extension):
     return [blob.name for blob in blobs if extension in blob.name]
 
 
-# --- io utils ---
-def open_zarr(path):
-    """
-    Opens zarr file at "path".
-
-    Parameters
-    ----------
-    path : str
-        Path to zarr file to be opened.
-
-    Returns
-    -------
-    np.ndarray
-        Contents of zarr file at "path".
-
-    """
-    n5store = zarr.N5FSStore(path, "r")
-    if "653980" in path:
-        return zarr.open(n5store).ch488.s0
-    elif "653158" in path:
-        return zarr.open(n5store).s0
-
-
-def open_tensorstore(path, driver):
-    """
-    Uploads segmentation mask stored as a directory of shard files.
-
-    Parameters
-    ----------
-    path : str
-        Path to directory containing shard files.
-    driver : str
-        Storage driver needed to read data at "path".
-
-    Returns
-    -------
-    sparse_volume : dict
-        Sparse image volume.
-
-    """
-    assert driver in SUPPORTED_DRIVERS, "Error! Driver is not supported!"
-    arr = ts.open(
-        {
-            "driver": driver,
-            "kvstore": {
-                "driver": "gcs",
-                "bucket": "allen-nd-goog",
-                "path": path,
-            },
-            "context": {
-                "cache_pool": {"total_bytes_limit": 1000000000},
-                "cache_pool#remote": {"total_bytes_limit": 1000000000},
-                "data_copy_concurrency": {"limit": 8},
-            },
-            "recheck_cached_data": "open",
-        }
-    ).result()
-    if driver == "neuroglancer_precomputed":
-        return arr[ts.d["channel"][0]]
-    elif driver == "zarr":
-        arr = arr[0, 0, :, :, :]
-        arr = arr[ts.d[0].transpose[2]]
-        arr = arr[ts.d[0].transpose[1]]
-    return arr
-
-
-def read_tensorstore(arr, xyz, shape, from_center=True):
-    chunk = get_chunk(arr, xyz, shape, from_center=from_center)
-    return chunk.read().result()
-
-
-def read_tensorstore_bbox(img, bbox):
-    start = bbox["min"]
-    end = bbox["max"]
-    return (
-        img[start[0] : end[0], start[1] : end[1], start[2] : end[2]]
-        .read()
-        .result()
-    )
-
-
-def get_chunk(arr, xyz, shape, from_center=True):
-    start, end = get_start_end(xyz, shape, from_center=from_center)
-    return deepcopy(
-        arr[start[0] : end[0], start[1] : end[1], start[2] : end[2]]
-    )
-
-
-def get_start_end(xyz, shape, from_center=True):
-    if from_center:
-        start = [xyz[i] - shape[i] // 2 for i in range(3)]
-        end = [xyz[i] + shape[i] // 2 for i in range(3)]
-    else:
-        start = xyz
-        end = [xyz[i] + shape[i] for i in range(3)]
-    return start, end
-
-
-def get_superchunks(img_path, labels_path, xyz, shape, from_center=True):
-    with ThreadPoolExecutor() as executor:
-        img_job = executor.submit(
-            get_superchunk,
-            img_path,
-            "n5" if ".n5" in img_path else "zarr",
-            xyz,
-            shape,
-            from_center=from_center,
-        )
-        labels_job = executor.submit(
-            get_superchunk,
-            labels_path,
-            "neuroglancer_precomputed",
-            xyz,
-            shape,
-            from_center=from_center,
-        )
-    img = img_job.result().astype(np.int16)
-    labels = labels_job.result().astype(np.int64)
-    assert img.shape == labels.shape, "img.shape != labels.shape"
-    return img, labels
-
-
-def get_superchunk(path, driver, xyz, shape, from_center=True):
-    arr = open_tensorstore(path, driver)
-    return read_tensorstore(arr, xyz, shape, from_center=from_center)
-
-
+# -- io utils --
 def read_json(path):
     """
     Reads json file stored at "path".
@@ -487,8 +355,8 @@ def read_txt(path):
 def parse_metadata(path, anisotropy=[1.0, 1.0, 1.0]):
     metadata = read_json(path)
     origin = metadata["chunk_origin"]
-    chunk_origin = to_img(origin, anisotropy=anisotropy).tolist()
-    return chunk_origin, metadata["chunk_shape"]
+    chunk_origin = to_voxels(origin, anisotropy=anisotropy)
+    return chunk_origin.tolist(), metadata["chunk_shape"]
 
 
 def write_json(path, contents):
@@ -532,16 +400,6 @@ def write_txt(path, contents):
     f.close()
 
 
-def set_path(dir_name, filename, ext):
-    cnt = 0
-    ext = ext.replace(".", "")
-    path = os.path.join(dir_name, f"{filename}.{ext}")
-    while os.path.exists(path):
-        path = os.path.join(dir_name, f"{filename}.{cnt}.{ext}")
-        cnt += 1
-    return path
-
-
 # --- coordinate conversions ---
 def img_to_patch(xyz, patch_centroid, patch_dims):
     half_patch_dims = [patch_dims[i] // 2 for i in range(3)]
@@ -558,7 +416,7 @@ def to_world(xyz, shift=[0, 0, 0]):
     return tuple([xyz[i] * ANISOTROPY[i] - shift[i] for i in range(3)])
 
 
-def to_img(xyz, anisotropy=ANISOTROPY):
+def to_voxels(xyz, anisotropy=ANISOTROPY):
     return (xyz / np.array(anisotropy)).astype(int)
 
 
@@ -576,7 +434,7 @@ def is_contained(bbox, xyz, buffer=0):
 
 
 def is_list_contained(bbox, xyz_list):
-    return any([is_contained(bbox, to_img(xyz)) for xyz in xyz_list])
+    return any([is_contained(bbox, to_voxels(xyz)) for xyz in xyz_list])
 
 
 def sample_singleton(my_container):
@@ -683,21 +541,6 @@ def get_swc_id(path):
     return filename.split(".")[0]
 
 
-def get_img_mip(img, axis=0):
-    return np.max(img, axis=axis)
-
-
-def get_labels_mip(img, axis=0):
-    mip = np.max(img, axis=axis)
-    mip = label2rgb(mip)
-    return (255 * mip).astype(np.uint8)
-
-
-def normalize_img(img):
-    img -= np.min(img)
-    return img / np.max(img)
-
-
 def numpy_to_hashable(arr):
     return [tuple(item) for item in arr.tolist()]
 
@@ -712,4 +555,14 @@ def get_memory_usage():
 
 def get_batches(iterable, batch_size):
     for start in range(0, len(iterable), batch_size):
-        yield iterable[start : min(start + batch_size, len(iterable))]
+        yield iterable[start: min(start + batch_size, len(iterable))]
+
+
+def set_path(dir_name, filename, ext):
+    cnt = 0
+    ext = ext.replace(".", "")
+    path = os.path.join(dir_name, f"{filename}.{ext}")
+    while os.path.exists(path):
+        path = os.path.join(dir_name, f"{filename}.{cnt}.{ext}")
+        cnt += 1
+    return path