
Commit cec7743

Debugging
1 parent 5105e3c commit cec7743

5 files changed: +57 -28 lines changed

src/dartsort/cluster/gaussian_mixture.py

Lines changed: 26 additions & 10 deletions
@@ -683,7 +683,7 @@ def reassign(self, log_liks):
         # intersection
         n_units = max(log_liks.shape[0] - self.with_noise_unit, original.max() + 1)
         intersection = torch.zeros(n_units, dtype=int)
-        spiketorch.add_at_(intersection, assignments[kept], original[kept])
+        spiketorch.add_at_(intersection, assignments[kept], same[kept])

         # union by include/exclude
         union = torch.zeros_like(intersection)
@@ -783,6 +783,8 @@ def merge(self, log_liks=None, show_progress=True):
         new_labels, new_ids = self.merge_units(
             likelihoods=log_liks, show_progress=show_progress
         )
+        if new_labels is None:
+            return
         self.labels.copy_(torch.asarray(new_labels))

         unique_new_ids = np.unique(new_ids)
@@ -1324,6 +1326,8 @@ def kmeans_split_unit(self, unit_id, debug=False):
             debug=debug,
             debug_info=result,
         )
+        if split_labels is None:
+            return result
         split_ids, split_counts = np.unique(split_labels, return_counts=True)
         valid = split_ids >= 0
         if not valid.any():
@@ -1598,6 +1602,12 @@ def tree_merge(
         # heuristic unit groupings to investigate
         distances = sym_function(distances, distances.T)
         distances = distances[np.triu_indices(len(distances), k=1)]
+        finite = np.isfinite(distances)
+        if not finite.any():
+            return None, None, None
+        if not finite.all():
+            inf = max(0, distances[finite].max()) + max_distance + 1
+            distances[np.logical_not(finite)] = inf
         Z = linkage(distances)
         n_units = len(Z) + 1

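Note: SciPy's linkage rejects non-finite condensed distances, so tree_merge now screens them first; all-infinite means there is nothing to merge, and a partial mask is patched with a sentinel pushed past max_distance so unreachable pairs only join at the top of the tree. A minimal standalone sketch of that guard, with an invented 3-unit matrix:

    import numpy as np
    from scipy.cluster.hierarchy import linkage

    # toy distance matrix: units 0 and 2 have no finite distance (invented data)
    distances = np.array([[0.0, 1.0, np.inf],
                          [1.0, 0.0, 2.0],
                          [np.inf, 2.0, 0.0]])
    max_distance = 4.0

    pdist = distances[np.triu_indices(len(distances), k=1)]  # [1.0, inf, 2.0]
    finite = np.isfinite(pdist)
    # tree_merge returns None, None, None when nothing at all is finite
    if finite.any() and not finite.all():
        # sentinel above every finite distance and the merge threshold
        pdist[np.logical_not(finite)] = max(0, pdist[finite].max()) + max_distance + 1

    Z = linkage(pdist)  # linkage raises on non-finite input, hence the guard
    print(Z[:, 2])      # merge heights: the infinite pair now joins last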
@@ -1869,16 +1879,17 @@ def merge_criteria(
             units,
             merged_unit,
             spikes_core[keep],
-            self.data.neighborhoods(),
+            self.data.neighborhoods()[1],
             use_proportions=self.use_proportions,
             reduce=False,
         )
         class_w = class_w.cpu()
         if lik_weights is not None:
             lik_weights = lik_weights.cpu()
         labixs = labixs.cpu()
-        k_full = class_sum(labids, labixs, k_full, lik_weights) / class_w
-        k_merged = class_sum(labids, labixs, k_merged, lik_weights) / class_w
+        labids = labids.cpu()
+        k_full = class_sum(labids, labixs, k_full.cpu(), lik_weights) / class_w
+        k_merged = class_sum(labids, labixs, k_merged.cpu(), lik_weights) / class_w
         if class_balancing == "worst":
             k_full = k_full[worst_ix]
             k_merged = k_merged[worst_ix]
@@ -2144,6 +2155,9 @@ def merge_units(
             debug_info["distances"] = distances
         if distances.shape[0] == 1:
             return None, None
+        pdist = distances[np.triu_indices(len(distances), k=1)]
+        if not (pdist <= self.merge_distance_threshold).any():
+            return None, None

         if merge_kind == "hierarchical":
             return self.hierarchical_bimodality_merge(
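Note: the new check short-circuits merge_units before any tree is built when no pair of units lies within merge_distance_threshold. A standalone sketch of the test (matrix and threshold values invented):

    import numpy as np

    # toy 3-unit distance matrix; 1.0 stands in for self.merge_distance_threshold
    distances = np.array([[0.0, 3.0, 5.0], [3.0, 0.0, 4.0], [5.0, 4.0, 0.0]])
    merge_distance_threshold = 1.0

    pdist = distances[np.triu_indices(len(distances), k=1)]
    if not (pdist <= merge_distance_threshold).any():
        print("no pair within threshold; merge_units returns None, None")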
@@ -2397,10 +2411,10 @@ def fit(
         weights = weights[kept]

         if self.channels_strategy.endswith("fuzzcore"):
-            achans_full = occupied_chans(
+            achans_full, _ = occupied_chans(
                 features, self.n_channels, neighborhoods=neighborhoods
             )
-            achans = occupied_chans(
+            achans, _ = occupied_chans(
                 features,
                 neighborhood_ids=core_neighborhood_ids,
                 n_channels=self.n_channels,
@@ -2410,15 +2424,15 @@ def fit(
             achans = achans[spiketorch.isin_sorted(achans, achans_full)]
             needs_direct = True
         elif self.channels_strategy.endswith("core"):
-            achans = occupied_chans(
+            achans, _ = occupied_chans(
                 features,
                 neighborhood_ids=core_neighborhood_ids,
                 n_channels=self.n_channels,
                 neighborhoods=core_neighborhoods,
             )
             needs_direct = True
         else:
-            achans = occupied_chans(
+            achans, _ = occupied_chans(
                 features, self.n_channels, neighborhoods=neighborhoods
             )
             needs_direct = False
@@ -2428,7 +2442,7 @@ def fit(
         do_pca = self.cov_kind == "ppca" and self.ppca_rank

         active_mean = active_W = None
-        if hasattr(self, "mean"):
+        if hasattr(self, "mean") and self.ppca_warm_start:
             active_mean = self.mean[:, achans]
         if hasattr(self, "W") and self.ppca_warm_start:
             active_W = self.W[:, achans]
@@ -2538,6 +2552,8 @@ def logdet(self, channels=None):

     def log_likelihood(self, features, channels, neighborhood_id=None) -> torch.Tensor:
         """Log likelihood for spike features living on the same channels."""
+        if not len(features):
+            return features.new_zeros((0,))
         mean = self.noise.mean_full[:, channels]
         if self.mean_kind == "full":
             mean = mean + self.mean[:, channels]
@@ -2736,7 +2752,7 @@ def get_average_parameter_counts(
 def class_sum(classes, inverse_inds, x, weights=None):
     wsum = x.new_zeros(len(classes))
     x = x * weights if weights is not None else x
-    spiketorch.add_at_(wsum, inverse_inds, x)
+    spiketorch.add_at_(wsum, inverse_inds.to(x.device), x)
     return wsum

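Note: the .cpu() calls in merge_criteria and the .to(x.device) here address the same failure mode: scatter-style accumulation in torch requires the index tensor to live on the same device as the destination and source. spiketorch.add_at_ is dartsort's in-place scatter-add helper; in the sketch below torch's built-in index_add_, which has the same device requirement, stands in for it (toy inputs):

    import torch

    def class_sum(classes, inverse_inds, x, weights=None):
        # mirror of the fixed helper, with index_add_ standing in for
        # spiketorch.add_at_: move the index tensor to x's device first
        wsum = x.new_zeros(len(classes))
        x = x * weights if weights is not None else x
        wsum.index_add_(0, inverse_inds.to(x.device), x)
        return wsum

    classes = torch.tensor([0, 1, 2])
    inverse_inds = torch.tensor([0, 0, 2, 1])  # may be on CPU while x is on GPU
    x = torch.tensor([1.0, 2.0, 3.0, 4.0])
    print(class_sum(classes, inverse_inds, x))  # tensor([3., 4., 3.])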
src/dartsort/cluster/ppcalib.py

Lines changed: 22 additions & 14 deletions
@@ -264,11 +264,13 @@ def ppca_e_step(
         xc = nd.x - nu

         # we need these ones everywhere
-        Cooinvxc = nd.C_oo_chol.solve(xc.T).T
+        # Cooinvxc = nd.C_oo_chol.solve(xc.T).T
+        Cooinvxc = xc @ nd.C_oo_inv

         # pca-centered data
         if yes_pca and nd.have_missing:
             CooinvWo = nd.C_oo_chol.solve(W_o)
+            CooinvWo = nd.C_oo_inv @ W_o
             # xcc = torch.addmm(xc, ubar, W_o.T, alpha=-1)
             # Cooinvxcc = C_oochol.solve(xcc.T).T
             Cooinvxcc = Cooinvxc.addmm(ubar, CooinvWo.T, alpha=-1)
@@ -285,7 +287,9 @@ def ppca_e_step(
         if yes_pca:
             e_xcu = xc[:, :, None] * ubar[:, None, :]
         if yes_pca and nd.have_missing:
-            e_mxcu = (Cooinvxc @ nd.C_mo.T)[:, :, None] * ubar[:, None, :]
+            # e_mxcu = (Cooinvxc @ nd.C_mo.T)[:, :, None] * ubar[:, None, :]
+            # print(f"{e_mxcu.shape=}")
+            e_mxcu = torch.einsum("ij,kj,il->ikl", Cooinvxc, nd.C_mo, ubar)
             # CmoCooinvWo = C_mo @ CooinvWo
             Wm_less_CmoCooinvWo = W_m.addmm(nd.C_mo, CooinvWo, beta=-1)
             shp = Wm_less_CmoCooinvWo.shape
@@ -314,7 +318,7 @@ def ppca_e_step(
         wxcu = nd.w_norm @ e_xcu.view(nd.neighb_n_spikes, -1)
         wxcu = wxcu.view(e_xcu.shape[1:])
         if nd.have_missing and yes_pca:
-            wmxcu = nd.w_norm @ e_mxcu.view(nd.neighb_n_spikes, -1)
+            wmxcu = nd.w_norm @ e_mxcu.reshape(nd.neighb_n_spikes, -1)
             wmxcu = wmxcu.view(e_mxcu.shape[1:])
         ycubar = y.new_zeros((rank, nc, M))
         ycubar[:, nd.active_subset] = wxcu.view(rank, nd.neighb_nc, M)
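Note: the einsum fuses the old (Cooinvxc @ nd.C_mo.T)[:, :, None] * ubar[:, None, :] into one contraction over the shared observed-channel index, and since einsum output is not guaranteed contiguous, the flatten above switched from .view to .reshape. A toy-shape equivalence check (all sizes invented):

    import torch

    n, d_o, d_m, M = 7, 4, 3, 2  # toy sizes: spikes, observed, missing, ppca rank
    Cooinvxc = torch.randn(n, d_o)
    C_mo = torch.randn(d_m, d_o)
    ubar = torch.randn(n, M)

    # original formulation: matmul, then broadcasted outer product with ubar
    ref = (Cooinvxc @ C_mo.T)[:, :, None] * ubar[:, None, :]

    # the commit's formulation: contract over the shared index j in one shot
    e_mxcu = torch.einsum("ij,kj,il->ikl", Cooinvxc, C_mo, ubar)

    assert torch.allclose(ref, e_mxcu, atol=1e-6)

    # einsum output may be non-contiguous, so flatten with reshape, not view
    flat = e_mxcu.reshape(n, -1)
    print(flat.shape)  # torch.Size([7, 6])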
@@ -374,10 +378,12 @@ def embed(
         xc = nd.x - nu

         # we need these ones everywhere
-        Cooinvxc = nd.C_oo_chol.solve(xc.T).T
+        # Cooinvxc = nd.C_oo_chol.solve(xc.T).T
+        Cooinvxc = xc @ nd.C_oo_inv

         # moments of embeddings
-        T_inv = eye_M + W_o.T @ nd.C_oo_chol.solve(W_o)
+        # T_inv = eye_M + W_o.T @ nd.C_oo_chol.solve(W_o)
+        T_inv = eye_M + W_o.T @ nd.C_oo_inv @ W_o
         T = torch.linalg.inv(T_inv)
         ubar = Cooinvxc @ (W_o @ T)
         uubar = torch.baddbmm(T, ubar[:, :, None], ubar[:, None, :])
@@ -437,6 +443,7 @@ class NeighborhoodPPCAData:

     C_oo: linear_operator.LinearOperator
     C_oo_chol: CholLinearOperator
+    C_oo_inv: CholLinearOperator
     w: torch.Tensor
     w_norm: torch.Tensor
     x: torch.Tensor
@@ -482,13 +489,11 @@ def get_neighborhood_data(
         # subset of active chans which are in the neighborhood
         active_subset = spiketorch.isin_sorted(active_channels, neighb_chans)

-        w = weights[neighb_members]
         x = sp.features[neighb_members][:, :, neighb_subset]

         chans_tuple = tuple(active_channels[active_subset].tolist())
         if chans_tuple in dedup_data:
-            *info, ws, xs, mems = dedup_data[chans_tuple]
-            ws.append(w)
+            *info, xs, mems = dedup_data[chans_tuple]
             xs.append(x)
             mems.append(neighb_members)
         else:
@@ -499,23 +504,22 @@ def get_neighborhood_data(
                 active_subset,
                 can_cache_by_neighborhood,
                 have_missing,
-                [w],
                 [x],
                 [neighb_members],
             )

     neighborhood_data = []
     ess = weights.sum()
     for chans_tuple, chans_data in dedup_data.items():
-        *info, ws, xs, mems = chans_data
+        *info, xs, mems = chans_data
         nid, neighb_chans, active_subset, can_cache_by_neighborhood, have_missing = info
-        if len(ws) > 1:
-            w = torch.concatenate(ws)
+        if len(mems) > 1:
             x = torch.concatenate(xs)
             neighb_members = torch.concatenate(mems)
+            neighb_members, order = neighb_members.sort()
+            x = x[order]
             nid = None
         else:
-            w = ws[0]
             x = xs[0]
             neighb_members = mems[0]

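Note: with per-neighborhood weights dropped from the dedup cache, w = weights[neighb_members] is gathered once later (next hunk), so the concatenated member indices are put in sorted order and the features are permuted identically to stay aligned. A toy version of that sort-and-permute, with invented indices:

    import torch

    # two neighborhoods' spike indices and features (invented data)
    mems = [torch.tensor([4, 9]), torch.tensor([1, 6])]
    xs = [torch.randn(2, 5), torch.randn(2, 5)]

    neighb_members = torch.concatenate(mems)
    x = torch.concatenate(xs)

    # sort member indices; apply the same permutation to the features
    neighb_members, order = neighb_members.sort()
    x = x[order]
    print(neighb_members)  # tensor([1, 4, 6, 9]); rows of x follow the same order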
@@ -547,7 +551,10 @@ def get_neighborhood_data(
             channels=neighb_chans, device=device, **cache_kw
         )
         assert C_oo.shape == (D_neighb, D_neighb)
-        C_oo_chol = CholLinearOperator(C_oo.cholesky())
+        chol = C_oo.cholesky(upper=False)
+        C_oo_chol = CholLinearOperator(chol)
+        Linv = chol.inverse().to_dense()
+        C_oo_inv = Linv.T @ Linv
         w = weights[neighb_members]
         C_mo = None
         if have_missing:
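Note: caching C_oo_inv is what lets the E-step triangular solves become plain matmuls. If C = L @ L.T, then inv(C) = Linv.T @ Linv with Linv = inv(L). A plain-torch numerical check of that identity (the commit builds these via linear_operator's cholesky()/inverse(), for which torch.linalg stands in here):

    import torch

    torch.manual_seed(0)
    A = torch.randn(5, 5)
    C = A @ A.T + 5 * torch.eye(5)  # toy SPD covariance

    L = torch.linalg.cholesky(C)    # lower triangular, C = L @ L.T
    Linv = torch.linalg.inv(L)      # stand-in for chol.inverse().to_dense()
    C_inv = Linv.T @ Linv           # inv(L @ L.T) = Linv.T @ Linv

    assert torch.allclose(C_inv @ C, torch.eye(5), atol=1e-4)

    # once cached, xc @ C_inv replaces nd.C_oo_chol.solve(xc.T).T
    xc = torch.randn(10, 5)
    assert torch.allclose(xc @ C_inv, torch.linalg.solve(C, xc.T).T, atol=1e-4)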
@@ -565,6 +572,7 @@ def get_neighborhood_data(
                 have_missing=have_missing,
                 C_oo=C_oo,
                 C_oo_chol=C_oo_chol,
+                C_oo_inv=C_oo_inv,
                 w=w,
                 w_norm=w / ess,
                 x=x,

src/dartsort/cluster/refine.py

Lines changed: 5 additions & 2 deletions
@@ -71,8 +71,11 @@ def refine_clustering(
     gmm.cleanup()
     for it in range(refinement_config.n_total_iters):
         log_liks = gmm.em()
-        gmm.split()
-        log_liks = gmm.em()
+        if log_liks.shape[0] > refinement_config.max_avg_units * recording.get_num_channels():
+            print(f"{log_liks.shape=}, skipping split.")
+        else:
+            gmm.split()
+            log_liks = gmm.em()
         gmm.merge(log_liks)
     gmm.em(final_split="full")
     gmm.cpu()
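Note: this guard caps the mixture at max_avg_units units per recording channel before attempting another split round. For example, on a hypothetical 384-channel probe with the default max_avg_units = 8, splitting is skipped once log_liks has more than 3072 rows (one per unit, plus a noise row when present), and the iteration proceeds straight to merging.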

src/dartsort/cluster/stable_features.py

Lines changed: 3 additions & 2 deletions
@@ -764,13 +764,14 @@ def occupied_chans(
         neighborhood_ids = spike_data.neighborhood_ids
     ids = torch.unique(neighborhood_ids)
     chans = neighborhoods.neighborhoods[ids]
-    chans = torch.unique(chans)
+    chans, counts = torch.unique(chans, return_counts=True)
+    counts = counts[chans < n_channels]
     chans = chans[chans < n_channels]
     for _ in range(fuzz):
         chans = neighborhoods.channel_index[chans]
         chans = torch.unique(chans)
         chans = chans[chans < n_channels]
-    return chans
+    return chans, counts


 def interp_to_chans(
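Note: occupied_chans now also returns, for each surviving channel, how many of the unique spike neighborhoods include it; masking counts while chans still holds the full unique set keeps the two aligned, and the counts describe the pre-fuzz channel set. A toy run of the unique-and-mask step, with 4 standing in for n_channels and the out-of-range value acting as padding, as the chans < n_channels filter implies:

    import torch

    n_channels = 4
    # toy neighborhood-channel rows; the value 4 (== n_channels) is padding
    neighborhoods = torch.tensor([[0, 1, 4], [1, 2, 4]])

    chans, counts = torch.unique(neighborhoods, return_counts=True)
    counts = counts[chans < n_channels]  # mask counts first, while chans is intact
    chans = chans[chans < n_channels]
    print(chans)   # tensor([0, 1, 2])
    print(counts)  # tensor([1, 2, 1])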

src/dartsort/util/internal_config.py

Lines changed: 1 addition & 0 deletions
@@ -325,6 +325,7 @@ class RefinementConfig:
     interpolation_sigma: float = 20.0
     val_proportion: float = 0.25
     max_n_spikes: float | int = argfield(default=4_000_000, arg_type=int_or_inf)
+    max_avg_units: int = 8

     # model params
     channels_strategy: str = "count"
