
Decollider testing #9
Merged: 26 commits, Feb 21, 2024
Commits (26)
ca6bd91  Working on decollider training (cwindolf, Jan 23, 2024)
40e6dbf  Flesh out neural net decolliders and training (cwindolf, Jan 23, 2024)
cfc5883  Some spike train helpers (cwindolf, Jan 23, 2024)
a0faca6  Wrong no_grad context (cwindolf, Jan 23, 2024)
dd4fa17  Add Noisier2Noise prediction, training, evaluation (cwindolf, Jan 24, 2024)
8f74e2e  Merge branch 'decollider-testing' of github.com:cwindolf/dartsort int… (cwindolf, Jan 24, 2024)
18a6406  Add a classic thresholding, just because I want to detect spikes for … (cwindolf, Jan 24, 2024)
8cf36b6  Imports (cwindolf, Jan 24, 2024)
bfd16b4  Friendlier (cwindolf, Jan 24, 2024)
4e2dc3d  Debug edges (cwindolf, Jan 24, 2024)
8790ee9  Improve handling of varying channel neighborhoods; sketch out fully u… (cwindolf, Jan 25, 2024)
740f217  Improve handling of varying channel neighborhoods; sketch out fully u… (cwindolf, Jan 25, 2024)
ba9f960  Shim for old denoiser (cwindolf, Jan 25, 2024)
c41abec  Checking (cwindolf, Jan 25, 2024)
a050647  Sketch out unsupervised training (cwindolf, Jan 25, 2024)
82c4756  Sketch out unsupervised training (cwindolf, Jan 25, 2024)
2e3d9bf  Fix channel logic and improve batching helpers (cwindolf, Jan 25, 2024)
5acb501  Debug decollider and improve the training and metrics code (cwindolf, Jan 26, 2024)
3a70503  quality of life (cwindolf, Jan 27, 2024)
71a6527  Fix save/load (cwindolf, Jan 27, 2024)
d7aa5f6  Improve training, saving (cwindolf, Jan 29, 2024)
c802cec  Trying this sigmoid idea (cwindolf, Jan 29, 2024)
448b02e  Realignment stuff (cwindolf, Feb 20, 2024)
0d131dc  Cloudpickle things (cwindolf, Feb 20, 2024)
39693c8  Net training improvements (cwindolf, Feb 20, 2024)
2b8bf46  Merge upstream (cwindolf, Feb 21, 2024)
src/dartsort/peel/__init__.py (1 addition, 0 deletions)
@@ -1,3 +1,4 @@
 from .grab import GrabAndFeaturize
 from .matching import ObjectiveUpdateTemplateMatchingPeeler
 from .subtract import SubtractionPeeler, subtract_chunk
+from .threshold import ThresholdAndFeaturize
src/dartsort/peel/peel_base.py (19 additions, 6 deletions)
@@ -28,7 +28,7 @@ def __init__(
         self,
         recording,
         channel_index,
-        featurization_pipeline,
+        featurization_pipeline=None,
         chunk_length_samples=30_000,
         chunk_margin_samples=0,
         n_chunks_fit=40,
@@ -46,7 +46,10 @@
             fit_subsampling_random_state
         )
         self.register_buffer("channel_index", channel_index)
-        self.add_module("featurization_pipeline", featurization_pipeline)
+        if featurization_pipeline is not None:
+            self.add_module("featurization_pipeline", featurization_pipeline)
+        else:
+            self.featurization_pipeline = None
 
         # subclasses can append to this if they want to store more fixed
         # arrays in the output h5 file
@@ -237,16 +240,20 @@ def out_datasets(self):
             SpikeDataset(name="times_seconds", shape_per_spike=(), dtype=float),
             SpikeDataset(name="channels", shape_per_spike=(), dtype=int),
         ]
-        for transformer in self.featurization_pipeline.transformers:
-            if transformer.is_featurizer:
-                datasets.append(transformer.spike_dataset)
+        if self.featurization_pipeline is not None:
+            for transformer in self.featurization_pipeline.transformers:
+                if transformer.is_featurizer:
+                    datasets.append(transformer.spike_dataset)
         return datasets
 
     # -- utility methods which users likely won't touch
 
     def featurize_collisioncleaned_waveforms(
         self, collisioncleaned_waveforms, max_channels
     ):
+        if self.featurization_pipeline is None:
+            return {}
+
         waveforms, features = self.featurization_pipeline(
             collisioncleaned_waveforms, max_channels
         )
@@ -329,7 +336,10 @@ def gather_chunk_result(
         return n_new_spikes
 
     def needs_fit(self):
-        return self.peeling_needs_fit() or self.featurization_pipeline.needs_fit()
+        it_does = self.peeling_needs_fit()
+        if self.featurization_pipeline is not None:
+            it_does = it_does or self.featurization_pipeline.needs_fit()
+        return it_does
 
     def fit_models(self, save_folder, overwrite=False, n_jobs=0, device=None):
         with torch.no_grad():
@@ -349,6 +359,9 @@ def fit_models(self, save_folder, overwrite=False, n_jobs=0, device=None):
         assert not self.needs_fit()
 
     def fit_featurization_pipeline(self, save_folder, n_jobs=0, device=None):
+        if self.featurization_pipeline is None:
+            return
+
         if not self.featurization_pipeline.needs_fit():
             return
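With the featurization pipeline now optional, the base peeler short-circuits cleanly: out_datasets skips featurizer datasets, featurize_collisioncleaned_waveforms returns an empty dict, and needs_fit consults the pipeline only when one is attached. A minimal sketch of that contract (MyPeeler, recording, channel_index, waveforms, and max_channels are hypothetical stand-ins, not names from this PR):

# Sketch under assumptions: MyPeeler is any concrete BasePeeler subclass.
peeler = MyPeeler(recording, channel_index, featurization_pipeline=None)

# No pipeline attached, so featurization is a no-op with no features.
assert peeler.featurize_collisioncleaned_waveforms(waveforms, max_channels) == {}

# needs_fit() now reduces to the peeler's own models.
assert peeler.needs_fit() == peeler.peeling_needs_fit()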
src/dartsort/peel/subtract.py (2 additions, 2 deletions)
@@ -26,7 +26,7 @@ def __init__(
         spike_length_samples=121,
         detection_thresholds=[12, 10, 8, 6, 5, 4],
         chunk_length_samples=30_000,
-        peak_sign="neg",
+        peak_sign="both",
         spatial_dedup_channel_index=None,
         n_chunks_fit=40,
         fit_subsampling_random_state=0,
@@ -274,7 +274,7 @@ def subtract_chunk(
     left_margin=0,
     right_margin=0,
     detection_thresholds=[12, 10, 8, 6, 5, 4],
-    peak_sign="neg",
+    peak_sign="both",
     spatial_dedup_channel_index=None,
     residnorm_decrease_threshold=3.162,  # sqrt(10)
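The subtraction peeler's default detection polarity changes from negative-only to both signs. Callers who depend on the old behavior can pin it explicitly; a sketch (recording and channel_index are assumed inputs, other arguments left at their defaults):

# Sketch: restore the pre-change negative-only detection.
peeler = SubtractionPeeler(
    recording,
    channel_index,
    peak_sign="neg",  # old default; the new default is "both"
)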
src/dartsort/peel/threshold.py (new file, 95 additions)
@@ -0,0 +1,95 @@
import torch
from dartsort.detect import detect_and_deduplicate
from dartsort.util import spiketorch

from .peel_base import BasePeeler


class ThresholdAndFeaturize(BasePeeler):
    def __init__(
        self,
        recording,
        channel_index,
        featurization_pipeline=None,
        trough_offset_samples=42,
        spike_length_samples=121,
        detection_threshold=5.0,
        chunk_length_samples=30_000,
        peak_sign="both",
        spatial_dedup_channel_index=None,
        n_chunks_fit=40,
        fit_subsampling_random_state=0,
    ):
        super().__init__(
            recording=recording,
            channel_index=channel_index,
            featurization_pipeline=featurization_pipeline,
            chunk_length_samples=chunk_length_samples,
            chunk_margin_samples=spike_length_samples,
            n_chunks_fit=n_chunks_fit,
            fit_subsampling_random_state=fit_subsampling_random_state,
        )

        self.trough_offset_samples = trough_offset_samples
        self.spike_length_samples = spike_length_samples
        self.peak_sign = peak_sign
        if spatial_dedup_channel_index is not None:
            self.register_buffer(
                "spatial_dedup_channel_index",
                spatial_dedup_channel_index,
            )
        else:
            self.spatial_dedup_channel_index = None
        self.detection_threshold = detection_threshold
        self.peel_kind = f"Threshold {detection_threshold}"

    def peel_chunk(
        self,
        traces,
        chunk_start_samples=0,
        left_margin=0,
        right_margin=0,
        return_residual=False,
    ):
        times_rel, channels = detect_and_deduplicate(
            traces,
            self.detection_threshold,
            dedup_channel_index=self.spatial_dedup_channel_index,
            peak_sign=self.peak_sign,
        )
        if not times_rel.numel():
            return dict(n_spikes=0)

        # want only peaks in the chunk
        min_time = max(left_margin, self.spike_length_samples)
        max_time = traces.shape[0] - max(
            right_margin, self.spike_length_samples - self.trough_offset_samples
        )
        valid = (times_rel >= min_time) & (times_rel < max_time)
        times_rel = times_rel[valid]
        if not times_rel.numel():
            return dict(n_spikes=0)
        channels = channels[valid]

        # load up the waveforms for this chunk
        waveforms = spiketorch.grab_spikes(
            traces,
            times_rel,
            channels,
            self.channel_index,
            trough_offset=self.trough_offset_samples,
            spike_length_samples=self.spike_length_samples,
            already_padded=False,
            pad_value=torch.nan,
        )

        # get absolute times
        times_samples = times_rel + chunk_start_samples - left_margin

        peel_result = dict(
            n_spikes=times_rel.numel(),
            times_samples=times_samples,
            channels=channels,
            collisioncleaned_waveforms=waveforms,
        )
        return peel_result
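A minimal usage sketch for the new peeler (recording, channel_index, save_folder, and traces are assumed to exist; fit_models follows the peel_base.py signature shown above):

# Sketch: detection-only peeling with no featurization pipeline.
peeler = ThresholdAndFeaturize(
    recording,
    channel_index,
    detection_threshold=5.0,
    peak_sign="both",
)
if peeler.needs_fit():
    peeler.fit_models(save_folder)

# Per-chunk result layout, per peel_chunk above.
result = peeler.peel_chunk(traces)  # traces: (n_samples, n_channels) tensor
if result["n_spikes"]:
    times, chans = result["times_samples"], result["channels"]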
src/dartsort/templates/template_util.py (4 additions, 0 deletions)
@@ -109,11 +109,15 @@ def get_registered_templates(
 def get_realigned_sorting(
     recording,
     sorting,
+    realign_peaks=True,
+    low_rank_denoising=False,
     **kwargs,
 ):
     results = get_templates(
         recording,
         sorting,
+        realign_peaks=realign_peaks,
+        low_rank_denoising=low_rank_denoising,
         **kwargs,
     )
     return results["sorting"]
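get_realigned_sorting now names realign_peaks and low_rank_denoising explicitly instead of burying them in **kwargs, so they carry visible defaults. A sketch (recording and sorting are assumed inputs):

# Sketch: the new explicit arguments are forwarded to get_templates.
realigned = get_realigned_sorting(
    recording,
    sorting,
    realign_peaks=True,        # realign spike times during template estimation
    low_rank_denoising=False,  # skip low-rank template denoising
)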
src/dartsort/templates/templates.py (2 additions, 0 deletions)
@@ -105,6 +105,7 @@ def from_config(
         save_npz_name="template_data.npz",
         localizations_dataset_name="point_source_localizations",
         n_jobs=0,
+        units_per_job=8,
         device=None,
         trough_offset_samples=42,
         spike_length_samples=121,
@@ -155,6 +156,7 @@ def from_config(
             denoising_fit_radius=template_config.denoising_fit_radius,
             denoising_snr_threshold=template_config.denoising_snr_threshold,
             device=device,
+            units_per_job=units_per_job,
         )
         if template_config.registered_templates and motion_est is not None:
             kwargs["registered_geom"] = drift_util.registered_geometry(
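from_config threads a new units_per_job knob down into template computation, controlling how many units each parallel job handles. A sketch (the TemplateData class name and the positional arguments beyond this diff are assumptions, not confirmed by the PR):

# Sketch under assumptions: names outside this diff are illustrative.
template_data = TemplateData.from_config(
    recording,
    sorting,
    template_config,
    n_jobs=4,
    units_per_job=8,  # new: units per parallel job (default 8)
)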