From a791bbb53af1d60d9f1231f0093c962b5578732e Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Fri, 5 Dec 2025 11:14:35 -0800
Subject: [PATCH 1/8] Remove the despiking step.

---
 .vscode/launch.json     |  8 +++---
 src/data/nc42netcdfs.py | 54 +++++++++++++----------------------------
 2 files changed, 21 insertions(+), 41 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index b1c27c1e..0400b66d 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -58,14 +58,14 @@
             //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4", "--known_hash", "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"]
             // brizo 20250914T080941 has bad latitude and longitude values and lots of bad Universal latitude_time and longitude_time values
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"]
-            //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/longitude_time"]
+            "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/longitude_time"]
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"]
             // brizo 20250916T230652 has several ESP Samples from stoqs_lrauv_sep2025
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"]
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn.nc4", "--plot_time", "/longitude_time"]
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn_latlon.nc4", "--plot_time", "/longitude_time"]
             // Conflicting sizes for nudged_time and data - fixed by filtering GPS fixes to be monotonically increasing
-            "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot_time", "/longitude_time"]
+            //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot_time", "/longitude_time"]
         },
         {
             "name": "2.0 - calibrate.py",
@@ -345,7 +345,7 @@
             // Lots bad time values in brizo 20250914T080941 due to memory corruption on the vehicle
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"]
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"]
-            //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"]
+            "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"]
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"]
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"]
             // Has different universals time coodinates for longitude/latitude and depth
@@ -360,7 +360,7 @@
             // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable
             //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"]
             // Full month of June 2025 for Pontus with WetLabsUBAT Group data
-            "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
+            //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
             //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"]
             //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250707T043011/slate.nc4", "--no_cleanup"]
         },
diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py
index ddb77b8c..505b2830 100755
--- a/src/data/nc42netcdfs.py
+++ b/src/data/nc42netcdfs.py
@@ -345,7 +345,8 @@ def _analyze_original_time_coordinates(
 
         if overlap_equal and len(lat_time) == len(lon_time):
             self.logger.info(
-                "Dead Reckoned timing: latitude_time and longitude_time are properly synchronized"
+                "Dead Reckoned timing: original latitude_time and longitude_time "
+                "are properly synchronized"
             )
             return
 
@@ -502,8 +503,6 @@ def _process_single_time_coordinate(  # noqa: PLR0913
         plot_time_coord_name: str | None,
     ) -> dict:
         """Process filtering for a single time coordinate."""
-        from scipy.signal import medfilt
-
         time_var = src_group.variables[time_coord_name]
         original_time_data = time_var[:]
         self.logger.info("Time coordinate %s: %d points", time_coord_name, len(original_time_data))
@@ -523,27 +522,23 @@ def _process_single_time_coordinate(  # noqa: PLR0913
         # First filter out values that fall outside of reasonable bounds
         valid_indices = self._filter_valid_time_indices(original_time_data)
 
-        # Despike to remove single point outliers before getting monotonic indices
-        time_data = medfilt(original_time_data[valid_indices], kernel_size=3)
-
-        # Store valid indices and despiked data for plotting
-        if plot_data is not None:
-            plot_data["valid_indices"] = valid_indices
-            plot_data["valid_data"] = original_time_data[valid_indices]
-            plot_data["despiked"] = time_data.copy()
+        # Get the valid time subset
+        valid_time_data = original_time_data[valid_indices]
 
-        # Now apply monotonic filtering to the valid subset
-        mono_indices_in_filtered = self._get_monotonic_indices(time_data)
+        # Apply monotonic filtering to the valid subset
+        mono_indices_in_filtered = self._get_monotonic_indices(valid_time_data)
 
         # Convert monotonic indices back to original array indices
         # mono_indices_in_filtered are indices into the valid_indices subset
         # We need to map them back to indices in the original time array
         final_indices = [valid_indices[i] for i in mono_indices_in_filtered]
 
-        # Generate plot if requested for this variable
+        # Store data for plotting if requested
         if plot_data is not None:
+            plot_data["valid_indices"] = valid_indices
+            plot_data["valid_data"] = valid_time_data
             plot_data["final_indices"] = mono_indices_in_filtered
-            plot_data["final_data"] = time_data[mono_indices_in_filtered]
+            plot_data["final_data"] = valid_time_data[mono_indices_in_filtered]
             self._plot_time_filtering(plot_data)
 
         return self._create_time_filter_result(
@@ -641,12 +636,11 @@ def _plot_time_filtering(self, plot_data: dict):
         original = plot_data["original"]
         valid_indices = plot_data["valid_indices"]
         valid_data = plot_data["valid_data"]
-        despiked = plot_data["despiked"]
         final_indices = plot_data["final_indices"]
         final_data = plot_data["final_data"]
 
         # Create figure with subplots
-        fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(12, 9), sharex=True)
+        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 8), sharex=True)
 
         # Plot 1: Original data
         ax1.plot(original, "b-", label="Original", alpha=0.7)
@@ -672,40 +666,26 @@ def _plot_time_filtering(self, plot_data: dict):
             bbox={"boxstyle": "round", "facecolor": "wheat"},
         )
 
-        # Plot 3: After despiking
-        ax3.plot(despiked, "g-", label="After Median Filter (3-point)", alpha=0.7)
+        # Plot 3: Final After Monotonic filtered data
+        ax3.plot(final_indices, final_data, "r.-", label="After Monotonic Filter", alpha=0.7)
+        ax3.set_xlabel("Index")
         ax3.set_ylabel("Time Value")
         ax3.legend()
         ax3.grid(visible=True, alpha=0.3)
-        ax3.text(
-            0.02,
-            0.60,
-            f"Points removed: {len(valid_data) - len(despiked)}\n",
-            transform=ax3.transAxes,
-            verticalalignment="top",
-            bbox={"boxstyle": "round", "facecolor": "wheat"},
-        )
-
-        # Plot 4: Final After Monotonic filtered data
-        ax4.plot(final_indices, final_data, "r.-", label="After Monotonic Filter", alpha=0.7)
-        ax4.set_xlabel("Index")
-        ax4.set_ylabel("Time Value")
-        ax4.legend()
-        ax4.grid(visible=True, alpha=0.3)
 
         # Add statistics text
         stats_text = (
-            f"Points removed: {len(despiked) - len(final_data)}\n"
+            f"Points removed: {len(valid_data) - len(final_data)}\n"
             f"Original points: {len(original)}\n"
             f"After final filter: {len(final_data)}\n"
             f"Total removed: {len(original) - len(final_data)} "
             f"({100 * (len(original) - len(final_data)) / len(original):.1f}%)"
         )
-        ax4.text(
+        ax3.text(
             0.02,
             0.90,
             stats_text,
-            transform=ax4.transAxes,
+            transform=ax3.transAxes,
             verticalalignment="top",
             bbox={"boxstyle": "round", "facecolor": "wheat"},
         )

From 6638a4a1656b2eb7452b69e35fde24959c176870 Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Sun, 7 Dec 2025 10:16:54 -0800
Subject: [PATCH 2/8] Add --plot_universals and do intersection before
 non-monotonic filtering.

---
 src/data/nc42netcdfs.py | 299 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 285 insertions(+), 14 deletions(-)

diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py
index 505b2830..f4ec4485 100755
--- a/src/data/nc42netcdfs.py
+++ b/src/data/nc42netcdfs.py
@@ -134,6 +134,7 @@ def __init__(  # noqa: PLR0913
         self,
         log_file: str = None,
         plot_time: str = None,
+        plot_universals: bool = False,  # noqa: FBT001, FBT002
         filter_monotonic_time: bool = True,  # noqa: FBT001, FBT002
         verbose: int = 0,
         commandline: str = "",
@@ -143,15 +144,18 @@ def __init__(  # noqa: PLR0913
         Args:
             log_file: Log file path for processing
             plot_time: Optional plot time specification (e.g., /latitude_time)
+            plot_universals: Plot longitude, latitude, depth time filtering for root group
             filter_monotonic_time: Filter out non-monotonic time values
             verbose: Verbosity level (0-2)
             commandline: Command line string for tracking
         """
         self.log_file = log_file
         self.plot_time = plot_time
+        self.plot_universals = plot_universals
         self.filter_monotonic_time = filter_monotonic_time
         self.verbose = verbose
         self.commandline = commandline
+        self.universals_plot_data = {}  # Store plot data for universals
 
     def download_with_pooch(self, url, local_dir, known_hash=None):
         """Download using pooch with caching and verification."""
@@ -310,9 +314,15 @@ def _get_time_filters_for_variables(
             )
             time_filters[time_coord_name] = time_filter
 
-        # Align latitude and longitude in root group if needed
+        # For root group, apply intersection of valid indices before monotonic filtering
         if group_name == "/":
+            time_filters = self._apply_universals_intersection(
+                log_file, src_group, time_filters, vars_to_extract
+            )
             time_filters = self._align_root_group_coordinates(time_filters, vars_to_extract)
+            # Plot universals if requested
+            if self.plot_universals:
+                self._plot_universals_filtering()
 
         return time_filters
 
@@ -525,14 +535,40 @@ def _process_single_time_coordinate(  # noqa: PLR0913
         # Get the valid time subset
         valid_time_data = original_time_data[valid_indices]
 
-        # Apply monotonic filtering to the valid subset
+        # Apply monotonic filtering
         mono_indices_in_filtered = self._get_monotonic_indices(valid_time_data)
 
         # Convert monotonic indices back to original array indices
-        # mono_indices_in_filtered are indices into the valid_indices subset
-        # We need to map them back to indices in the original time array
         final_indices = [valid_indices[i] for i in mono_indices_in_filtered]
 
+        # For root group universals, store data for intersection step
+        is_universal_coord = group_name == "/" and time_coord_name in [
+            "longitude_time",
+            "latitude_time",
+            "depth_time",
+            "time_time",
+        ]
+
+        if is_universal_coord:
+            # Store intermediate data for intersection step
+            if self.plot_universals:
+                self.universals_plot_data[time_coord_name] = {
+                    "original": original_time_data.copy(),
+                    "valid_indices": valid_indices.copy(),
+                    "valid_data": valid_time_data.copy(),
+                    "mono_indices": final_indices.copy(),
+                    "mono_data": valid_time_data[mono_indices_in_filtered].copy(),
+                    "log_file": log_file,
+                }
+            # Return with monotonic-filtered indices - intersection happens later
+            return {
+                "indices": final_indices,
+                "filtered": len(final_indices) < len(original_time_data),
+                "comment": "Pre-intersection state",
+                "mono_indices": final_indices,  # Store for intersection
+                "original_time_data": original_time_data,  # Store for reprocessing
+            }
+
         # Store data for plotting if requested
         if plot_data is not None:
             plot_data["valid_indices"] = valid_indices
@@ -545,6 +581,62 @@ def _process_single_time_coordinate(  # noqa: PLR0913
             final_indices, len(original_time_data), time_coord_name
         )
 
+    def _apply_universals_intersection(
+        self,
+        log_file: str,
+        src_group: netCDF4.Group,
+        time_filters: dict,
+        vars_to_extract: list[str],
+    ) -> dict:
+        """Apply intersection of monotonic-filtered indices for longitude, latitude, and depth.
+
+        After outlier removal and monotonic filtering, we intersect the indices so that only
+        time points that exist in all three coordinates are kept.
+        """
+        # Identify which universal coordinates are present
+        universal_coords = ["longitude_time", "latitude_time", "depth_time", "time_time"]
+        present_coords = [tc for tc in universal_coords if tc in time_filters]
+
+        if len(present_coords) < 2:  # noqa: PLR2004
+            # Need at least 2 coordinates to do intersection
+            self.logger.info("Less than 2 universal coordinates found, skipping intersection")
+            return time_filters
+
+        # Get monotonic-filtered indices for each coordinate
+        mono_indices_sets = {}
+        for time_coord in present_coords:
+            mono_indices_sets[time_coord] = set(time_filters[time_coord]["mono_indices"])
+
+        # Compute intersection of all monotonic indices
+        intersected_indices = set.intersection(*mono_indices_sets.values())
+        intersected_indices_list = sorted(intersected_indices)
+
+        self.logger.info(
+            "Intersection of universal coordinates (after monotonic filtering): %s",
+            ", ".join(f"{tc}: {len(mono_indices_sets[tc])} points" for tc in present_coords),
+        )
+        self.logger.info("After intersection: %d common points", len(intersected_indices_list))
+
+        # Update each coordinate with intersected indices
+        for time_coord in present_coords:
+            original_time_data = time_filters[time_coord]["original_time_data"]
+
+            # Update the filter with final indices
+            time_filters[time_coord] = self._create_time_filter_result(
+                intersected_indices_list, len(original_time_data), time_coord
+            )
+
+            # Update plot data if collecting for universals
+            if self.plot_universals and time_coord in self.universals_plot_data:
+                self.universals_plot_data[time_coord]["final_indices"] = (
+                    intersected_indices_list.copy()
+                )
+                self.universals_plot_data[time_coord]["final_data"] = original_time_data[
+                    intersected_indices_list
+                ].copy()
+
+        return time_filters
+
     def _is_time_variable(self, var_name: str, var) -> bool:
         """Check if a variable is a time coordinate variable."""
         # Check name pattern
@@ -564,24 +656,40 @@ def _filter_valid_time_indices(self, time_data) -> list[int]:
         """Filter out wildly invalid time values before monotonic filtering.
 
         Returns indices of time values that are reasonable Unix epoch timestamps.
-        Uses numpy for efficient vectorized operations.
+        Uses time bounds from log file name.
         """
-        # LRAUV data bounds: September 2012 to current + 5 years buffer
-        lrauv_start_date = datetime(2012, 9, 1, tzinfo=UTC)
-        current_date = datetime.now(UTC)
-        future_buffer_date = current_date.replace(year=current_date.year + 5)
+        # Parse filename like: 202509140809_202509150109.nc4
+        # Format: YYYYMMDDHHMM_YYYYMMDDHHMM
+        import re
+        from datetime import timedelta
+        from pathlib import Path
+
+        filename = Path(self.log_file).stem
+        match = re.search(r"(\d{12})_(\d{12})", filename)
 
-        MIN_UNIX_TIME = int(lrauv_start_date.timestamp())  # September 1, 2012 UTC
-        MAX_UNIX_TIME = int(future_buffer_date.timestamp())  # Current + 5 years buffer
+        start_str = match.group(1)
+        end_str = match.group(2)
+
+        # Parse YYYYMMDDHHMM format
+        start_time = datetime.strptime(start_str, "%Y%m%d%H%M").replace(tzinfo=UTC)
+        end_time = datetime.strptime(end_str, "%Y%m%d%H%M").replace(tzinfo=UTC)
+
+        # Add 10-minute buffer before and after
+        MIN_UNIX_TIME = int((start_time - timedelta(minutes=10)).timestamp())
+        MAX_UNIX_TIME = int((end_time + timedelta(minutes=10)).timestamp())
+
+        self.logger.debug(
+            "Using time bounds from log file: %s to %s",
+            (start_time - timedelta(minutes=10)).isoformat(),
+            (end_time + timedelta(minutes=10)).isoformat(),
+        )
 
         # Convert to numpy array for efficient operations
         time_array = np.asarray(time_data)
 
-        # Create boolean masks for valid conditions
+        # Basic validity checks
         is_finite = np.isfinite(time_array)
         is_in_range = (time_array >= MIN_UNIX_TIME) & (time_array <= MAX_UNIX_TIME)
-
-        # Combine all conditions - all must be True for valid indices
         valid_mask = is_finite & is_in_range
 
         # Get indices where all conditions are met
@@ -624,6 +732,153 @@ def _get_monotonic_indices(self, time_data) -> list[int]:
 
         return mono_indices
 
+    def _plot_universals_filtering(self):
+        """Plot longitude, latitude, and depth time coordinate filtering for root group."""
+        if not MATPLOTLIB_AVAILABLE:
+            self.logger.error("Matplotlib not available. Install with: uv add matplotlib")
+            return
+
+        if not self.universals_plot_data:
+            self.logger.warning("No universals plot data collected")
+            return
+
+        import matplotlib.pyplot as plt
+
+        # Determine which coordinates are available
+        coords = []
+        coord_names = []
+        for coord in ["longitude", "latitude", "depth", "time"]:
+            time_coord = f"{coord}_time"
+            if time_coord in self.universals_plot_data:
+                coords.append(coord)
+                coord_names.append(time_coord)
+
+        if not coords:
+            self.logger.warning("No universal coordinates found for plotting")
+            return
+
+        n_coords = len(coords)
+        fig, axes = plt.subplots(n_coords, 4, figsize=(18, 2.5 * n_coords), sharex="col")
+
+        # Handle case of single coordinate
+        if n_coords == 1:
+            axes = axes.reshape(1, -1)
+
+        log_file_name = self.universals_plot_data[coord_names[0]]["log_file"]
+        fig.suptitle(
+            f"Universal Coordinates Time Filtering\nFile: {log_file_name}",
+            fontsize=14,
+            fontweight="bold",
+        )
+
+        for i, (coord, time_coord) in enumerate(zip(coords, coord_names, strict=True)):
+            data = self.universals_plot_data[time_coord]
+            original = data["original"]
+            valid_indices = data["valid_indices"]
+            valid_data = data["valid_data"]
+            mono_indices = data["mono_indices"]
+            mono_data = data["mono_data"]
+            final_indices = data["final_indices"]
+            final_data = data["final_data"]
+
+            self._plot_universals_row(
+                axes[i],
+                coord,
+                original,
+                valid_indices,
+                valid_data,
+                mono_indices,
+                mono_data,
+                final_indices,
+                final_data,
+                is_first_row=(i == 0),
+            )
+
+        # Set x-label only on bottom row
+        for j in range(4):
+            axes[-1, j].set_xlabel("Index")
+
+        plt.tight_layout()
+        plt.show()
+
+        self.logger.info("Universals time filtering plot displayed")
+
+    def _plot_universals_row(  # noqa: PLR0913
+        self,
+        axes_row,
+        coord: str,
+        original,
+        valid_indices,
+        valid_data,
+        mono_indices,
+        mono_data,
+        final_indices,
+        final_data,
+        is_first_row: bool,  # noqa: FBT001
+    ):
+        """Plot a single row of the universals filtering plot."""
+        # Column 1: Original data
+        axes_row[0].plot(original, "b-", alpha=0.7)
+        axes_row[0].set_ylabel(f"{coord}_time\n(Unix seconds)")
+        if is_first_row:
+            axes_row[0].set_title("Original")
+        axes_row[0].grid(visible=True, alpha=0.3)
+        axes_row[0].text(
+            0.02,
+            0.95,
+            f"Points: {len(original)}",
+            transform=axes_row[0].transAxes,
+            verticalalignment="top",
+            bbox={"boxstyle": "round", "facecolor": "lightblue", "alpha": 0.8},
+        )
+
+        # Column 2: After outlier removal
+        axes_row[1].plot(valid_indices, valid_data, "m.-", alpha=0.7, markersize=2)
+        if is_first_row:
+            axes_row[1].set_title("After Outlier Removal")
+        axes_row[1].grid(visible=True, alpha=0.3)
+        removed_outliers = len(original) - len(valid_data)
+        axes_row[1].text(
+            0.02,
+            0.95,
+            f"Points: {len(valid_data)}\nRemoved: {removed_outliers}",
+            transform=axes_row[1].transAxes,
+            verticalalignment="top",
+            bbox={"boxstyle": "round", "facecolor": "plum", "alpha": 0.8},
+        )
+
+        # Column 3: After monotonic removal
+        axes_row[2].plot(mono_indices, mono_data, "g.-", alpha=0.7, markersize=2)
+        if is_first_row:
+            axes_row[2].set_title("After Monotonic Removal")
+        axes_row[2].grid(visible=True, alpha=0.3)
+        removed_monotonic = len(valid_data) - len(mono_data)
+        axes_row[2].text(
+            0.02,
+            0.95,
+            f"Points: {len(mono_data)}\nRemoved: {removed_monotonic}",
+            transform=axes_row[2].transAxes,
+            verticalalignment="top",
+            bbox={"boxstyle": "round", "facecolor": "lightgreen", "alpha": 0.8},
+        )
+
+        # Column 4: After intersection
+        axes_row[3].plot(final_indices, final_data, "r.-", alpha=0.7, markersize=2)
+        if is_first_row:
+            axes_row[3].set_title("After Intersection")
+        axes_row[3].grid(visible=True, alpha=0.3)
+        removed_intersection = len(mono_data) - len(final_data)
+        total_removed = len(original) - len(final_data)
+        axes_row[3].text(
+            0.02,
+            0.95,
+            f"Points: {len(final_data)}\nRemoved: {removed_intersection}\n"
+            f"Total removed: {total_removed} ({100 * total_removed / len(original):.1f}%)",
+            transform=axes_row[3].transAxes,
+            verticalalignment="top",
+            bbox={"boxstyle": "round", "facecolor": "lightcoral", "alpha": 0.8},
+        )
+
     def _plot_time_filtering(self, plot_data: dict):
         """Plot before and after time coordinate filtering."""
         if not MATPLOTLIB_AVAILABLE:
@@ -1090,6 +1345,12 @@ def process_command_line(self):
             + " --log_file brizo/missionlogs/2025/20250909_20250915/20250914T080941/"
             + "202509140809_202509150109.nc4 --plot_time /latitude_time\n"
         )
+        examples += (
+            "    "
+            + sys.argv[0]
+            + " --log_file brizo/missionlogs/2025/20250909_20250915/20250914T080941/"
+            + "202509140809_202509150109.nc4 --plot_universals\n"
+        )
 
         # Use shared parser with nc42netcdfs-specific additions
         parser = get_standard_lrauv_parser(
@@ -1138,12 +1399,22 @@ def process_command_line(self):
                 "Format for <VARIABLE_NAME> is /Group/variable_name."
             ),
         )
+        parser.add_argument(
+            "--plot_universals",
+            action="store_true",
+            help=(
+                "Plot time filtering for longitude, latitude, and depth coordinates "
+                "in the root (/) group. Shows original, after outlier removal, after "
+                "intersection, and after non-monotonic removal in 4-column layout."
+            ),
+        )
 
         self.args = parser.parse_args()
 
         # Set instance attributes from parsed arguments
         self.log_file = self.args.log_file
         self.plot_time = self.args.plot_time
+        self.plot_universals = self.args.plot_universals
         self.filter_monotonic_time = self.args.filter_monotonic_time
         self.verbose = self.args.verbose
         self.commandline = " ".join(sys.argv)

From 9df9466275944ddfd61f49764c677c989ceef679 Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Sun, 7 Dec 2025 10:20:47 -0800
Subject: [PATCH 3/8] Shorten method names & improve formatting of log
 messages.

---
 .vscode/launch.json |  7 ++++---
 src/data/combine.py | 30 +++++++++++++++++-------------
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 0400b66d..c5cab5ae 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -58,7 +58,8 @@
             //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4", "--known_hash", "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"]
             // brizo 20250914T080941 has bad latitude and longitude values and lots of bad Universal latitude_time and longitude_time values
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"]
-            "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/longitude_time"]
+            //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/longitude_time"]
+            "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_universals"]
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"]
             // brizo 20250916T230652 has several ESP Samples from stoqs_lrauv_sep2025
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"]
@@ -122,11 +123,11 @@
             "program": "${workspaceFolder}/src/data/combine.py",
             "console": "integratedTerminal",
             "justMyCode": false,
-            //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"]
+            "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot"]
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"]
             //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"]
             // Conflicting sizes for nudged_time and data - fixed by filtering GPS fixes to be monotonically increasing
-            "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot"]
+            //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot"]
 
         },
         {
diff --git a/src/data/combine.py b/src/data/combine.py
index 7d745830..2bc4aa99 100755
--- a/src/data/combine.py
+++ b/src/data/combine.py
@@ -381,7 +381,7 @@ def _biolume_process(self, sensor):
                     set_to_nan=True,
                 )
 
-    def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict:
+    def _cons_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict:  # noqa: C901
         """Analyze and consolidate time coordinates for a group.
 
         Returns:
@@ -391,8 +391,11 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic
                 - time_coord_mapping: dict mapping original dims to consolidated dims
                 - variable_time_coord_mapping: dict mapping variables to their time coords
         """
-        # Find all time variables in this group
-        time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("time")}
+        if group_name.lower() == "universals":
+            # Do not include the "time" record variable from universals group
+            time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("_time")}
+        else:
+            time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("time")}
 
         if not time_vars:
             return {
@@ -407,7 +410,7 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic
             time_name = list(time_vars.keys())[0]
             consolidated_name = f"{group_name}_time"
             self.logger.info(
-                "Group %s: Single time coordinate '%s' - using as '%s'",
+                "Group %s: Single time coordinate '%s' -> '%s'",
                 group_name,
                 time_name,
                 consolidated_name,
@@ -475,7 +478,7 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic
             time_coord_mapping = dict.fromkeys(time_vars, consolidated_name)
 
             self.logger.info(
-                "%-65s %s",
+                "%-77s %s",
                 f"Consoliding {len(time_vars)} coordinates to",
                 consolidated_name,
             )
@@ -512,15 +515,15 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic
     def _add_time_coordinates_to_combined(self, time_info: dict, ds: xr.Dataset) -> None:
         """Add time coordinates to the combined dataset."""
         if time_info["consolidated_time_name"]:
-            self._add_consolidated_time_coordinate(time_info)
+            self._add_cons_time_coord(time_info)
         else:
-            self._add_separate_time_coordinates(time_info, ds)
+            self._add_sep_time_coord(time_info, ds)
 
-    def _add_consolidated_time_coordinate(self, time_info: dict) -> None:
+    def _add_cons_time_coord(self, time_info: dict) -> None:
         """Add a consolidated time coordinate to the combined dataset."""
         time_name = time_info["consolidated_time_name"]
         self.logger.info(
-            "Adding consolidated time coordinate %-45s %s",
+            "Adding consolidated time coordinate %-44s %s",
             f"{time_name} as",
             time_name,
         )
@@ -531,7 +534,7 @@ def _add_consolidated_time_coordinate(self, time_info: dict) -> None:
         )
         self.combined_nc[time_name].attrs = time_info["consolidated_time_data"].attrs.copy()
 
-    def _add_separate_time_coordinates(self, time_info: dict, ds: xr.Dataset) -> None:
+    def _add_sep_time_coord(self, time_info: dict, ds: xr.Dataset) -> None:
         """Add separate time coordinates to the combined dataset."""
         for orig_time_var, new_time_var in time_info["time_coord_mapping"].items():
             self.logger.info(
@@ -603,7 +606,7 @@ def _add_time_metadata_to_variable(self, var_name: str, dim_name: str) -> None:
         )
         self.combined_nc[var_name].coords[dim_name].attrs["standard_name"] = "time"
 
-    def _process_group_variables(self, ds: xr.Dataset, group_name: str, time_info: dict) -> None:
+    def _process_group_vars(self, ds: xr.Dataset, group_name: str, time_info: dict) -> None:
         """Process all data variables in a group."""
         for orig_var in ds.variables:
             if orig_var.lower().endswith("time"):
@@ -877,18 +880,19 @@ def combine_groups(self) -> None:
         self.combined_nc = xr.Dataset()
 
         for group_file in group_files:
+            self.logger.info("-" * 110)
             self.logger.info("Group file: %s", group_file.name)
             # Open group file without decoding to have np.allclose work properly
             with xr.open_dataset(group_file, decode_cf=False) as ds:
                 # Group name to prepend variable names is lowercase with underscores removed
                 group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower()
-                time_info = self._consolidate_group_time_coords(ds, group_name)
+                time_info = self._cons_group_time_coords(ds, group_name)
 
                 # Add time coordinate(s) to combined dataset
                 self._add_time_coordinates_to_combined(time_info, ds)
 
                 # Process all data variables in the group
-                self._process_group_variables(ds, group_name, time_info)
+                self._process_group_vars(ds, group_name, time_info)
 
                 # Add consolidation comment if applicable
                 self._add_consolidation_comment(time_info)

From 9a470061840207d11e9f137885d03dfa04e051ca Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Sun, 7 Dec 2025 10:45:02 -0800
Subject: [PATCH 4/8] Remove redundant coord interpolation, fix sample_rate and
 comment metadata.

---
 .vscode/launch.json |  4 +--
 src/data/align.py   | 77 +++++++++++++++------------------------------
 2 files changed, 28 insertions(+), 53 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index c5cab5ae..fcb5c6ed 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -150,8 +150,8 @@
             //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2008.289.03"],
             //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2023.192.01"],
             //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2024.317.01"],
-            //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"]
-            "args": ["-v", "1", "--auv_name", "dorado", "--mission", "2025.316.02"],
+            "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"]
+            //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2025.316.02"],
         },
         {
             "name": "3.1 - align.py for LRAUV --log_file",
diff --git a/src/data/align.py b/src/data/align.py
index 082bafbe..b2d41dbf 100755
--- a/src/data/align.py
+++ b/src/data/align.py
@@ -564,53 +564,10 @@ def process_combined(self) -> Path:  # noqa: C901, PLR0912, PLR0915
             var_time = self.combined_nc[variable].get_index(timevar).view(np.int64).tolist()
 
             # Calculate sampling rate
-            sample_rate = np.round(
-                1.0 / (np.mean(np.diff(self.combined_nc[timevar])) / np.timedelta64(1, "s")),
-                decimals=2,
+            sample_rate = 1.0 / (
+                np.mean(np.diff(self.combined_nc[timevar])) / np.timedelta64(1, "s")
             )
 
-            # Create interpolated coordinate variables for this group
-            coord_names = ["depth", "latitude", "longitude"]
-            coord_interps = [depth_interp, lat_interp, lon_interp]
-            coord_sources = [nav_coords["depth"], nav_coords["latitude"], nav_coords["longitude"]]
-
-            for coord_name, coord_interp, coord_source in zip(
-                coord_names, coord_interps, coord_sources, strict=True
-            ):
-                coord_var_name = f"{group_name}_{coord_name}"
-
-                self.aligned_nc[coord_var_name] = xr.DataArray(
-                    coord_interp(var_time).astype(np.float64).tolist(),
-                    dims={timevar},
-                    coords=[self.combined_nc[variable].get_index(timevar)],
-                    name=coord_var_name,
-                )
-
-                # Copy attributes from source coordinate
-                if coord_source in self.combined_nc:
-                    self.aligned_nc[coord_var_name].attrs = self.combined_nc[coord_source].attrs
-
-                # Update attributes
-                self.aligned_nc[coord_var_name].attrs["long_name"] = coord_name.title()
-                self.aligned_nc[coord_var_name].attrs["instrument_sample_rate_hz"] = sample_rate
-
-                if coord_name in ["longitude", "latitude", "depth"]:
-                    self.aligned_nc[coord_var_name].attrs["comment"] = (
-                        self.aligned_nc[coord_var_name].attrs.get("comment", "")
-                        + f". Variable {coord_source} from {src_file} file linearly"
-                        f" interpolated onto {group_name} time values."
-                    )
-
-            # Update spatial temporal bounds for global metadata
-            if pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC) < pd.to_datetime(
-                self.min_time
-            ):
-                self.min_time = pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC)
-            if pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(
-                UTC
-            ) > pd.to_datetime(self.max_time):
-                self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC)
-
             time_coord = variable_time_coord_mapping.get(variable)
             depth_coord = (
                 time_coord[:-5] + "_depth"
@@ -639,7 +596,10 @@ def process_combined(self) -> Path:  # noqa: C901, PLR0912, PLR0915
             )
             self.aligned_nc[depth_coord].attrs["long_name"] = "Depth"
             self.aligned_nc[depth_coord].attrs["comment"] = "depth from Group_Universals.nc"
-            self.aligned_nc[depth_coord].attrs["instrument_sample_rate_hz"] = sample_rate
+            TINY_SAMPLE_RATE = 10e-2
+            self.aligned_nc[depth_coord].attrs["instrument_sample_rate_hz"] = (
+                f"{sample_rate:.2f}" if sample_rate > TINY_SAMPLE_RATE else f"{sample_rate:.6f}"
+            )
 
             self.aligned_nc[lat_coord] = xr.DataArray(
                 lat_interp(var_time).astype(np.float64).tolist(),
@@ -649,7 +609,7 @@ def process_combined(self) -> Path:  # noqa: C901, PLR0912, PLR0915
             )
             self.aligned_nc[lat_coord].attrs = self.combined_nc["nudged_latitude"].attrs
             self.aligned_nc[lat_coord].attrs["comment"] += (
-                f". Variable nudged_latitude from {src_file} file linearly"
+                f". Variable nudged_latitude linearly"
                 f" interpolated onto {variable.split('_')[0]} time values."
             )
             self.aligned_nc[lat_coord].attrs["long_name"] = "Latitude"
@@ -663,13 +623,22 @@ def process_combined(self) -> Path:  # noqa: C901, PLR0912, PLR0915
             )
             self.aligned_nc[lon_coord].attrs = self.combined_nc["nudged_longitude"].attrs
             self.aligned_nc[lon_coord].attrs["comment"] += (
-                f". Variable nudged_longitude from {src_file} file linearly"
+                f". Variable nudged_longitude linearly"
                 f" interpolated onto {variable.split('_')[0]} time values."
             )
             self.aligned_nc[lon_coord].attrs["long_name"] = "Longitude"
             self.aligned_nc[lon_coord].attrs["instrument_sample_rate_hz"] = sample_rate
 
-            # Update bounds using the interpolated coordinates
+            # Update spatial temporal bounds for global metadata
+            if pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC) < pd.to_datetime(
+                self.min_time
+            ):
+                self.min_time = pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC)
+            if pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(
+                UTC
+            ) > pd.to_datetime(self.max_time):
+                self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC)
+
             if self.aligned_nc[depth_coord].min() < self.min_depth:
                 self.min_depth = self.aligned_nc[depth_coord].min().to_numpy()
             if self.aligned_nc[depth_coord].max() > self.max_depth:
@@ -703,8 +672,14 @@ def process_combined(self) -> Path:  # noqa: C901, PLR0912, PLR0915
             else:
                 self.logger.info("Skipping setting coordinates attribute for %s", variable)
 
-            self.logger.info("%s: instrument_sample_rate_hz = %.2f", variable, sample_rate)
-            self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = sample_rate
+            self.logger.info(
+                "%s: instrument_sample_rate_hz = %s",
+                variable,
+                f"{sample_rate:.2f}" if sample_rate > TINY_SAMPLE_RATE else f"{sample_rate:.6f}",
+            )
+            self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = (
+                f"{sample_rate:.2f}" if sample_rate > TINY_SAMPLE_RATE else f"{sample_rate:.6f}"
+            )
 
         return netcdfs_dir
 

From 98b4668e66e228fcfc1358a354884f104a3b9cf7 Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Sun, 7 Dec 2025 10:55:56 -0800
Subject: [PATCH 5/8] Add _get_pitch_corrected_instrument() for more
 robustness, fixup coordinate metadata.

---
 src/data/resample.py | 116 ++++++++++++++++++++++++++++++-------------
 1 file changed, 81 insertions(+), 35 deletions(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index 87a39c3e..fb8bd5c1 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -365,6 +365,36 @@ def instruments_variables(self, nc_file: str) -> dict:
                 instr_vars[instr].append(variable)
         return instr_vars
 
+    def _get_pitch_corrected_instrument(self) -> str:
+        # Determine which instrument provides the pitch corrected depth
+        # For dorado/i2map missions, this is usually 'ctd1' or 'seabird25p'
+        # For LRAUV missions, this is usually 'ctdseabird' or 'ctdneilbrown'
+        if self.log_file:
+            pitch_corrected_instr = "ctdseabird"
+            if f"{pitch_corrected_instr}_depth" in self.ds:
+                return pitch_corrected_instr
+            pitch_corrected_instr += "_sea_water_temperature"
+            if f"{pitch_corrected_instr}_depth" in self.ds:
+                return pitch_corrected_instr
+            pitch_corrected_instr = "ctdneilbrown"
+            if f"{pitch_corrected_instr}_depth" in self.ds:
+                return pitch_corrected_instr
+        else:
+            pitch_corrected_instr = "ctd1"
+            if f"{pitch_corrected_instr}_depth" in self.ds:
+                return pitch_corrected_instr
+            pitch_corrected_instr = "seabird25p"
+            if f"{pitch_corrected_instr}_depth" in self.ds:
+                return pitch_corrected_instr
+
+        # If neither instrument found, log error and raise InvalidAlignFile exception.
+        self.logger.warning("No pitch corrected depth coordinate found")
+        self.logger.info(
+            "Cannot continue without a pitch corrected depth coordinate",
+        )
+        msg = f"A pitch corrected depth coordinate was not found in {self.ds.encoding['source']}"
+        raise InvalidAlignFile(msg) from None
+
     def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None:
         self.logger.info(
             "Resampling coordinates depth, latitude and longitude with"
@@ -372,23 +402,32 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None:
             freq,
             mf_width,
         )
-        # Original
-        try:
-            self.df_o[f"{instr}_depth"] = self.ds[f"{instr}_depth"].to_pandas()
-        except KeyError:
-            self.logger.warning(
-                "Variable %s_depth not found in %s align.nc file",
-                instr,
-                self.mission,
-            )
+        # Find depth coordinate for the instrument - lrauv ctd may have unique coordinates
+        # per variable. Check for shared depth coordinate first, dorado/i2map style, then
+        # check for instrument specific depth coordinate. If neither found, log error and
+        # raise InvalidAlignFile exception.
+        if f"{instr}_depth" in self.ds:
+            depth_coord = f"{instr}_depth"
+        elif f"{instr}_sea_water_temperature_depth" in self.ds:
+            depth_coord = f"{instr}_sea_water_temperature_depth"
+        elif "ctdneilbrown_depth" in self.ds:
+            depth_coord = "ctdneilbrown_depth"
+        else:
+            self.logger.warning("No depth coordinate found for instrument %s", instr)
             self.logger.info(
                 "Cannot continue without a pitch corrected depth coordinate",
             )
             if self.log_file:
-                msg = f"A CTD depth was not found in {self.ds.encoding['source']}"
+                msg = (
+                    f"A depth coordinate was not found for instrument {instr} in "
+                    f"{self.ds.encoding['source']}"
+                )
             else:
                 msg = f"{instr}_depth not found in {self.auv_name}_{self.mission}_align.nc4"
             raise InvalidAlignFile(msg) from None
+
+        # Original
+        self.df_o[f"{instr}_depth"] = self.ds[depth_coord].to_pandas()
         try:
             self.df_o[f"{instr}_latitude"] = self.ds[f"{instr}_latitude"].to_pandas()
             self.df_o[f"{instr}_longitude"] = self.ds[f"{instr}_longitude"].to_pandas()
@@ -399,6 +438,7 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None:
             )
             self.logger.warning(msg)
             raise InvalidAlignFile(msg) from None
+
         # Median Filtered - back & forward filling nan values at ends
         self.df_o[f"{instr}_depth_mf"] = (
             self.ds[f"{instr}_depth"]
@@ -450,36 +490,46 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None:
         )
         return aggregator
 
-    def save_coordinates(
+    def save_coordinates(  # noqa: PLR0913
         self,
         instr: str,
+        pitch_corrected_instr: str,
         mf_width: int,
         freq: str,
         aggregator: str,
     ) -> None:
-        in_fn = self.ds.encoding["source"].split("/")[-1]
         self.df_r["depth"].index.rename("time", inplace=True)  # noqa: PD002
         self.resampled_nc["depth"] = self.df_r["depth"].to_xarray()
-        self.df_r["latitude"].index.rename("time", inplace=True)  # noqa: PD002
-        self.resampled_nc["latitude"] = self.df_r["latitude"].to_xarray()
-        self.df_r["longitude"].index.rename("time", inplace=True)  # noqa: PD002
-        self.resampled_nc["longitude"] = self.df_r["longitude"].to_xarray()
-        self.resampled_nc["depth"].attrs = self.ds[f"{instr}_depth"].attrs
+        self.resampled_nc["depth"].attrs = {}
+        self.resampled_nc["depth"].attrs["units"] = "meters"
+        self.resampled_nc["depth"].attrs["long_name"] = "Depth"
         self.resampled_nc["depth"].attrs["standard_name"] = "depth"
-        self.resampled_nc["depth"].attrs["comment"] += (
-            f". {self.ds[f'{instr}_depth'].attrs['comment']}"
+        self.resampled_nc["depth"].attrs["comment"] = (
+            f"{self.ds[f'{pitch_corrected_instr}_depth'].attrs['comment']}"
             f" mean sampled at {self.freq} intervals following"
             f" {self.mf_width} point median filter."
         )
-        self.resampled_nc["latitude"].attrs = self.ds[f"{instr}_latitude"].attrs
-        self.resampled_nc["latitude"].attrs["comment"] += (
-            f" Variable {instr}_latitude from {in_fn}"
+
+        self.df_r["latitude"].index.rename("time", inplace=True)  # noqa: PD002
+        self.resampled_nc["latitude"] = self.df_r["latitude"].to_xarray()
+        self.resampled_nc["latitude"].attrs = {}
+        self.resampled_nc["latitude"].attrs["units"] = "degrees_north"
+        self.resampled_nc["latitude"].attrs["long_name"] = "Latitude"
+        self.resampled_nc["latitude"].attrs["standard_name"] = "latitude"
+        self.resampled_nc["latitude"].attrs["comment"] = (
+            f"{self.ds[f'{pitch_corrected_instr}_latitude'].attrs['comment']}"
             f" median filtered with {mf_width} samples"
             f" and resampled with {aggregator} to {freq} intervals."
         )
-        self.resampled_nc["longitude"].attrs = self.ds[f"{instr}_longitude"].attrs
-        self.resampled_nc["longitude"].attrs["comment"] += (
-            f" Variable {instr}_longitude from {in_fn}"
+
+        self.df_r["longitude"].index.rename("time", inplace=True)  # noqa: PD002
+        self.resampled_nc["longitude"] = self.df_r["longitude"].to_xarray()
+        self.resampled_nc["longitude"].attrs = {}
+        self.resampled_nc["longitude"].attrs["units"] = "degrees_east"
+        self.resampled_nc["longitude"].attrs["long_name"] = "Longitude"
+        self.resampled_nc["longitude"].attrs["standard_name"] = "longitude"
+        self.resampled_nc["longitude"].attrs["comment"] = (
+            f"{self.ds[f'{pitch_corrected_instr}_longitude'].attrs['comment']}"
             f" median filtered with {mf_width} samples"
             f" and resampled with {aggregator} to {freq} intervals."
         )
@@ -1755,21 +1805,17 @@ def resample_mission(  # noqa: C901, PLR0912, PLR0915, PLR0913
                 # use 'ctddseabird', otherwise start with 'ctd1' and fall back to
                 # 'seabird25p' if needed for i2map missions. Early LRAUV missions
                 # had only CTD_NeilBrown instruments, later ones had CTD_Seabird.
-                pitch_corrected_instr = "ctdseabird" if self.log_file else "ctd1"
-                if f"{pitch_corrected_instr}_depth" not in self.ds:
-                    pitch_corrected_instr = "ctdneilbrown"
-                if f"{pitch_corrected_instr}_depth" not in self.ds:
-                    pitch_corrected_instr = "seabird25p"
-                    if pitch_corrected_instr in instrs_to_pad:
-                        # Use navigation if seabird25p failed to record
-                        # data all the way to the end of the mission
-                        pitch_corrected_instr = "navigation"
+                pitch_corrected_instr = self._get_pitch_corrected_instrument()
+                if pitch_corrected_instr in instrs_to_pad:
+                    # Use navigation if seabird25p failed to record
+                    # data all the way to the end of the mission
+                    pitch_corrected_instr = "navigation"
                 aggregator = self.resample_coordinates(
                     pitch_corrected_instr,
                     mf_width,
                     freq,
                 )
-                self.save_coordinates(instr, mf_width, freq, aggregator)
+                self.save_coordinates(instr, pitch_corrected_instr, mf_width, freq, aggregator)
                 if self.plot:
                     self.plot_coordinates(instr, freq, plot_seconds)
                 self.add_profile(depth_threshold=depth_threshold)

From 8e1fc9b78e451b797c65dc713a9778050f014c21 Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Sun, 7 Dec 2025 13:01:59 -0800
Subject: [PATCH 6/8] Add _remove_time_spikes() and show its results in the
 plots.

---
 .vscode/launch.json     |   6 +-
 src/data/nc42netcdfs.py | 276 +++++++++++++++++++++++++++++++++-------
 2 files changed, 233 insertions(+), 49 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index fcb5c6ed..0a71902c 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -59,14 +59,16 @@
             // brizo 20250914T080941 has bad latitude and longitude values and lots of bad Universal latitude_time and longitude_time values
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"]
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/longitude_time"]
-            "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_universals"]
-            //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"]
+            //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_universals"]
+            "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"]
             // brizo 20250916T230652 has several ESP Samples from stoqs_lrauv_sep2025
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"]
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn.nc4", "--plot_time", "/longitude_time"]
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn_latlon.nc4", "--plot_time", "/longitude_time"]
             // Conflicting sizes for nudged_time and data - fixed by filtering GPS fixes to be monotonically increasing
             //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot_time", "/longitude_time"]
+            // Single spike in longitude_time
+            //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/NAL9602/longitude_fix_time"]
         },
         {
             "name": "2.0 - calibrate.py",
diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py
index f4ec4485..e463f342 100755
--- a/src/data/nc42netcdfs.py
+++ b/src/data/nc42netcdfs.py
@@ -512,7 +512,25 @@ def _process_single_time_coordinate(  # noqa: PLR0913
         plot_group_name: str | None,
         plot_time_coord_name: str | None,
     ) -> dict:
-        """Process filtering for a single time coordinate."""
+        """Process filtering for a single time coordinate.
+
+        Applies a pipeline of filtering steps to clean time coordinate data:
+        1. Range-based outlier removal (using filename time bounds ±10 min)
+        2. Single-point spike detection (removes anomalous forward-then-back jumps)
+        3. Monotonic filtering (ensures strictly increasing time values)
+        4. Universal coordinates intersection (applied separately for lon/lat/depth/time)
+
+        Args:
+            log_file: Name of the log file being processed
+            group_name: NetCDF group name
+            src_group: NetCDF group object containing the time coordinate
+            time_coord_name: Name of the time coordinate variable
+            plot_group_name: Group name for plotting (if requested)
+            plot_time_coord_name: Time coordinate name for plotting (if requested)
+
+        Returns:
+            Dictionary with filtering results including indices and metadata
+        """
         time_var = src_group.variables[time_coord_name]
         original_time_data = time_var[:]
         self.logger.info("Time coordinate %s: %d points", time_coord_name, len(original_time_data))
@@ -532,14 +550,28 @@ def _process_single_time_coordinate(  # noqa: PLR0913
         # First filter out values that fall outside of reasonable bounds
         valid_indices = self._filter_valid_time_indices(original_time_data)
 
+        # Remove single-point time spikes
+        spike_removed_indices = self._remove_time_spikes(original_time_data, valid_indices)
+
         # Get the valid time subset
-        valid_time_data = original_time_data[valid_indices]
+        valid_time_data = original_time_data[spike_removed_indices]
 
         # Apply monotonic filtering
         mono_indices_in_filtered = self._get_monotonic_indices(valid_time_data)
 
         # Convert monotonic indices back to original array indices
-        final_indices = [valid_indices[i] for i in mono_indices_in_filtered]
+        final_indices = [spike_removed_indices[i] for i in mono_indices_in_filtered]
+
+        # Store data for plotting if requested (do this before early return)
+        if plot_data is not None:
+            outlier_removed_data = original_time_data[valid_indices]
+            plot_data["outlier_indices"] = valid_indices
+            plot_data["outlier_data"] = outlier_removed_data
+            plot_data["spike_indices"] = spike_removed_indices
+            plot_data["spike_data"] = valid_time_data
+            plot_data["final_indices"] = mono_indices_in_filtered
+            plot_data["final_data"] = valid_time_data[mono_indices_in_filtered]
+            self._plot_time_filtering(plot_data)
 
         # For root group universals, store data for intersection step
         is_universal_coord = group_name == "/" and time_coord_name in [
@@ -552,10 +584,14 @@ def _process_single_time_coordinate(  # noqa: PLR0913
         if is_universal_coord:
             # Store intermediate data for intersection step
             if self.plot_universals:
+                outlier_removed_data = original_time_data[valid_indices]
+                spike_removed_data = original_time_data[spike_removed_indices]
                 self.universals_plot_data[time_coord_name] = {
                     "original": original_time_data.copy(),
-                    "valid_indices": valid_indices.copy(),
-                    "valid_data": valid_time_data.copy(),
+                    "outlier_indices": valid_indices.copy(),
+                    "outlier_data": outlier_removed_data.copy(),
+                    "spike_indices": spike_removed_indices.copy(),
+                    "spike_data": spike_removed_data.copy(),
                     "mono_indices": final_indices.copy(),
                     "mono_data": valid_time_data[mono_indices_in_filtered].copy(),
                     "log_file": log_file,
@@ -569,14 +605,6 @@ def _process_single_time_coordinate(  # noqa: PLR0913
                 "original_time_data": original_time_data,  # Store for reprocessing
             }
 
-        # Store data for plotting if requested
-        if plot_data is not None:
-            plot_data["valid_indices"] = valid_indices
-            plot_data["valid_data"] = valid_time_data
-            plot_data["final_indices"] = mono_indices_in_filtered
-            plot_data["final_data"] = valid_time_data[mono_indices_in_filtered]
-            self._plot_time_filtering(plot_data)
-
         return self._create_time_filter_result(
             final_indices, len(original_time_data), time_coord_name
         )
@@ -712,6 +740,88 @@ def _filter_valid_time_indices(self, time_data) -> list[int]:
 
         return valid_indices
 
+    def _remove_time_spikes(self, time_data, indices: list[int]) -> list[int]:
+        """Remove single-point time spikes from a list of indices.
+
+        A spike is detected when a time value jumps significantly forward or backward
+        and then returns close to the expected progression within the next few points.
+
+        Args:
+            time_data: Original time data array
+            indices: List of indices that have passed other filters
+
+        Returns:
+            List of indices with spikes removed
+        """
+        MIN_POINTS_FOR_SPIKE_DETECTION = 3
+        if len(indices) < MIN_POINTS_FOR_SPIKE_DETECTION:
+            return indices
+
+        # Get time values at the filtered indices
+        time_values = time_data[indices]
+
+        # Calculate time differences between consecutive points
+        time_diffs = np.diff(time_values)
+
+        # Calculate median and MAD of time differences for robust statistics
+        median_diff = np.median(time_diffs)
+        abs_deviations = np.abs(time_diffs - median_diff)
+        mad = np.median(abs_deviations)
+
+        if mad == 0:
+            # All differences are identical, no spikes to remove
+            return indices
+
+        # Use modified z-score to identify anomalous jumps
+        # A spike threshold of 10 is quite conservative
+        SPIKE_THRESHOLD = 10.0
+        modified_z_scores = 0.6745 * abs_deviations / mad
+
+        # Find potential spike points where jump is anomalous
+        spike_candidates = np.where(modified_z_scores > SPIKE_THRESHOLD)[0]
+
+        if len(spike_candidates) == 0:
+            return indices
+
+        # Check each candidate to see if it's a single-point spike
+        indices_to_remove = set()
+
+        for spike_idx in spike_candidates:
+            # spike_idx is the index in time_diffs array
+            # The spike affects index spike_idx+1 in the indices array
+            point_idx = spike_idx + 1
+
+            # Check if this is a single-point spike by looking at the next difference
+            if point_idx < len(time_diffs):
+                # If the next jump is in the opposite direction and also large,
+                # this is likely a spike
+                current_jump = time_diffs[spike_idx]
+                next_jump = time_diffs[point_idx]
+
+                # Spike pattern: large jump in one direction, then large jump back
+                if current_jump * next_jump < 0:  # Opposite signs
+                    next_z_score = 0.6745 * abs(next_jump - median_diff) / mad
+                    if next_z_score > SPIKE_THRESHOLD / 2:  # Less strict for return jump
+                        indices_to_remove.add(point_idx)
+                        self.logger.debug(
+                            "Detected time spike at index %d: %.2f -> %.2f -> %.2f",
+                            indices[point_idx],
+                            time_values[point_idx - 1] if point_idx > 0 else 0,
+                            time_values[point_idx],
+                            time_values[point_idx + 1] if point_idx < len(time_values) - 1 else 0,
+                        )
+
+        if indices_to_remove:
+            filtered_indices = [idx for i, idx in enumerate(indices) if i not in indices_to_remove]
+            self.logger.info(
+                "Removed %d time spike(s) from %d points",
+                len(indices_to_remove),
+                len(indices),
+            )
+            return filtered_indices
+
+        return indices
+
     def _get_monotonic_indices(self, time_data) -> list[int]:
         """Get indices for monotonic time values from time data array."""
         mono_indices = []
@@ -758,7 +868,7 @@ def _plot_universals_filtering(self):
             return
 
         n_coords = len(coords)
-        fig, axes = plt.subplots(n_coords, 4, figsize=(18, 2.5 * n_coords), sharex="col")
+        fig, axes = plt.subplots(n_coords, 5, figsize=(22, 2.5 * n_coords), sharex="col")
 
         # Handle case of single coordinate
         if n_coords == 1:
@@ -774,8 +884,10 @@ def _plot_universals_filtering(self):
         for i, (coord, time_coord) in enumerate(zip(coords, coord_names, strict=True)):
             data = self.universals_plot_data[time_coord]
             original = data["original"]
-            valid_indices = data["valid_indices"]
-            valid_data = data["valid_data"]
+            outlier_indices = data["outlier_indices"]
+            outlier_data = data["outlier_data"]
+            spike_indices = data["spike_indices"]
+            spike_data = data["spike_data"]
             mono_indices = data["mono_indices"]
             mono_data = data["mono_data"]
             final_indices = data["final_indices"]
@@ -785,8 +897,10 @@ def _plot_universals_filtering(self):
                 axes[i],
                 coord,
                 original,
-                valid_indices,
-                valid_data,
+                outlier_indices,
+                outlier_data,
+                spike_indices,
+                spike_data,
                 mono_indices,
                 mono_data,
                 final_indices,
@@ -795,7 +909,7 @@ def _plot_universals_filtering(self):
             )
 
         # Set x-label only on bottom row
-        for j in range(4):
+        for j in range(5):
             axes[-1, j].set_xlabel("Index")
 
         plt.tight_layout()
@@ -808,8 +922,10 @@ def _plot_universals_row(  # noqa: PLR0913
         axes_row,
         coord: str,
         original,
-        valid_indices,
-        valid_data,
+        outlier_indices,
+        outlier_data,
+        spike_indices,
+        spike_data,
         mono_indices,
         mono_data,
         final_indices,
@@ -833,48 +949,82 @@ def _plot_universals_row(  # noqa: PLR0913
         )
 
         # Column 2: After outlier removal
-        axes_row[1].plot(valid_indices, valid_data, "m.-", alpha=0.7, markersize=2)
+        axes_row[1].plot(
+            outlier_indices, outlier_data, "m.-", alpha=0.7, markersize=2, label="Data"
+        )
+
+        # Highlight spikes that will be removed
+        spike_indices_set = set(spike_indices)
+        removed_spike_indices = [idx for idx in outlier_indices if idx not in spike_indices_set]
+        if removed_spike_indices:
+            removed_spike_values = [original[idx] for idx in removed_spike_indices]
+            axes_row[1].plot(
+                removed_spike_indices,
+                removed_spike_values,
+                "rs",
+                markersize=8,
+                markerfacecolor="none",
+                markeredgewidth=1.5,
+                label=f"Spikes to remove ({len(removed_spike_indices)})",
+            )
+
         if is_first_row:
             axes_row[1].set_title("After Outlier Removal")
+            axes_row[1].legend(loc="upper right", fontsize=8)
         axes_row[1].grid(visible=True, alpha=0.3)
-        removed_outliers = len(original) - len(valid_data)
+        removed_outliers = len(original) - len(outlier_data)
         axes_row[1].text(
             0.02,
             0.95,
-            f"Points: {len(valid_data)}\nRemoved: {removed_outliers}",
+            f"Points: {len(outlier_data)}\nRemoved: {removed_outliers}",
             transform=axes_row[1].transAxes,
             verticalalignment="top",
             bbox={"boxstyle": "round", "facecolor": "plum", "alpha": 0.8},
         )
 
-        # Column 3: After monotonic removal
-        axes_row[2].plot(mono_indices, mono_data, "g.-", alpha=0.7, markersize=2)
+        # Column 3: After spike removal
+        axes_row[2].plot(spike_indices, spike_data, "c.-", alpha=0.7, markersize=2)
         if is_first_row:
-            axes_row[2].set_title("After Monotonic Removal")
+            axes_row[2].set_title("After Spike Removal")
         axes_row[2].grid(visible=True, alpha=0.3)
-        removed_monotonic = len(valid_data) - len(mono_data)
+        removed_spikes = len(outlier_data) - len(spike_data)
         axes_row[2].text(
             0.02,
             0.95,
-            f"Points: {len(mono_data)}\nRemoved: {removed_monotonic}",
+            f"Points: {len(spike_data)}\nRemoved: {removed_spikes}",
             transform=axes_row[2].transAxes,
             verticalalignment="top",
-            bbox={"boxstyle": "round", "facecolor": "lightgreen", "alpha": 0.8},
+            bbox={"boxstyle": "round", "facecolor": "cyan", "alpha": 0.8},
         )
 
-        # Column 4: After intersection
-        axes_row[3].plot(final_indices, final_data, "r.-", alpha=0.7, markersize=2)
+        # Column 4: After monotonic removal
+        axes_row[3].plot(mono_indices, mono_data, "g.-", alpha=0.7, markersize=2)
         if is_first_row:
-            axes_row[3].set_title("After Intersection")
+            axes_row[3].set_title("After Monotonic Removal")
         axes_row[3].grid(visible=True, alpha=0.3)
+        removed_monotonic = len(spike_data) - len(mono_data)
+        axes_row[3].text(
+            0.02,
+            0.95,
+            f"Points: {len(mono_data)}\nRemoved: {removed_monotonic}",
+            transform=axes_row[3].transAxes,
+            verticalalignment="top",
+            bbox={"boxstyle": "round", "facecolor": "lightgreen", "alpha": 0.8},
+        )
+
+        # Column 5: After intersection
+        axes_row[4].plot(final_indices, final_data, "r.-", alpha=0.7, markersize=2)
+        if is_first_row:
+            axes_row[4].set_title("After Intersection")
+        axes_row[4].grid(visible=True, alpha=0.3)
         removed_intersection = len(mono_data) - len(final_data)
         total_removed = len(original) - len(final_data)
-        axes_row[3].text(
+        axes_row[4].text(
             0.02,
             0.95,
             f"Points: {len(final_data)}\nRemoved: {removed_intersection}\n"
             f"Total removed: {total_removed} ({100 * total_removed / len(original):.1f}%)",
-            transform=axes_row[3].transAxes,
+            transform=axes_row[4].transAxes,
             verticalalignment="top",
             bbox={"boxstyle": "round", "facecolor": "lightcoral", "alpha": 0.8},
         )
@@ -889,13 +1039,15 @@ def _plot_time_filtering(self, plot_data: dict):
         import matplotlib.pyplot as plt  # noqa: F401
 
         original = plot_data["original"]
-        valid_indices = plot_data["valid_indices"]
-        valid_data = plot_data["valid_data"]
+        outlier_indices = plot_data["outlier_indices"]
+        outlier_data = plot_data["outlier_data"]
+        spike_indices = plot_data["spike_indices"]
+        spike_data = plot_data["spike_data"]
         final_indices = plot_data["final_indices"]
         final_data = plot_data["final_data"]
 
         # Create figure with subplots
-        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 8), sharex=True)
+        fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
 
         # Plot 1: Original data
         ax1.plot(original, "b-", label="Original", alpha=0.7)
@@ -907,40 +1059,70 @@ def _plot_time_filtering(self, plot_data: dict):
         ax1.legend()
         ax1.grid(visible=True, alpha=0.3)
 
-        # Plot 2: After valid Values filtering
-        ax2.plot(valid_indices, valid_data, "m.-", label="After Valid Values Filter", alpha=0.7)
+        # Plot 2: After outlier removal
+        ax2.plot(outlier_indices, outlier_data, "m.-", label="After Outlier Removal", alpha=0.7)
+
+        # Highlight spikes that will be removed (points in outlier_indices but not in spike_indices)
+        spike_indices_set = set(spike_indices)
+        removed_spike_indices = [idx for idx in outlier_indices if idx not in spike_indices_set]
+        if removed_spike_indices:
+            removed_spike_values = [original[idx] for idx in removed_spike_indices]
+            ax2.plot(
+                removed_spike_indices,
+                removed_spike_values,
+                "rs",
+                markersize=10,
+                markerfacecolor="none",
+                markeredgewidth=2,
+                label=f"Spikes to remove ({len(removed_spike_indices)})",
+            )
+
         ax2.set_ylabel("Time Value")
         ax2.legend()
         ax2.grid(visible=True, alpha=0.3)
         ax2.text(
             0.02,
             0.60,
-            f"Points removed: {len(original) - len(valid_data)}\n",
+            f"Points removed: {len(original) - len(outlier_data)}\n",
             transform=ax2.transAxes,
             verticalalignment="top",
             bbox={"boxstyle": "round", "facecolor": "wheat"},
         )
 
-        # Plot 3: Final After Monotonic filtered data
-        ax3.plot(final_indices, final_data, "r.-", label="After Monotonic Filter", alpha=0.7)
-        ax3.set_xlabel("Index")
+        # Plot 3: After spike removal
+        ax3.plot(spike_indices, spike_data, "c.-", label="After Spike Removal", alpha=0.7)
         ax3.set_ylabel("Time Value")
         ax3.legend()
         ax3.grid(visible=True, alpha=0.3)
+        ax3.text(
+            0.02,
+            0.60,
+            f"Points removed: {len(outlier_data) - len(spike_data)}\n",
+            transform=ax3.transAxes,
+            verticalalignment="top",
+            bbox={"boxstyle": "round", "facecolor": "lightcyan"},
+        )
+
+        # Plot 4: Final After Monotonic filtered data
+        ax4.plot(final_indices, final_data, "r.-", label="After Monotonic Filter", alpha=0.7)
+        ax4.set_xlabel("Index")
+        ax4.set_ylabel("Time Value")
+        ax4.legend()
+        ax4.grid(visible=True, alpha=0.3)
 
         # Add statistics text
         stats_text = (
-            f"Points removed: {len(valid_data) - len(final_data)}\n"
+            f"Points removed: {len(spike_data) - len(final_data)}\n"
             f"Original points: {len(original)}\n"
             f"After final filter: {len(final_data)}\n"
             f"Total removed: {len(original) - len(final_data)} "
             f"({100 * (len(original) - len(final_data)) / len(original):.1f}%)"
         )
-        ax3.text(
+        ax4.text(
             0.02,
             0.90,
             stats_text,
-            transform=ax3.transAxes,
+            transform=ax4.transAxes,
             verticalalignment="top",
             bbox={"boxstyle": "round", "facecolor": "wheat"},
         )

From 5c80408fb79911a1085d4de73c7e05f7406ee30e Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Sun, 7 Dec 2025 13:23:57 -0800
Subject: [PATCH 7/8] Update test values.

---
 src/data/test_process_dorado.py | 10 +++++-----
 src/data/test_process_i2map.py  |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py
index aa8c06b3..a225a1e8 100644
--- a/src/data/test_process_dorado.py
+++ b/src/data/test_process_dorado.py
@@ -31,9 +31,9 @@ def test_process_dorado(complete_dorado_processing):
     # but it will alert us if a code change unexpectedly changes the file size.
     # If code changes are expected to change the file size then we should
     # update the expected size here.
-    EXPECTED_SIZE_GITHUB = 621716
-    EXPECTED_SIZE_ACT = 621714
-    EXPECTED_SIZE_LOCAL = 621764
+    EXPECTED_SIZE_GITHUB = 621410
+    EXPECTED_SIZE_ACT = 621408
+    EXPECTED_SIZE_LOCAL = 621458
     if str(proc.args.base_path).startswith("/home/runner"):
         # The size is different in GitHub Actions, maybe due to different metadata
         assert nc_file.stat().st_size == EXPECTED_SIZE_GITHUB  # noqa: S101
@@ -51,8 +51,8 @@ def test_process_dorado(complete_dorado_processing):
     if check_md5:
         # Check that the MD5 hash has not changed
         EXPECTED_MD5_GITHUB = "bbb9e7e9129c19e39895218d35cc474f"
-        EXPECTED_MD5_ACT = "1fb192c144f6f4f8f06b555a339587ed"
-        EXPECTED_MD5_LOCAL = "aec48d0abb58d9e831e5b22180eebd8f"
+        EXPECTED_MD5_ACT = "316955fd489862ad9ed5b63df8aa7db8"
+        EXPECTED_MD5_LOCAL = "f635cee8760aa0f40bd2070bc0c5fa65"
         if str(proc.args.base_path).startswith("/home/runner"):
             # The MD5 hash is different in GitHub Actions, maybe due to different metadata
             assert hashlib.md5(open(nc_file, "rb").read()).hexdigest() == EXPECTED_MD5_GITHUB  # noqa:  PTH123, S101, S324, SIM115
diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py
index 121382a1..dc972932 100644
--- a/src/data/test_process_i2map.py
+++ b/src/data/test_process_i2map.py
@@ -30,9 +30,9 @@ def test_process_i2map(complete_i2map_processing):
     # but it will alert us if a code change unexpectedly changes the file size.
     # If code changes are expected to change the file size then we should
     # update the expected size here.
-    EXPECTED_SIZE_GITHUB = 52994
-    EXPECTED_SIZE_ACT = 52964
-    EXPECTED_SIZE_LOCAL = 53094
+    EXPECTED_SIZE_GITHUB = 52758
+    EXPECTED_SIZE_ACT = 52728
+    EXPECTED_SIZE_LOCAL = 52858
     if str(proc.args.base_path).startswith("/home/runner"):
         # The size is different in GitHub Actions, maybe due to different metadata
         assert nc_file.stat().st_size == EXPECTED_SIZE_GITHUB  # noqa: S101

From 9f308077a2be5bd97bea3157696562abedc7c0a9 Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Sun, 7 Dec 2025 13:25:54 -0800
Subject: [PATCH 8/8] Update EXPECTED_MD5_GITHUB.

---
 src/data/test_process_dorado.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py
index a225a1e8..88217bbc 100644
--- a/src/data/test_process_dorado.py
+++ b/src/data/test_process_dorado.py
@@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing):
     check_md5 = True
     if check_md5:
         # Check that the MD5 hash has not changed
-        EXPECTED_MD5_GITHUB = "bbb9e7e9129c19e39895218d35cc474f"
+        EXPECTED_MD5_GITHUB = "fec067d16eb5280a8bc2b6ef132821b8"
         EXPECTED_MD5_ACT = "316955fd489862ad9ed5b63df8aa7db8"
         EXPECTED_MD5_LOCAL = "f635cee8760aa0f40bd2070bc0c5fa65"
         if str(proc.args.base_path).startswith("/home/runner"):