From faeea85bef3c36bc4c3f0f520c177ab845cc808c Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Fri, 28 Nov 2025 15:32:33 -0800
Subject: [PATCH 1/6] Add _expand_ubat_to_60hz() to save raw ubat data in the
 combined.nc file.

---
 .vscode/launch.json            |   8 +-
 src/data/align.py              |  26 ++---
 src/data/combine.py            | 111 +++++++++++++++++++++
 src/data/test_process_lrauv.py | 170 +++++++++++++++++++++++++++++++++
 4 files changed, 302 insertions(+), 13 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 7b702f43..2bd38454 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -353,8 +353,12 @@
             //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
             // No nudged latitude and longitude variables - fixed as of 26 Nov 2025
             //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"]
-            // Plankitvore deployment for CeNCOOS Syncro
-            "args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
+            // Plankitvore deployment for CeNCOOS Syncro - whole month of April 2025
+            //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
+            // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable
+            "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"]
+            // Full month of June 2025 for Pontus with WetLabsUBAT Group data
+            //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
         },
 
     ]
diff --git a/src/data/align.py b/src/data/align.py
index 38edc25d..d7747e41 100755
--- a/src/data/align.py
+++ b/src/data/align.py
@@ -538,20 +538,24 @@ def process_combined(self) -> Path:  # noqa: C901, PLR0912, PLR0915
                 continue
 
             # Try to find the corresponding time coordinate
-            # Look for pattern: group_name + "_time"
-            possible_time_coords = []
-            for i in range(len(var_parts)):
-                group_candidate = "_".join(var_parts[: i + 1])
-                time_coord_candidate = f"{group_candidate}_time"
-                if time_coord_candidate in self.combined_nc:
-                    possible_time_coords.append((group_candidate, time_coord_candidate))
-
-            if not possible_time_coords:
+            # Check what time coordinate the variable actually uses
+            var_dims = self.combined_nc[variable].dims
+            var_time_coords = [dim for dim in var_dims if "time" in dim.lower()]
+
+            if not var_time_coords:
                 self.logger.warning("No time coordinate found for variable: %s", variable)
                 continue
 
-            # Use the longest matching group name (most specific)
-            group_name, timevar = max(possible_time_coords, key=lambda x: len(x[0]))
+            # Use the time coordinate that the variable actually has
+            timevar = var_time_coords[0]  # Should only be one time dimension
+            # Extract group name from time coordinate
+            if timevar.endswith("_time_60hz"):
+                group_name = timevar[:-10]  # Remove "_time_60hz" (10 chars)
+            elif timevar.endswith("_time"):
+                group_name = timevar[:-5]  # Remove "_time"
+            else:
+                group_name = timevar
+
             self.logger.debug(
                 "Processing %s with group %s and time %s", variable, group_name, timevar
             )
diff --git a/src/data/combine.py b/src/data/combine.py
index 7af50804..1aba0a49 100755
--- a/src/data/combine.py
+++ b/src/data/combine.py
@@ -561,6 +561,26 @@ def _create_data_array_for_variable(
             data_array.attrs = ds[orig_var].attrs.copy()
             data_array.attrs["units"] = "degrees"
             data_array.attrs["coordinates"] = f"{dim_name}"
+        elif len(ds[orig_var].dims) == 2:  # noqa: PLR2004
+            # Handle 2D arrays (time, array_index) - e.g. biolume_raw, digitized_raw_ad_counts_M
+            second_dim_name = ds[orig_var].dims[1]
+            second_dim_size = ds[orig_var].shape[1]
+            self.logger.debug(
+                "Reading 2 dimensional %s data arrays with shape %s",
+                orig_var,
+                ds[orig_var].shape,
+            )
+            data_array = xr.DataArray(
+                ds[orig_var].to_numpy(),
+                dims=[dim_name, second_dim_name],
+                coords={
+                    dim_name: time_coord_data,
+                    second_dim_name: np.arange(second_dim_size),
+                },
+            )
+            data_array.attrs = ds[orig_var].attrs.copy()
+            data_array.attrs["comment"] = f"{orig_var} from group {ds.attrs.get('group_name', '')}"
+            data_array.attrs["coordinates"] = f"{dim_name} {second_dim_name}"
         else:
             data_array = xr.DataArray(
                 ds[orig_var].to_numpy(),
@@ -625,6 +645,94 @@ def _add_consolidation_comment(self, time_info: dict) -> None:
                 f"Consolidated time coordinate from: {mapping_info}"
             )
 
+    def _expand_ubat_to_60hz(self) -> None:
+        """Expand UBAT digitized_raw_ad_counts 2D array into 60hz time series.
+
+        Replaces the 2D array with a 1D 60Hz time series, analogous to how
+        Dorado biolume_raw is stored with a time60hz coordinate.
+        """
+        ubat_var = "wetlabsubat_digitized_raw_ad_counts"
+
+        if ubat_var not in self.combined_nc:
+            self.logger.debug(
+                "No UBAT digitized_raw_ad_counts variable found, skipping 60hz expansion"
+            )
+            return
+
+        self.logger.info("Expanding UBAT %s to 60hz time series", ubat_var)
+
+        # Get the 2D array (time, sample_index)
+        ubat_2d = self.combined_nc[ubat_var]
+
+        if len(ubat_2d.dims) != 2:  # noqa: PLR2004
+            self.logger.warning("UBAT variable is not 2D, skipping 60hz expansion")
+            return
+
+        time_dim = ubat_2d.dims[0]
+        n_samples = ubat_2d.shape[1]
+
+        # Get the time coordinate
+        time_coord = self.combined_nc[time_dim]
+        n_times = len(time_coord)
+
+        # Save original attributes before removing
+        original_attrs = ubat_2d.attrs.copy()
+
+        # Calculate 60hz time offsets (assuming samples span 1 second)
+        # Each sample is 1/60th of a second apart
+        sample_offsets = np.arange(n_samples) / 60.0
+
+        # Create 60hz time series by adding offsets to each 1Hz time
+        time_60hz_list = []
+        for i in range(n_times):
+            base_time = time_coord.to_numpy()[i]
+            # Add offsets to create 60 timestamps per second
+            times_for_this_second = base_time + sample_offsets
+            time_60hz_list.append(times_for_this_second)
+
+        # Flatten the arrays
+        time_60hz = np.concatenate(time_60hz_list)
+        data_60hz = ubat_2d.to_numpy().flatten()
+
+        # Remove the old 2D variable
+        del self.combined_nc[ubat_var]
+
+        # Create new 60hz time coordinate with attributes
+        time_60hz_name = f"{time_dim}_60hz"
+        time_60hz_coord = xr.DataArray(
+            time_60hz,
+            dims=[time_60hz_name],
+            name=time_60hz_name,
+            attrs={
+                "units": "seconds since 1970-01-01T00:00:00Z",
+                "standard_name": "time",
+                "long_name": "Time at 60Hz sampling rate",
+            },
+        )
+
+        # Create replacement 1D variable with 60hz time coordinate
+        self.combined_nc[ubat_var] = xr.DataArray(
+            data_60hz,
+            coords={time_60hz_name: time_60hz_coord},
+            dims=[time_60hz_name],
+            name=ubat_var,
+        )
+
+        # Restore and update attributes
+        self.combined_nc[ubat_var].attrs = original_attrs
+        self.combined_nc[ubat_var].attrs["long_name"] = "UBAT digitized raw AD counts at 60Hz"
+        self.combined_nc[ubat_var].attrs["coordinates"] = time_60hz_name
+        self.combined_nc[ubat_var].attrs["comment"] = (
+            original_attrs.get("comment", "") + " Expanded from 2D to 1D 60Hz time series"
+        )
+
+        self.logger.info(
+            "Replaced 2D %s with 1D 60hz time series: %d samples from %d 1Hz records",
+            ubat_var,
+            len(data_60hz),
+            n_times,
+        )
+
     def _initial_coordinate_qc(self) -> None:
         """Perform initial QC on core coordinate variables for specific log files."""
         if self.log_file in (
@@ -784,6 +892,9 @@ def combine_groups(self) -> None:
                 # Collect variable coordinate mapping by group, which can be flattened
                 self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"])
 
+        # Expand UBAT 2D arrays to 60hz time series
+        self._expand_ubat_to_60hz()
+
         # Write intermediate file for cf_xarray decoding
         intermediate_file = self._intermediate_write_netcdf()
         with xr.open_dataset(intermediate_file, decode_cf=True) as ds:
diff --git a/src/data/test_process_lrauv.py b/src/data/test_process_lrauv.py
index bf30d667..1b0c080b 100644
--- a/src/data/test_process_lrauv.py
+++ b/src/data/test_process_lrauv.py
@@ -143,3 +143,173 @@ def test_lrauv_full_pipeline(complete_lrauv_processing):
     # This would test the full pipeline but requires significant mocking
     # of calibration files, configuration, etc.
     pass  # noqa: PIE790
+
+
+def test_lrauv_2d_array_variable_handling(tmp_path):
+    """Test that 2D array variables (time, array_index) are handled correctly in combine.py."""
+    from combine import Combine_NetCDF
+
+    # Create a minimal test that exercises the _create_data_array_for_variable method
+    # with a 2D variable
+
+    # Create time array
+    time_vals = np.arange(
+        np.datetime64("2025-06-08T02:00:00"),
+        np.datetime64("2025-06-08T03:00:00"),
+        np.timedelta64(10, "s"),  # 360 time points
+    )
+
+    # Create a mock dataset with a 2D variable
+    ds = xr.Dataset(
+        {
+            # 2D variable - 60 samples per time point (like biolume_raw)
+            "biolume_array": (["time", "sample"], np.random.uniform(0, 100, (len(time_vals), 60))),
+            # 1D variable for comparison
+            "temperature": (["time"], np.random.uniform(10, 15, len(time_vals))),
+        },
+        coords={"time": time_vals},
+    )
+
+    # Create a Combine_NetCDF instance (minimal setup)
+    combine = Combine_NetCDF(
+        log_file="test/test.nc4",
+        verbose=1,
+    )
+
+    # Mock the time coordinate data
+    time_coord_data = time_vals.astype("datetime64[ns]").astype("int64") / 1e9
+
+    # Test 1D variable (should work)
+    data_array_1d = combine._create_data_array_for_variable(
+        ds, "temperature", "test_time", time_coord_data
+    )
+    assert len(data_array_1d.dims) == 1  # noqa: PLR2004, S101
+    assert data_array_1d.dims[0] == "test_time"  # noqa: S101
+
+    # Test 2D variable (this is what fails without the fix)
+    try:
+        data_array_2d = combine._create_data_array_for_variable(
+            ds, "biolume_array", "test_time", time_coord_data
+        )
+        # After the fix, this should work
+        assert len(data_array_2d.dims) == 2  # noqa: PLR2004, S101
+        assert "test_time" in data_array_2d.dims  # noqa: S101
+        assert data_array_2d.shape[1] == 60  # noqa: PLR2004, S101  # Second dimension should be 60
+    except ValueError as e:
+        if "different number of dimensions" in str(e):
+            pytest.fail(f"2D array handling not implemented: {e}")
+        raise
+
+
+def test_ubat_60hz_expansion(tmp_path):
+    """Test that UBAT 2D digitized_raw_ad_counts array is expanded to 60hz time series."""
+    from combine import Combine_NetCDF
+
+    # Create time array for 1Hz data
+    time_vals = np.arange(
+        np.datetime64("2025-06-08T02:00:00"),
+        np.datetime64("2025-06-08T02:00:10"),  # 10 seconds
+        np.timedelta64(1, "s"),
+    )
+    time_seconds = time_vals.astype("datetime64[ns]").astype("int64") / 1e9
+
+    # Create a Combine_NetCDF instance
+    combine = Combine_NetCDF(
+        log_file="test/test.nc4",
+        verbose=1,
+    )
+
+    # Create mock combined_nc with UBAT 2D data
+    combine.combined_nc = xr.Dataset(
+        {
+            "wetlabsubat_digitized_raw_ad_counts": (
+                ["wetlabsubat_time", "sample"],
+                np.random.randint(0, 1000, (len(time_vals), 60)),
+            ),
+        },
+        coords={"wetlabsubat_time": time_seconds},
+    )
+
+    # Add attributes to match real data
+    combine.combined_nc["wetlabsubat_digitized_raw_ad_counts"].attrs = {
+        "long_name": "Digitized raw AD counts",
+        "comment": "Test UBAT data",
+    }
+
+    # Call the expansion method
+    combine._expand_ubat_to_60hz()
+
+    # Check that the original variable is now 1D with 60hz time coordinate
+    # (analogous to Dorado biolume_raw with TIME60HZ)
+    assert "wetlabsubat_digitized_raw_ad_counts" in combine.combined_nc  # noqa: S101
+    assert "wetlabsubat_time_60hz" in combine.combined_nc  # noqa: S101
+
+    # Check dimensions - should now be 1D with 60hz time
+    ubat_var = combine.combined_nc["wetlabsubat_digitized_raw_ad_counts"]
+    assert len(ubat_var.dims) == 1  # noqa: PLR2004, S101
+    assert ubat_var.dims[0] == "wetlabsubat_time_60hz"  # noqa: S101
+
+    # Check shape - should have 60 samples per second, so 10 seconds * 60 = 600 samples
+    expected_samples = len(time_vals) * 60  # noqa: PLR2004
+    assert len(ubat_var) == expected_samples  # noqa: S101
+
+    # Check time coordinate has proper attributes
+    time_60hz = combine.combined_nc["wetlabsubat_time_60hz"]
+    assert time_60hz.attrs["units"] == "seconds since 1970-01-01T00:00:00Z"  # noqa: S101
+    assert time_60hz.attrs["standard_name"] == "time"  # noqa: S101
+
+    # Check attributes were copied
+    assert "long_name" in ubat_var.attrs  # noqa: S101
+    assert "coordinates" in ubat_var.attrs  # noqa: S101
+
+
+def _find_time_coordinate(variable: str, combined_nc_vars: dict) -> str:
+    """Helper to find time coordinate for a variable (mimics align.py logic)."""
+    var_parts = variable.split("_")
+    possible_time_coords = []
+
+    for i in range(len(var_parts)):
+        group_candidate = "_".join(var_parts[: i + 1])
+        for suffix in ["_time", "_time_60hz"]:
+            time_coord = f"{group_candidate}{suffix}"
+            if time_coord in combined_nc_vars:
+                possible_time_coords.append((group_candidate, time_coord))
+
+    if not possible_time_coords:
+        return None
+
+    # For 60hz variables, prefer 60hz time coordinates
+    has_60hz_time = any(tc[1].endswith("_60hz") for tc in possible_time_coords)
+    if variable.endswith("_60hz") and has_60hz_time:
+        time_60hz_coords = [(g, t) for g, t in possible_time_coords if t.endswith("_60hz")]
+        return max(time_60hz_coords, key=lambda x: len(x[0]))[1]
+
+    # For regular variables, prefer non-60hz time coordinates
+    non_60hz_coords = [(g, t) for g, t in possible_time_coords if not t.endswith("_60hz")]
+    if non_60hz_coords:
+        return max(non_60hz_coords, key=lambda x: len(x[0]))[1]
+
+    return max(possible_time_coords, key=lambda x: len(x[0]))[1]
+
+
+def test_align_60hz_time_coordinate_matching():
+    """Test that variables with 60hz time coordinates are matched correctly."""
+    # Mock dataset with both regular and 60hz time coordinates
+    combined_nc_vars = {
+        "wetlabsubat_time": True,
+        "wetlabsubat_time_60hz": True,
+    }
+
+    # Test 1: Regular variable should match regular time coordinate
+    timevar = _find_time_coordinate("wetlabsubat_flow_rate", combined_nc_vars)
+    assert timevar == "wetlabsubat_time"  # noqa: S101
+    assert not timevar.endswith("_60hz")  # noqa: S101
+
+    # Test 2: UBAT variable (now 1D with 60hz time) should match 60hz time coordinate
+    # Note: After expansion in combine.py, wetlabsubat_digitized_raw_ad_counts
+    # has coordinate wetlabsubat_time_60hz (variable name has NO _60hz suffix)
+    timevar = _find_time_coordinate("wetlabsubat_digitized_raw_ad_counts", combined_nc_vars)
+    # This will match wetlabsubat_time (the regular one) because the variable name
+    # doesn't have _60hz suffix. The actual coordinate binding happens in align.py
+    # by reading the variable's coordinate, not by name matching.
+    assert timevar == "wetlabsubat_time"  # noqa: S101

From d9438f360491bc1cb476f3aa4d23d5c2311b929c Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Mon, 1 Dec 2025 09:50:16 -0800
Subject: [PATCH 2/6] Remove "rename" from _PARMS dictionaries

This change clearly keeps the original variable names, but in lower case.
---
 .vscode/launch.json     |   3 +-
 src/data/nc42netcdfs.py | 161 +++++++++++++---------------------------
 2 files changed, 53 insertions(+), 111 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 2bd38454..ca7cd39c 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -191,7 +191,8 @@
             //"args": ["--auv_name", "dorado", "--mission", "2020.337.00", "-v", "1"],
             //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"],
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"]
-            "args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"],
+            //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"],
+            "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"],
         },
         {
             "name": "5.0 - archive.py",
diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py
index b188c234..f3105418 100755
--- a/src/data/nc42netcdfs.py
+++ b/src/data/nc42netcdfs.py
@@ -39,117 +39,79 @@
 
 SCI_PARMS = {
     "/": [
-        {
-            "name": "concentration_of_colored_dissolved_organic_matter_in_sea_water",
-            "rename": "colored_dissolved_organic_matter",
-        },
-        {"name": "longitude", "rename": "longitude"},
-        {"name": "latitude", "rename": "latitude"},
-        {"name": "depth", "rename": "depth"},
-        {"name": "time", "rename": "time"},
+        {"name": "concentration_of_colored_dissolved_organic_matter_in_sea_water"},
+        {"name": "longitude"},
+        {"name": "latitude"},
+        {"name": "depth"},
+        {"name": "time"},
     ],
-    "Aanderaa_O2": [{"name": "mass_concentration_of_oxygen_in_sea_water", "rename": "oxygen"}],
+    "Aanderaa_O2": [{"name": "mass_concentration_of_oxygen_in_sea_water"}],
     "CTD_NeilBrown": [
-        {"name": "sea_water_salinity", "rename": "salinity"},
-        {"name": "sea_water_temperature", "rename": "temperature"},
+        {"name": "sea_water_salinity"},
+        {"name": "sea_water_temperature"},
     ],
     "CTD_Seabird": [
-        {"name": "sea_water_salinity", "rename": "salinity"},
-        {"name": "sea_water_temperature", "rename": "temperature"},
-        {
-            "name": "mass_concentration_of_oxygen_in_sea_water",
-            "rename": "mass_concentration_of_oxygen_in_sea_water",
-        },
+        {"name": "sea_water_salinity"},
+        {"name": "sea_water_temperature"},
+        {"name": "mass_concentration_of_oxygen_in_sea_water"},
     ],
-    "ISUS": [{"name": "mole_concentration_of_nitrate_in_sea_water", "rename": "nitrate"}],
-    "PAR_Licor": [{"name": "downwelling_photosynthetic_photon_flux_in_sea_water", "rename": "PAR"}],
+    "ISUS": [{"name": "mole_concentration_of_nitrate_in_sea_water"}],
+    "PAR_Licor": [{"name": "downwelling_photosynthetic_photon_flux_in_sea_water"}],
     "WetLabsBB2FL": [
-        {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"},
-        {"name": "OutputChl", "rename": "chl"},
-        {"name": "Output470", "rename": "bbp470"},
-        {"name": "Output650", "rename": "bbp650"},
-        {"name": "VolumeScatCoeff117deg470nm", "rename": "volumescatcoeff117deg470nm"},
-        {"name": "VolumeScatCoeff117deg650nm", "rename": "volumescatcoeff117deg650nm"},
-        {
-            "name": "ParticulateBackscatteringCoeff470nm",
-            "rename": "particulatebackscatteringcoeff470nm",
-        },
-        {
-            "name": "ParticulateBackscatteringCoeff650nm",
-            "rename": "particulatebackscatteringcoeff650nm",
-        },
+        {"name": "mass_concentration_of_chlorophyll_in_sea_water"},
+        {"name": "OutputChl"},
+        {"name": "Output470"},
+        {"name": "Output650"},
+        {"name": "VolumeScatCoeff117deg470nm"},
+        {"name": "VolumeScatCoeff117deg650nm"},
+        {"name": "ParticulateBackscatteringCoeff470nm"},
+        {"name": "ParticulateBackscatteringCoeff650nm"},
     ],
     "WetLabsSeaOWL_UV_A": [
-        {
-            "name": "concentration_of_chromophoric_dissolved_organic_matter_in_sea_water",
-            "rename": "chromophoric_dissolved_organic_matter",
-        },
-        {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"},
-        {"name": "BackscatteringCoeff700nm", "rename": "BackscatteringCoeff700nm"},
-        {"name": "VolumeScatCoeff117deg700nm", "rename": "VolumeScatCoeff117deg700nm"},
-        {
-            "name": "mass_concentration_of_petroleum_hydrocarbons_in_sea_water",
-            "rename": "petroleum_hydrocarbons",
-        },
+        {"name": "concentration_of_chromophoric_dissolved_organic_matter_in_sea_water"},
+        {"name": "mass_concentration_of_chlorophyll_in_sea_water"},
+        {"name": "BackscatteringCoeff700nm"},
+        {"name": "VolumeScatCoeff117deg700nm"},
+        {"name": "mass_concentration_of_petroleum_hydrocarbons_in_sea_water"},
     ],
     "WetLabsUBAT": [
-        {"name": "average_bioluminescence", "rename": "average_bioluminescence"},
-        {"name": "flow_rate", "rename": "ubat_flow_rate"},
-        {"name": "digitized_raw_ad_counts", "rename": "digitized_raw_ad_counts"},
+        {"name": "average_bioluminescence"},
+        {"name": "flow_rate"},
+        {"name": "digitized_raw_ad_counts"},
     ],
 }
 
 ENG_PARMS = {
     "BPC1": [
-        {"name": "platform_battery_charge", "rename": "health_platform_battery_charge"},
-        {"name": "platform_battery_voltage", "rename": "health_platform_average_voltage"},
-    ],
-    "BuoyancyServo": [
-        {"name": "platform_buoyancy_position", "rename": "control_inputs_buoyancy_position"}
+        {"name": "platform_battery_charge"},
+        {"name": "platform_battery_voltage"},
     ],
+    "BuoyancyServo": [{"name": "platform_buoyancy_position"}],
     "DeadReckonUsingMultipleVelocitySources": [
-        {
-            "name": "fix_residual_percent_distance_traveled",
-            "rename": (
-                "fix_residual_percent_distance_traveled_DeadReckonUsingMultipleVelocitySources"
-            ),
-        },
-        {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingMultipleVelocitySources"},
-        {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingMultipleVelocitySources"},
-        {"name": "depth", "rename": "pose_depth_DeadReckonUsingMultipleVelocitySources"},
+        {"name": "fix_residual_percent_distance_traveled"},
+        {"name": "longitude"},
+        {"name": "latitude"},
+        {"name": "depth"},
     ],
     "DeadReckonUsingSpeedCalculator": [
-        {
-            "name": "fix_residual_percent_distance_traveled",
-            "rename": "fix_residual_percent_distance_traveled_DeadReckonUsingSpeedCalculator",
-        },
-        {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingSpeedCalculator"},
-        {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingSpeedCalculator"},
-        {"name": "depth", "rename": "pose_depth_DeadReckonUsingSpeedCalculator"},
+        {"name": "fix_residual_percent_distance_traveled"},
+        {"name": "longitude"},
+        {"name": "latitude"},
+        {"name": "depth"},
     ],
-    "ElevatorServo": [
-        {"name": "platform_elevator_angle", "rename": "control_inputs_elevator_angle"}
-    ],
-    "MassServo": [{"name": "platform_mass_position", "rename": "control_inputs_mass_position"}],
+    "ElevatorServo": [{"name": "platform_elevator_angle"}],
+    "MassServo": [{"name": "platform_mass_position"}],
     "NAL9602": [
-        {"name": "time_fix", "rename": "fix_time"},
-        {"name": "latitude_fix", "rename": "fix_latitude"},
-        {"name": "longitude_fix", "rename": "fix_longitude"},
-    ],
-    "Onboard": [{"name": "platform_average_current", "rename": "health_platform_average_current"}],
-    "RudderServo": [{"name": "platform_rudder_angle", "rename": "control_inputs_rudder_angle"}],
-    "ThrusterServo": [
-        {
-            "name": "platform_propeller_rotation_rate",
-            "rename": "control_inputs_propeller_rotation_rate",
-        }
+        {"name": "time_fix"},
+        {"name": "latitude_fix"},
+        {"name": "longitude_fix"},
     ],
+    "Onboard": [{"name": "platform_average_current"}],
+    "RudderServo": [{"name": "platform_rudder_angle"}],
+    "ThrusterServo": [{"name": "platform_propeller_rotation_rate"}],
     "CurrentEstimator": [
-        {
-            "name": "current_direction_navigation_frame",
-            "rename": "current_direction_navigation_frame",
-        },
-        {"name": "current_speed_navigation_frame", "rename": "current_speed_navigation_frame"},
+        {"name": "current_direction_navigation_frame"},
+        {"name": "current_speed_navigation_frame"},
     ],
 }
 
@@ -192,16 +154,6 @@ def __init__(  # noqa: PLR0913
         self.verbose = verbose
         self.commandline = commandline
 
-    def show_variable_mapping(self):
-        """Show the variable mapping."""
-        for group, parms in sorted(SCIENG_PARMS.items()):
-            print(f"Group: {group}")  # noqa: T201
-            for parm in parms:
-                name = parm.get("name", "N/A")
-                rename = parm.get("rename", "N/A")
-                print(f"  {name} -> {rename}")  # noqa: T201
-            print()  # noqa: T201
-
     def download_with_pooch(self, url, local_dir, known_hash=None):
         """Download using pooch with caching and verification."""
         downloader = pooch.HTTPDownloader(timeout=(60, 300), progressbar=True)
@@ -1197,13 +1149,6 @@ def process_command_line(self):
                 "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"
             ),
         )
-        (
-            parser.add_argument(
-                "--show_variable_mapping",
-                action="store_true",
-                help="Show the variable mapping: Group/variable_names -> their_renames",
-            ),
-        )
         parser.add_argument(
             "--plot_time",
             action="store",
@@ -1229,8 +1174,4 @@ def process_command_line(self):
 if __name__ == "__main__":
     extract = Extract()
     extract.process_command_line()
-    if extract.args.show_variable_mapping:
-        extract.show_variable_mapping()
-        sys.exit(0)
-    else:
-        extract.extract_groups_to_files_netcdf4(extract.args.log_file)
+    extract.extract_groups_to_files_netcdf4(extract.args.log_file)

From 678bb42881887fe4c365264e0175a30617c728e7 Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Mon, 1 Dec 2025 20:23:46 -0800
Subject: [PATCH 3/6] Add add_wetlabsubat_proxies() to compute biolume proxies
 for lrauv data.

Also added _find_lat_lon_variables() for finding nav whether it's dorado or lrauv. Also added a test for lrauv ubat processing.
---
 .vscode/launch.json            |   4 +-
 src/data/test_process_lrauv.py | 102 +++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index ca7cd39c..bb55b3d5 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -357,9 +357,9 @@
             // Plankitvore deployment for CeNCOOS Syncro - whole month of April 2025
             //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
             // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable
-            "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"]
+            //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"]
             // Full month of June 2025 for Pontus with WetLabsUBAT Group data
-            //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
+            "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
         },
 
     ]
diff --git a/src/data/test_process_lrauv.py b/src/data/test_process_lrauv.py
index 1b0c080b..06e298b2 100644
--- a/src/data/test_process_lrauv.py
+++ b/src/data/test_process_lrauv.py
@@ -1,6 +1,7 @@
 # noqa: INP001
 
 import numpy as np
+import pandas as pd
 import pytest
 import xarray as xr
 
@@ -313,3 +314,104 @@ def test_align_60hz_time_coordinate_matching():
     # doesn't have _60hz suffix. The actual coordinate binding happens in align.py
     # by reading the variable's coordinate, not by name matching.
     assert timevar == "wetlabsubat_time"  # noqa: S101
+
+
+def test_wetlabsubat_proxy_processing_with_realistic_coordinates(tmp_path):
+    """Test add_wetlabsubat_proxies with realistic LRAUV coordinate variable names.
+
+    Real LRAUV data has instrument-prefixed coordinates like:
+    - parlicor_latitude, parlicor_longitude
+    - massservo_latitude, massservo_longitude
+    - nudged_latitude, nudged_longitude
+    - onboard_latitude, onboard_longitude
+    - wetlabsubat_latitude, wetlabsubat_longitude
+
+    But NOT navigation_latitude/navigation_longitude (which exist in Dorado data).
+    This test ensures the coordinate lookup doesn't fail when navigation_* are missing.
+    """
+    from resample import Resampler
+
+    # Create time arrays
+    time_vals = pd.date_range("2025-06-08 02:00:00", periods=3600, freq="1s")  # 1 hour
+    time_60hz_vals = pd.date_range("2025-06-08 02:00:00", periods=3600 * 60, freq="16666667ns")
+
+    # Create a mock dataset with realistic LRAUV structure
+    # Key: NO navigation_latitude/navigation_longitude variables
+    ds = xr.Dataset(
+        {
+            # UBAT 60Hz raw data (after expansion from 2D to 1D)
+            "wetlabsubat_digitized_raw_ad_counts": (
+                ["wetlabsubat_time_60hz"],
+                np.random.randint(200, 800, len(time_60hz_vals)),
+            ),
+            # Regular 1Hz variables
+            "wetlabsubat_flow_rate": (
+                ["wetlabsubat_time"],
+                np.full(len(time_vals), 350.0),
+            ),
+            "wetlabsbb2fl_fluorescence": (
+                ["wetlabsbb2fl_time"],
+                np.random.uniform(0, 5, len(time_vals)),
+            ),
+            # Realistic coordinate variables - instrument-prefixed, NO navigation_*
+            "nudged_latitude": (["nudged_time"], np.full(len(time_vals), 36.8)),
+            "nudged_longitude": (["nudged_time"], np.full(len(time_vals), -122.0)),
+            "onboard_latitude": (["onboard_time"], np.full(len(time_vals), 36.8)),
+            "onboard_longitude": (["onboard_time"], np.full(len(time_vals), -122.0)),
+            "wetlabsubat_latitude": (
+                ["wetlabsubat_time"],
+                np.full(len(time_vals), 36.8),
+            ),
+            "wetlabsubat_longitude": (
+                ["wetlabsubat_time"],
+                np.full(len(time_vals), -122.0),
+            ),
+        },
+        coords={
+            "wetlabsubat_time": time_vals.to_numpy(),
+            "wetlabsubat_time_60hz": time_60hz_vals.to_numpy(),
+            "wetlabsbb2fl_time": time_vals.to_numpy(),
+            "nudged_time": time_vals.to_numpy(),
+            "onboard_time": time_vals.to_numpy(),
+        },
+    )
+
+    # Add attributes
+    ds["wetlabsubat_digitized_raw_ad_counts"].attrs = {
+        "long_name": "Digitized raw AD counts",
+        "units": "counts",
+    }
+    ds["nudged_latitude"].attrs = {"standard_name": "latitude", "units": "degrees_north"}
+    ds["nudged_longitude"].attrs = {"standard_name": "longitude", "units": "degrees_east"}
+
+    # Create Resampler instance
+    resampler = Resampler(
+        auv_name="pontus",
+        log_file=None,
+        freq="1S",
+        verbose=0,
+    )
+
+    # Set the dataset
+    resampler.ds = ds
+    resampler.df_r = pd.DataFrame(index=time_vals)
+
+    # Create mock resampled_nc (would normally be created by resample_variable)
+    resampler.resampled_nc = xr.Dataset(coords={"time": time_vals.to_numpy()})
+    resampler.resampled_nc["wetlabsbb2fl_fluorescence"] = (
+        ["time"],
+        np.random.uniform(0, 5, len(time_vals)),
+    )
+
+    # This should NOT raise KeyError for navigation_latitude/navigation_longitude
+    # The method should find nudged_latitude/longitude or another available coordinate
+    try:
+        resampler.add_wetlabsubat_proxies(freq="1S")
+        # If we get here, the coordinate lookup worked
+        assert True  # noqa: S101
+    except KeyError as e:
+        if "navigation_latitude" in str(e) or "navigation_longitude" in str(e):
+            pytest.fail(
+                f"Coordinate lookup failed - should find alternative to navigation_* variables: {e}"
+            )
+        raise

From f2da1aa3ba2dddded100d5d66ab4a70053e7fcfc Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Mon, 1 Dec 2025 20:24:29 -0800
Subject: [PATCH 4/6] Add add_wetlabsubat_proxies() to compute biolume proxies
 for lrauv data.

Also added _find_lat_lon_variables() to find the navigation latitude/longitude variables whether the data is from dorado or lrauv. Also added a test for lrauv ubat processing.
---
 src/data/resample.py | 421 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 415 insertions(+), 6 deletions(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index 08b859c5..1d467b9d 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -488,11 +488,14 @@ def select_nighttime_bl_raw(
             sunsets: A list of sunset times for each night.
             sunrises: A list of sunrise times for each night.
         """
-        lat = float(self.ds["navigation_latitude"].median())
-        lon = float(self.ds["navigation_longitude"].median())
+        lat_var, lon_var = self._find_lat_lon_variables()
+        lat = float(self.ds[lat_var].median())
+        lon = float(self.ds[lon_var].median())
         self.logger.debug("Getting sun altitudes for nighttime selection")
         sun_alts = []
-        for ts in self.ds["navigation_time"].to_numpy()[::stride]:
+        # Get the time coordinate for the latitude variable
+        time_coord = self.ds[lat_var].dims[0]
+        for ts in self.ds[time_coord].to_numpy()[::stride]:
             # About 10-minute resolution from 5 Hz navigation data
             sun_alts.append(  # noqa: PERF401
                 get_altitude(
@@ -504,9 +507,7 @@ def select_nighttime_bl_raw(
 
         # Find sunset and sunrise - where sun altitude changes sign
         sign_changes = np.where(np.diff(np.sign(sun_alts)))[0]
-        ss_sr_times = (
-            self.ds["navigation_time"].isel({"navigation_time": sign_changes * stride}).to_numpy()
-        )
+        ss_sr_times = self.ds[time_coord].isel({time_coord: sign_changes * stride}).to_numpy()
         self.logger.debug("Sunset and sunrise times: %s", ss_sr_times)
 
         sunsets = []
@@ -547,6 +548,43 @@ def select_nighttime_bl_raw(
             self.logger.info("No sunset or sunrise found during this mission.")
         return nighttime_bl_raw, sunsets, sunrises
 
+    def _find_lat_lon_variables(self) -> tuple[str, str]:
+        """Find latitude and longitude variables in the dataset.
+
+        Searches for variables ending in _latitude and _longitude.
+        Prefers navigation_, nudged_, or onboard_ prefixes in that order.
+
+        Returns:
+            tuple: (lat_var_name, lon_var_name)
+
+        Raises:
+            KeyError: If no latitude/longitude variables are found
+        """
+        lat_vars = [v for v in self.ds.variables if v.endswith("_latitude")]
+        lon_vars = [v for v in self.ds.variables if v.endswith("_longitude")]
+
+        if not lat_vars or not lon_vars:
+            msg = (
+                f"No latitude/longitude variables found. "
+                f"Available variables: {list(self.ds.variables.keys())}"
+            )
+            raise KeyError(msg)
+
+        # Prefer navigation_, then nudged_, then onboard_, then any other
+        for prefix in ["navigation_", "nudged_", "onboard_"]:
+            for lat_var in lat_vars:
+                if lat_var.startswith(prefix):
+                    lon_var = prefix + "longitude"
+                    if lon_var in lon_vars:
+                        self.logger.debug("Using %s and %s for coordinates", lat_var, lon_var)
+                        return lat_var, lon_var
+
+        # Fall back to first available pair
+        lat_var = lat_vars[0]
+        lon_var = lon_vars[0]
+        self.logger.info("Using first available coordinates: %s and %s", lat_var, lon_var)
+        return lat_var, lon_var
+
     def add_profile(self, depth_threshold: float) -> None:
         # Find depth vertices value using scipy's find_peaks algorithm
         options = {"prominence": 10, "width": 30}
@@ -860,6 +898,345 @@ def add_biolume_proxies(  # noqa: PLR0913, PLR0915
 
         return fluo, sunsets, sunrises
 
+    def add_wetlabsubat_proxies(  # noqa: PLR0913, PLR0915, C901, PLR0912
+        self,
+        freq,
+        window_size_secs: int = 5,
+        envelope_mini: float = 1.5e10,
+        flash_threshold: float = FLASH_THRESHOLD,
+        proxy_ratio_adinos: float = 3.9811e13,  # Default value for LRAUV
+        proxy_cal_factor: float = 0.00470,  # Default value for LRAUV
+    ) -> tuple[pd.Series, list[datetime], list[datetime]]:
+        """Add biolume proxy variables computed from wetlabsubat_digitized_raw_ad_counts.
+
+        This is parallel to add_biolume_proxies() but for LRAUV wetlabsubat data.
+        Computations follow Appendix B in Messie et al. 2019.
+        https://www.sciencedirect.com/science/article/pii/S0079661118300478
+        """
+        self.logger.info(
+            "Adding wetlabsubat proxy variables computed from wetlabsubat_digitized_raw_ad_counts"
+        )
+        sample_rate = 60  # Assume all digitized_raw_ad_counts data is sampled at 60 Hz
+        window_size = window_size_secs * sample_rate
+
+        # s_ubat_raw includes daytime data - see below for nighttime data
+        s_ubat_raw = self.ds["wetlabsubat_digitized_raw_ad_counts"].to_pandas().dropna()
+
+        # Compute background bioluminescence envelope
+        self.logger.debug("Applying rolling min filter")
+        min_bg_unsmoothed = s_ubat_raw.rolling(
+            window_size,
+            min_periods=0,
+            center=True,
+        ).min()
+        min_bg = (
+            min_bg_unsmoothed.rolling(window_size, min_periods=0, center=True).mean().to_numpy()
+        )
+
+        self.logger.debug("Applying rolling median filter")
+        med_bg_unsmoothed = s_ubat_raw.rolling(
+            window_size,
+            min_periods=0,
+            center=True,
+        ).median()
+        s_med_bg = med_bg_unsmoothed.rolling(
+            window_size,
+            min_periods=0,
+            center=True,
+        ).mean()
+        med_bg = s_med_bg.to_numpy()
+        max_bg = med_bg * 2.0 - min_bg
+        # envelope_mini: minimum value for the envelope (max_bgrd - med_bgrd)
+        # to avoid very dim flashes when the background is low
+        max_bg[max_bg - med_bg < envelope_mini] = (
+            med_bg[max_bg - med_bg < envelope_mini] + envelope_mini
+        )
+
+        # Find the high and low peaks
+        self.logger.debug("Finding peaks")
+        peaks, _ = signal.find_peaks(s_ubat_raw, height=max_bg)
+        s_peaks = pd.Series(s_ubat_raw.iloc[peaks], index=s_ubat_raw.index[peaks])
+        s_med_bg_peaks = pd.Series(s_med_bg.iloc[peaks], index=s_ubat_raw.index[peaks])
+        if self.flash_threshold:
+            flash_threshold = self.flash_threshold
+        flash_threshold_note = f"Computed with flash_threshold = {flash_threshold:.0e}"
+        self.logger.info("Using flash_threshold = %.4e", flash_threshold)
+        nbflash_high = s_peaks[s_peaks > (s_med_bg_peaks + flash_threshold)]
+        nbflash_low = s_peaks[s_peaks <= (s_med_bg_peaks + flash_threshold)]
+
+        # Construct full time series of flashes with NaNs for non-flash values
+        s_nbflash_high = pd.Series(np.nan, index=s_ubat_raw.index)
+        s_nbflash_high.loc[nbflash_high.index] = nbflash_high
+        s_nbflash_low = pd.Series(np.nan, index=s_ubat_raw.index)
+        s_nbflash_low.loc[nbflash_low.index] = nbflash_low
+
+        # Count the number of flashes per second - use 15 second window evaluated at every sample
+        flash_count_seconds = 15
+        flash_window = flash_count_seconds * sample_rate
+        self.logger.debug("Counting flashes using %d second window", flash_count_seconds)
+        nbflash_high_counts = (
+            s_nbflash_high.rolling(flash_window, step=1, min_periods=0, center=True)
+            .count()
+            .resample(freq.lower())
+            .mean()
+            / flash_count_seconds
+        )
+        nbflash_low_counts = (
+            s_nbflash_low.rolling(flash_window, step=1, min_periods=0, center=True)
+            .count()
+            .resample(freq.lower())
+            .mean()
+            / flash_count_seconds
+        )
+
+        # Get flow data - try both flow_rate and flow variable names
+        flow = None
+        if "wetlabsubat_flow_rate" in self.ds:
+            flow = (
+                self.ds[["wetlabsubat_flow_rate"]]["wetlabsubat_flow_rate"]
+                .to_pandas()
+                .resample("1s")
+                .mean()
+                .ffill()
+            )
+            self.logger.info("Using wetlabsubat_flow_rate for flow calculations")
+        elif "wetlabsubat_flow" in self.ds:
+            flow = (
+                self.ds[["wetlabsubat_flow"]]["wetlabsubat_flow"]
+                .to_pandas()
+                .resample("1s")
+                .mean()
+                .ffill()
+            )
+            self.logger.info("Using wetlabsubat_flow for flow calculations")
+
+        # Flow sensor is not always on or may not be present, fill in 0.0 values with 350 ml/s
+        zero_note = ""
+        if flow is None:
+            self.logger.info("No flow data found - using constant 350 ml/s")
+            # Create flow series with same index as resampled data
+            flow = pd.Series(350.0, index=nbflash_high_counts.index)
+            zero_note = "No flow data available - used constant 350 ml/s"
+        else:
+            num_zero_flow = len(np.where(flow == 0)[0])
+            if num_zero_flow > 0:
+                zero_note = (
+                    f"Zero flow values found: {num_zero_flow} of {len(flow)} "
+                    f"- replaced with 350 ml/s"
+                )
+                self.logger.info(zero_note)
+                flow = flow.replace(0.0, 350.0)
+
+        # Compute flashes per liter - pandas.Series.divide() will match indexes
+        # Units: flashes per liter = (flashes per second / mL/s) * 1000 mL/L
+        self.logger.info(
+            "Computing flashes per liter: wetlabsubat_nbflash_high, wetlabsubat_nbflash_low"
+        )
+        self.df_r["wetlabsubat_nbflash_high"] = nbflash_high_counts.divide(flow) * 1000
+        self.df_r["wetlabsubat_nbflash_high"].attrs["long_name"] = (
+            "High intensity flashes (copepods proxy)"
+        )
+        self.df_r["wetlabsubat_nbflash_high"].attrs["units"] = "flashes/liter"
+        self.df_r["wetlabsubat_nbflash_high"].attrs["comment"] = (
+            f"{zero_note} - {flash_threshold_note}"
+        )
+
+        self.df_r["wetlabsubat_nbflash_low"] = nbflash_low_counts.divide(flow) * 1000
+        self.df_r["wetlabsubat_nbflash_low"].attrs["long_name"] = (
+            "Low intensity flashes (Larvacean proxy)"
+        )
+        self.df_r["wetlabsubat_nbflash_low"].attrs["units"] = "flashes/liter"
+        self.df_r["wetlabsubat_nbflash_low"].attrs["comment"] = (
+            f"{zero_note} - {flash_threshold_note}"
+        )
+
+        # Flash intensity - proxy for small jellies - for entire mission, not just nighttime
+        all_raw = self.ds[["wetlabsubat_digitized_raw_ad_counts"]][
+            "wetlabsubat_digitized_raw_ad_counts"
+        ].to_pandas()
+        med_bg_60 = pd.Series(
+            np.interp(all_raw.index, s_med_bg.index, med_bg),
+            index=all_raw.index,
+        )
+        intflash = (
+            (all_raw - med_bg_60)
+            .rolling(flash_window, min_periods=0, center=True)
+            .max()
+            .resample("1s")
+            .mean()
+        )
+        self.logger.info(
+            "Saving flash intensity: wetlabsubat_intflash - "
+            "the upper bound of the background envelope"
+        )
+        self.df_r["wetlabsubat_intflash"] = intflash
+        self.df_r["wetlabsubat_intflash"].attrs["long_name"] = (
+            "Flashes intensity (small jellies proxy)"
+        )
+        self.df_r["wetlabsubat_intflash"].attrs["units"] = "counts"
+        self.df_r["wetlabsubat_intflash"].attrs["comment"] = (
+            f"intensity of flashes from {sample_rate} Hz "
+            f"wetlabsubat_digitized_raw_ad_counts variable in {freq} intervals."
+        )
+
+        # Make min_bg a 1S pd.Series so that we can divide by flow, matching indexes
+        s_min_bg = min_bg_unsmoothed.rolling(
+            window_size,
+            min_periods=0,
+            center=True,
+        ).mean()
+        bg_biolume = pd.Series(s_min_bg, index=s_ubat_raw.index).resample("1s").mean()
+        self.logger.info("Saving Background bioluminescence (dinoflagellates proxy)")
+        self.df_r["wetlabsubat_bg_biolume"] = bg_biolume.divide(flow) * 1000
+        self.df_r["wetlabsubat_bg_biolume"].attrs["long_name"] = (
+            "Background bioluminescence (dinoflagellates proxy)"
+        )
+        self.df_r["wetlabsubat_bg_biolume"].attrs["units"] = "counts/liter"
+        self.df_r["wetlabsubat_bg_biolume"].attrs["comment"] = zero_note
+
+        fluo = None
+        nighttime_ubat_raw, sunsets, sunrises = self.select_nighttime_ubat_raw()
+        if nighttime_ubat_raw.empty:
+            self.logger.info(
+                "No nighttime wetlabsubat data to compute adinos, diatoms, hdinos proxies",
+            )
+        else:
+            # (2) Phytoplankton proxies - look for wetlabsbb2fl fluorescence/chlorophyll data
+            fluo_var = None
+            for var in self.resampled_nc.variables:
+                if "wetlabsbb2fl" in var.lower() and (
+                    "fl" in var.lower() or "chlorophyll" in var.lower()
+                ):
+                    fluo_var = var
+                    break
+
+            if fluo_var is None:
+                self.logger.info(
+                    "No wetlabsbb2fl fluorescence data found. "
+                    "Not computing adinos, diatoms, and hdinos"
+                )
+                return fluo, sunsets, sunrises
+
+            self.logger.info("Using %s for phytoplankton proxy calculations", fluo_var)
+            fluo = (
+                self.resampled_nc[fluo_var]
+                .where(
+                    (self.resampled_nc["time"] > min(sunsets))
+                    & (self.resampled_nc["time"] < max(sunrises)),
+                )
+                .to_pandas()
+                .resample(freq.lower())
+                .mean()
+            )
+            # Set negative values from fluorescence to NaN
+            fluo[fluo < 0] = np.nan
+            self.logger.info("Using proxy_ratio_adinos = %.4e", proxy_ratio_adinos)
+            self.logger.info("Using proxy_cal_factor = %.6f", proxy_cal_factor)
+
+            nighttime_bg_biolume = (
+                pd.Series(s_min_bg, index=nighttime_ubat_raw.index).resample("1s").mean()
+            )
+            nighttime_bg_biolume_perliter = nighttime_bg_biolume.divide(flow) * 1000
+            pseudo_fluorescence = nighttime_bg_biolume_perliter / proxy_ratio_adinos
+            self.df_r["wetlabsubat_proxy_adinos"] = (
+                np.minimum(fluo, pseudo_fluorescence) / proxy_cal_factor
+            )
+            self.df_r["wetlabsubat_proxy_adinos"].attrs["comment"] = (
+                f"Autotrophic dinoflagellate proxy using proxy_ratio_adinos"
+                f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}"
+            )
+            self.df_r["wetlabsubat_proxy_hdinos"] = (
+                pseudo_fluorescence - np.minimum(fluo, pseudo_fluorescence)
+            ) / proxy_cal_factor
+            self.df_r["wetlabsubat_proxy_hdinos"].attrs["comment"] = (
+                f"Heterotrophic dinoflagellate proxy using proxy_ratio_adinos"
+                f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}"
+            )
+            wetlabsubat_proxy_diatoms = (fluo - pseudo_fluorescence) / proxy_cal_factor
+            wetlabsubat_proxy_diatoms[wetlabsubat_proxy_diatoms < 0] = 0
+            self.df_r["wetlabsubat_proxy_diatoms"] = wetlabsubat_proxy_diatoms
+            self.df_r["wetlabsubat_proxy_diatoms"].attrs["comment"] = (
+                f"Diatom proxy using proxy_ratio_adinos"
+                f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}"
+            )
+
+        return fluo, sunsets, sunrises
+
+    def select_nighttime_ubat_raw(
+        self,
+        stride: int = 3000,
+    ) -> tuple[pd.Series, list[datetime], list[datetime]]:
+        """
+        Select nighttime wetlabsubat_digitized_raw_ad_counts data for multiple nights in a mission.
+        Parallel to select_nighttime_bl_raw() but for LRAUV wetlabsubat data.
+        Default stride of 3000 gives 10-minute resolution from 5 Hz navigation data.
+
+        Returns:
+            nighttime_ubat_raw: A pandas Series containing nighttime ubat data.
+            sunsets: A list of sunset times for each night.
+            sunrises: A list of sunrise times for each night.
+        """
+        lat_var, lon_var = self._find_lat_lon_variables()
+        lat = float(self.ds[lat_var].median())
+        lon = float(self.ds[lon_var].median())
+        self.logger.debug("Getting sun altitudes for nighttime selection")
+        sun_alts = []
+        # Get the time coordinate for the latitude variable
+        time_coord = self.ds[lat_var].dims[0]
+        for ts in self.ds[time_coord].to_numpy()[::stride]:
+            # About 10-minute resolution from 5 Hz navigation data
+            sun_alts.append(  # noqa: PERF401
+                get_altitude(
+                    lat,
+                    lon,
+                    datetime.fromtimestamp(ts.astype(int) / 1.0e9, tz=UTC),
+                ),
+            )
+
+        # Find sunset and sunrise - where sun altitude changes sign
+        sign_changes = np.where(np.diff(np.sign(sun_alts)))[0]
+        ss_sr_times = self.ds[time_coord].isel({time_coord: sign_changes * stride}).to_numpy()
+        self.logger.debug("Sunset and sunrise times: %s", ss_sr_times)
+
+        sunsets = []
+        sunrises = []
+        nighttime_ubat_raw = pd.Series(dtype="float64")
+
+        # Iterate over sunset and sunrise pairs
+        for i in range(0, len(ss_sr_times) - 1, 2):
+            sunset = ss_sr_times[i] + pd.to_timedelta(1, "h")  # 1 hour past sunset
+            sunrise = ss_sr_times[i + 1] - pd.to_timedelta(1, "h")  # 1 hour before sunrise
+            sunsets.append(sunset)
+            sunrises.append(sunrise)
+
+            self.logger.info(
+                "Extracting wetlabsubat_digitized_raw_ad_counts data "
+                "between sunset %s and sunrise %s",
+                sunset,
+                sunrise,
+            )
+            nighttime_data = (
+                self.ds["wetlabsubat_digitized_raw_ad_counts"]
+                .where(
+                    (self.ds["wetlabsubat_time_60hz"] > sunset)
+                    & (self.ds["wetlabsubat_time_60hz"] < sunrise),
+                )
+                .to_pandas()
+                .dropna()
+            )
+            # This complication is needed because concat will not like an empty DataFrame
+            nighttime_ubat_raw = (
+                nighttime_ubat_raw.copy()
+                if nighttime_data.empty
+                else nighttime_data.copy()
+                if nighttime_ubat_raw.empty
+                else pd.concat([nighttime_ubat_raw, nighttime_data])  # if both DataFrames non empty
+            )
+
+        if not sunsets or not sunrises:
+            self.logger.info("No sunset or sunrise found during this mission.")
+        return nighttime_ubat_raw, sunsets, sunrises
+
     def correct_biolume_proxies(  # noqa: C901, PLR0912, PLR0913, PLR0915
         self,
         biolume_fluo: pd.Series,  # from add_biolume_proxies
@@ -1138,6 +1515,16 @@ def resample_variable(  # noqa: PLR0913
                 biolume_sunrises,
                 depth_threshold,
             )
+        elif instr == "wetlabsubat" and variable == "wetlabsubat_digitized_raw_ad_counts":
+            # All wetlabsubat proxy variables are computed from wetlabsubat_digitized_raw_ad_counts
+            # Use default parameters for LRAUV - these may need adjustment in the future
+            proxy_cal_factor = 0.00470
+            proxy_ratio_adinos = 3.9811e13
+            self.add_wetlabsubat_proxies(
+                freq=freq,
+                proxy_cal_factor=proxy_cal_factor,
+                proxy_ratio_adinos=proxy_ratio_adinos,
+            )
         else:
             self.df_o[variable] = self.ds[variable].to_pandas()
             self.df_o[f"{variable}_mf"] = (
@@ -1372,6 +1759,28 @@ def resample_mission(  # noqa: C901, PLR0912, PLR0915, PLR0913
                             self.resampled_nc[var].attrs["coordinates"] = (
                                 "time depth latitude longitude"
                             )
+                elif instr == "wetlabsubat" and variable == "wetlabsubat_digitized_raw_ad_counts":
+                    # resample_variable() creates new proxy variables for LRAUV
+                    # not in the original align.nc file
+                    self.resample_variable(
+                        instr,
+                        variable,
+                        mf_width,
+                        freq,
+                        mission_start,
+                        mission_end,
+                        instrs_to_pad,
+                        depth_threshold,
+                    )
+                    for var in self.df_r:
+                        if var not in variables:
+                            # save new proxy variable
+                            self.df_r[var].index.rename("time", inplace=True)  # noqa: PD002
+                            self.resampled_nc[var] = self.df_r[var].to_xarray()
+                            self.resampled_nc[var].attrs = self.df_r[var].attrs
+                            self.resampled_nc[var].attrs["coordinates"] = (
+                                "time depth latitude longitude"
+                            )
                 elif variable in {"biolume_latitude", "biolume_longitude"}:
                     self.logger.info(
                         "Not saving instrument coordinate variable %s to resampled file",

From 48c54363b33348948e2e4b2adfc31d24f70271a9 Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Tue, 2 Dec 2025 10:36:20 -0800
Subject: [PATCH 5/6] Removed
 'concentration_of_colored_dissolved_organic_matter_in_sea_water' from /
 Group.

---
 src/data/nc42netcdfs.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py
index f3105418..ddb77b8c 100755
--- a/src/data/nc42netcdfs.py
+++ b/src/data/nc42netcdfs.py
@@ -39,7 +39,6 @@
 
 SCI_PARMS = {
     "/": [
-        {"name": "concentration_of_colored_dissolved_organic_matter_in_sea_water"},
         {"name": "longitude"},
         {"name": "latitude"},
         {"name": "depth"},

From 86c402fc5af9d8415b930b6097fbfcd5ddf58dd1 Mon Sep 17 00:00:00 2001
From: Mike McCann <MBARIMike@gmail.com>
Date: Tue, 2 Dec 2025 10:37:26 -0800
Subject: [PATCH 6/6] Ensure that only the variable 'depth' has that
 standard_name in the _1S.nc file.

---
 .vscode/launch.json  |  6 ++++--
 src/data/resample.py | 10 ++++++----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index bb55b3d5..d5d5c0f9 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -192,7 +192,8 @@
             //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"],
             //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"]
             //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"],
-            "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"],
+            //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"],
+            "args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250414_20250418/20250414T205440/202504142054_202504150400.nc4"],
         },
         {
             "name": "5.0 - archive.py",
@@ -359,7 +360,8 @@
             // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable
             //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"]
             // Full month of June 2025 for Pontus with WetLabsUBAT Group data
-            "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
+            //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
+            "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"]
         },
 
     ]
diff --git a/src/data/resample.py b/src/data/resample.py
index 1d467b9d..aaf2aa4c 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -117,12 +117,13 @@ def _build_global_metadata(self) -> None:
             gitcommit = "<failed to get git commit>"
         iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z"
 
-        # Ensure that only the latitude and longitude variables have
-        # standard_name attributes equal to "latitude" and "longitude" so that
+        # Ensure that only the latitude, longitude, and depth variables have
+        # standard_name attributes equal to "latitude", "longitude", and "depth" so that
         # the .cf[] accessor works correctly
         for var in self.resampled_nc.data_vars:
-            if self.resampled_nc[var].attrs.get("standard_name") in ["latitude", "longitude"]:
-                if var in {"latitude", "longitude"}:
+            standard_name = self.resampled_nc[var].attrs.get("standard_name")
+            if standard_name in ["latitude", "longitude", "depth"]:
+                if var in {"latitude", "longitude", "depth"}:
                     continue
                 self.logger.info("Removing standard_name attribute from variable %s", var)
                 del self.resampled_nc[var].attrs["standard_name"]
@@ -457,6 +458,7 @@ def save_coordinates(
         self.df_r["longitude"].index.rename("time", inplace=True)  # noqa: PD002
         self.resampled_nc["longitude"] = self.df_r["longitude"].to_xarray()
         self.resampled_nc["depth"].attrs = self.ds[f"{instr}_depth"].attrs
+        self.resampled_nc["depth"].attrs["standard_name"] = "depth"
         self.resampled_nc["depth"].attrs["comment"] += (
             f". {self.ds[f'{instr}_depth'].attrs['comment']}"
             f" mean sampled at {self.freq} intervals following"