Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,9 @@
//"args": ["--auv_name", "dorado", "--mission", "2020.337.00", "-v", "1"],
//"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"],
//"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"]
"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"],
//"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"],
//"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"],
"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250414_20250418/20250414T205440/202504142054_202504150400.nc4"],
},
{
"name": "5.0 - archive.py",
Expand Down Expand Up @@ -353,8 +355,13 @@
//"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
// No nudged latitude and longitude variables - fixed as of 26 Nov 2025
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"]
// Planktivore deployment for CeNCOOS Syncro
"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
// Planktivore deployment for CeNCOOS Syncro - whole month of April 2025
//"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
// Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable
//"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"]
// Full month of June 2025 for Pontus with WetLabsUBAT Group data
//"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"]
},

]
Expand Down
26 changes: 15 additions & 11 deletions src/data/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,20 +538,24 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915
continue

# Try to find the corresponding time coordinate
# Look for pattern: group_name + "_time"
possible_time_coords = []
for i in range(len(var_parts)):
group_candidate = "_".join(var_parts[: i + 1])
time_coord_candidate = f"{group_candidate}_time"
if time_coord_candidate in self.combined_nc:
possible_time_coords.append((group_candidate, time_coord_candidate))

if not possible_time_coords:
# Check what time coordinate the variable actually uses
var_dims = self.combined_nc[variable].dims
var_time_coords = [dim for dim in var_dims if "time" in dim.lower()]

if not var_time_coords:
self.logger.warning("No time coordinate found for variable: %s", variable)
continue

# Use the longest matching group name (most specific)
group_name, timevar = max(possible_time_coords, key=lambda x: len(x[0]))
# Use the time coordinate that the variable actually has
timevar = var_time_coords[0] # Should only be one time dimension
# Extract group name from time coordinate
if timevar.endswith("_time_60hz"):
group_name = timevar[:-10] # Remove "_time_60hz" (10 chars)
elif timevar.endswith("_time"):
group_name = timevar[:-5] # Remove "_time"
else:
group_name = timevar

self.logger.debug(
"Processing %s with group %s and time %s", variable, group_name, timevar
)
Expand Down
111 changes: 111 additions & 0 deletions src/data/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,26 @@ def _create_data_array_for_variable(
data_array.attrs = ds[orig_var].attrs.copy()
data_array.attrs["units"] = "degrees"
data_array.attrs["coordinates"] = f"{dim_name}"
elif len(ds[orig_var].dims) == 2: # noqa: PLR2004
# Handle 2D arrays (time, array_index) - e.g. biolume_raw, digitized_raw_ad_counts_M
second_dim_name = ds[orig_var].dims[1]
second_dim_size = ds[orig_var].shape[1]
self.logger.debug(
"Reading 2 dimensional %s data arrays with shape %s",
orig_var,
ds[orig_var].shape,
)
data_array = xr.DataArray(
ds[orig_var].to_numpy(),
dims=[dim_name, second_dim_name],
coords={
dim_name: time_coord_data,
second_dim_name: np.arange(second_dim_size),
},
)
data_array.attrs = ds[orig_var].attrs.copy()
data_array.attrs["comment"] = f"{orig_var} from group {ds.attrs.get('group_name', '')}"
data_array.attrs["coordinates"] = f"{dim_name} {second_dim_name}"
else:
data_array = xr.DataArray(
ds[orig_var].to_numpy(),
Expand Down Expand Up @@ -625,6 +645,94 @@ def _add_consolidation_comment(self, time_info: dict) -> None:
f"Consolidated time coordinate from: {mapping_info}"
)

def _expand_ubat_to_60hz(self) -> None:
    """Expand UBAT digitized_raw_ad_counts 2D array into a 60 Hz time series.

    Replaces the 2D (time, sample_index) array with a 1D 60 Hz time series,
    analogous to how Dorado biolume_raw is stored with a time60hz coordinate.
    Assumes the samples in each row span exactly one second at 60 Hz --
    TODO confirm against the WET Labs UBAT instrument documentation.

    No-op (with a debug/warning log) if the variable is absent or not 2D.
    """
    ubat_var = "wetlabsubat_digitized_raw_ad_counts"

    if ubat_var not in self.combined_nc:
        self.logger.debug(
            "No UBAT digitized_raw_ad_counts variable found, skipping 60hz expansion"
        )
        return

    self.logger.info("Expanding UBAT %s to 60hz time series", ubat_var)

    # The variable is expected to be 2D: (time, sample_index)
    ubat_2d = self.combined_nc[ubat_var]

    if len(ubat_2d.dims) != 2:  # noqa: PLR2004
        self.logger.warning("UBAT variable is not 2D, skipping 60hz expansion")
        return

    time_dim = ubat_2d.dims[0]
    n_samples = ubat_2d.shape[1]

    # Get the 1 Hz time coordinate the 2D array is carried on
    time_coord = self.combined_nc[time_dim]
    n_times = len(time_coord)

    # Save original attributes before the variable is removed below
    original_attrs = ubat_2d.attrs.copy()

    # Offsets within each 1 Hz record: sample k is k/60 s after the base time
    sample_offsets = np.arange(n_samples) / 60.0

    # Vectorized expansion: hoist to_numpy() out of any loop and broadcast
    # (n_times, 1) + (n_samples,) -> (n_times, n_samples), then ravel
    # row-major so samples stay grouped with their originating 1 Hz record.
    # NOTE(review): assumes the time coordinate is numeric epoch seconds,
    # not datetime64 -- confirm upstream decoding.
    base_times = time_coord.to_numpy()
    time_60hz = (base_times[:, np.newaxis] + sample_offsets).ravel()
    data_60hz = ubat_2d.to_numpy().ravel()

    # Remove the old 2D variable before installing its 1D replacement
    del self.combined_nc[ubat_var]

    # Create new 60 Hz time coordinate with CF-style attributes
    time_60hz_name = f"{time_dim}_60hz"
    time_60hz_coord = xr.DataArray(
        time_60hz,
        dims=[time_60hz_name],
        name=time_60hz_name,
        attrs={
            "units": "seconds since 1970-01-01T00:00:00Z",
            "standard_name": "time",
            "long_name": "Time at 60Hz sampling rate",
        },
    )

    # Replacement 1D variable carried on the 60 Hz time coordinate
    self.combined_nc[ubat_var] = xr.DataArray(
        data_60hz,
        coords={time_60hz_name: time_60hz_coord},
        dims=[time_60hz_name],
        name=ubat_var,
    )

    # Restore original attributes, then update the ones describing the new shape
    self.combined_nc[ubat_var].attrs = original_attrs
    self.combined_nc[ubat_var].attrs["long_name"] = "UBAT digitized raw AD counts at 60Hz"
    self.combined_nc[ubat_var].attrs["coordinates"] = time_60hz_name
    self.combined_nc[ubat_var].attrs["comment"] = (
        original_attrs.get("comment", "") + " Expanded from 2D to 1D 60Hz time series"
    )

    self.logger.info(
        "Replaced 2D %s with 1D 60hz time series: %d samples from %d 1Hz records",
        ubat_var,
        len(data_60hz),
        n_times,
    )

def _initial_coordinate_qc(self) -> None:
"""Perform initial QC on core coordinate variables for specific log files."""
if self.log_file in (
Expand Down Expand Up @@ -784,6 +892,9 @@ def combine_groups(self) -> None:
# Collect variable coordinate mapping by group, which can be flattened
self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"])

# Expand UBAT 2D arrays to 60hz time series
self._expand_ubat_to_60hz()

# Write intermediate file for cf_xarray decoding
intermediate_file = self._intermediate_write_netcdf()
with xr.open_dataset(intermediate_file, decode_cf=True) as ds:
Expand Down
Loading