Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,9 @@
//"args": ["--auv_name", "dorado", "--mission", "2020.337.00", "-v", "1"],
//"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"],
//"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"]
"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"],
//"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"],
//"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"],
"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250414_20250418/20250414T205440/202504142054_202504150400.nc4"],
},
{
"name": "5.0 - archive.py",
Expand Down Expand Up @@ -353,8 +355,13 @@
//"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
// No nudged latitude and longitude variables - fixed as of 26 Nov 2025
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"]
// Planktivore deployment for CeNCOOS Syncro
"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
// Planktivore deployment for CeNCOOS Syncro - whole month of April 2025
//"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
// Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable
//"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"]
// Full month of June 2025 for Pontus with WetLabsUBAT Group data
//"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"]
},

]
Expand Down
26 changes: 15 additions & 11 deletions src/data/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,20 +538,24 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915
continue

# Try to find the corresponding time coordinate
# Look for pattern: group_name + "_time"
possible_time_coords = []
for i in range(len(var_parts)):
group_candidate = "_".join(var_parts[: i + 1])
time_coord_candidate = f"{group_candidate}_time"
if time_coord_candidate in self.combined_nc:
possible_time_coords.append((group_candidate, time_coord_candidate))

if not possible_time_coords:
# Check what time coordinate the variable actually uses
var_dims = self.combined_nc[variable].dims
var_time_coords = [dim for dim in var_dims if "time" in dim.lower()]

if not var_time_coords:
self.logger.warning("No time coordinate found for variable: %s", variable)
continue

# Use the longest matching group name (most specific)
group_name, timevar = max(possible_time_coords, key=lambda x: len(x[0]))
# Use the time coordinate that the variable actually has
timevar = var_time_coords[0] # Should only be one time dimension
# Extract group name from time coordinate
if timevar.endswith("_time_60hz"):
group_name = timevar[:-10] # Remove "_time_60hz" (10 chars)
elif timevar.endswith("_time"):
group_name = timevar[:-5] # Remove "_time"
else:
group_name = timevar

self.logger.debug(
"Processing %s with group %s and time %s", variable, group_name, timevar
)
Expand Down
111 changes: 111 additions & 0 deletions src/data/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,26 @@ def _create_data_array_for_variable(
data_array.attrs = ds[orig_var].attrs.copy()
data_array.attrs["units"] = "degrees"
data_array.attrs["coordinates"] = f"{dim_name}"
elif len(ds[orig_var].dims) == 2: # noqa: PLR2004
# Handle 2D arrays (time, array_index) - e.g. biolume_raw, digitized_raw_ad_counts_M
second_dim_name = ds[orig_var].dims[1]
second_dim_size = ds[orig_var].shape[1]
self.logger.debug(
"Reading 2 dimensional %s data arrays with shape %s",
orig_var,
ds[orig_var].shape,
)
data_array = xr.DataArray(
ds[orig_var].to_numpy(),
dims=[dim_name, second_dim_name],
coords={
dim_name: time_coord_data,
second_dim_name: np.arange(second_dim_size),
},
)
data_array.attrs = ds[orig_var].attrs.copy()
data_array.attrs["comment"] = f"{orig_var} from group {ds.attrs.get('group_name', '')}"
data_array.attrs["coordinates"] = f"{dim_name} {second_dim_name}"
else:
data_array = xr.DataArray(
ds[orig_var].to_numpy(),
Expand Down Expand Up @@ -625,6 +645,94 @@ def _add_consolidation_comment(self, time_info: dict) -> None:
f"Consolidated time coordinate from: {mapping_info}"
)

def _expand_ubat_to_60hz(self) -> None:
    """Expand UBAT digitized_raw_ad_counts 2D array into a 60 Hz time series.

    Replaces the 2D (time, sample_index) array with a 1D 60 Hz time series,
    analogous to how Dorado biolume_raw is stored with a time60hz coordinate.
    Assumes the samples in each row span exactly one second at 60 Hz --
    TODO confirm against the WET Labs UBAT instrument documentation.

    No-op (with a debug/warning log) if the variable is absent or not 2D.
    """
    ubat_var = "wetlabsubat_digitized_raw_ad_counts"

    if ubat_var not in self.combined_nc:
        self.logger.debug(
            "No UBAT digitized_raw_ad_counts variable found, skipping 60hz expansion"
        )
        return

    self.logger.info("Expanding UBAT %s to 60hz time series", ubat_var)

    # The variable is expected to be 2D: (time, sample_index)
    ubat_2d = self.combined_nc[ubat_var]

    if len(ubat_2d.dims) != 2:  # noqa: PLR2004
        self.logger.warning("UBAT variable is not 2D, skipping 60hz expansion")
        return

    time_dim = ubat_2d.dims[0]
    n_samples = ubat_2d.shape[1]

    # Get the 1 Hz time coordinate the 2D array is carried on
    time_coord = self.combined_nc[time_dim]
    n_times = len(time_coord)

    # Save original attributes before the variable is removed below
    original_attrs = ubat_2d.attrs.copy()

    # Offsets within each 1 Hz record: sample k is k/60 s after the base time
    sample_offsets = np.arange(n_samples) / 60.0

    # Vectorized expansion: hoist to_numpy() out of any loop and broadcast
    # (n_times, 1) + (n_samples,) -> (n_times, n_samples), then ravel
    # row-major so samples stay grouped with their originating 1 Hz record.
    # NOTE(review): assumes the time coordinate is numeric epoch seconds,
    # not datetime64 -- confirm upstream decoding.
    base_times = time_coord.to_numpy()
    time_60hz = (base_times[:, np.newaxis] + sample_offsets).ravel()
    data_60hz = ubat_2d.to_numpy().ravel()

    # Remove the old 2D variable before installing its 1D replacement
    del self.combined_nc[ubat_var]

    # Create new 60 Hz time coordinate with CF-style attributes
    time_60hz_name = f"{time_dim}_60hz"
    time_60hz_coord = xr.DataArray(
        time_60hz,
        dims=[time_60hz_name],
        name=time_60hz_name,
        attrs={
            "units": "seconds since 1970-01-01T00:00:00Z",
            "standard_name": "time",
            "long_name": "Time at 60Hz sampling rate",
        },
    )

    # Replacement 1D variable carried on the 60 Hz time coordinate
    self.combined_nc[ubat_var] = xr.DataArray(
        data_60hz,
        coords={time_60hz_name: time_60hz_coord},
        dims=[time_60hz_name],
        name=ubat_var,
    )

    # Restore original attributes, then update the ones describing the new shape
    self.combined_nc[ubat_var].attrs = original_attrs
    self.combined_nc[ubat_var].attrs["long_name"] = "UBAT digitized raw AD counts at 60Hz"
    self.combined_nc[ubat_var].attrs["coordinates"] = time_60hz_name
    self.combined_nc[ubat_var].attrs["comment"] = (
        original_attrs.get("comment", "") + " Expanded from 2D to 1D 60Hz time series"
    )

    self.logger.info(
        "Replaced 2D %s with 1D 60hz time series: %d samples from %d 1Hz records",
        ubat_var,
        len(data_60hz),
        n_times,
    )

def _initial_coordinate_qc(self) -> None:
"""Perform initial QC on core coordinate variables for specific log files."""
if self.log_file in (
Expand Down Expand Up @@ -784,6 +892,9 @@ def combine_groups(self) -> None:
# Collect variable coordinate mapping by group, which can be flattened
self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"])

# Expand UBAT 2D arrays to 60hz time series
self._expand_ubat_to_60hz()

# Write intermediate file for cf_xarray decoding
intermediate_file = self._intermediate_write_netcdf()
with xr.open_dataset(intermediate_file, decode_cf=True) as ds:
Expand Down
Loading