19 changes: 11 additions & 8 deletions .vscode/launch.json
@@ -59,13 +59,16 @@
// brizo 20250914T080941 has bad latitude and longitude values and lots of bad Universal latitude_time and longitude_time values
//"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"]
//"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/longitude_time"]
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"]
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_universals"]
"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"]
// brizo 20250916T230652 has several ESP Samples from stoqs_lrauv_sep2025
//"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"]
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn.nc4", "--plot_time", "/longitude_time"]
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn_latlon.nc4", "--plot_time", "/longitude_time"]
// Conflicting sizes for nudged_time and data - fixed by filtering GPS fixes to be monotonically increasing
"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot_time", "/longitude_time"]
//"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot_time", "/longitude_time"]
// Single spike in longitude_time
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/NAL9602/longitude_fix_time"]
},
{
"name": "2.0 - calibrate.py",
@@ -122,11 +125,11 @@
"program": "${workspaceFolder}/src/data/combine.py",
"console": "integratedTerminal",
"justMyCode": false,
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"]
"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot"]
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"]
//"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"]
// Conflicting sizes for nudged_time and data - fixed by filtering GPS fixes to be monotonically increasing
"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot"]
//"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot"]

},
{
@@ -149,8 +152,8 @@
//"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2008.289.03"],
//"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2023.192.01"],
//"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2024.317.01"],
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"]
"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2025.316.02"],
"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"]
//"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2025.316.02"],
},
{
"name": "3.1 - align.py for LRAUV --log_file",
@@ -345,7 +348,7 @@
// Lots of bad time values in brizo 20250914T080941 due to memory corruption on the vehicle
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"]
//"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"]
//"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"]
"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"]
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"]
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"]
// Has different universals time coordinates for longitude/latitude and depth
@@ -360,7 +363,7 @@
// Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable
//"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"]
// Full month of June 2025 for Pontus with WetLabsUBAT Group data
"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
//"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup"]
//"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"]
//"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250707T043011/slate.nc4", "--no_cleanup"]
},
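The launch.json comments above note that the "Conflicting sizes for nudged_time and data" error was fixed by filtering GPS fixes to be monotonically increasing. A minimal sketch of that kind of filter, assuming plain float-second fix times and parallel latitude/longitude arrays; the function name and signature are hypothetical, not code from this PR:

import numpy as np

def keep_monotonic_fixes(fix_times, fix_lats, fix_lons):
    # Keep only fixes whose time is strictly greater than every fix kept so far;
    # out-of-order fixes (e.g. from corrupted records) are dropped.
    fix_times = np.asarray(fix_times, dtype=float)
    keep = np.zeros(fix_times.shape, dtype=bool)
    last = -np.inf
    for i, t in enumerate(fix_times):
        if t > last:
            keep[i] = True
            last = t
    return fix_times[keep], np.asarray(fix_lats)[keep], np.asarray(fix_lons)[keep]

# The 5.0 s fix jumps backwards in time and is filtered out
times, lats, lons = keep_monotonic_fixes(
    [0.0, 10.0, 5.0, 20.0], [36.10, 36.20, 36.15, 36.30], [-122.10, -122.20, -122.15, -122.30]
)
print(times)  # [ 0. 10. 20.]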
77 changes: 26 additions & 51 deletions src/data/align.py
@@ -564,53 +564,10 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915
var_time = self.combined_nc[variable].get_index(timevar).view(np.int64).tolist()

# Calculate sampling rate
sample_rate = np.round(
1.0 / (np.mean(np.diff(self.combined_nc[timevar])) / np.timedelta64(1, "s")),
decimals=2,
sample_rate = 1.0 / (
np.mean(np.diff(self.combined_nc[timevar])) / np.timedelta64(1, "s")
)

# Create interpolated coordinate variables for this group
coord_names = ["depth", "latitude", "longitude"]
coord_interps = [depth_interp, lat_interp, lon_interp]
coord_sources = [nav_coords["depth"], nav_coords["latitude"], nav_coords["longitude"]]

for coord_name, coord_interp, coord_source in zip(
coord_names, coord_interps, coord_sources, strict=True
):
coord_var_name = f"{group_name}_{coord_name}"

self.aligned_nc[coord_var_name] = xr.DataArray(
coord_interp(var_time).astype(np.float64).tolist(),
dims={timevar},
coords=[self.combined_nc[variable].get_index(timevar)],
name=coord_var_name,
)

# Copy attributes from source coordinate
if coord_source in self.combined_nc:
self.aligned_nc[coord_var_name].attrs = self.combined_nc[coord_source].attrs

# Update attributes
self.aligned_nc[coord_var_name].attrs["long_name"] = coord_name.title()
self.aligned_nc[coord_var_name].attrs["instrument_sample_rate_hz"] = sample_rate

if coord_name in ["longitude", "latitude", "depth"]:
self.aligned_nc[coord_var_name].attrs["comment"] = (
self.aligned_nc[coord_var_name].attrs.get("comment", "")
+ f". Variable {coord_source} from {src_file} file linearly"
f" interpolated onto {group_name} time values."
)

# Update spatial temporal bounds for global metadata
if pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC) < pd.to_datetime(
self.min_time
):
self.min_time = pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC)
if pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(
UTC
) > pd.to_datetime(self.max_time):
self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC)

time_coord = variable_time_coord_mapping.get(variable)
depth_coord = (
time_coord[:-5] + "_depth"
@@ -639,7 +596,10 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915
)
self.aligned_nc[depth_coord].attrs["long_name"] = "Depth"
self.aligned_nc[depth_coord].attrs["comment"] = "depth from Group_Universals.nc"
self.aligned_nc[depth_coord].attrs["instrument_sample_rate_hz"] = sample_rate
TINY_SAMPLE_RATE = 10e-2
self.aligned_nc[depth_coord].attrs["instrument_sample_rate_hz"] = (
f"{sample_rate:.2f}" if sample_rate > TINY_SAMPLE_RATE else f"{sample_rate:.6f}"
)

self.aligned_nc[lat_coord] = xr.DataArray(
lat_interp(var_time).astype(np.float64).tolist(),
@@ -649,7 +609,7 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915
)
self.aligned_nc[lat_coord].attrs = self.combined_nc["nudged_latitude"].attrs
self.aligned_nc[lat_coord].attrs["comment"] += (
f". Variable nudged_latitude from {src_file} file linearly"
f". Variable nudged_latitude linearly"
f" interpolated onto {variable.split('_')[0]} time values."
)
self.aligned_nc[lat_coord].attrs["long_name"] = "Latitude"
Expand All @@ -663,13 +623,22 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915
)
self.aligned_nc[lon_coord].attrs = self.combined_nc["nudged_longitude"].attrs
self.aligned_nc[lon_coord].attrs["comment"] += (
f". Variable nudged_longitude from {src_file} file linearly"
f". Variable nudged_longitude linearly"
f" interpolated onto {variable.split('_')[0]} time values."
)
self.aligned_nc[lon_coord].attrs["long_name"] = "Longitude"
self.aligned_nc[lon_coord].attrs["instrument_sample_rate_hz"] = sample_rate

# Update bounds using the interpolated coordinates
# Update spatial temporal bounds for global metadata
if pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC) < pd.to_datetime(
self.min_time
):
self.min_time = pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC)
if pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(
UTC
) > pd.to_datetime(self.max_time):
self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC)

if self.aligned_nc[depth_coord].min() < self.min_depth:
self.min_depth = self.aligned_nc[depth_coord].min().to_numpy()
if self.aligned_nc[depth_coord].max() > self.max_depth:
@@ -703,8 +672,14 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915
else:
self.logger.info("Skipping setting coordinates attribute for %s", variable)

self.logger.info("%s: instrument_sample_rate_hz = %.2f", variable, sample_rate)
self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = sample_rate
self.logger.info(
"%s: instrument_sample_rate_hz = %s",
variable,
f"{sample_rate:.2f}" if sample_rate > TINY_SAMPLE_RATE else f"{sample_rate:.6f}",
)
self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = (
f"{sample_rate:.2f}" if sample_rate > TINY_SAMPLE_RATE else f"{sample_rate:.6f}"
)

return netcdfs_dir

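The align.py change above removes the early np.round(..., decimals=2) on sample_rate and instead formats instrument_sample_rate_hz when the attribute is written, with TINY_SAMPLE_RATE switching to six decimal places for very slow instruments whose rate would otherwise print as 0.00. A self-contained sketch of that rule; the timestamps and demo rates below are illustrative, not taken from a mission log:

import numpy as np

TINY_SAMPLE_RATE = 10e-2  # 0.1 Hz threshold, as in the diff

def fmt_rate(sample_rate: float) -> str:
    # Fast instruments keep two decimals; slow ones (e.g. a GPS fix every few
    # minutes, ~0.003 Hz) keep six so the attribute is not rounded to "0.00".
    return f"{sample_rate:.2f}" if sample_rate > TINY_SAMPLE_RATE else f"{sample_rate:.6f}"

# Rate computed from mean spacing of a datetime64 time coordinate, as in the diff
timevar = np.array(
    ["2025-09-14T08:09:00.000", "2025-09-14T08:09:00.250", "2025-09-14T08:09:00.500"],
    dtype="datetime64[ms]",
)
sample_rate = 1.0 / (np.mean(np.diff(timevar)) / np.timedelta64(1, "s"))
print(fmt_rate(sample_rate))   # "4.00"
print(fmt_rate(1.0 / 300.0))   # "0.003333"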
30 changes: 17 additions & 13 deletions src/data/combine.py
@@ -381,7 +381,7 @@ def _biolume_process(self, sensor):
set_to_nan=True,
)

def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict:
def _cons_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict: # noqa: C901
"""Analyze and consolidate time coordinates for a group.

Returns:
@@ -391,8 +391,11 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict:
- time_coord_mapping: dict mapping original dims to consolidated dims
- variable_time_coord_mapping: dict mapping variables to their time coords
"""
# Find all time variables in this group
time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("time")}
if group_name.lower() == "universals":
# Do not include the "time" record variable from universals group
time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("_time")}
else:
time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("time")}

if not time_vars:
return {
@@ -407,7 +410,7 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict:
time_name = list(time_vars.keys())[0]
consolidated_name = f"{group_name}_time"
self.logger.info(
"Group %s: Single time coordinate '%s' - using as '%s'",
"Group %s: Single time coordinate '%s' -> '%s'",
group_name,
time_name,
consolidated_name,
@@ -475,7 +478,7 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict:
time_coord_mapping = dict.fromkeys(time_vars, consolidated_name)

self.logger.info(
"%-65s %s",
"%-77s %s",
f"Consoliding {len(time_vars)} coordinates to",
consolidated_name,
)
@@ -512,15 +515,15 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict:
def _add_time_coordinates_to_combined(self, time_info: dict, ds: xr.Dataset) -> None:
"""Add time coordinates to the combined dataset."""
if time_info["consolidated_time_name"]:
self._add_consolidated_time_coordinate(time_info)
self._add_cons_time_coord(time_info)
else:
self._add_separate_time_coordinates(time_info, ds)
self._add_sep_time_coord(time_info, ds)

def _add_consolidated_time_coordinate(self, time_info: dict) -> None:
def _add_cons_time_coord(self, time_info: dict) -> None:
"""Add a consolidated time coordinate to the combined dataset."""
time_name = time_info["consolidated_time_name"]
self.logger.info(
"Adding consolidated time coordinate %-45s %s",
"Adding consolidated time coordinate %-44s %s",
f"{time_name} as",
time_name,
)
@@ -531,7 +534,7 @@ def _add_consolidated_time_coordinate(self, time_info: dict) -> None:
)
self.combined_nc[time_name].attrs = time_info["consolidated_time_data"].attrs.copy()

def _add_separate_time_coordinates(self, time_info: dict, ds: xr.Dataset) -> None:
def _add_sep_time_coord(self, time_info: dict, ds: xr.Dataset) -> None:
"""Add separate time coordinates to the combined dataset."""
for orig_time_var, new_time_var in time_info["time_coord_mapping"].items():
self.logger.info(
@@ -603,7 +606,7 @@ def _add_time_metadata_to_variable(self, var_name: str, dim_name: str) -> None:
)
self.combined_nc[var_name].coords[dim_name].attrs["standard_name"] = "time"

def _process_group_variables(self, ds: xr.Dataset, group_name: str, time_info: dict) -> None:
def _process_group_vars(self, ds: xr.Dataset, group_name: str, time_info: dict) -> None:
"""Process all data variables in a group."""
for orig_var in ds.variables:
if orig_var.lower().endswith("time"):
@@ -877,18 +880,19 @@ def combine_groups(self) -> None:
self.combined_nc = xr.Dataset()

for group_file in group_files:
self.logger.info("-" * 110)
self.logger.info("Group file: %s", group_file.name)
# Open group file without decoding to have np.allclose work properly
with xr.open_dataset(group_file, decode_cf=False) as ds:
# Group name to prepend variable names is lowercase with underscores removed
group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower()
time_info = self._consolidate_group_time_coords(ds, group_name)
time_info = self._cons_group_time_coords(ds, group_name)

# Add time coordinate(s) to combined dataset
self._add_time_coordinates_to_combined(time_info, ds)

# Process all data variables in the group
self._process_group_variables(ds, group_name, time_info)
self._process_group_vars(ds, group_name, time_info)

# Add consolidation comment if applicable
self._add_consolidation_comment(time_info)
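The combine.py change above makes _cons_group_time_coords skip the bare "time" record variable in the universals group by requiring the "_time" suffix, while every other group keeps the broader "time" suffix match. A small illustration of the two filters, using hypothetical variable names rather than ones read from a Group_Universals file:

# Hypothetical variable names for a universals-like group
universals_vars = ["time", "latitude_time", "longitude_time", "depth_time", "latitude"]

# Universals group: require "_time" so the bare record variable is excluded
universals_time_vars = [v for v in universals_vars if v.lower().endswith("_time")]

# Any other group: the original, broader suffix match
other_group_time_vars = [v for v in universals_vars if v.lower().endswith("time")]

print(universals_time_vars)   # ['latitude_time', 'longitude_time', 'depth_time']
print(other_group_time_vars)  # ['time', 'latitude_time', 'longitude_time', 'depth_time']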