From faeea85bef3c36bc4c3f0f520c177ab845cc808c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 28 Nov 2025 15:32:33 -0800 Subject: [PATCH 1/6] Add _expand_ubat_to_60hz() to save raw ubat data in the combined.nc file. --- .vscode/launch.json | 8 +- src/data/align.py | 26 ++--- src/data/combine.py | 111 +++++++++++++++++++++ src/data/test_process_lrauv.py | 170 +++++++++++++++++++++++++++++++++ 4 files changed, 302 insertions(+), 13 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 7b702f43..2bd38454 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -353,8 +353,12 @@ //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // No nudged latitude and longitude variables - fixed as of 26 Nov 2025 //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"] - // Plankitvore deployment for CeNCOOS Syncro - "args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + // Plankitvore deployment for CeNCOOS Syncro - whole month of April 2025 + //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable + "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"] + // Full month of June 2025 for Pontus with WetLabsUBAT Group data + //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] }, ] diff --git a/src/data/align.py b/src/data/align.py index 38edc25d..d7747e41 100755 --- 
a/src/data/align.py +++ b/src/data/align.py @@ -538,20 +538,24 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915 continue # Try to find the corresponding time coordinate - # Look for pattern: group_name + "_time" - possible_time_coords = [] - for i in range(len(var_parts)): - group_candidate = "_".join(var_parts[: i + 1]) - time_coord_candidate = f"{group_candidate}_time" - if time_coord_candidate in self.combined_nc: - possible_time_coords.append((group_candidate, time_coord_candidate)) - - if not possible_time_coords: + # Check what time coordinate the variable actually uses + var_dims = self.combined_nc[variable].dims + var_time_coords = [dim for dim in var_dims if "time" in dim.lower()] + + if not var_time_coords: self.logger.warning("No time coordinate found for variable: %s", variable) continue - # Use the longest matching group name (most specific) - group_name, timevar = max(possible_time_coords, key=lambda x: len(x[0])) + # Use the time coordinate that the variable actually has + timevar = var_time_coords[0] # Should only be one time dimension + # Extract group name from time coordinate + if timevar.endswith("_time_60hz"): + group_name = timevar[:-10] # Remove "_time_60hz" (10 chars) + elif timevar.endswith("_time"): + group_name = timevar[:-5] # Remove "_time" + else: + group_name = timevar + self.logger.debug( "Processing %s with group %s and time %s", variable, group_name, timevar ) diff --git a/src/data/combine.py b/src/data/combine.py index 7af50804..1aba0a49 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -561,6 +561,26 @@ def _create_data_array_for_variable( data_array.attrs = ds[orig_var].attrs.copy() data_array.attrs["units"] = "degrees" data_array.attrs["coordinates"] = f"{dim_name}" + elif len(ds[orig_var].dims) == 2: # noqa: PLR2004 + # Handle 2D arrays (time, array_index) - e.g. 
biolume_raw, digitized_raw_ad_counts_M + second_dim_name = ds[orig_var].dims[1] + second_dim_size = ds[orig_var].shape[1] + self.logger.debug( + "Reading 2 dimensional %s data arrays with shape %s", + orig_var, + ds[orig_var].shape, + ) + data_array = xr.DataArray( + ds[orig_var].to_numpy(), + dims=[dim_name, second_dim_name], + coords={ + dim_name: time_coord_data, + second_dim_name: np.arange(second_dim_size), + }, + ) + data_array.attrs = ds[orig_var].attrs.copy() + data_array.attrs["comment"] = f"{orig_var} from group {ds.attrs.get('group_name', '')}" + data_array.attrs["coordinates"] = f"{dim_name} {second_dim_name}" else: data_array = xr.DataArray( ds[orig_var].to_numpy(), @@ -625,6 +645,94 @@ def _add_consolidation_comment(self, time_info: dict) -> None: f"Consolidated time coordinate from: {mapping_info}" ) + def _expand_ubat_to_60hz(self) -> None: + """Expand UBAT digitized_raw_ad_counts 2D array into 60hz time series. + + Replaces the 2D array with a 1D 60Hz time series, analogous to how + Dorado biolume_raw is stored with a time60hz coordinate. 
+ """ + ubat_var = "wetlabsubat_digitized_raw_ad_counts" + + if ubat_var not in self.combined_nc: + self.logger.debug( + "No UBAT digitized_raw_ad_counts variable found, skipping 60hz expansion" + ) + return + + self.logger.info("Expanding UBAT %s to 60hz time series", ubat_var) + + # Get the 2D array (time, sample_index) + ubat_2d = self.combined_nc[ubat_var] + + if len(ubat_2d.dims) != 2: # noqa: PLR2004 + self.logger.warning("UBAT variable is not 2D, skipping 60hz expansion") + return + + time_dim = ubat_2d.dims[0] + n_samples = ubat_2d.shape[1] + + # Get the time coordinate + time_coord = self.combined_nc[time_dim] + n_times = len(time_coord) + + # Save original attributes before removing + original_attrs = ubat_2d.attrs.copy() + + # Calculate 60hz time offsets (assuming samples span 1 second) + # Each sample is 1/60th of a second apart + sample_offsets = np.arange(n_samples) / 60.0 + + # Create 60hz time series by adding offsets to each 1Hz time + time_60hz_list = [] + for i in range(n_times): + base_time = time_coord.to_numpy()[i] + # Add offsets to create 60 timestamps per second + times_for_this_second = base_time + sample_offsets + time_60hz_list.append(times_for_this_second) + + # Flatten the arrays + time_60hz = np.concatenate(time_60hz_list) + data_60hz = ubat_2d.to_numpy().flatten() + + # Remove the old 2D variable + del self.combined_nc[ubat_var] + + # Create new 60hz time coordinate with attributes + time_60hz_name = f"{time_dim}_60hz" + time_60hz_coord = xr.DataArray( + time_60hz, + dims=[time_60hz_name], + name=time_60hz_name, + attrs={ + "units": "seconds since 1970-01-01T00:00:00Z", + "standard_name": "time", + "long_name": "Time at 60Hz sampling rate", + }, + ) + + # Create replacement 1D variable with 60hz time coordinate + self.combined_nc[ubat_var] = xr.DataArray( + data_60hz, + coords={time_60hz_name: time_60hz_coord}, + dims=[time_60hz_name], + name=ubat_var, + ) + + # Restore and update attributes + self.combined_nc[ubat_var].attrs = 
original_attrs + self.combined_nc[ubat_var].attrs["long_name"] = "UBAT digitized raw AD counts at 60Hz" + self.combined_nc[ubat_var].attrs["coordinates"] = time_60hz_name + self.combined_nc[ubat_var].attrs["comment"] = ( + original_attrs.get("comment", "") + " Expanded from 2D to 1D 60Hz time series" + ) + + self.logger.info( + "Replaced 2D %s with 1D 60hz time series: %d samples from %d 1Hz records", + ubat_var, + len(data_60hz), + n_times, + ) + def _initial_coordinate_qc(self) -> None: """Perform initial QC on core coordinate variables for specific log files.""" if self.log_file in ( @@ -784,6 +892,9 @@ def combine_groups(self) -> None: # Collect variable coordinate mapping by group, which can be flattened self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"]) + # Expand UBAT 2D arrays to 60hz time series + self._expand_ubat_to_60hz() + # Write intermediate file for cf_xarray decoding intermediate_file = self._intermediate_write_netcdf() with xr.open_dataset(intermediate_file, decode_cf=True) as ds: diff --git a/src/data/test_process_lrauv.py b/src/data/test_process_lrauv.py index bf30d667..1b0c080b 100644 --- a/src/data/test_process_lrauv.py +++ b/src/data/test_process_lrauv.py @@ -143,3 +143,173 @@ def test_lrauv_full_pipeline(complete_lrauv_processing): # This would test the full pipeline but requires significant mocking # of calibration files, configuration, etc. 
pass # noqa: PIE790 + + +def test_lrauv_2d_array_variable_handling(tmp_path): + """Test that 2D array variables (time, array_index) are handled correctly in combine.py.""" + from combine import Combine_NetCDF + + # Create a minimal test that exercises the _create_data_array_for_variable method + # with a 2D variable + + # Create time array + time_vals = np.arange( + np.datetime64("2025-06-08T02:00:00"), + np.datetime64("2025-06-08T03:00:00"), + np.timedelta64(10, "s"), # 360 time points + ) + + # Create a mock dataset with a 2D variable + ds = xr.Dataset( + { + # 2D variable - 60 samples per time point (like biolume_raw) + "biolume_array": (["time", "sample"], np.random.uniform(0, 100, (len(time_vals), 60))), + # 1D variable for comparison + "temperature": (["time"], np.random.uniform(10, 15, len(time_vals))), + }, + coords={"time": time_vals}, + ) + + # Create a Combine_NetCDF instance (minimal setup) + combine = Combine_NetCDF( + log_file="test/test.nc4", + verbose=1, + ) + + # Mock the time coordinate data + time_coord_data = time_vals.astype("datetime64[ns]").astype("int64") / 1e9 + + # Test 1D variable (should work) + data_array_1d = combine._create_data_array_for_variable( + ds, "temperature", "test_time", time_coord_data + ) + assert len(data_array_1d.dims) == 1 # noqa: PLR2004, S101 + assert data_array_1d.dims[0] == "test_time" # noqa: S101 + + # Test 2D variable (this is what fails without the fix) + try: + data_array_2d = combine._create_data_array_for_variable( + ds, "biolume_array", "test_time", time_coord_data + ) + # After the fix, this should work + assert len(data_array_2d.dims) == 2 # noqa: PLR2004, S101 + assert "test_time" in data_array_2d.dims # noqa: S101 + assert data_array_2d.shape[1] == 60 # noqa: PLR2004, S101 # Second dimension should be 60 + except ValueError as e: + if "different number of dimensions" in str(e): + pytest.fail(f"2D array handling not implemented: {e}") + raise + + +def test_ubat_60hz_expansion(tmp_path): + """Test that 
UBAT 2D digitized_raw_ad_counts array is expanded to 60hz time series.""" + from combine import Combine_NetCDF + + # Create time array for 1Hz data + time_vals = np.arange( + np.datetime64("2025-06-08T02:00:00"), + np.datetime64("2025-06-08T02:00:10"), # 10 seconds + np.timedelta64(1, "s"), + ) + time_seconds = time_vals.astype("datetime64[ns]").astype("int64") / 1e9 + + # Create a Combine_NetCDF instance + combine = Combine_NetCDF( + log_file="test/test.nc4", + verbose=1, + ) + + # Create mock combined_nc with UBAT 2D data + combine.combined_nc = xr.Dataset( + { + "wetlabsubat_digitized_raw_ad_counts": ( + ["wetlabsubat_time", "sample"], + np.random.randint(0, 1000, (len(time_vals), 60)), + ), + }, + coords={"wetlabsubat_time": time_seconds}, + ) + + # Add attributes to match real data + combine.combined_nc["wetlabsubat_digitized_raw_ad_counts"].attrs = { + "long_name": "Digitized raw AD counts", + "comment": "Test UBAT data", + } + + # Call the expansion method + combine._expand_ubat_to_60hz() + + # Check that the original variable is now 1D with 60hz time coordinate + # (analogous to Dorado biolume_raw with TIME60HZ) + assert "wetlabsubat_digitized_raw_ad_counts" in combine.combined_nc # noqa: S101 + assert "wetlabsubat_time_60hz" in combine.combined_nc # noqa: S101 + + # Check dimensions - should now be 1D with 60hz time + ubat_var = combine.combined_nc["wetlabsubat_digitized_raw_ad_counts"] + assert len(ubat_var.dims) == 1 # noqa: PLR2004, S101 + assert ubat_var.dims[0] == "wetlabsubat_time_60hz" # noqa: S101 + + # Check shape - should have 60 samples per second, so 10 seconds * 60 = 600 samples + expected_samples = len(time_vals) * 60 # noqa: PLR2004 + assert len(ubat_var) == expected_samples # noqa: S101 + + # Check time coordinate has proper attributes + time_60hz = combine.combined_nc["wetlabsubat_time_60hz"] + assert time_60hz.attrs["units"] == "seconds since 1970-01-01T00:00:00Z" # noqa: S101 + assert time_60hz.attrs["standard_name"] == "time" # noqa: 
S101 + + # Check attributes were copied + assert "long_name" in ubat_var.attrs # noqa: S101 + assert "coordinates" in ubat_var.attrs # noqa: S101 + + +def _find_time_coordinate(variable: str, combined_nc_vars: dict) -> str: + """Helper to find time coordinate for a variable (mimics align.py logic).""" + var_parts = variable.split("_") + possible_time_coords = [] + + for i in range(len(var_parts)): + group_candidate = "_".join(var_parts[: i + 1]) + for suffix in ["_time", "_time_60hz"]: + time_coord = f"{group_candidate}{suffix}" + if time_coord in combined_nc_vars: + possible_time_coords.append((group_candidate, time_coord)) + + if not possible_time_coords: + return None + + # For 60hz variables, prefer 60hz time coordinates + has_60hz_time = any(tc[1].endswith("_60hz") for tc in possible_time_coords) + if variable.endswith("_60hz") and has_60hz_time: + time_60hz_coords = [(g, t) for g, t in possible_time_coords if t.endswith("_60hz")] + return max(time_60hz_coords, key=lambda x: len(x[0]))[1] + + # For regular variables, prefer non-60hz time coordinates + non_60hz_coords = [(g, t) for g, t in possible_time_coords if not t.endswith("_60hz")] + if non_60hz_coords: + return max(non_60hz_coords, key=lambda x: len(x[0]))[1] + + return max(possible_time_coords, key=lambda x: len(x[0]))[1] + + +def test_align_60hz_time_coordinate_matching(): + """Test that variables with 60hz time coordinates are matched correctly.""" + # Mock dataset with both regular and 60hz time coordinates + combined_nc_vars = { + "wetlabsubat_time": True, + "wetlabsubat_time_60hz": True, + } + + # Test 1: Regular variable should match regular time coordinate + timevar = _find_time_coordinate("wetlabsubat_flow_rate", combined_nc_vars) + assert timevar == "wetlabsubat_time" # noqa: S101 + assert not timevar.endswith("_60hz") # noqa: S101 + + # Test 2: UBAT variable (now 1D with 60hz time) should match 60hz time coordinate + # Note: After expansion in combine.py, wetlabsubat_digitized_raw_ad_counts + 
# has coordinate wetlabsubat_time_60hz (variable name has NO _60hz suffix) + timevar = _find_time_coordinate("wetlabsubat_digitized_raw_ad_counts", combined_nc_vars) + # This will match wetlabsubat_time (the regular one) because the variable name + # doesn't have _60hz suffix. The actual coordinate binding happens in align.py + # by reading the variable's coordinate, not by name matching. + assert timevar == "wetlabsubat_time" # noqa: S101 From d9438f360491bc1cb476f3aa4d23d5c2311b929c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 1 Dec 2025 09:50:16 -0800 Subject: [PATCH 2/6] Remove "rename" from _PARMS dictionaries This change clearly keeps the original variable names, but in lower case. --- .vscode/launch.json | 3 +- src/data/nc42netcdfs.py | 161 +++++++++++++--------------------------- 2 files changed, 53 insertions(+), 111 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 2bd38454..ca7cd39c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -191,7 +191,8 @@ //"args": ["--auv_name", "dorado", "--mission", "2020.337.00", "-v", "1"], //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"], //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] - "args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"], + //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"], + "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"], }, { "name": "5.0 - archive.py", diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index b188c234..f3105418 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -39,117 +39,79 @@ SCI_PARMS = { "/": [ - { - "name": "concentration_of_colored_dissolved_organic_matter_in_sea_water", - "rename": "colored_dissolved_organic_matter", - }, - {"name": "longitude", "rename": "longitude"}, - {"name": 
"latitude", "rename": "latitude"}, - {"name": "depth", "rename": "depth"}, - {"name": "time", "rename": "time"}, + {"name": "concentration_of_colored_dissolved_organic_matter_in_sea_water"}, + {"name": "longitude"}, + {"name": "latitude"}, + {"name": "depth"}, + {"name": "time"}, ], - "Aanderaa_O2": [{"name": "mass_concentration_of_oxygen_in_sea_water", "rename": "oxygen"}], + "Aanderaa_O2": [{"name": "mass_concentration_of_oxygen_in_sea_water"}], "CTD_NeilBrown": [ - {"name": "sea_water_salinity", "rename": "salinity"}, - {"name": "sea_water_temperature", "rename": "temperature"}, + {"name": "sea_water_salinity"}, + {"name": "sea_water_temperature"}, ], "CTD_Seabird": [ - {"name": "sea_water_salinity", "rename": "salinity"}, - {"name": "sea_water_temperature", "rename": "temperature"}, - { - "name": "mass_concentration_of_oxygen_in_sea_water", - "rename": "mass_concentration_of_oxygen_in_sea_water", - }, + {"name": "sea_water_salinity"}, + {"name": "sea_water_temperature"}, + {"name": "mass_concentration_of_oxygen_in_sea_water"}, ], - "ISUS": [{"name": "mole_concentration_of_nitrate_in_sea_water", "rename": "nitrate"}], - "PAR_Licor": [{"name": "downwelling_photosynthetic_photon_flux_in_sea_water", "rename": "PAR"}], + "ISUS": [{"name": "mole_concentration_of_nitrate_in_sea_water"}], + "PAR_Licor": [{"name": "downwelling_photosynthetic_photon_flux_in_sea_water"}], "WetLabsBB2FL": [ - {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"}, - {"name": "OutputChl", "rename": "chl"}, - {"name": "Output470", "rename": "bbp470"}, - {"name": "Output650", "rename": "bbp650"}, - {"name": "VolumeScatCoeff117deg470nm", "rename": "volumescatcoeff117deg470nm"}, - {"name": "VolumeScatCoeff117deg650nm", "rename": "volumescatcoeff117deg650nm"}, - { - "name": "ParticulateBackscatteringCoeff470nm", - "rename": "particulatebackscatteringcoeff470nm", - }, - { - "name": "ParticulateBackscatteringCoeff650nm", - "rename": 
"particulatebackscatteringcoeff650nm", - }, + {"name": "mass_concentration_of_chlorophyll_in_sea_water"}, + {"name": "OutputChl"}, + {"name": "Output470"}, + {"name": "Output650"}, + {"name": "VolumeScatCoeff117deg470nm"}, + {"name": "VolumeScatCoeff117deg650nm"}, + {"name": "ParticulateBackscatteringCoeff470nm"}, + {"name": "ParticulateBackscatteringCoeff650nm"}, ], "WetLabsSeaOWL_UV_A": [ - { - "name": "concentration_of_chromophoric_dissolved_organic_matter_in_sea_water", - "rename": "chromophoric_dissolved_organic_matter", - }, - {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"}, - {"name": "BackscatteringCoeff700nm", "rename": "BackscatteringCoeff700nm"}, - {"name": "VolumeScatCoeff117deg700nm", "rename": "VolumeScatCoeff117deg700nm"}, - { - "name": "mass_concentration_of_petroleum_hydrocarbons_in_sea_water", - "rename": "petroleum_hydrocarbons", - }, + {"name": "concentration_of_chromophoric_dissolved_organic_matter_in_sea_water"}, + {"name": "mass_concentration_of_chlorophyll_in_sea_water"}, + {"name": "BackscatteringCoeff700nm"}, + {"name": "VolumeScatCoeff117deg700nm"}, + {"name": "mass_concentration_of_petroleum_hydrocarbons_in_sea_water"}, ], "WetLabsUBAT": [ - {"name": "average_bioluminescence", "rename": "average_bioluminescence"}, - {"name": "flow_rate", "rename": "ubat_flow_rate"}, - {"name": "digitized_raw_ad_counts", "rename": "digitized_raw_ad_counts"}, + {"name": "average_bioluminescence"}, + {"name": "flow_rate"}, + {"name": "digitized_raw_ad_counts"}, ], } ENG_PARMS = { "BPC1": [ - {"name": "platform_battery_charge", "rename": "health_platform_battery_charge"}, - {"name": "platform_battery_voltage", "rename": "health_platform_average_voltage"}, - ], - "BuoyancyServo": [ - {"name": "platform_buoyancy_position", "rename": "control_inputs_buoyancy_position"} + {"name": "platform_battery_charge"}, + {"name": "platform_battery_voltage"}, ], + "BuoyancyServo": [{"name": "platform_buoyancy_position"}], 
"DeadReckonUsingMultipleVelocitySources": [ - { - "name": "fix_residual_percent_distance_traveled", - "rename": ( - "fix_residual_percent_distance_traveled_DeadReckonUsingMultipleVelocitySources" - ), - }, - {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingMultipleVelocitySources"}, - {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingMultipleVelocitySources"}, - {"name": "depth", "rename": "pose_depth_DeadReckonUsingMultipleVelocitySources"}, + {"name": "fix_residual_percent_distance_traveled"}, + {"name": "longitude"}, + {"name": "latitude"}, + {"name": "depth"}, ], "DeadReckonUsingSpeedCalculator": [ - { - "name": "fix_residual_percent_distance_traveled", - "rename": "fix_residual_percent_distance_traveled_DeadReckonUsingSpeedCalculator", - }, - {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingSpeedCalculator"}, - {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingSpeedCalculator"}, - {"name": "depth", "rename": "pose_depth_DeadReckonUsingSpeedCalculator"}, + {"name": "fix_residual_percent_distance_traveled"}, + {"name": "longitude"}, + {"name": "latitude"}, + {"name": "depth"}, ], - "ElevatorServo": [ - {"name": "platform_elevator_angle", "rename": "control_inputs_elevator_angle"} - ], - "MassServo": [{"name": "platform_mass_position", "rename": "control_inputs_mass_position"}], + "ElevatorServo": [{"name": "platform_elevator_angle"}], + "MassServo": [{"name": "platform_mass_position"}], "NAL9602": [ - {"name": "time_fix", "rename": "fix_time"}, - {"name": "latitude_fix", "rename": "fix_latitude"}, - {"name": "longitude_fix", "rename": "fix_longitude"}, - ], - "Onboard": [{"name": "platform_average_current", "rename": "health_platform_average_current"}], - "RudderServo": [{"name": "platform_rudder_angle", "rename": "control_inputs_rudder_angle"}], - "ThrusterServo": [ - { - "name": "platform_propeller_rotation_rate", - "rename": "control_inputs_propeller_rotation_rate", - } + {"name": "time_fix"}, + {"name": 
"latitude_fix"}, + {"name": "longitude_fix"}, ], + "Onboard": [{"name": "platform_average_current"}], + "RudderServo": [{"name": "platform_rudder_angle"}], + "ThrusterServo": [{"name": "platform_propeller_rotation_rate"}], "CurrentEstimator": [ - { - "name": "current_direction_navigation_frame", - "rename": "current_direction_navigation_frame", - }, - {"name": "current_speed_navigation_frame", "rename": "current_speed_navigation_frame"}, + {"name": "current_direction_navigation_frame"}, + {"name": "current_speed_navigation_frame"}, ], } @@ -192,16 +154,6 @@ def __init__( # noqa: PLR0913 self.verbose = verbose self.commandline = commandline - def show_variable_mapping(self): - """Show the variable mapping.""" - for group, parms in sorted(SCIENG_PARMS.items()): - print(f"Group: {group}") # noqa: T201 - for parm in parms: - name = parm.get("name", "N/A") - rename = parm.get("rename", "N/A") - print(f" {name} -> {rename}") # noqa: T201 - print() # noqa: T201 - def download_with_pooch(self, url, local_dir, known_hash=None): """Download using pooch with caching and verification.""" downloader = pooch.HTTPDownloader(timeout=(60, 300), progressbar=True) @@ -1197,13 +1149,6 @@ def process_command_line(self): "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85" ), ) - ( - parser.add_argument( - "--show_variable_mapping", - action="store_true", - help="Show the variable mapping: Group/variable_names -> their_renames", - ), - ) parser.add_argument( "--plot_time", action="store", @@ -1229,8 +1174,4 @@ def process_command_line(self): if __name__ == "__main__": extract = Extract() extract.process_command_line() - if extract.args.show_variable_mapping: - extract.show_variable_mapping() - sys.exit(0) - else: - extract.extract_groups_to_files_netcdf4(extract.args.log_file) + extract.extract_groups_to_files_netcdf4(extract.args.log_file) From 678bb42881887fe4c365264e0175a30617c728e7 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 1 Dec 2025 20:23:46 -0800 Subject: 
[PATCH 3/6] Add add_wetlabsubat_proxies() to compute biolume proxies for lrauv data. Also added _find_lat_lon_variables() for finding nav whether it's dorado or lrauv. Also added test for lrauv ubat processing. --- .vscode/launch.json | 4 +- src/data/test_process_lrauv.py | 102 +++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ca7cd39c..bb55b3d5 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -357,9 +357,9 @@ // Plankitvore deployment for CeNCOOS Syncro - whole month of April 2025 //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable - "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"] + //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data - //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] }, ] diff --git a/src/data/test_process_lrauv.py b/src/data/test_process_lrauv.py index 1b0c080b..06e298b2 100644 --- a/src/data/test_process_lrauv.py +++ b/src/data/test_process_lrauv.py @@ -1,6 +1,7 @@ # noqa: INP001 import numpy as np +import pandas as pd import pytest import xarray as xr @@ -313,3 +314,104 @@ def test_align_60hz_time_coordinate_matching(): # doesn't have _60hz suffix.
The actual coordinate binding happens in align.py # by reading the variable's coordinate, not by name matching. assert timevar == "wetlabsubat_time" # noqa: S101 + + +def test_wetlabsubat_proxy_processing_with_realistic_coordinates(tmp_path): + """Test add_wetlabsubat_proxies with realistic LRAUV coordinate variable names. + + Real LRAUV data has instrument-prefixed coordinates like: + - parlicor_latitude, parlicor_longitude + - massservo_latitude, massservo_longitude + - nudged_latitude, nudged_longitude + - onboard_latitude, onboard_longitude + - wetlabsubat_latitude, wetlabsubat_longitude + + But NOT navigation_latitude/navigation_longitude (which exist in Dorado data). + This test ensures the coordinate lookup doesn't fail when navigation_* are missing. + """ + from resample import Resampler + + # Create time arrays + time_vals = pd.date_range("2025-06-08 02:00:00", periods=3600, freq="1s") # 1 hour + time_60hz_vals = pd.date_range("2025-06-08 02:00:00", periods=3600 * 60, freq="16666667ns") + + # Create a mock dataset with realistic LRAUV structure + # Key: NO navigation_latitude/navigation_longitude variables + ds = xr.Dataset( + { + # UBAT 60Hz raw data (after expansion from 2D to 1D) + "wetlabsubat_digitized_raw_ad_counts": ( + ["wetlabsubat_time_60hz"], + np.random.randint(200, 800, len(time_60hz_vals)), + ), + # Regular 1Hz variables + "wetlabsubat_flow_rate": ( + ["wetlabsubat_time"], + np.full(len(time_vals), 350.0), + ), + "wetlabsbb2fl_fluorescence": ( + ["wetlabsbb2fl_time"], + np.random.uniform(0, 5, len(time_vals)), + ), + # Realistic coordinate variables - instrument-prefixed, NO navigation_* + "nudged_latitude": (["nudged_time"], np.full(len(time_vals), 36.8)), + "nudged_longitude": (["nudged_time"], np.full(len(time_vals), -122.0)), + "onboard_latitude": (["onboard_time"], np.full(len(time_vals), 36.8)), + "onboard_longitude": (["onboard_time"], np.full(len(time_vals), -122.0)), + "wetlabsubat_latitude": ( + ["wetlabsubat_time"], + 
np.full(len(time_vals), 36.8), + ), + "wetlabsubat_longitude": ( + ["wetlabsubat_time"], + np.full(len(time_vals), -122.0), + ), + }, + coords={ + "wetlabsubat_time": time_vals.to_numpy(), + "wetlabsubat_time_60hz": time_60hz_vals.to_numpy(), + "wetlabsbb2fl_time": time_vals.to_numpy(), + "nudged_time": time_vals.to_numpy(), + "onboard_time": time_vals.to_numpy(), + }, + ) + + # Add attributes + ds["wetlabsubat_digitized_raw_ad_counts"].attrs = { + "long_name": "Digitized raw AD counts", + "units": "counts", + } + ds["nudged_latitude"].attrs = {"standard_name": "latitude", "units": "degrees_north"} + ds["nudged_longitude"].attrs = {"standard_name": "longitude", "units": "degrees_east"} + + # Create Resampler instance + resampler = Resampler( + auv_name="pontus", + log_file=None, + freq="1S", + verbose=0, + ) + + # Set the dataset + resampler.ds = ds + resampler.df_r = pd.DataFrame(index=time_vals) + + # Create mock resampled_nc (would normally be created by resample_variable) + resampler.resampled_nc = xr.Dataset(coords={"time": time_vals.to_numpy()}) + resampler.resampled_nc["wetlabsbb2fl_fluorescence"] = ( + ["time"], + np.random.uniform(0, 5, len(time_vals)), + ) + + # This should NOT raise KeyError for navigation_latitude/navigation_longitude + # The method should find nudged_latitude/longitude or another available coordinate + try: + resampler.add_wetlabsubat_proxies(freq="1S") + # If we get here, the coordinate lookup worked + assert True # noqa: S101 + except KeyError as e: + if "navigation_latitude" in str(e) or "navigation_longitude" in str(e): + pytest.fail( + f"Coordinate lookup failed - should find alternative to navigation_* variables: {e}" + ) + raise From f2da1aa3ba2dddded100d5d66ab4a70053e7fcfc Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 1 Dec 2025 20:24:29 -0800 Subject: [PATCH 4/6] Add add_wetlabsubat_proxies() to compute biolume proxies for lrauv data. Also added _find_lat_lon_variables() for finding nav whether it's dorado or lrauv.
Also added test for lrauv ubat processing. --- src/data/resample.py | 421 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 415 insertions(+), 6 deletions(-) diff --git a/src/data/resample.py b/src/data/resample.py index 08b859c5..1d467b9d 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -488,11 +488,14 @@ def select_nighttime_bl_raw( sunsets: A list of sunset times for each night. sunrises: A list of sunrise times for each night. """ - lat = float(self.ds["navigation_latitude"].median()) - lon = float(self.ds["navigation_longitude"].median()) + lat_var, lon_var = self._find_lat_lon_variables() + lat = float(self.ds[lat_var].median()) + lon = float(self.ds[lon_var].median()) self.logger.debug("Getting sun altitudes for nighttime selection") sun_alts = [] - for ts in self.ds["navigation_time"].to_numpy()[::stride]: + # Get the time coordinate for the latitude variable + time_coord = self.ds[lat_var].dims[0] + for ts in self.ds[time_coord].to_numpy()[::stride]: # About 10-minute resolution from 5 Hz navigation data sun_alts.append( # noqa: PERF401 get_altitude( @@ -504,9 +507,7 @@ def select_nighttime_bl_raw( # Find sunset and sunrise - where sun altitude changes sign sign_changes = np.where(np.diff(np.sign(sun_alts)))[0] - ss_sr_times = ( - self.ds["navigation_time"].isel({"navigation_time": sign_changes * stride}).to_numpy() - ) + ss_sr_times = self.ds[time_coord].isel({time_coord: sign_changes * stride}).to_numpy() self.logger.debug("Sunset and sunrise times: %s", ss_sr_times) sunsets = [] @@ -547,6 +548,43 @@ def select_nighttime_bl_raw( self.logger.info("No sunset or sunrise found during this mission.") return nighttime_bl_raw, sunsets, sunrises + def _find_lat_lon_variables(self) -> tuple[str, str]: + """Find latitude and longitude variables in the dataset. + + Searches for variables ending in _latitude and _longitude. + Prefers navigation_, nudged_, or onboard_ prefixes in that order. 
+ + Returns: + tuple: (lat_var_name, lon_var_name) + + Raises: + KeyError: If no latitude/longitude variables are found + """ + lat_vars = [v for v in self.ds.variables if v.endswith("_latitude")] + lon_vars = [v for v in self.ds.variables if v.endswith("_longitude")] + + if not lat_vars or not lon_vars: + msg = ( + f"No latitude/longitude variables found. " + f"Available variables: {list(self.ds.variables.keys())}" + ) + raise KeyError(msg) + + # Prefer navigation_, then nudged_, then onboard_, then any other + for prefix in ["navigation_", "nudged_", "onboard_"]: + for lat_var in lat_vars: + if lat_var.startswith(prefix): + lon_var = prefix + "longitude" + if lon_var in lon_vars: + self.logger.debug("Using %s and %s for coordinates", lat_var, lon_var) + return lat_var, lon_var + + # Fall back to first available pair + lat_var = lat_vars[0] + lon_var = lon_vars[0] + self.logger.info("Using first available coordinates: %s and %s", lat_var, lon_var) + return lat_var, lon_var + def add_profile(self, depth_threshold: float) -> None: # Find depth vertices value using scipy's find_peaks algorithm options = {"prominence": 10, "width": 30} @@ -860,6 +898,345 @@ def add_biolume_proxies( # noqa: PLR0913, PLR0915 return fluo, sunsets, sunrises + def add_wetlabsubat_proxies( # noqa: PLR0913, PLR0915, C901, PLR0912 + self, + freq, + window_size_secs: int = 5, + envelope_mini: float = 1.5e10, + flash_threshold: float = FLASH_THRESHOLD, + proxy_ratio_adinos: float = 3.9811e13, # Default value for LRAUV + proxy_cal_factor: float = 0.00470, # Default value for LRAUV + ) -> tuple[pd.Series, list[datetime], list[datetime]]: + """Add biolume proxy variables computed from wetlabsubat_digitized_raw_ad_counts. + + This is parallel to add_biolume_proxies() but for LRAUV wetlabsubat data. + Computations follow Appendix B in Messie et al. 2019. 
+ https://www.sciencedirect.com/science/article/pii/S0079661118300478 + """ + self.logger.info( + "Adding wetlabsubat proxy variables computed from wetlabsubat_digitized_raw_ad_counts" + ) + sample_rate = 60 # Assume all digitized_raw_ad_counts data is sampled at 60 Hz + window_size = window_size_secs * sample_rate + + # s_ubat_raw includes daytime data - see below for nighttime data + s_ubat_raw = self.ds["wetlabsubat_digitized_raw_ad_counts"].to_pandas().dropna() + + # Compute background bioluminescence envelope + self.logger.debug("Applying rolling min filter") + min_bg_unsmoothed = s_ubat_raw.rolling( + window_size, + min_periods=0, + center=True, + ).min() + min_bg = ( + min_bg_unsmoothed.rolling(window_size, min_periods=0, center=True).mean().to_numpy() + ) + + self.logger.debug("Applying rolling median filter") + med_bg_unsmoothed = s_ubat_raw.rolling( + window_size, + min_periods=0, + center=True, + ).median() + s_med_bg = med_bg_unsmoothed.rolling( + window_size, + min_periods=0, + center=True, + ).mean() + med_bg = s_med_bg.to_numpy() + max_bg = med_bg * 2.0 - min_bg + # envelope_mini: minimum value for the envelope (max_bgrd - med_bgrd) + # to avoid very dim flashes when the background is low + max_bg[max_bg - med_bg < envelope_mini] = ( + med_bg[max_bg - med_bg < envelope_mini] + envelope_mini + ) + + # Find the high and low peaks + self.logger.debug("Finding peaks") + peaks, _ = signal.find_peaks(s_ubat_raw, height=max_bg) + s_peaks = pd.Series(s_ubat_raw.iloc[peaks], index=s_ubat_raw.index[peaks]) + s_med_bg_peaks = pd.Series(s_med_bg.iloc[peaks], index=s_ubat_raw.index[peaks]) + if self.flash_threshold: + flash_threshold = self.flash_threshold + flash_threshold_note = f"Computed with flash_threshold = {flash_threshold:.0e}" + self.logger.info("Using flash_threshold = %.4e", flash_threshold) + nbflash_high = s_peaks[s_peaks > (s_med_bg_peaks + flash_threshold)] + nbflash_low = s_peaks[s_peaks <= (s_med_bg_peaks + flash_threshold)] + + # Construct full
time series of flashes with NaNs for non-flash values + s_nbflash_high = pd.Series(np.nan, index=s_ubat_raw.index) + s_nbflash_high.loc[nbflash_high.index] = nbflash_high + s_nbflash_low = pd.Series(np.nan, index=s_ubat_raw.index) + s_nbflash_low.loc[nbflash_low.index] = nbflash_low + + # Count the number of flashes per second - use 15 second window stepping every second + flash_count_seconds = 15 + flash_window = flash_count_seconds * sample_rate + self.logger.debug("Counting flashes using %d second window", flash_count_seconds) + nbflash_high_counts = ( + s_nbflash_high.rolling(flash_window, step=1, min_periods=0, center=True) + .count() + .resample(freq.lower()) + .mean() + / flash_count_seconds + ) + nbflash_low_counts = ( + s_nbflash_low.rolling(flash_window, step=1, min_periods=0, center=True) + .count() + .resample(freq.lower()) + .mean() + / flash_count_seconds + ) + + # Get flow data - try both flow_rate and flow variable names + flow = None + if "wetlabsubat_flow_rate" in self.ds: + flow = ( + self.ds[["wetlabsubat_flow_rate"]]["wetlabsubat_flow_rate"] + .to_pandas() + .resample("1s") + .mean() + .ffill() + ) + self.logger.info("Using wetlabsubat_flow_rate for flow calculations") + elif "wetlabsubat_flow" in self.ds: + flow = ( + self.ds[["wetlabsubat_flow"]]["wetlabsubat_flow"] + .to_pandas() + .resample("1s") + .mean() + .ffill() + ) + self.logger.info("Using wetlabsubat_flow for flow calculations") + + # Flow sensor is not always on or may not be present, fill in 0.0 values with 350 ml/s + zero_note = "" + if flow is None: + self.logger.info("No flow data found - using constant 350 ml/s") + # Create flow series with same index as resampled data + flow = pd.Series(350.0, index=nbflash_high_counts.index) + zero_note = "No flow data available - used constant 350 ml/s" + else: + num_zero_flow = len(np.where(flow == 0)[0]) + if num_zero_flow > 0: + zero_note = ( + f"Zero flow values found: {num_zero_flow} of {len(flow)} " + f"- replaced with 350 ml/s" + ) + 
+ self.logger.info(zero_note) + flow = flow.replace(0.0, 350.0) + + # Compute flashes per liter - pandas.Series.divide() will match indexes + # Units: flashes per liter = (flashes per second / mL/s) * 1000 mL/L + self.logger.info( + "Computing flashes per liter: wetlabsubat_nbflash_high, wetlabsubat_nbflash_low" + ) + self.df_r["wetlabsubat_nbflash_high"] = nbflash_high_counts.divide(flow) * 1000 + self.df_r["wetlabsubat_nbflash_high"].attrs["long_name"] = ( + "High intensity flashes (copepods proxy)" + ) + self.df_r["wetlabsubat_nbflash_high"].attrs["units"] = "flashes/liter" + self.df_r["wetlabsubat_nbflash_high"].attrs["comment"] = ( + f"{zero_note} - {flash_threshold_note}" + ) + + self.df_r["wetlabsubat_nbflash_low"] = nbflash_low_counts.divide(flow) * 1000 + self.df_r["wetlabsubat_nbflash_low"].attrs["long_name"] = ( + "Low intensity flashes (Larvacean proxy)" + ) + self.df_r["wetlabsubat_nbflash_low"].attrs["units"] = "flashes/liter" + self.df_r["wetlabsubat_nbflash_low"].attrs["comment"] = ( + f"{zero_note} - {flash_threshold_note}" + ) + + # Flash intensity - proxy for small jellies - for entire mission, not just nighttime + all_raw = self.ds[["wetlabsubat_digitized_raw_ad_counts"]][ + "wetlabsubat_digitized_raw_ad_counts" + ].to_pandas() + med_bg_60 = pd.Series( + np.interp(all_raw.index, s_med_bg.index, med_bg), + index=all_raw.index, + ) + intflash = ( + (all_raw - med_bg_60) + .rolling(flash_window, min_periods=0, center=True) + .max() + .resample("1s") + .mean() + ) + self.logger.info( + "Saving flash intensity: wetlabsubat_intflash - " + "the upper bound of the background envelope" + ) + self.df_r["wetlabsubat_intflash"] = intflash + self.df_r["wetlabsubat_intflash"].attrs["long_name"] = ( + "Flashes intensity (small jellies proxy)" + ) + self.df_r["wetlabsubat_intflash"].attrs["units"] = "counts" + self.df_r["wetlabsubat_intflash"].attrs["comment"] = ( + f"intensity of flashes from {sample_rate} Hz " + f"wetlabsubat_digitized_raw_ad_counts variable in
{freq} intervals." + ) + + # Make min_bg a 1S pd.Series so that we can divide by flow, matching indexes + s_min_bg = min_bg_unsmoothed.rolling( + window_size, + min_periods=0, + center=True, + ).mean() + bg_biolume = pd.Series(s_min_bg, index=s_ubat_raw.index).resample("1s").mean() + self.logger.info("Saving Background bioluminescence (dinoflagellates proxy)") + self.df_r["wetlabsubat_bg_biolume"] = bg_biolume.divide(flow) * 1000 + self.df_r["wetlabsubat_bg_biolume"].attrs["long_name"] = ( + "Background bioluminescence (dinoflagellates proxy)" + ) + self.df_r["wetlabsubat_bg_biolume"].attrs["units"] = "counts/liter" + self.df_r["wetlabsubat_bg_biolume"].attrs["comment"] = zero_note + + fluo = None + nighttime_ubat_raw, sunsets, sunrises = self.select_nighttime_ubat_raw() + if nighttime_ubat_raw.empty: + self.logger.info( + "No nighttime wetlabsubat data to compute adinos, diatoms, hdinos proxies", + ) + else: + # (2) Phytoplankton proxies - look for wetlabsbb2fl fluorescence/chlorophyll data + fluo_var = None + for var in self.resampled_nc.variables: + if "wetlabsbb2fl" in var.lower() and ( + "fl" in var.lower() or "chlorophyll" in var.lower() + ): + fluo_var = var + break + + if fluo_var is None: + self.logger.info( + "No wetlabsbb2fl fluorescence data found. 
" + "Not computing adinos, diatoms, and hdinos" + ) + return fluo, sunsets, sunrises + + self.logger.info("Using %s for phytoplankton proxy calculations", fluo_var) + fluo = ( + self.resampled_nc[fluo_var] + .where( + (self.resampled_nc["time"] > min(sunsets)) + & (self.resampled_nc["time"] < max(sunrises)), + ) + .to_pandas() + .resample(freq.lower()) + .mean() + ) + # Set negative values from fluorescence to NaN + fluo[fluo < 0] = np.nan + self.logger.info("Using proxy_ratio_adinos = %.4e", proxy_ratio_adinos) + self.logger.info("Using proxy_cal_factor = %.6f", proxy_cal_factor) + + nighttime_bg_biolume = ( + pd.Series(s_min_bg, index=nighttime_ubat_raw.index).resample("1s").mean() + ) + nighttime_bg_biolume_perliter = nighttime_bg_biolume.divide(flow) * 1000 + pseudo_fluorescence = nighttime_bg_biolume_perliter / proxy_ratio_adinos + self.df_r["wetlabsubat_proxy_adinos"] = ( + np.minimum(fluo, pseudo_fluorescence) / proxy_cal_factor + ) + self.df_r["wetlabsubat_proxy_adinos"].attrs["comment"] = ( + f"Autotrophic dinoflagellate proxy using proxy_ratio_adinos" + f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}" + ) + self.df_r["wetlabsubat_proxy_hdinos"] = ( + pseudo_fluorescence - np.minimum(fluo, pseudo_fluorescence) + ) / proxy_cal_factor + self.df_r["wetlabsubat_proxy_hdinos"].attrs["comment"] = ( + f"Heterotrophic dinoflagellate proxy using proxy_ratio_adinos" + f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}" + ) + wetlabsubat_proxy_diatoms = (fluo - pseudo_fluorescence) / proxy_cal_factor + wetlabsubat_proxy_diatoms[wetlabsubat_proxy_diatoms < 0] = 0 + self.df_r["wetlabsubat_proxy_diatoms"] = wetlabsubat_proxy_diatoms + self.df_r["wetlabsubat_proxy_diatoms"].attrs["comment"] = ( + f"Diatom proxy using proxy_ratio_adinos" + f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}" + ) + + return fluo, sunsets, sunrises + + def select_nighttime_ubat_raw( + self, + stride: int = 3000, + 
) -> tuple[pd.Series, list[datetime], list[datetime]]: + """ + Select nighttime wetlabsubat_digitized_raw_ad_counts data for multiple nights in a mission. + Parallel to select_nighttime_bl_raw() but for LRAUV wetlabsubat data. + Default stride of 3000 gives 10-minute resolution from 5 Hz navigation data. + + Returns: + nighttime_ubat_raw: A pandas Series containing nighttime ubat data. + sunsets: A list of sunset times for each night. + sunrises: A list of sunrise times for each night. + """ + lat_var, lon_var = self._find_lat_lon_variables() + lat = float(self.ds[lat_var].median()) + lon = float(self.ds[lon_var].median()) + self.logger.debug("Getting sun altitudes for nighttime selection") + sun_alts = [] + # Get the time coordinate for the latitude variable + time_coord = self.ds[lat_var].dims[0] + for ts in self.ds[time_coord].to_numpy()[::stride]: + # About 10-minute resolution from 5 Hz navigation data + sun_alts.append( # noqa: PERF401 + get_altitude( + lat, + lon, + datetime.fromtimestamp(ts.astype(int) / 1.0e9, tz=UTC), + ), + ) + + # Find sunset and sunrise - where sun altitude changes sign + sign_changes = np.where(np.diff(np.sign(sun_alts)))[0] + ss_sr_times = self.ds[time_coord].isel({time_coord: sign_changes * stride}).to_numpy() + self.logger.debug("Sunset and sunrise times: %s", ss_sr_times) + + sunsets = [] + sunrises = [] + nighttime_ubat_raw = pd.Series(dtype="float64") + + # Iterate over sunset and sunrise pairs + for i in range(0, len(ss_sr_times) - 1, 2): + sunset = ss_sr_times[i] + pd.to_timedelta(1, "h") # 1 hour past sunset + sunrise = ss_sr_times[i + 1] - pd.to_timedelta(1, "h") # 1 hour before sunrise + sunsets.append(sunset) + sunrises.append(sunrise) + + self.logger.info( + "Extracting wetlabsubat_digitized_raw_ad_counts data " + "between sunset %s and sunrise %s", + sunset, + sunrise, + ) + nighttime_data = ( + self.ds["wetlabsubat_digitized_raw_ad_counts"] + .where( + (self.ds["wetlabsubat_time_60hz"] > sunset) + & 
(self.ds["wetlabsubat_time_60hz"] < sunrise), + ) + .to_pandas() + .dropna() + ) + # This complication is needed because concat will not like an empty DataFrame + nighttime_ubat_raw = ( + nighttime_ubat_raw.copy() + if nighttime_data.empty + else nighttime_data.copy() + if nighttime_ubat_raw.empty + else pd.concat([nighttime_ubat_raw, nighttime_data]) # if both DataFrames non empty + ) + + if not sunsets or not sunrises: + self.logger.info("No sunset or sunrise found during this mission.") + return nighttime_ubat_raw, sunsets, sunrises + def correct_biolume_proxies( # noqa: C901, PLR0912, PLR0913, PLR0915 self, biolume_fluo: pd.Series, # from add_biolume_proxies @@ -1138,6 +1515,16 @@ def resample_variable( # noqa: PLR0913 biolume_sunrises, depth_threshold, ) + elif instr == "wetlabsubat" and variable == "wetlabsubat_digitized_raw_ad_counts": + # All wetlabsubat proxy variables are computed from wetlabsubat_digitized_raw_ad_counts + # Use default parameters for LRAUV - these may need adjustment in the future + proxy_cal_factor = 0.00470 + proxy_ratio_adinos = 3.9811e13 + self.add_wetlabsubat_proxies( + freq=freq, + proxy_cal_factor=proxy_cal_factor, + proxy_ratio_adinos=proxy_ratio_adinos, + ) else: self.df_o[variable] = self.ds[variable].to_pandas() self.df_o[f"{variable}_mf"] = ( @@ -1372,6 +1759,28 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 self.resampled_nc[var].attrs["coordinates"] = ( "time depth latitude longitude" ) + elif instr == "wetlabsubat" and variable == "wetlabsubat_digitized_raw_ad_counts": + # resample_variable() creates new proxy variables for LRAUV + # not in the original align.nc file + self.resample_variable( + instr, + variable, + mf_width, + freq, + mission_start, + mission_end, + instrs_to_pad, + depth_threshold, + ) + for var in self.df_r: + if var not in variables: + # save new proxy variable + self.df_r[var].index.rename("time", inplace=True) # noqa: PD002 + self.resampled_nc[var] = self.df_r[var].to_xarray() + 
self.resampled_nc[var].attrs = self.df_r[var].attrs + self.resampled_nc[var].attrs["coordinates"] = ( + "time depth latitude longitude" + ) elif variable in {"biolume_latitude", "biolume_longitude"}: self.logger.info( "Not saving instrument coordinate variable %s to resampled file", From 48c54363b33348948e2e4b2adfc31d24f70271a9 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 10:36:20 -0800 Subject: [PATCH 5/6] Removed 'concentration_of_colored_dissolved_organic_matter_in_sea_water' from / Group. --- src/data/nc42netcdfs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index f3105418..ddb77b8c 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -39,7 +39,6 @@ SCI_PARMS = { "/": [ - {"name": "concentration_of_colored_dissolved_organic_matter_in_sea_water"}, {"name": "longitude"}, {"name": "latitude"}, {"name": "depth"}, From 86c402fc5af9d8415b930b6097fbfcd5ddf58dd1 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 10:37:26 -0800 Subject: [PATCH 6/6] Ensure that only the variable 'depth' has that standard_name in the _1S.nc file. 
--- .vscode/launch.json | 6 ++++-- src/data/resample.py | 10 ++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index bb55b3d5..d5d5c0f9 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -192,7 +192,8 @@ //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"], //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"], - "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"], + //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"], + "args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250414_20250418/20250414T205440/202504142054_202504150400.nc4"], }, { "name": "5.0 - archive.py", @@ -359,7 +360,8 @@ // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data - "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"] }, ] diff --git a/src/data/resample.py b/src/data/resample.py index 1d467b9d..aaf2aa4c 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -117,12 +117,13 @@ def 
_build_global_metadata(self) -> None: gitcommit = "" iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z" - # Ensure that only the latitude and longitude variables have - # standard_name attributes equal to "latitude" and "longitude" so that + # Ensure that only the latitude, longitude, and depth variables have + # standard_name attributes equal to "latitude", "longitude", and "depth" so that # the .cf[] accessor works correctly for var in self.resampled_nc.data_vars: - if self.resampled_nc[var].attrs.get("standard_name") in ["latitude", "longitude"]: - if var in {"latitude", "longitude"}: + standard_name = self.resampled_nc[var].attrs.get("standard_name") + if standard_name in ["latitude", "longitude", "depth"]: + if var in {"latitude", "longitude", "depth"}: continue self.logger.info("Removing standard_name attribute from variable %s", var) del self.resampled_nc[var].attrs["standard_name"] @@ -457,6 +458,7 @@ def save_coordinates( self.df_r["longitude"].index.rename("time", inplace=True) # noqa: PD002 self.resampled_nc["longitude"] = self.df_r["longitude"].to_xarray() self.resampled_nc["depth"].attrs = self.ds[f"{instr}_depth"].attrs + self.resampled_nc["depth"].attrs["standard_name"] = "depth" self.resampled_nc["depth"].attrs["comment"] += ( f". {self.ds[f'{instr}_depth'].attrs['comment']}" f" mean sampled at {self.freq} intervals following"