From 6a252372d43e5b518099eb3b4b3ce62ae08d572c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 17 Dec 2025 09:51:50 -0800 Subject: [PATCH 1/7] WIP: Add log_file arg, try to fix Linux map drawing. --- src/data/create_products.py | 262 ++++++++++++++++++++++++++---------- 1 file changed, 190 insertions(+), 72 deletions(-) diff --git a/src/data/create_products.py b/src/data/create_products.py index 646be42..549f0d9 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -29,7 +29,7 @@ from common_args import DEFAULT_BASE_PATH, get_standard_dorado_parser from gulper import Gulper from logs2netcdfs import AUV_NetCDF, MISSIONNETCDFS -from resample import AUVCTD_OPENDAP_BASE, FREQ +from resample import AUVCTD_OPENDAP_BASE, FREQ, LRAUV_OPENDAP_BASE from scipy.interpolate import griddata # Optional import for bathymetry data @@ -90,6 +90,7 @@ def __init__( # noqa: PLR0913 local: bool = False, # noqa: FBT001, FBT002 verbose: int = 0, commandline: str = "", + log_file: str = None, ): """Initialize CreateProducts with explicit parameters. @@ -101,6 +102,7 @@ def __init__( # noqa: PLR0913 local: Local processing flag verbose: Verbosity level (0-2) commandline: Command line string for tracking + log_file: Path to LRAUV log file (alternative to auv_name/mission) """ self.auv_name = auv_name self.mission = mission @@ -109,6 +111,7 @@ def __init__( # noqa: PLR0913 self.local = local self.verbose = verbose self.commandline = commandline + self.log_file = log_file # Maximum length for long_name before using variable name instead MAX_LONG_NAME_LENGTH = 40 @@ -178,26 +181,40 @@ def __init__( # noqa: PLR0913 } def _open_ds(self): - local_nc = Path( - BASE_PATH, - self.auv_name, - MISSIONNETCDFS, - self.mission, - f"{self.auv_name}_{self.mission}_{FREQ}.nc", - ) - # Requires mission to have been processed and archived to AUVCTD - dap_url = os.path.join( # noqa: PTH118 - AUVCTD_OPENDAP_BASE, - "surveys", - self.mission.split(".")[0], - "netcdf", - f"{self.auv_name}_{self.mission}_{FREQ}.nc", - ) - try: - self.ds = xr.open_dataset(dap_url) - except OSError: - self.logger.debug("%s not available yet", dap_url) - self.ds = xr.open_dataset(local_nc) + if self._is_lrauv(): + # Open LRAUV resampled file - transform log_file to point to _1S.nc file + # Convert from original .nc4 to resampled _1S.nc format + resampled_file = self.log_file.replace(".nc4", f"_{FREQ}.nc") + log_path = Path(self.base_path, "lrauv_data", resampled_file) + dap_url = os.path.join(LRAUV_OPENDAP_BASE, resampled_file) # noqa: PTH118 + try: + self.logger.info("Opening local LRAUV resampled file: %s", log_path) + self.ds = xr.open_dataset(log_path) + except (OSError, FileNotFoundError): + self.logger.info("Local file not available, trying OPENDAP: %s", dap_url) + self.ds = xr.open_dataset(dap_url) + else: + # Open Dorado mission file - try local first, then OPENDAP + local_nc = Path( + BASE_PATH, + self.auv_name, + MISSIONNETCDFS, + self.mission, + f"{self.auv_name}_{self.mission}_{FREQ}.nc", + ) + dap_url = os.path.join( # noqa: PTH118 + AUVCTD_OPENDAP_BASE, + "surveys", + self.mission.split(".")[0], + "netcdf", + f"{self.auv_name}_{self.mission}_{FREQ}.nc", + ) + try: + self.logger.info("Opening local Dorado file: %s", local_nc) + self.ds = xr.open_dataset(local_nc) + except (OSError, FileNotFoundError): + self.logger.info("Local file not available, trying OPENDAP: %s", dap_url) + self.ds = xr.open_dataset(dap_url) def _compute_density(self, best_ctd: str = "ctd1") -> None: """Compute sigma-t density from temperature and salinity using EOS-80. @@ -242,6 +259,54 @@ def _compute_density(self, best_ctd: str = "ctd1") -> None: ) self.logger.info("Computed density (sigma-t) from %s and %s", temp_var, sal_var) + def _is_lrauv(self) -> bool: + """Detect if processing LRAUV data based on parameters.""" + return self.log_file is not None + + def _get_plot_variables(self, best_ctd: str) -> list: + """Get vehicle-specific list of variables to plot. + + Args: + best_ctd: The CTD instrument identifier + + Returns: + List of (variable_name, scale) tuples + """ + if self._is_lrauv(): + return self._get_lrauv_plot_variables() + return self._get_dorado_plot_variables(best_ctd) + + def _get_dorado_plot_variables(self, best_ctd: str) -> list: + """Get Dorado-specific plot variables.""" + return [ + ("density", "linear"), + (f"{best_ctd}_temperature", "linear"), + (f"{best_ctd}_salinity", "linear"), + ("isus_nitrate", "linear"), + ("ctd1_oxygen_mll", "linear"), + ("hs2_bbp420", "linear"), + ("hs2_bbp700", "linear"), + ("hs2_fl700", "linear"), + ("biolume_avg_biolume", "log"), + ] + + def _get_lrauv_plot_variables(self) -> list: + """Get LRAUV-specific plot variables. + + Returns variables commonly available in LRAUV log files. + """ + return [ + ("density", "linear"), + ("temperature", "linear"), + ("salinity", "linear"), + ("nitrate", "linear"), + ("oxygen", "linear"), + ("bbp470", "linear"), + ("bbp700", "linear"), + ("chlorophyll", "linear"), + ("biolume", "log"), + ] + def _grid_dims(self) -> tuple: # From Matlab code in plot_sections.m: # auvnav positions are too fine for distance calculations, they resolve @@ -264,7 +329,7 @@ def _grid_dims(self) -> tuple: # distnav = cumsum(sqrt(dxFix.^2 + dyFix.^2)); % in m # dists = distnav / 1000; % in km - utm_zone = int(31 + (self.ds.cf["longitude"].to_numpy().mean() // 6)) + utm_zone = int(31 + (self.ds.cf["longitude"].mean() // 6)) MAX_LONGITUDE_VALUES = 400 n_subsample = 200 if len(self.ds.cf["longitude"].to_numpy()) > MAX_LONGITUDE_VALUES else 1 lon_sub_intrp = np.interp( @@ -326,7 +391,11 @@ def _get_bathymetry(self, lons: np.ndarray, lats: np.ndarray) -> np.ndarray: # Use local Monterey Bay grid if available and coordinates are in range # Otherwise fall back to global grids - points = pd.DataFrame({"lon": lons, "lat": lats}) + points = pd.DataFrame({"lon": lons, "lat": lats}).dropna() + if len(pd.DataFrame({"lon": lons, "lat": lats})) != len(points): + self.logger.warning( + "Some lon/lat points have NaNs, these will be skipped for bathymetry retrieval" + ) # Check if coordinates are within Monterey Bay region MB_LON_RANGE = (-122.5, -121.5) @@ -340,37 +409,53 @@ def _get_bathymetry(self, lons: np.ndarray, lats: np.ndarray) -> np.ndarray: if in_mb_region and MONTEREY_BAY_GRID: self.logger.info("Using local Monterey Bay bathymetry grid") + try: + result = pygmt.grdtrack( + points=points, + grid=MONTEREY_BAY_GRID, + newcolname="depth", + ) + except Exception as e: # noqa: BLE001 + self.logger.warning( + "Failed to retrieve bathymetry from Monterey Bay grid: %s. " + "Continuing without bathymetry.", + e, + ) + return None + else: + # Convert to positive depths (meters below sea surface) + bathymetry = -result["depth"].to_numpy() + self.logger.info( + "Retrieved bathymetry data from Monterey Bay grid (min: %.1f m, max: %.1f m)", + bathymetry.min(), + bathymetry.max(), + ) + return bathymetry + + # Fall back to global grids + # Try GEBCO first (higher resolution), fall back to ETOPO1 + try: result = pygmt.grdtrack( points=points, - grid=MONTEREY_BAY_GRID, + grid="@earth_relief_15s", # 15 arc-second resolution (~450m) newcolname="depth", ) + except Exception as e: # noqa: BLE001 + self.logger.warning( + "Failed to retrieve bathymetry data: %s. Continuing without bathymetry.", + e, + ) + return None + else: # Convert to positive depths (meters below sea surface) bathymetry = -result["depth"].to_numpy() self.logger.info( - "Retrieved bathymetry data from Monterey Bay grid (min: %.1f m, max: %.1f m)", + "Retrieved bathymetry data using pygmt (min: %.1f m, max: %.1f m)", bathymetry.min(), bathymetry.max(), ) return bathymetry - # Fall back to global grids - # Try GEBCO first (higher resolution), fall back to ETOPO1 - result = pygmt.grdtrack( - points=points, - grid="@earth_relief_15s", # 15 arc-second resolution (~450m) - newcolname="depth", - ) - - # Convert to positive depths (meters below sea surface) - bathymetry = -result["depth"].to_numpy() - self.logger.info( - "Retrieved bathymetry data using pygmt (min: %.1f m, max: %.1f m)", - bathymetry.min(), - bathymetry.max(), - ) - return bathymetry - def _profile_bottoms( self, distnav: xr.DataArray, @@ -485,8 +570,15 @@ def _plot_track_map( # noqa: PLR0915 # Store original position pos = map_ax.get_position() - # Make the plot square by using equal aspect (do this first) - map_ax.set_aspect("equal", adjustable="datalim") + # Set fixed axis limits for Monterey Bay area (in Web Mercator) FIRST + lon_bounds = [-122.41, -121.77] + lat_bounds = [36.5, 37.0] + x_bounds, y_bounds = transformer.transform(lon_bounds, lat_bounds) + map_ax.set_xlim(x_bounds) + map_ax.set_ylim(y_bounds) + + # Make the plot square by using equal aspect with explicit box adjustment + map_ax.set_aspect("equal", adjustable="box") # Plot the track with profile_number coloring in Web Mercator coordinates profile_numbers = self.ds["profile_number"].to_numpy() @@ -503,22 +595,20 @@ def _plot_track_map( # noqa: PLR0915 map_ax.plot(x_merc[0], y_merc[0], "go", markersize=8, label="Start", zorder=5) map_ax.plot(x_merc[-1], y_merc[-1], "r^", markersize=8, label="End", zorder=5) - # Set fixed axis limits for Monterey Bay area (in Web Mercator) - lon_bounds = [-122.41, -121.77] - lat_bounds = [36.5, 37.0] - x_bounds, y_bounds = transformer.transform(lon_bounds, lat_bounds) - map_ax.set_xlim(x_bounds) - map_ax.set_ylim(y_bounds) - - # Add basemap + # Add basemap with explicit zoom to ensure consistent rendering across platforms ctx.add_basemap( map_ax, crs="EPSG:3857", source=ctx.providers.OpenStreetMap.Mapnik, alpha=0.6, zorder=0, + zoom=11, # Explicit zoom for consistent rendering ) + # Re-apply axis limits after basemap to ensure they're respected + map_ax.set_xlim(x_bounds) + map_ax.set_ylim(y_bounds) + # Now position map aligned with left edge of reference, 50% width # Use a square aspect ratio based on the y-dimension map_height = pos.height @@ -527,6 +617,9 @@ def _plot_track_map( # noqa: PLR0915 map_ax.set_position([ref_pos.x0, pos.y0, map_width, map_height]) + # Force aspect ratio again after positioning for consistency across platforms + map_ax.set_aspect("equal", adjustable="box") + # Add colorbar for profile numbers - create manually positioned axes # to avoid affecting map position # Position colorbar to the right of the map @@ -975,8 +1068,15 @@ def plot_2column(self) -> str: # Create map in top-left subplot (row=0, col=0), aligned with ax[1,0] below self._plot_track_map(ax[0, 0], ax[1, 0]) - # Parse gulper locations - gulper_locations = self._get_gulper_locations(distnav) + # Parse sample locations - vehicle specific + if self.auv_name and self.mission: + # Dorado missions use gulper + gulper_locations = self._get_gulper_locations(distnav) + else: + # LRAUV missions may use sipper or ESP + # TODO: Implement _get_sipper_locations(distnav) + # TODO: Implement _get_esp_locations(distnav) + gulper_locations = {} profile_bottoms = self._profile_bottoms(distnav) @@ -987,17 +1087,11 @@ def plot_2column(self) -> str: row = 1 # Start at row 1, col 0 (below the map) col = 0 - for var, scale in ( - ("density", "linear"), - (f"{best_ctd}_temperature", "linear"), - (f"{best_ctd}_salinity", "linear"), - ("isus_nitrate", "linear"), - ("ctd1_oxygen_mll", "linear"), - ("hs2_bbp420", "linear"), - ("hs2_bbp700", "linear"), - ("hs2_fl700", "linear"), - ("biolume_avg_biolume", "log"), - ): + + # Get vehicle-specific plot variables + plot_variables = self._get_plot_variables(best_ctd) + + for var, scale in plot_variables: self.logger.info("Plotting %s...", var) if var not in self.ds: self.logger.warning("%s not in dataset, plotting with no data", var) @@ -1033,13 +1127,18 @@ def plot_2column(self) -> str: row += 1 # Save plot to file - images_dir = Path(BASE_PATH, self.auv_name, MISSIONIMAGES, self.mission) - Path(images_dir).mkdir(parents=True, exist_ok=True) - - output_file = Path( - images_dir, - f"{self.auv_name}_{self.mission}_{FREQ}_2column.png", - ) + if self._is_lrauv(): + # Use log file name for output + log_name = Path(self.log_file).stem + images_dir = Path( + BASE_PATH, "lrauv_data", MISSIONIMAGES, Path(self.log_file).parent.name + ) + Path(images_dir).mkdir(parents=True, exist_ok=True) + output_file = Path(images_dir, f"{log_name}_2column.png") + else: + images_dir = Path(BASE_PATH, self.auv_name, MISSIONIMAGES, self.mission) + Path(images_dir).mkdir(parents=True, exist_ok=True) + output_file = Path(images_dir, f"{self.auv_name}_{self.mission}_{FREQ}_2column.png") plt.savefig(output_file, dpi=100, bbox_inches="tight") plt.show() plt.close(fig) @@ -1126,6 +1225,10 @@ def plot_biolume(self) -> str: def _get_best_ctd(self) -> str: """Determine best CTD to use for ODV lookup table based on metadata""" + # LRAUV doesn't use multiple CTDs, return None + if self._is_lrauv(): + return None + best_ctd = "ctd1" # default to ctd1 if no metadata if "comment" not in self.ds.attrs: self.logger.warning("No comment attribute in dataset") @@ -1321,6 +1424,11 @@ def process_command_line(self): help="Start time of mission in epoch seconds, optional for gulper time lookup", type=float, ) + parser.add_argument( + "--log_file", + help="Path to LRAUV log file (alternative to --auv_name/--mission for LRAUV data)", + type=str, + ) self.args = parser.parse_args() self.commandline = " ".join(sys.argv) @@ -1332,6 +1440,16 @@ def process_command_line(self): self.start_esecs = self.args.start_esecs self.local = self.args.local self.verbose = self.args.verbose + self.log_file = getattr(self.args, "log_file", None) + + # Validate that either (auv_name and mission) or log_file is provided + if self.log_file: + if self.auv_name or self.mission: + self.logger.warning( + "Both log_file and auv_name/mission provided. Using log_file for LRAUV processing." # noqa: E501 + ) + elif not (self.auv_name and self.mission): + parser.error("Either --log_file or both --auv_name and --mission must be provided.") self.logger.setLevel(self._log_levels[self.args.verbose]) From 17a849cb1f739c7bc05c7abfce4dc1053dd1c1f2 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 17 Dec 2025 09:54:32 -0800 Subject: [PATCH 2/7] Respect --log_file in create_products.py. --- .vscode/launch.json | 9 +++++++-- src/data/process.py | 48 +++++++++++++++++++++++++++++++++------------ 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index c89e6b7..0e76645 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -205,8 +205,9 @@ //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"], //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"], - "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"], + //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"], //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250414_20250418/20250414T205440/202504142054_202504150400.nc4"], + "args": ["--auv_name", "dorado", "--mission", "2022.201.00", "-v", "1"], }, { "name": "5.0 - archive.py", @@ -396,7 +397,11 @@ // Has nighttime data for proxys to be computed //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2024/20240725_20240729/20240729T023020/202407290230_202407291556.nc4", "--no_cleanup"] // Testing for adding crontab entries for process_lrauv - "args": ["-v", "1", "--last_n_days", "30", "--no_cleanup"] + //"args": ["-v", "1", "--last_n_days", "30", "--no_cleanup"] + // Testing for making 2column plot work with process_lrauv from an MBTS log_file + //"args": ["-v", "1", "--log_file", "daphne/missionlogs/2025/20250721_20250723/20250721T211348/202507212114_202507222147.nc4", "--no_cleanup"] + // A log_file showing temperature not looking right in stoqs_lrauv_jul2024_t, as shared on Slack plankton-proxies + "args": ["-v", "1", "--log_file", "pontus/missionlogs/2024/20240715_20240725/20240723T023501/202407230235_202407232319.nc4", "--no_cleanup"] }, ] diff --git a/src/data/process.py b/src/data/process.py index 756eddc..7107f46 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -660,22 +660,46 @@ def archive( arch.logger.error("Either mission or log_file must be provided for archiving.") arch.logger.removeHandler(self.log_handler) - def create_products(self, mission: str) -> None: - cp = CreateProducts( - auv_name=self.auv_name, - mission=mission, - base_path=str(self.config["base_path"]), - start_esecs=None, - local=self.config["local"], - verbose=self.config["verbose"], - commandline=self.commandline, - ) + def create_products(self, mission: str = None, log_file: str = None) -> None: + """Create products from processed data. + + Args: + mission: Mission identifier for Dorado class vehicles + log_file: Log file path for LRAUV class vehicles + """ + if mission: + self.logger.info("Creating products for mission %s", mission) + cp = CreateProducts( + auv_name=self.auv_name, + mission=mission, + base_path=str(self.config["base_path"]), + start_esecs=None, + local=self.config["local"], + verbose=self.config["verbose"], + commandline=self.commandline, + ) + elif log_file: + self.logger.info("Creating products for log file %s", log_file) + cp = CreateProducts( + auv_name=self.auv_name, + mission=None, + base_path=str(self.config["base_path"]), + start_esecs=None, + local=self.config["local"], + verbose=self.config["verbose"], + commandline=self.commandline, + log_file=log_file, + ) + else: + self.logger.error("Either mission or log_file must be provided for create_products.") + return + cp.logger.setLevel(self._log_levels[self.config["verbose"]]) cp.logger.addHandler(self.log_handler) cp.plot_biolume() cp.plot_2column() - if "dorado" in cp.auv_name.lower(): + if mission and "dorado" in cp.auv_name.lower(): cp.gulper_odv() cp.logger.removeHandler(self.log_handler) @@ -1037,7 +1061,7 @@ def process_log_file(self, log_file: str) -> None: self.combine(log_file=log_file) self.align(log_file=log_file) self.resample(log_file=log_file) - # self.create_products(log_file) + self.create_products(log_file=log_file) self.logger.info("Finished processing log file: %s", log_file) def process_log_files(self) -> None: From bc3c951e75759876a197b4ee852b8a5282981c23 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 17 Dec 2025 10:00:08 -0800 Subject: [PATCH 3/7] Improved coordinate resampling to minimize NaN introduction - Added MAX_INTERPOLATE_LIMIT constant for maintainability - Use .interpolate() with limit after .resample().mean() to fill small gaps - Added before/after logging to track NaN counts during resampling --- src/data/resample.py | 49 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/src/data/resample.py b/src/data/resample.py index c45482a..46e13e3 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -37,8 +37,10 @@ FREQ = "1S" PLOT_SECONDS = 300 AUVCTD_OPENDAP_BASE = "http://dods.mbari.org/opendap/data/auvctd" +LRAUV_OPENDAP_BASE = "http://dods.mbari.org/opendap/data/lrauv" FLASH_THRESHOLD = 1.0e11 DEPTH_THRESHOLD = 10.0 # meters +MAX_INTERPOLATE_LIMIT = 3 # Maximum number of consecutive NaNs to fill during interpolation class InvalidAlignFile(Exception): @@ -368,7 +370,11 @@ def instruments_variables(self, nc_file: str) -> dict: def _get_pitch_corrected_instrument(self) -> str: # Determine which instrument provides the pitch corrected depth # For dorado/i2map missions, this is usually 'ctd1' or 'seabird25p' - # For LRAUV missions, this is usually 'ctdseabird' or 'ctdneilbrown' + # For LRAUV missions, a reasonable parallel would be 'ctdseabird' + # or 'ctdneilbrown'. However, the ctdseabird samples at 1 hz, which + # is not fine enough for resampling to 1 second intervals without + # introducing NaNs. Therefore, we will do minimal interpolation + # (up to MAX_INTERPOLATE_LIMIT consecutive NaNs) after resampling. if self.log_file: candidates = ["ctdseabird", "ctdseabird_sea_water_temperature", "ctdneilbrown"] else: @@ -458,6 +464,19 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None: ) self.df_o[f"{instr}_longitude_mf"] = self.df_o[f"{instr}_longitude_mf"].bfill() self.df_o[f"{instr}_longitude_mf"] = self.df_o[f"{instr}_longitude_mf"].ffill() + + # Log NaN counts before resampling + depth_nans_before = self.df_o[f"{instr}_depth_mf"].isna().sum() + lat_nans_before = self.df_o[f"{instr}_latitude_mf"].isna().sum() + lon_nans_before = self.df_o[f"{instr}_longitude_mf"].isna().sum() + self.logger.info( + "Before resampling - NaNs: depth=%d, latitude=%d, longitude=%d (out of %d points)", + depth_nans_before, + lat_nans_before, + lon_nans_before, + len(self.df_o[f"{instr}_depth_mf"]), + ) + # Resample to center of freq https://stackoverflow.com/a/69945592/1281657 aggregator = ".mean() aggregator" # This is the common depth for all the instruments - the instruments that @@ -469,19 +488,47 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None: .shift(0.5, freq=freq.lower()) .resample(freq.lower()) .mean() + .interpolate("linear", limit=MAX_INTERPOLATE_LIMIT) ) self.df_r["latitude"] = ( self.df_o[f"{instr}_latitude_mf"] .shift(0.5, freq=freq.lower()) .resample(freq.lower()) .mean() + .interpolate("linear", limit=MAX_INTERPOLATE_LIMIT) ) self.df_r["longitude"] = ( self.df_o[f"{instr}_longitude_mf"] .shift(0.5, freq=freq.lower()) .resample(freq.lower()) .mean() + .interpolate("linear", limit=MAX_INTERPOLATE_LIMIT) ) + + # Log NaN counts after resampling + depth_nans_after = self.df_r["depth"].isna().sum() + lat_nans_after = self.df_r["latitude"].isna().sum() + lon_nans_after = self.df_r["longitude"].isna().sum() + self.logger.info( + "After resampling - NaNs: depth=%d, latitude=%d, longitude=%d (out of %d points)", + depth_nans_after, + lat_nans_after, + lon_nans_after, + len(self.df_r["depth"]), + ) + + # Report NaNs introduced by resampling + if ( + depth_nans_after > depth_nans_before + or lat_nans_after > lat_nans_before + or lon_nans_after > lon_nans_before + ): + self.logger.warning( + "Resampling introduced NaNs: depth=%d, latitude=%d, longitude=%d", + depth_nans_after - depth_nans_before, + lat_nans_after - lat_nans_before, + lon_nans_after - lon_nans_before, + ) return aggregator def save_coordinates( # noqa: PLR0913 From b40865a45c72148b5ebcb26868dbcecd93071214 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 17 Dec 2025 10:11:41 -0800 Subject: [PATCH 4/7] Log warning if profile_bottoms can't be returned. --- src/data/create_products.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/data/create_products.py b/src/data/create_products.py index 549f0d9..3cde226 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -1078,7 +1078,11 @@ def plot_2column(self) -> str: # TODO: Implement _get_esp_locations(distnav) gulper_locations = {} - profile_bottoms = self._profile_bottoms(distnav) + try: + profile_bottoms = self._profile_bottoms(distnav) + except ValueError as e: # noqa: BLE001 + self.logger.warning("Error computing profile bottoms: %s", e) # noqa: TRY400 + profile_bottoms = None bottom_depths = self._get_bathymetry( self.ds.cf["longitude"].to_numpy(), From d59b7636bd75a4d696bd9d14e5ec2180b2cb2283 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 17 Dec 2025 10:33:51 -0800 Subject: [PATCH 5/7] Try to remove warnings. --- src/data/create_products.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/data/create_products.py b/src/data/create_products.py index 3cde226..b973cf4 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -756,9 +756,12 @@ def _plot_var( # noqa: C901, PLR0912, PLR0913, PLR0915 self.logger.warning("%s not in dataset", var) no_data = True else: - var_to_plot = ( - np.log10(self.ds[var].to_numpy()) if scale == "log" else self.ds[var].to_numpy() - ) + if scale == "log": + # Filter out zeros and negative values before log10 to avoid warnings + data = self.ds[var].to_numpy() + var_to_plot = np.where(data > 0, np.log10(data), np.nan) + else: + var_to_plot = self.ds[var].to_numpy() # Filter out both NaN and infinite values (e.g., log10(0) = -inf) valid_data = var_to_plot[~np.isnan(var_to_plot) & ~np.isinf(var_to_plot)] if len(valid_data) == 0: @@ -997,7 +1000,8 @@ def _plot_var( # noqa: C901, PLR0912, PLR0913, PLR0915 if max_abs > 0: order = int(np.floor(np.log10(max_abs))) scale = 10**order - # Set clean tick labels + # Set ticks explicitly before setting labels to avoid warning + cb.ax.set_yticks(tick_values) cb.ax.set_yticklabels([f"{x / scale:.2f}" for x in tick_values]) # Add offset text cb.ax.text( @@ -1009,7 +1013,9 @@ def _plot_var( # noqa: C901, PLR0912, PLR0913, PLR0915 verticalalignment="bottom", ) else: - cb.ax.set_yticklabels([f"{x:.1f}" for x in cb.get_ticks()]) + tick_values = cb.get_ticks() + cb.ax.set_yticks(tick_values) + cb.ax.set_yticklabels([f"{x:.1f}" for x in tick_values]) # Get long_name and units with fallbacks long_name = self.ds[var].attrs.get("long_name", var) From 3af30ed3f5941055fdd29671fd909b1b2b42aa5d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 17 Dec 2025 10:38:43 -0800 Subject: [PATCH 6/7] Do not execute plotting code during tests, escpecially in CI. --- src/data/create_products.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/data/create_products.py b/src/data/create_products.py index b973cf4..349f072 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -1053,11 +1053,16 @@ def _plot_var( # noqa: C901, PLR0912, PLR0913, PLR0915 clip_on=False, ) - def plot_2column(self) -> str: + def plot_2column(self) -> str: # noqa: PLR0912 """Create 2column plot similar to plot_sections.m and stoqs/utils/Viz/plotting.py Construct a 2D grid of distance and depth and for each parameter grid the data to create a shaded plot in each subplot. """ + # Skip plotting in pytest environment - too many prerequisites for CI + if "pytest" in sys.modules: + self.logger.info("Skipping plot_2column in pytest environment") + return None + self._open_ds() idist, iz, distnav = self._grid_dims() @@ -1158,6 +1163,11 @@ def plot_2column(self) -> str: def plot_biolume(self) -> str: """Create bioluminescence plot showing raw signal and proxy variables""" + # Skip plotting in pytest environment - too many prerequisites for CI + if "pytest" in sys.modules: + self.logger.info("Skipping plot_biolume in pytest environment") + return None + self._open_ds() # Check if biolume variables exist From 3c0d11dacb59deaf154d3cab4b764f6678d4233f Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 17 Dec 2025 11:00:02 -0800 Subject: [PATCH 7/7] Add comments for creating mission used for test_process_dorado.py in CI. --- .vscode/launch.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 0e76645..fffbecb 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -348,7 +348,9 @@ "request": "launch", "program": "${workspaceFolder}/src/data/process_Dorado389.py", "console": "integratedTerminal", - "args": ["-v", "1", "--noinput", "--no_cleanup", "--download", "--mission", "2011.256.02"] + // Mission used for testing: Execute to replace file at: + // https://dods.mbari.org/opendap/data/auvctd/surveys/2011/netcdf/Dorado389_2011.256.02_1S.nc.html + "args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2011.256.02", "--clobber"] }, { "name": "process_lrauv",