diff --git a/.vscode/launch.json b/.vscode/launch.json index 71db7357..f9b60a3e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -39,13 +39,24 @@ "args": ["-v", "1", "-d", "0", "-i", "data/auv_data/dorado/missionlogs/2009.055.05/lopc.bin", "-n", "data/auv_data/dorado/missionnetcdfs/2009.055.05/lopc.nc", "-f", "--LargeCopepod_AIcrit", "0.3"] }, { - "name": "1.1 - correct_log_times.py --mission 2017.284.00 --auv_name Dorado389", + "name": "1.2 - correct_log_times.py --mission 2017.284.00 --auv_name Dorado389", "type": "debugpy", "request": "launch", "program": "${workspaceFolder}/src/data/correct_log_times.py", "console": "integratedTerminal", "args": ["--auv_name", "Dorado389", "--mission", "2017.284.00", "-v", "2"] }, + { + "name": "1.3 - nc42netcdfs", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/nc42netcdfs.py", + "console": "integratedTerminal", + // A small log_file that has a reasonable amount of data, and known_hash to verify download + //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4", "--known_hash", "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"] + // Has bad latitude and longitude values + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + }, { "name": "2.0 - calibrate.py", "type": "debugpy", @@ -92,6 +103,16 @@ "program": "${workspaceFolder}/src/data/hs2_proc.py", "console": "integratedTerminal", }, + + { + "name": "2.2 - combine.py", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/combine.py", + "console": "integratedTerminal", + "justMyCode": false, + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + }, { "name": "3.0 - align.py", "type": "debugpy", @@ -282,5 +303,17 @@ "console": "integratedTerminal", "args": ["-v", "1", 
"--noinput", "--no_cleanup", "--download", "--mission", "2011.256.02"] }, + { + "name": "process_lrauv", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/process_lrauv.py", + "console": "integratedTerminal", + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] + "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] + //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] + }, + ] } diff --git a/WORKFLOW.md b/DORADO_WORKFLOW.md similarity index 96% rename from WORKFLOW.md rename to DORADO_WORKFLOW.md index d946c635..1cd12da2 100644 --- a/WORKFLOW.md +++ b/DORADO_WORKFLOW.md @@ -1,6 +1,6 @@ -## Data Workflow +## Dorado Data Workflow -The sequence of steps to process data is as follows: +The sequence of steps to process Dorado data is as follows: logs2netcdfs.py → calibrate.py → align.py → resample.py → archive.py → plot.py @@ -70,6 +70,6 @@ on the local file system's work directory is as follows: archive.py Copy the netCDF files to the archive directory. The archive directory - is initally in the AUVCTD share on atlas which is shared with the + is initially in the AUVCTD share on atlas which is shared with the data from the Dorado Gulper vehicle, but can also be on the M3 share on thalassa near the original log data. 
diff --git a/LRAUV_WORKFLOW.md b/LRAUV_WORKFLOW.md new file mode 100644 index 00000000..98307671 --- /dev/null +++ b/LRAUV_WORKFLOW.md @@ -0,0 +1,68 @@ +## LRAUV Data Workflow + +The sequence of steps to process LRAUV data is as follows: + + nc42netcdfs.py → combine.py → align.py → resample.py → archive.py → plot.py + +Details of each step are described in the respective scripts and in the +description of output netCDF files below. The output file directory structure +on the local file system's work directory is as follows: + + ├── data + │ ├── lrauv_data + │ │ ├── <- e.g.: ahi, brizo, pontus, tethys, ... + │ │ │ ├── missionlogs/year/dlist_dir + │ │ │ │ ├── <- e.g.: ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4 + │ │ │ │ │ ├── <- .nc4 file containing original data + │ │ │ │ │ ├── <- .nc files, one for each group from the .nc4 file + | | | | | | data identical to original in NETCDF4 format + │ │ │ │ │ ├── <_cal> <- A single NETCDF3 .nc file containing all the + | | | | | | variables from the .nc files along with nudged + | | | | | | latitudes and longitudes - created by combine.py + │ │ │ │ │ ├── <_align> <- .nc file with all measurement variables + | | | | | | having associated coordinate variables + | | | | | | at original instrument sampling rate - + | | | | | | created by align.py + │ │ │ │ │ ├── <_nS> <- .nc file with all measurement variables + resampled to a common time grid at n + Second intervals - created by resample.py + + nc42netcdfs.py + Extract the groups and the variables we want from the groups into + individual .nc files. These data are saved using NETCDF4 format as + there are many unlimited dimensions that are not allowed in NETCDF3. + The data in the .nc files are identical to what is in the .nc4 groups. + + combine.py + Apply calibration coefficients to the original data. The calibrated data + are written to a new netCDF file in the missionnetcdfs/ + directory ending with _cal.nc. 
This step also includes nudging the + underwater portions of the navigation positions to the GPS fixes + done at the surface and applying pitch corrections to the sensor + depth for those sensors (instruments) for which offset values are + specified in SensorInfo. Some minimal QC is done in this step, namely + removal of non-monotonic times. The record variables in the netCDF + file have only their original coordinates, namely time associated with + them. + + align.py + Interpolate corrected lat/lon variables to the original sampling + intervals for each instrument's record variables. This format is + analogous to the .nc4 files produced by the LRAUV unserialize + process. These are the best files to use for the highest temporal + resolution of the data. Unlike the .nc4 files align.py's output files + use a naming convention rather than netCDF4 groups for each instrument. + + resample.py + Produce a netCDF file with all of the instrument's record variables + resampled to the same temporal interval. The coordinate variables are + also resampled to the same temporal interval and named with standard + depth, latitude, and longitude names. These are the best files to + use for loading data into STOQS and for analyses requiring all the + data to be on the same spatial temporal grid. + + archive.py + Copy the netCDF files to the archive directory. The archive directory + is initially in the AUVCTD share on atlas which is shared with the + data from the Dorado Gulper vehicle, but can also be on the M3 share + on thalassa near the original log data. diff --git a/README.md b/README.md index d9a185e1..81417927 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ print out the usage information for each of the processing scripts: uv run src/data/process_i2map.py --help uv run src/data/process_dorado.py --help -See [WORKFLOW.md](WORKFLOW.md) for more details on the data processing workflow. 
+See [DORADO_WORKFLOW.md](DORADO_WORKFLOW.md) for more details on the data processing workflow. ### Jupyter Notebooks ### To run the Jupyter Notebooks, start Jupyter Lab at the command line with: diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index eee85c00..7d159a04 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -14,7 +14,7 @@ and make sure that it's the only entry in "process_dorado" that is uncommented. 2. From VS Code's Run and Debug panel select "process_dorado" and click the green Start Debugging play button. For data to be copied from the archive the smb://atlas.shore.mbari.org/AUVCTD share must be mounted on your computer. Primary development is done in MacOS where the local mount point is /Volumes. Archive volumes are hard-coded as literals in [src/data/process_dorado.py](https://github.com/mbari-org/auv-python/blob/fc3b58613761b295ab47907993c4d0eb0bceb197/src/data/process_dorado.py) and [src/data/process_i2map.py](https://github.com/mbari-org/auv-python/blob/fc3b58613761b295ab47907993c4d0eb0bceb197/src/data/process_i2map.py). These should be changed if you mount these volumes at a different location. -3. Mission log data will copied to your `auv-python/data/auv_data/` directory into subdirectories organized by vehicle name, mission, and processing step. Data will be processed as described in [WORKFLOW.md](WORKFLOW.md). A typical mission takes about 10 minutes to process. +3. Mission log data will be copied to your `auv-python/data/auv_data/` directory into subdirectories organized by vehicle name, mission, and processing step. Data will be processed as described in [DORADO_WORKFLOW.md](DORADO_WORKFLOW.md). A typical mission takes about 10 minutes to process. 4. After all of the intermediate files are created any step of the workflow may be executed and debugged in VS Code. The `.vscode\launch.json` file has several example entries that can be modified for specific debugging purposes via the menu in the Run and Debug panel. 
diff --git a/notebooks/README.md b/notebooks/README.md index bb952fed..ff95f775 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -1,5 +1,6 @@ The Notebooks in this directory are intended to be used to examine the data -generated by each of the steps described in the [workflow]("../WORKFLOW.md"): +generated by each of the steps described in the [Dorado]("../DORADO_WORKFLOW.md") +or [LRAUV]("../LRAUV_WORKFLOW.md") WORKFLOW documents: logs2netcdfs.py → calibrate.py → align.py → resample.py → archive.py → 1.x 2.x 3.x 4.x 5.x 6.x diff --git a/pyproject.toml b/pyproject.toml index 9a21f413..f6684c65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "datashader>=0.18.1", "defusedxml>=0.7.1", "gitpython>=3.1.44", + "gsw>=3.6.20", "hvplot>=0.11.3", "ipympl>=0.9.7", "jupyter>=1.1.1", diff --git a/src/data/AUV.py b/src/data/AUV.py index ba1fa8fa..75accb25 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -8,11 +8,12 @@ MBARI 30 March 2020 """ -import sys +import logging from datetime import UTC, datetime import coards import numpy as np +import xarray as xr def monotonic_increasing_time_indices(time_array: np.array) -> np.ndarray: @@ -48,9 +49,350 @@ def add_global_metadata(self): ) self.nc_file.distribution_statement = "Any use requires prior approval from MBARI" - self.nc_file.license = self.nc_file.distribution_statement - self.nc_file.useconst = "Not intended for legal use. Data may contain inaccuracies." 
- self.nc_file.history = 'Created by "{}" on {}'.format( - " ".join(sys.argv), - iso_now, + + +def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 + nav_longitude: xr.DataArray, + nav_latitude: xr.DataArray, + gps_longitude: xr.DataArray, + gps_latitude: xr.DataArray, + logger: logging.Logger, + auv_name: str = "", + mission: str = "", + max_sec_diff_at_end: int = 10, + create_plots: bool = False, # noqa: FBT001, FBT002 +) -> tuple[xr.DataArray, xr.DataArray, int, float]: + """ + Apply linear nudges to underwater latitudes and longitudes so that + they match the surface GPS positions. + + Parameters: + ----------- + nav_longitude : xr.DataArray + Navigation longitude data (dead reckoned) + nav_latitude : xr.DataArray + Navigation latitude data (dead reckoned) + gps_longitude : xr.DataArray + GPS longitude fixes + gps_latitude : xr.DataArray + GPS latitude fixes + logger : logging.Logger + Logger for output messages + auv_name : str, optional + AUV name for plot titles + mission : str, optional + Mission name for plot titles + max_sec_diff_at_end : int, optional + Maximum allowable time difference at segment end (default: 10) + create_plots : bool, optional + Whether to create debug plots (default: False) + + Returns: + -------- + tuple[xr.DataArray, xr.DataArray, int, float] + nudged_longitude, nudged_latitude, segment_count, segment_minsum + """ + segment_count = None + segment_minsum = None + + lon = nav_longitude + lat = nav_latitude + lon_fix = gps_longitude + lat_fix = gps_latitude + + logger.info( + f"{'seg#':5s} {'end_sec_diff':12s} {'end_lon_diff':12s} {'end_lat_diff':12s}" # noqa: G004 + f" {'len(segi)':9s} {'seg_min':>9s} {'u_drift (cm/s)':14s} {'v_drift (cm/s)':14s}" + f" {'start datetime of segment':>29}", + ) + + # Any dead reckoned points before first GPS fix - usually empty + # as GPS fix happens before dive + segi = np.where(lat.cf["T"].data < lat_fix.cf["T"].data[0])[0] + if lon[:][segi].any(): + lon_nudged_array = lon[segi] + 
lat_nudged_array = lat[segi] + dt_nudged = lon.get_index("navigation_time")[segi] + logger.debug( + "Filled _nudged arrays with %d values starting at %s " + "which were before the first GPS fix at %s", + len(segi), + lat.get_index("navigation_time")[0], + lat_fix.get_index("gps_time")[0], + ) + else: + lon_nudged_array = np.array([]) + lat_nudged_array = np.array([]) + dt_nudged = np.array([], dtype="datetime64[ns]") + if segi.any(): + seg_min = ( + lat.get_index("navigation_time")[segi][-1] - lat.get_index("navigation_time")[segi][0] + ).total_seconds() / 60 + else: + seg_min = 0 + logger.info( + f"{' ':5} {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14} {'-':>29}", # noqa: E501, G004 + ) + + MIN_SEGMENT_LENGTH = 10 + seg_count = 0 + seg_minsum = 0 + for i in range(len(lat_fix) - 1): + # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes + segi = np.where( + np.logical_and( + lat.cf["T"].data > lat_fix.cf["T"].data[i], + lat.cf["T"].data < lat_fix.cf["T"].data[i + 1], + ), + )[0] + if not segi.any(): + logger.debug( + f"No dead reckoned values found between GPS times of " # noqa: G004 + f"{lat_fix.cf['T'].data[i]} and {lat_fix.cf['T'].data[i + 1]}", + ) + continue + + end_sec_diff = float(lat_fix.cf["T"].data[i + 1] - lat.cf["T"].data[segi[-1]]) / 1.0e9 + + end_lon_diff = float(lon_fix[i + 1]) - float(lon[segi[-1]]) + end_lat_diff = float(lat_fix[i + 1]) - float(lat[segi[-1]]) + + # Compute approximate horizontal drift rate as a sanity check + try: + u_drift = ( + end_lon_diff + * float(np.cos(lat_fix[i + 1] * np.pi / 180)) + * 60 + * 185300 + / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) + ) + except ZeroDivisionError: + u_drift = 0 + try: + v_drift = ( + end_lat_diff + * 60 + * 185300 + / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) + ) + except ZeroDivisionError: + v_drift = 0 + + if abs(end_lon_diff) > 1 or abs(end_lat_diff) > 1: + # Error 
handling - same as original + logger.info( + f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 + f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" + f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", + ) + logger.error( + "End of underwater segment dead reckoned position is too different " + "from GPS fix: abs(end_lon_diff) (%s) > 1 or abs(end_lat_diff) (%s) > 1", + end_lon_diff, + end_lat_diff, + ) + logger.info( + "Fix this error by calling _range_qc_combined_nc() in " + "_navigation_process() and/or _gps_process() for %s %s", + auv_name, + mission, + ) + error_message = ( + f"abs(end_lon_diff) ({end_lon_diff}) > 1 or abs(end_lat_diff) ({end_lat_diff}) > 1" + ) + raise ValueError(error_message) + if abs(end_sec_diff) > max_sec_diff_at_end: + logger.warning( + "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", + end_sec_diff, + max_sec_diff_at_end, + ) + logger.info( + "Overriding end_lon_diff (%s) and end_lat_diff (%s) by setting them to 0", + end_lon_diff, + end_lat_diff, + ) + end_lon_diff = 0 + end_lat_diff = 0 + + seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 + seg_minsum += seg_min + + if len(segi) > MIN_SEGMENT_LENGTH: + logger.info( + f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 + f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" + f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", + ) + + # Start with zero adjustment at beginning and linearly ramp up to the diff at the end + lon_nudge = np.interp( + lon.cf["T"].data[segi].astype(np.int64), + [ + lon.cf["T"].data[segi].astype(np.int64)[0], + lon.cf["T"].data[segi].astype(np.int64)[-1], + ], + [0, end_lon_diff], ) + lat_nudge = np.interp( + lat.cf["T"].data[segi].astype(np.int64), + [ + lat.cf["T"].data[segi].astype(np.int64)[0], + lat.cf["T"].data[segi].astype(np.int64)[-1], + ], + [0, end_lat_diff], + ) + + # Sanity checks + MAX_LONGITUDE = 180 + MAX_LATITUDE = 90 + if ( + 
np.max(np.abs(lon[segi] + lon_nudge)) > MAX_LONGITUDE + or np.max(np.abs(lat[segi] + lon_nudge)) > MAX_LATITUDE + ): + logger.warning( + "Nudged coordinate is way out of reasonable range - segment %d", + seg_count, + ) + logger.warning( + " max(abs(lon)) = %s", + np.max(np.abs(lon[segi] + lon_nudge)), + ) + logger.warning( + " max(abs(lat)) = %s", + np.max(np.abs(lat[segi] + lat_nudge)), + ) + + lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) + lat_nudged_array = np.append(lat_nudged_array, lat[segi] + lat_nudge) + dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) + seg_count += 1 + + # Any dead reckoned points after last GPS fix + segi = np.where(lat.cf["T"].data > lat_fix.cf["T"].data[-1])[0] + seg_min = 0 + if segi.any(): + lon_nudged_array = np.append(lon_nudged_array, lon[segi]) + lat_nudged_array = np.append(lat_nudged_array, lat[segi]) + dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) + seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 + + logger.info( + f"{seg_count + 1:4d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 + ) + segment_count = seg_count + segment_minsum = seg_minsum + + logger.info("Points in final series = %d", len(dt_nudged)) + + lon_nudged = xr.DataArray( + data=lon_nudged_array, + dims=["time"], + coords={"time": dt_nudged}, + name="longitude", + ) + lat_nudged = xr.DataArray( + data=lat_nudged_array, + dims=["time"], + coords={"time": dt_nudged}, + name="latitude", + ) + + # Optional plotting code + if create_plots: + _create_nudge_plots( + lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger + ) + + return lon_nudged, lat_nudged, segment_count, segment_minsum + + +def _create_nudge_plots( # noqa: PLR0913 + lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger +): + """Create debug plots for position nudging (separated for clarity).""" + try: + import 
matplotlib.pyplot as plt + + try: + import cartopy.crs as ccrs # type: ignore # noqa: I001, PGH003 + from matplotlib import patches + from shapely.geometry import LineString # type: ignore # noqa: PGH003 + + has_cartopy = True + except ImportError: + has_cartopy = False + + # Time series plots + fig, axes = plt.subplots(nrows=2, figsize=(18, 6)) + axes[0].plot(lat_nudged.coords["time"].data, lat_nudged, "-") + axes[0].plot(lat.cf["T"].data, lat, "--") + axes[0].plot(lat_fix.cf["T"].data, lat_fix, "*") + axes[0].set_ylabel("Latitude") + axes[0].legend(["Nudged", "Original", "GPS Fixes"]) + axes[1].plot(lon_nudged.coords["time"].data, lon_nudged, "-") + axes[1].plot(lon.cf["T"].data, lon, "--") + axes[1].plot(lon_fix.cf["T"].data, lon_fix, "*") + axes[1].set_ylabel("Longitude") + axes[1].legend(["Nudged", "Original", "GPS Fixes"]) + title = "Corrected nav from nudge_positions()" + fig.suptitle(title) + axes[0].grid() + axes[1].grid() + logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) + plt.show() + + # Map plot + if has_cartopy: + ax = plt.axes(projection=ccrs.PlateCarree()) + nudged = LineString(zip(lon_nudged.to_numpy(), lat_nudged.to_numpy(), strict=False)) + original = LineString(zip(lon.to_numpy(), lat.to_numpy(), strict=False)) + ax.add_geometries( + [nudged], + crs=ccrs.PlateCarree(), + edgecolor="red", + facecolor="none", + label="Nudged", + ) + ax.add_geometries( + [original], + crs=ccrs.PlateCarree(), + edgecolor="grey", + facecolor="none", + label="Original", + ) + handle_gps = ax.scatter( + lon_fix.to_numpy(), + lat_fix.to_numpy(), + color="green", + label="GPS Fixes", + ) + bounds = nudged.buffer(0.02).bounds + extent = bounds[0], bounds[2], bounds[1], bounds[3] + ax.set_extent(extent, crs=ccrs.PlateCarree()) + ax.coastlines() + + handle_nudged = patches.Rectangle((0, 0), 1, 0.1, facecolor="red") + handle_original = patches.Rectangle((0, 0), 1, 0.1, facecolor="gray") + ax.legend( + [handle_nudged, handle_original, 
handle_gps], + ["Nudged", "Original", "GPS Fixes"], + ) + ax.gridlines( + crs=ccrs.PlateCarree(), + draw_labels=True, + linewidth=1, + color="gray", + alpha=0.5, + ) + ax.set_title(f"{auv_name} {mission}") + logger.debug( + "Pausing map plot (doesn't work well in VS Code debugger)." + " Close window to continue.", + ) + plt.show() + else: + logger.warning("No map plot, could not import cartopy") + + except ImportError: + logger.warning("Could not create plots - matplotlib not available") diff --git a/src/data/GITHUB_ISSUE_6_NC42NETCDFS_IMPLEMENTATION.md b/src/data/GITHUB_ISSUE_6_NC42NETCDFS_IMPLEMENTATION.md new file mode 100644 index 00000000..c855ea05 --- /dev/null +++ b/src/data/GITHUB_ISSUE_6_NC42NETCDFS_IMPLEMENTATION.md @@ -0,0 +1,76 @@ +# GitHub Issue #6 Implementation Summary - CORRECTED VERSION + +## Problem +LRAUV NetCDF files sometimes contain non-monotonic time data, which breaks downstream processing tools that expect monotonic time coordinates. **The critical issue was that each NetCDF group contains multiple independent time variables (e.g., `time_NAL9602`, `time_CTD_NeilBrown`) that each need their own monotonic filtering.** + +## Solution Implemented +Complete rewrite of time filtering to handle **multiple independent time variables per group** with the following architecture: + +### 1. Per-Variable Time Detection and Filtering +- **`_get_time_filters_for_variables()`**: Identifies ALL time variables in the extraction list and computes monotonic filtering for each independently +- **`_is_time_variable()`**: Determines if a variable is a time coordinate using name patterns and units +- **`_get_monotonic_indices()`**: Computes monotonic indices for any time data array + +### 2. 
Multi-Variable Time Processing +- **`_copy_variable_with_appropriate_time_filter()`**: Applies the correct time filtering based on the specific variable: + - If the variable IS a time coordinate: applies its own monotonic filtering + - If the variable DEPENDS on time coordinates: uses the appropriate time dimension's filtering + - If no time dependencies: copies all data unchanged +- **`_create_dimensions_with_time_filters()`**: Adjusts dimension sizes for each filtered time coordinate +- **`_apply_multidimensional_time_filter()`**: Handles complex multi-dimensional filtering + +### 3. Independent Time Coordinate Processing +Each time variable (like `time_NAL9602`, `time_CTD_NeilBrown`) gets: +- Its own monotonic analysis +- Its own filtered indices +- Its own dimension size adjustment +- Independent logging of filtering results + +### 4. Command Line Control (Unchanged) +- **`--filter_monotonic_time`**: Enable time filtering (default behavior) +- **`--no_filter_monotonic_time`**: Disable filtering to preserve all time values + +## Key Methods - CORRECTED ARCHITECTURE + +```python +def _get_time_filters_for_variables(self, src_group, vars_to_extract) -> dict[str, dict]: + """Get time filtering info for EACH time variable in the extraction list. 
+ Returns: {time_var_name: {"indices": list[int], "filtered": bool}}""" + +def _is_time_variable(self, var_name: str, var) -> bool: + """Check if a variable is a time coordinate variable.""" + +def _get_monotonic_indices(self, time_data) -> list[int]: + """Get monotonic indices for any time data array.""" + +def _copy_variable_with_appropriate_time_filter(self, src_group, dst_dataset, var_name, time_filters): + """Copy variable with the APPROPRIATE time filtering for that specific variable.""" + +def _create_dimensions_with_time_filters(self, src_group, dst_dataset, dims_needed, time_filters): + """Create dimensions with MULTIPLE time coordinate filtering.""" + +def _apply_multidimensional_time_filter(self, src_var, dst_var, var_name, filtered_dims): + """Apply time filtering to multi-dimensional variables.""" +``` + +## Testing - CORRECTED VALIDATION +- ✅ Created test with multiple time variables in single group (`time_NAL9602`, `time_CTD_NeilBrown`) +- ✅ Verified independent filtering: `time_NAL9602` (10→8 points), `time_CTD_NeilBrown` (8→6 points) +- ✅ Confirmed each time variable gets its own monotonic indices +- ✅ Validated that data variables use appropriate time coordinate filtering + +## Root Cause Fix +**Previous implementation incorrectly assumed ONE time coordinate per group.** The corrected implementation recognizes that: + +1. **Each group can have multiple time variables** (`time_NAL9602`, `time_CTD_NeilBrown`, etc.) +2. **Each time variable needs independent monotonic filtering** +3. **Data variables must use the filtering from their specific time coordinate** +4. 
**Different time coordinates can have different amounts of filtering** + +## Backward Compatibility +- Default behavior enables time filtering for safer processing +- Users can disable filtering with `--no_filter_monotonic_time` if needed +- No breaking changes to existing API +- Works with single time coordinate groups (backward compatible) AND multiple time coordinate groups (new functionality) + +This corrected implementation properly addresses GitHub issue #6 by handling the real-world complexity of LRAUV NetCDF files with multiple independent time coordinates per group. \ No newline at end of file diff --git a/src/data/archive.py b/src/data/archive.py index a1a3748a..2352d5bf 100755 --- a/src/data/archive.py +++ b/src/data/archive.py @@ -19,10 +19,12 @@ from create_products import MISSIONIMAGES, MISSIONODVS from logs2netcdfs import BASE_PATH, LOG_FILES, MISSIONNETCDFS, AUV_NetCDF +from nc42netcdfs import BASE_LRAUV_PATH, GROUP from resample import FREQ LOG_NAME = "processing.log" AUVCTD_VOL = "/Volumes/AUVCTD" +LRAUV_VOL = "/Volumes/LRAUV" class Archiver: @@ -170,6 +172,46 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: def copy_to_M3(self, resampled_nc_file: str) -> None: pass + def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: + "Copy the intermediate and resampled netCDF file(s) to the archive LRAUV location" + src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + dst_dir = Path(LRAUV_VOL, Path(log_file).parent) + try: + Path(dst_dir).stat() + except FileNotFoundError: + self.logger.exception("%s not found", dst_dir) + self.logger.info("Is %s mounted?", self.mount_dir) + sys.exit(1) + for src_file in sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")): + dst_file = Path(dst_dir, src_file.name) + if self.args.clobber: + if dst_file.exists(): + self.logger.info("Removing %s", dst_file) + dst_file.unlink() + if src_file.exists(): + shutil.copyfile(src_file, dst_file) + self.logger.info("copyfile %s 
%s done.", src_file, dst_dir) + else: + self.logger.info( + "%-75s exists, but is not being archived because --clobber is not specified.", + src_file.name, + ) + for ftype in (f"{freq}.nc", "cal.nc", "align.nc"): + src_file = Path(src_dir, f"{Path(log_file).stem}_{ftype}") + dst_file = Path(dst_dir, src_file.name) + if self.args.clobber: + if dst_file.exists(): + self.logger.info("Removing %s", dst_file) + dst_file.unlink() + if src_file.exists(): + shutil.copyfile(src_file, dst_file) + self.logger.info("copyfile %s %s done.", src_file, dst_dir) + else: + self.logger.info( + "%-36s exists, but is not being archived because --clobber is not specified.", # noqa: E501 + src_file.name, + ) + def process_command_line(self): parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 704597e4..2cdc8941 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -27,7 +27,7 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2020, Monterey Bay Aquarium Research Institute" -import argparse +import argparse # noqa: I001 import logging import os import shlex @@ -50,19 +50,11 @@ from scipy.interpolate import interp1d from seawater import eos80 -try: - import cartopy.crs as ccrs # type: ignore # noqa: PGH003 - from shapely.geometry import LineString # type: ignore # noqa: PGH003 -except ModuleNotFoundError: - # cartopy is not installed, will not be able to plot maps - pass - import pandas as pd import pyproj -from AUV import monotonic_increasing_time_indices +from AUV import monotonic_increasing_time_indices, nudge_positions from hs2_proc import compute_backscatter, hs2_calc_bb, hs2_read_cal_file from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, TIME, TIME60HZ, AUV_NetCDF -from matplotlib import patches from scipy import signal AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay @@ -1661,13 +1653,10 @@ def _navigation_process(self, sensor): # noqa: 
C901, PLR0912, PLR0915 }, ) - def _nudge_pos(self, max_sec_diff_at_end=10): # noqa: C901, PLR0912, PLR0915 + def _nudge_pos(self, max_sec_diff_at_end=10): """Apply linear nudges to underwater latitudes and longitudes so that they match the surface gps positions. """ - self.segment_count = None - self.segment_minsum = None - try: lon = self.combined_nc["navigation_longitude"] except KeyError: @@ -1677,279 +1666,22 @@ def _nudge_pos(self, max_sec_diff_at_end=10): # noqa: C901, PLR0912, PLR0915 lon_fix = self.combined_nc["gps_longitude"] lat_fix = self.combined_nc["gps_latitude"] - self.logger.info( - f"{'seg#':5s} {'end_sec_diff':12s} {'end_lon_diff':12s} {'end_lat_diff':12s}" # noqa: G004 - f" {'len(segi)':9s} {'seg_min':>9s} {'u_drift (cm/s)':14s} {'v_drift (cm/s)':14s}" - f" {'start datetime of segment':>29}", - ) - - # Any dead reckoned points before first GPS fix - usually empty - # as GPS fix happens before dive - segi = np.where(lat.cf["T"].data < lat_fix.cf["T"].data[0])[0] - if lon[:][segi].any(): - lon_nudged_array = lon[segi] - lat_nudged_array = lat[segi] - dt_nudged = lon.get_index("navigation_time")[segi] - self.logger.debug( - "Filled _nudged arrays with %d values starting at %s " - "which were before the first GPS fix at %s", - len(segi), - lat.get_index("navigation_time")[0], - lat_fix.get_index("gps_time")[0], - ) - else: - lon_nudged_array = np.array([]) - lat_nudged_array = np.array([]) - dt_nudged = np.array([], dtype="datetime64[ns]") - if segi.any(): - seg_min = ( - lat.get_index("navigation_time")[segi][-1] - - lat.get_index("navigation_time")[segi][0] - ).total_seconds() / 60 - else: - seg_min = 0 - self.logger.info( - f"{' ':5} {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14} {'-':>29}", # noqa: E501, G004 - ) - - seg_count = 0 - seg_minsum = 0 - for i in range(len(lat_fix) - 1): - # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes - segi = np.where( - np.logical_and( - 
lat.cf["T"].data > lat_fix.cf["T"].data[i], - lat.cf["T"].data < lat_fix.cf["T"].data[i + 1], - ), - )[0] - if not segi.any(): - self.logger.debug( - f"No dead reckoned values found between GPS times of " # noqa: G004 - f"{lat_fix.cf['T'].data[i]} and {lat_fix.cf['T'].data[i + 1]}", - ) - continue - - end_sec_diff = float(lat_fix.cf["T"].data[i + 1] - lat.cf["T"].data[segi[-1]]) / 1.0e9 - - end_lon_diff = float(lon_fix[i + 1]) - float(lon[segi[-1]]) - end_lat_diff = float(lat_fix[i + 1]) - float(lat[segi[-1]]) - - # Compute approximate horizontal drift rate as a sanity check - try: - u_drift = ( - end_lon_diff - * float(np.cos(lat_fix[i + 1] * np.pi / 180)) - * 60 - * 185300 - / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) - ) - except ZeroDivisionError: - u_drift = 0 - try: - v_drift = ( - end_lat_diff - * 60 - * 185300 - / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) - ) - except ZeroDivisionError: - v_drift = 0 - - if abs(end_lon_diff) > 1 or abs(end_lat_diff) > 1: - # It's a problem if we have more than 1 degree difference at the end of the segment. - # This is usually because the GPS fix is bad, but sometimes it's because the - # dead reckoned position is bad. Or sometimes it's both as in dorado 2016.384.00. - # Early QC by calling _range_qc_combined_nc() can remove the bad points. - # Monterey Bay missions that have bad points can be added to the lists in - # _navigation_process() and/or _gps_process(). 
- self.logger.info( - f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 - f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" - f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", - ) - self.logger.error( - "End of underwater segment dead reckoned position is too different " - "from GPS fix: abs(end_lon_diff) (%s) > 1 or abs(end_lat_diff) (%s) > 1", - end_lon_diff, - end_lat_diff, - ) - self.logger.info( - "Fix this error by calling _range_qc_combined_nc() in " - "_navigation_process() and/or _gps_process() for %s %s", - self.args.auv_name, - self.args.mission, - ) - error_message = ( - f"abs(end_lon_diff) ({end_lon_diff}) > 1 or " - f"abs(end_lat_diff) ({end_lat_diff}) > 1" - ) - raise ValueError(error_message) - if abs(end_sec_diff) > max_sec_diff_at_end: - # Happens in dorado 2016.348.00 because of a bad GPS fixes being removed - self.logger.warning( - "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", - end_sec_diff, - max_sec_diff_at_end, - ) - self.logger.info( - "Overriding end_lon_diff (%s) and end_lat_diff (%s) by setting them to 0", - end_lon_diff, - end_lat_diff, - ) - end_lon_diff = 0 - end_lat_diff = 0 - - seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 - seg_minsum += seg_min - - if len(segi) > 10: # noqa: PLR2004 - self.logger.info( - f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 - f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" - f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", - ) - - # Start with zero adjustment at begining and linearly ramp up to the diff at the end - lon_nudge = np.interp( - lon.cf["T"].data[segi].astype(np.int64), - [ - lon.cf["T"].data[segi].astype(np.int64)[0], - lon.cf["T"].data[segi].astype(np.int64)[-1], - ], - [0, end_lon_diff], - ) - lat_nudge = np.interp( - lat.cf["T"].data[segi].astype(np.int64), - [ - lat.cf["T"].data[segi].astype(np.int64)[0], - lat.cf["T"].data[segi].astype(np.int64)[-1], - ], - [0, 
end_lat_diff], - ) - - # Sanity checks - if ( - np.max(np.abs(lon[segi] + lon_nudge)) > 180 # noqa: PLR2004 - or np.max(np.abs(lat[segi] + lon_nudge)) > 90 # noqa: PLR2004 - ): - self.logger.warning( - "Nudged coordinate is way out of reasonable range - segment %d", - seg_count, - ) - self.logger.warning( - " max(abs(lon)) = %s", - np.max(np.abs(lon[segi] + lon_nudge)), - ) - self.logger.warning( - " max(abs(lat)) = %s", - np.max(np.abs(lat[segi] + lat_nudge)), - ) - - lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) - lat_nudged_array = np.append(lat_nudged_array, lat[segi] + lat_nudge) - dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) - seg_count += 1 - - # Any dead reckoned points after first GPS fix - not possible to nudge, just copy in - segi = np.where(lat.cf["T"].data > lat_fix.cf["T"].data[-1])[0] - seg_min = 0 - if segi.any(): - lon_nudged_array = np.append(lon_nudged_array, lon[segi]) - lat_nudged_array = np.append(lat_nudged_array, lat[segi]) - dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) - seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 - - self.logger.info( - f"{seg_count + 1:4d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 - ) - self.segment_count = seg_count - self.segment_minsum = seg_minsum - - self.logger.info("Points in final series = %d", len(dt_nudged)) - - lon_nudged = xr.DataArray( - data=lon_nudged_array, - dims=["time"], - coords={"time": dt_nudged}, - name="longitude", - ) - lat_nudged = xr.DataArray( - data=lat_nudged_array, - dims=["time"], - coords={"time": dt_nudged}, - name="latitude", - ) - if self.args.plot: - fig, axes = plt.subplots(nrows=2, figsize=(18, 6)) - axes[0].plot(lat_nudged.coords["time"].data, lat_nudged, "-") - axes[0].plot(lat.cf["T"].data, lat, "--") - axes[0].plot(lat_fix.cf["T"].data, lat_fix, "*") - axes[0].set_ylabel("Latitude") - axes[0].legend(["Nudged", "Original", "GPS 
Fixes"]) - axes[1].plot(lon_nudged.coords["time"].data, lon_nudged, "-") - axes[1].plot(lon.cf["T"].data, lon, "--") - axes[1].plot(lon_fix.cf["T"].data, lon_fix, "*") - axes[1].set_ylabel("Longitude") - axes[1].legend(["Nudged", "Original", "GPS Fixes"]) - title = "Corrected nav from _nudge_pos()" - fig.suptitle(title) - axes[0].grid() - axes[1].grid() - self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) - plt.show() - - gps_plot = True - if gps_plot: - try: - ax = plt.axes(projection=ccrs.PlateCarree()) - except NameError: - self.logger.warning("No gps_plot, could not import cartopy") - return lon_nudged, lat_nudged - nudged = LineString(zip(lon_nudged.to_numpy(), lat_nudged.to_numpy(), strict=False)) - original = LineString(zip(lon.to_numpy(), lat.to_numpy(), strict=False)) - ax.add_geometries( - [nudged], - crs=ccrs.PlateCarree(), - edgecolor="red", - facecolor="none", - label="Nudged", - ) - ax.add_geometries( - [original], - crs=ccrs.PlateCarree(), - edgecolor="grey", - facecolor="none", - label="Original", - ) - handle_gps = ax.scatter( - lon_fix.to_numpy(), - lat_fix.to_numpy(), - color="green", - label="GPS Fixes", - ) - bounds = nudged.buffer(0.02).bounds - extent = bounds[0], bounds[2], bounds[1], bounds[3] - ax.set_extent(extent, crs=ccrs.PlateCarree()) - ax.coastlines() - handle_nudged = patches.Rectangle((0, 0), 1, 0.1, facecolor="red") - handle_original = patches.Rectangle((0, 0), 1, 0.1, facecolor="gray") - ax.legend( - [handle_nudged, handle_original, handle_gps], - ["Nudged", "Original", "GPS Fixes"], - ) - ax.gridlines( - crs=ccrs.PlateCarree(), - draw_labels=True, - linewidth=1, - color="gray", - alpha=0.5, - ) - ax.set_title(f"{self.args.auv_name} {self.args.mission}") - self.logger.debug( - "Pausing map plot (doesn't work well in VS Code debugger)." 
- " Close window to continue.", - ) - plt.show() + # Use the shared function from AUV module + lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( + nav_longitude=lon, + nav_latitude=lat, + gps_longitude=lon_fix, + gps_latitude=lat_fix, + logger=self.logger, + auv_name=self.args.auv_name, + mission=self.args.mission, + max_sec_diff_at_end=max_sec_diff_at_end, + create_plots=True, + ) + + # Store results in instance variables for compatibility + self.segment_count = segment_count + self.segment_minsum = segment_minsum return lon_nudged, lat_nudged diff --git a/src/data/combine.py b/src/data/combine.py new file mode 100755 index 00000000..f39a1bd0 --- /dev/null +++ b/src/data/combine.py @@ -0,0 +1,645 @@ +#!/usr/bin/env python +""" +Combine original LRAUV data from separate .nc files and produce a single NetCDF +file that also contains corrected (nudged) latitudes and longitudes. + +Read original data from netCDF files created by nc42netcdfs.py and write out a +single netCDF file with the important variables at original sampling intervals. +Geometric alignment and any plumbing lag corrections are also done during this +step. This script is similar to calibrate.py that is used for Dorado and i2map +data, but does not apply any sensor calibrations as those are done on the LRAUV +vehicles before the data is logged and unserialized to NetCDF-4 files. The QC +methods implemented in calibrate.py will be reused here. + +The file will contain combined variables (the combined_nc member variable) and +be analogous to the original NetCDF-4. 
Rather than using groups in NetCDF-4 the +data will be written in classic NetCDF-CF with a naming convention that is +similar to Dorado data, with group names (any underscores removed) preceeding +the variable name with an underscore - all lower case characters: +``` + _ + _<..........> + _ + _time + _depth + _latitude + _longitude +``` +The file will be named with a "_cal.nc" suffix to be consistent with the Dorado +and i2map files, indicating the stage of processing. + +""" + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" + +import argparse # noqa: I001 +import logging +import sys +import time +from argparse import RawTextHelpFormatter +from datetime import UTC +from pathlib import Path +from socket import gethostname +from typing import NamedTuple +import cf_xarray # Needed for the .cf accessor # noqa: F401 +import matplotlib.pyplot as plt +import numpy as np +import xarray as xr +from scipy.interpolate import interp1d + +import pandas as pd +from AUV import monotonic_increasing_time_indices, nudge_positions +from logs2netcdfs import AUV_NetCDF, TIME, TIME60HZ +from nc42netcdfs import BASE_LRAUV_PATH, GROUP + +AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay + + +class Range(NamedTuple): + min: float + max: float + + +# Using lower case vehicle names, modify in _define_sensor_info() for changes +# over time Used to reduce ERROR & WARNING log messages for expected missing +# sensor data. There are core data common to most all vehicles, whose groups +# are listed in BASE_GROUPS. EXPECTED_GROUPS contains additional groups for +# specific vehicles. 
+BASE_GROUPS = { + "lrauv": [ + "CTDSeabird", + "WetLabsBB2FL", + ], +} + +EXPECTED_GROUPS = { + "dorado": [ + "navigation", + "gps", + "depth", + "ecopuck", + "hs2", + "ctd1", + "ctd2", + "isus", + "biolume", + "lopc", + "tailcone", + ], + "i2map": [ + "navigation", + "gps", + "depth", + "seabird25p", + "transmissometer", + "tailcone", + ], +} +# Used in test fixture in conftetst.py +EXPECTED_GROUPS["Dorado389"] = EXPECTED_GROUPS["dorado"] + + +def align_geom(sensor_offset, pitches): + """Use x & y sensor_offset values in meters from sensor_info and + pitch in degrees to compute and return actual depths of the sensor + based on the geometry relative to the vehicle's depth sensor. + """ + # See https://en.wikipedia.org/wiki/Rotation_matrix + # + # * instrument location with pitch applied + # / | + # / | + # / | + # / | + # / | + # / | + # / | + # / | + # / | + # / + # / + # / y + # / _ + # / o + # / f + # / f + # / * instrument location + # / | + # / \ | | + # / \ | y + # / pitch (theta) | | + # / \ | | + # --------------------x------------------+ --> nose + # + # [ cos(pitch) -sin(pitch) ] [x] [x'] + # X = + # [ sin(pitch) cos(pitch) ] [y] [y'] + offsets = [] + for pitch in pitches: + theta = pitch * np.pi / 180.0 + R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) + x_off, y_off = np.matmul(R, sensor_offset) + offsets.append(y_off) + + return offsets + + +class Combine_NetCDF: + logger = logging.getLogger(__name__) + _handler = logging.StreamHandler() + _handler.setFormatter(AUV_NetCDF._formatter) + logger.addHandler(_handler) + _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + + def global_metadata(self): + """Use instance variables to return a dictionary of + metadata specific for the data that are written + """ + from datetime import datetime + + iso_now = datetime.now(tz=UTC).isoformat() + "Z" + + metadata = {} + metadata["netcdf_version"] = "4" + metadata["Conventions"] = "CF-1.6" + metadata["date_created"] = iso_now + 
metadata["date_update"] = iso_now + metadata["date_modified"] = iso_now + metadata["featureType"] = "trajectory" + try: + metadata["time_coverage_start"] = str( + self.combined_nc["depth_time"].to_pandas().iloc[0].isoformat(), + ) + except KeyError: + error_message = "No depth_time variable in combined_nc" + raise EOFError(error_message) from None + metadata["time_coverage_end"] = str( + self.combined_nc["depth_time"].to_pandas().iloc[-1].isoformat(), + ) + metadata["distribution_statement"] = "Any use requires prior approval from MBARI" + metadata["license"] = metadata["distribution_statement"] + metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." + metadata["history"] = f"Created by {self.commandline} on {iso_now}" + + metadata["title"] = ( + f"Calibrated AUV sensor data from {self.args.auv_name} mission {self.args.mission}" + ) + metadata["summary"] = ( + "Observational oceanographic data obtained from an Autonomous" + " Underwater Vehicle mission with measurements at" + " original sampling intervals. The data have been processed" + " by MBARI's auv-python software." + ) + if self.summary_fields: + # Should be just one item in set, but just in case join them + metadata["summary"] += " " + ". ".join(self.summary_fields) + metadata["comment"] = ( + f"MBARI Long Range AUV data produced from original data" + f" with execution of '{self.commandline}'' at {iso_now} on" + f" host {gethostname()}. Software available at" + f" 'https://github.com/mbari-org/auv-python'" + ) + + return metadata + + def _range_qc_combined_nc( # noqa: C901, PLR0912 + self, + instrument: str, + variables: list[str], + ranges: dict, + set_to_nan: bool = False, # noqa: FBT001, FBT002 + ) -> None: + """For variables in combined_nc remove values that fall outside + of specified min, max range. Meant to be called by instrument so + that the union of bad values from a set of variables can be removed. 
+ Use set_to_nan=True to set values outside of range to NaN instead of + removing all variables from the instrument. Setting set_to_nan=True + makes sense for record (data) variables - such as ctd1_salinity, + but not for coordinate variables.""" + out_of_range_indices = np.array([], dtype=int) + vars_checked = [] + for var in variables: + if var in self.combined_nc.variables: + if var in ranges: + out_of_range = np.where( + (self.combined_nc[var] < ranges[var].min) + | (self.combined_nc[var] > ranges[var].max), + )[0] + self.logger.debug( + "%s: %d out of range values = %s", + var, + len(self.combined_nc[var][out_of_range].to_numpy()), + self.combined_nc[var][out_of_range].to_numpy(), + ) + out_of_range_indices = np.union1d( + out_of_range_indices, + out_of_range, + ) + if len(out_of_range_indices) > 500: # noqa: PLR2004 + self.logger.warning( + "More than 500 (%d) %s values found outside of range. " + "This may indicate a problem with the %s data.", + len(self.combined_nc[var][out_of_range_indices].to_numpy()), + var, + instrument, + ) + if set_to_nan and var not in self.combined_nc.coords: + self.logger.info( + "Setting %s %s values to NaN", len(out_of_range_indices), var + ) + self.combined_nc[var][out_of_range_indices] = np.nan + vars_checked.append(var) + else: + self.logger.debug("No Ranges set for %s", var) + else: + self.logger.warning("%s not in self.combined_nc", var) + inst_vars = [ + str(var) for var in self.combined_nc.variables if str(var).startswith(f"{instrument}_") + ] + self.logger.info( + "Checked for data outside of these variables and ranges: %s", + [(v, ranges[v]) for v in vars_checked], + ) + if not set_to_nan: + for var in inst_vars: + self.logger.info( + "%s: deleting %d values found outside of above ranges: %s", + var, + len(self.combined_nc[var][out_of_range_indices].to_numpy()), + self.combined_nc[var][out_of_range_indices].to_numpy(), + ) + coord = next(iter(self.combined_nc[var].coords)) + self.combined_nc[f"{var}_qced"] = ( + 
self.combined_nc[var] + .drop_isel({coord: out_of_range_indices}) + .rename({f"{instrument}_time": f"{instrument}_time_qced"}) + ) + self.combined_nc = self.combined_nc.drop_vars(inst_vars) + for var in inst_vars: + self.logger.debug("Renaming %s_qced to %s", var, var) + self.combined_nc[var] = self.combined_nc[f"{var}_qced"].rename( + {f"{coord}_qced": coord}, + ) + qced_vars = [f"{var}_qced" for var in inst_vars] + self.combined_nc = self.combined_nc.drop_vars(qced_vars) + self.logger.info("Done range checking %s", instrument) + + def _nudge_pos(self, max_sec_diff_at_end=10): + """Apply linear nudges to underwater latitudes and longitudes so that + they match the surface gps positions. + """ + try: + lon = self.combined_nc["navigation_longitude"] + except KeyError: + error_message = "No navigation_longitude data in combined_nc" + raise EOFError(error_message) from None + lat = self.combined_nc["navigation_latitude"] + lon_fix = self.combined_nc["gps_longitude"] + lat_fix = self.combined_nc["gps_latitude"] + + # Use the shared function from AUV module + lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( + nav_longitude=lon, + nav_latitude=lat, + gps_longitude=lon_fix, + gps_latitude=lat_fix, + logger=self.logger, + auv_name=self.args.auv_name, + mission=self.args.mission, + max_sec_diff_at_end=max_sec_diff_at_end, + create_plots=True, + ) + + # Store results in instance variables for compatibility + self.segment_count = segment_count + self.segment_minsum = segment_minsum + + return lon_nudged, lat_nudged + + def _apply_plumbing_lag( + self, + sensor: str, + time_index: pd.DatetimeIndex, + time_name: str, + ) -> tuple[xr.DataArray, str]: + """ + Apply plumbing lag to a time index in the combined netCDF file. 
+ """ + # Convert lag_secs to milliseconds as np.timedelta64 neeeds an integer + lagged_time = time_index - np.timedelta64( + int(self.sinfo[sensor]["lag_secs"] * 1000), + "ms", + ) + # Need to update the sensor's time coordinate in the combined netCDF file + # so that DataArrays created with lagged_time fit onto the coordinate + self.combined_nc.coords[f"{sensor}_{time_name}"] = xr.DataArray( + lagged_time, + coords=[lagged_time], + dims={f"{sensor}_{time_name}"}, + name=f"{sensor}_{time_name}", + ) + lag_info = f"with plumbing lag correction of {self.sinfo[sensor]['lag_secs']} seconds" + return lagged_time, lag_info + + def _biolume_process(self, sensor): + try: + orig_nc = getattr(self, sensor).orig_data + except FileNotFoundError as e: + self.logger.error("%s", e) # noqa: TRY400 + return + except AttributeError: + error_message = f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + raise EOFError(error_message) from None + + # Remove non-monotonic times + self.logger.debug("Checking for non-monotonic increasing time") + monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) + if (~monotonic).any(): + self.logger.debug( + "Removing non-monotonic increasing time at indices: %s", + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel({TIME: monotonic}) + + self.logger.info("Checking for non-monotonic increasing %s", TIME60HZ) + monotonic = monotonic_increasing_time_indices(orig_nc.get_index(TIME60HZ)) + if (~monotonic).any(): + self.logger.info( + "Removing non-monotonic increasing %s at indices: %s", + TIME60HZ, + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel({TIME60HZ: monotonic}) + + self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( + sensor, + orig_nc, + ) + + source = self.sinfo[sensor]["data_filename"] + self.combined_nc["biolume_flow"] = xr.DataArray( + orig_nc["flow"].to_numpy() * self.sinfo["biolume"]["flow_conversion"], + coords=[orig_nc.get_index("time")], + 
dims={f"{sensor}_time"}, + name=f"{sensor}_flow", + ) + self.combined_nc["biolume_flow"].attrs = { + "long_name": "Bioluminesence pump flow rate", + "units": "mL/s", + "coordinates": f"{sensor}_time {sensor}_depth", + "comment": f"flow from {source}", + } + + lagged_time, lag_info = self._apply_plumbing_lag( + sensor, + orig_nc.get_index(TIME), + TIME, + ) + self.combined_nc["biolume_avg_biolume"] = xr.DataArray( + orig_nc["avg_biolume"].to_numpy(), + coords=[lagged_time], + dims={f"{sensor}_{TIME}"}, + name=f"{sensor}_avg_biolume", + ) + self.combined_nc["biolume_avg_biolume"].attrs = { + "long_name": "Bioluminesence Average of 60Hz data", + "units": "photons s^-1", + "coordinates": f"{sensor}_{TIME} {sensor}_depth", + "comment": f"avg_biolume from {source} {lag_info}", + } + + lagged_time, lag_info = self._apply_plumbing_lag( + sensor, + orig_nc.get_index(TIME60HZ), + TIME60HZ, + ) + self.combined_nc["biolume_raw"] = xr.DataArray( + orig_nc["raw"].to_numpy(), + coords=[lagged_time], + dims={f"{sensor}_{TIME60HZ}"}, + name=f"{sensor}_raw", + ) + self.combined_nc["biolume_raw"].attrs = { + "long_name": "Raw 60 hz biolume data", + # xarray writes out its own units attribute + "coordinates": f"{sensor}_{TIME60HZ} {sensor}_depth60hz", + "comment": f"raw values from {source} {lag_info}", + } + if self.args.mission == "2010.284.00": + self.logger.info( + "Removing points outside of time range for %s/biolume.nc", self.args.mission + ) + for time_axis in (TIME, TIME60HZ): + self._range_qc_combined_nc( + instrument=sensor, + variables=[ + "biolume_time", + "biolume_time60hz", + "biolume_depth", + "biolume_flow", + "biolume_avg_biolume", + "biolume_raw", + ], + ranges={ + f"{sensor}_{time_axis}": Range( + pd.Timestamp(2010, 10, 11, 20, 0, 0), + pd.Timestamp(2010, 10, 12, 3, 28, 0), + ), + }, + set_to_nan=True, + ) + + def _geometric_depth_correction(self, sensor, orig_nc): + """Performs the align_geom() function from the legacy Matlab. 
+        Works for any sensor, but requires navigation being processed first
+        as its variables in combined_nc are required. Returns corrected depth
+        array.
+        """
+        # Fix pitch values to first and last points for interpolation to time
+        # values outside the range of the pitch values.
+        # See https://stackoverflow.com/a/45446546
+        # and https://github.com/scipy/scipy/issues/12707#issuecomment-672794335
+        try:
+            p_interp = interp1d(
+                self.combined_nc["navigation_time"].to_numpy().tolist(),
+                self.combined_nc["navigation_pitch"].to_numpy(),
+                fill_value=(
+                    self.combined_nc["navigation_pitch"].to_numpy()[0],
+                    self.combined_nc["navigation_pitch"].to_numpy()[-1],
+                ),
+                bounds_error=False,
+            )
+        except KeyError:
+            error_message = "No navigation_time or navigation_pitch in combined_nc."
+            raise EOFError(error_message) from None
+        pitch = p_interp(orig_nc["time"].to_numpy().tolist())
+
+        d_interp = interp1d(
+            self.combined_nc["depth_time"].to_numpy().tolist(),
+            self.combined_nc["depth_filtdepth"].to_numpy(),
+            fill_value=(
+                self.combined_nc["depth_filtdepth"].to_numpy()[0],
+                self.combined_nc["depth_filtdepth"].to_numpy()[-1],
+            ),
+            bounds_error=False,
+        )
+        orig_depth = d_interp(orig_nc["time"].to_numpy().tolist())
+        offs_depth = align_geom(self.sinfo[sensor]["sensor_offset"], pitch)
+
+        corrected_depth = xr.DataArray(
+            (orig_depth - offs_depth).astype(np.float64).tolist(),
+            coords=[orig_nc.get_index("time")],
+            dims={f"{sensor}_time"},
+            name=f"{sensor}_depth",
+        )
+        # 2008.289.03 has self.combined_nc["depth_time"][-1] (2008-10-16T15:42:32)
+        # a lot less than orig_nc["time"][-1] (2008-10-16T16:24:43)
+        # which, with "extrapolate" causes wildly incorrect depths to -359 m
+        # There may be other cases where this happens, in which case we'd like
+        # a general solution. For now, we'll just correct this mission.
+ d_beg_time_diff = ( + orig_nc["time"].to_numpy()[0] - self.combined_nc["depth_time"].to_numpy()[0] + ) + d_end_time_diff = ( + orig_nc["time"].to_numpy()[-1] - self.combined_nc["depth_time"].to_numpy()[-1] + ) + self.logger.info( + "%s: d_beg_time_diff: %s, d_end_time_diff: %s", + sensor, + d_beg_time_diff.astype("timedelta64[s]"), + d_end_time_diff.astype("timedelta64[s]"), + ) + if self.args.mission in ( + "2008.289.03", + "2010.259.01", + "2010.259.02", + ): + # This could be a more general check for all missions, but let's restrict it + # to known problematic missions for now. The above info message can help + # determine if this is needed for other missions. + self.logger.info( + "%s: Special QC for mission %s: Setting corrected_depth to NaN for times after %s", + sensor, + self.args.mission, + self.combined_nc["depth_time"][-1].to_numpy(), + ) + corrected_depth[ + np.where( + orig_nc.get_index("time") > self.combined_nc["depth_time"].to_numpy()[-1], + ) + ] = np.nan + if self.args.plot: + plt.figure(figsize=(18, 6)) + plt.plot( + orig_nc["time"].to_numpy(), + orig_depth, + "-", + orig_nc["time"].to_numpy(), + corrected_depth, + "--", + orig_nc["time"].to_numpy(), + pitch, + ".", + ) + plt.ylabel("Depth (m) & Pitch (deg)") + plt.legend(("Original depth", "Pitch corrected depth", "Pitch")) + plt.title( + f"Original and pitch corrected depth for {self.args.auv_name} {self.args.mission}", + ) + plt.show() + + return corrected_depth + + def combine_groups(self): + log_file = self.args.log_file + src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) + self.combined_nc = xr.Dataset() + for group_file in group_files: + self.logger.info("Found group file: %s", group_file) + # Make nudged_longitude, nudged_latitude = self._nudge_pos() call on when appropriate + + def write_netcdf(self) -> None: + log_file = self.args.log_file + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + 
out_fn = Path(netcdfs_dir, f"{self.args.log_file.stem}_cal.nc") + + self.combined_nc.attrs = self.global_metadata() + self.logger.info("Writing combined group data to %s", out_fn) + if Path(out_fn).exists(): + Path(out_fn).unlink() + self.combined_nc.to_netcdf(out_fn) + self.logger.info( + "Data variables written: %s", + ", ".join(sorted(self.combined_nc.variables)), + ) + + return netcdfs_dir + + def process_command_line(self): + examples = "Examples:" + "\n\n" + examples += " Combine original data from Group files for an LRAUV log file:\n" + examples += ( + " " + + sys.argv[0] + + " -v --log_file brizo/missionlogs/2025/" + + "20250909_20250915/20250914T080941/" + + "202509140809_202509150109.nc4\n" + ) + + parser = argparse.ArgumentParser( + formatter_class=RawTextHelpFormatter, + description=__doc__, + epilog=examples, + ) + + parser.add_argument( + "--noinput", + action="store_true", + help="Execute without asking for a response, e.g. to not ask to re-download file", + ) + parser.add_argument( + "--log_file", + action="store", + help=( + "Path to the log file for the mission, e.g.: " + "brizo/missionlogs/2025/20250903_20250909/" + "20250905T072042/202509050720_202509051653.nc4" + ), + ) + parser.add_argument( + "--plot", + action="store", + help="Create intermediate plots" + " to validate data operations. Use first to plot " + " points, e.g. first2000. 
Program blocks upon show.", + ) + parser.add_argument( + "-v", + "--verbose", + type=int, + choices=range(3), + action="store", + default=0, + const=1, + nargs="?", + help="verbosity level: " + + ", ".join( + [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], + ), + ) + + self.args = parser.parse_args() + self.logger.setLevel(self._log_levels[self.args.verbose]) + + self.commandline = " ".join(sys.argv) + + +if __name__ == "__main__": + combine = Combine_NetCDF() + combine.process_command_line() + start = time.time() + combine.combine_groups() + ##combine.write_netcdf() + combine.logger.info("Time to process: %.2f seconds", (time.time() - start)) diff --git a/src/data/dorado_info.py b/src/data/dorado_info.py index cf6cc795..123a652c 100644 --- a/src/data/dorado_info.py +++ b/src/data/dorado_info.py @@ -2293,7 +2293,7 @@ "Overnight diamond pattern for CANON September 2017" " Bad blocks in hs2 data" " QC note: Best CTD is ctd2, ctd2 not great but better for salt although a couple screwey profiles in temp" - " - ctdToUse = ctd1 " + " - ctdToUse = ctd2 " ), } dorado_info["2017.347.00"] = { diff --git a/src/data/m1_soundspeed.py b/src/data/m1_soundspeed.py new file mode 100755 index 00000000..ec089c47 --- /dev/null +++ b/src/data/m1_soundspeed.py @@ -0,0 +1,171 @@ +#! /usr/bin/env python +""" +Read most recent profile of temperature and practical salinity from the MBARI M1 +mooring in Monterey Bay and return a profile of sound speed as a function of +depth. + +This uses the opendap URL produced on an hourly basis as part of MBARI's SSDS +realtime data system. + +Using Ferret to access the data: +================================ +The most recent profile is retrieved using the SET REGION/L=2156:2156 statement +where the number 2156 is seen as the last index for the L axis (TIME) seen in +the output of the SHOW DATA/VAR statement. 
Below is a terminal session showing +how to access the data: + +[ssdsadmin@elvis ~]$ ferret + NOAA/PMEL TMAP + FERRET v7.43 (optimized) + Linux 3.10.0-862.11.6.el7.x86_64 64-bit - 09/14/18 + 22-Oct-25 09:20 + +yes? USE "http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc" +yes? SHOW DATA/VAR + currently SET data sets: + 1> http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc (default) + Hourly Gridded MBARI Mooring M1 Sea Water Temperature and Salinity Observations + name title I J K L + PSAL Hourly sea_water_salinity 1:1 1:1 1:11 1:2156 + 1 on grid GEN1 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) Y=36.3N:37.3N Z=0:325 + PSAL_QC quality flag 1:1 1:1 1:11 1:2156 + on grid GEN1 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) Y=36.3N:37.3N Z=0:325 + TEMP Hourly sea_water_temperature 1:1 1:1 1:11 1:2156 + celsius on grid GEN1 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) Y=36.3N:37.3N Z=0:325 + TEMP_QC quality flag 1:1 1:1 1:11 1:2156 + on grid GEN1 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) Y=36.3N:37.3N Z=0:325 + TIME_QC Quality flag for time axis, 1: ... ... ... 1:2156 + flag on grid GEN2 with -1.E+34 for missing data + + POSITION_QC + Quality flag for Latitude and L 1:1 ... ... ... + on grid GEN3 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) + DEPTH_QC Quality flag for depth axis, 1: ... ... 1:11 ... + on grid GEN4 with -1.E+34 for missing data + Z=0:325 + + time range: 24-JUL-2025 18:30 to 22-OCT-2025 13:30 + +yes? SET REGION/L=2156:2156 +yes? 
LIST TEMP, PSAL + DATA SET: http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc + Hourly Gridded MBARI Mooring M1 Sea Water Temperature and Salinity Observations + DEPTH (m): 0 to 325 + LONGITUDE: 122W(-122) + LATITUDE: 36.8N + TIME: 22-OCT-2025 13:30 + Column 1: TEMP is Hourly sea_water_temperature (celsius) + Column 2: PSAL is Hourly sea_water_salinity (1) + TEMP PSAL +1 / 1: 16.38 33.32 +10 / 2: 16.39 33.32 +20 / 3: 15.43 33.28 +40 / 4: 13.30 33.42 +60 / 5: 11.95 33.51 +80 / 6: 11.33 33.61 +100 / 7: 11.01 33.63 +150 / 8: 10.09 33.81 +200 / 9: 9.67 33.93 +250 / 10: 9.12 34.08 +300 / 11: 7.92 34.09 +yes? quit + + +Using Python to access the data: +================================ +The Xarray library and variety of Python packages provides similar ease-of-use +capability in more modern computational environments. This module provides that +implementation. There are two dependencies that need to be installed via pip or +some other package manager: + gsw + xarray + netcdf4 + +Installation: +------------- +1. Create a directory to hold this script and a virtual environment: + mkdir m1_soundspeed + cd m1_soundspeed +2. Create a virtual environment (optional but recommended): + python3 -m venv venv +3. Activate the virtual environment: + source venv/bin/activate +4. Install the required packages: + pip install gsw xarray netcdf4 +5. Save this script as m1_soundspeed.py +6. 
Run the script: + python m1_soundspeed.py + +Sample Output: +============== +python m1_soundspeed.py + +Most recent sound speed profile from M1 mooring +----------------------------------------------- +Data source: http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc +Title: Hourly Gridded MBARI Mooring M1 Sea Water Temperature and Salinity Observations +Latitude: 36.75 +Longitude: -122.03 +Time: 2025-10-22T14:30:00 UTC + + Depth (m) Sound Speed (m/s) + 1.00 1508.89 + 10.00 1508.96 + 20.00 1505.97 + 40.00 1501.15 + 60.00 1496.70 + 80.00 1494.31 + 100.00 1493.70 + 150.00 1490.97 + 200.00 1490.73 + 250.00 1489.76 + 300.00 1486.44 +__author__ = "Mike McCann" +__copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" +""" # noqa: E501 + +import gsw +import xarray as xr + +# Source for realtime M1 mooring data +url = ( + "http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc" +) +ds = xr.open_dataset(url) + +# Select the most recent profile by indexing the TIME dimension +latest = ds.isel(TIME=-1) +temp = latest["TEMP"].to_numpy().flatten() +psal = latest["PSAL"].to_numpy().flatten() +depth = latest["DEPTH"].to_numpy().flatten() + +# Convert practical salinity to absolute salinity using lat and lon of M1 +# mooring from the index data in the dataset +lon = ds["LONGITUDE"].to_numpy().item() +lat = ds["LATITUDE"].to_numpy().item() +abs_sal = gsw.SA_from_SP(psal, depth, lon, lat) + +# Print out a header showing time, lat, lon and data source similar to Ferret output +time_str = str(latest["TIME"].to_numpy()) +time_str = time_str.split(".")[0] + " UTC" # Remove fractional seconds +print() # noqa: T201 +print("Most recent sound speed profile from M1 mooring") # noqa: T201 +print("-----------------------------------------------") # noqa: T201 +print(f"Data source: {url}") # noqa: T201 +print(f"Title: {ds.title}") # noqa: T201 +print(f"Latitude: {lat:.2f}") # noqa: T201 
+print(f"Longitude: {lon:.2f}") # noqa: T201 +print(f"Time: {time_str}") # noqa: T201 +print() # noqa: T201 + +# Calculate sound speed using the Gibbs Seawater (GSW) Oceanographic Toolbox +# Print out the profile of sound speed as a table +soundspeed = gsw.sound_speed(abs_sal, temp, depth) +print(f"{'Depth (m)':>10} {'Sound Speed (m/s)':>20}") # noqa: T201 +for d, c in zip(depth, soundspeed, strict=True): + print(f"{d:10.2f} {c:20.2f}") # noqa: T201 diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py new file mode 100755 index 00000000..32584a18 --- /dev/null +++ b/src/data/nc42netcdfs.py @@ -0,0 +1,819 @@ +#!/usr/bin/env python +""" +Extract instrument/group data from LRAUV .nc4 files into individual NetCDF files. + +Makes the original data more accessible for analysis and visualization. +""" + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" + +import argparse +import logging +import os +import sys +from pathlib import Path +from typing import Any + +import netCDF4 +import pooch +import xarray as xr + +# Local directory that serves as the work area for log_files and netcdf files +BASE_LRAUV_WEB = "https://dods.mbari.org/data/lrauv/" +BASE_LRAUV_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve() +SUMMARY_SOURCE = "Original LRAUV data extracted from {}, group {}" +GROUPS = ["navigation", "ctd", "ecopuck"] # Your actual group names +GROUP = "Group" # A literal in the filename to use for identifying group .nc files + +SCI_PARMS = { + "/": [ + { + "name": "concentration_of_colored_dissolved_organic_matter_in_sea_water", + "rename": "colored_dissolved_organic_matter", + }, + {"name": "longitude", "rename": "longitude"}, + {"name": "latitude", "rename": "latitude"}, + {"name": "depth", "rename": "depth"}, + {"name": "time", "rename": "time"}, + ], + "Aanderaa_O2": [{"name": "mass_concentration_of_oxygen_in_sea_water", "rename": "oxygen"}], + "CTD_NeilBrown": [ + {"name": 
"sea_water_salinity", "rename": "salinity"}, + {"name": "sea_water_temperature", "rename": "temperature"}, + ], + "CTD_Seabird": [ + {"name": "sea_water_salinity", "rename": "salinity"}, + {"name": "sea_water_temperature", "rename": "temperature"}, + { + "name": "mass_concentration_of_oxygen_in_sea_water", + "rename": "mass_concentration_of_oxygen_in_sea_water", + }, + ], + "ISUS": [{"name": "mole_concentration_of_nitrate_in_sea_water", "rename": "nitrate"}], + "PAR_Licor": [{"name": "downwelling_photosynthetic_photon_flux_in_sea_water", "rename": "PAR"}], + "WetLabsBB2FL": [ + {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"}, + {"name": "OutputChl", "rename": "chl"}, + {"name": "Output470", "rename": "bbp470"}, + {"name": "Output650", "rename": "bbp650"}, + {"name": "VolumeScatCoeff117deg470nm", "rename": "volumescatcoeff117deg470nm"}, + {"name": "VolumeScatCoeff117deg650nm", "rename": "volumescatcoeff117deg650nm"}, + { + "name": "ParticulateBackscatteringCoeff470nm", + "rename": "particulatebackscatteringcoeff470nm", + }, + { + "name": "ParticulateBackscatteringCoeff650nm", + "rename": "particulatebackscatteringcoeff650nm", + }, + ], + "WetLabsSeaOWL_UV_A": [ + { + "name": "concentration_of_chromophoric_dissolved_organic_matter_in_sea_water", + "rename": "chromophoric_dissolved_organic_matter", + }, + {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"}, + {"name": "BackscatteringCoeff700nm", "rename": "BackscatteringCoeff700nm"}, + {"name": "VolumeScatCoeff117deg700nm", "rename": "VolumeScatCoeff117deg700nm"}, + { + "name": "mass_concentration_of_petroleum_hydrocarbons_in_sea_water", + "rename": "petroleum_hydrocarbons", + }, + ], + "WetLabsUBAT": [ + {"name": "average_bioluminescence", "rename": "average_bioluminescence"}, + {"name": "flow_rate", "rename": "ubat_flow_rate"}, + {"name": "digitized_raw_ad_counts", "rename": "digitized_raw_ad_counts"}, + ], +} + +ENG_PARMS = { + "BPC1": [ + {"name": 
"platform_battery_charge", "rename": "health_platform_battery_charge"}, + {"name": "platform_battery_voltage", "rename": "health_platform_average_voltage"}, + ], + "BuoyancyServo": [ + {"name": "platform_buoyancy_position", "rename": "control_inputs_buoyancy_position"} + ], + "DeadReckonUsingMultipleVelocitySources": [ + { + "name": "fix_residual_percent_distance_traveled", + "rename": "fix_residual_percent_distance_traveled_DeadReckonUsingMultipleVelocitySources", # noqa: E501 + }, + {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingMultipleVelocitySources"}, + {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingMultipleVelocitySources"}, + {"name": "depth", "rename": "pose_depth_DeadReckonUsingMultipleVelocitySources"}, + ], + "DeadReckonUsingSpeedCalculator": [ + { + "name": "fix_residual_percent_distance_traveled", + "rename": "fix_residual_percent_distance_traveled_DeadReckonUsingSpeedCalculator", + }, + {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingSpeedCalculator"}, + {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingSpeedCalculator"}, + {"name": "depth", "rename": "pose_depth_DeadReckonUsingSpeedCalculator"}, + ], + "ElevatorServo": [ + {"name": "platform_elevator_angle", "rename": "control_inputs_elevator_angle"} + ], + "MassServo": [{"name": "platform_mass_position", "rename": "control_inputs_mass_position"}], + "NAL9602": [ + {"name": "time_fix", "rename": "fix_time"}, + {"name": "latitude_fix", "rename": "fix_latitude"}, + {"name": "longitude_fix", "rename": "fix_longitude"}, + ], + "Onboard": [{"name": "platform_average_current", "rename": "health_platform_average_current"}], + "RudderServo": [{"name": "platform_rudder_angle", "rename": "control_inputs_rudder_angle"}], + "ThrusterServo": [ + { + "name": "platform_propeller_rotation_rate", + "rename": "control_inputs_propeller_rotation_rate", + } + ], + "CurrentEstimator": [ + { + "name": "current_direction_navigation_frame", + "rename": 
"current_direction_navigation_frame", + }, + {"name": "current_speed_navigation_frame", "rename": "current_speed_navigation_frame"}, + ], +} + +SCIENG_PARMS = {**SCI_PARMS, **ENG_PARMS} + + +class Extract: + """Extract instrument/group data from LRAUV .nc4 files into individual NetCDF files.""" + + logger = logging.getLogger(__name__) + _handler = logging.StreamHandler() + _formatter = logging.Formatter( + "%(levelname)s %(asctime)s %(filename)s " + "%(funcName)s():%(lineno)d [%(process)d] %(message)s", + ) + _handler.setFormatter(_formatter) + logger.addHandler(_handler) + _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + + def show_variable_mapping(self): + """Show the variable mapping.""" + for group, parms in sorted(SCIENG_PARMS.items()): + print(f"Group: {group}") # noqa: T201 + for parm in parms: + name = parm.get("name", "N/A") + rename = parm.get("rename", "N/A") + print(f" {name} -> {rename}") # noqa: T201 + print() # noqa: T201 + + def download_with_pooch(self, url, local_dir, known_hash=None): + """Download using pooch with caching and verification.""" + downloader = pooch.HTTPDownloader(timeout=(60, 300), progressbar=True) + return pooch.retrieve( + url=url, + known_hash=known_hash, # Optional but recommended for integrity + fname=Path(url).name, + path=local_dir, + downloader=downloader, + ) + + def get_groups_netcdf4(self, file_path): + """Get list of groups using netCDF4 library.""" + with netCDF4.Dataset(file_path, "r") as dataset: + return list(dataset.groups.keys()) + + def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: + """Extract each group from .nc4 file to a separate .nc file using netCDF4 library. 
+ + Args: + log_file: Relative path from BASE_LRAUV_WEB to .nc4 log_file + + Returns: + netcdfs_dir: Local directory where NetCDF files were saved + + Note: + The xarray library fails reading the WetLabsBB2FL group from this file: + brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 + with garbled data for the serial variable (using ncdump): + serial = "$F!{<8D>\031@7\024[P]\001\030" ; + but netCDF4 can skip over it and read the rest of the variables. + """ + # Download over http so that we don't need to mount smb shares + url = os.path.join(BASE_LRAUV_WEB, log_file) # noqa: PTH118 + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + netcdfs_dir.mkdir(exist_ok=True, parents=True) + + self.logger.info("Downloading %s", url) + input_file = self.download_with_pooch(url, netcdfs_dir) + + self.logger.info("Extracting data from %s", input_file) + with netCDF4.Dataset(input_file, "r") as src_dataset: + # Extract root group first + self._extract_root_group(src_dataset, log_file, netcdfs_dir) + + # Extract all other groups + all_groups = list(src_dataset.groups.keys()) + for group_name in SCIENG_PARMS: + if group_name == "/" or group_name not in all_groups: + if group_name != "/" and group_name not in all_groups: + self.logger.warning("Group %s not found in %s", group_name, input_file) + continue + self._extract_single_group(src_dataset, group_name, log_file, netcdfs_dir) + + return netcdfs_dir + + def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, output_dir: Path): + """Extract variables from the root group to _{GROUP}_Universals.nc.""" + root_parms = SCIENG_PARMS.get("/", []) + if not root_parms: + return + + try: + self.logger.info("Extracting root group '/'") + vars_to_extract = self._get_available_variables(src_dataset, root_parms) + + if vars_to_extract: + output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_Universals.nc" + self._create_netcdf_file(src_dataset, vars_to_extract, output_file) 
+ self.logger.info("Extracted root group '/' to %s", output_file) + else: + self.logger.warning("No requested variables found in root group '/'") + + except Exception as e: # noqa: BLE001 + self.logger.warning("Could not extract root group '/': %s", e) + + def _extract_single_group( + self, src_dataset: netCDF4.Dataset, group_name: str, log_file: str, output_dir: Path + ): + "Extract a single group to its own NetCDF file named like _{GROUP}_.nc." + group_parms = SCIENG_PARMS[group_name] + + try: + self.logger.debug(" Group %s", group_name) + src_group = src_dataset.groups[group_name] + + vars_to_extract = self._get_available_variables(src_group, group_parms) + + if vars_to_extract: + output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_{group_name}.nc" + self._create_netcdf_file(src_group, vars_to_extract, output_file) + self.logger.info("Extracted %s to %s", group_name, output_file) + else: + self.logger.warning("No requested variables found in group %s", group_name) + + except KeyError: + self.logger.warning("Group %s not found", group_name) + except Exception as e: # noqa: BLE001 + self.logger.warning("Could not extract %s: %s", group_name, e) + + def _get_available_variables( + self, src_group: netCDF4.Group, group_parms: list[dict[str, Any]] + ) -> list[str]: + """Get the intersection of requested and available variables.""" + requested_vars = [p["name"] for p in group_parms if "name" in p] + available_vars = list(src_group.variables.keys()) + vars_to_extract = [var for var in requested_vars if var in available_vars] + + self.logger.debug(" Variables to extract: %s", vars_to_extract) + return vars_to_extract + + def _find_time_coordinate(self, src_group: netCDF4.Group) -> str: + """Find the time coordinate variable in a group using introspection. 
+ + Returns: + str: Name of the time coordinate variable, or empty string if not found + """ + # Strategy 1: Look for variables with "time" in the name (most common) + time_vars = [var_name for var_name in src_group.variables if "time" in var_name.lower()] + if time_vars: + # Prefer variables that start with 'time' (like time_NAL9602) + time_vars.sort(key=lambda x: (not x.lower().startswith("time"), x)) + self.logger.debug("Found time coordinate %s via name pattern", time_vars[0]) + return time_vars[0] + + # Strategy 2: Look for variables with time-like units + for var_name, var in src_group.variables.items(): + if hasattr(var, "units"): + units = getattr(var, "units", "").lower() + time_patterns = ["seconds since", "days since", "hours since"] + if any(pattern in units for pattern in time_patterns): + self.logger.debug("Found time coordinate %s via units", var_name) + return var_name + + # Strategy 3: Look for unlimited dimension (backup) + for dim_name, dim in src_group.dimensions.items(): + if dim.isunlimited() and dim_name in src_group.variables: + self.logger.debug("Found time coordinate %s via unlimited dimension", dim_name) + return dim_name + + self.logger.debug("No time coordinate found in group") + return "" + + def _get_time_filters_for_variables( + self, src_group: netCDF4.Group, vars_to_extract: list[str] + ) -> dict[str, dict]: + """Get time filtering information for time coordinates used by vars_to_extract. 
+ + Returns: + dict: Map of time_coord_name -> {"indices": list[int], "filtered": bool} + """ + time_filters = {} + + # Check if time filtering is enabled + if not getattr(self.args, "filter_monotonic_time", True): + return time_filters + + # Find all time coordinates used by variables in extraction list + time_coords_found = set() + for var_name in vars_to_extract: + if var_name in src_group.variables: + var = src_group.variables[var_name] + + # Check each dimension to see if it's a time coordinate + for dim_name in var.dimensions: + if dim_name in src_group.variables: + dim_var = src_group.variables[dim_name] + + # Check if this dimension variable is a time coordinate + if self._is_time_variable(dim_name, dim_var): + time_coords_found.add(dim_name) + + # Now process each unique time coordinate found + for time_coord_name in time_coords_found: + time_var = src_group.variables[time_coord_name] + time_data = time_var[:] + mono_indices = self._get_monotonic_indices(time_data) + + # Check if filtering was actually needed + filtered = len(mono_indices) < len(time_data) + if filtered: + self.logger.info( + "Time coordinate %s: filtered %d non-monotonic points (%d -> %d), %.2f%%", + time_coord_name, + len(time_data) - len(mono_indices), + len(time_data), + len(mono_indices), + 100 * (len(time_data) - len(mono_indices)) / len(time_data), + ) + + time_filters[time_coord_name] = {"indices": mono_indices, "filtered": filtered} + + return time_filters + + def _is_time_variable(self, var_name: str, var) -> bool: + """Check if a variable is a time coordinate variable.""" + # Check name pattern + if "time" in var_name.lower(): + return True + + # Check units + if hasattr(var, "units"): + units = getattr(var, "units", "").lower() + time_patterns = ["seconds since", "days since", "hours since"] + if any(pattern in units for pattern in time_patterns): + return True + + return False + + def _get_monotonic_indices(self, time_data) -> list[int]: + """Get indices for monotonic time 
values from time data array.""" + mono_indices = [] + if len(time_data) > 0: + mono_indices.append(0) # Always include first point + + for i in range(1, len(time_data)): + if time_data[i] > time_data[mono_indices[-1]]: + mono_indices.append(i) + + return mono_indices + + def _get_monotonic_time_indices(self, src_group: netCDF4.Group) -> tuple[list[int], bool]: + """Get indices for monotonically increasing time data. + + Returns: + list[int]: List of indices for monotonic time points + bool: True if filtering was applied + """ + # Check if time filtering is enabled + if not getattr(self.args, "filter_monotonic_time", True): + return [], False + + # Find the time coordinate variable using introspection + time_var_name = self._find_time_coordinate(src_group) + if not time_var_name: + # No time variable found, return all data + return [], False + + time_var = src_group.variables[time_var_name] + time_data = time_var[:] + + # Find monotonically increasing indices + mono_indices = [] + if len(time_data) > 0: + mono_indices.append(0) # Always include first point + + for i in range(1, len(time_data)): + if time_data[i] > time_data[mono_indices[-1]]: + mono_indices.append(i) + else: + self.logger.debug( + "Non-monotonic time value at index %d: %s <= %s (var: %s)", + i, + time_data[i], + time_data[mono_indices[-1]], + time_var_name, + ) + + total_points = len(time_data) + filtered_points = len(mono_indices) + + if filtered_points < total_points: + self.logger.warning( + "Filtered %d non-monotonic time points (kept %d/%d) for variable %s", + total_points - filtered_points, + filtered_points, + total_points, + time_var_name, + ) + return mono_indices, True + + return mono_indices, False + + def _copy_variable_with_appropriate_time_filter( + self, + src_group: netCDF4.Group, + dst_dataset: netCDF4.Dataset, + var_name: str, + time_filters: dict[str, dict], + ): + """Copy a variable with appropriate time filtering applied.""" + try: + src_var = src_group.variables[var_name] + + # 
Create variable in destination + dst_var = dst_dataset.createVariable( + var_name, + src_var.dtype, + src_var.dimensions, + zlib=True, + complevel=6, + shuffle=True, + fletcher32=True, + ) + + # Check if this variable itself is a time coordinate that needs filtering + if var_name in time_filters and time_filters[var_name]["filtered"]: + # This is a time coordinate variable that needs filtering + time_indices = time_filters[var_name]["indices"] + dst_var[:] = src_var[:][time_indices] + self.logger.debug("Applied time filtering to time coordinate %s", var_name) + + # Check if this variable depends on any filtered time dimensions + elif src_var.dimensions: + # Find which (if any) of this variable's dimensions are filtered time coordinates + filtered_dims = {} + for dim_name in src_var.dimensions: + if dim_name in time_filters and time_filters[dim_name]["filtered"]: + filtered_dims[dim_name] = time_filters[dim_name]["indices"] + + if filtered_dims: + # Apply filtering for the appropriate dimensions + self._apply_multidimensional_time_filter( + src_var, dst_var, var_name, filtered_dims + ) + else: + # No time filtering needed + dst_var[:] = src_var[:] + else: + # Scalar variable or no dimensions + dst_var[:] = src_var[:] + + # Copy attributes + for attr_name in src_var.ncattrs(): + dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + + self.logger.debug(" Copied variable: %s", var_name) + + except Exception as e: # noqa: BLE001 + self.logger.warning("Failed to copy variable %s: %s", var_name, e) + + def _apply_multidimensional_time_filter( + self, src_var, dst_var, var_name: str, filtered_dims: dict[str, list[int]] + ): + """Apply time filtering to a multi-dimensional variable.""" + # For now, handle the common case where time is the first dimension + if len(filtered_dims) == 1: + dim_name = list(filtered_dims.keys())[0] + time_indices = filtered_dims[dim_name] + + if src_var.dimensions[0] == dim_name: + # Time is first dimension + if len(src_var.dimensions) == 
1: + # 1D variable + dst_var[:] = src_var[:][time_indices] + else: + # Multi-dimensional with time as first dimension + dst_var[:] = src_var[:][time_indices, ...] + self.logger.debug( + "Applied time filtering to variable %s (dim: %s)", var_name, dim_name + ) + else: + # Time dimension is not first - more complex indexing needed + self.logger.warning( + "Variable %s has filtered time dimension %s but not as first dimension - " + "copying all data", + var_name, + dim_name, + ) + dst_var[:] = src_var[:] + else: + # Multiple time dimensions filtered - complex case + self.logger.warning( + "Variable %s has multiple filtered time dimensions - copying all data", var_name + ) + dst_var[:] = src_var[:] + + def _create_dimensions_with_time_filters( + self, + src_group: netCDF4.Group, + dst_dataset: netCDF4.Dataset, + dims_needed: set[str], + time_filters: dict[str, dict], + ): + """Create dimensions in the destination dataset, adjusting time dimensions if filtered.""" + for dim_name in dims_needed: + if dim_name in src_group.dimensions: + src_dim = src_group.dimensions[dim_name] + + # Check if this dimension corresponds to a filtered time variable + if dim_name in time_filters and time_filters[dim_name]["filtered"]: + # Use the number of filtered time points + filtered_size = len(time_filters[dim_name]["indices"]) + size = filtered_size if not src_dim.isunlimited() else None + self.logger.debug( + "Created filtered time dimension %s: %s -> %s", + dim_name, + len(src_dim), + size or filtered_size, + ) + else: + size = len(src_dim) if not src_dim.isunlimited() else None + + dst_dataset.createDimension(dim_name, size) + + def _create_netcdf_file( + self, src_group: netCDF4.Group, vars_to_extract: list[str], output_file: Path + ): + """Create a new NetCDF file with the specified variables and monotonic time.""" + # Get time filtering information for each time variable + time_filters = self._get_time_filters_for_variables(src_group, vars_to_extract) + + with 
netCDF4.Dataset(output_file, "w", format="NETCDF4") as dst_dataset: + # Copy global attributes + self._copy_global_attributes(src_group, dst_dataset) + + # Add note about time filtering if applied + if any(tf["filtered"] for tf in time_filters.values()): + dst_dataset.setncattr( + "processing_note", "Non-monotonic time values filtered out during extraction" + ) + + # Create dimensions - may need to adjust time dimension sizes + dims_needed = self._get_required_dimensions(src_group, vars_to_extract) + self._create_dimensions_with_time_filters( + src_group, dst_dataset, dims_needed, time_filters + ) + + # Copy coordinate variables with time filtering + coord_vars = self._get_coordinate_variables(src_group, dims_needed, vars_to_extract) + for var_name in coord_vars: + self._copy_variable_with_appropriate_time_filter( + src_group, dst_dataset, var_name, time_filters + ) + + # Copy requested variables with time filtering + for var_name in vars_to_extract: + self._copy_variable_with_appropriate_time_filter( + src_group, dst_dataset, var_name, time_filters + ) + + def _copy_global_attributes(self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset): + """Copy global attributes from source to destination.""" + for attr_name in src_group.ncattrs(): + dst_dataset.setncattr(attr_name, src_group.getncattr(attr_name)) + + def _get_required_dimensions( + self, src_group: netCDF4.Group, vars_to_extract: list[str] + ) -> set[str]: + """Get all dimensions needed by the variables to extract.""" + dims_needed = set() + for var_name in vars_to_extract: + if var_name in src_group.variables: + var = src_group.variables[var_name] + dims_needed.update(var.dimensions) + return dims_needed + + def _create_dimensions( + self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, dims_needed: set[str] + ): + """Create dimensions in the destination dataset.""" + for dim_name in dims_needed: + if dim_name in src_group.dimensions: + src_dim = src_group.dimensions[dim_name] + size = 
len(src_dim) if not src_dim.isunlimited() else None + dst_dataset.createDimension(dim_name, size) + + def _get_coordinate_variables( + self, src_group: netCDF4.Group, dims_needed: set[str], vars_to_extract: list[str] + ) -> list[str]: + """Get coordinate variables that aren't already in vars_to_extract.""" + coord_vars = [] + for dim_name in dims_needed: + if dim_name in src_group.variables and dim_name not in vars_to_extract: + coord_vars.append(dim_name) # noqa: PERF401 + return coord_vars + + def _copy_variable(self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, var_name: str): + """Helper method to copy a variable from source to destination.""" + try: + src_var = src_group.variables[var_name] + + # Create variable in destination + dst_var = dst_dataset.createVariable( + var_name, + src_var.dtype, + src_var.dimensions, + zlib=True, + complevel=6, + shuffle=True, + fletcher32=True, + ) + + # Copy data and attributes + dst_var[:] = src_var[:] + for attr_name in src_var.ncattrs(): + dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + + self.logger.debug(" Copied variable: %s", var_name) + + except Exception as e: # noqa: BLE001 + self.logger.warning("Failed to copy variable %s: %s", var_name, e) + + def extract_groups_to_files(self, input_file, output_dir): + """Extract each group to a separate NetCDF file.""" + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + + all_groups = self.get_groups_netcdf4(input_file) + + self.logger.info("Extracting data from %s", input_file) + for group_name, group_parms in SCIENG_PARMS.items(): + if group_name not in all_groups: + self.logger.warning("Group %s not found in %s", group_name, input_file) + continue + try: + self.logger.info(" Group %s", group_name) + ds = xr.open_dataset(input_file, group=group_name) + output_file = output_dir / f"{group_name}.nc" + # Output only the variables of interest + parms = [p["name"] for p in group_parms if "name" in p] + self.logger.debug(" 
Variables to extract: %s", parms)
+                ds = ds[parms]
+                ds.to_netcdf(path=str(output_file), format="NETCDF4")
+                ds.close()
+                self.logger.info("Extracted %s to %s", group_name, output_file)
+            except (FileNotFoundError, OSError, ValueError):
+                self.logger.warning("Could not extract %s", group_name)
+            except KeyError:
+                self.logger.warning("Variable %s not found in group %s", parms, group_name)
+            except TypeError:
+                self.logger.warning(
+                    "Type error processing group %s: %s", group_name, sys.exc_info()
+                )
+
+    def process_command_line(self):
+        examples = "Examples:" + "\n\n"
+        examples += "  Write to local missionnetcdfs directory:\n"
+        examples += "    " + sys.argv[0] + " --mission 2020.064.10\n"
+        examples += "    " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n"
+
+        parser = argparse.ArgumentParser(
+            formatter_class=argparse.RawTextHelpFormatter,
+            description=__doc__,
+            epilog=examples,
+        )
+
+        parser.add_argument(
+            "--base_path",
+            action="store",
+            default=BASE_LRAUV_PATH,
+            help=(
+                "Base directory for missionlogs and missionnetcdfs, "
+                "default: auv_data in repo data directory"
+            ),
+        )
+        parser.add_argument(
+            "--title",
+            action="store",
+            help="A short description of the dataset",
+        )
+        parser.add_argument(
+            "--summary",
+            action="store",
+            help="Additional information about the dataset",
+        )
+
+        parser.add_argument(
+            "--noinput",
+            action="store_true",
+            help="Execute without asking for a response, e.g. 
to not ask to re-download file",
+        )
+        parser.add_argument(
+            "--clobber",
+            action="store_true",
+            help="Use with --noinput to overwrite existing downloaded log files",
+        )
+        parser.add_argument(
+            "--noreprocess",
+            action="store_true",
+            help="Use with --noinput to not re-process existing downloaded log files",
+        )
+        parser.add_argument(
+            "--filter_monotonic_time",
+            action="store_true",
+            default=True,
+            help="Filter out non-monotonic time values (default: True)",
+        )
+        parser.add_argument(
+            "--no_filter_monotonic_time",
+            dest="filter_monotonic_time",
+            action="store_false",
+            help="Keep all time values, including non-monotonic ones",
+        )
+        parser.add_argument(
+            "--start",
+            action="store",
+            help="Convert a range of missions with start time in YYYYMMDD format",
+        )
+        parser.add_argument(
+            "--end",
+            action="store",
+            help="Convert a range of missions with end time in YYYYMMDD format",
+        )
+        parser.add_argument(
+            "--auv_name",
+            action="store",
+            help="Name of the AUV and the directory name for its data, e.g.: tethys, ahi, pontus",
+        )
+        parser.add_argument(
+            "--log_file",
+            action="store",
+            help=(
+                "Path to the log file for the mission, e.g.: "
+                "brizo/missionlogs/2025/20250903_20250909/"
+                "20250905T072042/202509050720_202509051653.nc4"
+            ),
+        )
+        parser.add_argument(
+            "--known_hash",
+            action="store",
+            help=(
+                "Known hash for the file to be downloaded, e.g. 
" + "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85" + ), + ) + ( + parser.add_argument( + "--show_variable_mapping", + action="store_true", + help="Show the variable mapping: Group/variable_names -> their_renames", + ), + ) + parser.add_argument( + "-v", + "--verbose", + type=int, + choices=range(3), + action="store", + default=0, + const=1, + nargs="?", + help="verbosity level: " + + ", ".join( + [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], + ), + ) + + self.args = parser.parse_args() + self.logger.setLevel(self._log_levels[self.args.verbose]) + self.commandline = " ".join(sys.argv) + + +if __name__ == "__main__": + extract = Extract() + extract.process_command_line() + if extract.args.show_variable_mapping: + extract.show_variable_mapping() + sys.exit(0) + else: + extract.extract_groups_to_files_netcdf4(extract.args.log_file) diff --git a/src/data/process.py b/src/data/process.py index f0036d72..4dcedd38 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -1,13 +1,26 @@ #!/usr/bin/env python """ -Base module for data processing. +Base module for data processing for Dorado class and LRAUV class data. Run the data through standard science data processing to calibrated, aligned, and resampled netCDF files. Use a standard set of processing options; more flexibility is available via the inndividual processing modules. +The desire is to reuse as much code as possible between Dorado class +and LRAUV class data processing. The initial steps of creating the _cal.nc +files differ because Dorado class data are raw binary log files that need to be +processed to _nc files, while LRAUV class data are NetCDF4 log files that +already contain much of the necessary information. The initial step for Dorado +class data are: download_process and calibrate, while for LRAUV class data +are: extract and combine. 
After that, the processing steps are similar with
+the data in a local directory organized similarly to their institutional
+archives.
+
+Dorado class data processing:
+=============================
+
 Limit processing to specific steps by providing arugments:
-  --download_process
+  --download_process (logs2netcdf.py & lopcToNetCDF.py)
   --calibrate
   --align
   --resample
@@ -18,6 +31,21 @@
 If none provided then perform all steps.
 
 Uses command line arguments from logs2netcdf.py and calibrate.py.
+
+
+LRAUV class data processing:
+============================
+
+Limit processing to specific steps by providing arguments:
+  --extract (nc42netcdfs.py)
+  --combine
+  --align
+  --resample
+  --archive
+  --create_products
+  --email_to
+  --cleanup
+If none provided then perform all steps.
 """
 
 __author__ = "Mike McCann"
@@ -45,6 +73,7 @@ from emailer import NOTIFICATION_EMAIL, Emailer
 from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, AUV_NetCDF
 from lopcToNetCDF import LOPC_Processor, UnexpectedAreaOfCode
+from nc42netcdfs import BASE_LRAUV_PATH, BASE_LRAUV_WEB, Extract
 from resample import (
     AUVCTD_OPENDAP_BASE,
     FLASH_THRESHOLD,
@@ -67,6 +96,29 @@ class FailedMission(Exception):
     pass
 
 
+def log_file_processor(func):
+    """Decorator to handle LRAUV log_file processing exceptions and cleanup."""
+
+    def wrapper(self, log_file: str):
+        t_start = time.time()
+        try:
+            return func(self, log_file)
+        except (TestMission, FailedMission) as e:
+            self.logger.info(str(e))
+        finally:
+            if hasattr(self, "log_handler"):
+                # Cleanup and archiving logic
+                self.archive(mission=None, log_file=log_file)
+                if not self.args.no_cleanup:
+                    self.cleanup(log_file=log_file)
+                self.logger.info(
+                    "log_file %s took %.1f seconds to process", log_file, time.time() - t_start
+                )
+                self.logger.removeHandler(self.log_handler)
+
+    return wrapper
+
+
 class Processor:
     """
     Base class for data processing. 
Run the data through standard science data @@ -320,10 +372,20 @@ def resample(self, mission: str) -> None: finally: resamp.logger.removeHandler(self.log_handler) - def archive(self, mission: str, add_logger_handlers: bool = True) -> None: # noqa: FBT001, FBT002 + def archive( + self, + mission: str = None, + log_file: Path = None, + add_logger_handlers: bool = True, # noqa: FBT001, FBT002 + ) -> None: + """Archiving steps for mission or log_file. + + If mission is provided, archive the processed data for Dorado class vehicles. + If log_file is provided, archive the processed data for LRAUV class vehicles.""" arch = Archiver(add_logger_handlers) arch.args = argparse.Namespace() arch.args.auv_name = self.vehicle + arch.mount_dir = self.mount_dir arch.args.mission = mission arch.commandline = self.commandline arch.args.create_products = self.args.create_products @@ -334,25 +396,33 @@ def archive(self, mission: str, add_logger_handlers: bool = True) -> None: # no arch.args.verbose = self.args.verbose arch.logger.setLevel(self._log_levels[self.args.verbose]) if add_logger_handlers: - self.logger.info("Archiving steps for %s", mission) arch.logger.addHandler(self.log_handler) - file_name_base = f"{arch.args.auv_name}_{arch.args.mission}" - nc_file_base = Path( - BASE_PATH, - arch.args.auv_name, - MISSIONNETCDFS, - arch.args.mission, - file_name_base, - ) - self.logger.info("nc_file_base = %s, BASE_PATH = %s", nc_file_base, BASE_PATH) - if str(BASE_PATH).startswith(("/home/runner/", "/root")): - arch.logger.info( - "Not archiving %s %s to AUVCTD as it's likely CI testing", + if mission: + # Dorado class vehicle archiving + self.logger.info("Archiving steps for %s", mission) + file_name_base = f"{arch.args.auv_name}_{arch.args.mission}" + nc_file_base = Path( + BASE_PATH, arch.args.auv_name, + MISSIONNETCDFS, arch.args.mission, + file_name_base, ) + self.logger.info("nc_file_base = %s, BASE_PATH = %s", nc_file_base, BASE_PATH) + if 
str(BASE_PATH).startswith(("/home/runner/", "/root")): + arch.logger.info( + "Not archiving %s %s to AUVCTD as it's likely CI testing", + arch.args.auv_name, + arch.args.mission, + ) + else: + arch.copy_to_AUVTCD(nc_file_base, self.args.freq) + elif log_file: + # LRAUV class vehicle archiving + self.logger.info("Archiving steps for %s", log_file) + arch.copy_to_LRAUV(log_file, freq=self.args.freq) else: - arch.copy_to_AUVTCD(nc_file_base, self.args.freq) + arch.logger.error("Either mission or log_file must be provided for archiving.") arch.logger.removeHandler(self.log_handler) def create_products(self, mission: str) -> None: @@ -385,23 +455,59 @@ def email(self, mission: str) -> None: email.logger.setLevel(self._log_levels[self.args.verbose]) email.logger.addHandler(self.log_handler) - def cleanup(self, mission: str) -> None: - self.logger.info( - "Removing %s files from %s and %s", - mission, - MISSIONNETCDFS, - MISSIONLOGS, - ) - try: - shutil.rmtree( - Path(self.args.base_path, self.vehicle, MISSIONLOGS, mission), - ) - shutil.rmtree( - Path(self.args.base_path, self.vehicle, MISSIONNETCDFS, mission), + def _remove_empty_parents(self, path: Path, stop_at: Path) -> None: + """Remove empty parent directories up to stop_at path.""" + parent = path.parent + while parent != stop_at: + try: + ds_store = parent / ".DS_Store" + if ds_store.exists(): + ds_store.unlink() # Remove .DS_Store file so that the directory is empty + if parent.exists() and not any(parent.iterdir()): + self.logger.debug("Removing empty directory: %s", parent) + parent.rmdir() + parent = parent.parent + else: + break + except OSError as e: + self.logger.debug("Could not remove directory %s: %s", parent, e) + break + + def cleanup(self, mission: str = None, log_file: str = None) -> None: + if mission: + self.logger.info( + "Removing mission %s files from %s and %s", + mission, + MISSIONNETCDFS, + MISSIONLOGS, ) - self.logger.info("Done removing %s work files", mission) - except FileNotFoundError 
as e: - self.logger.info("File not found: %s", e) + try: + shutil.rmtree( + Path(self.args.base_path, self.vehicle, MISSIONLOGS, mission), + ) + shutil.rmtree( + Path(self.args.base_path, self.vehicle, MISSIONNETCDFS, mission), + ) + self.logger.info("Done removing %s work files", mission) + except FileNotFoundError as e: + self.logger.info("File not found: %s", e) + elif log_file: + self.logger.info("Removing work files from local directory for %s", log_file) + try: + log_path = Path(BASE_LRAUV_PATH, log_file).resolve() + for item in log_path.parent.iterdir(): + if item.is_file(): + self.logger.debug("Removing file %s", item) + item.unlink() + elif item.is_dir(): + self.logger.debug("Removing directory %s", item) + shutil.rmtree(item) + self._remove_empty_parents(log_path, Path(BASE_LRAUV_PATH)) + self.logger.info("Done removing work files for %s", log_file) + except FileNotFoundError as e: + self.logger.info("File not found: %s", e) + else: + self.logger.error("Either mission or log_file must be provided for cleanup.") def process_mission(self, mission: str, src_dir: str = "") -> None: # noqa: C901, PLR0912, PLR0915 netcdfs_dir = Path( @@ -621,6 +727,55 @@ def process_missions(self, start_year: int) -> None: src_dir=self.get_mission_dir(mission), ) + # ====================== LRAUV data specific processing ====================== + # The command line argument --log_file distinguishes LRAUV data from Dorado data. + # Dorado class data uses --mission instead. Also, start and end specifications + # are different for LRAUV data: --start and --end instead of --start_year, + # --start_yd, --end_year, and --end_yd. If --start and --end are specified then + # --auv_name is required to look up the individual log files to process.
+ + def extract(self, log_file: str) -> None: + self.logger.info("Extracting log file: %s", log_file) + extract = Extract() + extract.args = argparse.Namespace() + extract.args.verbose = self.args.verbose + extract.logger.setLevel(self._log_levels[self.args.verbose]) + extract.logger.addHandler(self.log_handler) + + url = os.path.join(BASE_LRAUV_WEB, log_file) # noqa: PTH118 + output_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + extract.logger.info("Downloading %s", url) + input_file = extract.download_with_pooch(url, output_dir) + return extract.extract_groups_to_files_netcdf4(input_file) + + @log_file_processor + def process_log_file(self, log_file: str) -> None: + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + Path(netcdfs_dir).mkdir(parents=True, exist_ok=True) + self.log_handler = logging.FileHandler( + Path(BASE_LRAUV_PATH, f"{log_file}_extract.log"), mode="w+" + ) + self.log_handler.setLevel(self._log_levels[self.args.verbose]) + self.log_handler.setFormatter(AUV_NetCDF._formatter) + self.logger.info( + "=====================================================================================================================", + ) + self.logger.addHandler(self.log_handler) + self.logger.info("commandline = %s", self.commandline) + + netcdfs_dir = self.extract(log_file) + # self.align(log_file) + # self.resample(log_file) + # self.create_products(log_file) + self.logger.info("Finished processing log file: %s", log_file) + + def process_log_files(self) -> None: + if self.args.log_file: + # log_file is string like: + # brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 + self.vehicle = self.args.log_file.split("/")[0].lower() + self.process_log_file(self.args.log_file) + def process_command_line(self): parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, @@ -741,7 +896,12 @@ def process_command_line(self): parser.add_argument( "--mission", action="store", - help="Process only this 
mission", + help="For Dorado class data - process only this mission", + ) + parser.add_argument( + "--log_file", + action="store", + help="For LRAUV class data - process only this log file", + ) + parser.add_argument( + "--freq", diff --git a/src/data/process_lrauv.py b/src/data/process_lrauv.py new file mode 100755 index 00000000..7a99f92b --- /dev/null +++ b/src/data/process_lrauv.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +""" +Process LRAUV data from NetCDF4 log files to resampled .nc files. +(This replaces the legacy lrauvNc4ToNetcdf.py script in STOQS.) + +Find LRAUV log files in smb://atlas.shore.mbari.org/LRAUV/missionlogs +and run the data through standard science data processing to calibrated, +aligned, and resampled netCDF files. Use a standard set of processing options; +more flexibility is available via the individual processing modules. + +Limit processing to specific steps by providing arguments: + --extract + --combine + --resample + --archive + --cleanup +If none provided then perform all steps. + +Uses command line arguments from nc42netcdfs.py and combine.py. +""" + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" + +from process import Processor + + +class LRAUVProcessor(Processor): + pass + + +if __name__ == "__main__": + VEHICLE = "tethys" + LRAUV_DIR = "/Volumes/LRAUV" + # It's possible that we might need calibration files for some sensors + # in the future, so point to a potential directory where they can be found.
+ CALIBRATION_DIR = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" + MOUNT_DIR = "smb://atlas.shore.mbari.org/LRAUV" + START_YEAR = 2012 + + proc = LRAUVProcessor(VEHICLE, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR) + proc.process_command_line() + proc.process_log_files() diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 3eb1033c..90ec047b 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -31,7 +31,7 @@ def test_process_dorado(complete_dorado_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 621298 + EXPECTED_SIZE_GITHUB = 621286 EXPECTED_SIZE_ACT = 621298 EXPECTED_SIZE_LOCAL = 621286 if str(proc.args.base_path).startswith("/home/runner"): @@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "6550bb8ed5919f21413f30dfffdcf116" + EXPECTED_MD5_GITHUB = "9f3f9e2e5abed08692ddb233dec0d0ac" EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" EXPECTED_MD5_LOCAL = "6ecb2229b00835055619e982fe9d5023" if str(proc.args.base_path).startswith("/home/runner"): diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index cbd2c2c3..e2f6cb05 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -30,7 +30,7 @@ def test_process_i2map(complete_i2map_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. 
- EXPECTED_SIZE_GITHUB = 58839 + EXPECTED_SIZE_GITHUB = 58832 EXPECTED_SIZE_ACT = 58816 EXPECTED_SIZE_LOCAL = 58884 if str(proc.args.base_path).startswith("/home/runner"): diff --git a/uv.lock b/uv.lock index e3c41136..82bbff19 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" [[package]] @@ -175,6 +175,7 @@ dependencies = [ { name = "datashader" }, { name = "defusedxml" }, { name = "gitpython" }, + { name = "gsw" }, { name = "hvplot" }, { name = "ipympl" }, { name = "jupyter" }, @@ -206,6 +207,7 @@ requires-dist = [ { name = "datashader", specifier = ">=0.18.1" }, { name = "defusedxml", specifier = ">=0.7.1" }, { name = "gitpython", specifier = ">=3.1.44" }, + { name = "gsw", specifier = ">=3.6.20" }, { name = "hvplot", specifier = ">=0.11.3" }, { name = "ipympl", specifier = ">=0.9.7" }, { name = "jupyter", specifier = ">=1.1.1" }, @@ -624,6 +626,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599, upload-time = "2025-01-02T07:32:40.731Z" }, ] +[[package]] +name = "gsw" +version = "3.6.20" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/39/edd76e26b0c8b8a6bcee0107cbcee5219673bb59f274b757de9f989a0fb1/gsw-3.6.20.tar.gz", hash = "sha256:e528cd6563fdc09b244387bfebf131b01199c20ac248f4e5b4eaf00cded1abe6", size = 2702713, upload-time = "2025-08-04T18:04:14.669Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d9/18382b8fe6e8736bad967dd4ed8ab2c2deabbb9f6121d9e41265e7317f24/gsw-3.6.20-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9656dcb42ddeee8134f2bb6d7394928b0b8629634c9e223f9cce7a3c7309597c", size = 2222002, upload-time = "2025-08-04T18:03:34.123Z" }, + { url 
= "https://files.pythonhosted.org/packages/9f/85/3a9ba4372ac4291e38e887ed8dac44c0385d4b72ee967a7858c4c7a48d96/gsw-3.6.20-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:857a1f0804980186514a0690e0f7dbdffd15a17059649771f3d3a84771e8fb8f", size = 2261350, upload-time = "2025-08-04T18:03:35.481Z" }, + { url = "https://files.pythonhosted.org/packages/dc/36/c3d845de2e453a01f6b1cb099c63ab63c581814d638890c143d064a33a8d/gsw-3.6.20-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b5a143b2993ac150c5b3cb7edf942d1376a20abbc57cc3d8ec4a5a430632890", size = 2400962, upload-time = "2025-08-04T18:03:37.194Z" }, + { url = "https://files.pythonhosted.org/packages/8f/f1/5b6999c89b3ea20cd9ac1169e0cd7c820a881ca97d6b34c7899da28a3d17/gsw-3.6.20-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:33ca2560378d1719fa49dcd380ce0c4a261b01cbd2aa865a3c6c99bfb90b5853", size = 2443576, upload-time = "2025-08-04T18:03:38.782Z" }, + { url = "https://files.pythonhosted.org/packages/13/ed/419237d32a704e4b4bbfcdec8129fbb381ccdf2e33a2cc7d1153c1a1eaa0/gsw-3.6.20-cp312-cp312-win_amd64.whl", hash = "sha256:719d1983bd97991e4e44c1c725322269fc7019c29abc7a641e6a676f1a54f54e", size = 2180514, upload-time = "2025-08-04T18:03:40.217Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -1387,6 +1405,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/1a/32b7427aaf62fed3d4e4456f874b25ce39373dbddf6cfde9edbcfc2417fc/netCDF4-1.7.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb95b11804fe051897d1f2044b05d82a1847bc2549631cdd2f655dde7de77a9c", size = 9377415, upload-time = "2024-10-22T19:00:54.412Z" }, { url = "https://files.pythonhosted.org/packages/fd/bf/5e671495c8bdf6b628e091aa8980793579474a10e51bc6ba302a3af6a778/netCDF4-1.7.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9d8a848373723f41ef662590b4f5e1832227501c9fd4513e8ad8da58c269977", 
size = 9260579, upload-time = "2024-10-22T19:00:56.594Z" }, { url = "https://files.pythonhosted.org/packages/d4/57/0a0bcdebcfaf72e96e7bcaa512f80ee096bf71945a3318d38253338e9c25/netCDF4-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:568ea369e00b581302d77fc5fd0b8f78e520c7e08d0b5af5219ba51f3f1cd694", size = 6991523, upload-time = "2024-10-22T19:00:58.97Z" }, + { url = "https://files.pythonhosted.org/packages/84/0a/182bb4fe5639699ba39d558b553b8e6f04fbfea6cf78404c0f21ef149bf7/netcdf4-1.7.2-cp311-abi3-macosx_13_0_x86_64.whl", hash = "sha256:7e81c3c47f2772eab0b93fba8bb05b17b58dce17720e1bed25e9d76551deecd0", size = 2751391, upload-time = "2025-10-13T18:32:22.749Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1f/54ac27c791360f7452ca27ed1cb2917946bbe1ea4337c590a5abcef6332d/netcdf4-1.7.2-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:cb2791dba37fc98fd1ac4e236c97822909f54efbcdf7f1415c9777810e0a28f4", size = 2387513, upload-time = "2025-10-13T18:32:27.499Z" }, + { url = "https://files.pythonhosted.org/packages/5c/5e/9bf3008a9e45c08f4c9fedce4d6f722ef5d970f56a9c5eb375a200dd2b66/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf11480f6b8a5b246818ffff6b4d90481e51f8b9555b41af0c372eb0aaf8b65f", size = 9621674, upload-time = "2025-10-13T18:32:29.193Z" }, + { url = "https://files.pythonhosted.org/packages/a1/75/46871e85f2bbfb1efe229623d25d7c9daa17e2e968d5235572b2c8bb53e8/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ccc05328a8ff31921b539821791aeb20b054879f3fdf6d1d505bf6422824fec", size = 9453759, upload-time = "2025-10-13T18:32:31.136Z" }, + { url = "https://files.pythonhosted.org/packages/cd/10/c52f12297965938d9b9be666ea1f9d8340c2aea31d6909d90aa650847248/netcdf4-1.7.2-cp311-abi3-win_amd64.whl", hash = "sha256:999bfc4acebf400ed724d5e7329e2e768accc7ee1fa1d82d505da782f730301b", size = 7148514, upload-time = "2025-10-13T18:32:33.121Z" }, ] [[package]]