From 3b3b7747bbeec307e9bddd5e71fb7a0464f349cc Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:07:34 -0800 Subject: [PATCH 01/13] Add --start and --end options for lrauv log file processing. --- src/data/process.py | 168 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 166 insertions(+), 2 deletions(-) diff --git a/src/data/process.py b/src/data/process.py index fde779ca..369ca597 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -178,6 +178,9 @@ def __init__(self, auv_name, vehicle_dir, mount_dir, calibration_dir, config=Non "end_yd": None, "last_n_days": None, "mission": None, + "start": None, # LRAUV datetime filtering + "end": None, # LRAUV datetime filtering + "auv_name": None, # LRAUV AUV name filtering } # Subset of config schema that should be passed to child processes @@ -280,6 +283,125 @@ def mission_list(self, start_year: int, end_year: int) -> dict: self.logger.warning("Cannot parse year from %s", mission) return missions + def _parse_datetime_string(self, datetime_str: str) -> datetime | None: + """Parse datetime string in YYYYMMDDTHHMMSS format.""" + try: + return datetime.strptime(datetime_str, "%Y%m%dT%H%M%S").replace(tzinfo=UTC) + except ValueError: + return None + + def _normalize_datetime_dir(self, dir_datetime_str: str) -> str: + """Normalize datetime directory name to YYYYMMDDTHHMMSS format.""" + if "T" not in dir_datetime_str: + return "" + + PARTIAL_DATETIME_LEN = 13 # YYYYMMDDTHHNN format + SHORT_DATETIME_LEN = 11 # YYYYMMDDTHH format + + if len(dir_datetime_str) == PARTIAL_DATETIME_LEN: + return dir_datetime_str + "00" # Add seconds + if len(dir_datetime_str) == SHORT_DATETIME_LEN: + return dir_datetime_str + "0000" # Add minutes and seconds + return dir_datetime_str + + def _find_log_files_in_datetime_dir( + self, datetime_dir: Path, start_dt: datetime, end_dt: datetime + ) -> list: + """Find log files in a datetime directory if it's in range.""" + log_files = [] + + # Normalize and parse 
directory datetime + normalized_str = self._normalize_datetime_dir(datetime_dir.name) + if not normalized_str: + return log_files + + dir_dt = self._parse_datetime_string(normalized_str) + if not dir_dt: + return log_files + + # Check if directory datetime is in range + if start_dt <= dir_dt <= end_dt: + # Look for main log file (*.nc4 file) + nc4_files = list(datetime_dir.glob("*.nc4")) + if nc4_files: + relative_path = str(nc4_files[0].relative_to(Path(self.vehicle_dir))) + log_files.append(relative_path) + self.logger.debug("Found log file: %s", relative_path) + + return log_files + + def _should_process_auv_dir(self, auv_dir: Path, auv_name: str) -> bool: + """Check if an AUV directory should be processed based on auv_name filter.""" + if auv_name and auv_dir.name.lower() != auv_name.lower(): + return False + + missionlogs_dir = auv_dir / "missionlogs" + return missionlogs_dir.exists() + + def log_file_list(self, start_datetime: str, end_datetime: str, auv_name: str = None) -> list: + """Return a list of LRAUV log files within the specified datetime range. + + Args: + start_datetime: Start datetime in YYYYMMDDTHHMMSS format + end_datetime: End datetime in YYYYMMDDTHHMMSS format + auv_name: Optional AUV name to filter results (e.g., 'brizo', 'ahi') + + Returns: + List of log file paths relative to base_path + """ + log_files = [] + vehicle_dir = Path(self.vehicle_dir).resolve() + + # Parse datetime strings + start_dt = self._parse_datetime_string(start_datetime) + end_dt = self._parse_datetime_string(end_datetime) + + if not start_dt or not end_dt: + self.logger.exception("Invalid datetime format. 
Use YYYYMMDDTHHMMSS") + return log_files + + if auv_name: + self.logger.info( + "Finding log files from %s to %s for AUV: %s", + start_datetime, + end_datetime, + auv_name, + ) + else: + self.logger.info( + "Finding log files from %s to %s for all AUVs", + start_datetime, + end_datetime, + ) + + # Search through each AUV directory + for auv_dir in vehicle_dir.glob("*/"): + if not self._should_process_auv_dir(auv_dir, auv_name): + continue + + missionlogs_dir = auv_dir / "missionlogs" + + # Search through years + for year_dir in sorted(missionlogs_dir.glob("*/")): + try: + year = int(year_dir.name) + # Skip if year is clearly outside our range + if year < start_dt.year or year > end_dt.year: + continue + except ValueError: + continue + + # Search through date range directories and datetime directories + for date_range_dir in year_dir.glob("*/"): + for datetime_dir in date_range_dir.glob("*/"): + files_found = self._find_log_files_in_datetime_dir( + datetime_dir, start_dt, end_dt + ) + log_files.extend(files_found) + + self.logger.info("Found %d log files in date range", len(log_files)) + return log_files + def get_mission_dir(self, mission: str) -> str: """Return the mission directory.""" if not Path(self.vehicle_dir).exists(): @@ -827,8 +949,8 @@ def combine(self, log_file: str) -> None: combine.logger.setLevel(self._log_levels[self.config["verbose"]]) combine.logger.addHandler(self.log_handler) - combine.combine_groups() - combine.write_netcdf() + combine.combine_groups(log_file=log_file) + combine.write_netcdf(log_file=log_file) @log_file_processor def process_log_file(self, log_file: str) -> None: @@ -858,6 +980,28 @@ def process_log_files(self) -> None: # brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 self.auv_name = self.config["log_file"].split("/")[0].lower() self.process_log_file(self.config["log_file"]) + elif self.config.get("start") and self.config.get("end"): + # Process multiple log files within datetime range + 
log_files = self.log_file_list( + self.config["start"], self.config["end"], self.config.get("auv_name") + ) + if not log_files: + self.logger.warning( + "No log files found in datetime range %s to %s", + self.config["start"], + self.config["end"], + ) + return + + self.logger.info("Processing %d log files in datetime range", len(log_files)) + for log_file in log_files: + # Extract AUV name from path + self.auv_name = log_file.split("/")[0].lower() + self.logger.info("Processing log file: %s", log_file) + self.process_log_file(log_file) + else: + self.logger.error("Must provide either --log_file or both --start and --end arguments") + return def process_command_line(self): parser = argparse.ArgumentParser( @@ -986,6 +1130,23 @@ def process_command_line(self): action="store", help="For LRAUV class data - process only this log file", ) + parser.add_argument( + "--start", + action="store", + help="For LRAUV class data - start processing from this datetime " + "(YYYYMMDDTHHMMSS format)", + ) + parser.add_argument( + "--end", + action="store", + help="For LRAUV class data - end processing at this datetime (YYYYMMDDTHHMMSS format)", + ) + parser.add_argument( + "--auv_name", + action="store", + help="For LRAUV class data - restrict log file search to this AUV name " + "(e.g., brizo, ahi). If not specified, all AUVs will be searched.", + ) parser.add_argument( "--freq", action="store", @@ -1094,5 +1255,8 @@ def process_command_line(self): # Process based on arguments if args.log_file: proc.process_log_files() + elif args.start and args.end: + # Process LRAUV log files in datetime range + proc.process_log_files() else: proc.process_missions(2020) From 5ffb2eb713630d4bc058931f27c7d4fc2df6293f Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:08:08 -0800 Subject: [PATCH 02/13] Make INFO logs less verbose. 
--- src/data/nc42netcdfs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index da65f9ec..85438e4a 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -241,10 +241,10 @@ def _extract_root_group( vars_to_extract, _ = self._get_available_variables(src_dataset, root_parms) # Add debugging output for root group processing - self.logger.info("=== ROOT GROUP DEBUG ===") - self.logger.info("Available variables: %s", sorted(vars_to_extract)) - self.logger.info("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) - self.logger.info( + self.logger.debug("=== ROOT GROUP DEBUG ===") + self.logger.debug("Available variables: %s", sorted(vars_to_extract)) + self.logger.debug("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) + self.logger.debug( "Available coordinate variables: %s", [v for v in sorted(src_dataset.variables.keys()) if v in src_dataset.dimensions], ) From af8f01f64f0479656c89e11e63c59de0d11bd7e9 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:10:52 -0800 Subject: [PATCH 03/13] Fixup the --verbose option. --- src/data/common_args.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/data/common_args.py b/src/data/common_args.py index e79b6198..dcfd2427 100644 --- a/src/data/common_args.py +++ b/src/data/common_args.py @@ -51,9 +51,12 @@ def get_core_parser(): ) parser.add_argument( "--verbose", + "-v", type=int, choices=range(3), default=0, + const=1, + nargs="?", help="Verbosity level: 0=WARN (default), 1=INFO, 2=DEBUG", ) From 9fd0bbf886b4c8211ffc0c2e96835851018f37e0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:13:18 -0800 Subject: [PATCH 04/13] Pass log_file as argument as --start & --end doesn't set --log_file. 
--- src/data/align.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index 21295e27..896e37f5 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -48,10 +48,15 @@ class Align_NetCDF: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def global_metadata(self): + def global_metadata(self, auv_name: str = "", mission: str = "", log_file: str = "") -> dict: # noqa: PLR0915 """Use instance variables to return a dictionary of metadata specific for the data that are written """ + # Support calling with self.args values and for + # either mission/vehicle or log_file as method args + auv_name = self.args.auv_name or auv_name + mission = self.args.mission or mission + log_file = self.args.log_file or log_file # Try to get actual host name, fall back to container name actual_hostname = os.getenv("HOST_NAME", gethostname()) repo = git.Repo(search_parent_directories=True) @@ -95,10 +100,9 @@ def global_metadata(self): metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." metadata["history"] = f"Created by {self.commandline} on {iso_now}" - if self.args.auv_name and self.args.mission: + if auv_name and mission: metadata["title"] = ( - f"Calibrated and aligned AUV sensor data from" - f" {self.args.auv_name} mission {self.args.mission}" + f"Calibrated and aligned AUV sensor data from {auv_name} mission {mission}" ) from_data = "calibrated data" metadata["source"] = ( @@ -114,10 +118,9 @@ def global_metadata(self): " and the coordinate variables aligned using MBARI's auv-python" " software." 
) - elif self.args.log_file: + elif log_file: metadata["title"] = ( - f"Combined and aligned LRAUV instrument data from" - f" log file {Path(self.args.log_file)}" + f"Combined and aligned LRAUV instrument data from log file {Path(log_file)}" ) from_data = "combined data" metadata["source"] = ( @@ -145,7 +148,7 @@ def global_metadata(self): f" host {gethostname()}. Software available at" f" 'https://github.com/mbari-org/auv-python'" ) - elif self.args.log_file: + elif log_file: matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", self.combined_nc.attrs["summary"], @@ -384,6 +387,10 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 self.min_lon = np.inf self.max_lon = -np.inf + # Coordinates - use mapping from global variable_time_coord_mapping attribute + variable_time_coord_mapping = json.loads( + self.combined_nc.attrs.get("variable_time_coord_mapping", "{}") + ) # Find navigation coordinates from combined data - must be from universals group nav_coords = {} for coord_type in ["longitude", "latitude", "depth", "time"]: @@ -400,7 +407,7 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 try: lat_interp = interp1d( self.combined_nc[nav_coords["latitude"]] - .get_index("universals_time") + .get_index(variable_time_coord_mapping[nav_coords["latitude"]]) .view(np.int64) .tolist(), self.combined_nc[nav_coords["latitude"]].values, @@ -413,7 +420,7 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 lon_interp = interp1d( self.combined_nc[nav_coords["longitude"]] - .get_index("universals_time") + .get_index(variable_time_coord_mapping[nav_coords["longitude"]]) .view(np.int64) .tolist(), self.combined_nc[nav_coords["longitude"]].values, @@ -426,7 +433,7 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 depth_interp = interp1d( self.combined_nc[nav_coords["depth"]] - .get_index("universals_time") + 
.get_index(variable_time_coord_mapping[nav_coords["depth"]]) .view(np.int64) .tolist(), self.combined_nc[nav_coords["depth"]].values, @@ -532,10 +539,6 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 ) > pd.to_datetime(self.max_time): self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC) - # Coordinates - use mapping from global variable_time_coord_mapping attribute - variable_time_coord_mapping = json.loads( - self.combined_nc.attrs.get("variable_time_coord_mapping", "{}") - ) time_coord = variable_time_coord_mapping.get(variable) depth_coord = ( time_coord[:-5] + "_depth" @@ -645,7 +648,7 @@ def write_combined_netcdf( vehicle = vehicle or self.args.auv_name out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_align.nc") - self.aligned_nc.attrs = self.global_metadata() + self.aligned_nc.attrs = self.global_metadata(log_file=log_file) self.logger.info("Writing aligned combined data to %s", out_fn) if out_fn.exists(): self.logger.debug("Removing existing file %s", out_fn) From e082d69d84e4e2aadfe710a89c9ca26f5899eaea Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:14:08 -0800 Subject: [PATCH 05/13] Pass in log_file and do plot before raising exception on error. 
--- src/data/AUV.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/data/AUV.py b/src/data/AUV.py index cffa6fe8..9915a748 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -37,6 +37,7 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 auv_name: str = "", mission: str = "", max_sec_diff_at_end: int = 10, + log_file: str = "", create_plots: bool = False, # noqa: FBT001, FBT002 ) -> tuple[xr.DataArray, xr.DataArray, int, float]: """ @@ -169,16 +170,23 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 end_lon_diff, end_lat_diff, ) - logger.info( - "Fix this error by calling _range_qc_combined_nc() in " - "_navigation_process() and/or _gps_process() for %s %s", - auv_name, - mission, - ) + if log_file: + logger.info( + "Fix this error by calling _range_qc_combined_nc() in " + "_navigation_process() and/or _gps_process() for %s", + log_file, + ) + logger.info("Run to get a plot: combine.py -v 1 --plot --log_file %s", log_file) + elif auv_name and mission: + logger.info( + "Fix this error by calling _range_qc_combined_nc() in " + "_navigation_process() and/or _gps_process() for %s %s", + auv_name, + mission, + ) error_message = ( f"abs(end_lon_diff) ({end_lon_diff}) > 1 or abs(end_lat_diff) ({end_lat_diff}) > 1" ) - raise ValueError(error_message) if abs(end_sec_diff) > max_sec_diff_at_end: logger.warning( "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", @@ -276,12 +284,16 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 name="latitude", ) - # Optional plotting code + # Optional plotting code - raise error after opportunity to plot if create_plots: _create_nudge_plots( lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger ) + if error_message: + logger.error("Nudge positions error: %s", error_message) + raise ValueError(error_message) + return lon_nudged, lat_nudged, segment_count, segment_minsum From 787fe4421b0f1a188d4fecd185335a6b3ca7c12d Mon 
Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:15:33 -0800 Subject: [PATCH 06/13] Get the correct time coordinates for lat & lon as some log files have different dimensions for lat/lon and depth. --- src/data/combine.py | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index dc05ecec..bf6cdf89 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -581,18 +581,23 @@ def _add_consolidation_comment(self, time_info: dict) -> None: f"Consolidated time coordinate from: {mapping_info}" ) - def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: + def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = "") -> None: """Add nudged longitude and latitude variables to the combined dataset.""" try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( # For LRAUV data the nav positions are shifted by 1 to align with GPS fixes - nav_longitude=self.combined_nc["universals_longitude"].shift(universals_time=1), - nav_latitude=self.combined_nc["universals_latitude"].shift(universals_time=1), + nav_longitude=self.combined_nc["universals_longitude"].shift( + **{self.variable_time_coord_mapping["universals_longitude"]: 1} + ), + nav_latitude=self.combined_nc["universals_latitude"].shift( + **{self.variable_time_coord_mapping["universals_latitude"]: 1} + ), gps_longitude=self.combined_nc["nal9602_longitude_fix"], gps_latitude=self.combined_nc["nal9602_latitude_fix"], logger=self.logger, auv_name="", mission="", + log_file=log_file, max_sec_diff_at_end=max_sec_diff_at_end, create_plots=self.args.plot, ) @@ -607,7 +612,11 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: ) self.combined_nc["nudged_longitude"] = xr.DataArray( nudged_longitude, - coords=[self.combined_nc["universals_time"].to_numpy()], + coords=[ + self.combined_nc[ + 
self.variable_time_coord_mapping["universals_longitude"] + ].to_numpy() + ], dims={f"nudged_{TIME}"}, name="nudged_longitude", ) @@ -622,7 +631,9 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: } self.combined_nc["nudged_latitude"] = xr.DataArray( nudged_latitude, - coords=[self.combined_nc["universals_time"].to_numpy()], + coords=[ + self.combined_nc[self.variable_time_coord_mapping["universals_latitude"]].to_numpy() + ], dims={f"nudged_{TIME}"}, name="nudged_latitude", ) @@ -636,9 +647,9 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: ), } - def combine_groups(self): + def combine_groups(self, log_file: str = None) -> None: """Combine group files into a single NetCDF dataset with consolidated time coordinates.""" - log_file = self.args.log_file + log_file = self.args.log_file or log_file src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) self.summary_fields = set() @@ -665,20 +676,20 @@ def combine_groups(self): self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"]) # Write intermediate file for cf_xarray decoding - intermediate_file = self._intermediate_write_netcdf() + intermediate_file = self._intermediate_write_netcdf(log_file=log_file) with xr.open_dataset(intermediate_file, decode_cf=True) as ds: self.combined_nc = ds.load() # Add nudged coordinates - self._add_nudged_coordinates() + self._add_nudged_coordinates(log_file=log_file) # Clean up intermediate file Path(intermediate_file).unlink() - def _intermediate_write_netcdf(self) -> None: + def _intermediate_write_netcdf(self, log_file: str = None) -> None: """Write out an intermediate combined netCDF file so that data can be read using decode_cf=True for nudge_positions() to work with cf accessors.""" - log_file = self.args.log_file + log_file = self.args.log_file or log_file netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) out_fn = 
Path(netcdfs_dir, f"{Path(log_file).stem}_combined_intermediate.nc") @@ -687,7 +698,7 @@ def _intermediate_write_netcdf(self) -> None: if Path(out_fn).exists(): Path(out_fn).unlink() self.combined_nc.to_netcdf(out_fn) - self.logger.info( + self.logger.debug( "Data variables written: %s", ", ".join(sorted(self.combined_nc.variables)), ) @@ -697,8 +708,8 @@ def _intermediate_write_netcdf(self) -> None: ) return out_fn - def write_netcdf(self) -> None: - log_file = self.args.log_file + def write_netcdf(self, log_file: str = None) -> None: + log_file = self.args.log_file or log_file netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") @@ -707,7 +718,7 @@ def write_netcdf(self) -> None: if Path(out_fn).exists(): Path(out_fn).unlink() self.combined_nc.to_netcdf(out_fn) - self.logger.info( + self.logger.debug( "Data variables written: %s", ", ".join(sorted(self.combined_nc.variables)), ) From 5084b5f7660f04845dced21182337429f7ff5d66 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:16:04 -0800 Subject: [PATCH 07/13] Testing --start and --end for lrauv processing. 
--- .vscode/launch.json | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 48d7a853..83614e3d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -120,7 +120,8 @@ "console": "integratedTerminal", "justMyCode": false, //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] }, { "name": "3.0 - align.py", @@ -312,9 +313,9 @@ //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2020.337.00", "--clobber"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2008.010.10"] //"args": ["-v", "2", "--mission", "2004.029.03", "--noinput", "--no_cleanup"], - //"args": ["-v", "1", "--mission", "2023.192.01", "--noinput", "--no_cleanup"], + "args": ["-v", "1", "--mission", "2023.192.01", "--noinput", "--no_cleanup"], //"args": ["-v", "1", "--mission", "2010.151.04", "--noinput", "--no_cleanup", "--clobber"], - "args": ["-v", "1", "--mission", "2025.316.02", "--noinput", "--no_cleanup", "--add_seconds", "619315200"], + //"args": ["-v", "1", "--mission", "2025.316.02", "--noinput", "--no_cleanup", "--add_seconds", "619315200"], }, { @@ -335,8 +336,12 @@ //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] //"args": ["-v", "1", "--log_file", 
"brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] - //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] + // Has different universals time coodinates for longitude/latitude and depth + "args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901T000000", "--end", "20121101T000000", "--noinput", "--no_cleanup"] + //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + // No nudged latitude and longitude variables + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"] }, ] From 8078ee8e7d7881c28b92af1aff36735a4d05ebd1 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:21:39 -0800 Subject: [PATCH 08/13] Initialize error_message. --- src/data/AUV.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/data/AUV.py b/src/data/AUV.py index 9915a748..87aa2b65 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -116,6 +116,7 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 MIN_SEGMENT_LENGTH = 10 seg_count = 0 seg_minsum = 0 + error_message = "" for i in range(len(lat_fix) - 1): # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes segi = np.where( From 712dc3d5b3f65ff1797fe1425def5796e75ace77 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 09:59:07 -0800 Subject: [PATCH 09/13] Reorder missions_to_check. 
--- src/data/calibrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index f06c1b69..4833568e 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -1615,6 +1615,7 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 "2007.134.09", "2010.293.00", "2011.116.00", + "2011.166.00", "2013.227.00", "2016.348.00", "2017.121.00", @@ -1622,7 +1623,6 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 "2017.297.00", "2017.347.00", "2017.304.00", - "2011.166.00", } if self.args.mission in missions_to_check: self.logger.info( From 2a82b29e80606ce461d6c2c7969f033a18bf4f6e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 10:15:18 -0800 Subject: [PATCH 10/13] Add _initial_coordinate_qc() and fix the renaming of _qced variables. --- .vscode/launch.json | 10 +++++----- src/data/combine.py | 46 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 83614e3d..c987e2a3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -14,7 +14,7 @@ //"args": ["--auv_name", "i2map", "--mission", "2020.055.01", "--noinput", "--local", "-v", "2", "--clobber"] //"args": ["--auv_name", "Dorado389", "--mission", "2020.245.00", "--noinput", "-v", "2", "--portal", "http://stoqs.mbari.org:8080/auvdata/v1", "--clobber"] //"args": ["--auv_name", "Dorado389", "--mission", "2020.245.00", "--noinput", "-v"] - //"args": ["--auv_name", "Dorado389", "--mission", "2017.297.00", "--local", "-v", "2"] + "args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "2", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs"] //"args": ["--auv_name", "Dorado389", "--start", "20190701", "--end", "20191230", "-v", "2"] //"args": ["--auv_name", "i2map", "--mission", "2021.062.01", "--noinput", "-v", "1"] //"args": ["--auv_name", "dorado", "--mission", "2021.109.00", "--noinput", 
"-v"] @@ -26,7 +26,7 @@ //"args": ["--auv_name", "dorado", "--mission", "2010.265.00", "--noinput", "-v"] //"args": ["--auv_name", "dorado", "--mission", "2023.324.00", "--noinput", "-v", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs"] // Mission suffering from GPS Rollover bug. Add 1024 * 7 * 24 * 3600 = 619315200 seconds - "args": ["--auv_name", "dorado", "--mission", "2025.316.02", "--noinput", "-v", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs", "--add_seconds", "619315200" ] + //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "--noinput", "-v", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs", "--add_seconds", "619315200" ] }, { "name": "1.1 - lopcToNetCDF", @@ -84,7 +84,7 @@ //"args": ["--auv_name", "dorado", "--mission", "2010.181.00", "--plot", "first1000", "-v", "1"] // OverflowError: time values outside range of 64 bit signed integers in calibrate.py:413 //"args": ["--auv_name", "dorado", "--mission", "2017.304.00", "--plot", "first1000", "-v", "1"] - //"args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "1"] + "args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "1"] //"args": ["--auv_name", "i2map", "--mission", "2022.094.01", "-v", "2"] //"args": ["--auv_name", "i2map", "--mission", "2018.025.00", "-v", "2"] //"args": ["--auv_name", "dorado", "--mission", "2017.248.01", "-v", "1"] @@ -102,7 +102,7 @@ //"args": ["--auv_name", "dorado", "--mission", "2018.079.00", "-v", "1"] //"args": ["--auv_name", "i2map", "--mission", "2018.348.01", "-v", "2"] //"args": ["--auv_name", "dorado", "--mission", "2023.324.00", "-v", "1", "--plot", "first10000"] - "args": ["--auv_name", "dorado", "--mission", "2022.201.00", "-v", "1", "--plot", "first10000"] + //"args": ["--auv_name", "dorado", "--mission", "2022.201.00", "-v", "1", "--plot", "first10000"] }, { "name": "2.1 - Test hs2_proc.py (its unit tests)", @@ -120,7 +120,7 @@ "console": "integratedTerminal", "justMyCode": false, //"args": ["-v", "1", "--log_file", 
"brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] - //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] }, { diff --git a/src/data/combine.py b/src/data/combine.py index bf6cdf89..46f1c3fc 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -213,13 +213,19 @@ def _range_qc_combined_nc( # noqa: C901, PLR0912 self.combined_nc[f"{var}_qced"] = ( self.combined_nc[var] .drop_isel({coord: out_of_range_indices}) - .rename({f"{instrument}_time": f"{instrument}_time_qced"}) + .rename({f"{coord}": f"{coord}_qced"}) + .rename(f"{var}_qced") ) self.combined_nc = self.combined_nc.drop_vars(inst_vars) for var in inst_vars: self.logger.debug("Renaming %s_qced to %s", var, var) - self.combined_nc[var] = self.combined_nc[f"{var}_qced"].rename( - {f"{coord}_qced": coord}, + coord = next(iter(self.combined_nc[f"{var}_qced"].coords)) + self.combined_nc[var] = ( + self.combined_nc[f"{var}_qced"] + .rename( + {f"{coord}": coord[:-5]}, # Remove '_qced' suffix from coord name + ) + .rename(var) ) qced_vars = [f"{var}_qced" for var in inst_vars] self.combined_nc = self.combined_nc.drop_vars(qced_vars) @@ -581,8 +587,42 @@ def _add_consolidation_comment(self, time_info: dict) -> None: f"Consolidated time coordinate from: {mapping_info}" ) + def _initial_coordinate_qc(self, log_file: str = "") -> None: + """Perform initial QC on core coordinate variables for specific log files.""" + log_file = log_file or self.args.log_file + if log_file in ( + "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", + ): + self.logger.info("Performing initial coordinate QC for %s", 
self.args.log_file) + self._range_qc_combined_nc( + instrument="universals", + variables=[ + "universals_longitude", + "universals_latitude", + ], + ranges={ + "universals_longitude": Range(-123.5, -121.5), + "universals_latitude": Range(35.0, 37.0), + }, + set_to_nan=False, + ) + self._range_qc_combined_nc( + instrument="nal9602", + variables=[ + "nal9602_longitude_fix", + "nal9602_latitude_fix", + ], + ranges={ + "nal9602_longitude_fix": Range(-123.5, -121.5), + "nal9602_latitude_fix": Range(35.0, 37.0), + }, + set_to_nan=False, + ) + def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = "") -> None: """Add nudged longitude and latitude variables to the combined dataset.""" + log_file = log_file or self.args.log_file + self._initial_coordinate_qc(log_file=log_file) try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( # For LRAUV data the nav positions are shifted by 1 to align with GPS fixes From e9cf8d9986d00566641d99a895d0ac10479393f0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 10:18:07 -0800 Subject: [PATCH 11/13] Undo the shift of 1 of dead reckoned positions in relation to gps. 
--- .vscode/launch.json | 4 ++-- src/data/combine.py | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index c987e2a3..4d79882c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -120,8 +120,8 @@ "console": "integratedTerminal", "justMyCode": false, //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] - //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] - "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] + //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] }, { "name": "3.0 - align.py", diff --git a/src/data/combine.py b/src/data/combine.py index 46f1c3fc..9141192e 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -625,13 +625,8 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = self._initial_coordinate_qc(log_file=log_file) try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( - # For LRAUV data the nav positions are shifted by 1 to align with GPS fixes - nav_longitude=self.combined_nc["universals_longitude"].shift( - **{self.variable_time_coord_mapping["universals_longitude"]: 1} - ), - nav_latitude=self.combined_nc["universals_latitude"].shift( - **{self.variable_time_coord_mapping["universals_latitude"]: 1} - ), + nav_longitude=self.combined_nc["universals_longitude"], + nav_latitude=self.combined_nc["universals_latitude"], gps_longitude=self.combined_nc["nal9602_longitude_fix"], gps_latitude=self.combined_nc["nal9602_latitude_fix"], 
logger=self.logger, From cb9d437cb5cd3f5ff84c23cd9367d01dc435f7b3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 13:31:39 -0800 Subject: [PATCH 12/13] Implement constructor-based architecture. Methods now use "self.attribute" consistently instead of optional arguments with fallbacks. --- .vscode/launch.json | 6 +- src/data/align.py | 114 ++++++++++++------- src/data/archive.py | 86 +++++++++----- src/data/calibrate.py | 219 +++++++++++++++++++++--------------- src/data/combine.py | 67 +++++++---- src/data/conftest.py | 26 +++-- src/data/create_products.py | 69 ++++++++---- src/data/logs2netcdfs.py | 188 +++++++++++++++++++++++-------- src/data/lopcMEP.py | 4 +- src/data/lopcToNetCDF.py | 4 +- src/data/nc42netcdfs.py | 35 +++++- src/data/process.py | 144 ++++++++++++++++-------- src/data/resample.py | 102 +++++++++++------ src/data/usblToNetCDF.py | 4 +- 14 files changed, 711 insertions(+), 357 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 4d79882c..82e4948c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -301,7 +301,7 @@ //"args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2007", "--end_year", "2007", "--create_products", "--num_cores", "1", "--archive_only_products"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2007", "--end_year", "2007", "--start_yd", "171", "--end_yd", "171", "--num_cores", "1", "--create_products", "--archive_only_products"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2008.261.01", "--create_products", "--archive", "--archive_only_products"] - //"args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2011", "--end_year", "2011", "--start_yd", "158", "--end_yd", "164", "--num_cores", "1"] + "args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2011", "--end_year", "2011", "--start_yd", "158", "--end_yd", "164", "--num_cores", "1"] //"args": ["-v", "1", "--noinput", "--start_year", "2016", "--end_year", 
"2016", "--start_yd", "270", "--end_yd", "270", "--num_cores", "1", "--create_products", "--archive", "--archive_only_products"] //"args": ["-v", "1", "--noinput", "--num_cores", "1", "--mission", "2023.285.01"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2018.079.00"] @@ -313,7 +313,7 @@ //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2020.337.00", "--clobber"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2008.010.10"] //"args": ["-v", "2", "--mission", "2004.029.03", "--noinput", "--no_cleanup"], - "args": ["-v", "1", "--mission", "2023.192.01", "--noinput", "--no_cleanup"], + //"args": ["-v", "1", "--mission", "2023.192.01", "--noinput", "--no_cleanup"], //"args": ["-v", "1", "--mission", "2010.151.04", "--noinput", "--no_cleanup", "--clobber"], //"args": ["-v", "1", "--mission", "2025.316.02", "--noinput", "--no_cleanup", "--add_seconds", "619315200"], @@ -339,6 +339,8 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] // Has different universals time coodinates for longitude/latitude and depth "args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901T000000", "--end", "20121101T000000", "--noinput", "--no_cleanup"] + // Conflicting sizes for nudged_time and data + //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // No nudged latitude and longitude variables //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"] diff --git a/src/data/align.py b/src/data/align.py index 896e37f5..497d47fd 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -48,15 +48,44 @@ class 
Align_NetCDF: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def global_metadata(self, auv_name: str = "", mission: str = "", log_file: str = "") -> dict: # noqa: PLR0915 + # noqa: PLR0913 - Many parameters needed for initialization + def __init__( # noqa: PLR0913 + self, + auv_name: str, + mission: str, + base_path: str, + log_file: str = "", + plot: str = None, + verbose: int = 0, + commandline: str = "", + ) -> None: + """Initialize Align_NetCDF with explicit parameters. + + Args: + auv_name: Name of the AUV (e.g., 'Dorado389', 'i2map', 'tethys') + mission: Mission identifier (e.g., '2011.256.02') + base_path: Base directory path for data + log_file: Optional LRAUV log file path for log-based processing + plot: Optional plot specification + verbose: Verbosity level (0=WARN, 1=INFO, 2=DEBUG) + commandline: Command line string for metadata + """ + self.auv_name = auv_name + self.mission = mission + self.base_path = base_path + self.log_file = log_file + self.plot = plot + self.verbose = verbose + self.commandline = commandline + self.logger.setLevel(self._log_levels[verbose]) + + def global_metadata(self) -> dict: # noqa: PLR0915 """Use instance variables to return a dictionary of metadata specific for the data that are written """ - # Support calling with self.args values and for - # either mission/vehicle or log_file as method args - auv_name = self.args.auv_name or auv_name - mission = self.args.mission or mission - log_file = self.args.log_file or log_file + auv_name = self.auv_name + mission = self.mission + log_file = self.log_file # Try to get actual host name, fall back to container name actual_hostname = os.getenv("HOST_NAME", gethostname()) repo = git.Repo(search_parent_directories=True) @@ -137,7 +166,7 @@ def global_metadata(self, auv_name: str = "", mission: str = "", log_file: str = " using MBARI's auv-python software." 
) # Append location of original data files to summary - if self.args.auv_name and self.args.mission: + if self.auv_name and self.mission: matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", self.calibrated_nc.attrs["summary"], @@ -164,15 +193,14 @@ def global_metadata(self, auv_name: str = "", mission: str = "", log_file: str = return metadata - def process_cal(self, vehicle: str = "", name: str = "", log_file: str = "") -> None: # noqa: C901, PLR0912, PLR0915 - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name - if name and vehicle: - netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) - src_file = Path(netcdfs_dir, f"{vehicle}_{name}_cal.nc") - elif log_file: - netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(log_file).parent}") - src_file = Path(netcdfs_dir, f"{Path(log_file).stem}_cal.nc") + def process_cal(self) -> Path: # noqa: C901, PLR0912, PLR0915 + """Process calibrated netCDF file using instance attributes.""" + if self.mission and self.auv_name: + netcdfs_dir = Path(self.base_path, self.auv_name, MISSIONNETCDFS, self.mission) + src_file = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_cal.nc") + elif self.log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(self.log_file).parent}") + src_file = Path(netcdfs_dir, f"{Path(self.log_file).stem}_cal.nc") else: msg = "Must provide either mission and vehicle or log_file" raise ValueError(msg) @@ -370,10 +398,10 @@ def process_cal(self, vehicle: str = "", name: str = "", log_file: str = "") -> return netcdfs_dir - def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR0915 + def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915 """Process combined LRAUV data from *_combined.nc files created by combine.py""" - netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(log_file).parent}") - src_file = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(self.log_file).parent}") + 
src_file = Path(netcdfs_dir, f"{Path(self.log_file).stem}_combined.nc") self.combined_nc = xr.open_dataset(src_file) self.logger.info("Processing %s", src_file) @@ -636,19 +664,15 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 return netcdfs_dir - def write_combined_netcdf( - self, netcdfs_dir, vehicle: str = "", name: str = "", log_file: str = "" - ) -> None: + def write_combined_netcdf(self, netcdfs_dir: Path) -> None: """Write aligned combined data to NetCDF file""" - if log_file: + if self.log_file: # For LRAUV log files, use the log file stem for output name - out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_align.nc") + out_fn = Path(netcdfs_dir, f"{Path(self.log_file).stem}_align.nc") else: - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name - out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_align.nc") + out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_align.nc") - self.aligned_nc.attrs = self.global_metadata(log_file=log_file) + self.aligned_nc.attrs = self.global_metadata() self.logger.info("Writing aligned combined data to %s", out_fn) if out_fn.exists(): self.logger.debug("Removing existing file %s", out_fn) @@ -659,11 +683,10 @@ def write_combined_netcdf( ", ".join(sorted(self.aligned_nc.variables)), ) - def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name + def write_netcdf(self, netcdfs_dir: Path) -> None: + """Write aligned netCDF file using instance attributes.""" self.aligned_nc.attrs = self.global_metadata() - out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_align.nc") + out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_align.nc") self.logger.info("Writing aligned data to %s", out_fn) if out_fn.exists(): self.logger.debug("Removing file %s", out_fn) @@ -701,21 +724,32 @@ def process_command_line(self): help="Create intermediate plots to validate data operations.", ) - 
self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) - self.commandline = " ".join(sys.argv) + args = parser.parse_args() + + # Reinitialize object with parsed arguments + self.__init__( + auv_name=args.auv_name, + mission=args.mission, + base_path=args.base_path, + log_file=args.log_file if hasattr(args, "log_file") else None, + plot=args.plot if hasattr(args, "plot") else False, + verbose=args.verbose, + commandline=" ".join(sys.argv), + ) + self.logger.setLevel(self._log_levels[args.verbose]) if __name__ == "__main__": - align_netcdf = Align_NetCDF() + # Create with default values for command-line usage + align_netcdf = Align_NetCDF(auv_name="", mission="", base_path="") align_netcdf.process_command_line() p_start = time.time() - if align_netcdf.args.log_file: + if align_netcdf.log_file: # Process combined LRAUV data using log_file - netcdf_dir = align_netcdf.process_combined(log_file=align_netcdf.args.log_file) - align_netcdf.write_combined_netcdf(netcdf_dir, log_file=align_netcdf.args.log_file) - elif align_netcdf.args.auv_name and align_netcdf.args.mission: + netcdf_dir = align_netcdf.process_combined() + align_netcdf.write_combined_netcdf(netcdf_dir) + elif align_netcdf.auv_name and align_netcdf.mission: # Process calibrated data using auv_name and mission netcdf_dir = align_netcdf.process_cal() align_netcdf.write_netcdf(netcdf_dir) diff --git a/src/data/archive.py b/src/data/archive.py index 69d43f31..78222899 100755 --- a/src/data/archive.py +++ b/src/data/archive.py @@ -36,7 +36,44 @@ class Archiver: _handler.setFormatter(AUV_NetCDF._formatter) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def __init__(self, add_handlers=True): # noqa: FBT002 + def __init__( # noqa: PLR0913 + self, + add_handlers: bool = True, # noqa: FBT001, FBT002 + auv_name: str = None, + mission: str = None, + clobber: bool = False, # noqa: FBT001, FBT002 + resample: bool = False, # noqa: FBT001, FBT002 + flash_threshold: float = 
None, + archive_only_products: bool = False, # noqa: FBT001, FBT002 + create_products: bool = False, # noqa: FBT001, FBT002 + verbose: int = 0, + commandline: str = "", + ): + """Initialize Archiver with explicit parameters. + + Args: + add_handlers: Whether to add logging handlers + auv_name: Name of the AUV vehicle + mission: Mission identifier + clobber: Overwrite existing files + resample: Resample flag + flash_threshold: Flash detection threshold + archive_only_products: Archive only product files + create_products: Create product files flag + verbose: Verbosity level (0-2) + commandline: Command line string for tracking + """ + self.auv_name = auv_name + self.mission = mission + self.clobber = clobber + self.resample = resample + self.flash_threshold = flash_threshold + self.archive_only_products = archive_only_products + self.create_products = create_products + self.verbose = verbose + self.commandline = commandline + self.mount_dir = None # Will be set by caller + if add_handlers: self.logger.addHandler(self._handler) @@ -56,29 +93,26 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: self.logger.exception("%s not found", surveys_dir) self.logger.info("Is smb://atlas.shore.mbari.org/AUVCTD mounted?") sys.exit(1) - year = self.args.mission.split(".")[0] + year = self.mission.split(".")[0] surveynetcdfs_dir = Path(surveys_dir, year, "netcdf") # To avoid "fchmod failed: Permission denied" message use shutil.copyfile - if not self.args.archive_only_products: + if not self.archive_only_products: self.logger.info("Archiving %s files to %s", nc_file_base, surveynetcdfs_dir) # Copy netCDF files to AUVCTD/surveys/YYYY/netcdf - if hasattr(self.args, "flash_threshold"): - if self.args.flash_threshold and self.args.resample: - ft_ending = f"{freq}_ft{self.args.flash_threshold:.0E}.nc".replace( - "E+", - "E", - ) - ftypes = (ft_ending,) - else: - ftypes = (f"{freq}.nc", "cal.nc", "align.nc") + if self.flash_threshold and self.resample: + 
ft_ending = f"{freq}_ft{self.flash_threshold:.0E}.nc".replace( + "E+", + "E", + ) + ftypes = (ft_ending,) else: ftypes = (f"{freq}.nc", "cal.nc", "align.nc") for ftype in ftypes: src_file = Path(f"{nc_file_base}_{ftype}") dst_file = Path(surveynetcdfs_dir, src_file.name) - if self.args.clobber: + if self.clobber: if dst_file.exists(): self.logger.info("Removing %s", dst_file) dst_file.unlink() @@ -91,15 +125,15 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: src_file.name, ) - if not hasattr(self.args, "resample") or not self.args.resample: + if not self.resample: # Copy intermediate files to AUVCTD/missionnetcdfs/YYYY/YYYYJJJ - YYYYJJJ = "".join(self.args.mission.split(".")[:2]) + YYYYJJJ = "".join(self.mission.split(".")[:2]) missionnetcdfs_dir = Path( AUVCTD_VOL, MISSIONNETCDFS, year, YYYYJJJ, - self.args.mission, + self.mission, ) Path(missionnetcdfs_dir).mkdir(parents=True, exist_ok=True) src_dir = Path(nc_file_base).parent @@ -107,7 +141,7 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: # so that lopc.nc is archived along with the other netcdf versions of the log files. 
for log in [*LOG_FILES, "lopc.log"]: src_file = Path(src_dir, f"{log.replace('.log', '')}.nc") - if self.args.clobber: + if self.clobber: if src_file.exists(): shutil.copyfile(src_file, missionnetcdfs_dir / src_file.name) self.logger.info("copyfile %s %s done.", src_file, missionnetcdfs_dir) @@ -122,14 +156,14 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: for src_dir, dst_dir in ((MISSIONODVS, "odv"), (MISSIONIMAGES, "images")): src_dir = Path( # noqa: PLW2901 BASE_PATH, - self.args.auv_name, + self.auv_name, src_dir, - self.args.mission, + self.mission, ) if Path(src_dir).exists(): dst_dir = Path(surveys_dir, year, dst_dir) # noqa: PLW2901 Path(dst_dir).mkdir(parents=True, exist_ok=True) - if self.args.clobber: + if self.clobber: # Copy files individually to avoid permission issues with copytree. # This will not copy subdirectories, but we don't expect any. for src_file in src_dir.glob("*"): @@ -151,7 +185,7 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: ) else: self.logger.debug("%s not found", src_dir) - if self.args.create_products or (hasattr(self.args, "resample") and self.args.resample): + if self.create_products or self.resample: # Do not copy processing.log file if only partial processing was done self.logger.info( "Partial processing, not archiving %s", @@ -162,7 +196,7 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: src_file = Path(f"{nc_file_base}_{LOG_NAME}") dst_file = Path(surveynetcdfs_dir, src_file.name) if src_file.exists(): - if self.args.clobber: + if self.clobber: self.logger.info("copyfile %s %s", src_file, surveynetcdfs_dir) shutil.copyfile(src_file, dst_file) self.logger.info("copyfile %s %s done.", src_file, surveynetcdfs_dir) @@ -187,7 +221,7 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: # noqa: C901, sys.exit(1) for src_file in sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")): dst_file = 
Path(dst_dir, src_file.name) - if self.args.clobber: + if self.clobber: if dst_file.exists(): self.logger.info("Removing %s", dst_file) dst_file.unlink() @@ -202,7 +236,7 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: # noqa: C901, for ftype in (f"{freq}.nc", "combined.nc", "align.nc"): src_file = Path(src_dir, f"{Path(log_file).stem}_{ftype}") dst_file = Path(dst_dir, src_file.name) - if self.args.clobber: + if self.clobber: if dst_file.exists(): self.logger.info("Removing %s", dst_file) dst_file.unlink() @@ -218,7 +252,7 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: # noqa: C901, src_file = Path(src_dir, f"{Path(log_file).stem}_{LOG_NAME}") dst_file = Path(dst_dir, src_file.name) if src_file.exists(): - if self.args.clobber: + if self.clobber: self.logger.info("copyfile %s %s", src_file, dst_dir) shutil.copyfile(src_file, dst_file) self.logger.info("copyfile %s %s done.", src_file, dst_dir) @@ -258,7 +292,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 4833568e..68be6a61 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -600,6 +600,49 @@ class Calibrate_NetCDF: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + # noqa: PLR0913 - Many parameters needed for initialization + def __init__( # noqa: PLR0913 + self, + auv_name: str, + mission: str, + base_path: str, + calibration_dir: str, + plot: str = None, + verbose: int = 0, + commandline: str = "", + local: bool = False, # noqa: FBT001, FBT002 + noinput: bool = False, # noqa: FBT001, FBT002 + clobber: bool = False, # noqa: FBT001, FBT002 + noreprocess: bool = False, # noqa: FBT001, FBT002 + ) -> None: + """Initialize Calibrate_NetCDF with explicit parameters. 
+ + Args: + auv_name: Name of the AUV + mission: Mission identifier + base_path: Base directory path for data + calibration_dir: Directory containing calibration files + plot: Optional plot specification + verbose: Verbosity level (0=WARN, 1=INFO, 2=DEBUG) + commandline: Command line string for metadata + local: Use local data only (no downloads) + noinput: Don't prompt for user input + clobber: Overwrite existing files + noreprocess: Skip reprocessing if output exists + """ + self.auv_name = auv_name + self.mission = mission + self.base_path = base_path + self.calibration_dir = calibration_dir + self.plot = plot + self.verbose = verbose + self.commandline = commandline + self.local = local + self.noinput = noinput + self.clobber = clobber + self.noreprocess = noreprocess + self.logger.setLevel(self._log_levels[verbose]) + def global_metadata(self): """Use instance variables to return a dictionary of metadata specific for the data that are written @@ -631,7 +674,7 @@ def global_metadata(self): metadata["history"] = f"Created by {self.commandline} on {iso_now}" metadata["title"] = ( - f"Calibrated AUV sensor data from {self.args.auv_name} mission {self.args.mission}" + f"Calibrated AUV sensor data from {self.auv_name} mission {self.mission}" ) metadata["summary"] = ( "Observational oceanographic data obtained from an Autonomous" @@ -665,7 +708,7 @@ def _get_file(self, download_url, local_filename, session): with Path(local_filename).open("wb") as handle: for chunk in resp.content.iter_chunked(1024): handle.write(chunk) - if self.args.verbose > 1: + if self.verbose > 1: self.logger.info("%s(done)", Path(local_filename).name) def _define_sensor_info(self, start_datetime): @@ -830,7 +873,7 @@ class SensorOffset(NamedTuple): ) # Changes over time - if self.args.auv_name.lower().startswith("dorado"): + if self.auv_name.lower().startswith("dorado"): self.sinfo["depth"]["sensor_offset"] = None if start_datetime >= datetime(2007, 4, 30, tzinfo=UTC): # First missions with 10 
Gulpers: 2007.120.00 & 2007.120.01 @@ -1199,7 +1242,7 @@ def _read_oxy_coeffs( # noqa: C901, PLR0912, PLR0915 self.logger.debug( "Finding calibration file for oxygen serial number = %s on mission %s", serial_number, - self.args.mission, + self.mission, ) safe_calibration_dir = Path(self.calibration_dir).resolve() @@ -1244,7 +1287,7 @@ def _read_oxy_coeffs( # noqa: C901, PLR0912, PLR0915 self.logger.info( "Breaking from loop as %s is after %s with mission_start=%s", cal_dates[cal_date], - self.args.mission, + self.mission, mission_start, ) break @@ -1254,14 +1297,14 @@ def _read_oxy_coeffs( # noqa: C901, PLR0912, PLR0915 self.logger.info( "File %s is just before %s with mission_start=%s", cal_dates[cal_date_to_use], - self.args.mission, + self.mission, mission_start, ) else: self.logger.info( "File %s is the first calibration file, but is after %s with mission_start=%s", cal_dates[cal_date_to_use], - self.args.mission, + self.mission, mission_start, ) @@ -1372,7 +1415,7 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -1514,7 +1557,7 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 # - all missions in Monterey Bay (Zone 10) self.logger.info( "Converting from Easting/Northing to lat/lon for mission %s", - self.args.mission, + self.mission, ) proj = pyproj.Proj(proj="utm", zone=10, ellps="WGS84", radians=False) navlons, navlats = proj( @@ -1571,14 +1614,14 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 # pdIndx = find(Nav.depth > 1); # posDepths = Nav.depth(pdIndx); pos_depths = np.where(self.combined_nc["navigation_depth"].to_numpy() > 1) - if self.args.mission in {"2013.301.02", "2009.111.00"}: + if self.mission in {"2013.301.02", "2009.111.00"}: 
self.logger.info("Bypassing Nav QC depth check") maxGoodDepth = 1250 else: if pos_depths[0].size == 0: self.logger.warning( "No positive depths found in %s/navigation.nc", - self.args.mission, + self.mission, ) maxGoodDepth = 1250 else: @@ -1586,15 +1629,15 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 self.logger.debug("median of positive valued depths = %s", np.median(pos_depths)) if maxGoodDepth < 0: maxGoodDepth = 100 # Fudge for the 2009.272.00 mission where median was -0.1347! - if self.args.mission == "2010.153.01": + if self.mission == "2010.153.01": maxGoodDepth = 1250 # Fudge for 2010.153.01 where the depth was bogus, about 1.3 self.logger.debug("Finding depths less than '%s' and times > 0'", maxGoodDepth) - if self.args.mission == "2010.172.01": + if self.mission == "2010.172.01": self.logger.info( "Performing special QC for %s/navigation.nc", - self.args.mission, + self.mission, ) self._range_qc_combined_nc( instrument="navigation", @@ -1624,9 +1667,9 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 "2017.347.00", "2017.304.00", } - if self.args.mission in missions_to_check: + if self.mission in missions_to_check: self.logger.info( - "Removing points outside of Monterey Bay for %s/navigation.nc", self.args.mission + "Removing points outside of Monterey Bay for %s/navigation.nc", self.mission ) self._range_qc_combined_nc( instrument="navigation", @@ -1636,10 +1679,10 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 "navigation_latitude": Range(36, 37), }, ) - if self.args.mission == "2010.284.00": + if self.mission == "2010.284.00": self.logger.info( "Removing points outside of time range for %s/navigation.nc", - self.args.mission, + self.mission, ) self._range_qc_combined_nc( instrument="navigation", @@ -1672,8 +1715,8 @@ def _nudge_pos(self, max_sec_diff_at_end=10): gps_longitude=lon_fix, gps_latitude=lat_fix, logger=self.logger, - auv_name=self.args.auv_name, - 
mission=self.args.mission, + auv_name=self.auv_name, + mission=self.mission, max_sec_diff_at_end=max_sec_diff_at_end, create_plots=False, ) @@ -1691,27 +1734,27 @@ def _gps_process(self, sensor): self.logger.exception("%s", e) # noqa: TRY401 return except AttributeError: - if self.args.mission == "2010.151.04": + if self.mission == "2010.151.04": # Gulf of Mexico mission - use data from usbl.dat file(s) usbl_file = Path( - self.args.base_path, - self.args.auv_name, + self.base_path, + self.auv_name, MISSIONNETCDFS, - self.args.mission, + self.mission, "usbl.nc", ) if not usbl_file.exists(): # Copy from archive AUVCTD/missionnetcdfs/YYYY/YYYYJJJ the usbl.nc file from archive import AUVCTD_VOL - year = self.args.mission.split(".")[0] - YYYYJJJ = "".join(self.args.mission.split(".")[:2]) + year = self.mission.split(".")[0] + YYYYJJJ = "".join(self.mission.split(".")[:2]) missionnetcdfs_dir = Path( AUVCTD_VOL, MISSIONNETCDFS, year, YYYYJJJ, - self.args.mission, + self.mission, ) shutil.copyfile( Path(missionnetcdfs_dir, "usbl.nc"), @@ -1732,7 +1775,7 @@ def _gps_process(self, sensor): else: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -1782,7 +1825,7 @@ def _gps_process(self, sensor): "units": "degrees_east", "comment": f"longitude from {source}", } - if self.args.mission in { + if self.mission in { "2004.345.00", "2005.240.00", "2007.134.09", @@ -1797,9 +1840,7 @@ def _gps_process(self, sensor): "2017.304.00", "2011.166.00", }: - self.logger.info( - "Removing points outside of Monterey Bay for %s/gps.nc", self.args.mission - ) + self.logger.info("Removing points outside of Monterey Bay for %s/gps.nc", self.mission) self._range_qc_combined_nc( instrument="gps", variables=vars_to_qc, @@ -1888,12 +1929,12 @@ def _depth_process(self, sensor, latitude=36, cutoff_freq=1): # noqa: PLR0915 
"2012.258.00": Range(-1, 160), # Shallow Monterey Bay "2012.270.04": Range(-1, 30), # Shallow Monterey Bay } - if self.args.mission in mission_depth_ranges: - valid_depth_range = mission_depth_ranges[self.args.mission] + if self.mission in mission_depth_ranges: + valid_depth_range = mission_depth_ranges[self.mission] self.logger.info( - "Removing depths outside of valid_depth_range=%s for self.args.mission=%s", + "Removing depths outside of valid_depth_range=%s for self.mission=%s", valid_depth_range, - self.args.mission, + self.mission, ) out_of_range = np.where( (depths < valid_depth_range.min) | (depths > valid_depth_range.max), @@ -1941,20 +1982,19 @@ def _depth_process(self, sensor, latitude=36, cutoff_freq=1): # noqa: PLR0915 b = signal.windows.boxcar(a) depth_filtpres_boxcar = signal.filtfilt(b, a, pres) pres_plot = True # Set to False for debugging other plots - if self.args.plot and pres_plot: + if self.plot and pres_plot: # Use Pandas to plot multiple columns of data # to validate that the filtering works as expected pbeg = 0 pend = len(depths.get_index("time")) - if self.args.plot.startswith("first"): - pend = int(self.args.plot.split("first")[1]) + if self.plot.startswith("first"): + pend = int(self.plot.split("first")[1]) df_plot = pd.DataFrame(index=depths.get_index("time")[pbeg:pend]) df_plot["pres"] = pres[pbeg:pend] df_plot["depth_filtpres_butter"] = depth_filtpres_butter[pbeg:pend] df_plot["depth_filtpres_boxcar"] = depth_filtpres_boxcar[pbeg:pend] title = ( - f"First {pend} points from" - f" {self.args.mission}/{self.sinfo[sensor]['data_filename']}" + f"First {pend} points from" f" {self.mission}/{self.sinfo[sensor]['data_filename']}" ) ax = df_plot.plot(title=title, figsize=(18, 6)) ax.grid("on") @@ -2144,19 +2184,18 @@ def _hs2_process(self, sensor, logs_dir): # noqa: C901, PLR0912, PLR0915 red_bs = red_bs[:][~mfl.mask] red_blue_plot = True # Set to False for debugging other plots - if self.args.plot and red_blue_plot: + if self.plot and 
red_blue_plot: # Use Pandas to more easiily plot multiple columns of data pbeg = 0 pend = len(blue_bs.get_index("hs2_time")) - if self.args.plot.startswith("first"): - pend = int(self.args.plot.split("first")[1]) + if self.plot.startswith("first"): + pend = int(self.plot.split("first")[1]) df_plot = pd.DataFrame(index=blue_bs.get_index("hs2_time")[pbeg:pend]) df_plot["blue_bs"] = blue_bs[pbeg:pend] df_plot["red_bs"] = red_bs[pbeg:pend] ## df_plot["fl"] = fl[pbeg:pend] title = ( - f"First {pend} points from" - f" {self.args.mission}/{self.sinfo[sensor]['data_filename']}" + f"First {pend} points from" f" {self.mission}/{self.sinfo[sensor]['data_filename']}" ) ax = df_plot.plot(title=title, figsize=(18, 6), ylim=(-0.003, 0.004)) ax.grid("on") @@ -2189,7 +2228,7 @@ def _hs2_process(self, sensor, logs_dir): # noqa: C901, PLR0912, PLR0915 sensor, orig_nc, ) - out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" + out_fn = f"{self.auv_name}_{self.mission}_cal.nc" self.combined_nc[f"{sensor}_depth"].attrs = { "long_name": "Depth", "units": "m", @@ -2299,7 +2338,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2338,8 +2377,10 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 self.combined_nc[f"{sensor}_temperature"] = temperature self.logger.debug("Calling _calibrated_sal_from_cond_frequency()") + # Create a simple namespace for backward compatibility with helper functions + args_ns = type("obj", (object,), {"plot": self.plot})() cal_conductivity, cal_salinity = _calibrated_sal_from_cond_frequency( - self.args, + args_ns, self.combined_nc, self.logger, cf, @@ -2466,12 +2507,12 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 "", 
) except KeyError: - self.logger.debug("No dissolvedO2 data in %s", self.args.mission) + self.logger.debug("No dissolvedO2 data in %s", self.mission) except ValueError as e: cfg_file = Path( MISSIONLOGS, - "".join(self.args.mission.split(".")[:2]), - self.args.mission, + "".join(self.mission.split(".")[:2]), + self.mission, self.sinfo["ctd"]["cal_filename"], ) self.logger.exception("Likely missing a calibration coefficient in %s", cfg_file) @@ -2504,7 +2545,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 "port", ) except KeyError: - self.logger.debug("No dissolvedO2_port data in %s", self.args.mission) + self.logger.debug("No dissolvedO2_port data in %s", self.mission) self.logger.debug("Collecting dissolvedO2_port") try: dissolvedO2_stbd = xr.DataArray( @@ -2533,7 +2574,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 "stbd", ) except KeyError: - self.logger.debug("No dissolvedO2_port data in %s", self.args.mission) + self.logger.debug("No dissolvedO2_port data in %s", self.mission) # === flow variables === # A lot of 0.0 values in Dorado missions until about 2020.282.01 @@ -2552,7 +2593,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 } self.combined_nc[f"{sensor}_flow1"] = flow1 except KeyError: - self.logger.debug("No flow1 data in %s", self.args.mission) + self.logger.debug("No flow1 data in %s", self.mission) self.logger.debug("Collecting flow2") try: flow2 = xr.DataArray( @@ -2568,7 +2609,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 } self.combined_nc[f"{sensor}_flow2"] = flow2 except KeyError: - self.logger.debug("No flow2 data in %s", self.args.mission) + self.logger.debug("No flow2 data in %s", self.mission) # === beam_transmittance variable from seabird25p on i2map vehicle === try: @@ -2594,7 +2635,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 except KeyError: self.logger.debug( "No 
transmissometer data in %s/%s.nc", - self.args.mission, + self.mission, sensor, ) @@ -2602,7 +2643,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 sensor, orig_nc, ) - out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" + out_fn = f"{self.auv_name}_{self.mission}_cal.nc" self.combined_nc[f"{sensor}_depth"].attrs = { "long_name": "Depth", "units": "m", @@ -2629,13 +2670,13 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 self.combined_nc[f"{sensor}_par"] = par except KeyError: - self.logger.debug("No par data in %s/%s.nc", self.args.mission, sensor) + self.logger.debug("No par data in %s/%s.nc", self.mission, sensor) self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( sensor, orig_nc, ) - out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" + out_fn = f"{self.auv_name}_{self.mission}_cal.nc" self.combined_nc[f"{sensor}_depth"].attrs = { "long_name": "Depth", "units": "m", @@ -2648,7 +2689,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 # === ad hoc Range checking === self.logger.info( - "Performing range checking of %s in %s/%s.nc", vars_to_qc, self.args.mission, sensor + "Performing range checking of %s in %s/%s.nc", vars_to_qc, self.mission, sensor ) self._range_qc_combined_nc( instrument=sensor, @@ -2656,9 +2697,9 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 ranges={f"{sensor}_salinity": Range(30, 40)}, set_to_nan=True, ) - if self.args.mission == "2010.284.00": + if self.mission == "2010.284.00": self.logger.info( - "Removing points outside of time range for %s/%s.nc", self.args.mission, sensor + "Removing points outside of time range for %s/%s.nc", self.mission, sensor ) self._range_qc_combined_nc( instrument=sensor, @@ -2682,7 +2723,7 @@ def _tailcone_process(self, sensor): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in 
{Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2727,7 +2768,7 @@ def _ecopuck_process(self, sensor, cf): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2842,7 +2883,7 @@ def _biolume_process(self, sensor): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2920,9 +2961,9 @@ def _biolume_process(self, sensor): "coordinates": f"{sensor}_{TIME60HZ} {sensor}_depth60hz", "comment": f"raw values from {source} {lag_info}", } - if self.args.mission == "2010.284.00": + if self.mission == "2010.284.00": self.logger.info( - "Removing points outside of time range for %s/biolume.nc", self.args.mission + "Removing points outside of time range for %s/biolume.nc", self.mission ) for time_axis in (TIME, TIME60HZ): self._range_qc_combined_nc( @@ -2953,7 +2994,7 @@ def _lopc_process(self, sensor): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2965,7 +3006,7 @@ def _lopc_process(self, sensor): if "time" not in orig_nc.coords: error_message = ( f"{sensor} has no time coordinate - likely an incomplete lopc.nc file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) @@ -3043,7 +3084,7 @@ def _isus_process(self, sensor): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in 
{Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -3157,7 +3198,7 @@ def _geometric_depth_correction(self, sensor, orig_nc): d_beg_time_diff.astype("timedelta64[s]"), d_end_time_diff.astype("timedelta64[s]"), ) - if self.args.mission in ( + if self.mission in ( "2008.289.03", "2010.259.01", "2010.259.02", @@ -3168,7 +3209,7 @@ def _geometric_depth_correction(self, sensor, orig_nc): self.logger.info( "%s: Special QC for mission %s: Setting corrected_depth to NaN for times after %s", sensor, - self.args.mission, + self.mission, self.combined_nc["depth_time"][-1].to_numpy(), ) corrected_depth[ @@ -3176,7 +3217,7 @@ def _geometric_depth_correction(self, sensor, orig_nc): orig_nc.get_index("time") > self.combined_nc["depth_time"].to_numpy()[-1], ) ] = np.nan - if self.args.plot: + if self.plot: plt.figure(figsize=(18, 6)) plt.plot( orig_nc["time"].to_numpy(), @@ -3192,7 +3233,7 @@ def _geometric_depth_correction(self, sensor, orig_nc): plt.ylabel("Depth (m) & Pitch (deg)") plt.legend(("Original depth", "Pitch corrected depth", "Pitch")) plt.title( - f"Original and pitch corrected depth for {self.args.auv_name} {self.args.mission}", + f"Original and pitch corrected depth for {self.auv_name} {self.mission}", ) plt.show() @@ -3231,11 +3272,10 @@ def _process(self, sensor, logs_dir, netcdfs_dir): # noqa: C901, PLR0912 elif hasattr(getattr(self, sensor), "orig_data"): self.logger.warning("No method (yet) to process %s", sensor) - def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name + def write_netcdf(self, netcdfs_dir: Path) -> None: + """Write calibrated netCDF file using instance attributes.""" self.combined_nc.attrs = self.global_metadata() - out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_cal.nc") + out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_cal.nc") self.logger.info("Writing 
calibrated instrument data to %s", out_fn) if Path(out_fn).exists(): Path(out_fn).unlink() @@ -3245,12 +3285,13 @@ def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: ", ".join(sorted(self.combined_nc.variables)), ) - def process_logs(self, vehicle: str = "", name: str = "", process_gps: bool = True) -> None: # noqa: FBT001, FBT002 - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name - logs_dir = Path(self.args.base_path, vehicle, MISSIONLOGS, name) - netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) - start_datetime = datetime.strptime(".".join(name.split(".")[:2]), "%Y.%j").astimezone( + def process_logs(self, process_gps: bool = True) -> Path: # noqa: FBT001, FBT002 + """Process logs using instance attributes.""" + logs_dir = Path(self.base_path, self.auv_name, MISSIONLOGS, self.mission) + netcdfs_dir = Path(self.base_path, self.auv_name, MISSIONNETCDFS, self.mission) + start_datetime = datetime.strptime( + ".".join(self.mission.split(".")[:2]), "%Y.%j" + ).astimezone( UTC, ) self._define_sensor_info(start_datetime) @@ -3261,12 +3302,12 @@ def process_logs(self, vehicle: str = "", name: str = "", process_gps: bool = Tr if not process_gps and sensor == "gps": continue # to skip gps processing in conftest.py fixture getattr(self, sensor).cal_align_data = xr.Dataset() - self.logger.debug("Processing %s %s %s", vehicle, name, sensor) + self.logger.debug("Processing %s %s %s", self.auv_name, self.mission, sensor) try: self._process(sensor, logs_dir, netcdfs_dir) except EOFError as e: - short_name = vehicle.lower() - if vehicle == "Dorado389": + short_name = self.auv_name.lower() + if self.auv_name == "Dorado389": # For supporting pytest & conftest.py fixture short_name = "dorado" if sensor in EXPECTED_SENSORS[short_name]: @@ -3303,7 +3344,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + 
self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/combine.py b/src/data/combine.py index 9141192e..cc63b555 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -91,6 +91,27 @@ class Combine_NetCDF: _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) variable_time_coord_mapping: dict = {} + def __init__( + self, + log_file: str, + verbose: int = 0, + plot: str = None, + commandline: str = "", + ) -> None: + """Initialize Combine_NetCDF with explicit parameters. + + Args: + log_file: LRAUV log file path for processing + verbose: Verbosity level (0=WARN, 1=INFO, 2=DEBUG) + plot: Optional plot specification + commandline: Command line string for metadata + """ + self.log_file = log_file + self.verbose = verbose + self.plot = plot + self.commandline = commandline + self.logger.setLevel(self._log_levels[verbose]) + def global_metadata(self): """Use instance variables to return a dictionary of metadata specific for the data that are written @@ -121,7 +142,7 @@ def global_metadata(self): metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." 
metadata["history"] = f"Created by {self.commandline} on {iso_now}" metadata["variable_time_coord_mapping"] = json.dumps(self.variable_time_coord_mapping) - log_file = self.args.log_file + log_file = self.log_file metadata["title"] = ( f"Combined LRAUV data from {log_file} - relevant variables extracted for STOQS" ) @@ -587,13 +608,12 @@ def _add_consolidation_comment(self, time_info: dict) -> None: f"Consolidated time coordinate from: {mapping_info}" ) - def _initial_coordinate_qc(self, log_file: str = "") -> None: + def _initial_coordinate_qc(self) -> None: """Perform initial QC on core coordinate variables for specific log files.""" - log_file = log_file or self.args.log_file - if log_file in ( + if self.log_file in ( "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", ): - self.logger.info("Performing initial coordinate QC for %s", self.args.log_file) + self.logger.info("Performing initial coordinate QC for %s", self.log_file) self._range_qc_combined_nc( instrument="universals", variables=[ @@ -619,10 +639,9 @@ def _initial_coordinate_qc(self, log_file: str = "") -> None: set_to_nan=False, ) - def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = "") -> None: + def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: """Add nudged longitude and latitude variables to the combined dataset.""" - log_file = log_file or self.args.log_file - self._initial_coordinate_qc(log_file=log_file) + self._initial_coordinate_qc() try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( nav_longitude=self.combined_nc["universals_longitude"], @@ -632,9 +651,9 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = logger=self.logger, auv_name="", mission="", - log_file=log_file, + log_file=self.log_file, max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=self.args.plot, + create_plots=self.plot, ) except ValueError as e: 
self.logger.error("Nudging positions failed: %s", e) # noqa: TRY400 @@ -682,11 +701,10 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = ), } - def combine_groups(self, log_file: str = None) -> None: + def combine_groups(self) -> None: """Combine group files into a single NetCDF dataset with consolidated time coordinates.""" - log_file = self.args.log_file or log_file - src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) + src_dir = Path(BASE_LRAUV_PATH, Path(self.log_file).parent) + group_files = sorted(src_dir.glob(f"{Path(self.log_file).stem}_{GROUP}_*.nc")) self.summary_fields = set() self.combined_nc = xr.Dataset() @@ -711,22 +729,21 @@ def combine_groups(self, log_file: str = None) -> None: self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"]) # Write intermediate file for cf_xarray decoding - intermediate_file = self._intermediate_write_netcdf(log_file=log_file) + intermediate_file = self._intermediate_write_netcdf() with xr.open_dataset(intermediate_file, decode_cf=True) as ds: self.combined_nc = ds.load() # Add nudged coordinates - self._add_nudged_coordinates(log_file=log_file) + self._add_nudged_coordinates() # Clean up intermediate file Path(intermediate_file).unlink() - def _intermediate_write_netcdf(self, log_file: str = None) -> None: + def _intermediate_write_netcdf(self) -> None: """Write out an intermediate combined netCDF file so that data can be read using decode_cf=True for nudge_positions() to work with cf accessors.""" - log_file = self.args.log_file or log_file - netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined_intermediate.nc") + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(self.log_file).parent) + out_fn = Path(netcdfs_dir, f"{Path(self.log_file).stem}_combined_intermediate.nc") self.combined_nc.attrs = self.global_metadata() 
self.logger.info("Writing intermediate combined group data to %s", out_fn) @@ -743,10 +760,10 @@ def _intermediate_write_netcdf(self, log_file: str = None) -> None: ) return out_fn - def write_netcdf(self, log_file: str = None) -> None: - log_file = self.args.log_file or log_file - netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") + def write_netcdf(self) -> None: + """Write combined netCDF file using instance attributes.""" + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(self.log_file).parent) + out_fn = Path(netcdfs_dir, f"{Path(self.log_file).stem}_combined.nc") self.combined_nc.attrs = self.global_metadata() self.logger.info("Writing combined group data to %s", out_fn) @@ -787,7 +804,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/conftest.py b/src/data/conftest.py index 054dba6a..02c47cb4 100644 --- a/src/data/conftest.py +++ b/src/data/conftest.py @@ -1,5 +1,4 @@ # noqa: INP001 -import logging import os import sys from argparse import Namespace @@ -95,16 +94,23 @@ def create_test_namespace(vehicle_overrides=None, processing_overrides=None): def mission_data(): if not Path(TEST_VEHICLE_DIR).exists(): pytest.fail(f"\n\n{bootstrap_mission}\n") - """Load a short recent mission to have some real data to work with""" - cal_netcdf = Calibrate_NetCDF() - ns = Namespace() + """Load a short mission to have some real data to work with""" # The BASE_PATH environment variable can be set in ci.yml for running in GitHub Actions - ns.base_path = os.getenv("BASE_PATH", BASE_PATH) - ns.auv_name = TEST_VEHICLE - ns.mission = TEST_MISSION - ns.plot = None - cal_netcdf.args = ns - cal_netcdf.logger.setLevel(logging.DEBUG) + base_path = os.getenv("BASE_PATH", BASE_PATH) + + cal_netcdf = Calibrate_NetCDF( + 
auv_name=TEST_VEHICLE, + mission=TEST_MISSION, + base_path=base_path, + calibration_dir=TEST_CALIBRATION_DIR, + plot=None, + verbose=2, # DEBUG level + commandline="test", + local=True, + noinput=True, + clobber=False, + noreprocess=False, + ) cal_netcdf.process_logs(process_gps=False) return cal_netcdf diff --git a/src/data/create_products.py b/src/data/create_products.py index 54dbdece..fdf0806c 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -43,6 +43,35 @@ class CreateProducts: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + def __init__( # noqa: PLR0913 + self, + auv_name: str = None, + mission: str = None, + base_path: str = str(BASE_PATH), + start_esecs: float = None, + local: bool = False, # noqa: FBT001, FBT002 + verbose: int = 0, + commandline: str = "", + ): + """Initialize CreateProducts with explicit parameters. + + Args: + auv_name: Name of the AUV vehicle + mission: Mission identifier + base_path: Base path for output files + start_esecs: Start epoch seconds for processing + local: Local processing flag + verbose: Verbosity level (0-2) + commandline: Command line string for tracking + """ + self.auv_name = auv_name + self.mission = mission + self.base_path = base_path + self.start_esecs = start_esecs + self.local = local + self.verbose = verbose + self.commandline = commandline + # Column name format required by ODV - will be tab delimited ODV_COLUMN_NAMES = [ # noqa: RUF012 "Cruise", @@ -95,18 +124,18 @@ class CreateProducts: def _open_ds(self): local_nc = Path( BASE_PATH, - self.args.auv_name, + self.auv_name, MISSIONNETCDFS, - self.args.mission, - f"{self.args.auv_name}_{self.args.mission}_{FREQ}.nc", + self.mission, + f"{self.auv_name}_{self.mission}_{FREQ}.nc", ) # Requires mission to have been processed and archived to AUVCTD dap_url = os.path.join( # noqa: PTH118 AUVCTD_OPENDAP_BASE, "surveys", - self.args.mission.split(".")[0], + self.mission.split(".")[0], "netcdf", - 
f"{self.args.auv_name}_{self.args.mission}_{FREQ}.nc", + f"{self.auv_name}_{self.mission}_{FREQ}.nc", ) try: self.ds = xr.open_dataset(dap_url) @@ -354,13 +383,13 @@ def plot_2column(self) -> str: col = 1 # Save plot to file - images_dir = Path(BASE_PATH, self.args.auv_name, MISSIONIMAGES) + images_dir = Path(BASE_PATH, self.auv_name, MISSIONIMAGES) Path(images_dir).mkdir(parents=True, exist_ok=True) plt.savefig( Path( images_dir, - f"{self.args.auv_name}_{self.args.mission}_{FREQ}_2column.png", + f"{self.auv_name}_{self.mission}_{FREQ}_2column.png", ), ) @@ -390,29 +419,29 @@ def gulper_odv(self, sec_bnds: int = 1) -> str: # noqa: C901, PLR0912, PLR0915 gulper = Gulper() gulper.args = argparse.Namespace() - gulper.args.base_path = self.args.base_path - gulper.args.auv_name = self.args.auv_name - gulper.args.mission = self.args.mission - gulper.args.local = self.args.local - gulper.args.verbose = self.args.verbose - gulper.args.start_esecs = self.args.start_esecs - gulper.logger.setLevel(self._log_levels[self.args.verbose]) + gulper.args.base_path = self.base_path + gulper.args.auv_name = self.auv_name + gulper.args.mission = self.mission + gulper.args.local = self.local + gulper.args.verbose = self.verbose + gulper.args.start_esecs = self.start_esecs + gulper.logger.setLevel(self._log_levels[self.verbose]) gulper.logger.addHandler(self._handler) gulper_times = gulper.parse_gulpers() if not gulper_times: - self.logger.info("No gulper times found for %s", self.args.mission) + self.logger.info("No gulper times found for %s", self.mission) return odv_dir = Path( BASE_PATH, - self.args.auv_name, + self.auv_name, MISSIONODVS, - self.args.mission, + self.mission, ) Path(odv_dir).mkdir(parents=True, exist_ok=True) gulper_odv_filename = Path( odv_dir, - f"{self.args.auv_name}_{self.args.mission}_{FREQ}_Gulper.txt", + f"{self.auv_name}_{self.mission}_{FREQ}_Gulper.txt", ) self._open_ds() @@ -436,7 +465,7 @@ def gulper_odv(self, sec_bnds: int = 1) -> str: # noqa: C901, 
PLR0912, PLR0915 ) for count, name in enumerate(odv_column_names): if name == "Cruise": - f.write(f"{self.args.auv_name}_{self.args.mission}_{FREQ}") + f.write(f"{self.auv_name}_{self.mission}_{FREQ}") elif name == "Station": f.write(f"{int(gulper_data['profile_number'].to_numpy().mean()):d}") elif name == "Type": @@ -543,7 +572,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/logs2netcdfs.py b/src/data/logs2netcdfs.py index c931bcb8..ad5bac69 100755 --- a/src/data/logs2netcdfs.py +++ b/src/data/logs2netcdfs.py @@ -72,6 +72,68 @@ class AUV_NetCDF: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + def __init__( # noqa: PLR0913 + self, + auv_name: str = None, + mission: str = None, + vehicle_dir: str = None, + base_path: str = str(BASE_PATH), + start: str = None, + end: str = None, + preview: bool = False, # noqa: FBT001, FBT002 + verbose: int = 0, + title: str = None, + summary: str = None, + add_seconds: float = None, + local: bool = False, # noqa: FBT001, FBT002 + noinput: bool = False, # noqa: FBT001, FBT002 + clobber: bool = False, # noqa: FBT001, FBT002 + noreprocess: bool = False, # noqa: FBT001, FBT002 + use_portal: bool = False, # noqa: FBT001, FBT002 + portal: str = None, + commandline: str = "", + ): + """Initialize AUV_NetCDF with explicit parameters. 
+ + Args: + auv_name: Name of the AUV vehicle + mission: Mission identifier + vehicle_dir: Directory containing vehicle mission logs + base_path: Base path for output files + start: Start datetime for filtering (LRAUV) + end: End datetime for filtering (LRAUV) + preview: Preview mode flag + verbose: Verbosity level (0-2) + title: Custom title for netCDF metadata + summary: Custom summary for netCDF metadata + add_seconds: Seconds to add for time correction + local: Process local mission without standard directory structure + noinput: Don't prompt for user input + clobber: Overwrite existing files + noreprocess: Don't reprocess existing files + use_portal: Use portal for data download + portal: Portal base URL + commandline: Command line string for tracking + """ + self.auv_name = auv_name + self.mission = mission + self.vehicle_dir = vehicle_dir + self.base_path = base_path + self.start = start + self.end = end + self.preview = preview + self.verbose = verbose + self.title = title + self.summary = summary + self.add_seconds = add_seconds + self.local = local + self.noinput = noinput + self.clobber = clobber + self.noreprocess = noreprocess + self.use_portal = use_portal + self.portal = portal + self.commandline = commandline + def read(self, file: Path) -> list[log_record]: """Reads and parses an AUV log and returns a list of `log_records`""" byte_offset = 0 @@ -351,8 +413,8 @@ def _unique_vehicle_names(self): return {d["vehicle"] for d in resp.json()} def _deployments_between(self): - start = f"{self.args.start}T000000Z" - end = f"{self.args.end}T235959Z" + start = f"{self.start}T000000Z" + end = f"{self.end}T235959Z" url = f"{self.deployments_url}?from={start}&to={end}" self.logger.debug("Getting missions from %s", url) with requests.get(url, timeout=TIMEOUT) as resp: @@ -363,15 +425,15 @@ def _deployments_between(self): error_message = f"No missions from {url}" raise LookupError(error_message) for item in resp.json(): - if self.args.preview: - 
self.logger.setLevel(self._log_levels[max(1, self.args.verbose)]) + if self.preview: + self.logger.setLevel(self._log_levels[max(1, self.verbose)]) self.logger.info("%s %s", item["vehicle"], item["name"]) else: - if self.args.auv_name and item["vehicle"].upper() != self.args.auv_name.upper(): + if self.auv_name and item["vehicle"].upper() != self.auv_name.upper(): self.logger.debug( "%s != %s", item["vehicle"], - self.args.auv_name, + self.auv_name, ) continue try: @@ -392,8 +454,8 @@ def _deployments_between(self): self.download_process_logs(item["vehicle"], item["name"]) def _files_from_mission(self, name=None, vehicle=None): - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name + name = name or self.mission + vehicle = vehicle or self.auv_name files_url = f"{self.portal_base}/files/list/{name}/{vehicle}" self.logger.debug("Getting files list from %s", files_url) with requests.get(files_url, timeout=TIMEOUT) as resp: @@ -423,7 +485,7 @@ async def _get_file(self, download_url, local_filename, session): async for chunk in resp.content.iter_chunked(1024): await handle.write(chunk) handle.write(chunk) - if self.args.verbose > 1: + if self.verbose > 1: print( # noqa: T201 f"{Path(local_filename).name}(done) ", end="", @@ -434,8 +496,8 @@ async def _get_file(self, download_url, local_filename, session): self.logger.exception() async def _download_files(self, logs_dir, name=None, vehicle=None): - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name + name = name or self.mission + vehicle = vehicle or self.auv_name tasks = [] async with ClientSession(timeout=TIMEOUT) as session: for ffm in self._files_from_mission(name, vehicle): @@ -583,9 +645,9 @@ def correct_times(self, log_data, add_seconds: int = 0): def write_variables(self, log_data, netcdf_filename): log_data = self._correct_dup_short_names(log_data) - if self.args.mission == "2025.316.02" and self.args.add_seconds: + if self.mission == "2025.316.02" and 
self.add_seconds: # So far only this mission is known to suffer from GPS Week Rollover bug - log_data = self.correct_times(log_data, self.args.add_seconds) + log_data = self.correct_times(log_data, self.add_seconds) self.nc_file.createDimension(TIME, len(log_data[0].data)) for variable in log_data: self.logger.debug( @@ -725,19 +787,19 @@ def _process_log_file(self, log_filename, netcdf_filename, src_dir=None): # Add the global metadata, overriding with command line options provided self.add_global_metadata() - vehicle = self.args.auv_name + vehicle = self.auv_name self.nc_file.title = f"Original AUV {vehicle} data converted from {log_filename}" - if hasattr(self.args, "title") and self.args.title: - self.nc_file.title = self.args.title + if self.title: + self.nc_file.title = self.title if src_dir: # The source attribute might make more sense for the location of # the source data, but the summary field is shown in STOQS metadata self.nc_file.summary = SUMMARY_SOURCE.format(src_dir) - if hasattr(self.args, "summary") and self.args.summary: - self.nc_file.summary = self.args.summary - if self.args.add_seconds: + if self.summary: + self.nc_file.summary = self.summary + if self.add_seconds: self.nc_file.summary += ( - f". Corrected timeTag variables by adding {self.args.add_seconds} seconds. " + f". Corrected timeTag variables by adding {self.add_seconds} seconds. " ) monotonic = monotonic_increasing_time_indices(self.nc_file["time"][:]) if (~monotonic).any(): @@ -754,15 +816,15 @@ def get_mission_dir(self, mission: str) -> str: """Return the mission directory. 
This method is nearly identical to the one in the Processor class, but it is used here to be explicit and to avoid the need to import the Processor class.""" - if not Path(self.args.vehicle_dir).exists(): - self.logger.error("%s does not exist.", self.args.vehicle_dir) + if not Path(self.vehicle_dir).exists(): + self.logger.error("%s does not exist.", self.vehicle_dir) self.logger.info("Is %s mounted?", self.mount_dir) sys.exit(1) - if self.args.auv_name.lower() == "dorado": + if self.auv_name.lower() == "dorado": year = mission.split(".")[0] yearyd = "".join(mission.split(".")[:2]) - path = Path(self.args.vehicle_dir, year, yearyd, mission) - elif self.args.auv_name.lower() == "i2map": + path = Path(self.vehicle_dir, year, yearyd, mission) + elif self.auv_name.lower() == "i2map": year = int(mission.split(".")[0]) # Could construct the YYYY/MM/YYYYMMDD path on M3/Master # but use the mission_list() method to find the mission dir instead @@ -770,12 +832,12 @@ def get_mission_dir(self, mission: str) -> str: if mission in missions: path = missions[mission] else: - self.logger.error("Cannot find %s in %s", mission, self.args.vehicle_dir) - error_message = f"Cannot find {mission} in {self.args.vehicle_dir}" + self.logger.error("Cannot find %s in %s", mission, self.vehicle_dir) + error_message = f"Cannot find {mission} in {self.vehicle_dir}" raise FileNotFoundError(error_message) - elif self.args.auv_name == "Dorado389": + elif self.auv_name == "Dorado389": # The Dorado389 vehicle is a special case used for testing locally and in CI - path = self.args.vehicle_dir + path = self.vehicle_dir if not Path(path).exists(): self.logger.error("%s does not exist.", path) error_message = f"{path} does not exist." 
@@ -788,33 +850,33 @@ def download_process_logs( # noqa: C901, PLR0912, PLR0915 name: str = "", src_dir: Path = Path(), ) -> None: - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name - logs_dir = Path(self.args.base_path, vehicle, MISSIONLOGS, name) + name = name or self.mission + vehicle = vehicle or self.auv_name + logs_dir = Path(self.base_path, vehicle, MISSIONLOGS, name) if src_dir: self.logger.info("src_dir = %s", src_dir) - if not self.args.local: + if not self.local: # As of 20 July 2023 this returns 404, which is distracting # self.logger.debug( # f"Unique vehicle names: {self._unique_vehicle_names()} seconds" # ) yes_no = "Y" if Path(logs_dir, "vehicle.cfg").exists(): - if self.args.noinput: - if self.args.clobber: + if self.noinput: + if self.clobber: self.logger.info("Clobbering existing %s files", logs_dir) else: self.logger.info("%s exists", logs_dir) yes_no = "N" - if self.args.noreprocess: + if self.noreprocess: self.logger.info("Not reprocessing %s", logs_dir) return else: yes_no = input(f"Directory {logs_dir} exists. Re-download? 
[Y/n]: ") or "Y" if yes_no.upper().startswith("Y"): - if self.args.use_portal: + if self.use_portal: self._portal_download(logs_dir, name=name, vehicle=vehicle) elif src_dir: safe_src_dir = Path(src_dir).resolve() @@ -834,7 +896,7 @@ def download_process_logs( # noqa: C901, PLR0912, PLR0915 self._portal_download(logs_dir, name=name, vehicle=vehicle) self.logger.info("Processing mission: %s %s", vehicle, name) - netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) + netcdfs_dir = Path(self.base_path, vehicle, MISSIONNETCDFS, name) Path(netcdfs_dir).mkdir(parents=True, exist_ok=True) p_start = time.time() for log in LOG_FILES: @@ -864,7 +926,7 @@ def download_process_logs( # noqa: C901, PLR0912, PLR0915 self.logger.info("Time to process: %.2f seconds", time.time() - p_start) def update(self): - self.logger.setLevel(self._log_levels[max(1, self.args.verbose)]) + self.logger.setLevel(self._log_levels[max(1, self.verbose)]) url = "http://portal.shore.mbari.org:8080/auvdata/v1/deployments/update" auv_netcdf.logger.info("Sending an 'update' request: %s", url) resp = requests.post(url, timeout=TIMEOUT) @@ -880,9 +942,9 @@ def update(self): def set_portal(self) -> None: self.portal_base = PORTAL_BASE self.deployments_url = Path(self.portal_base, "deployments") - if hasattr(self.args, "portal") and self.args.portal: - self.portal_base = self.args.portal - self.deployments_url = Path(self.args.portal, "deployments") + if self.portal: + self.portal_base = self.portal + self.deployments_url = Path(self.portal, "deployments") def process_command_line(self): """Process command line arguments using shared parser infrastructure.""" @@ -941,10 +1003,36 @@ def process_command_line(self): help="Directory for the vehicle's mission logs, e.g.: /Volumes/AUVCTD/missionlogs", ) - self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + args = parser.parse_args() + + # Reinitialize with parsed arguments + self.__init__( + 
auv_name=args.auv_name, + mission=args.mission, + vehicle_dir=args.vehicle_dir, + base_path=args.base_path, + start=args.start, + end=args.end, + preview=args.preview, + verbose=args.verbose, + title=args.title, + summary=args.summary, + add_seconds=args.add_seconds, + local=args.local, + noinput=args.noinput, + clobber=args.clobber, + noreprocess=args.noreprocess, + use_portal=args.use_portal, + portal=args.portal, + commandline=" ".join(sys.argv), + ) + + # Keep args for backward compatibility with any code that expects it + self.args = args + self.update_attr = args.update # Special case for update flag + + self.logger.setLevel(self._log_levels[self.verbose]) self.set_portal() - self.commandline = " ".join(sys.argv) if __name__ == "__main__": @@ -952,18 +1040,18 @@ def process_command_line(self): auv_netcdf.process_command_line() p_start = time.time() - if auv_netcdf.args.update: + if auv_netcdf.update_attr: auv_netcdf.update() - elif auv_netcdf.args.auv_name and auv_netcdf.args.mission: - if auv_netcdf.args.vehicle_dir: - path = auv_netcdf.get_mission_dir(auv_netcdf.args.mission) + elif auv_netcdf.auv_name and auv_netcdf.mission: + if auv_netcdf.vehicle_dir: + path = auv_netcdf.get_mission_dir(auv_netcdf.mission) auv_netcdf.download_process_logs(src_dir=path) else: raise argparse.ArgumentError( None, "Must provide --vehicle_dir with --auv_name & --mission", ) - elif auv_netcdf.args.start and auv_netcdf.args.end: + elif auv_netcdf.start and auv_netcdf.end: auv_netcdf._deployments_between() else: raise argparse.ArgumentError( diff --git a/src/data/lopcMEP.py b/src/data/lopcMEP.py index 0629acc4..b312d924 100755 --- a/src/data/lopcMEP.py +++ b/src/data/lopcMEP.py @@ -1,7 +1,7 @@ #!/usr/bin/env python __author__ = "Mike McCann" -__version__ = "$Revision: 1.8 $".split()[1] -__date__ = "$Date: 2010/08/30 23:24:40 $".split()[1] +__version__ = ["$Revision:", "1.8", "$"][1] +__date__ = ["$Date:", "2010/08/30", "23:24:40", "$"][1] __copyright__ = "2010" __license__ = 
"GPL v3" __contact__ = "mccann at mbari.org" diff --git a/src/data/lopcToNetCDF.py b/src/data/lopcToNetCDF.py index 5e4bb948..cab8903d 100755 --- a/src/data/lopcToNetCDF.py +++ b/src/data/lopcToNetCDF.py @@ -1,7 +1,7 @@ #!/usr/bin/env python __author__ = "Mike McCann" -__version__ = "$Revision: 1.43 $".split()[1] -__date__ = "$Date: 2020/11/23 21:40:04 $".split()[1] +__version__ = ["$Revision:", "1.43", "$"][1] +__date__ = ["$Date:", "2020/11/23", "21:40:04", "$"][1] __copyright__ = "2009" __license__ = "GPL v3" __contact__ = "mccann at mbari.org" diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 85438e4a..01e49373 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -168,6 +168,29 @@ class Extract: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + def __init__( # noqa: PLR0913 + self, + log_file: str = None, + plot_time: bool = False, # noqa: FBT001, FBT002 + filter_monotonic_time: bool = True, # noqa: FBT001, FBT002 + verbose: int = 0, + commandline: str = "", + ) -> None: + """Initialize Extract with explicit parameters. 
+ + Args: + log_file: Log file path for processing + plot_time: Enable time plotting + filter_monotonic_time: Filter out non-monotonic time values + verbose: Verbosity level (0-2) + commandline: Command line string for tracking + """ + self.log_file = log_file + self.plot_time = plot_time + self.filter_monotonic_time = filter_monotonic_time + self.verbose = verbose + self.commandline = commandline + def show_variable_mapping(self): """Show the variable mapping.""" for group, parms in sorted(SCIENG_PARMS.items()): @@ -308,7 +331,7 @@ def _get_time_filters_for_variables( dict: Map of time_coord_name -> {"indices": list[int], "filtered": bool} """ # Check if time filtering is enabled - if not getattr(self.args, "filter_monotonic_time", True): + if not self.filter_monotonic_time: return {} self.logger.info("========================= Group %s =========================", group_name) @@ -462,10 +485,10 @@ def _find_time_coordinates( def _parse_plot_time_argument(self) -> tuple[str | None, str | None]: """Parse the --plot_time argument and return (group_name, time_coord_name).""" - if not getattr(self.args, "plot_time", None): + if not self.plot_time: return None, None - plot_time = self.args.plot_time + plot_time = self.plot_time if not plot_time.startswith("/"): msg = "Invalid plot_time format, must be //" raise ValueError(msg) @@ -930,7 +953,7 @@ def _create_netcdf_file( # noqa: PLR0913 self._copy_global_attributes(src_group, dst_dataset) # Add standard global attributes - log_file = self.args.log_file + log_file = self.log_file for attr_name, attr_value in self.global_metadata(log_file, group_name).items(): dst_dataset.setncattr(attr_name, attr_value) @@ -1013,7 +1036,7 @@ def global_metadata(self, log_file: str, group_name: str): metadata["license"] = metadata["distribution_statement"] metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." 
metadata["history"] = f"Created by {self.commandline} on {iso_now}" - log_file = self.args.log_file + log_file = self.log_file metadata["title"] = f"Extracted LRAUV data from {log_file}, Group: {group_name}" metadata["source"] = ( f"MBARI LRAUV data extracted from {log_file}" @@ -1101,7 +1124,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/process.py b/src/data/process.py index 369ca597..8977425b 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -437,12 +437,22 @@ def get_mission_dir(self, mission: str) -> str: def download_process(self, mission: str, src_dir: str) -> None: self.logger.info("Download and processing steps for %s", mission) - auv_netcdf = AUV_NetCDF() - auv_netcdf.args = self._create_child_namespace(auv_name=self.auv_name, mission=mission) + auv_netcdf = AUV_NetCDF( + auv_name=self.auv_name, + mission=mission, + base_path=str(self.config["base_path"]), + local=self.config["local"], + noinput=self.config["noinput"], + clobber=self.config["clobber"], + noreprocess=self.config["noreprocess"], + use_portal=self.config["use_portal"], + add_seconds=self.config["add_seconds"], + verbose=self.config["verbose"], + commandline=self.commandline, + ) auv_netcdf.set_portal() auv_netcdf.logger.setLevel(self._log_levels[self.config["verbose"]]) auv_netcdf.logger.addHandler(self.log_handler) - auv_netcdf.commandline = self.commandline auv_netcdf.download_process_logs(src_dir=src_dir) auv_netcdf.logger.removeHandler(self.log_handler) @@ -489,14 +499,20 @@ def download_process(self, mission: str, src_dir: str) -> None: def calibrate(self, mission: str) -> None: self.logger.info("Calibration steps for %s", mission) - cal_netcdf = Calibrate_NetCDF() - cal_netcdf.args = self._create_child_namespace( - auv_name=self.auv_name, mission=mission, plot=None + cal_netcdf 
= Calibrate_NetCDF( + auv_name=self.auv_name, + mission=mission, + base_path=self.config["base_path"], + calibration_dir=self.calibration_dir, + plot=None, + verbose=self.config["verbose"], + commandline=self.commandline, + local=self.config["local"], + noinput=self.config["noinput"], + clobber=self.config["clobber"], + noreprocess=self.config["noreprocess"], ) - cal_netcdf.calibration_dir = self.calibration_dir - cal_netcdf.logger.setLevel(self._log_levels[self.config["verbose"]]) cal_netcdf.logger.addHandler(self.log_handler) - cal_netcdf.commandline = self.commandline try: netcdf_dir = cal_netcdf.process_logs() cal_netcdf.write_netcdf(netcdf_dir) @@ -505,21 +521,24 @@ def calibrate(self, mission: str) -> None: cal_netcdf.logger.removeHandler(self.log_handler) def align(self, mission: str = "", log_file: str = "") -> None: - self.logger.info("Alignment steps for %s", mission) - align_netcdf = Align_NetCDF() - align_netcdf.args = self._create_child_namespace( - auv_name=self.auv_name, mission=mission, plot=None + self.logger.info("Alignment steps for %s", mission or log_file) + align_netcdf = Align_NetCDF( + auv_name=self.auv_name, + mission=mission, + base_path=self.config["base_path"], + log_file=log_file, + plot=None, + verbose=self.config["verbose"], + commandline=self.commandline, ) - align_netcdf.logger.setLevel(self._log_levels[self.config["verbose"]]) align_netcdf.logger.addHandler(self.log_handler) - align_netcdf.commandline = self.commandline try: if log_file: - netcdf_dir = align_netcdf.process_combined(log_file=log_file) - align_netcdf.write_combined_netcdf(netcdf_dir, log_file=log_file) + netcdf_dir = align_netcdf.process_combined() + align_netcdf.write_combined_netcdf(netcdf_dir) else: netcdf_dir = align_netcdf.process_cal() - align_netcdf.write_combined_netcdf(netcdf_dir, vehicle=self.auv_name) + align_netcdf.write_combined_netcdf(netcdf_dir) except (FileNotFoundError, EOFError) as e: align_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 
error_message = f"{mission} {e}" @@ -529,23 +548,29 @@ def align(self, mission: str = "", log_file: str = "") -> None: def resample(self, mission: str = "") -> None: self.logger.info("Resampling steps for %s", mission) - resamp = Resampler() - resamp.args = self._create_child_namespace( - auv_name=self.auv_name, mission=mission, plot=None + resamp = Resampler( + auv_name=self.auv_name, + mission=mission, + log_file=self.config["log_file"], + freq=self.config["freq"], + mf_width=self.config["mf_width"], + flash_threshold=self.config["flash_threshold"], + verbose=self.config["verbose"], + plot=None, + commandline=self.commandline, ) - resamp.commandline = self.commandline resamp.logger.setLevel(self._log_levels[self.config["verbose"]]) resamp.logger.addHandler(self.log_handler) - file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" - if resamp.args.log_file: - netcdfs_dir = Path(BASE_LRAUV_PATH, Path(resamp.args.log_file).parent) - nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc") + file_name = f"{resamp.auv_name}_{resamp.mission}_align.nc" + if resamp.log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(resamp.log_file).parent) + nc_file = Path(netcdfs_dir, f"{Path(resamp.log_file).stem}_align.nc") else: nc_file = Path( self.config["base_path"], - resamp.args.auv_name, + resamp.auv_name, MISSIONNETCDFS, - resamp.args.mission, + resamp.mission, file_name, ) if self.config["flash_threshold"] and self.config["resample"]: @@ -556,7 +581,7 @@ def resample(self, mission: str = "") -> None: dap_file_str = os.path.join( # noqa: PTH118 AUVCTD_OPENDAP_BASE.replace("opendap/", ""), "surveys", - resamp.args.mission.split(".")[0], + resamp.mission.split(".")[0], "netcdf", file_name, ) @@ -587,30 +612,39 @@ def archive( If mission is provided, archive the processed data for Dorado class vehicles. 
If log_file is provided, archive the processed data for LRAUV class vehicles.""" - arch = Archiver(add_logger_handlers) - arch.args = self._create_child_namespace(auv_name=self.auv_name, mission=mission) + arch = Archiver( + add_handlers=add_logger_handlers, + auv_name=self.auv_name, + mission=mission, + clobber=self.config["clobber"], + resample=self.config["resample"], + flash_threshold=self.config["flash_threshold"], + archive_only_products=self.config["archive_only_products"], + create_products=self.config["create_products"], + verbose=self.config["verbose"], + commandline=self.commandline, + ) arch.mount_dir = self.mount_dir - arch.commandline = self.commandline arch.logger.setLevel(self._log_levels[self.config["verbose"]]) if add_logger_handlers: arch.logger.addHandler(self.log_handler) if mission: # Dorado class vehicle archiving self.logger.info("Archiving steps for %s", mission) - file_name_base = f"{arch.args.auv_name}_{arch.args.mission}" + file_name_base = f"{arch.auv_name}_{arch.mission}" nc_file_base = Path( BASE_PATH, - arch.args.auv_name, + arch.auv_name, MISSIONNETCDFS, - arch.args.mission, + arch.mission, file_name_base, ) self.logger.info("nc_file_base = %s, BASE_PATH = %s", nc_file_base, BASE_PATH) if str(BASE_PATH).startswith(("/home/runner/", "/root")): arch.logger.info( "Not archiving %s %s to AUVCTD as it's likely CI testing", - arch.args.auv_name, - arch.args.mission, + arch.auv_name, + arch.mission, ) else: arch.copy_to_AUVTCD(nc_file_base, self.config["freq"]) @@ -623,16 +657,21 @@ def archive( arch.logger.removeHandler(self.log_handler) def create_products(self, mission: str) -> None: - cp = CreateProducts() - cp.args = self._create_child_namespace( - auv_name=self.auv_name, mission=mission, start_esecs=None + cp = CreateProducts( + auv_name=self.auv_name, + mission=mission, + base_path=str(self.config["base_path"]), + start_esecs=None, + local=self.config["local"], + verbose=self.config["verbose"], + commandline=self.commandline, ) 
cp.logger.setLevel(self._log_levels[self.config["verbose"]]) cp.logger.addHandler(self.log_handler) # cp.plot_biolume() # cp.plot_2column() - if "dorado" in cp.args.auv_name.lower(): + if "dorado" in cp.auv_name.lower(): cp.gulper_odv() cp.logger.removeHandler(self.log_handler) @@ -925,9 +964,13 @@ def process_missions(self, start_year: int = None) -> None: def extract(self, log_file: str) -> None: self.logger.info("Extracting log file: %s", log_file) - extract = Extract() - extract.args = self._create_child_namespace() - extract.commandline = self.commandline + extract = Extract( + log_file=log_file, + plot_time=False, + filter_monotonic_time=True, + verbose=self.config["verbose"], + commandline=self.commandline, + ) extract.logger.setLevel(self._log_levels[self.config["verbose"]]) extract.logger.addHandler(self.log_handler) @@ -943,14 +986,17 @@ def combine(self, log_file: str) -> None: "Equivalent to the calibrate step for Dorado class vehicles. " "Adds nudge positions and more layers of quality control." 
) - combine = Combine_NetCDF() - combine.args = self._create_child_namespace(plot=None) - combine.commandline = self.commandline + combine = Combine_NetCDF( + log_file=log_file, + verbose=self.config["verbose"], + plot=None, + commandline=self.commandline, + ) combine.logger.setLevel(self._log_levels[self.config["verbose"]]) combine.logger.addHandler(self.log_handler) - combine.combine_groups(log_file=log_file) - combine.write_netcdf(log_file=log_file) + combine.combine_groups() + combine.write_netcdf() @log_file_processor def process_log_file(self, log_file: str) -> None: diff --git a/src/data/resample.py b/src/data/resample.py index 0fb6a7a0..a8a0750e 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -51,7 +51,41 @@ class Resampler: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def __init__(self) -> None: + def __init__( # noqa: PLR0913 + self, + auv_name: str = None, + mission: str = None, + log_file: str = None, + freq: str = FREQ, + mf_width: int = MF_WIDTH, + flash_threshold: float = None, + verbose: int = 0, + plot: bool = None, # noqa: FBT001 + commandline: str = "", + ) -> None: + """Initialize Resampler with explicit parameters. 
+ + Args: + auv_name: Name of the AUV vehicle + mission: Mission identifier + log_file: Log file path (for LRAUV processing) + freq: Resampling frequency (default: '1S') + mf_width: Median filter width (default: 3) + flash_threshold: Flash detection threshold + verbose: Verbosity level (0-2) + plot: Enable plotting + commandline: Command line string for tracking + """ + self.auv_name = auv_name + self.mission = mission + self.log_file = log_file + self.freq = freq + self.mf_width = mf_width + self.flash_threshold = flash_threshold + self.verbose = verbose + self.plot = plot + self.commandline = commandline + plt.rcParams["figure.figsize"] = (15, 5) self.resampled_nc = xr.Dataset() iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z" @@ -134,7 +168,7 @@ def _build_global_metadata(self) -> None: self.metadata["summary"] = ( f"Observational oceanographic data obtained from an Autonomous" f" Underwater Vehicle mission with measurements sampled at" - f" {self.args.freq} intervals." + f" {self.freq} intervals." f" Data processed at {iso_now} using MBARI's auv-python software." 
) @@ -144,41 +178,41 @@ def dorado_global_metadata(self) -> dict: """ self.metadata["title"] = "Calibrated, " try: - if dorado_info[self.args.mission].get("program"): + if dorado_info[self.mission].get("program"): self.metadata["title"] = ( - f"{dorado_info[self.args.mission]['program']} program - calibrated, " + f"{dorado_info[self.mission]['program']} program - calibrated, " ) except KeyError: self.logger.warning( "No entry for for mission %s program in dorado_info.py", - self.args.mission, + self.mission, ) self.metadata["title"] += ( f"aligned, and resampled AUV sensor data from" - f" {self.args.auv_name} mission {self.args.mission}" + f" {self.auv_name} mission {self.mission}" ) try: self.metadata["summary"] += ( f" Processing log file: {AUVCTD_OPENDAP_BASE}/surveys/" - f"{self.args.mission.split('.')[0]}/netcdf/" - f"{self.args.auv_name}_{self.args.mission}_processing.log" + f"{self.mission.split('.')[0]}/netcdf/" + f"{self.auv_name}_{self.mission}_processing.log" ) except KeyError: # Likely no _1S.nc file was created, hence no summary to append to self.logger.warning( "Could not add processing log file to summary matadata for mission %s", - self.args.mission, + self.mission, ) try: - if dorado_info[self.args.mission].get("program"): - self.metadata["program"] = dorado_info[self.args.mission].get("program") - if dorado_info[self.args.mission].get("comment"): - self.metadata["comment"] = dorado_info[self.args.mission].get("comment") + if dorado_info[self.mission].get("program"): + self.metadata["program"] = dorado_info[self.mission].get("program") + if dorado_info[self.mission].get("comment"): + self.metadata["comment"] = dorado_info[self.mission].get("comment") except KeyError: self.logger.warning( "No entry for for mission %s program or comment in dorado_info.py", - self.args.mission, + self.mission, ) try: # Parse from ctd1_depth comment: "using SensorOffset(x=1.003, y=0.0001)" @@ -189,7 +223,7 @@ def dorado_global_metadata(self) -> dict: except KeyError: 
self.logger.warning( "No comment for pitch correction in ctd1_depth for mission %s", - self.args.mission, + self.mission, ) return self.metadata @@ -200,7 +234,7 @@ def i2map_global_metadata(self) -> dict: """ self.metadata["title"] = ( f"Calibrated, aligned, and resampled AUV sensor data from" - f" {self.args.auv_name} mission {self.args.mission}" + f" {self.auv_name} mission {self.mission}" ) # Append location of original data files to summary matches = re.search( @@ -212,8 +246,8 @@ def i2map_global_metadata(self) -> dict: " " + matches.group(1) + f". Processing log file: {AUVCTD_OPENDAP_BASE}/surveys/" - + f"{self.args.mission.split('.')[0]}/netcdf/" - + f"{self.args.auv_name}_{self.args.mission}_processing.log" + + f"{self.mission.split('.')[0]}/netcdf/" + + f"{self.auv_name}_{self.mission}_processing.log" ) # Append shortened location of original data files to title # Useful for I2Map data as it's in a YYYY/MM directory structure @@ -235,7 +269,7 @@ def i2map_global_metadata(self) -> dict: except KeyError: self.logger.warning( "No entry for for mission %s comment in dorado_info.py", - self.args.mission, + self.mission, ) return self.metadata @@ -272,12 +306,12 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None: self.logger.warning( "Variable %s_depth not found in %s align.nc file", instr, - self.args.mission, + self.mission, ) self.logger.info( "Cannot continue without a pitch corrected depth coordinate", ) - msg = f"{instr}_depth not found in {self.args.auv_name}_{self.args.mission}_align.nc" + msg = f"{instr}_depth not found in {self.auv_name}_{self.mission}_align.nc" raise InvalidAlignFile(msg) from None try: self.df_o[f"{instr}_latitude"] = self.ds[f"{instr}_latitude"].to_pandas() @@ -285,7 +319,7 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None: except KeyError: msg = ( f"Variable {instr}_latitude or {instr}_longitude not found in " - f"{self.args.mission} align.nc file" + f"{self.mission} 
align.nc file" ) self.logger.warning(msg) raise InvalidAlignFile(msg) from None @@ -357,8 +391,8 @@ def save_coordinates( self.resampled_nc["depth"].attrs = self.ds[f"{instr}_depth"].attrs self.resampled_nc["depth"].attrs["comment"] += ( f". {self.ds[f'{instr}_depth'].attrs['comment']}" - f" mean sampled at {self.args.freq} intervals following" - f" {self.args.mf_width} point median filter." + f" mean sampled at {self.freq} intervals following" + f" {self.mf_width} point median filter." ) self.resampled_nc["latitude"].attrs = self.ds[f"{instr}_latitude"].attrs self.resampled_nc["latitude"].attrs["comment"] += ( @@ -602,8 +636,8 @@ def add_biolume_proxies( # noqa: PLR0913, PLR0915 peaks, _ = signal.find_peaks(s_biolume_raw, height=max_bg) s_peaks = pd.Series(s_biolume_raw.iloc[peaks], index=s_biolume_raw.index[peaks]) s_med_bg_peaks = pd.Series(s_med_bg.iloc[peaks], index=s_biolume_raw.index[peaks]) - if self.args.flash_threshold: - flash_threshold = self.args.flash_threshold + if self.flash_threshold: + flash_threshold = self.flash_threshold flash_threshold_note = f"Computed with flash_threshold = {flash_threshold:.0e}" self.logger.info("Using flash_threshold = %.4e", flash_threshold) nbflash_high = s_peaks[s_peaks > (s_med_bg_peaks + flash_threshold)] @@ -1224,7 +1258,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 # nosecone instruments. If we are processing LRAUV data then # use 'ctddseabird', otherwise start with 'ctd1' and fall back to # 'seabird25p' if needed for i2map missions. 
- pitch_corrected_instr = "ctdseabird" if self.args.log_file else "ctd1" + pitch_corrected_instr = "ctdseabird" if self.log_file else "ctd1" if f"{pitch_corrected_instr}_depth" not in self.ds: pitch_corrected_instr = "seabird25p" if pitch_corrected_instr in instrs_to_pad: @@ -1237,7 +1271,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 freq, ) self.save_coordinates(instr, mf_width, freq, aggregator) - if self.args.plot: + if self.plot: self.plot_coordinates(instr, freq, plot_seconds) self.add_profile(depth_threshold=depth_threshold) if instr != last_instr: @@ -1293,7 +1327,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 f" median filtered with {mf_width} samples" f" and resampled with {aggregator} to {freq} intervals." ) - if self.args.plot: + if self.plot: self.plot_variable(instr, variable, freq, plot_seconds) try: self._build_global_metadata() @@ -1304,18 +1338,18 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 e, # noqa: TRY401 nc_file, ) - if self.args.auv_name.lower() == "dorado": + if self.auv_name.lower() == "dorado": self.resampled_nc.attrs = self.dorado_global_metadata() - elif self.args.auv_name.lower() == "i2map": + elif self.auv_name.lower() == "i2map": self.resampled_nc.attrs = self.i2map_global_metadata() self.resampled_nc["time"].attrs = { "standard_name": "time", "long_name": "Time (UTC)", } out_fn = str(nc_file).replace("_align.nc", f"_{freq}.nc") - if self.args.flash_threshold and self.args.flash_threshold != FLASH_THRESHOLD: + if self.flash_threshold and self.flash_threshold != FLASH_THRESHOLD: # Append flash_threshold to output filename - ft_ending = f"_ft{self.args.flash_threshold:.0E}.nc".replace("E+", "E") + ft_ending = f"_ft{self.flash_threshold:.0E}.nc".replace("E+", "E") out_fn = out_fn.replace(".nc", ft_ending) self.resampled_nc.to_netcdf(path=out_fn, format="NETCDF4_CLASSIC") self.logger.info("Saved resampled mission to %s", out_fn) @@ -1347,7 +1381,7 @@ def 
process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/usblToNetCDF.py b/src/data/usblToNetCDF.py index ff3324b8..007ec8a6 100755 --- a/src/data/usblToNetCDF.py +++ b/src/data/usblToNetCDF.py @@ -1,7 +1,7 @@ #!/usr/bin/env python __author__ = "Mike McCann" -__version__ = "$Revision: 1.2 $".split()[1] -__date__ = "$Date: 2010/08/24 18:58:19 $".split()[1] +__version__ = ["$Revision:", "1.2", "$"][1] +__date__ = ["$Date:", "2010/08/24", "18:58:19", "$"][1] __copyright__ = "2009" __license__ = "GPL v3" __contact__ = "mccann at mbari.org" From e3140e633f9859460379181a4b667666c4fd5715 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 13:34:22 -0800 Subject: [PATCH 13/13] Reformat lines. --- src/data/calibrate.py | 8 ++------ src/data/resample.py | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 68be6a61..7ea7f9b2 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -1993,9 +1993,7 @@ def _depth_process(self, sensor, latitude=36, cutoff_freq=1): # noqa: PLR0915 df_plot["pres"] = pres[pbeg:pend] df_plot["depth_filtpres_butter"] = depth_filtpres_butter[pbeg:pend] df_plot["depth_filtpres_boxcar"] = depth_filtpres_boxcar[pbeg:pend] - title = ( - f"First {pend} points from" f" {self.mission}/{self.sinfo[sensor]['data_filename']}" - ) + title = f"First {pend} points from {self.mission}/{self.sinfo[sensor]['data_filename']}" ax = df_plot.plot(title=title, figsize=(18, 6)) ax.grid("on") self.logger.debug("Pausing with plot entitled: %s. 
Close window to continue.", title) @@ -2194,9 +2192,7 @@ def _hs2_process(self, sensor, logs_dir): # noqa: C901, PLR0912, PLR0915 df_plot["blue_bs"] = blue_bs[pbeg:pend] df_plot["red_bs"] = red_bs[pbeg:pend] ## df_plot["fl"] = fl[pbeg:pend] - title = ( - f"First {pend} points from" f" {self.mission}/{self.sinfo[sensor]['data_filename']}" - ) + title = f"First {pend} points from {self.mission}/{self.sinfo[sensor]['data_filename']}" ax = df_plot.plot(title=title, figsize=(18, 6), ylim=(-0.003, 0.004)) ax.grid("on") self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) diff --git a/src/data/resample.py b/src/data/resample.py index a8a0750e..f37182a5 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -188,8 +188,7 @@ def dorado_global_metadata(self) -> dict: self.mission, ) self.metadata["title"] += ( - f"aligned, and resampled AUV sensor data from" - f" {self.auv_name} mission {self.mission}" + f"aligned, and resampled AUV sensor data from {self.auv_name} mission {self.mission}" ) try: self.metadata["summary"] += (