From 7efef8de6ec09fc48053fa8ece7f13e4a0314cb3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 16 Oct 2025 10:40:47 -0700 Subject: [PATCH 1/7] WIP: Initial attempt at a process_lrauv.py module. --- src/data/process_lrauv.py | 43 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100755 src/data/process_lrauv.py diff --git a/src/data/process_lrauv.py b/src/data/process_lrauv.py new file mode 100755 index 00000000..1af00808 --- /dev/null +++ b/src/data/process_lrauv.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +""" +Process LRAUV data from NetCDF4 log files to resampled .nc files. +(This replaces the legacy lrauvNc4ToNetcdf.py script in STOQS.) + +Find LRAUV log files in smb://atlas.shore.mbari.org/LRAUV/missionlogs +and run the data through standard science data processing to calibrated, +aligned, and resampled netCDF files. Use a standard set of processing options; +more flexibility is available via the individual processing modules. + +Limit processing to specific steps by providing arguments: + --extract + --combine + --resample + --archive + --cleanup +If none provided then perform all steps. + +Uses command line arguments from nc42netcdfs.py and combine.py. +""" + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" + +from process import Processor + + +class LRAUVProcessor(Processor): + pass + + +if __name__ == "__main__": + VEHICLE = "tethys" + LRAUV_DIR = "/Volumes/LRAUV" + # It's possible that we might need calibration files for some sensors + # in the future, so point to a potential directory where they can be found. 
+ CALIBRATION_DIR = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" + MOUNT_DIR = "smb://atlas.shore.mbari.org/LRAUV" + START_YEAR = 2012 + + proc = LRAUVProcessor(VEHICLE, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR) + proc.process_command_line() + proc.process_missions(START_YEAR) From 1de36bb47a1580e82f415ddaf73a33d43076c1c2 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 11:52:33 -0700 Subject: [PATCH 2/7] Simplify calling methods with just log_file, save using _Group pattern. --- src/data/nc42netcdfs.py | 55 ++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index eda7ae2d..7b958395 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -21,7 +21,7 @@ # Local directory that serves as the work area for log_files and netcdf files BASE_LRAUV_WEB = "https://dods.mbari.org/data/lrauv/" -BASE_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve() +BASE_LRAUV_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve() SUMMARY_SOURCE = "Original LRAUV data extracted from {}, group {}" GROUPS = ["navigation", "ctd", "ecopuck"] # Your actual group names @@ -181,23 +181,34 @@ def get_groups_netcdf4(self, file_path): with netCDF4.Dataset(file_path, "r") as dataset: return list(dataset.groups.keys()) - def extract_groups_to_files_netcdf4(self, input_file, output_dir): - """Extract each group to a separate NetCDF file using netCDF4 library. + def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: + """Extract each group from .nc4 file to a separate .nc file using netCDF4 library. 
+ Args: + log_file: Relative path from BASE_LRAUV_WEB to .nc4 log_file + + Returns: + netcdfs_dir: Local directory where NetCDF files were saved + + Note: The xarray library fails reading the WetLabsBB2FL group from this file: brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 with garbled data for the serial variable (using ncdump): serial = "$F!{<8D>\031@7\024[P]\001\030" ; but netCDF4 can skip over it and read the rest of the variables. """ - output_dir = Path(output_dir) - output_dir.mkdir(exist_ok=True, parents=True) + # Download over http so that we don't need to mount smb shares + url = os.path.join(BASE_LRAUV_WEB, log_file) # noqa: PTH118 + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + netcdfs_dir.mkdir(exist_ok=True, parents=True) - self.logger.info("Extracting data from %s", input_file) + extract.logger.info("Downloading %s", url) + input_file = extract.download_with_pooch(url, netcdfs_dir, self.args.known_hash) + self.logger.info("Extracting data from %s", input_file) with netCDF4.Dataset(input_file, "r") as src_dataset: # Extract root group first - self._extract_root_group(src_dataset, output_dir) + self._extract_root_group(src_dataset, log_file, netcdfs_dir) # Extract all other groups all_groups = list(src_dataset.groups.keys()) @@ -206,10 +217,12 @@ def extract_groups_to_files_netcdf4(self, input_file, output_dir): if group_name != "/" and group_name not in all_groups: self.logger.warning("Group %s not found in %s", group_name, input_file) continue - self._extract_single_group(src_dataset, group_name, output_dir) + self._extract_single_group(src_dataset, group_name, log_file, netcdfs_dir) + + return netcdfs_dir - def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path): - """Extract variables from the root group to Universals.nc.""" + def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, output_dir: Path): + """Extract variables from the root group to 
_Group_Universals.nc.""" root_parms = SCIENG_PARMS.get("/", []) if not root_parms: return @@ -219,7 +232,7 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path): vars_to_extract = self._get_available_variables(src_dataset, root_parms) if vars_to_extract: - output_file = output_dir / "Universals.nc" + output_file = output_dir / f"{Path(log_file).stem}_Group_Universals.nc" self._create_netcdf_file(src_dataset, vars_to_extract, output_file) self.logger.info("Extracted root group '/' to %s", output_file) else: @@ -229,9 +242,9 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path): self.logger.warning("Could not extract root group '/': %s", e) def _extract_single_group( - self, src_dataset: netCDF4.Dataset, group_name: str, output_dir: Path + self, src_dataset: netCDF4.Dataset, group_name: str, log_file: str, output_dir: Path ): - """Extract a single group to its own NetCDF file.""" + """Extract a single group to its own NetCDF file named like _Group_.nc.""" group_parms = SCIENG_PARMS[group_name] try: @@ -241,7 +254,7 @@ def _extract_single_group( vars_to_extract = self._get_available_variables(src_group, group_parms) if vars_to_extract: - output_file = output_dir / f"{group_name}.nc" + output_file = output_dir / f"{Path(log_file).stem}_Group_{group_name}.nc" self._create_netcdf_file(src_group, vars_to_extract, output_file) self.logger.info("Extracted %s to %s", group_name, output_file) else: @@ -393,8 +406,11 @@ def process_command_line(self): parser.add_argument( "--base_path", action="store", - default=BASE_PATH, - help="Base directory for missionlogs and missionnetcdfs, default: auv_data", + default=BASE_LRAUV_PATH, + help=( + "Base directory for missionlogs and missionnetcdfs, " + "default: auv_data in repo data directory" + ), ) parser.add_argument( "--title", @@ -488,9 +504,4 @@ def process_command_line(self): extract.show_variable_mapping() sys.exit(0) else: - url = os.path.join(BASE_LRAUV_WEB, 
extract.args.log_file) # noqa: PTH118 - output_dir = Path(BASE_PATH, Path(extract.args.log_file).parent) - extract.logger.info("Downloading %s", url) - input_file = extract.download_with_pooch(url, output_dir, extract.args.known_hash) - # extract.extract_groups_to_files(input_file, output_dir) - extract.extract_groups_to_files_netcdf4(input_file, output_dir) + extract.extract_groups_to_files_netcdf4(extract.args.log_file) From 2c7575ae75ab115192f74b2e4881b68642a15589 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 11:53:05 -0700 Subject: [PATCH 3/7] Add test for process_lrauv.py --- .vscode/launch.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index 88e8ae2b..804aa7f1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -293,6 +293,15 @@ "console": "integratedTerminal", "args": ["-v", "1", "--noinput", "--no_cleanup", "--download", "--mission", "2011.256.02"] }, + { + "name": "process_lrauv", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/process_lrauv.py", + "console": "integratedTerminal", + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] + }, ] } From 6abcb698b98085241f844e1b2be5a6ce968c0ee0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 11:53:55 -0700 Subject: [PATCH 4/7] Call process_log_files(), signifying that these are LRAUV data. 
--- src/data/process_lrauv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/process_lrauv.py b/src/data/process_lrauv.py index 1af00808..7a99f92b 100755 --- a/src/data/process_lrauv.py +++ b/src/data/process_lrauv.py @@ -40,4 +40,4 @@ class LRAUVProcessor(Processor): proc = LRAUVProcessor(VEHICLE, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR) proc.process_command_line() - proc.process_missions(START_YEAR) + proc.process_log_files() From 9aecc2aa1201dec737ec62d9ac17cbb87e2479d7 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 15:56:57 -0700 Subject: [PATCH 5/7] Implement first and "last" steps in process.py for LRAUV data. This gives a decent foundation for infilling the combine -> align -> resample steps using the existing framework that works for dorado and i2map data. --- .vscode/launch.json | 4 +- src/data/archive.py | 42 ++++++++ src/data/nc42netcdfs.py | 6 +- src/data/process.py | 228 ++++++++++++++++++++++++++++++++++------ 4 files changed, 242 insertions(+), 38 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 804aa7f1..73a3045b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -299,7 +299,9 @@ "request": "launch", "program": "${workspaceFolder}/src/data/process_lrauv.py", "console": "integratedTerminal", - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] + "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] }, diff --git a/src/data/archive.py 
b/src/data/archive.py index a1a3748a..2bf8aa37 100755 --- a/src/data/archive.py +++ b/src/data/archive.py @@ -19,10 +19,12 @@ from create_products import MISSIONIMAGES, MISSIONODVS from logs2netcdfs import BASE_PATH, LOG_FILES, MISSIONNETCDFS, AUV_NetCDF +from nc42netcdfs import BASE_LRAUV_PATH from resample import FREQ LOG_NAME = "processing.log" AUVCTD_VOL = "/Volumes/AUVCTD" +LRAUV_VOL = "/Volumes/LRAUV" class Archiver: @@ -170,6 +172,46 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: def copy_to_M3(self, resampled_nc_file: str) -> None: pass + def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: + "Copy the intermediate and resampled netCDF file(s) to the archive LRAUV location" + src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + dst_dir = Path(LRAUV_VOL, Path(log_file).parent) + try: + Path(dst_dir).stat() + except FileNotFoundError: + self.logger.exception("%s not found", dst_dir) + self.logger.info("Is %s mounted?", self.mount_dir) + sys.exit(1) + for src_file in sorted(src_dir.glob(f"{Path(log_file).stem}_Group_*.nc")): + dst_file = Path(dst_dir, src_file.name) + if self.args.clobber: + if dst_file.exists(): + self.logger.info("Removing %s", dst_file) + dst_file.unlink() + if src_file.exists(): + shutil.copyfile(src_file, dst_file) + self.logger.info("copyfile %s %s done.", src_file, dst_dir) + else: + self.logger.info( + "%-75s exists, but is not being archived because --clobber is not specified.", + src_file.name, + ) + for ftype in (f"{freq}.nc", "cal.nc", "align.nc"): + src_file = Path(src_dir, f"{Path(log_file).stem}_{ftype}") + dst_file = Path(dst_dir, src_file.name) + if self.args.clobber: + if dst_file.exists(): + self.logger.info("Removing %s", dst_file) + dst_file.unlink() + if src_file.exists(): + shutil.copyfile(src_file, dst_file) + self.logger.info("copyfile %s %s done.", src_file, dst_dir) + else: + self.logger.info( + "%-36s exists, but is not being archived because --clobber is not 
specified.", # noqa: E501 + src_file.name, + ) + def process_command_line(self): parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 7b958395..877a7adf 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -202,8 +202,8 @@ def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) netcdfs_dir.mkdir(exist_ok=True, parents=True) - extract.logger.info("Downloading %s", url) - input_file = extract.download_with_pooch(url, netcdfs_dir, self.args.known_hash) + self.logger.info("Downloading %s", url) + input_file = self.download_with_pooch(url, netcdfs_dir) self.logger.info("Extracting data from %s", input_file) with netCDF4.Dataset(input_file, "r") as src_dataset: @@ -248,7 +248,7 @@ def _extract_single_group( group_parms = SCIENG_PARMS[group_name] try: - self.logger.info(" Group %s", group_name) + self.logger.debug(" Group %s", group_name) src_group = src_dataset.groups[group_name] vars_to_extract = self._get_available_variables(src_group, group_parms) diff --git a/src/data/process.py b/src/data/process.py index f0036d72..4dcedd38 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -1,13 +1,26 @@ #!/usr/bin/env python """ -Base module for data processing. +Base module for data processing for Dorado class and LRAUV class data. Run the data through standard science data processing to calibrated, aligned, and resampled netCDF files. Use a standard set of processing options; more flexibility is available via the inndividual processing modules. +The desire is to reuse as much code as possible between Dorado class +and LRAUV class data processing. 
The initial steps of creating the _cal.nc +files differ because Dorado class data are raw binary log files that need to be +processed to _nc files, while LRAUV class data are NetCDF4 log files that +already contain much of the necessary information. The initial step for Dorado +class data are: download_process and calibrate, while for LRAUV class data +are: extract and combine. After that, the processing steps are similar with +the data in a local directory organized similarly to their institutional +archives. + +Dorado class data processing: +============================= + Limit processing to specific steps by providing arugments: - --download_process + --download_process (logs2netcdf.py & lopcToNetCDF.py) --calibrate --align --resample @@ -18,6 +31,21 @@ If none provided then perform all steps. Uses command line arguments from logs2netcdf.py and calibrate.py. + + +LRAUV class data processing: +============================ + +Limit processing to specific steps by providing arguments: + --extract (nc42netcdfs.py) + --combine + --align + --resample + --archive + --create_products + --email_to + --cleanup +If none provided then perform all steps. 
""" __author__ = "Mike McCann" @@ -45,6 +73,7 @@ from emailer import NOTIFICATION_EMAIL, Emailer from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, AUV_NetCDF from lopcToNetCDF import LOPC_Processor, UnexpectedAreaOfCode +from nc42netcdfs import BASE_LRAUV_PATH, BASE_LRAUV_WEB, Extract from resample import ( AUVCTD_OPENDAP_BASE, FLASH_THRESHOLD, @@ -67,6 +96,29 @@ class FailedMission(Exception): pass +def log_file_processor(func): + """Decorator to handle LRAUV log_file processing exceptions and cleanup.""" + + def wrapper(self, log_file: str): + t_start = time.time() + try: + return func(self, log_file) + except (TestMission, FailedMission) as e: + self.logger.info(str(e)) + finally: + if hasattr(self, "log_handler"): + # Cleanup and archiving logic + self.archive(mission=None, log_file=log_file) + if not self.args.no_cleanup: + self.cleanup(log_file=log_file) + self.logger.info( + "log_file %s took %.1f seconds to process", log_file, time.time() - t_start + ) + self.logger.removeHandler(self.log_handler) + + return wrapper + + class Processor: """ Base class for data processing. Run the data through standard science data @@ -320,10 +372,20 @@ def resample(self, mission: str) -> None: finally: resamp.logger.removeHandler(self.log_handler) - def archive(self, mission: str, add_logger_handlers: bool = True) -> None: # noqa: FBT001, FBT002 + def archive( + self, + mission: str = None, + log_file: Path = None, + add_logger_handlers: bool = True, # noqa: FBT001, FBT002 + ) -> None: + """Archiving steps for mission or log_file. + + If mission is provided, archive the processed data for Dorado class vehicles. 
+ If log_file is provided, archive the processed data for LRAUV class vehicles.""" arch = Archiver(add_logger_handlers) arch.args = argparse.Namespace() arch.args.auv_name = self.vehicle + arch.mount_dir = self.mount_dir arch.args.mission = mission arch.commandline = self.commandline arch.args.create_products = self.args.create_products @@ -334,25 +396,33 @@ def archive(self, mission: str, add_logger_handlers: bool = True) -> None: # no arch.args.verbose = self.args.verbose arch.logger.setLevel(self._log_levels[self.args.verbose]) if add_logger_handlers: - self.logger.info("Archiving steps for %s", mission) arch.logger.addHandler(self.log_handler) - file_name_base = f"{arch.args.auv_name}_{arch.args.mission}" - nc_file_base = Path( - BASE_PATH, - arch.args.auv_name, - MISSIONNETCDFS, - arch.args.mission, - file_name_base, - ) - self.logger.info("nc_file_base = %s, BASE_PATH = %s", nc_file_base, BASE_PATH) - if str(BASE_PATH).startswith(("/home/runner/", "/root")): - arch.logger.info( - "Not archiving %s %s to AUVCTD as it's likely CI testing", + if mission: + # Dorado class vehicle archiving + self.logger.info("Archiving steps for %s", mission) + file_name_base = f"{arch.args.auv_name}_{arch.args.mission}" + nc_file_base = Path( + BASE_PATH, arch.args.auv_name, + MISSIONNETCDFS, arch.args.mission, + file_name_base, ) + self.logger.info("nc_file_base = %s, BASE_PATH = %s", nc_file_base, BASE_PATH) + if str(BASE_PATH).startswith(("/home/runner/", "/root")): + arch.logger.info( + "Not archiving %s %s to AUVCTD as it's likely CI testing", + arch.args.auv_name, + arch.args.mission, + ) + else: + arch.copy_to_AUVTCD(nc_file_base, self.args.freq) + elif log_file: + # LRAUV class vehicle archiving + self.logger.info("Archiving steps for %s", log_file) + arch.copy_to_LRAUV(log_file, freq=self.args.freq) else: - arch.copy_to_AUVTCD(nc_file_base, self.args.freq) + arch.logger.error("Either mission or log_file must be provided for archiving.") 
arch.logger.removeHandler(self.log_handler) def create_products(self, mission: str) -> None: @@ -385,23 +455,59 @@ def email(self, mission: str) -> None: email.logger.setLevel(self._log_levels[self.args.verbose]) email.logger.addHandler(self.log_handler) - def cleanup(self, mission: str) -> None: - self.logger.info( - "Removing %s files from %s and %s", - mission, - MISSIONNETCDFS, - MISSIONLOGS, - ) - try: - shutil.rmtree( - Path(self.args.base_path, self.vehicle, MISSIONLOGS, mission), - ) - shutil.rmtree( - Path(self.args.base_path, self.vehicle, MISSIONNETCDFS, mission), + def _remove_empty_parents(self, path: Path, stop_at: Path) -> None: + """Remove empty parent directories up to stop_at path.""" + parent = path.parent + while parent != stop_at: + try: + ds_store = parent / ".DS_Store" + if ds_store.exists(): + ds_store.unlink() # Remove .DS_Store file so that the directory is empty + if parent.exists() and not any(parent.iterdir()): + self.logger.debug("Removing empty directory: %s", parent) + parent.rmdir() + parent = parent.parent + else: + break + except OSError as e: + self.logger.debug("Could not remove directory %s: %s", parent, e) + break + + def cleanup(self, mission: str = None, log_file: str = None) -> None: + if mission: + self.logger.info( + "Removing mission %s files from %s and %s", + mission, + MISSIONNETCDFS, + MISSIONLOGS, ) - self.logger.info("Done removing %s work files", mission) - except FileNotFoundError as e: - self.logger.info("File not found: %s", e) + try: + shutil.rmtree( + Path(self.args.base_path, self.vehicle, MISSIONLOGS, mission), + ) + shutil.rmtree( + Path(self.args.base_path, self.vehicle, MISSIONNETCDFS, mission), + ) + self.logger.info("Done removing %s work files", mission) + except FileNotFoundError as e: + self.logger.info("File not found: %s", e) + elif log_file: + self.logger.info("Removing work files from local directory for %s", log_file) + try: + log_path = Path(BASE_LRAUV_PATH, log_file).resolve() + for item in 
log_path.parent.iterdir(): + if item.is_file(): + self.logger.debug("Removing file %s", item) + item.unlink() + elif item.is_dir(): + self.logger.debug("Removing directory %s", item) + shutil.rmtree(item) + self._remove_empty_parents(log_path, Path(BASE_LRAUV_PATH)) + self.logger.info("Done removing work files for %s", log_file) + except FileNotFoundError as e: + self.logger.info("File not found: %s", e) + else: + self.logger.error("Either mission or log_file must be provided for cleanup.") def process_mission(self, mission: str, src_dir: str = "") -> None: # noqa: C901, PLR0912, PLR0915 netcdfs_dir = Path( @@ -621,6 +727,55 @@ def process_missions(self, start_year: int) -> None: src_dir=self.get_mission_dir(mission), ) + # ====================== LRAUV data specific processing ====================== + # The command line argument --log_file distinguishes LRAUV data from Dorado data. + # Dorado class data uses --mission instead. Also, start and end specifications + # are different for LRAUV data: --start and --end instead of --start_year, + # --start_yd, --end_year, and --end_yd. If --start and --end are specified then + # --auv_name is required to look up the individual log files to process. 
+ + def extract(self, log_file: str) -> None: + self.logger.info("Extracting log file: %s", log_file) + extract = Extract() + extract.args = argparse.Namespace() + extract.args.verbose = self.args.verbose + extract.logger.setLevel(self._log_levels[self.args.verbose]) + extract.logger.addHandler(self.log_handler) + + url = os.path.join(BASE_LRAUV_WEB, log_file) # noqa: PTH118 + output_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + extract.logger.info("Downloading %s", url) + input_file = extract.download_with_pooch(url, output_dir) + return extract.extract_groups_to_files_netcdf4(input_file) + + @log_file_processor + def process_log_file(self, log_file: str) -> None: + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + Path(netcdfs_dir).mkdir(parents=True, exist_ok=True) + self.log_handler = logging.FileHandler( + Path(BASE_LRAUV_PATH, f"{log_file}_extract.log"), mode="w+" + ) + self.log_handler.setLevel(self._log_levels[self.args.verbose]) + self.log_handler.setFormatter(AUV_NetCDF._formatter) + self.logger.info( + "=====================================================================================================================", + ) + self.logger.addHandler(self.log_handler) + self.logger.info("commandline = %s", self.commandline) + + netcdfs_dir = self.extract(log_file) + # self.align(log_file) + # self.resample(log_file) + # self.create_products(log_file) + self.logger.info("Finished processing log file: %s", log_file) + + def process_log_files(self) -> None: + if self.args.log_file: + # log_file is string like: + # brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 + self.vehicle = self.args.log_file.split("/")[0].lower() + self.process_log_file(self.args.log_file) + def process_command_line(self): parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, @@ -741,7 +896,12 @@ def process_command_line(self): parser.add_argument( "--mission", action="store", - help="Process only this 
mission", + help="For Dorado class data - process only this mission", + ) + parser.add_argument( + "--log_file", + action="store", + help="For LRAUV class data - process only this log file", ) parser.add_argument( "--freq", From d805f9731ca6f655bf9aed475c46cd07369b580b Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 16:05:05 -0700 Subject: [PATCH 6/7] Update EXPECTED_SIZE_GITHUB values. --- src/data/test_process_dorado.py | 2 +- src/data/test_process_i2map.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 3eb1033c..bcb41ac1 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -31,7 +31,7 @@ def test_process_dorado(complete_dorado_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 621298 + EXPECTED_SIZE_GITHUB = 621286 EXPECTED_SIZE_ACT = 621298 EXPECTED_SIZE_LOCAL = 621286 if str(proc.args.base_path).startswith("/home/runner"): diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index cbd2c2c3..e2f6cb05 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -30,7 +30,7 @@ def test_process_i2map(complete_i2map_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 58839 + EXPECTED_SIZE_GITHUB = 58832 EXPECTED_SIZE_ACT = 58816 EXPECTED_SIZE_LOCAL = 58884 if str(proc.args.base_path).startswith("/home/runner"): From 886728ef777c74f076a9973cdff18de39901ed48 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 16:08:19 -0700 Subject: [PATCH 7/7] Update EXPECTED_MD5_GITHUB value. 
--- src/data/test_process_dorado.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index bcb41ac1..90ec047b 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "6550bb8ed5919f21413f30dfffdcf116" + EXPECTED_MD5_GITHUB = "9f3f9e2e5abed08692ddb233dec0d0ac" EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" EXPECTED_MD5_LOCAL = "6ecb2229b00835055619e982fe9d5023" if str(proc.args.base_path).startswith("/home/runner"):