From 89988093f5936a5e886d72201c33a7b2c3971e91 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 15:30:25 -0800 Subject: [PATCH 1/5] Use ".nc4" extensions for files containing NetCDF4 data types. --- src/data/align.py | 10 +++++----- src/data/calibrate.py | 2 +- src/data/process.py | 4 ++-- src/data/resample.py | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index d3fca8ac..9a8f91fc 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -226,10 +226,10 @@ def process_cal(self) -> Path: # noqa: C901, PLR0912, PLR0915 """Process calibrated netCDF file using instance attributes.""" if self.mission and self.auv_name: netcdfs_dir = Path(self.base_path, self.auv_name, MISSIONNETCDFS, self.mission) - src_file = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_cal.nc") + src_file = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_cal.nc4") elif self.log_file: netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(self.log_file).parent}") - src_file = Path(netcdfs_dir, f"{Path(self.log_file).stem}_cal.nc") + src_file = Path(netcdfs_dir, f"{Path(self.log_file).stem}_cal.nc4") else: msg = "Must provide either mission and vehicle or log_file" raise ValueError(msg) @@ -712,9 +712,9 @@ def write_combined_netcdf(self, netcdfs_dir: Path) -> None: """Write aligned combined data to NetCDF file""" if self.log_file: # For LRAUV log files, use the log file stem for output name - out_fn = Path(netcdfs_dir, f"{Path(self.log_file).stem}_align.nc") + out_fn = Path(netcdfs_dir, f"{Path(self.log_file).stem}_align.nc4") else: - out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_align.nc") + out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_align.nc4") self.aligned_nc.attrs = self.global_metadata() self.logger.info("Writing aligned combined data to %s", out_fn) @@ -730,7 +730,7 @@ def write_combined_netcdf(self, netcdfs_dir: Path) -> None: def write_netcdf(self, netcdfs_dir: Path) -> None: """Write 
aligned netCDF file using instance attributes.""" self.aligned_nc.attrs = self.global_metadata() - out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_align.nc") + out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_align.nc4") self.logger.info("Writing aligned data to %s", out_fn) if out_fn.exists(): self.logger.debug("Removing file %s", out_fn) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 95fe1e68..272ed731 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -3297,7 +3297,7 @@ def _process(self, sensor, logs_dir, netcdfs_dir): # noqa: C901, PLR0912 def write_netcdf(self, netcdfs_dir: Path) -> None: """Write calibrated netCDF file using instance attributes.""" self.combined_nc.attrs = self.global_metadata() - out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_cal.nc") + out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_cal.nc4") self.logger.info("Writing calibrated instrument data to %s", out_fn) if Path(out_fn).exists(): Path(out_fn).unlink() diff --git a/src/data/process.py b/src/data/process.py index 518dfce4..2227b76e 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -561,10 +561,10 @@ def resample(self, mission: str = "", log_file: str = "") -> None: ) resamp.logger.setLevel(self._log_levels[self.config["verbose"]]) resamp.logger.addHandler(self.log_handler) - file_name = f"{resamp.auv_name}_{resamp.mission}_align.nc" + file_name = f"{resamp.auv_name}_{resamp.mission}_align.nc4" if resamp.log_file: netcdfs_dir = Path(BASE_LRAUV_PATH, Path(resamp.log_file).parent) - nc_file = Path(netcdfs_dir, f"{Path(resamp.log_file).stem}_align.nc") + nc_file = Path(netcdfs_dir, f"{Path(resamp.log_file).stem}_align.nc4") else: nc_file = Path( self.config["base_path"], diff --git a/src/data/resample.py b/src/data/resample.py index a9b409f0..5e1cf4a8 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -387,7 +387,7 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) 
-> None: if self.log_file: msg = f"A CTD depth was not found in {self.ds.encoding['source']}" else: - msg = f"{instr}_depth not found in {self.auv_name}_{self.mission}_align.nc" + msg = f"{instr}_depth not found in {self.auv_name}_{self.mission}_align.nc4" raise InvalidAlignFile(msg) from None try: self.df_o[f"{instr}_latitude"] = self.ds[f"{instr}_latitude"].to_pandas() @@ -1831,7 +1831,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 "standard_name": "time", "long_name": "Time (UTC)", } - out_fn = str(nc_file).replace("_align.nc", f"_{freq}.nc") + out_fn = str(nc_file).replace("_align.nc4", f"_{freq}.nc") if self.flash_threshold and self.flash_threshold != FLASH_THRESHOLD: # Append flash_threshold to output filename ft_ending = f"_ft{self.flash_threshold:.0E}.nc".replace("E+", "E") @@ -1899,9 +1899,9 @@ def process_command_line(self): resamp.process_command_line() if resamp.args.log_file: netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(resamp.args.log_file).parent}") - nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc") + nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc4") else: - file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" + file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc4" nc_file = Path( BASE_PATH, resamp.args.auv_name, From f018fc63c0369dcccfb8af29ee446053db171cca Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 15:31:10 -0800 Subject: [PATCH 2/5] Update ACT and LOCAL EXPECTED_SIZEs. 
--- src/data/test_process_dorado.py | 4 ++-- src/data/test_process_i2map.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 1f00d2c5..6753d02f 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -32,8 +32,8 @@ def test_process_dorado(complete_dorado_processing): # If code changes are expected to change the file size then we should # update the expected size here. EXPECTED_SIZE_GITHUB = 621404 - EXPECTED_SIZE_ACT = 621402 - EXPECTED_SIZE_LOCAL = 621452 + EXPECTED_SIZE_ACT = 621406 + EXPECTED_SIZE_LOCAL = 621456 if str(proc.args.base_path).startswith("/home/runner"): # The size is different in GitHub Actions, maybe due to different metadata assert nc_file.stat().st_size == EXPECTED_SIZE_GITHUB # noqa: S101 diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index df470347..1acedbc4 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -31,8 +31,8 @@ def test_process_i2map(complete_i2map_processing): # If code changes are expected to change the file size then we should # update the expected size here. EXPECTED_SIZE_GITHUB = 52682 - EXPECTED_SIZE_ACT = 52652 - EXPECTED_SIZE_LOCAL = 52782 + EXPECTED_SIZE_ACT = 52656 + EXPECTED_SIZE_LOCAL = 52786 if str(proc.args.base_path).startswith("/home/runner"): # The size is different in GitHub Actions, maybe due to different metadata assert nc_file.stat().st_size == EXPECTED_SIZE_GITHUB # noqa: S101 From d61bd47b07b0d631bcf4e9152a1212d572e128bb Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 15:36:58 -0800 Subject: [PATCH 3/5] Update EXPECTED_SIZE_GITHUBs, added 4. 
--- src/data/test_process_dorado.py | 2 +- src/data/test_process_i2map.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 6753d02f..777e3e66 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -31,7 +31,7 @@ def test_process_dorado(complete_dorado_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 621404 + EXPECTED_SIZE_GITHUB = 621408 EXPECTED_SIZE_ACT = 621406 EXPECTED_SIZE_LOCAL = 621456 if str(proc.args.base_path).startswith("/home/runner"): diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index 1acedbc4..d465e8ee 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -30,7 +30,7 @@ def test_process_i2map(complete_i2map_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 52682 + EXPECTED_SIZE_GITHUB = 52686 EXPECTED_SIZE_ACT = 52656 EXPECTED_SIZE_LOCAL = 52786 if str(proc.args.base_path).startswith("/home/runner"): From 34e9a14ad9516d516ecf081252690b70edb889e0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 15:40:29 -0800 Subject: [PATCH 4/5] Update ACT and LOCAL EXPECTED_MD5s. 
--- src/data/test_process_dorado.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 777e3e66..decf6f15 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -51,8 +51,8 @@ def test_process_dorado(complete_dorado_processing): if check_md5: # Check that the MD5 hash has not changed EXPECTED_MD5_GITHUB = "3bab0300e575c1d752a35f49e49e340e" - EXPECTED_MD5_ACT = "bb1d539284bee531a00c4d4d99580bf0" - EXPECTED_MD5_LOCAL = "9137be5a2ed840cfca94a723285355ec" + EXPECTED_MD5_ACT = "fac632c321b4956b6015d6de97958f8a" + EXPECTED_MD5_LOCAL = "c4a13d6e7ec7f35ae5aafa97ea9a2279" if str(proc.args.base_path).startswith("/home/runner"): # The MD5 hash is different in GitHub Actions, maybe due to different metadata assert hashlib.md5(open(nc_file, "rb").read()).hexdigest() == EXPECTED_MD5_GITHUB # noqa: PTH123, S101, S324, SIM115 From 181e47b2256dd6ee2b29e04d713e2815dd783878 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 15:42:53 -0800 Subject: [PATCH 5/5] Update EXPECTED_MD5_GITHUB. --- src/data/test_process_dorado.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index decf6f15..cc8f7e94 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "3bab0300e575c1d752a35f49e49e340e" + EXPECTED_MD5_GITHUB = "7c7a3e844988d3bb6ec2a951824bc0d5" EXPECTED_MD5_ACT = "fac632c321b4956b6015d6de97958f8a" EXPECTED_MD5_LOCAL = "c4a13d6e7ec7f35ae5aafa97ea9a2279" if str(proc.args.base_path).startswith("/home/runner"):