Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,17 @@
"console": "integratedTerminal",
"args": ["-v", "1", "--noinput", "--no_cleanup", "--download", "--mission", "2011.256.02"]
},
{
"name": "process_lrauv",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/data/process_lrauv.py",
"console": "integratedTerminal",
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"]
//"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"]
"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"]
//"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"]
},

]
}
42 changes: 42 additions & 0 deletions src/data/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@

from create_products import MISSIONIMAGES, MISSIONODVS
from logs2netcdfs import BASE_PATH, LOG_FILES, MISSIONNETCDFS, AUV_NetCDF
from nc42netcdfs import BASE_LRAUV_PATH
from resample import FREQ

LOG_NAME = "processing.log"
AUVCTD_VOL = "/Volumes/AUVCTD"
LRAUV_VOL = "/Volumes/LRAUV"


class Archiver:
Expand Down Expand Up @@ -170,6 +172,46 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa:
def copy_to_M3(self, resampled_nc_file: str) -> None:
    """Copy the resampled netCDF file to the M3 archive location.

    Currently a no-op placeholder — the body is empty; M3 archiving is
    not implemented here.  TODO(review): confirm whether M3 archiving is
    handled elsewhere or still pending.
    """
    pass

def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None:
    """Copy the intermediate and resampled netCDF file(s) to the archive LRAUV location.

    Args:
        log_file: Path of the .nc4 log file relative to BASE_LRAUV_PATH; its
            parent directory determines both the local source directory and
            the destination directory on the LRAUV volume.
        freq: Resampling frequency string embedded in the resampled file
            name (default: FREQ).

    Exits the process with status 1 if the destination directory is not
    reachable (e.g. the archive volume is not mounted).
    """
    src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent)
    dst_dir = Path(LRAUV_VOL, Path(log_file).parent)
    try:
        # stat() raises FileNotFoundError when the archive volume is not mounted
        dst_dir.stat()
    except FileNotFoundError:
        self.logger.exception("%s not found", dst_dir)
        self.logger.info("Is %s mounted?", self.mount_dir)
        sys.exit(1)
    stem = Path(log_file).stem
    # Per-group files extracted from the .nc4, e.g. <stem>_Group_ctd.nc
    for src_file in sorted(src_dir.glob(f"{stem}_Group_*.nc")):
        self._archive_file(src_file, dst_dir, name_width=75)
    # Calibrated, aligned, and resampled products
    for ftype in (f"{freq}.nc", "cal.nc", "align.nc"):
        self._archive_file(Path(src_dir, f"{stem}_{ftype}"), dst_dir, name_width=36)

def _archive_file(self, src_file: Path, dst_dir: Path, name_width: int = 75) -> None:
    """Copy one file into dst_dir, honoring the --clobber flag.

    With --clobber: an existing destination copy is removed first, then the
    source file (if it exists) is copied.  Without --clobber: nothing is
    copied and the skip is logged.

    Args:
        src_file: Local file to archive (may not exist; silently skipped then).
        dst_dir: Destination directory on the archive volume.
        name_width: Left-justified field width for the file name in the
            skip-log message (matches the original per-loop formatting).
    """
    dst_file = Path(dst_dir, src_file.name)
    if self.args.clobber:
        if dst_file.exists():
            self.logger.info("Removing %s", dst_file)
            dst_file.unlink()
        if src_file.exists():
            shutil.copyfile(src_file, dst_file)
            self.logger.info("copyfile %s %s done.", src_file, dst_dir)
    else:
        self.logger.info(
            f"%-{name_width}s exists, but is not being archived because --clobber is not specified.",  # noqa: E501
            src_file.name,
        )

def process_command_line(self):
parser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
Expand Down
57 changes: 34 additions & 23 deletions src/data/nc42netcdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

# Local directory that serves as the work area for log_files and netcdf files
BASE_LRAUV_WEB = "https://dods.mbari.org/data/lrauv/"
BASE_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve()
BASE_LRAUV_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve()
SUMMARY_SOURCE = "Original LRAUV data extracted from {}, group {}"
GROUPS = ["navigation", "ctd", "ecopuck"] # Your actual group names

Expand Down Expand Up @@ -181,23 +181,34 @@ def get_groups_netcdf4(self, file_path):
with netCDF4.Dataset(file_path, "r") as dataset:
return list(dataset.groups.keys())

def extract_groups_to_files_netcdf4(self, input_file, output_dir):
"""Extract each group to a separate NetCDF file using netCDF4 library.
def extract_groups_to_files_netcdf4(self, log_file: str) -> Path:
"""Extract each group from .nc4 file to a separate .nc file using netCDF4 library.

Args:
log_file: Relative path from BASE_LRAUV_WEB to .nc4 log_file

Returns:
netcdfs_dir: Local directory where NetCDF files were saved

Note:
The xarray library fails reading the WetLabsBB2FL group from this file:
brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4
with garbled data for the serial variable (using ncdump):
serial = "<C0>$F<C4>!{<8D>\031@<AE>7\024[<FB><BF>P<C0><D4>]\001\030" ;
but netCDF4 can skip over it and read the rest of the variables.
"""
output_dir = Path(output_dir)
output_dir.mkdir(exist_ok=True, parents=True)
# Download over http so that we don't need to mount smb shares
url = os.path.join(BASE_LRAUV_WEB, log_file) # noqa: PTH118
netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent)
netcdfs_dir.mkdir(exist_ok=True, parents=True)

self.logger.info("Extracting data from %s", input_file)
self.logger.info("Downloading %s", url)
input_file = self.download_with_pooch(url, netcdfs_dir)

self.logger.info("Extracting data from %s", input_file)
with netCDF4.Dataset(input_file, "r") as src_dataset:
# Extract root group first
self._extract_root_group(src_dataset, output_dir)
self._extract_root_group(src_dataset, log_file, netcdfs_dir)

# Extract all other groups
all_groups = list(src_dataset.groups.keys())
Expand All @@ -206,10 +217,12 @@ def extract_groups_to_files_netcdf4(self, input_file, output_dir):
if group_name != "/" and group_name not in all_groups:
self.logger.warning("Group %s not found in %s", group_name, input_file)
continue
self._extract_single_group(src_dataset, group_name, output_dir)
self._extract_single_group(src_dataset, group_name, log_file, netcdfs_dir)

return netcdfs_dir

def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path):
"""Extract variables from the root group to Universals.nc."""
def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, output_dir: Path):
"""Extract variables from the root group to <stem>_Group_Universals.nc."""
root_parms = SCIENG_PARMS.get("/", [])
if not root_parms:
return
Expand All @@ -219,7 +232,7 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path):
vars_to_extract = self._get_available_variables(src_dataset, root_parms)

if vars_to_extract:
output_file = output_dir / "Universals.nc"
output_file = output_dir / f"{Path(log_file).stem}_Group_Universals.nc"
self._create_netcdf_file(src_dataset, vars_to_extract, output_file)
self.logger.info("Extracted root group '/' to %s", output_file)
else:
Expand All @@ -229,19 +242,19 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path):
self.logger.warning("Could not extract root group '/': %s", e)

def _extract_single_group(
self, src_dataset: netCDF4.Dataset, group_name: str, output_dir: Path
self, src_dataset: netCDF4.Dataset, group_name: str, log_file: str, output_dir: Path
):
"""Extract a single group to its own NetCDF file."""
"""Extract a single group to its own NetCDF file named like <stem>_Group_<group_name>.nc."""
group_parms = SCIENG_PARMS[group_name]

try:
self.logger.info(" Group %s", group_name)
self.logger.debug(" Group %s", group_name)
src_group = src_dataset.groups[group_name]

vars_to_extract = self._get_available_variables(src_group, group_parms)

if vars_to_extract:
output_file = output_dir / f"{group_name}.nc"
output_file = output_dir / f"{Path(log_file).stem}_Group_{group_name}.nc"
self._create_netcdf_file(src_group, vars_to_extract, output_file)
self.logger.info("Extracted %s to %s", group_name, output_file)
else:
Expand Down Expand Up @@ -393,8 +406,11 @@ def process_command_line(self):
parser.add_argument(
"--base_path",
action="store",
default=BASE_PATH,
help="Base directory for missionlogs and missionnetcdfs, default: auv_data",
default=BASE_LRAUV_PATH,
help=(
"Base directory for missionlogs and missionnetcdfs, "
"default: auv_data in repo data directory"
),
)
parser.add_argument(
"--title",
Expand Down Expand Up @@ -488,9 +504,4 @@ def process_command_line(self):
extract.show_variable_mapping()
sys.exit(0)
else:
url = os.path.join(BASE_LRAUV_WEB, extract.args.log_file) # noqa: PTH118
output_dir = Path(BASE_PATH, Path(extract.args.log_file).parent)
extract.logger.info("Downloading %s", url)
input_file = extract.download_with_pooch(url, output_dir, extract.args.known_hash)
# extract.extract_groups_to_files(input_file, output_dir)
extract.extract_groups_to_files_netcdf4(input_file, output_dir)
extract.extract_groups_to_files_netcdf4(extract.args.log_file)
Loading