From 628ece4ef1237f984ba72bb5789d638841074671 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Mon, 11 Dec 2023 12:12:28 -0500
Subject: [PATCH] Add stitched geometry creation (#12)

* add static layer download and geometry creation
* add more types, fix pre-commits
* add tests and fix failing
* add tests and fix failing
* spacing
* try to fix pytest warning, io is optional
* install rasterio too
* try reverse
* try this version
* install deps correctly
* add another warning ignore, gdal 3.4
* gdal min is 3.5
* missing future import
* use `lru_cache` for py38
* add changelog, bump to py39
* remove `cast`
* back to `@cache`
---
 .github/workflows/ci.yml     |  12 +-
 CHANGELOG.md                 |  22 ++
 LICENSE                      |  36 ++-
 environment-geo.yml          |   6 +-
 environment.yml              |   2 +-
 pyproject.toml               |  12 +-
 src/opera_utils/__init__.py  |   1 -
 src/opera_utils/_dates.py    |  16 +-
 src/opera_utils/_io.py       | 122 ++++++++
 src/opera_utils/_utils.py    | 155 ++++++++++
 src/opera_utils/constants.py |  15 +
 src/opera_utils/geometry.py  | 166 +++++++++++
 src/opera_utils/stitching.py | 533 +++++++++++++++++++++++++++++++++++
 tests/conftest.py            |   5 -
 tests/test_io.py             |  91 ++++++
 tests/test_utils.py          |  58 ++++
 16 files changed, 1218 insertions(+), 34 deletions(-)
 create mode 100644 CHANGELOG.md
 create mode 100644 src/opera_utils/_utils.py
 create mode 100644 src/opera_utils/geometry.py
 create mode 100644 src/opera_utils/stitching.py
 create mode 100644 tests/test_io.py
 create mode 100644 tests/test_utils.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e5bbf13..4136a78 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -16,13 +16,17 @@ jobs:
       matrix:
         os: [ubuntu-latest, macos-latest]
         deps:
-          # Note: for now we're manually adding tophu deps,
-          # will change once it's on conda forge
           - label: Latest
-            spec: ""
+            spec: >-
+              rasterio
+              gdal
+              asf_search
           - label: Minimum
             spec: >-
-              python=3.8
+              python=3.9
+              rasterio=1.3
+              gdal=3.5
+              asf_search=6.7.2
       fail-fast: false
     name: ${{ matrix.os }} • ${{ matrix.deps.label }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..bf4498a
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,22 @@
+# [Unreleased](https://github.com/opera-adt/opera-utils/compare/v0.1.4...main)
+
+**Added**
+- Basic raster metadata convenience functions as `_io.get_raster_*`
+- Ability to download CSLC static layers and stitch them into line-of-sight rasters
+
+
+**Requirements**
+The minimum Python version is 3.9.
+
+- click>=7.0
+- h5py>=1.10
+- numpy>=1.20
+- pooch>=1.7
+- pyproj>=3.3
+- shapely>=1.8
+- typing_extensions>=4
+
+For other interactions with geospatial rasters (e.g. reading metadata, creating geometries from static layers):
+- asf_search>=6.7.2
+- gdal>=3.5
+- rasterio>=1.3
diff --git a/LICENSE b/LICENSE
index bc1ef5d..10af7c5 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,3 +1,36 @@
+This software is licensed under either the BSD-3-Clause or Apache-2.0 license. Users may
+choose which license to apply. The terms of each license are reproduced below.
+
+---------------------------------------------------------------------------------------
+
+Copyright (c) 2023 California Institute of Technology ("Caltech"). U.S. Government
+sponsorship acknowledged.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are
+permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of
+  conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. +* Neither the name of Caltech nor its operating division, the Jet Propulsion Laboratory, + nor the names of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +--------------------------------------------------------------------------------------- Apache License Version 2.0, January 2004 @@ -187,7 +220,8 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2023 Scott Staniewicz + Copyright 2023 California Institute of Technology (“Caltech”). + U.S. Government sponsorship acknowledged. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/environment-geo.yml b/environment-geo.yml index 3c070f7..7d7d26b 100644 --- a/environment-geo.yml +++ b/environment-geo.yml @@ -1,6 +1,6 @@ channels: - conda-forge dependencies: - - pyogrio>=0.5 - - gdal>=3.3 - - geopandas-base>=0.12 + - asf_search>=6.7 + - gdal>=3.5 + - rasterio>=1.3 diff --git a/environment.yml b/environment.yml index fbb0d31..a2f9f30 100644 --- a/environment.yml +++ b/environment.yml @@ -2,7 +2,7 @@ name: opera-utils-env channels: - conda-forge dependencies: - - python>=3.8 + - python>=3.9 - pip>=21.3 # https://pip.pypa.io/en/stable/reference/build-system/pyproject-toml/#editable-installation - git # for pip install, due to setuptools_scm - click>=7.0 diff --git a/pyproject.toml b/pyproject.toml index d26c16b..bb267cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,13 +9,12 @@ authors = [ ] description = "Miscellaneous utilities for working with OPERA data products" readme = { file = "README.md", content-type = "text/markdown" } -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -71,7 +70,7 @@ preview = true [tool.isort] profile = "black" -known_first_party = [" opera_utils"] +known_first_party = ["opera_utils"] [tool.mypy] python_version = "3.10" @@ -84,4 +83,9 @@ ignore = "D100,D102,D104,D105,D106,D107,D203,D204,D213,D413" [tool.pytest.ini_options] doctest_optionflags = "NORMALIZE_WHITESPACE NUMBER" addopts = " --cov=opera_utils --doctest-modules --randomly-seed=1234 --ignore=scripts --ignore=docs" -filterwarnings = ["error"] +filterwarnings = 
[
+    "error",
+    "ignore:h5py is running against HDF5.*:UserWarning",
+    # https://github.com/dateutil/dateutil/pull/1285 will be released, but not yet
+    "ignore:datetime.datetime.utcfromtimestamp.*:DeprecationWarning",
+]
diff --git a/src/opera_utils/__init__.py b/src/opera_utils/__init__.py
index 7fe9603..89f23bb 100644
--- a/src/opera_utils/__init__.py
+++ b/src/opera_utils/__init__.py
@@ -4,7 +4,6 @@
 from __future__ import annotations
 
 from ._dates import *
-from ._io import *
 from ._version import version as __version__
 from .bursts import *
 from .burst_frame_db import *
diff --git a/src/opera_utils/_dates.py b/src/opera_utils/_dates.py
index 6e0c5b4..70e29e0 100644
--- a/src/opera_utils/_dates.py
+++ b/src/opera_utils/_dates.py
@@ -4,10 +4,10 @@
 import itertools
 import re
 from collections import defaultdict
-from pathlib import Path
 from typing import Iterable, overload
 
 from ._types import DateOrDatetime, Filename, PathLikeT
+from ._utils import _get_path_from_gdal_str
 
 __all__ = [
     "get_dates",
@@ -145,20 +145,6 @@ def _parse_date(datestr: str, fmt: str = DATE_FORMAT) -> datetime.datetime:
     return datetime.datetime.strptime(datestr, fmt)
 
 
-def _get_path_from_gdal_str(name: Filename) -> Path:
-    s = str(name)
-    if s.upper().startswith("DERIVED_SUBDATASET"):
-        # like DERIVED_SUBDATASET:AMPLITUDE:slc_filepath.tif
-        p = s.split(":")[-1].strip('"').strip("'")
-    elif ":" in s and (s.upper().startswith("NETCDF") or s.upper().startswith("HDF")):
-        # like NETCDF:"slc_filepath.nc":subdataset
-        p = s.split(":")[1].strip('"').strip("'")
-    else:
-        # Whole thing is the path
-        p = str(name)
-    return Path(p)
-
-
 def _date_format_to_regex(date_format: str) -> re.Pattern:
     r"""Convert a python date format string to a regular expression.
diff --git a/src/opera_utils/_io.py b/src/opera_utils/_io.py
index e69de29..3a67b7d 100644
--- a/src/opera_utils/_io.py
+++ b/src/opera_utils/_io.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+import rasterio as rio
+from affine import Affine
+from pyproj import CRS
+
+from ._types import Bbox, PathOrStr
+
+
+def get_raster_nodata(filename: PathOrStr, band: int = 1) -> float | None:
+    """Get the nodata value from a file.
+
+    Parameters
+    ----------
+    filename : PathOrStr
+        Path to the file to load.
+    band : int, optional
+        Band to get the nodata value for, by default 1.
+
+    Returns
+    -------
+    float or None
+        Nodata value, or None if not found.
+    """
+    nodatas = _get_dataset_attr(filename, "nodatavals")
+    return nodatas[band - 1]
+
+
+def get_raster_crs(filename: PathOrStr) -> CRS:
+    """Get the CRS from a file.
+
+    Parameters
+    ----------
+    filename : PathOrStr
+        Path to the file to load.
+
+    Returns
+    -------
+    CRS
+        pyproj CRS for `filename`.
+    """
+    return _get_dataset_attr(filename, "crs")
+
+
+def get_raster_transform(filename: PathOrStr) -> Affine:
+    """Get the rasterio `Affine` transform from a file.
+
+    Parameters
+    ----------
+    filename : PathOrStr
+        Path to the file to load.
+
+    Returns
+    -------
+    Affine
+        Two dimensional affine transform for 2D linear mapping.
+    """
+    return _get_dataset_attr(filename, "transform")
+
+
+def get_raster_gt(filename: PathOrStr) -> list[float]:
+    """Get the GDAL geotransform from a file.
+
+    Parameters
+    ----------
+    filename : PathOrStr
+        Path to the file to load.
+
+    Returns
+    -------
+    list[float]
+        6 floats representing a GDAL Geotransform.
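+
+    Notes
+    -----
+    A GDAL geotransform is a reordering of the `Affine` coefficients into
+    ``(left_x, dx, row_rotation, top_y, col_rotation, dy)``. A hypothetical
+    30 m UTM grid illustrates the reordering:
+
+    >>> from affine import Affine
+    >>> Affine(30.0, 0.0, 500000.0, 0.0, -30.0, 3700000.0).to_gdal()
+    (500000.0, 30.0, 0.0, 3700000.0, 0.0, -30.0)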
+ """ + return get_raster_transform(filename).to_gdal() + + +def get_raster_dtype(filename: PathOrStr, band: int = 1) -> np.dtype: + """Get the numpy data type from a raster file. + + Parameters + ---------- + filename : PathOrStr + Path to the file to load. + band : int, optional + Band to get nodata value for, by default 1. + + Returns + ------- + np.dtype + Data type. + """ + dtype_per_band = _get_dataset_attr(filename, "dtypes") + return np.dtype(dtype_per_band[band - 1]) + + +def get_raster_driver(filename: PathOrStr) -> str: + """Get the GDAL driver `ShortName` from a file. + + Parameters + ---------- + filename : PathOrStr + Path to the file to load. + + Returns + ------- + str + Driver name. + """ + return _get_dataset_attr(filename, "driver") + + +def get_raster_bounds(filename: PathOrStr) -> Bbox: + """Get the (left, bottom, right, top) bounds of the image.""" + return _get_dataset_attr(filename, "bounds") + + +def _get_dataset_attr(filename: PathOrStr, attr_name: str) -> Any: + with rio.open(filename) as src: + return getattr(src, attr_name) diff --git a/src/opera_utils/_utils.py b/src/opera_utils/_utils.py new file mode 100644 index 0000000..015e462 --- /dev/null +++ b/src/opera_utils/_utils.py @@ -0,0 +1,155 @@ +from __future__ import annotations + +import os +import shutil +import tempfile +from contextlib import contextmanager +from pathlib import Path +from typing import Generator + +import numpy as np +from numpy.typing import DTypeLike + +from ._types import PathOrStr + +__all__ = [ + "format_nc_filename", + "scratch_directory", +] + + +def format_nc_filename(filename: PathOrStr, ds_name: str | None = None) -> str: + """Format an HDF5/NetCDF filename with dataset for reading using GDAL. + + If `filename` is already formatted, or if `filename` is not an HDF5/NetCDF + file (based on the file extension), it is returned unchanged. + + Parameters + ---------- + filename : str or PathLike + Filename to format. + ds_name : str, optional + Dataset name to use. If not provided for a .h5 or .nc file, an error is raised. + + Returns + ------- + str + Formatted filename like + NETCDF:"filename.nc":"//ds_name" + + Raises + ------ + ValueError + If `ds_name` is not provided for a .h5 or .nc file. + """ + # If we've already formatted the filename, return it + if str(filename).startswith("NETCDF:") or str(filename).startswith("HDF5:"): + return str(filename) + + if not (os.fspath(filename).endswith(".nc") or os.fspath(filename).endswith(".h5")): + return os.fspath(filename) + + # Now we're definitely dealing with an HDF5/NetCDF file + if ds_name is None: + raise ValueError("Must provide dataset name for HDF5/NetCDF files") + + return f'NETCDF:"{filename}":"//{ds_name.lstrip("/")}"' + + +def _get_path_from_gdal_str(name: PathOrStr) -> Path: + s = str(name) + if s.upper().startswith("DERIVED_SUBDATASET"): + # like DERIVED_SUBDATASET:AMPLITUDE:slc_filepath.tif + p = s.split(":")[-1].strip('"').strip("'") + elif ":" in s and (s.upper().startswith("NETCDF") or s.upper().startswith("HDF")): + # like NETCDF:"slc_filepath.nc":subdataset + p = s.split(":")[1].strip('"').strip("'") + else: + # Whole thing is the path + p = str(name) + return Path(p) + + +def numpy_to_gdal_type(np_dtype: DTypeLike) -> int: + """Convert numpy dtype to gdal type. + + Parameters + ---------- + np_dtype : DTypeLike + Numpy dtype to convert. + + Returns + ------- + int + GDAL type code corresponding to `np_dtype`. 
+ + Raises + ------ + TypeError + If `np_dtype` is not a numpy dtype, or if the provided dtype is not + supported by GDAL (for example, `np.dtype('>i4')`) + """ + from osgeo import gdal_array, gdalconst + + np_dtype = np.dtype(np_dtype) + + if np.issubdtype(bool, np_dtype): + return gdalconst.GDT_Byte + gdal_code = gdal_array.NumericTypeCodeToGDALTypeCode(np_dtype) + if gdal_code is None: + raise TypeError(f"dtype {np_dtype} not supported by GDAL.") + return gdal_code + + +def gdal_to_numpy_type(gdal_type: str | int) -> np.dtype: + """Convert gdal type to numpy type.""" + from osgeo import gdal, gdal_array + + if isinstance(gdal_type, str): + gdal_type = gdal.GetDataTypeByName(gdal_type) + return np.dtype(gdal_array.GDALTypeCodeToNumericTypeCode(gdal_type)) + + +@contextmanager +def scratch_directory( + dir_: PathOrStr | None = None, *, delete: bool = True +) -> Generator[Path, None, None]: + """Context manager that creates a (possibly temporary) file system directory. + + If `dir_` is a path-like object, a directory will be created at the specified + file system path if it did not already exist. Otherwise, if `dir_` is None, a + temporary directory will instead be created as though by + ``tempfile.TemporaryDirectory()``. + + The directory may be automatically removed from the file system upon exiting the + context manager. + + Parameters + ---------- + dir_ : PathOrStr or None, optional + Scratch directory path. If None, a temporary directory will be created. Defaults + to None. + delete : bool, optional + If True and `dir_` didn't exist, the directory and its contents are + recursively removed from the file system upon exiting the context manager. + Note that if `dir_` already exists, this option is ignored. + Defaults to True. + + Yields + ------ + pathlib.Path + Scratch directory path. If `delete` was True, the directory will be removed from + the file system upon exiting the context manager scope. 
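+
+    Examples
+    --------
+    With no arguments, a temporary directory is created and removed on exit:
+
+    >>> with scratch_directory() as d:
+    ...     _ = (d / "notes.txt").write_text("scratch")
+    >>> d.exists()
+    False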
+ """ + if dir_ is None: + scratchdir = Path(tempfile.mkdtemp()) + dir_already_existed = False + else: + scratchdir = Path(dir_) + dir_already_existed = scratchdir.exists() + scratchdir.mkdir(parents=True, exist_ok=True) + + yield scratchdir + + if delete and not dir_already_existed: + shutil.rmtree(scratchdir) diff --git a/src/opera_utils/constants.py b/src/opera_utils/constants.py index 468baf7..8ebfc15 100644 --- a/src/opera_utils/constants.py +++ b/src/opera_utils/constants.py @@ -22,3 +22,18 @@ r"[tT](?P\d{3})[-_](?P\d{6})[-_](?Piw[1-3])", re.IGNORECASE, ) + +DEFAULT_TIFF_OPTIONS = ( + "COMPRESS=DEFLATE", + "ZLEVEL=4", + "TILED=YES", + "BLOCKXSIZE=128", + "BLOCKYSIZE=128", +) +EXTRA_COMPRESSED_TIFF_OPTIONS = ( + *DEFAULT_TIFF_OPTIONS, + # Note: we're dropping mantissa bits before we do not + # need prevision for LOS rasters (or incidence) + "DISCARD_LSB=6", + "PREDICTOR=2", +) diff --git a/src/opera_utils/geometry.py b/src/opera_utils/geometry.py new file mode 100644 index 0000000..a56c248 --- /dev/null +++ b/src/opera_utils/geometry.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +import logging +import netrc +from enum import Enum +from functools import cache +from pathlib import Path +from typing import Literal, Mapping, Sequence + +import asf_search as asf + +from opera_utils import get_burst_ids_for_frame, stitching +from opera_utils._types import PathOrStr +from opera_utils._utils import format_nc_filename, scratch_directory + +from .constants import EXTRA_COMPRESSED_TIFF_OPTIONS + +logger = logging.getLogger(__name__) + + +class Layer(Enum): + """Names of available datasets in CSLC static layers HDF5 files.""" + + LOS_EAST = "los_east" + LOS_NORTH = "los_north" + LAYOVER_SHADOW_MASK = "layover_shadow_mask" + LOCAL_INCIDENCE_ANGLE = "local_incidence_angle" + + +# Layover shadow mask. 0=no layover, no shadow; 1=shadow; 2=layover; 3=shadow and layover. +DEFAULT_LAYERS = list(Layer)[:3] # Skip the local incidence, much less compressible +DEFAULT_STRIDES = {"x": 6, "y": 3} +LAYER_TO_NODATA = { + Layer.LOS_EAST: 0, + Layer.LOS_NORTH: 0, + Layer.LOCAL_INCIDENCE_ANGLE: 0, + # layover_shadow_mask is Int8 with 127 meaning nodata + Layer.LAYOVER_SHADOW_MASK: 127, +} + + +def create_geometry_files( + *, + frame_id: int | None = None, + burst_ids: Sequence[str] | None = None, + output_dir: PathOrStr = Path("."), + download_dir: PathOrStr | None = None, + save_hdf5_files: bool = False, + layers: Sequence[Layer | str] = DEFAULT_LAYERS, + strides: Mapping[str, int] = DEFAULT_STRIDES, + max_download_jobs: int = 3, +) -> list[Path]: + """Create merged geometry files for a frame of list of burst IDs. + + Parameters + ---------- + frame_id : int | None, optional + DISP frame ID to create, by default None + burst_ids : Sequence[str] | None, optional + Alternative to `frame_id`, manually specify CSLC burst IDs. 
+    output_dir : PathOrStr, optional
+        Directory to store the output geotiffs, by default Path(".")
+    download_dir : PathOrStr | None, optional
+        Directory to save the downloaded static layer HDF5 files.
+        If None, uses an `hdf5/` subdirectory of `output_dir`, by default None
+    save_hdf5_files : bool, optional
+        If True, keep the downloaded HDF5 files after stitching, by default False
+    layers : Sequence[Layer | str], optional
+        Which static layer datasets to stitch, by default DEFAULT_LAYERS
+    strides : Mapping[str, int], optional
+        Decimation factors in x and y applied while stitching,
+        by default DEFAULT_STRIDES
+    max_download_jobs : int, optional
+        Number of parallel download processes, by default 3
+
+    Returns
+    -------
+    list[Path]
+        Paths to the stitched geotiffs, one per layer.
+
+    Raises
+    ------
+    ValueError
+        If neither `frame_id` nor `burst_ids` is provided.
+    """
+    if frame_id is not None:
+        burst_ids = get_burst_ids_for_frame(frame_id=frame_id)
+
+    if not burst_ids:
+        raise ValueError("Must provide frame_id or burst_ids")
+
+    output_path = Path(output_dir)
+    output_path.mkdir(exist_ok=True, parents=True)
+    if download_dir is None:
+        download_dir = output_path / "hdf5"
+    output_files: list[Path] = []
+
+    with scratch_directory(download_dir, delete=not save_hdf5_files) as sd:
+        local_hdf5_files = _download_static_layers(
+            burst_ids=burst_ids, output_dir=sd, max_jobs=max_download_jobs
+        )
+        for layer in layers:
+            layer_enum = Layer(layer)
+            name = layer_enum.value
+            gdal_strings = [
+                format_nc_filename(f, ds_name=f"data/{name}") for f in local_hdf5_files
+            ]
+            nodata = LAYER_TO_NODATA[layer_enum]
+            cur_outfile = output_path / f"{name}.tif"
+            output_files.append(cur_outfile)
+            logger.info(f"Merging images for {name}")
+            stitching.merge_images(
+                file_list=gdal_strings,
+                outfile=cur_outfile,
+                strides=strides,
+                driver="GTiff",
+                options=EXTRA_COMPRESSED_TIFF_OPTIONS,
+                resample_alg="nearest",
+                in_nodata=nodata,
+                out_nodata=nodata,
+            )
+
+    return output_files
+
+
+@cache
+def _search(burst_ids: Sequence[str]) -> asf.ASFSearchResults:
+    return asf.search(operaBurstID=list(burst_ids), processingLevel="CSLC-STATIC")
+
+
+def _get_urls(
+    results: asf.ASFSearchResults,
+    type_: Literal["https", "s3"] = "https",
+) -> list[str]:
+    if type_ == "https":
+        return [r.properties["url"] for r in results]
+    elif type_ == "s3":
+        # TODO: go through .umm, find the s3 url
+        raise NotImplementedError()
+    else:
+        raise ValueError(f"type_ must be 'https' or 's3'. Got {type_}")
+    # r.umm
+    # 'RelatedUrls': [...
+    #     {'URL': 's3://asf-cumulus-prod-opera-products/OPERA_L2_CSLC
+    #      'Type': 'GET DATA VIA DIRECT ACCESS',
+    #      'Description': 'This link provides direct download access vi
+    #      'Format': 'HDF5'},
+
+
+def _download_static_layers(
+    burst_ids: Sequence[str], output_dir: Path, max_jobs: int = 3
+) -> list[Path]:
+    # Make a tuple so it can be hashed by the cached `_search`
+    results = _search(burst_ids=tuple(burst_ids))
+    session = _get_auth_session()
+    urls = _get_urls(results)
+    asf.download_urls(urls=urls, path=output_dir, session=session, processes=max_jobs)
+    return [output_dir / r.properties["fileName"] for r in results]
+
+
+def _get_auth_session() -> asf.ASFSession:
+    host = "urs.earthdata.nasa.gov"
+
+    auth = netrc.netrc().authenticators(host)
+    if auth is None:
+        raise ValueError(f"No .netrc entry found for {host}")
+    username, _, password = auth
+    return asf.ASFSession().auth_with_creds(username, password)
diff --git a/src/opera_utils/stitching.py b/src/opera_utils/stitching.py
new file mode 100644
index 0000000..00e9b15
--- /dev/null
+++ b/src/opera_utils/stitching.py
@@ -0,0 +1,533 @@
+"""stitching.py: utilities for merging geographic rasters."""
+from __future__ import annotations
+
+import logging
+import math
+import subprocess
+import tempfile
+from os import fspath
+from pathlib import Path
+from typing import Iterable, Mapping, Optional, Sequence
+
+import numpy as np
+from numpy.typing import DTypeLike
+from osgeo import gdal, osr
+from pyproj import Transformer
+
+from opera_utils import _io
+from opera_utils._types import Bbox, PathOrStr
+from opera_utils._utils import _get_path_from_gdal_str, numpy_to_gdal_type
+
+from .constants import DEFAULT_TIFF_OPTIONS
+
+logger = logging.getLogger(__name__)
+
+
+def merge_images(
+    file_list: Sequence[PathOrStr],
+    outfile: PathOrStr,
+    target_aligned_pixels: bool = True,
+    out_bounds: Optional[Bbox] = None,
+    out_bounds_epsg: Optional[int] = None,
+    strides: Mapping[str, int] = {"x": 1, "y": 1},
+    driver: str = "GTiff",
+    out_nodata: Optional[float] = 0,
+    out_dtype: Optional[DTypeLike] = None,
+    in_nodata: Optional[float] = None,
+    resample_alg: str = "nearest",
+    overwrite: bool = False,
+    options: Optional[Sequence[str]] = DEFAULT_TIFF_OPTIONS,
+    create_only: bool = False,
+) -> None:
+    """Combine multiple rasters covering the same area into one image.
+
+    Parameters
+    ----------
+    file_list : list[PathOrStr]
+        List of raster filenames.
+    outfile : PathOrStr
+        Path to the output file.
+    target_aligned_pixels : bool
+        If True, adjust the output image bounds so that pixel coordinates
+        are integer multiples of the pixel size, matching the ``-tap``
+        option of GDAL utilities.
+        Default is True.
+    out_bounds : Optional[Bbox]
+        If provided, forces the output image bounds to
+        (left, bottom, right, top).
+        Otherwise, computed from the union of all input images.
+    out_bounds_epsg : Optional[int]
+        EPSG code for `out_bounds`.
+        If not provided, assumed to match the projections of `file_list`.
+    strides : dict[str, int]
+        Subsample factor: {"x": x strides, "y": y strides}.
+    driver : str
+        GDAL driver to use for the output file. Default is GTiff.
+    out_nodata : Optional[float]
+        Nodata value to use for the output file. Default is 0.
+    out_dtype : Optional[DTypeLike]
+        Output data type. Default is None, which uses the data type
+        of the first image in the list.
+    in_nodata : Optional[float]
+        Override the files' `nodata` and use `in_nodata` during merging.
+    resample_alg : str, default="nearest"
+        Method for GDAL to use for reprojection.
+        Default is nearest-neighbor.
+    overwrite : bool
+        Overwrite existing files. Default is False.
+    options : Optional[Sequence[str]]
+        Driver-specific creation options passed to GDAL.
+        Default is `DEFAULT_TIFF_OPTIONS`.
+    create_only : bool
+        If True, creates an empty output file and does not write data.
+        Default is False.
+    """
+    if Path(outfile).exists():
+        if not overwrite:
+            logger.info(f"{outfile} already exists, skipping")
+            return
+        else:
+            logger.info(f"Overwrite=True: removing {outfile}")
+            Path(outfile).unlink()
+
+    if len(file_list) == 1:
+        logger.info("Only one image, no stitching needed")
+        logger.info(f"Copying {file_list[0]} to {outfile} and zeroing nodata values.")
+        _copy_set_nodata(
+            file_list[0],
+            outfile=outfile,
+            driver=driver,
+            creation_options=options,
+            out_nodata=out_nodata or 0,
+        )
+        return
+
+    # Make sure all the files are in the same projection.
+    projection = _get_mode_projection(file_list)
+    # If not, warp them to the most common projection using VRT files in a tempdir
+    temp_dir = tempfile.TemporaryDirectory()
+
+    if strides is not None and strides["x"] > 1 and strides["y"] > 1:
+        file_list = get_downsampled_vrts(
+            file_list,
+            strides=strides,
+            dirname=Path(temp_dir.name),
+        )
+
+    warped_file_list = warp_to_projection(
+        file_list,
+        dirname=Path(temp_dir.name),
+        projection=projection,
+        resample_alg=resample_alg,
+    )
+    # Compute the output array shape. We guarantee it will cover the output
+    # bounds completely.
+    bounds, combined_nodata = get_combined_bounds_nodata(  # type: ignore
+        *warped_file_list,
+        target_aligned_pixels=target_aligned_pixels,
+        out_bounds=out_bounds,
+        out_bounds_epsg=out_bounds_epsg,
+        strides=strides,
+    )
+    (xmin, ymin, xmax, ymax) = bounds
+
+    # Write out the file list for gdal_merge using the --optfile flag
+    optfile = Path(temp_dir.name) / "file_list.txt"
+    optfile.write_text("\n".join(map(str, warped_file_list)))
+    args = [
+        "gdal_merge.py",
+        "-o",
+        outfile,
+        "--optfile",
+        optfile,
+        "-of",
+        driver,
+        "-ul_lr",
+        xmin,
+        ymax,
+        xmax,
+        ymin,
+    ]
+    if out_nodata is not None:
+        args.extend(["-a_nodata", str(out_nodata)])
+    if in_nodata is not None or combined_nodata is not None:
+        ndv = str(in_nodata) if in_nodata is not None else str(combined_nodata)
+        args.extend(["-n", ndv])  # type: ignore
+    if out_dtype is not None:
+        out_gdal_dtype = gdal.GetDataTypeName(numpy_to_gdal_type(out_dtype))
+        args.extend(["-ot", out_gdal_dtype])
+    if target_aligned_pixels:
+        args.append("-tap")
+    if create_only:
+        args.append("-create")
+    if options is not None:
+        for option in options:
+            args.extend(["-co", option])
+
+    arg_list = [str(a) for a in args]
+    logger.info(f"Running {' '.join(arg_list)}")
+    subprocess.check_call(arg_list)
+
+    temp_dir.cleanup()
+
+
+def get_downsampled_vrts(
+    filenames: Sequence[PathOrStr],
+    strides: Mapping[str, int],
+    dirname: PathOrStr,
+) -> list[Path]:
+    """Create downsampled VRTs from a list of files.
+
+    Does not reproject; only uses `gdal_translate`.
+
+    Parameters
+    ----------
+    filenames : Sequence[PathOrStr]
+        List of filenames to downsample.
+    strides : dict[str, int]
+        Subsample factor: {"x": x strides, "y": y strides}.
+    dirname : PathOrStr
+        The directory to write the downsampled VRTs to.
+
+    Returns
+    -------
+    list[Path]
+        The downsampled VRT filenames.
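+
+    Examples
+    --------
+    Downsample two rasters by 6 in x and 3 in y (paths are illustrative)::
+
+        vrts = get_downsampled_vrts(
+            ["los_east_a.tif", "los_east_b.tif"],
+            strides={"x": 6, "y": 3},
+            dirname="scratch",
+        )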
+ """ + if not filenames: + return [] + warped_files = [] + res = _get_resolution(filenames) + for idx, fn in enumerate(filenames): + fn = Path(fn) + warped_fn = Path(dirname) / _get_temp_filename(fn, idx, "_downsampled") + logger.debug(f"Downsampling {fn} by {strides}") + warped_files.append(warped_fn) + gdal.Translate( + fspath(warped_fn), + fspath(fn), + format="VRT", # Just creates a file that will warp on the fly + resampleAlg="nearest", # nearest neighbor for resampling + xRes=res[0] * strides["x"], + yRes=res[1] * strides["y"], + ) + + return warped_files + + +def _get_temp_filename(fn: Path, idx: int, extra: str = ""): + base = _get_path_from_gdal_str(fn).stem + return f"{base}_{idx}{extra}.vrt" + + +def warp_to_projection( + filenames: Sequence[PathOrStr], + dirname: PathOrStr, + projection: str, + res: Optional[tuple[float, float]] = None, + resample_alg: str = "lanczos", +) -> list[Path]: + """Warp a list of files to `projection`. + + If the input file's projection matches `projection`, the same file is returned. + Otherwise, a new file is created in `dirname` with the same name as the input file, + but with '_warped' appended. + + Parameters + ---------- + filenames : Sequence[PathOrStr] + list of filenames to warp. + dirname : PathOrStr + The directory to write the warped files to. + projection : str + The desired projection, as a WKT string or 'EPSG:XXXX' string. + res : tuple[float, float] + The desired [x, y] resolution. + resample_alg : str, default="lanczos" + Method for gdal to use for reprojection. + Default is lanczos (sinc-kernel) + + Returns + ------- + list[PathOrStr] + The warped filenames. + """ + if projection is None: + projection = _get_mode_projection(filenames) + if res is None: + res = _get_resolution(filenames) + + warped_files = [] + for idx, fn in enumerate(filenames): + fn = Path(fn) + ds = gdal.Open(fspath(fn)) + proj_in = ds.GetProjection() + if proj_in == projection: + warped_files.append(fn) + continue + warped_fn = Path(dirname) / _get_temp_filename(fn, idx, "_warped") + warped_fn = Path(dirname) / f"{fn.stem}_{idx}_warped.vrt" + from_srs_name = ds.GetSpatialRef().GetName() + to_srs_name = osr.SpatialReference(projection).GetName() + logger.info( + f"Reprojecting {fn} from {from_srs_name} to match mode projection" + f" {to_srs_name}" + ) + warped_files.append(warped_fn) + gdal.Warp( + fspath(warped_fn), + fspath(fn), + format="VRT", # Just creates a file that will warp on the fly + dstSRS=projection, + resampleAlg=resample_alg, + targetAlignedPixels=True, # align in multiples of dx, dy + xRes=res[0], + yRes=res[1], + ) + + return warped_files + + +def _get_mode_projection(filenames: Iterable[PathOrStr]) -> str: + """Get the most common projection in the list.""" + projs = [gdal.Open(fspath(fn)).GetProjection() for fn in filenames] + return max(set(projs), key=projs.count) + + +def _get_resolution(filenames: Iterable[PathOrStr]) -> tuple[float, float]: + """Get the most common resolution in the list.""" + gts = [gdal.Open(fspath(fn)).GetGeoTransform() for fn in filenames] + res = [(dx, dy) for (_, dx, _, _, _, dy) in gts] + if len(set(res)) > 1: + raise ValueError(f"The input files have different resolutions: {res}. ") + return res[0] + + +def get_combined_bounds_nodata( + *filenames: PathOrStr, + target_aligned_pixels: bool = False, + out_bounds: Optional[Bbox] = None, + out_bounds_epsg: Optional[int] = None, + strides: Mapping[str, int] = {"x": 1, "y": 1}, +) -> tuple[Bbox, str | float | None]: + """Get the bounds and nodata of the combined image. 
+ + Parameters + ---------- + filenames : list[PathOrStr] + list of filenames to combine + target_aligned_pixels : bool + if True, adjust output image bounds so that pixel coordinates + are integer multiples of pixel size, matching the `-tap` GDAL option. + out_bounds: Optional[Bbox] + if provided, forces the output image bounds to + (left, bottom, right, top). + Otherwise, computes from the outside of all input images. + out_bounds_epsg: Optional[int] + The EPSG of `out_bounds`. If not provided, assumed to be the same + as the EPSG of all `*filenames`. + strides : dict[str, int] + subsample factor: {"x": x strides, "y": y strides} + + Returns + ------- + bounds : Bbox + (min_x, min_y, max_x, max_y) + nodata : float + Nodata value of the input files + + Raises + ------ + ValueError: + If the inputs files have different resolutions/projections/nodata values + """ + # scan input files + xs = [] + ys = [] + resolutions = set() + projs = set() + nodatas = set() + + # Check all files match in resolution/projection + for fn in filenames: + ds = gdal.Open(fspath(fn)) + left, bottom, right, top = _io.get_raster_bounds(fn) + gt = ds.GetGeoTransform() + dx, dy = gt[1], gt[5] + + resolutions.add((abs(dx), abs(dy))) # dy is negative for north-up + projs.add(ds.GetProjection()) + + xs.extend([left, right]) + ys.extend([bottom, top]) + + nd = _io.get_raster_nodata(fn) + # Need to stringify 'nan', or it is repeatedly added + nodatas.add(str(nd) if (nd is not None and np.isnan(nd)) else nd) + + if len(resolutions) > 1: + raise ValueError(f"The input files have different resolutions: {resolutions}. ") + if len(projs) > 1: + raise ValueError(f"The input files have different projections: {projs}. ") + if len(nodatas) > 1: + raise ValueError(f"The input files have different nodata values: {nodatas}. ") + res = (abs(dx) * strides["x"], abs(dy) * strides["y"]) + + if out_bounds is not None: + if out_bounds_epsg is not None: + dst_epsg = _io.get_raster_crs(filenames[0]).to_epsg() + bounds = _reproject_bounds(out_bounds, out_bounds_epsg, dst_epsg) + else: + bounds = out_bounds # type: ignore + else: + bounds = min(xs), min(ys), max(xs), max(ys) + + if target_aligned_pixels: + bounds = _align_bounds(bounds, res) + + return bounds, list(nodatas)[0] + + +def _align_bounds(bounds: Iterable[float], res: tuple[float, float]): + """Align boundary with an integer multiple of the resolution.""" + left, bottom, right, top = bounds + left = math.floor(left / res[0]) * res[0] + right = math.ceil(right / res[0]) * res[0] + bottom = math.floor(bottom / res[1]) * res[1] + top = math.ceil(top / res[1]) * res[1] + return (left, bottom, right, top) + + +def _reproject_bounds(bounds: Bbox, src_epsg: int, dst_epsg: int) -> Bbox: + t = Transformer.from_crs(src_epsg, dst_epsg, always_xy=True) + left, bottom, right, top = bounds + bbox: Bbox = (*t.transform(left, bottom), *t.transform(right, top)) # type: ignore + return bbox + + +def get_transformed_bounds(filename: PathOrStr, epsg_code: Optional[int] = None): + """Get the bounds of a raster, possibly in a different CRS. + + Parameters + ---------- + filename : str + Path to the raster file. + epsg_code : Optional[int] + EPSG code of the CRS to transform to. + If not provided, or the raster is already in the desired CRS, + the bounds will not be transformed. 
+ + Returns + ------- + tuple + The bounds of the raster as (left, bottom, right, top) + """ + bounds = _io.get_raster_bounds(filename) + if epsg_code is None: + return bounds + from_epsg = _io.get_raster_crs(filename=filename).to_epsg() + assert from_epsg is not None + if from_epsg == epsg_code: + return bounds + + return _reproject_bounds(bounds, from_epsg, epsg_code) + + +def _copy_set_nodata( + infile: PathOrStr, + outfile: Optional[PathOrStr] = None, + ext: Optional[str] = None, + in_band: int = 1, + out_nodata: float = 0, + driver="GTiff", + creation_options=DEFAULT_TIFF_OPTIONS, +): + """Make a copy of infile and replace NaNs/input nodata with `out_nodata`.""" + in_p = Path(infile) + if outfile is None: + if ext is None: + ext = in_p.suffix + out_dir = in_p.parent + outfile = out_dir / (in_p.stem + "_tmp" + ext) + + ds_in = gdal.Open(fspath(infile)) + drv = gdal.GetDriverByName(driver) + ds_out = drv.CreateCopy(fspath(outfile), ds_in, options=creation_options) + + bnd = ds_in.GetRasterBand(in_band) + nodata = bnd.GetNoDataValue() + arr = bnd.ReadAsArray() + # also make sure to replace NaNs, even if nodata is not set + mask = np.logical_or(np.isnan(arr), arr == nodata) + arr[mask] = out_nodata + + bnd1 = ds_out.GetRasterBand(1) + bnd1.WriteArray(arr) + bnd1.SetNoDataValue(out_nodata) + ds_out = bnd1 = None + + return outfile + + +def warp_to_match( + input_file: PathOrStr, + match_file: PathOrStr, + output_file: Optional[PathOrStr] = None, + resample_alg: str = "near", + output_format: Optional[str] = None, +) -> Path: + """Reproject `input_file` to align with the `match_file`. + + Uses the bounds, resolution, and CRS of `match_file`. + + Parameters + ---------- + input_file: PathOrStr + Path to the image to be reprojected. + match_file: PathOrStr + Path to the input image to serve as a reference for the reprojected image. + Uses the bounds, resolution, and CRS of this image. + output_file: PathOrStr + Path to the output, reprojected image. + If None, creates an in-memory warped VRT using the `/vsimem/` protocol. + resample_alg: str, optional, default = "near" + Resampling algorithm to be used during reprojection. + See https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r for choices. + output_format: str, optional, default = None + Output format to be used for the output image. + If None, gdal will try to infer the format from the output file extension, or + (if the extension of `output_file` matches `input_file`) use the input driver. + + Returns + ------- + Path + Path to the output image. + Same as `output_file` if provided, otherwise a path to the in-memory VRT. 
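+
+    Examples
+    --------
+    Warp a raster onto the grid of a reference image (paths are illustrative)::
+
+        warped = warp_to_match("dem.tif", match_file="los_east.tif")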
+ """ + bounds = _io.get_raster_bounds(match_file) + crs_wkt = _io.get_raster_crs(match_file).to_wkt() + gt = _io.get_raster_gt(match_file) + resolution = (gt[1], gt[5]) + + if output_file is None: + output_file = f"/vsimem/warped_{Path(input_file).stem}.vrt" + logger.debug(f"Creating in-memory warped VRT: {output_file}") + + if output_format is None and Path(input_file).suffix == Path(output_file).suffix: + output_format = _io.get_raster_driver(input_file) + + options = gdal.WarpOptions( + dstSRS=crs_wkt, + format=output_format, + xRes=resolution[0], + yRes=resolution[1], + outputBounds=bounds, + outputBoundsSRS=crs_wkt, + resampleAlg=resample_alg, + ) + gdal.Warp( + fspath(output_file), + fspath(input_file), + options=options, + ) + + return Path(output_file) diff --git a/tests/conftest.py b/tests/conftest.py index d28896b..e69de29 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +0,0 @@ -import pytest - -pytestmark = pytest.mark.filterwarnings( - "ignore::UserWarning:h5py is running against HDF5.*" -) diff --git a/tests/test_io.py b/tests/test_io.py new file mode 100644 index 0000000..9994aa1 --- /dev/null +++ b/tests/test_io.py @@ -0,0 +1,91 @@ +import numpy as np +import pytest +from pyproj import CRS + +try: + import rasterio as rio + import rasterio.transform + + RASTERIO_MISSING = False +except ImportError: + RASTERIO_MISSING = True + + +if RASTERIO_MISSING: + pytest.skip(reason="Rasterio not installed") + +from opera_utils import _io + + +@pytest.fixture(scope="module") +def bounds(): + return (-104, 30, -103, 33) + + +@pytest.fixture(scope="module") +def crs(): + return CRS.from_epsg(32611) + + +@pytest.fixture(scope="module") +def shape(): + return 30, 20 + + +@pytest.fixture(scope="module") +def transform(bounds, shape): + height, width = shape + return rasterio.transform.from_bounds(*bounds, width=width, height=height) + + +@pytest.fixture(scope="module") +def raster_file(tmp_path_factory, crs, transform, shape): + height, width = shape + img = np.random.rand(height, width).astype("float32") + + filename = tmp_path_factory.mktemp("data") / "raster.tif" + with rio.open( + filename, + "w", + driver="GTiff", + width=width, + height=height, + dtype="float32", + count=1, + crs=crs, + transform=transform, + nodata=np.nan, + ) as src: + src.write(img, 1) + + return filename + + +def test_get_raster_bounds(raster_file, bounds): + assert _io.get_raster_bounds(raster_file) == bounds + + +def test_get_raster_crs(raster_file, crs): + assert _io.get_raster_crs(raster_file) == crs + + +def test_get_raster_nodata(raster_file): + assert np.isnan(_io.get_raster_nodata(raster_file)) + + +def test_get_raster_transform(raster_file, transform): + assert _io.get_raster_transform(raster_file) == transform + + +def test_get_raster_get(raster_file, transform): + assert _io.get_raster_gt(raster_file) == transform.to_gdal() + + +def test_get_raster_dtype( + raster_file, +): + assert _io.get_raster_dtype(raster_file) == np.float32 + + +def test_get_raster_driver(raster_file): + assert _io.get_raster_driver(raster_file) == "GTiff" diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..8cf3ee3 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,58 @@ +from pathlib import Path + +import pytest + +from opera_utils._utils import format_nc_filename, scratch_directory + + +def test_scratch_directory(tmp_path): + test_dir = tmp_path / "test_dir" + with scratch_directory(test_dir) as d: + (d / "test1.txt").write_text("asf") + assert not Path(test_dir).exists() + + 
with scratch_directory(test_dir, delete=False) as d: + (d / "test1.txt").write_text("asdf") + assert Path(test_dir).exists() + assert Path(test_dir / "test1.txt").read_text() == "asdf" + + +def test_scratch_directory_already_exists(tmp_path): + test_dir = tmp_path / "test_dir" + test_dir.mkdir() + assert Path(test_dir).exists() + + with scratch_directory(test_dir, delete=True) as d: + (d / "test1.txt").write_text("asdf") + assert Path(test_dir).exists() + + with scratch_directory(test_dir, delete=False) as d: + (d / "test1.txt").write_text("asdf") + assert Path(test_dir).exists() + + +def test_format_nc_filename(): + expected = 'NETCDF:"/usr/19990101/20200303_20210101.nc":"//variable"' + assert ( + format_nc_filename("/usr/19990101/20200303_20210101.nc", "variable") == expected + ) + + # check on Path + assert ( + format_nc_filename(Path("/usr/19990101/20200303_20210101.nc"), "variable") + == expected + ) + + # check non-netcdf file + assert ( + format_nc_filename("/usr/19990101/20200303_20210101.tif") + == "/usr/19990101/20200303_20210101.tif" + ) + assert ( + format_nc_filename("/usr/19990101/20200303_20210101.int", "ignored") + == "/usr/19990101/20200303_20210101.int" + ) + + with pytest.raises(ValueError): + # Missing the subdataset name + format_nc_filename("/usr/19990101/20200303_20210101.nc")
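+
+    # Already-formatted GDAL strings should pass through unchanged
+    assert format_nc_filename(expected) == expected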