diff --git a/.github/workflows/pip-checks.yml b/.github/workflows/pip-checks.yml index 57b778cb..7651a0de 100644 --- a/.github/workflows/pip-checks.yml +++ b/.github/workflows/pip-checks.yml @@ -30,7 +30,6 @@ jobs: - name: Initiate empty environment uses: conda-incubator/setup-miniconda@v3 with: - miniforge-variant: Mambaforge miniforge-version: latest auto-update-conda: true use-mamba: true diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index b6b289f2..6a7db86c 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -30,7 +30,6 @@ jobs: - name: Initiate empty environment uses: conda-incubator/setup-miniconda@v3 with: - miniforge-variant: Mambaforge miniforge-version: latest auto-update-conda: true use-mamba: true diff --git a/dev-environment.yml b/dev-environment.yml index baf71c9c..6c9122b3 100644 --- a/dev-environment.yml +++ b/dev-environment.yml @@ -32,6 +32,7 @@ dependencies: - pylint - netcdf4 # To write synthetic data with chunksizes - dask-memusage + - pre-commit # Doc dependencies - sphinx diff --git a/doc/source/about_geoutils.md b/doc/source/about_geoutils.md index 3905c37c..f3a794d9 100644 --- a/doc/source/about_geoutils.md +++ b/doc/source/about_geoutils.md @@ -90,7 +90,7 @@ header-rows: 1 * - ```{eval-rst} .. literalinclude:: code/about_geoutils_sidebyside_raster_geoutils.py :language: python - :lines: 14-29 + :lines: 15-30 ``` - ```{eval-rst} diff --git a/doc/source/api.md b/doc/source/api.md index d1a0d999..04efff11 100644 --- a/doc/source/api.md +++ b/doc/source/api.md @@ -37,7 +37,7 @@ documentation. (api-raster-attrs)= -### Unique attributes +### Main attributes ```{eval-rst} .. autosummary:: @@ -60,16 +60,25 @@ documentation. Raster.height Raster.width Raster.count - Raster.count_on_disk Raster.bands - Raster.bands_on_disk Raster.res Raster.bounds Raster.dtype +``` + +### Other attributes + +```{eval-rst} +.. autosummary:: + :toctree: gen_modules/ + + Raster.count_on_disk + Raster.bands_on_disk Raster.is_loaded Raster.is_modified Raster.name Raster.driver + Raster.tags ``` (api-geo-handle)= @@ -203,36 +212,6 @@ And reverse operations. Raster.__array_function__ ``` -## SatelliteImage - -```{eval-rst} -.. minigallery:: geoutils.SatelliteImage - :add-heading: -``` - -### Opening a file - -```{eval-rst} -.. autosummary:: - :toctree: gen_modules/ - - SatelliteImage -``` - -### Satellite image metadata - -```{eval-rst} -.. autosummary:: - :toctree: gen_modules/ - - SatelliteImage.datetime - SatelliteImage.tile_name - SatelliteImage.satellite - SatelliteImage.sensor - SatelliteImage.product - SatelliteImage.version -``` - ## Mask ```{eval-rst} @@ -289,7 +268,7 @@ And reverse operations. Vector.info ``` -### Unique attributes +### Main attributes ```{eval-rst} .. autosummary:: diff --git a/doc/source/code/about_geoutils_sidebyside_raster_geoutils.py b/doc/source/code/about_geoutils_sidebyside_raster_geoutils.py index b0982f8d..21cd70cc 100644 --- a/doc/source/code/about_geoutils_sidebyside_raster_geoutils.py +++ b/doc/source/code/about_geoutils_sidebyside_raster_geoutils.py @@ -9,6 +9,7 @@ import warnings warnings.filterwarnings("ignore", category=UserWarning, message="For reprojection, nodata must be set.*") +warnings.filterwarnings("ignore", category=UserWarning, message="One raster has a pixel interpretation*") #### import geoutils as gu diff --git a/doc/source/conf.py b/doc/source/conf.py index c18f6b0b..2ae1752d 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -1,4 +1,5 @@ # +# # Configuration file for the Sphinx documentation builder. # import glob @@ -124,7 +125,6 @@ inheritance_alias = { "geoutils.raster.raster.Raster": "geoutils.Raster", "geoutils.raster.raster.Mask": "geoutils.Mask", - "geoutils.raster.satimg.SatelliteImage": "geoutils.SatelliteImage", "geoutils.vector.Vector": "geoutils.Vector", "xdem.dem.DEM": "xdem.DEM", } diff --git a/doc/source/core_index.md b/doc/source/core_index.md index ca4595f0..adc44453 100644 --- a/doc/source/core_index.md +++ b/doc/source/core_index.md @@ -11,5 +11,6 @@ core_match_ref core_py_ops core_array_funcs core_lazy_load +core_parsing core_inheritance ``` diff --git a/doc/source/core_inheritance.md b/doc/source/core_inheritance.md index 8e7f0746..be342713 100644 --- a/doc/source/core_inheritance.md +++ b/doc/source/core_inheritance.md @@ -11,7 +11,7 @@ kernelspec: name: geoutils --- (core-inheritance)= -# Inheritance to geo-images and beyond +# Inheritance to DEMs and beyond Inheritance is practical to naturally pass down parent methods and attributes to child classes. @@ -20,12 +20,13 @@ implemented in GeoUtils. ## Overview of {class}`~geoutils.Raster` inheritance - -Below is a diagram showing current {class}`~geoutils.Raster` inheritance, which extends into other packages such as [xDEM](https://xdem.readthedocs.io/) +Current {class}`~geoutils.Raster` inheritance extends into other packages, such as [xDEM](https://xdem.readthedocs.io/) for analyzing digital elevation models. +Within GeoUtils, inheritance extends only to {class}`~geoutils.Mask` that implements overloaded methods specific to binary raster masks, +as shown in the diagram below. ```{eval-rst} -.. inheritance-diagram:: geoutils.raster.raster geoutils.raster.satimg +.. inheritance-diagram:: geoutils.raster.raster :top-classes: geoutils.raster.raster.Raster ``` @@ -37,42 +38,11 @@ Among others, it also adds a {attr}`~xdem.DEM.vcrs` property to consistently man If you are DEM-enthusiastic, **[check-out our sister package xDEM](https://xdem.readthedocs.io/) for digital elevation models.** ``` -## The internal {class}`~geoutils.SatelliteImage` subclass - -GeoUtils subclasses {class}`Rasters` to {class}`SatelliteImages` for remote sensing users interested in parsing -metadata from space- or airborne imagery. - -Based on the filename, or auxiliary files, the {class}`~geoutils.SatelliteImage` class attempts to automatically parse a -{attr}`~geoutils.SatelliteImage.datetime`, {attr}`~geoutils.SatelliteImage.sensor`, {attr}`~geoutils.SatelliteImage.tile_name`, -and other information. - -```{code-cell} ipython3 -import geoutils as gu - -# Instantiate a geo-image from an ASTER image -filename_geoimg = gu.examples.get_path("exploradores_aster_dem") -geoimg = gu.SatelliteImage(filename_geoimg, silent=False) -``` - -```{code-cell} ipython3 -# Instantiate a geo-image from a Landsat 7 image -filename_geoimg2 = gu.examples.get_path("everest_landsat_b4") -geoimg2 = gu.SatelliteImage(filename_geoimg2, silent=False) -``` - -Along these additional attributes, the {class}`~geoutils.SatelliteImage` possesses the same main attributes as a {class}`~geoutils.Raster`. - -```{code-cell} ipython3 - -# The geo-image main attributes -geoimg -``` - ## And beyond Many types of geospatial data can be viewed as a subclass of {class}`Rasters`, which have more attributes and require their own methods: **spectral images**, **velocity fields**, **phase difference maps**, etc... -If you are interested to build your own subclass of {class}`~geoutils.Raster`, you can take example of the structure of {class}`geoutils.SatelliteImage` and -{class}`xdem.DEM`. Then, just add any of your own attributes and methods, and overload parent methods if necessary! Don't hesitate to reach out on our +If you are interested to build your own subclass of {class}`~geoutils.Raster`, you can take example of the structure of {class}`xdem.DEM`. +Then, just add any of your own attributes and methods, and overload parent methods if necessary! Don't hesitate to reach out on our GitHub if you have a subclassing project. diff --git a/doc/source/core_parsing.md b/doc/source/core_parsing.md new file mode 100644 index 00000000..95913582 --- /dev/null +++ b/doc/source/core_parsing.md @@ -0,0 +1,62 @@ +--- +file_format: mystnb +jupytext: + formats: md:myst + text_representation: + extension: .md + format_name: myst +kernelspec: + display_name: geoutils-env + language: python + name: geoutils +--- +(satimg-parsing)= + +# Sensor metadata parsing + +GeoUtils functionalities for remote sensing users interested in parsing metadata from space- or airborne imagery. + +## Parsing metadata at raster instantiation + +A {class}`~geoutils.Raster` can be instantiated while trying to parse metadata usint the ``parse_sensor_metadata`` argument. + +```{code-cell} ipython3 +import geoutils as gu + +# Parse metadata from an ASTER raster +filename_aster = gu.examples.get_path("exploradores_aster_dem") +rast_aster = gu.Raster(filename_aster, parse_sensor_metadata=True, silent=False) +``` + + +```{code-cell} ipython3 +# Parse metadata from a Landsat 7 raster +filename_landsat = gu.examples.get_path("everest_landsat_b4") +rast_landsat = gu.Raster(filename_landsat, parse_sensor_metadata=True, silent=False) +``` + +The metadata is then stored in the {attr}`~geoutils.Raster.tags` attribute of the raster. + +```{code-cell} ipython3 +rast_aster.tags +``` + +For tiled products such as SRTM, the tile naming is also retrieved, and converted to usable tile sizes and extents based on known metadata. + +## Supported sensors + +Right now are supported: + - Landsat, + - Sentinel-2, + - SPOT, + - ASTER, + - ArcticDEM and REMA, + - ALOS, + - SRTM, + - TanDEM-X, and + - NASADEM. + +```{important} +Sensor metadata parsing is still in development. We hope to add the ability to parse from +auxiliary files in the future (such as [here](https://github.com/jlandmann/glaciersat/blob/master/glaciersat/core/imagery.py)). +``` diff --git a/doc/source/data_object_index.md b/doc/source/data_object_index.md index 958cf079..c5db3332 100644 --- a/doc/source/data_object_index.md +++ b/doc/source/data_object_index.md @@ -10,5 +10,4 @@ raster_class vector_class pointcloud_class mask_class -satimg_class ``` diff --git a/doc/source/feature_overview.md b/doc/source/feature_overview.md index b53c9b68..a24a2b64 100644 --- a/doc/source/feature_overview.md +++ b/doc/source/feature_overview.md @@ -241,10 +241,10 @@ import os os.remove("myaoi.gpkg") ``` -## Parsing metadata with {class}`~geoutils.SatelliteImage` +## Parsing sensor metadata -In our case, `rast` would be better opened using the {class}`~geoutils.Raster` object {class}`~geoutils.SatelliteImage` instead, which tentatively parses -metadata recognized from the filename or auxiliary files. +In our case, `rast` would be better opened using the ``parse_sensor_metadata`` argument of a `{class}`~geoutils.Raster` +, which tentatively parses metadata recognized from the filename or auxiliary files. ```{code-cell} ipython3 # Name of the image we used @@ -254,7 +254,7 @@ print(os.path.basename(filename_rast)) ```{code-cell} ipython3 # Open while parsing metadata -rast = gu.SatelliteImage(filename_rast, silent=False) +rast = gu.Raster(filename_rast, parse_sensor_metadata=True, silent=False) ``` ```{admonition} Wrap-up diff --git a/doc/source/satimg_class.md b/doc/source/satimg_class.md deleted file mode 100644 index 67e7df81..00000000 --- a/doc/source/satimg_class.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -file_format: mystnb -jupytext: - formats: md:myst - text_representation: - extension: .md - format_name: myst -kernelspec: - display_name: geoutils-env - language: python - name: geoutils ---- -(satimg-class)= - -# The geo-image ({class}`~geoutils.SatelliteImage`) - -Below, a summary of the {class}`~geoutils.SatelliteImage` object and its methods. - -## Object definition - -A {class}`~geoutils.SatelliteImage` is a subclass of {class}`~geoutils.Raster` that contains all its main attributes, and retains additional ones: -- a {class}`numpy.datetime64` as {class}`~geoutils.SatelliteImage.datetime`, and -- several {class}`strings` for {class}`~geoutils.SatelliteImage.satellite`, {class}`~geoutils.SatelliteImage.sensor`, {class}`~geoutils.SatelliteImage.version`, {class}`~geoutils.SatelliteImage.tile_name` and {class}`~geoutils.SatelliteImage.product`. - -A {class}`~geoutils.SatelliteImage` also inherits the same derivative attributes as a {class}`~geoutils.Raster`. - -## Metadata parsing - -The {class}`~geoutils.SatelliteImage` attempts to parse metadata from the filename. - -Right now are supported: - - Landsat, - - Sentinel-2, - - SPOT, - - ASTER, - - ArcticDEM and REMA, - - ALOS, - - SRTM, - - TanDEM-X, and - - NASADEM. - -The {class}`~geoutils.SatelliteImage.datetime` is always parsed or deduced. - -For tiled products such as SRTM, the tile naming is also retrieved, which can be converted to geographic extent with {func}`geoutils.raster.satimg.parse_tile_attr_from_name`. - -```{code-cell} ipython3 -import geoutils as gu - -# Instantiate a geo-image from an ASTER image -filename_geoimg = gu.examples.get_path("exploradores_aster_dem") -geoimg = gu.SatelliteImage(filename_geoimg, silent=False) -``` - -```{code-cell} ipython3 -# Instantiate a geo-image from a Landsat 7 image -filename_geoimg2 = gu.examples.get_path("everest_landsat_b4") -geoimg2 = gu.SatelliteImage(filename_geoimg2, silent=False) -``` - -```{important} -The {class}`~geoutils.SatelliteImage` class is still in development, and we hope to further refine it in the future using metadata classes able to parse -auxiliary files metadata (such as [here](https://github.com/jlandmann/glaciersat/blob/master/glaciersat/core/imagery.py)). -``` diff --git a/examples/io/open_save/read_satimg.py b/examples/io/open_save/read_satimg.py index daae9c35..4c721c71 100644 --- a/examples/io/open_save/read_satimg.py +++ b/examples/io/open_save/read_satimg.py @@ -1,8 +1,8 @@ """ -Parsing image metadata -====================== +Parsing sensor metadata +======================= -This example demonstrates the instantiation of an image through :class:`~geoutils.SatelliteImage`. +This example demonstrates the instantiation of a raster while parsing image sensor metadata. """ import geoutils as gu @@ -15,9 +15,9 @@ print(os.path.basename(filename_geoimg)) # %% -# We open it as a geo-image, un-silencing the attribute retrieval to see the parsed data. -img = gu.SatelliteImage(filename_geoimg, silent=False) +# We open it as a raster with the option to parse metadata, un-silencing the attribute retrieval to see it printed. +img = gu.Raster(filename_geoimg, parse_sensor_metadata=True, silent=False) # %% -# We have now retrieved the metadata. For the rest, the :class:`~geoutils.SatelliteImage` is a subclass of :class:`~geoutils.Raster`, and behaves similarly. -img +# We have now retrieved the metadata, stored in the :attr:`geoutils.Raster.tags` attribute. +img.tags diff --git a/geoutils/__init__.py b/geoutils/__init__.py index e93338b6..01bd4c34 100644 --- a/geoutils/__init__.py +++ b/geoutils/__init__.py @@ -2,9 +2,9 @@ GeoUtils is a Python package for the analysis of geospatial data. """ -from geoutils import examples, projtools, raster, vector # noqa +from geoutils import examples, pointcloud, projtools, raster, vector # noqa from geoutils._config import config # noqa -from geoutils.raster import Mask, Raster, SatelliteImage, xr_accessor # noqa +from geoutils.raster import Mask, Raster, xr_accessor # noqa from geoutils.raster.xr_accessor import open_raster # noqa from geoutils.vector import Vector # noqa diff --git a/geoutils/interface/raster_point.py b/geoutils/interface/raster_point.py index 594e81a8..b9127f47 100644 --- a/geoutils/interface/raster_point.py +++ b/geoutils/interface/raster_point.py @@ -12,7 +12,7 @@ import geoutils as gu from geoutils._typing import NDArrayNum -from geoutils.raster.array import _get_mask_from_array +from geoutils.raster.array import get_mask_from_array from geoutils.raster.georeferencing import _default_nodata, _xy2ij from geoutils.raster.sampling import subsample_array @@ -168,11 +168,11 @@ def _raster_to_pointcloud( if skip_nodata: if source_raster.is_loaded: if source_raster.count == 1: - self_mask = _get_mask_from_array( + self_mask = get_mask_from_array( source_raster.data ) # This is to avoid the case where the mask is just "False" else: - self_mask = _get_mask_from_array( + self_mask = get_mask_from_array( source_raster.data[data_band - 1, :, :] ) # This is to avoid the case where the mask is just "False" valid_mask = ~self_mask diff --git a/geoutils/raster/__init__.py b/geoutils/raster/__init__.py index 35f981b7..f1e454cc 100644 --- a/geoutils/raster/__init__.py +++ b/geoutils/raster/__init__.py @@ -4,6 +4,6 @@ from geoutils.raster.geotransformations import * # noqa from geoutils.raster.multiraster import * # noqa from geoutils.raster.sampling import * # noqa -from geoutils.raster.satimg import SatelliteImage # noqa +from geoutils.raster.satimg import * # noqa __all__ = ["RasterType", "Raster"] diff --git a/geoutils/raster/array.py b/geoutils/raster/array.py index ed32b909..e1df06c5 100644 --- a/geoutils/raster/array.py +++ b/geoutils/raster/array.py @@ -10,7 +10,7 @@ from geoutils._typing import MArrayNum, NDArrayBool, NDArrayNum -def _get_mask_from_array(array: NDArrayNum | NDArrayBool | MArrayNum) -> NDArrayBool: +def get_mask_from_array(array: NDArrayNum | NDArrayBool | MArrayNum) -> NDArrayBool: """ Return the mask of invalid values, whether array is a ndarray with NaNs or a np.ma.masked_array. @@ -22,7 +22,7 @@ def _get_mask_from_array(array: NDArrayNum | NDArrayBool | MArrayNum) -> NDArray return mask.squeeze() -def _get_array_and_mask( +def get_array_and_mask( array: NDArrayNum | MArrayNum, check_shape: bool = True, copy: bool = True ) -> tuple[NDArrayNum, NDArrayBool]: """ @@ -59,19 +59,19 @@ def _get_array_and_mask( array_data = np.array(array).squeeze() if copy else np.asarray(array).squeeze() # Get the mask of invalid pixels and set nans if it is occupied. - invalid_mask = _get_mask_from_array(array) + invalid_mask = get_mask_from_array(array) if np.any(invalid_mask): array_data[invalid_mask] = np.nan return array_data, invalid_mask -def _get_valid_extent(array: NDArrayNum | NDArrayBool | MArrayNum) -> tuple[int, ...]: +def get_valid_extent(array: NDArrayNum | NDArrayBool | MArrayNum) -> tuple[int, ...]: """ Return (rowmin, rowmax, colmin, colmax), the first/last row/column of array with valid pixels """ if not array.dtype == "bool": - valid_mask = ~_get_mask_from_array(array) + valid_mask = ~get_mask_from_array(array) else: # Not sure why Mypy is not recognizing that the type of the array can only be bool here valid_mask = array # type: ignore @@ -80,7 +80,7 @@ def _get_valid_extent(array: NDArrayNum | NDArrayBool | MArrayNum) -> tuple[int, return rows_nonzero[0], rows_nonzero[-1], cols_nonzero[0], cols_nonzero[-1] -def _get_xy_rotated(raster: gu.Raster, along_track_angle: float) -> tuple[NDArrayNum, NDArrayNum]: +def get_xy_rotated(raster: gu.Raster, along_track_angle: float) -> tuple[NDArrayNum, NDArrayNum]: """ Rotate x, y axes of image to get along- and cross-track distances. :param raster: Raster to get x,y positions from. diff --git a/geoutils/raster/base.py b/geoutils/raster/base.py index 9e0c97b6..45419efe 100644 --- a/geoutils/raster/base.py +++ b/geoutils/raster/base.py @@ -74,7 +74,7 @@ def __init__(self): self._driver: str | None = None self._name: str | None = None self.filename: str | None = None - self.tags: dict[str, Any] = {} + self._tags: dict[str, Any] = {} self._bands_loaded: int | tuple[int, ...] | None = None self._disk_shape: tuple[int, int, int] | None = None self._disk_bands: tuple[int] | None = None @@ -396,6 +396,28 @@ def area_or_point(self, new_area_or_point: Literal["Area", "Point"] | None) -> N """ self.set_area_or_point(new_area_or_point=new_area_or_point) + @property + def tags(self) -> dict[str, Any]: + """ + Metadata tags of the raster. + + :returns: Dictionary of raster metadata, potentially including sensor information. + """ + if self._is_xr: + return self._obj.attrs + else: + return self._tags + + @tags.setter + def tags(self, new_tags: dict[str, Any] | None) -> None: + """ + Set the metadata tags of the raster. + """ + + if new_tags is None: + new_tags = {} + self._tags = new_tags + @property def is_loaded(self) -> bool: """Whether the raster array is loaded.""" diff --git a/geoutils/raster/multiraster.py b/geoutils/raster/multiraster.py index 73851f9b..e73d58dd 100644 --- a/geoutils/raster/multiraster.py +++ b/geoutils/raster/multiraster.py @@ -12,7 +12,7 @@ import geoutils as gu from geoutils._typing import NDArrayNum -from geoutils.raster.array import _get_array_and_mask +from geoutils.raster.array import get_array_and_mask from geoutils.raster.geotransformations import _resampling_method_from_str from geoutils.raster.raster import RasterType, _default_nodata @@ -194,7 +194,7 @@ def stack_rasters( # Optionally calculate difference if diff: diff_to_ref = (reference_raster.data - reprojected_raster.data).squeeze() - diff_to_ref, _ = _get_array_and_mask(diff_to_ref) + diff_to_ref, _ = get_array_and_mask(diff_to_ref) data.append(diff_to_ref) else: # img_data, _ = get_array_and_mask(reprojected_raster.data.squeeze()) diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py index 8f87a07a..38861ea3 100644 --- a/geoutils/raster/raster.py +++ b/geoutils/raster/raster.py @@ -38,6 +38,13 @@ _cast_pixel_interpretation, _default_nodata, ) + +from geoutils.raster.geotransformations import _crop, _reproject, _translate +from geoutils.raster.sampling import subsample_array +from geoutils.raster.satimg import ( + decode_sensor_metadata, + parse_and_convert_metadata_from_filename, +) from geoutils.vector.vector import Vector # If python38 or above, Literal is builtin. Otherwise, use typing_extensions @@ -319,6 +326,8 @@ def __init__( ), bands: int | list[int] | None = None, load_data: bool = False, + parse_sensor_metadata: bool = False, + silent: bool = True, downsample: Number = 1, force_nodata: int | float | None = None, ) -> None: @@ -326,13 +335,11 @@ def __init__( Instantiate a raster from a filename or rasterio dataset. :param filename_or_dataset: Path to file or Rasterio dataset. - :param bands: Band(s) to load into the object. Default loads all bands. - :param load_data: Whether to load the array during instantiation. Default is False. - + :param parse_sensor_metadata: Whether to parse sensor metadata from filename and similarly-named metadata files. + :param silent: Whether to parse metadata silently or with console output. :param downsample: Downsample the array once loaded by a round factor. Default is no downsampling. - :param force_nodata: Force nodata value to be used (overwrites the metadata). Default reads from metadata. """ @@ -346,6 +353,7 @@ def __init__( # This is for Raster.from_array to work. if isinstance(filename_or_dataset, dict): + self.tags = filename_or_dataset["tags"] # To have "area_or_point" user input go through checks of the set() function without shifting the transform self.set_area_or_point(filename_or_dataset["area_or_point"], shift_area_or_point=False) @@ -363,7 +371,7 @@ def __init__( self.crs: rio.crs.CRS = filename_or_dataset["crs"] for key in filename_or_dataset: - if key in ["data", "transform", "crs", "nodata", "area_or_point"]: + if key in ["data", "transform", "crs", "nodata", "area_or_point", "tags"]: continue setattr(self, key, filename_or_dataset[key]) return @@ -399,7 +407,11 @@ def __init__( self._nodata = ds.nodata self._name = ds.name self._driver = ds.driver - self.tags.update(ds.tags()) + self._tags.update(ds.tags()) + + # For tags saved from sensor metadata, convert from string to practical type (datetime, etc) + converted_tags = decode_sensor_metadata(self.tags) + self._tags.update(converted_tags) self._area_or_point = self.tags.get("AREA_OR_POINT", None) @@ -462,6 +474,11 @@ def __init__( else: raise TypeError("The filename argument is not recognised, should be a path or a Rasterio dataset.") + # Parse metadata and add to tags + if parse_sensor_metadata and self.filename is not None: + sensor_meta = parse_and_convert_metadata_from_filename(self.filename, silent=silent) + self._tags.update(sensor_meta) + def _load_only_mask(self, bands: int | list[int] | None = None, **kwargs: Any) -> NDArrayBool: """ Load only the raster mask from disk and return as independent array (not stored in any class attributes). @@ -1637,7 +1654,7 @@ def save( corresponding to this value, instead of writing the image data to disk. :param co_opts: GDAL creation options provided as a dictionary, e.g. {'TILED':'YES', 'COMPRESS':'LZW'}. - :param metadata: Pairs of metadata key, value. + :param metadata: Pairs of metadata to save to disk, in addition to existing metadata in self.tags. :param gcps: List of gcps, each gcp being [row, col, x, y, (z)]. :param gcps_crs: CRS of the GCPS. @@ -1646,8 +1663,9 @@ def save( if co_opts is None: co_opts = {} - if metadata is None: - metadata = {} + meta = self.tags if not None else {} + if metadata is not None: + meta.update(metadata) if gcps is None: gcps = [] @@ -1704,7 +1722,7 @@ def save( dst.write(save_data) # Add metadata (tags in rio) - dst.update_tags(**metadata) + dst.update_tags(**meta) # Save GCPs if not isinstance(gcps, list): @@ -2271,6 +2289,7 @@ def proximity( "crs": self.crs, "nodata": self.nodata, "area_or_point": self.area_or_point, + "tags": self.tags, } ) return raster.proximity( diff --git a/geoutils/raster/sampling.py b/geoutils/raster/sampling.py index a4559403..3014b682 100644 --- a/geoutils/raster/sampling.py +++ b/geoutils/raster/sampling.py @@ -7,7 +7,7 @@ import numpy as np from geoutils._typing import MArrayNum, NDArrayNum -from geoutils.raster.array import _get_mask_from_array +from geoutils.raster.array import get_mask_from_array @overload @@ -60,7 +60,7 @@ def subsample_array( rng = np.random.default_rng(random_state) # Remove invalid values and flatten array - mask = _get_mask_from_array(array) # -> need to remove .squeeze in get_mask + mask = get_mask_from_array(array) # -> need to remove .squeeze in get_mask valids = np.argwhere(~mask.flatten()).squeeze() # Get number of points to extract diff --git a/geoutils/raster/satimg.py b/geoutils/raster/satimg.py index 79871dbb..f660d641 100644 --- a/geoutils/raster/satimg.py +++ b/geoutils/raster/satimg.py @@ -1,5 +1,5 @@ """ -geoutils.satimg provides a toolset for working with satellite data. +This module provides functionalities for parsing sensor metadata, most often related to satellite imagery. """ from __future__ import annotations @@ -7,20 +7,38 @@ import datetime as dt import os import re -import warnings from collections import abc -from typing import Any +from typing import Any, TypedDict import numpy as np -import rasterio as rio -from geoutils._typing import NDArrayNum -from geoutils.raster import Raster, RasterType +# Metadata tags used for all +satimg_tags = ["platform", "sensor", "product", "version", "tile_name", "datetime"] -lsat_sensor = {"C": "OLI/TIRS", "E": "ETM+", "T": "TM", "M": "MSS", "O": "OLI", "TI": "TIRS"} + +class SatImgDict(TypedDict, total=False): + """Keys and types of inputs associated with image metadata.""" + + # Metadata extract directly from filename + platform: str + sensor: str + product: str + version: str + tile_name: str + datetime: dt.datetime + + # Derivative metadata + tile_xmin: float + tile_ymin: float + tile_xsize: float + tile_ysize: float def parse_landsat(gname: str) -> list[Any]: + """Parse Landsat metadata.""" + + lsat_sensor = {"C": "OLI/TIRS", "E": "ETM+", "T": "TM", "M": "MSS", "O": "OLI", "TI": "TIRS"} + attrs: list[Any] = [] if len(gname.split("_")[0]) > 15: attrs.append(f"Landsat {int(gname[2])}") @@ -43,20 +61,34 @@ def parse_landsat(gname: str) -> list[Any]: return attrs -def parse_metadata_from_fn(fname: str) -> list[Any]: - bname = os.path.splitext(os.path.basename(fname))[0] +def parse_metadata_from_fn(filename: str) -> SatImgDict: + """ + Parse metadata from filename: platform, sensor, product, version, tile name and datetime. + + :param filename: Filename. + """ + + # Extract basename from full filename + bname = os.path.splitext(os.path.basename(filename))[0] + + # The attributes correspond in order to: platform, sensor, product, version, tile_name, datetime + tags = satimg_tags - # assumes that the filename has a form XX_YY.ext + # First, we assume that the filename has a form XX_YY.ext if "_" in bname: spl = bname.split("_") - # attrs corresponds to: satellite, sensor, product, version, tile_name, datetime + # Landsat if re.match("L[COTEM][0-9]{2}", spl[0]): attrs: tuple[Any, ...] | list[Any] = parse_landsat(bname) elif spl[0][0] == "L" and len(spl) == 1: attrs = parse_landsat(bname) + + # Sentinel-2 elif re.match("T[0-9]{2}[A-Z]{3}", spl[0]): attrs = ("Sentinel-2", "MSI", None, None, spl[0][1:], dt.datetime.strptime(spl[1], "%Y%m%dT%H%M%S")) + + # ArcticDEM and REMA elif spl[0] == "SETSM": # For PGC DEMs: if the second part of the name starts with a "s", it is the new nomenclature # (starting at "s2s041") with the version first, @@ -76,12 +108,18 @@ def parse_metadata_from_fn(fname: str) -> list[Any]: None, dt.datetime.strptime(spl[index + 2], "%Y%m%d"), ) + + # SPOT elif spl[0] == "SPOT": attrs = ("HFS", "SPOT5", None, None, None, dt.datetime.strptime(spl[2], "%Y%m%d")) + + # IceBridge elif spl[0] == "IODEM3": attrs = ("IceBridge", "DMS", "IODEM3", None, None, dt.datetime.strptime(spl[1] + spl[2], "%Y%m%d%H%M%S")) elif spl[0] == "ILAKS1B": attrs = ("IceBridge", "UAF-LS", "ILAKS1B", None, None, dt.datetime.strptime(spl[1], "%Y%m%d")) + + # ASTER L1A elif spl[0] == "AST" and spl[1] == "L1A": attrs = ( "Terra", @@ -91,32 +129,43 @@ def parse_metadata_from_fn(fname: str) -> list[Any]: None, dt.datetime.strptime(bname.split("_")[2][3:], "%m%d%Y%H%M%S"), ) + + # ASTER GDEM elif spl[0] == "ASTGTM2": attrs = ("Terra", "ASTER", "ASTGTM2", "2", spl[1], None) + + # NASADEM elif spl[0] == "NASADEM": attrs = ("SRTM", "SRTM", "NASADEM-" + spl[1], "1", spl[2], dt.datetime(year=2000, month=2, day=15)) + + # TanDEM-X elif spl[0] == "TDM1" and spl[1] == "DEM": attrs = ("TanDEM-X", "TanDEM-X", "TDM1", "1", spl[4], None) + + # SRTM v4.1 elif spl[0] == "srtm": attrs = ("SRTM", "SRTM", "SRTMv4.1", None, "_".join(spl[1:]), dt.datetime(year=2000, month=2, day=15)) else: attrs = (None,) * 6 - # if the form is only XX.ext (only the first versions of SRTM had a naming that... bad (simplified?)) - elif os.path.splitext(os.path.basename(fname))[1] == ".hgt": + # Or, if the form is only XX.ext + # (Only the first versions of SRTM had a naming that... bad (simplified?)) + elif os.path.splitext(os.path.basename(filename))[1] == ".hgt": attrs = ( "SRTM", "SRTM", "SRTMGL1", "3", - os.path.splitext(os.path.basename(fname))[0], + os.path.splitext(os.path.basename(filename))[0], dt.datetime(year=2000, month=2, day=15), ) else: attrs = (None,) * 6 - return list(attrs) + dict_meta: SatImgDict = {tags[i]: attrs[i] for i in range(len(tags))} # type: ignore + + return dict_meta def parse_tile_attr_from_name(tile_name: str, product: str | None = None) -> tuple[float, float, tuple[int, int], int]: @@ -207,11 +256,11 @@ def latlon_to_sw_naming( Convert latitude and longitude to widely used southwestern corner tile naming (originally for SRTMGL1) Can account for varying tile sizes, and a dependency with the latitude (e.g., TDX global DEM) - :param latlon: latitude and longitude - :param latlon_sizes: sizes of lat/lon tiles corresponding to latitude intervals - :param lat_lims: latitude intervals + :param latlon: Latitude and longitude. + :param latlon_sizes: Sizes of lat/lon tiles corresponding to latitude intervals. + :param lat_lims: Latitude intervals. - :returns: tile name + :returns: Tile name. """ lon = latlon[1] @@ -245,195 +294,61 @@ def latlon_to_sw_naming( return tile_name -satimg_attrs = ["satellite", "sensor", "product", "version", "tile_name", "datetime"] - - -class SatelliteImage(Raster): # type: ignore - date: None | dt.datetime - - def __init__( - self, - filename_or_dataset: str | RasterType | rio.io.DatasetReader | rio.io.MemoryFile, - load_data: bool = True, - bands: int | list[int] | None = None, - read_from_fn: bool = True, - datetime: dt.datetime | None = None, - tile_name: str | None = None, - satellite: str | None = None, - sensor: str | None = None, - product: str | None = None, - version: str | None = None, - read_from_meta: bool = True, - fn_meta: str | None = None, - silent: bool = True, - ) -> None: - """ - Load satellite data through the Raster class and parse additional attributes from filename or metadata. - - :param filename_or_dataset: The filename of the dataset. - :param load_data: Load the raster data into the object. Default is True. - :param bands: The band(s) to load into the object. Default is to load all bands. - :param read_from_fn: Try to read metadata from the filename - :param datetime: Provide datetime attribute - :param tile_name: Provide tile name - :param satellite: Provide satellite name - :param sensor: Provide sensor name - :param product: Provide data product name - :param version: Provide data version - :param read_from_meta: Try to read metadata from known associated metadata files - :param fn_meta: Provide filename of associated metadata - :param silent: No informative output when trying to read metadata - - :return: A SatelliteImage object (Raster subclass) - """ - - # If SatelliteImage is passed, simply point back to SatelliteImage - if isinstance(filename_or_dataset, SatelliteImage): - for key in filename_or_dataset.__dict__: - setattr(self, key, filename_or_dataset.__dict__[key]) - return - # Else rely on parent Raster class options (including raised errors) - else: - super().__init__(filename_or_dataset, load_data=load_data, bands=bands) - - # priority to user input - self._datetime = datetime - self._tile_name = tile_name - self._satellite = satellite - self._sensor = sensor - self._product = product - self._version = version - - # trying to get metadata from separate metadata file - if read_from_meta and self.filename is not None and fn_meta is not None: - self.__parse_metadata_from_file(fn_meta) - - # trying to get metadata from filename for the None attributes - if read_from_fn and self.filename is not None: - self.__parse_metadata_from_fn(silent=silent) - - self.__get_date() - - def __get_date(self) -> dt.datetime | None: # type: ignore - """ - Get date from datetime - :return: - """ - if self.datetime is not None: - self.date = self.datetime.date() # type: ignore - else: - self.date = None - - def __parse_metadata_from_fn(self, silent: bool = False) -> None: - """ - Attempts to pull metadata (e.g., sensor, date information) from fname, setting sensor, satellite, - tile, datetime, and date attributes. - """ - fname = self.filename - name_attrs = ["satellite", "sensor", "product", "version", "tile_name", "datetime"] - attrs = parse_metadata_from_fn(fname if fname is not None else "") - - if all(att is None for att in attrs): - if not silent: - print("No metadata could be read from filename.") - return - - for name in name_attrs: - attr = self.__getattribute__(name) - attr_fn = attrs[name_attrs.index(name)] - if attr is None and attr_fn is not None: - if not silent: - print("From filename: setting " + name + " as " + str(attr_fn)) - # Set hidden attribute first - setattr(self, name, attr_fn) - elif attr is not None and attrs[name_attrs.index(name)] is not None: - if not silent: - print( - "Leaving user input of " - + str(attr) - + " for attribute " - + name - + " despite reading " - + str(attrs[name_attrs.index(name)]) - + "from filename" - ) - - @property - def datetime(self) -> dt.datetime | None: - return self._datetime - - @datetime.setter - def datetime(self, value: dt.datetime | None) -> None: - if isinstance(value, (dt.datetime, np.datetime64)) or value is None: - self._datetime = value - else: - raise ValueError("Datetime must be set with a python or NumPy datetime.") +################################################### +# MAIN FUNCTION THAT WILL BE CALLED BY RASTER CLASS +################################################### - @property - def satellite(self) -> str | None: - return self._satellite - @satellite.setter - def satellite(self, value: str | None) -> None: - if isinstance(value, str) or value is None: - self._satellite = value - else: - raise ValueError("Satellite must be set with a string.") - - @property - def sensor(self) -> str | None: - return self._sensor - - @sensor.setter - def sensor(self, value: str | None) -> None: - if isinstance(value, str) or value is None: - self._sensor = value - else: - raise ValueError("Sensor must be set with a string.") +def parse_and_convert_metadata_from_filename(filename: str, silent: bool = False) -> SatImgDict: + """ + Attempts to pull metadata (e.g., sensor, date information) from fname, and convert to human-usable format. - @property - def product(self) -> str | None: - return self._product + Sets platform, sensor, tile, datetime, and date attributes. + """ - @product.setter - def product(self, value: str | None) -> None: - if isinstance(value, str) or value is None: - self._product = value - else: - raise ValueError("Product must be set with a string.") + # Get list of metadata + attrs = parse_metadata_from_fn(filename if filename is not None else "") - @property - def version(self) -> str | None: - return self._version + # If no metadata was read, return empty dictionary here + if all(att is None for att in attrs.values()): + if not silent: + print("No metadata could be read from filename.") + return {} - @version.setter - def version(self, value: str | None) -> None: - if isinstance(value, str) or value is None: - self._version = value - else: - raise ValueError("Version must be set with a string.") + # Else, if not silent, print what was read + for k, v in attrs.items(): + if v is not None: + if not silent: + print("Setting " + k + " as " + str(v) + " read from filename.") - @property - def tile_name(self) -> str | None: - return self._tile_name + # And convert tile name to human-readable tile extent/size + supported_tile = ["ASTGTM2", "SRTMGL1", "NASADEM", "TDM1"] + if attrs["tile_name"] is not None and attrs["product"] is not None and attrs["product"] in supported_tile: + ymin, xmin, yx_sizes, _ = parse_tile_attr_from_name(attrs["tile_name"], product=attrs["product"]) + tile_attrs = SatImgDict(tile_xmin=xmin, tile_ymin=ymin, tile_xsize=yx_sizes[1], tile_ysize=yx_sizes[0]) + attrs.update(tile_attrs) - @tile_name.setter - def tile_name(self, value: str | None) -> None: - if isinstance(value, str) or value is None: - self._tile_name = value - else: - raise ValueError("Tile name must be set with a string.") + return attrs - def __parse_metadata_from_file(self, fn_meta: str | None) -> None: - warnings.warn(f"Parse metadata from file not implemented. {fn_meta}") - return None +def decode_sensor_metadata(input_tags: dict[str, str]) -> SatImgDict: + """ + Decode sensor metadata from their string values saved on disk in Raster.tags. - def copy(self, new_array: NDArrayNum | None = None, cast_nodata: bool = True) -> SatelliteImage: - new_satimg = super().copy(new_array=new_array, cast_nodata=cast_nodata) # type: ignore - # all objects here are immutable so no need for a copy method (string and datetime) - # satimg_attrs = ['satellite', 'sensor', 'product', 'version', 'tile_name', 'datetime'] #taken outside of class - for attrs in satimg_attrs: - setattr(new_satimg, attrs, getattr(self, attrs)) + :param input_tags: + :return: + """ - return new_satimg # type: ignore + converted_sensor_tags = SatImgDict() + for tag in satimg_tags: + if tag in input_tags: + if tag == "datetime": + as_dt = dt.datetime.strptime(input_tags[tag], "%Y-%m-%d %H:%M:%S") + converted_sensor_tags.update({tag: as_dt}) # type: ignore + elif tag in ["tile_xmin", "tile_ymin", "tile_xsize", "tile_ymin"]: + converted_sensor_tags.update({tag: float(input_tags[tag])}) # type: ignore + elif isinstance(input_tags[tag], str) and input_tags[tag] == "None": + converted_sensor_tags.update({tag: None}) # type: ignore + continue + + return converted_sensor_tags diff --git a/geoutils/vector/geometric.py b/geoutils/vector/geometric.py index a30404a6..a6172a28 100644 --- a/geoutils/vector/geometric.py +++ b/geoutils/vector/geometric.py @@ -41,16 +41,16 @@ def _buffer_metric(gdf: gpd.GeoDataFrame, buffer_size: float) -> gu.Vector: def _buffer_without_overlap( - gdf: gpd.GeoDataFrame, buffer_size: int | float, metric: bool = True, plot: bool = False + ds: gpd.GeoDataFrame, buffer_size: int | float, metric: bool = True, plot: bool = False ) -> gu.Vector: """See Vector.buffer_without_overlap() for details.""" # Project in local UTM if metric is True if metric: - crs_utm_ups = _get_utm_ups_crs(df=gdf) - gdf = gdf.to_crs(crs=crs_utm_ups) + crs_utm_ups = _get_utm_ups_crs(df=ds) + gdf = ds.to_crs(crs=crs_utm_ups) else: - gdf = gdf + gdf = ds # Dissolve all geometries into one merged = gdf.dissolve() @@ -111,7 +111,7 @@ def _buffer_without_overlap( # Reverse-project to the original CRS if metric is True if metric: - merged_voronoi = merged_voronoi.to_crs(crs=gdf.crs) + merged_voronoi = merged_voronoi.to_crs(crs=ds.crs) return gu.Vector(merged_voronoi) diff --git a/geoutils/vector/geotransformations.py b/geoutils/vector/geotransformations.py index a6c8a92f..1dedf9f4 100644 --- a/geoutils/vector/geotransformations.py +++ b/geoutils/vector/geotransformations.py @@ -2,10 +2,7 @@ from __future__ import annotations -import os - import geopandas as gpd -import pyogrio import rasterio as rio from rasterio.crs import CRS @@ -14,7 +11,7 @@ def _reproject( gdf: gpd.GeoDataFrame, - ref: gu.Raster | rio.io.DatasetReader | gu.Vector | gpd.GeoDataFrame | str | None = None, + ref: gu.Raster | rio.io.DatasetReader | gu.Vector | gpd.GeoDataFrame | None = None, crs: CRS | str | int | None = None, ) -> gpd.GeoDataFrame: """Reproject a vector. See Vector.reproject() for more details.""" @@ -26,23 +23,10 @@ def _reproject( # Case a raster or vector is provided as reference if ref is not None: # Check that ref type is either str, Raster or rasterio data set - # Preferably use Raster instance to avoid rasterio data set to remain open. See PR #45 - if isinstance(ref, (gu.Raster, gu.Vector)): - ds_ref = ref - elif isinstance(ref, (rio.io.DatasetReader, gpd.GeoDataFrame)): + if isinstance(ref, (gu.Raster, gu.Vector, rio.io.DatasetReader, gpd.GeoDataFrame)): ds_ref = ref - elif isinstance(ref, str): - if not os.path.exists(ref): - raise ValueError("Reference raster or vector path does not exist.") - try: - ds_ref = gu.Raster(ref, load_data=False) - except rio.errors.RasterioIOError: - try: - ds_ref = gu.Vector(ref) - except pyogrio.errors.DataSourceError: - raise ValueError("Could not open raster or vector with rasterio or pyogrio.") else: - raise TypeError("Type of ref must be string path to file, Raster or Vector.") + raise TypeError("Type of ref must be a raster or vector.") # Read reprojecting params from ref raster crs = ds_ref.crs diff --git a/geoutils/vector/vector.py b/geoutils/vector/vector.py index 7d563b0b..083224a9 100644 --- a/geoutils/vector/vector.py +++ b/geoutils/vector/vector.py @@ -1057,7 +1057,7 @@ def crop( @overload def reproject( self: Vector, - ref: gu.Raster | rio.io.DatasetReader | VectorType | gpd.GeoDataFrame | str | None = None, + ref: gu.Raster | rio.io.DatasetReader | VectorType | gpd.GeoDataFrame | None = None, crs: CRS | str | int | None = None, *, inplace: Literal[False] = False, @@ -1066,7 +1066,7 @@ def reproject( @overload def reproject( self: Vector, - ref: gu.Raster | rio.io.DatasetReader | VectorType | gpd.GeoDataFrame | str | None = None, + ref: gu.Raster | rio.io.DatasetReader | VectorType | gpd.GeoDataFrame | None = None, crs: CRS | str | int | None = None, *, inplace: Literal[True], @@ -1075,7 +1075,7 @@ def reproject( @overload def reproject( self: Vector, - ref: gu.Raster | rio.io.DatasetReader | VectorType | gpd.GeoDataFrame | str | None = None, + ref: gu.Raster | rio.io.DatasetReader | VectorType | gpd.GeoDataFrame | None = None, crs: CRS | str | int | None = None, *, inplace: bool = False, @@ -1083,7 +1083,7 @@ def reproject( def reproject( self: Vector, - ref: gu.Raster | rio.io.DatasetReader | VectorType | gpd.GeoDataFrame | str | None = None, + ref: gu.Raster | rio.io.DatasetReader | VectorType | gpd.GeoDataFrame | None = None, crs: CRS | str | int | None = None, inplace: bool = False, ) -> Vector | None: @@ -1096,9 +1096,9 @@ def reproject( To reproject a Vector with different source bounds, first run Vector.crop(). - :param ref: A reference raster or vector whose CRS to use as a reference for reprojection. - Can be provided as a raster, vector, Rasterio dataset, GeoPandas dataframe, or path to the file. - :param crs: Specify the Coordinate Reference System or EPSG to reproject to. If dst_ref not set, + :param ref: Reference raster or vector whose CRS to use as a reference for reprojection. + Can be provided as a raster, vector, Rasterio dataset, or GeoPandas dataframe. + :param crs: Specify the coordinate reference system or EPSG to reproject to. If dst_ref not set, defaults to self.crs. :param inplace: Whether to update the vector in-place. diff --git a/setup.cfg b/setup.cfg index 60662298..4fa874ab 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [metadata] author = The GlacioHack Team name = geoutils -version = 0.1.10 +version = 0.1.12 description = Analysis and handling of georeferenced rasters and vectors keywords = raster, vector, geospatial, gis, xarray long_description = file: README.md @@ -48,17 +48,24 @@ geoutils = include = geoutils geoutils.raster + geoutils.vector + geoutils.interface + geoutils.pointcloud [options.extras_require] opt = scikit-image test = + gdal pytest pytest-xdist pytest-lazy-fixture pyyaml flake8 pylint + netcdf4 + dask-memusage + pre-commit doc = sphinx sphinx-book-theme diff --git a/tests/test_raster/test_array.py b/tests/test_raster/test_array.py index 1e02c612..198640db 100644 --- a/tests/test_raster/test_array.py +++ b/tests/test_raster/test_array.py @@ -9,11 +9,7 @@ import rasterio as rio import geoutils as gu -from geoutils.raster.array import ( - _get_array_and_mask, - _get_valid_extent, - _get_xy_rotated, -) +from geoutils.raster.array import get_array_and_mask, get_valid_extent, get_xy_rotated class TestArray: @@ -59,13 +55,13 @@ def test_get_array_and_mask( # Validate that incorrect shapes raise the correct error. if not check_should_pass: with pytest.raises(ValueError, match="Invalid array shape given"): - _get_array_and_mask(array, check_shape=True) + get_array_and_mask(array, check_shape=True) # Stop the test here as the failure is now validated. return # Get a copy of the array and check its shape (it should always pass at this point) - arr, _ = _get_array_and_mask(array, copy=True, check_shape=True) + arr, _ = get_array_and_mask(array, copy=True, check_shape=True) # Validate that the array is a copy assert not np.shares_memory(arr, array) @@ -82,7 +78,7 @@ def test_get_array_and_mask( warnings.simplefilter("always") # Try to create a view. - arr_view, mask = _get_array_and_mask(array, copy=False) + arr_view, mask = get_array_and_mask(array, copy=False) # If it should be possible, validate that there were no warnings. if view_should_be_possible: @@ -108,21 +104,21 @@ def test_get_valid_extent(self) -> None: # For no invalid values, the function should return the edges # For the array - assert (0, 4, 0, 4) == _get_valid_extent(arr) + assert (0, 4, 0, 4) == get_valid_extent(arr) # For the masked-array - assert (0, 4, 0, 4) == _get_valid_extent(mask_ma) + assert (0, 4, 0, 4) == get_valid_extent(mask_ma) # 1/ First column: # If we mask it in the masked array mask_ma[0, :] = np.ma.masked - assert (1, 4, 0, 4) == _get_valid_extent(mask_ma) + assert (1, 4, 0, 4) == get_valid_extent(mask_ma) # If we changed the array to NaNs arr[0, :] = np.nan - assert (1, 4, 0, 4) == _get_valid_extent(arr) + assert (1, 4, 0, 4) == get_valid_extent(arr) mask_ma.data[0, :] = np.nan mask_ma.mask = False - assert (1, 4, 0, 4) == _get_valid_extent(mask_ma) + assert (1, 4, 0, 4) == get_valid_extent(mask_ma) # 2/ First row: arr = np.ones(shape=(5, 5)) @@ -130,14 +126,14 @@ def test_get_valid_extent(self) -> None: mask_ma = np.ma.masked_array(data=arr, mask=arr_mask) # If we mask it in the masked array mask_ma[:, 0] = np.ma.masked - assert (0, 4, 1, 4) == _get_valid_extent(mask_ma) + assert (0, 4, 1, 4) == get_valid_extent(mask_ma) # If we changed the array to NaNs arr[:, 0] = np.nan - assert (0, 4, 1, 4) == _get_valid_extent(arr) + assert (0, 4, 1, 4) == get_valid_extent(arr) mask_ma.data[:, 0] = np.nan mask_ma.mask = False - assert (0, 4, 1, 4) == _get_valid_extent(mask_ma) + assert (0, 4, 1, 4) == get_valid_extent(mask_ma) # 3/ Last column: arr = np.ones(shape=(5, 5)) @@ -146,14 +142,14 @@ def test_get_valid_extent(self) -> None: # If we mask it in the masked array mask_ma[-1, :] = np.ma.masked - assert (0, 3, 0, 4) == _get_valid_extent(mask_ma) + assert (0, 3, 0, 4) == get_valid_extent(mask_ma) # If we changed the array to NaNs arr[-1, :] = np.nan - assert (0, 3, 0, 4) == _get_valid_extent(arr) + assert (0, 3, 0, 4) == get_valid_extent(arr) mask_ma.data[-1, :] = np.nan mask_ma.mask = False - assert (0, 3, 0, 4) == _get_valid_extent(mask_ma) + assert (0, 3, 0, 4) == get_valid_extent(mask_ma) # 4/ Last row: arr = np.ones(shape=(5, 5)) @@ -162,14 +158,14 @@ def test_get_valid_extent(self) -> None: # If we mask it in the masked array mask_ma[:, -1] = np.ma.masked - assert (0, 4, 0, 3) == _get_valid_extent(mask_ma) + assert (0, 4, 0, 3) == get_valid_extent(mask_ma) # If we changed the array to NaNs arr[:, -1] = np.nan - assert (0, 4, 0, 3) == _get_valid_extent(arr) + assert (0, 4, 0, 3) == get_valid_extent(arr) mask_ma.data[:, -1] = np.nan mask_ma.mask = False - assert (0, 4, 0, 3) == _get_valid_extent(mask_ma) + assert (0, 4, 0, 3) == get_valid_extent(mask_ma) def test_get_xy_rotated(self) -> None: """Check the function to rotate array.""" @@ -184,27 +180,27 @@ def test_get_xy_rotated(self) -> None: xx, yy = r1.coords(grid=True, force_offset="ll") # Rotating the coordinates 90 degrees should be the same as rotating the array - xx90, yy90 = _get_xy_rotated(r1, along_track_angle=90) + xx90, yy90 = get_xy_rotated(r1, along_track_angle=90) assert np.allclose(np.rot90(xx90), xx) assert np.allclose(np.rot90(yy90), yy) # Same for 180 degrees - xx180, yy180 = _get_xy_rotated(r1, along_track_angle=180) + xx180, yy180 = get_xy_rotated(r1, along_track_angle=180) assert np.allclose(np.rot90(xx180, k=2), xx) assert np.allclose(np.rot90(yy180, k=2), yy) # Same for 270 degrees - xx270, yy270 = _get_xy_rotated(r1, along_track_angle=270) + xx270, yy270 = get_xy_rotated(r1, along_track_angle=270) assert np.allclose(np.rot90(xx270, k=3), xx) assert np.allclose(np.rot90(yy270, k=3), yy) # 360 degrees should get us back on our feet - xx360, yy360 = _get_xy_rotated(r1, along_track_angle=360) + xx360, yy360 = get_xy_rotated(r1, along_track_angle=360) assert np.allclose(xx360, xx) assert np.allclose(yy360, yy) # Test that the values make sense for 45 degrees - xx45, yy45 = _get_xy_rotated(r1, along_track_angle=45) + xx45, yy45 = get_xy_rotated(r1, along_track_angle=45) # Should have zero on the upper left corner for xx assert xx45[0, 0] == pytest.approx(0) # Then a multiple of sqrt2 along each dimension @@ -215,4 +211,4 @@ def test_get_xy_rotated(self) -> None: # Finally, yy should be rotated by 90 assert np.allclose(np.rot90(xx45), yy45) - xx, yy = _get_xy_rotated(r1, along_track_angle=90) + xx, yy = get_xy_rotated(r1, along_track_angle=90) diff --git a/tests/test_raster/test_base.py b/tests/test_raster/test_base.py index b6b2636c..a3246cf8 100644 --- a/tests/test_raster/test_base.py +++ b/tests/test_raster/test_base.py @@ -89,7 +89,7 @@ class TestClassVsAccessorConsistency: # Test common attributes attributes = ["crs", "transform", "nodata", "area_or_point", "res", "count", "height", "width", "footprint", - "shape", "bands", "indexes", "is_xr", "is_loaded"] + "shape", "bands", "indexes", "_is_xr", "is_loaded"] @pytest.mark.parametrize("path_raster", [landsat_b4_path, aster_dem_path, landsat_rgb_path]) # type: ignore @pytest.mark.parametrize("attr", attributes) # type: ignore @@ -109,7 +109,7 @@ def test_attributes(self, path_raster: str, attr: str) -> None: output_ds = getattr(getattr(ds, "rst"), attr) # Assert equality - if attr != "is_xr": # Only attribute that is (purposely) not the same, but the opposite + if attr != "_is_xr": # Only attribute that is (purposely) not the same, but the opposite assert output_equal(output_raster, output_ds) else: assert output_raster != output_ds diff --git a/tests/test_raster/test_multiraster.py b/tests/test_raster/test_multiraster.py index 86cd7803..59ae71cc 100644 --- a/tests/test_raster/test_multiraster.py +++ b/tests/test_raster/test_multiraster.py @@ -143,11 +143,6 @@ def images_different_crs(): # type: ignore return RealImageStack("everest_landsat_b4", different_crs=4326) -@pytest.fixture -def sat_images(): # type: ignore - return RealImageStack("everest_landsat_b4", cls=gu.SatelliteImage) - - @pytest.fixture def images_3d(): # type: ignore return RealImageStack("everest_landsat_rgb") @@ -163,7 +158,6 @@ class TestMultiRaster: "rasters", [ pytest.lazy_fixture("images_1d"), - pytest.lazy_fixture("sat_images"), pytest.lazy_fixture("images_different_crs"), pytest.lazy_fixture("images_3d"), pytest.lazy_fixture("images_nodata_zero"), diff --git a/tests/test_raster/test_raster.py b/tests/test_raster/test_raster.py index 404e4745..aee3894c 100644 --- a/tests/test_raster/test_raster.py +++ b/tests/test_raster/test_raster.py @@ -1009,7 +1009,7 @@ def test_copy(self, example: str) -> None: # When passing the new array as a NaN ndarray, only the valid data is equal, because masked data is NaN in one # case, and -9999 in the other - r_arr = gu.raster.array._get_array_and_mask(r)[0] + r_arr = gu.raster.array.get_array_and_mask(r)[0] r2 = r.copy(new_array=r_arr) assert np.ma.allequal(r.data, r2.data) # If a nodata value exists, and we update the NaN pixels to be that nodata value, then the two Rasters should @@ -2192,11 +2192,6 @@ class TestArithmetic: area_or_point="Point", ) - # Tests with child class - satimg = gu.SatelliteImage.from_array( - rng.integers(1, 255, (height, width)).astype("float32"), transform=transform, crs=None - ) - def test_raster_equal(self) -> None: """ Test that raster_equal() works as expected. @@ -2335,7 +2330,6 @@ def test_ops_2args_expl(self, op: str) -> None: r1_nodata = self.r1_nodata r2 = self.r2 r2_zero = self.r2_zero - satimg = self.satimg rng = np.random.default_rng(42) array = rng.integers(1, 255, (self.height, self.width)).astype("float64") floatval = 3.14 @@ -2424,10 +2418,6 @@ def test_ops_2args_expl(self, op: str) -> None: else: assert r3.nodata == _default_nodata(dtype) - # Test with child class - r3 = getattr(satimg, op)(intval) - assert isinstance(r3, gu.SatelliteImage) - reflective_ops = [["__add__", "__radd__"], ["__mul__", "__rmul__"]] @pytest.mark.parametrize("ops", reflective_ops) # type: ignore diff --git a/tests/test_raster/test_satimg.py b/tests/test_raster/test_satimg.py index c05a46ee..3a70724b 100644 --- a/tests/test_raster/test_satimg.py +++ b/tests/test_raster/test_satimg.py @@ -1,48 +1,43 @@ """ -Test functions for SatelliteImage class +Test functions for metadata parsing from sensor, often satellite imagery. """ +from __future__ import annotations + import datetime import datetime as dt +import os import sys +import tempfile from io import StringIO -import numpy as np import pytest -import rasterio as rio import geoutils as gu from geoutils import examples +from geoutils.raster.satimg import satimg_tags DO_PLOT = False -class TestSatelliteImage: +class TestSatImg: + landsat_b4 = examples.get_path("everest_landsat_b4") aster_dem = examples.get_path("exploradores_aster_dem") @pytest.mark.parametrize("example", [landsat_b4, aster_dem]) # type: ignore def test_init(self, example: str) -> None: - """ - Test that inputs work properly in SatelliteImage class init - """ + """Test that the sensor reading through Raster initialisation works.""" - # from filename, checking option - img = gu.SatelliteImage(example, read_from_fn=False) - img = gu.SatelliteImage(example) - assert isinstance(img, gu.SatelliteImage) + # Load with parse sensor metadata, it should write metadata in the tags + rast = gu.Raster(example, parse_sensor_metadata=True) + for tag in satimg_tags: + assert tag in rast.tags.keys() - # from SatelliteImage - img2 = gu.SatelliteImage(img) - assert isinstance(img2, gu.SatelliteImage) - - # from Raster - r = gu.Raster(example) - img3 = gu.SatelliteImage(r) - assert isinstance(img3, gu.SatelliteImage) - - assert img.raster_equal(img2) - assert img.raster_equal(img3) + # And that otherwise it does not + rast = gu.Raster(example, parse_sensor_metadata=False) + for tag in satimg_tags: + assert tag not in rast.tags.keys() @pytest.mark.parametrize("example", [landsat_b4, aster_dem]) # type: ignore def test_silent(self, example: str) -> None: @@ -50,8 +45,8 @@ def test_silent(self, example: str) -> None: Test that the silent method does not return any output in console """ - # let's capture stdout - # cf https://stackoverflow.com/questions/16571150/how-to-capture-stdout-output-from-a-python-function-call + # Let's capture stdout + # See https://stackoverflow.com/questions/16571150/how-to-capture-stdout-output-from-a-python-function-call class Capturing(list): # type: ignore def __enter__(self): # type: ignore self._stdout = sys.stdout @@ -64,88 +59,39 @@ def __exit__(self, *args) -> None: # type: ignore sys.stdout = self._stdout with Capturing() as output1: - gu.SatelliteImage(example, silent=False) + gu.Raster(example, parse_sensor_metadata=True, silent=False) - # check the metadata reading outputs to console + # Check the metadata reading outputs to console assert len(output1) > 0 with Capturing() as output2: - gu.SatelliteImage(example, silent=True) + gu.Raster(example, parse_sensor_metadata=True, silent=True) - # check nothing outputs to console + # Check nothing outputs to console assert len(output2) == 0 - def test_add_sub(self) -> None: - """ - Test that overloading of addition, subtraction and negation works for child classes as well. - """ - # Create fake rasters with random values in 0-255 and dtype uint8 - rng = np.random.default_rng(42) - width = height = 5 - transform = rio.transform.from_bounds(0, 0, 1, 1, width, height) - satimg1 = gu.SatelliteImage.from_array( - rng.integers(0, 255, (height, width), dtype="uint8"), transform=transform, crs=None - ) - satimg2 = gu.SatelliteImage.from_array( - rng.integers(0, 255, (height, width), dtype="uint8"), transform=transform, crs=None - ) - - # Check that output type is same - other tests are in test_raster.py - sat_out = -satimg1 - assert isinstance(sat_out, gu.SatelliteImage) - - sat_out = satimg1 + satimg2 - assert isinstance(sat_out, gu.SatelliteImage) - - sat_out = satimg1 - satimg2 # type: ignore - assert isinstance(sat_out, gu.SatelliteImage) - @pytest.mark.parametrize("example", [landsat_b4, aster_dem]) # type: ignore - def test_copy(self, example: str) -> None: - """ - Test that the copy method works as expected for SatelliteImage. In particular - when copying r to r2: - - if r.data is modified and r copied, the updated data is copied - - if r is copied, r.data changed, r2.data should be unchanged - """ - # Open dataset, update data and make a copy - r = gu.SatelliteImage(example) - r.data += 5 - r2 = r.copy() + def test_save_tags(self, example: str) -> None: + """Check that the metadata read is saved in tags of raster metadata.""" - # Objects should be different (not pointing to the same memory) - assert r is not r2 - - # Check the object is a SatelliteImage - assert isinstance(r2, gu.SatelliteImage) - - # Check all immutable attributes are equal - raster_attrs = [ - attr for attr in gu.raster.raster._default_rio_attrs if attr not in ["driver", "filename", "name"] - ] - satimg_attrs = ["satellite", "sensor", "product", "version", "tile_name", "datetime"] - # Using list directly available in class - attrs = raster_attrs + satimg_attrs - all_attrs = attrs + gu.raster.satimg.satimg_attrs - for attr in all_attrs: - assert r.__getattribute__(attr) == r2.__getattribute__(attr) + rast = gu.Raster(example, parse_sensor_metadata=True) - # Check data array - assert np.array_equal(r.data, r2.data, equal_nan=True) + # Temporary folder + temp_dir = tempfile.TemporaryDirectory() - # Check dataset_mask array - assert np.array_equal(r.data.mask, r2.data.mask) + # Save file to temporary file, with defaults opts + temp_file = os.path.join(temp_dir.name, "test.tif") + rast.save(temp_file) + saved = gu.Raster(temp_file) - # Check that if r.data is modified, it does not affect r2.data - r.data += 5 - assert not np.array_equal(r.data, r2.data, equal_nan=True) + assert saved.tags == rast.tags def test_filename_parsing(self) -> None: """Test metadata parsing from filenames""" copied_names = [ "TDM1_DEM__30_N00E104_DEM.tif", - "SETSM_WV02_20141026_1030010037D17F00_10300100380B4000_mosaic5_2m_v3.0_dem.tif", + "SETSM_WV02_20141026_ex1030010037D17F00_10300100380B4000_mosaic5_2m_v3.0_dem.tif", "SETSM_s2s041_WV02_20150615_10300100443C2D00_1030010043373000_seg1_2m_dem.tif", "AST_L1A_00303132015224418_final.tif", "ILAKS1B_20190928_271_Gilkey-DEM.tif", @@ -155,7 +101,7 @@ def test_filename_parsing(self) -> None: "NASADEM_HGT_n00e041.hgt", ] # Corresponding data, filled manually - satellites = ["TanDEM-X", "WorldView", "WorldView", "Terra", "IceBridge", "SRTM", "Terra", "SRTM", "SRTM"] + platform = ["TanDEM-X", "WorldView", "WorldView", "Terra", "IceBridge", "SRTM", "Terra", "SRTM", "SRTM"] sensors = ["TanDEM-X", "WV02", "WV02", "ASTER", "UAF-LS", "SRTM", "ASTER", "SRTM", "SRTM"] products = [ "TDM1", @@ -168,7 +114,7 @@ def test_filename_parsing(self) -> None: "SRTMGL1", "NASADEM-HGT", ] - # we can skip the version, bit subjective... + # We can skip the version, bit subjective... tiles = ["N00E104", None, None, None, None, "06_01", "N00E108", "N00E015", "n00e041"] datetimes = [ None, @@ -185,11 +131,11 @@ def test_filename_parsing(self) -> None: for names in copied_names: attrs = gu.raster.satimg.parse_metadata_from_fn(names) i = copied_names.index(names) - assert satellites[i] == attrs[0] - assert sensors[i] == attrs[1] - assert products[i] == attrs[2] - assert tiles[i] == attrs[4] - assert datetimes[i] == attrs[5] + assert platform[i] == attrs["platform"] + assert sensors[i] == attrs["sensor"] + assert products[i] == attrs["product"] + assert tiles[i] == attrs["tile_name"] + assert datetimes[i] == attrs["datetime"] def test_sw_tile_naming_parsing(self) -> None: # normal examples @@ -203,12 +149,12 @@ def test_sw_tile_naming_parsing(self) -> None: for latlon in test_latlon: assert gu.raster.satimg.latlon_to_sw_naming(latlon) == test_tiles[test_latlon.index(latlon)] - # check possible exceptions, rounded lat/lon belong to their southwest border + # Check possible exceptions, rounded lat/lon belong to their southwest border assert gu.raster.satimg.latlon_to_sw_naming((0, 0)) == "N00E000" - # those are the same point, should give same naming + # Those are the same point, should give same naming assert gu.raster.satimg.latlon_to_sw_naming((-90, 0)) == "S90E000" assert gu.raster.satimg.latlon_to_sw_naming((90, 0)) == "S90E000" - # same here + # Same here assert gu.raster.satimg.latlon_to_sw_naming((0, -180)) == "N00W180" assert gu.raster.satimg.latlon_to_sw_naming((0, 180)) == "N00W180" @@ -248,7 +194,7 @@ def test_parse_tile_attr_from_name(self) -> None: def test_parse_landsat(self) -> None: """Test the parsing of landsat metadata from name.""" - # Landsat 1 + # Landsat 1 example landsat1 = "LM10170391976031AAA01.tif" attrs1 = gu.raster.satimg.parse_landsat(landsat1) diff --git a/tests/test_vector/test_geometric.py b/tests/test_vector/test_geometric.py index 20d38463..43858dcd 100644 --- a/tests/test_vector/test_geometric.py +++ b/tests/test_vector/test_geometric.py @@ -172,6 +172,7 @@ def test_buffer_without_overlap(self, monkeypatch) -> None: # type: ignore # Output should be of same size as input and same geometry type assert len(buffer.ds) == len(two_squares.ds) assert np.all(buffer.ds.geometry.geom_type == two_squares.ds.geometry.geom_type) + assert buffer.crs == two_squares.crs # Extract individual geometries polys = [] diff --git a/tests/test_vector/test_geotransformations_vector.py b/tests/test_vector/test_geotransformations_vector.py index e1db15df..1c78ec98 100644 --- a/tests/test_vector/test_geotransformations_vector.py +++ b/tests/test_vector/test_geotransformations_vector.py @@ -53,14 +53,8 @@ def test_reproject(self) -> None: with pytest.raises(ValueError, match=re.escape("Either of `ref` or `crs` must be set. Not both.")): v0.reproject() v0.reproject(ref=r0, crs=32617) - # If the path provided does not exist - with pytest.raises(ValueError, match=re.escape("Reference raster or vector path does not exist.")): - v0.reproject(ref="tmp.lol") - # If it exists but cannot be opened by rasterio or fiona - with pytest.raises(ValueError, match=re.escape("Could not open raster or vector with rasterio or pyogrio.")): - v0.reproject(ref="geoutils/examples.py") # If input of wrong type - with pytest.raises(TypeError, match=re.escape("Type of ref must be string path to file, Raster or Vector.")): + with pytest.raises(TypeError, match=re.escape("Type of ref must be a raster or vector.")): v0.reproject(ref=10) # type: ignore test_data = [[landsat_b4_crop_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]]