From fe8795c6e0559c46135ac835255d0664cecd7adc Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Tue, 30 Jan 2024 03:12:34 -0900 Subject: [PATCH] Improve error messages and make `Raster` indexing/assigment NumPy compatible (#454) --- geoutils/raster/raster.py | 122 +++++++++++++++-------- geoutils/vector.py | 10 +- tests/test_raster.py | 204 ++++++++++++++++++++++++++++++-------- tests/test_vector.py | 4 - 4 files changed, 242 insertions(+), 98 deletions(-) diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py index 6b408b92..3b2453ca 100644 --- a/geoutils/raster/raster.py +++ b/geoutils/raster/raster.py @@ -353,6 +353,51 @@ def _get_reproject_params( return dst_transform, dst_size +def _check_cast_array_raster( + input1: RasterType | NDArrayNum, input2: RasterType | NDArrayNum, operation_name: str +) -> None: + """ + Check the casting between an array and a raster, or raise an (helpful) error message. + + :param input1: Raster or array. + :param input2: Raster or array. + :param operation_name: Name of operation to raise in the error message. + + :return: None. + """ + + if isinstance(input1, Raster) and isinstance(input2, Raster): + + # Check that both rasters have the same shape and georeferences + if input1.georeferenced_grid_equal(input2): + pass + else: + raise ValueError( + "Both rasters must have the same shape, transform and CRS for " + operation_name + ". " + "For example, use raster1 = raster1.reproject(raster2) to reproject raster1 on the " + "same grid and CRS than raster2." + ) + + else: + + # The shape compatibility should be valid even when squeezing + if isinstance(input1, np.ndarray): + input1 = input1.squeeze() + elif isinstance(input2, np.ndarray): + input2 = input2.squeeze() + + if input1.shape == input2.shape: + pass + else: + raise ValueError( + "The raster and array must have the same shape for " + operation_name + ". " + "For example, if the array comes from another raster, use raster1 = " + "raster1.reproject(raster2) beforehand to reproject raster1 on the same grid and CRS " + "than raster2. Or, if the array does not come from a raster, define one with raster = " + "Raster.from_array(array, array_transform, array_crs, array_nodata) then reproject." + ) + + class Raster: """ The georeferenced raster. @@ -860,27 +905,25 @@ def __str__(self) -> str: return str(s) - def __getitem__(self, index: Raster | Vector | NDArrayNum | list[float] | tuple[float, ...]) -> NDArrayNum | Raster: + def __getitem__(self, index: Mask | NDArrayBool | Any) -> NDArrayBool | Raster: """ - Index or subset the raster. + Index the raster. - Two cases: - - If a mask of same georeferencing or array of same shape is passed, return the indexed raster array. - - If a raster, vector, list or tuple of bounds is passed, return the cropped raster matching those objects. + In addition to all index types supported by NumPy, also supports a mask of same georeferencing or a + boolean array of the same shape as the raster. """ + if isinstance(index, (Mask, np.ndarray)): + _check_cast_array_raster(self, index, operation_name="an indexing operation") # type: ignore + # If input is Mask with the same shape and georeferencing if isinstance(index, Mask): - if not self.georeferenced_grid_equal(index): - raise ValueError("Indexing a raster with a mask requires the two being on the same georeferenced grid.") if self.count == 1: return self.data[index.data.squeeze()] else: return self.data[:, index.data.squeeze()] # If input is array with the same shape elif isinstance(index, np.ndarray): - if np.shape(index) != self.shape: - raise ValueError("Indexing a raster with an array requires the two having the same shape.") if str(index.dtype) != "bool": index = index.astype(bool) warnings.warn(message="Input array was cast to boolean for indexing.", category=UserWarning) @@ -889,49 +932,47 @@ def __getitem__(self, index: Raster | Vector | NDArrayNum | list[float] | tuple[ else: return self.data[:, index] - # Otherwise, subset with crop + # Otherwise, use any other possible index and leave it to NumPy else: - return self.crop(crop_geom=index) + return self.data[index] - def __setitem__(self, index: Mask | NDArrayBool, assign: NDArrayNum | Number) -> None: + def __setitem__(self, index: Mask | NDArrayBool | Any, assign: NDArrayNum | Number) -> None: """ Perform index assignment on the raster. - If a mask of same georeferencing or array of same shape is passed, - it is used as index to assign values to the raster array. + In addition to all index types supported by NumPy, also supports a mask of same georeferencing or a + boolean array of the same shape as the raster. """ # First, check index + if isinstance(index, (Mask, np.ndarray)): + _check_cast_array_raster(self, index, operation_name="an index assignment operation") # type: ignore # If input is Mask with the same shape and georeferencing if isinstance(index, Mask): - if not self.georeferenced_grid_equal(index): - raise ValueError("Indexing a raster with a mask requires the two being on the same georeferenced grid.") - ind = index.data.data + use_all_bands = False # If input is array with the same shape elif isinstance(index, np.ndarray): - if np.shape(index) != self.shape: - raise ValueError("Indexing a raster with an array requires the two having the same shape.") if str(index.dtype) != "bool": ind = index.astype(bool) warnings.warn(message="Input array was cast to boolean for indexing.", category=UserWarning) - else: - ind = index - # Otherwise, raise an error + use_all_bands = False + # Otherwise, use the index, NumPy will raise appropriate errors itself else: - raise ValueError( - "Indexing a raster requires a mask of same georeferenced grid, or a boolean array of same shape." - ) + ind = index + use_all_bands = True - # Second, assign, NumPy will raise appropriate errors itself + # Second, assign the data, here also let NumPy do the job # We need to explicitly load here, as we cannot call the data getter/setter directly if not self.is_loaded: self.load() - # Assign the values to the index - if self.count == 1: + + # Assign the values to the index (single band raster with mask/array, or other NumPy index) + if self.count == 1 or use_all_bands: self._data[ind] = assign # type: ignore + # For multi-band rasters with a mask/array else: self._data[:, ind] = assign # type: ignore return None @@ -991,18 +1032,12 @@ def _overloading_check( nodata1 = self.nodata dtype1 = self.data.dtype + # Raise error messages if grids don't match (CRS + transform for raster, shape for array) + if isinstance(other, (Raster, np.ndarray)): + _check_cast_array_raster(self, other, operation_name="an arithmetic operation") # type: ignore + # Case 1 - other is a Raster if isinstance(other, Raster): - # Not necessary anymore with implicit loading - # # Check that both data are loaded - # if not (self.is_loaded & other.is_loaded): - # raise ValueError("Raster's data must be loaded with self.load().") - - # Check that both rasters have the same shape and georeferences - if (self.data.shape == other.data.shape) & (self.transform == other.transform) & (self.crs == other.crs): - pass - else: - raise ValueError("Both rasters must have the same shape, transform and CRS.") nodata2 = other.nodata dtype2 = other.data.dtype @@ -1018,11 +1053,6 @@ def _overloading_check( else: other_data = other - if self.data.shape == other_data.shape: - pass - else: - raise ValueError("The raster and array must have the same shape.") - nodata2 = None dtype2 = other.dtype @@ -1851,6 +1881,10 @@ def __array_ufunc__( # If the universal function takes two inputs (Note: no ufunc exists that has three inputs or more) else: + + # Check the casting between Raster and array inputs, and return error messages if not consistent + _check_cast_array_raster(inputs[0], inputs[1], "an arithmetic operation") # type: ignore + if ufunc.nout == 1: return self.from_array( data=final_ufunc(inputs[0].data, inputs[1].data, **kwargs), # type: ignore @@ -1911,6 +1945,8 @@ def __array_function__( outputs = func(first_arg, *args[1:], **kwargs) # type: ignore else: second_arg = args[1].data + # Check the casting between Raster and array inputs, and return error messages if not consistent + _check_cast_array_raster(first_arg, second_arg, operation_name="an arithmetic operation") outputs = func(first_arg, second_arg, *args[2:], **kwargs) # type: ignore # Below, we recast to Raster if the shape was preserved, otherwise return an array diff --git a/geoutils/vector.py b/geoutils/vector.py index c17a7a32..a4a331fd 100644 --- a/geoutils/vector.py +++ b/geoutils/vector.py @@ -725,16 +725,10 @@ def rename_geometry(self, col: str, inplace: bool = False) -> Vector | None: def __getitem__(self, key: gu.Raster | Vector | list[float] | tuple[float, ...] | Any) -> Vector: """ - Index the geodataframe or crop the vector. - - If a raster, vector or tuple is passed, crops to its bounds. - Otherwise, indexes the geodataframe. + Index the geodataframe. """ - if isinstance(key, (gu.Raster, Vector)): - return self.crop(crop_geom=key, clip=False) - else: - return self._override_gdf_output(self.ds.__getitem__(key)) + return self._override_gdf_output(self.ds.__getitem__(key)) @copy_doc(gpd.GeoDataFrame, "Vector") def __setitem__(self, key: Any, value: Any) -> None: diff --git a/tests/test_raster.py b/tests/test_raster.py index dda0e7c4..10b31dd5 100644 --- a/tests/test_raster.py +++ b/tests/test_raster.py @@ -916,6 +916,8 @@ def test_masking(self, example: str) -> None: def test_getitem_setitem(self, example: str) -> None: """Test the __getitem__ method ([]) for indexing and __setitem__ for index assignment.""" + # -- First, we test mask or boolean array indexing and assignment, specific to rasters -- + # Open a Raster rst = gu.Raster(example) @@ -940,29 +942,65 @@ def test_getitem_setitem(self, example: str) -> None: # The rasters should be the same assert rst2.raster_equal(rst) - # Check that errors are raised for both indexing and index assignment + # -- Second, we test NumPy indexes (slices, integers, ellipses, new axes) -- + + # Indexing + assert np.ma.allequal(rst[0], rst.data[0]) # Test an integer + assert np.ma.allequal(rst[0:10], rst.data[0:10]) # Test a slice + # New axis adds a dimension, but the 0 index reduces one, so we still get a 2D raster + assert np.ma.allequal(rst[np.newaxis, 0], rst.data[np.newaxis, 0]) # Test a new axis + assert np.ma.allequal(rst[...], rst.data[...]) # Test an ellipsis + + # Index assignment + rst[0] = 1 + assert np.ma.allequal(rst.data[0], np.ones(np.shape(rst.data[0]))) # Test an integer + rst[0:10] = 1 + assert np.ma.allequal(rst.data[0:10], np.ones(np.shape(rst.data[0:10]))) # Test a slice + # Same as above for the new axis + rst[0, np.newaxis] = 1 + assert np.ma.allequal(rst.data[0, np.newaxis], np.ones(np.shape(rst.data[0, np.newaxis]))) # Test a new axis + rst[...] = 1 + assert np.ma.allequal(rst.data[...], np.ones(np.shape(rst.data[...]))) # Test an ellipsis + + # -- Finally, we check that errors are raised for both indexing and index assignment -- + + # For indexing + op_name_index = "an indexing operation" + op_name_assign = "an index assignment operation" + message_raster = ( + "Both rasters must have the same shape, transform and CRS for {}. " + "For example, use raster1 = raster1.reproject(raster2) to reproject raster1 on the " + "same grid and CRS than raster2." + ) + message_array = ( + "The raster and array must have the same shape for {}. " + "For example, if the array comes from another raster, use raster1 = " + "raster1.reproject(raster2) beforehand to reproject raster1 on the same grid and CRS " + "than raster2. Or, if the array does not come from a raster, define one with raster = " + "Raster.from_array(array, array_transform, array_crs, array_nodata) then reproject." + ) + # An error when the shape is wrong - with pytest.raises(ValueError, match="Indexing a raster with an array requires the two having the same shape."): + with pytest.raises(ValueError, match=re.escape(message_array.format(op_name_index))): rst[arr[:-1, :-1]] - rst[arr[:-1, :-1]] = 1 + + # An error when the georeferencing of the Mask does not match + mask.shift(1, 1) + with pytest.raises(ValueError, match=re.escape(message_raster.format(op_name_index))): + rst[mask] + # A warning when the array type is not boolean with pytest.warns(UserWarning, match="Input array was cast to boolean for indexing."): rst[arr.astype("uint8")] rst[arr.astype("uint8")] = 1 - # An error when the georeferencing of the Mask does not match - mask.shift(1, 1) - with pytest.raises( - ValueError, match="Indexing a raster with a mask requires the two being on the same georeferenced grid." - ): - rst[mask] + + # For index assignment + # An error when the shape is wrong + with pytest.raises(ValueError, match=re.escape(message_array.format(op_name_assign))): + rst[arr[:-1, :-1]] = 1 + + with pytest.raises(ValueError, match=re.escape(message_raster.format(op_name_assign))): rst[mask] = 1 - # For assignment, an error when the input index is neither a Mask or a boolean ndarray - # (indexing attempts to call crop for differently shaped data) - with pytest.raises( - ValueError, - match="Indexing a raster requires a mask of same georeferenced grid, or a boolean array of same shape.", - ): - rst["lol"] = 1 test_data = [[landsat_b4_path, everest_outlines_path], [aster_dem_path, aster_outlines_path]] @@ -987,10 +1025,6 @@ def test_crop(self, data: list[str]) -> None: r_cropped = r.crop(crop_geom2) assert r_cropped.raster_equal(r) - # Test with bracket call - r_cropped_getitem = r[crop_geom2] - assert r_cropped_getitem.raster_equal(r_cropped) - # - Test cropping each side by a random integer of pixels - # rand_int = np.random.randint(1, min(r.shape) - 1) @@ -1051,10 +1085,6 @@ def test_crop(self, data: list[str]) -> None: r_cropped4 = r.crop(crop_geom=r_cropped_reproj.get_bounds_projected(out_crs=r.crs)) assert r_cropped3.raster_equal(r_cropped4) - # Check with bracket call - r_cropped5 = r[r_cropped_reproj] - assert r_cropped4.raster_equal(r_cropped5) - # -- Test with inplace=True -- # r_copy = r.copy() r_copy.crop(r_cropped, inplace=True) @@ -1141,10 +1171,6 @@ def test_crop(self, data: list[str]) -> None: r_cropped2 = r.crop(outlines) assert list(r_cropped2.bounds) == list(new_bounds) - # Finally, we check with a bracket call - r_cropped3 = r[outlines] - assert list(r_cropped3.bounds) == list(new_bounds) - # -- Test crop works as expected even if transform has been modified, e.g. through downsampling -- # # Test that with downsampling, cropping to same bounds result in same raster r = gu.Raster(raster_path, downsample=5) @@ -2940,10 +2966,6 @@ def test_crop(self, mask: gu.Mask) -> None: # Check if instance is respected assert isinstance(mask_cropped, gu.Mask) - # Test with bracket call - mask_cropped_getitem = mask[crop_geom] - assert mask_cropped_getitem.raster_equal(mask_cropped) - # - Test cropping each side by a random integer of pixels - # rand_int = np.random.randint(1, min(mask.shape) - 1) @@ -3554,19 +3576,43 @@ def test_ops_logical_bitwise_implicit(self) -> None: def test_raise_errors(self, op: str) -> None: """ Test that errors are properly raised in certain situations. - """ - # different shapes - expected_message = "Both rasters must have the same shape, transform and CRS." - with pytest.raises(ValueError, match=expected_message): - getattr(self.r1_wrong_shape, op)(self.r2) - # different CRS - with pytest.raises(ValueError, match=expected_message): - getattr(self.r1_wrong_crs, op)(self.r2) + !! Important !! Here we test errors with the operator on the raster only (arithmetic overloading), + calling with array first is supported with the NumPy interface and tested in ArrayInterface. + """ + # Rasters with different CRS, transform, or shape + # Different shape + expected_message = ( + "Both rasters must have the same shape, transform and CRS for an arithmetic operation. " + "For example, use raster1 = raster1.reproject(raster2) to reproject raster1 on the " + "same grid and CRS than raster2." + ) + with pytest.raises(ValueError, match=re.escape(expected_message)): + getattr(self.r2, op)(self.r1_wrong_shape) + + # Different CRS + with pytest.raises(ValueError, match=re.escape(expected_message)): + getattr(self.r2, op)(self.r1_wrong_crs) + + # Different transform + with pytest.raises(ValueError, match=re.escape(expected_message)): + getattr(self.r2, op)(self.r1_wrong_transform) + + # Array with different shape + expected_message = ( + "The raster and array must have the same shape for an arithmetic operation. " + "For example, if the array comes from another raster, use raster1 = " + "raster1.reproject(raster2) beforehand to reproject raster1 on the same grid and CRS " + "than raster2. Or, if the array does not come from a raster, define one with raster = " + "Raster.from_array(array, array_transform, array_crs, array_nodata) then reproject." + ) + # Different shape, masked array + with pytest.raises(ValueError, match=re.escape(expected_message)): + getattr(self.r2, op)(self.r1_wrong_shape.data) - # different transform - with pytest.raises(ValueError, match=expected_message): - getattr(self.r1_wrong_transform, op)(self.r2) + # Different shape, normal array with NaNs + with pytest.raises(ValueError, match=re.escape(expected_message)): + getattr(self.r2, op)(self.r1_wrong_shape.data.filled(np.nan)) # Wrong type of "other" expected_message = "Operation between an object of type .* and a Raster impossible." @@ -3708,6 +3754,13 @@ class TestArrayInterface: assert np.count_nonzero(~mask2) > 0 assert np.count_nonzero(~mask3) > 0 + # Wrong shaped arrays to check errors are raised + arr_wrong_shape = np.random.randint(min_val, max_val, (height - 1, width - 1), dtype="int32") + np.random.normal( + size=(height - 1, width - 1) + ) + wrong_transform = rio.transform.from_bounds(0, 0, 1, 1, width - 1, height - 1) + mask_wrong_shape = np.random.randint(0, 2, size=(width - 1, height - 1), dtype=bool) + @pytest.mark.parametrize("ufunc_str", ufuncs_str_1nin_1nout + ufuncs_str_1nin_2nout) # type: ignore @pytest.mark.parametrize( "dtype", ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32", "float64", "longdouble"] @@ -4008,3 +4061,68 @@ def test_ufunc_methods(self, method_str): # # assert np.ma.allequal(outputs_ma[0], outputs_rst[0].data) and np.ma.allequal( # outputs_ma[1], outputs_rst[1].data) + + @pytest.mark.parametrize( + "np_func_name", ufuncs_str_2nin_1nout + ufuncs_str_2nin_2nout + handled_functions_2in + ) # type: ignore + def test_raise_errors_2nin(self, np_func_name: str) -> None: + """Check that proper errors are raised when input raster/array don't match (only 2-input functions).""" + + # Create Rasters + ma = np.ma.masked_array(data=self.arr1, mask=self.mask1) + ma_wrong_shape = np.ma.masked_array(data=self.arr_wrong_shape, mask=self.mask_wrong_shape) + rst = gu.Raster.from_array(ma, transform=self.transform, crs=4326, nodata=_default_nodata(ma.dtype)) + rst_wrong_shape = gu.Raster.from_array( + ma_wrong_shape, transform=self.transform, crs=4326, nodata=_default_nodata(ma_wrong_shape.dtype) + ) + rst_wrong_crs = gu.Raster.from_array(ma, transform=self.transform, crs=32610, nodata=_default_nodata(ma.dtype)) + rst_wrong_transform = gu.Raster.from_array( + ma, transform=self.wrong_transform, crs=4326, nodata=_default_nodata(ma_wrong_shape.dtype) + ) + + # Get ufunc + np_func = getattr(np, np_func_name) + + # Strange errors happening only for these 4 functions... + # See issue #457 + if np_func_name not in ["allclose", "isclose", "array_equal", "array_equiv"]: + + # Rasters with different CRS, transform, or shape + # Different shape + expected_message = ( + "Both rasters must have the same shape, transform and CRS for an arithmetic operation. " + "For example, use raster1 = raster1.reproject(raster2) to reproject raster1 on the " + "same grid and CRS than raster2." + ) + + with pytest.raises(ValueError, match=re.escape(expected_message)): + np_func(rst, rst_wrong_shape) + + # Different CRS + with pytest.raises(ValueError, match=re.escape(expected_message)): + np_func(rst, rst_wrong_crs) + + # Different transform + with pytest.raises(ValueError, match=re.escape(expected_message)): + np_func(rst, rst_wrong_transform) + + # Array with different shape + expected_message = ( + "The raster and array must have the same shape for an arithmetic operation. " + "For example, if the array comes from another raster, use raster1 = " + "raster1.reproject(raster2) beforehand to reproject raster1 on the same grid and CRS " + "than raster2. Or, if the array does not come from a raster, define one with raster = " + "Raster.from_array(array, array_transform, array_crs, array_nodata) then reproject." + ) + # Different shape, masked array + # Check reflectivity just in case (just here, not later) + with pytest.raises(ValueError, match=re.escape(expected_message)): + np_func(ma_wrong_shape, rst) + with pytest.raises(ValueError, match=re.escape(expected_message)): + np_func(rst, ma_wrong_shape) + + # Different shape, normal array with NaNs + with pytest.raises(ValueError, match=re.escape(expected_message)): + np_func(ma_wrong_shape.filled(np.nan), rst) + with pytest.raises(ValueError, match=re.escape(expected_message)): + np_func(rst, ma_wrong_shape.filled(np.nan)) diff --git a/tests/test_vector.py b/tests/test_vector.py index 2fd95d76..79ce97d3 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -214,10 +214,6 @@ def test_crop(self, data: list[str]) -> None: # Check the return-by-copy as well assert_geodataframe_equal(outlines_copy.ds, outlines_new_bounds.ds) - # Check with bracket call - outlines_new2 = outlines_new[rst] - assert_geodataframe_equal(outlines_new.ds, outlines_new2.ds) - # Verify that geometries intersect with raster bound rst_poly = gu.projtools.bounds2poly(rst.bounds) intersects_new = []