diff --git a/pyproject.toml b/pyproject.toml
index f8357df5..58accf0e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ssb-sgis"
-version = "1.0.7"
+version = "1.0.8"
 description = "GIS functions used at Statistics Norway."
 authors = ["Morten Letnes "]
 license = "MIT"
diff --git a/src/sgis/helpers.py b/src/sgis/helpers.py
index 136b5875..2067361d 100644
--- a/src/sgis/helpers.py
+++ b/src/sgis/helpers.py
@@ -72,19 +72,29 @@ def to_numpy_func(text: str) -> Callable:
     raise ValueError


-def is_property(obj: object, attribute: str) -> bool:
+def is_property(obj: object, attr: str) -> bool:
     """Determine if a class attribute is a property.

     Args:
         obj: The object to check.
-        attribute: The attribute name to check on the object.
+        attr: The attribute name to check on the object.

     Returns:
         True if the attribute is a property, False otherwise.
     """
-    return hasattr(obj.__class__, attribute) and isinstance(
-        getattr(obj.__class__, attribute), property
-    )
+    if not hasattr(obj.__class__, attr):
+        return False
+    if isinstance(obj, type):
+        return isinstance(getattr(obj, attr), property)
+    else:
+        return isinstance(getattr(obj.__class__, attr), property)
+
+
+def is_method(obj: Any, attr: str) -> bool:
+    if isinstance(obj, type):
+        return inspect.ismethod(getattr(obj, attr, None))
+    else:
+        return inspect.ismethod(getattr(obj.__class__, attr, None))


 def dict_zip_intersection(*dicts: dict) -> Generator[tuple[Any, ...], None, None]:
diff --git a/src/sgis/raster/image_collection.py b/src/sgis/raster/image_collection.py
index a681bf06..fc36a738 100644
--- a/src/sgis/raster/image_collection.py
+++ b/src/sgis/raster/image_collection.py
@@ -95,6 +95,8 @@ class Dataset:
 from ..helpers import _fix_path
 from ..helpers import get_all_files
 from ..helpers import get_numpy_func
+from ..helpers import is_method
+from ..helpers import is_property
 from ..io._is_dapla import is_dapla
 from ..io.opener import opener
 from . import sentinel_config as config
@@ -164,6 +166,7 @@ def _read_parquet_func(*args, **kwargs) -> list[str]:
     "backend",
     "masking",
     "_merged",
+    "date",
 ]

 _load_counter: int = 0
@@ -319,7 +322,7 @@ def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:
         self._bounds = None
         self._merged = False
         self._from_array = False
-        self._from_gdf = False
+        self._from_geopandas = False
         self.metadata_attributes = self.metadata_attributes or {}
         self._path = None
         self._metadata_from_xml = False
@@ -328,32 +331,30 @@ def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:
             self.metadata = self._metadata_to_nested_dict(metadata)

-        if self.filename_regexes:
-            if isinstance(self.filename_regexes, str):
-                self.filename_regexes = (self.filename_regexes,)
-            self.filename_patterns = [
-                re.compile(regexes, flags=re.VERBOSE)
-                for regexes in self.filename_regexes
-            ]
-        else:
-            self.filename_patterns = ()
-
-        if self.image_regexes:
-            if isinstance(self.image_regexes, str):
-                self.image_regexes = (self.image_regexes,)
-            self.image_patterns = [
-                re.compile(regexes, flags=re.VERBOSE) for regexes in self.image_regexes
-            ]
-        else:
-            self.image_patterns = ()
+        self.image_patterns = self._compile_regexes("image_regexes")
+        self.filename_patterns = self._compile_regexes("filename_regexes")

         for key, value in kwargs.items():
+            error_obj = ValueError(
+                f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
+            )
             if key in ALLOWED_INIT_KWARGS and key in dir(self):
-                setattr(self, key, value)
+                if is_property(self, key):
+                    setattr(self, f"_{key}", value)
+                elif is_method(self, key):
+                    raise error_obj
+                else:
+                    setattr(self, key, value)
             else:
-                raise ValueError(
-                    f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
-                )
+                raise error_obj
+
+    def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern]:
+        regexes = getattr(self, regex_attr)
+        if regexes:
+            if isinstance(regexes, str):
+                regexes = (regexes,)
+            return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)
+        return ()

     @staticmethod
     def _metadata_to_nested_dict(
@@ -367,6 +368,7 @@
         if isinstance(metadata, pd.DataFrame):

             def is_scalar(x) -> bool:
+                """Check if scalar because 'truth value of Series is ambiguous'."""
                 return not hasattr(x, "__len__") or len(x) <= 1

             def na_to_none(x) -> None:
@@ -631,12 +633,24 @@ def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:

     def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
         """Convert the raster to an xarray.DataArray."""
+        attrs = {"crs": self.crs}
+        for attr in set(self.metadata_attributes).union({"date"}):
+            try:
+                attrs[attr] = getattr(self, attr)
+            except Exception:
+                pass
+
         if len(array.shape) == 2:
             height, width = array.shape
             dims = ["y", "x"]
         elif len(array.shape) == 3:
             height, width = array.shape[1:]
             dims = ["band", "y", "x"]
+        elif not any(dim for dim in array.shape):
+            return DataArray(
+                name=self.name or self.__class__.__name__,
+                attrs=attrs,
+            )
         else:
             raise ValueError(
                 f"Array should be 2 or 3 dimensional. Got shape {array.shape}"
@@ -644,13 +658,6 @@ def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:

         coords = _generate_spatial_coords(transform, width, height)

-        attrs = {"crs": self.crs}
-        for attr in set(self.metadata_attributes).union({"date"}):
-            try:
-                attrs[attr] = getattr(self, attr)
-            except Exception:
-                pass
-
         return DataArray(
             array,
             coords=coords,
@@ -667,7 +674,7 @@ class Band(_ImageBandBase):
     backend: str = "numpy"

     @classmethod
-    def from_gdf(
+    def from_geopandas(
         cls,
         gdf: GeoDataFrame | GeoSeries,
         res: int,
@@ -691,7 +698,7 @@
         )

         obj = cls(arr, res=res, crs=gdf.crs, bounds=gdf.total_bounds, **kwargs)
-        obj._from_gdf = True
+        obj._from_geopandas = True
         return obj

     def __init__(
@@ -839,12 +846,18 @@ def band_id(self) -> str:
     @property
     def height(self) -> int:
         """Pixel heigth of the image band."""
-        return self.values.shape[-2]
+        try:
+            return self.values.shape[-2]
+        except IndexError:
+            return 0

     @property
     def width(self) -> int:
         """Pixel width of the image band."""
-        return self.values.shape[-1]
+        try:
+            return self.values.shape[-1]
+        except IndexError:
+            return 0

     @property
     def tile(self) -> str:
@@ -892,7 +905,7 @@ def get_n_largest(
         copied = self.copy()
         value_must_be_at_least = np.sort(np.ravel(copied.values))[-n] - (precision or 0)
         copied._values = np.where(copied.values >= value_must_be_at_least, 1, 0)
-        df = copied.to_gdf(column).loc[lambda x: x[column] == 1]
+        df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
         df[column] = f"largest_{n}"
         return df
@@ -903,7 +916,7 @@ def get_n_smallest(
         copied = self.copy()
         value_must_be_at_least = np.sort(np.ravel(copied.values))[n] - (precision or 0)
         copied._values = np.where(copied.values <= value_must_be_at_least, 1, 0)
-        df = copied.to_gdf(column).loc[lambda x: x[column] == 1]
+        df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
         df[column] = f"smallest_{n}"
         return df
@@ -911,6 +924,9 @@ def clip(
         self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, **kwargs
     ) -> "Band":
         """Clip band values to geometry mask."""
+        if not self.height or not self.width:
+            return self
+
         values = _clip_xarray(
             self.to_xarray(),
             mask,
@@ -978,7 +994,6 @@ def load(
         if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
             print(self)
             print(self.mask)
-            print(self.mask.values.shape)
             print(self.values.shape)
             print([int(x) for x in bounds], [int(x) for x in self.bounds])
             raise ValueError(
@@ -1284,7 +1299,7 @@ def zonal(
             dropna=dropna,
         )

-    def to_gdf(self, column: str = "value") -> GeoDataFrame:
+    def to_geopandas(self, column: str = "value") -> GeoDataFrame:
         """Create a GeoDataFrame from the image Band.
         Args:
@@ -1328,17 +1343,35 @@ def _to_numpy(
         self, arr: np.ndarray | DataArray, masked: bool = True
     ) -> np.ndarray | np.ma.core.MaskedArray:
         if not isinstance(arr, np.ndarray):
+            mask_arr = None
             if masked:
+                # if self.mask is not None:
+                #     print(self.mask.values.shape, arr.shape)
+                # if self.mask is not None and self.mask.values.shape == arr.shape:
+                #     print("hei", self.mask.values.sum())
+                #     mask_arr = self.mask.values
+                # else:
+                #     mask_arr = np.full(arr.shape, False)
+                #     try:
+                #         print("hei222", arr.isnull().values.sum())
+                #         mask_arr |= arr.isnull().values
+                #     except AttributeError:
+                #         pass
+                # mask_arr = np.full(arr.shape, False)
                 try:
                     mask_arr = arr.isnull().values
                 except AttributeError:
-                    mask_arr = np.full(arr.shape, False)
+                    pass
             try:
                 arr = arr.to_numpy()
             except AttributeError:
                 arr = arr.values
+            if mask_arr is not None:
+                arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)
+
         if not isinstance(arr, np.ndarray):
             arr = np.array(arr)
+
         if (
             masked
             and self.mask is not None
@@ -1750,10 +1783,10 @@ def bounds(self) -> tuple[int, int, int, int] | None:
             bounds.append(band.bounds)
         return get_total_bounds(bounds)

-    def to_gdf(self, column: str = "value") -> GeoDataFrame:
+    def to_geopandas(self, column: str = "value") -> GeoDataFrame:
         """Convert the array to a GeoDataFrame of grid polygons and values."""
         return pd.concat(
-            [band.to_gdf(column=column) for band in self], ignore_index=True
+            [band.to_geopandas(column=column) for band in self], ignore_index=True
         )

     def sample(
@@ -2491,7 +2524,7 @@ def to_xarray(
         return xr.combine_by_coords(list(xarrs.values()))
         # return Dataset(xarrs)

-    def to_gdfs(self, column: str = "value") -> dict[str, GeoDataFrame]:
+    def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
         """Convert each band in each Image to a GeoDataFrame."""
         out = {}
         i = 0
@@ -2504,7 +2537,7 @@ def to_gdfs(self, column: str = "value") -> dict[str, GeoDataFrame]:
                 name = f"{self.__class__.__name__}({i})"
                 if name not in out:
-                    out[name] = band.to_gdf(column=column)
+                    out[name] = band.to_geopandas(column=column)
         return out

     def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
@@ -3257,7 +3290,7 @@ def __str__(self) -> str:
             what = "that have been merged"
         elif self.instance._from_array:
             what = "from arrays"
-        elif self.instance._from_gdf:
+        elif self.instance._from_geopandas:
             what = "from GeoDataFrames"
         else:
             raise ValueError(self.instance)
diff --git a/tests/test_explore.py b/tests/test_explore.py
index 7b4cbd76..fdb01fc9 100644
--- a/tests/test_explore.py
+++ b/tests/test_explore.py
@@ -18,123 +18,6 @@

 path_sentinel = testdata + "/sentinel2"

-# np.set_printoptions(linewidth=400)
-
-if 0:
-    for p, bbox in zip(
-        [
-            # "S2A_MSIL2A_20230624T104621_N0509_R051_T32VPM_20230624T170454.SAFE",
-            "S2B_MSIL2A_20170826T104019_N0208_R008_T32VNM_20221207T150454.SAFE",
-            "S2B_MSIL2A_20230606T103629_N0509_R008_T32VNM_20230606T121204.SAFE",
-        ],
-        [
-            # (sg.to_gdf([11.0771105, 59.9483914], crs=4326).to_crs(25833).buffer(1500)),
-            (
-                sg.to_gdf([10.28173443, 60.16616654], crs=4326)
-                .to_crs(25833)
-                .buffer(1500)
-            ),
-            (
-                sg.to_gdf([10.28173443, 60.16616654], crs=4326)
-                .to_crs(25833)
-                .buffer(1500)
-            ),
-        ],
-        strict=False,
-    ):
-        paths = sg.helpers.get_all_files(
-            f"C:/Users/ort/OneDrive - Statistisk sentralbyrå/data/sentinel2/{p}"
-        )
-        img = sg.Sentinel2Image(
-            f"C:/Users/ort/OneDrive - Statistisk sentralbyrå/data/sentinel2/{p}",
-        )
-        for path in paths:
-
-            if "tif" not in path:
-                continue
-            raster = sg.Raster.from_path(path)
-            try:
-                centroid
-            except NameError:
-                centroid = raster.centroid
-            out = (
-                Path(r"C:\Users\ort\git\ssb-sgis\tests\testdata\raster\sentinel2")
-                / f"{p}"
-            ) / (Path(raster.name).stem + "_clipped.tif")
-            print(path)
-            print(out)
-            import os
-
-            os.makedirs(out.parent, exist_ok=True)
-
-            print(bbox)
-            raster = raster.load().clip(
-                bbox,  # boundless=False, masked=False
-            )
-            print(raster.bounds)
-            print(raster.values)
-            print(raster.values.shape)
-            print(np.sum(np.where(raster.values != 0, 1, 0)))
-
-            # sg.explore(raster.to_gdf())
-            raster.write(out)
-            print(raster.shape)
-            # print(raster)
-            # sg.explore(raster.to_gdf(), "value")
-if 0:
-    SENTINEL2_FILENAME_REGEX = r"""
-        ^(?P<tile>T\d{2}[A-Z]{3})
-        _(?P<date>\d{8}T\d{6})
-        _(?P<band>B[018][\dA])
-        (?:_(?P<resolution>\d+)m)?
-        .*
-        \..*$
-    """
-    path_sentinel = r"C:\Users\ort\git\ssb-sgis\tests\testdata\raster\sentinel2\S2A_MSIL2A_20230601T104021_N0509_R008_T32VNM_20230601T215503.SAFE"
-
-    cube = sg.DataCube.from_root(
-        path_sentinel, res=10, filename_regex=SENTINEL2_FILENAME_REGEX
-    )
-
-    band2 = cube["B02"].load(indexes=1)
-    band3 = cube["B03"].load(indexes=1)
-    band4 = cube["B04"].load(indexes=1)
-
-    sg.torchgeo.Sentinel2.filename_regex = SENTINEL2_FILENAME_REGEX
-    ds = sg.torchgeo.Sentinel2(path_sentinel)
-    ds.plot(ds[ds.bounds])
-
-    print(band2.values.shape)
-    print(band3.values.shape)
-    print(band4.values.shape)
-    # sg.explore(band2.to_gdf(), "value")
-
-    arr = np.array([band2.values, band3.values, band4.values])
-    arr = arr.reshape(arr.shape[1], arr.shape[2], arr.shape[0])
-
-    print(arr.shape)
-
-    import matplotlib.pyplot as plt
-
-    fig, ax = plt.subplots(1, 1, figsize=(4, 4))
-
-    import torch
-
-    image = torch.clamp(torch.tensor(arr) / 10000, min=0, max=1)
-
-    print(image)
-
-    ax.imshow(image)
-    ax.axis("off")
-
-    # Use bilinear interpolation (or it is displayed as bicubic by default).
-    # plt.imshow(arr, interpolation="nearest")
-    plt.show()
-
-    import folium
-
-    folium.raster_layers.ImageOverlay(arr, bounds=raster.bounds)
-


 def test_image_collection():
@@ -369,7 +252,6 @@ def main():
     from oslo import roads_oslo

     test_image_collection()
-    # test_torch()
     test_explore(points_oslo(), roads_oslo())

     # not_test_explore(points_oslo(), roads_oslo())
diff --git a/tests/test_image_collection.py b/tests/test_image_collection.py
index 998cfd7b..d8ca3b0f 100644
--- a/tests/test_image_collection.py
+++ b/tests/test_image_collection.py
@@ -297,7 +297,7 @@ def _test_ndvi(collection, type_should_be, cloudless: bool):
         # assert (ndvi.cmap) == "Greens"

         print("explore ndvi, masking=", img.masking)
-        gdf = ndvi.to_gdf()
+        gdf = ndvi.to_geopandas()
         gdf_sample = gdf.sample(min(n, len(gdf)))
         e = sg.explore(ndvi, gdf_sample=gdf_sample, column="value")
@@ -401,7 +401,7 @@ def not_test_sample():
     for img in collection.sample(1, size=size):
         for band in img:
             band.load()
-            print(band.to_gdf().pipe(sg.sort_small_first).geometry.area.sum())
+            print(band.to_geopandas().pipe(sg.sort_small_first).geometry.area.sum())
     e = sg.explore(collection.sample(1, size=size))

     # low buffer resolution means the area won't be exactly this
@@ -423,7 +423,7 @@
     print("as gdfs")

     e = sg.explore(
-        collection.sample(1, size=size).load().to_gdfs(),
+        collection.sample(1, size=size).load().to_geopandas(),
         column="value",
         return_explorer=True,
     )
@@ -976,7 +976,7 @@ def test_merge():
     sg.explore(merged_by_year)
     sg.explore(grouped_by_year_merged_by_band)
     sg.explore(merged_by_band)
-    df = merged_by_band.to_gdf()
+    df = merged_by_band.to_geopandas()
     assert (bounds := tuple(int(x) for x in df.total_bounds)) == (
         569631,
         6657859,
@@ -1235,7 +1235,7 @@ def test_cloud():
         # assert np.sum(cloud_band)
         assert isinstance(cloud_band, sg.Band), cloud_band
         assert cloud_band.values.shape == (299, 299), cloud_band.values.shape
-        cloud_polys = img.mask.to_gdf().geometry
+        cloud_polys = img.mask.to_geopandas().geometry
         sg.explore(cloud_polys)
         assert isinstance(cloud_polys, GeoSeries), type(cloud_polys)
@@ -1414,16 +1414,18 @@ def test_convertion():
     _from_array = sg.Band(arr, res=band.res, crs=band.crs, bounds=band.bounds)
     assert (shape := _from_array.values.shape) == (29, 29), shape

-    gdf = band.to_gdf(column="val")
-    from_gdf = sg.Band.from_gdf(gdf, res=band.res)
+    gdf = band.to_geopandas(column="val")
+    from_geopandas = sg.Band.from_geopandas(gdf, res=band.res)

-    e = sg.explore(from_gdf=from_gdf.to_gdf(), band=band.to_gdf(), gdf=gdf)
+    e = sg.explore(
+        from_geopandas=from_geopandas.to_geopandas(), band=band.to_geopandas(), gdf=gdf
+    )
     assert len(e._gdfs) == 3
     assert all(len(gdf) == 837 for gdf in e._gdfs), [len(gdf) for gdf in e._gdfs]
     assert all(int(gdf.area.sum()) == 898_5540 for gdf in e._gdfs), [
         int(gdf.area.sum()) for gdf in e._gdfs
     ]
-    assert (shape := from_gdf.values.shape) == (29, 29), shape
+    assert (shape := from_geopandas.values.shape) == (29, 29), shape


 @print_function_name
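
Usage note (not part of the diff): a minimal sketch of the renamed GeoDataFrame round trip,
based only on the calls visible in test_convertion above. The array contents, resolution,
CRS and bounds below are illustrative assumptions, not values from the tests.

    import numpy as np
    import sgis as sg

    # Build a small in-memory Band, mirroring the sg.Band(arr, ...) call in test_convertion.
    # res, crs and bounds are assumed example values.
    arr = np.random.randint(0, 255, size=(29, 29))
    band = sg.Band(arr, res=10, crs=25833, bounds=(0, 0, 290, 290))

    # to_geopandas() replaces the old to_gdf(): one polygon per pixel with its value.
    gdf = band.to_geopandas(column="value")

    # Band.from_geopandas() replaces Band.from_gdf(): rasterize the GeoDataFrame back.
    band_again = sg.Band.from_geopandas(gdf, res=band.res)
    print(band_again.values.shape)  # expected (29, 29), as asserted in test_convertion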