Skip to content

Commit

Permalink
Merge pull request #249 from statisticsnorway/img
Browse files Browse the repository at this point in the history
Merger kartverket-wms og småtteri
  • Loading branch information
mortewle authored Nov 6, 2024
2 parents 34d4ce0 + 547b4e8 commit f27d643
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 177 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ssb-sgis"
version = "1.0.7"
version = "1.0.8"
description = "GIS functions used at Statistics Norway."
authors = ["Morten Letnes <morten.letnes@ssb.no>"]
license = "MIT"
Expand Down
20 changes: 15 additions & 5 deletions src/sgis/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,29 @@ def to_numpy_func(text: str) -> Callable:
raise ValueError


def is_property(obj: object, attribute: str) -> bool:
def is_property(obj: object, attr: str) -> bool:
"""Determine if a class attribute is a property.
Args:
obj: The object to check.
attribute: The attribute name to check on the object.
attr: The attribute name to check on the object.
Returns:
True if the attribute is a property, False otherwise.
"""
return hasattr(obj.__class__, attribute) and isinstance(
getattr(obj.__class__, attribute), property
)
if not hasattr(obj.__class__, attr):
return False
if isinstance(obj, type):
return isinstance(getattr(obj, attr), property)
else:
return isinstance(getattr(obj.__class__, attr), property)


def is_method(obj: Any, attr: str) -> bool:
    """Determine if a class attribute is a method.

    The attribute is looked up on the class, never on the instance, so
    properties are not evaluated as a side effect of the check.

    Args:
        obj: The object to check. Can be an instance or a class.
        attr: The attribute name to check on the object.

    Returns:
        True if the attribute is a method of the class (regular method,
        classmethod, staticmethod or builtin method), False otherwise,
        including when the attribute does not exist.
    """
    cls = obj if isinstance(obj, type) else obj.__class__
    # inspect.ismethod is only True for *bound* methods, so a regular
    # instance method fetched from the class (a plain function) would be
    # missed. inspect.isroutine covers functions, bound methods and builtins.
    return inspect.isroutine(getattr(cls, attr, None))


def dict_zip_intersection(*dicts: dict) -> Generator[tuple[Any, ...], None, None]:
Expand Down
121 changes: 77 additions & 44 deletions src/sgis/raster/image_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ class Dataset:
from ..helpers import _fix_path
from ..helpers import get_all_files
from ..helpers import get_numpy_func
from ..helpers import is_method
from ..helpers import is_property
from ..io._is_dapla import is_dapla
from ..io.opener import opener
from . import sentinel_config as config
Expand Down Expand Up @@ -164,6 +166,7 @@ def _read_parquet_func(*args, **kwargs) -> list[str]:
"backend",
"masking",
"_merged",
"date",
]

_load_counter: int = 0
Expand Down Expand Up @@ -319,7 +322,7 @@ def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:
self._bounds = None
self._merged = False
self._from_array = False
self._from_gdf = False
self._from_geopandas = False
self.metadata_attributes = self.metadata_attributes or {}
self._path = None
self._metadata_from_xml = False
Expand All @@ -328,32 +331,30 @@ def __init__(self, *, metadata=None, bbox=None, **kwargs) -> None:

self.metadata = self._metadata_to_nested_dict(metadata)

if self.filename_regexes:
if isinstance(self.filename_regexes, str):
self.filename_regexes = (self.filename_regexes,)
self.filename_patterns = [
re.compile(regexes, flags=re.VERBOSE)
for regexes in self.filename_regexes
]
else:
self.filename_patterns = ()

if self.image_regexes:
if isinstance(self.image_regexes, str):
self.image_regexes = (self.image_regexes,)
self.image_patterns = [
re.compile(regexes, flags=re.VERBOSE) for regexes in self.image_regexes
]
else:
self.image_patterns = ()
self.image_patterns = self._compile_regexes("image_regexes")
self.filename_patterns = self._compile_regexes("filename_regexes")

for key, value in kwargs.items():
error_obj = ValueError(
f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
)
if key in ALLOWED_INIT_KWARGS and key in dir(self):
setattr(self, key, value)
if is_property(self, key):
setattr(self, f"_{key}", value)
elif is_method(self, key):
raise error_obj
else:
setattr(self, key, value)
else:
raise ValueError(
f"{self.__class__.__name__} got an unexpected keyword argument '{key}'"
)
raise error_obj

def _compile_regexes(self, regex_attr: str) -> tuple[re.Pattern]:
regexes = getattr(self, regex_attr)
if regexes:
if isinstance(regexes, str):
regexes = (regexes,)
return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)
return ()

@staticmethod
def _metadata_to_nested_dict(
Expand All @@ -367,6 +368,7 @@ def _metadata_to_nested_dict(
if isinstance(metadata, pd.DataFrame):

def is_scalar(x) -> bool:
"""Check if scalar because 'truth value of Series is ambigous'."""
return not hasattr(x, "__len__") or len(x) <= 1

def na_to_none(x) -> None:
Expand Down Expand Up @@ -631,26 +633,31 @@ def _get_metadata_attributes(self, metadata_attributes: dict) -> dict:

def _to_xarray(self, array: np.ndarray, transform: Affine) -> DataArray:
"""Convert the raster to an xarray.DataArray."""
attrs = {"crs": self.crs}
for attr in set(self.metadata_attributes).union({"date"}):
try:
attrs[attr] = getattr(self, attr)
except Exception:
pass

if len(array.shape) == 2:
height, width = array.shape
dims = ["y", "x"]
elif len(array.shape) == 3:
height, width = array.shape[1:]
dims = ["band", "y", "x"]
elif not any(dim for dim in array.shape):
DataArray(
name=self.name or self.__class__.__name__,
attrs=attrs,
)
else:
raise ValueError(
f"Array should be 2 or 3 dimensional. Got shape {array.shape}"
)

coords = _generate_spatial_coords(transform, width, height)

attrs = {"crs": self.crs}
for attr in set(self.metadata_attributes).union({"date"}):
try:
attrs[attr] = getattr(self, attr)
except Exception:
pass

return DataArray(
array,
coords=coords,
Expand All @@ -667,7 +674,7 @@ class Band(_ImageBandBase):
backend: str = "numpy"

@classmethod
def from_gdf(
def from_geopandas(
cls,
gdf: GeoDataFrame | GeoSeries,
res: int,
Expand All @@ -691,7 +698,7 @@ def from_gdf(
)

obj = cls(arr, res=res, crs=gdf.crs, bounds=gdf.total_bounds, **kwargs)
obj._from_gdf = True
obj._from_geopandas = True
return obj

def __init__(
Expand Down Expand Up @@ -839,12 +846,18 @@ def band_id(self) -> str:
@property
def height(self) -> int:
"""Pixel heigth of the image band."""
return self.values.shape[-2]
try:
return self.values.shape[-2]
except IndexError:
return 0

@property
def width(self) -> int:
"""Pixel width of the image band."""
return self.values.shape[-1]
try:
return self.values.shape[-1]
except IndexError:
return 0

@property
def tile(self) -> str:
Expand Down Expand Up @@ -892,7 +905,7 @@ def get_n_largest(
copied = self.copy()
value_must_be_at_least = np.sort(np.ravel(copied.values))[-n] - (precision or 0)
copied._values = np.where(copied.values >= value_must_be_at_least, 1, 0)
df = copied.to_gdf(column).loc[lambda x: x[column] == 1]
df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
df[column] = f"largest_{n}"
return df

Expand All @@ -903,14 +916,17 @@ def get_n_smallest(
copied = self.copy()
value_must_be_at_least = np.sort(np.ravel(copied.values))[n] - (precision or 0)
copied._values = np.where(copied.values <= value_must_be_at_least, 1, 0)
df = copied.to_gdf(column).loc[lambda x: x[column] == 1]
df = copied.to_geopandas(column).loc[lambda x: x[column] == 1]
df[column] = f"smallest_{n}"
return df

def clip(
self, mask: GeoDataFrame | GeoSeries | Polygon | MultiPolygon, **kwargs
) -> "Band":
"""Clip band values to geometry mask."""
if not self.height or not self.width:
return self

values = _clip_xarray(
self.to_xarray(),
mask,
Expand Down Expand Up @@ -978,7 +994,6 @@ def load(
if self.has_array and [int(x) for x in bounds] != [int(x) for x in self.bounds]:
print(self)
print(self.mask)
print(self.mask.values.shape)
print(self.values.shape)
print([int(x) for x in bounds], [int(x) for x in self.bounds])
raise ValueError(
Expand Down Expand Up @@ -1284,7 +1299,7 @@ def zonal(
dropna=dropna,
)

def to_gdf(self, column: str = "value") -> GeoDataFrame:
def to_geopandas(self, column: str = "value") -> GeoDataFrame:
"""Create a GeoDataFrame from the image Band.
Args:
Expand Down Expand Up @@ -1328,17 +1343,35 @@ def _to_numpy(
self, arr: np.ndarray | DataArray, masked: bool = True
) -> np.ndarray | np.ma.core.MaskedArray:
if not isinstance(arr, np.ndarray):
mask_arr = None
if masked:
# if self.mask is not None:
# print(self.mask.values.shape, arr.shape)
# if self.mask is not None and self.mask.values.shape == arr.shape:
# print("hei", self.mask.values.sum())
# mask_arr = self.mask.values
# else:
# mask_arr = np.full(arr.shape, False)
# try:
# print("hei222", arr.isnull().values.sum())
# mask_arr |= arr.isnull().values
# except AttributeError:
# pass
# mask_arr = np.full(arr.shape, False)
try:
mask_arr = arr.isnull().values
except AttributeError:
mask_arr = np.full(arr.shape, False)
pass
try:
arr = arr.to_numpy()
except AttributeError:
arr = arr.values
if mask_arr is not None:
arr = np.ma.array(arr, mask=mask_arr, fill_value=self.nodata)

if not isinstance(arr, np.ndarray):
arr = np.array(arr)

if (
masked
and self.mask is not None
Expand Down Expand Up @@ -1750,10 +1783,10 @@ def bounds(self) -> tuple[int, int, int, int] | None:
bounds.append(band.bounds)
return get_total_bounds(bounds)

def to_gdf(self, column: str = "value") -> GeoDataFrame:
def to_geopandas(self, column: str = "value") -> GeoDataFrame:
"""Convert the array to a GeoDataFrame of grid polygons and values."""
return pd.concat(
[band.to_gdf(column=column) for band in self], ignore_index=True
[band.to_geopandas(column=column) for band in self], ignore_index=True
)

def sample(
Expand Down Expand Up @@ -2491,7 +2524,7 @@ def to_xarray(
return xr.combine_by_coords(list(xarrs.values()))
# return Dataset(xarrs)

def to_gdfs(self, column: str = "value") -> dict[str, GeoDataFrame]:
def to_geopandas(self, column: str = "value") -> dict[str, GeoDataFrame]:
"""Convert each band in each Image to a GeoDataFrame."""
out = {}
i = 0
Expand All @@ -2504,7 +2537,7 @@ def to_gdfs(self, column: str = "value") -> dict[str, GeoDataFrame]:
name = f"{self.__class__.__name__}({i})"

if name not in out:
out[name] = band.to_gdf(column=column)
out[name] = band.to_geopandas(column=column)
return out

def sample(self, n: int = 1, size: int = 500) -> "ImageCollection":
Expand Down Expand Up @@ -3257,7 +3290,7 @@ def __str__(self) -> str:
what = "that have been merged"
elif self.instance._from_array:
what = "from arrays"
elif self.instance._from_gdf:
elif self.instance._from_geopandas:
what = "from GeoDataFrames"
else:
raise ValueError(self.instance)
Expand Down
Loading

0 comments on commit f27d643

Please sign in to comment.