From 77ceae4121bb10b8fc2116a36b989e89c6d40fa1 Mon Sep 17 00:00:00 2001 From: Morten Letnes Date: Thu, 16 Jan 2025 16:04:26 +0100 Subject: [PATCH 1/8] 30 buffer resolution, komm_nr, small refactors --- .gitignore | 1 + .../buffer_dissolve_explode.py | 18 ++++---- src/sgis/helpers.py | 46 +++++++++---------- src/sgis/io/dapla_functions.py | 20 ++++---- src/sgis/parallel/parallel.py | 10 ++-- 5 files changed, 48 insertions(+), 47 deletions(-) diff --git a/.gitignore b/.gitignore index b08d4341..1ca4823c 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ *.xls *.xlsx *.zip +*.log # Allow .parquet files below the testdata directory !tests/testdata/**/*.parquet diff --git a/src/sgis/geopandas_tools/buffer_dissolve_explode.py b/src/sgis/geopandas_tools/buffer_dissolve_explode.py index 99944560..b57bb3c7 100644 --- a/src/sgis/geopandas_tools/buffer_dissolve_explode.py +++ b/src/sgis/geopandas_tools/buffer_dissolve_explode.py @@ -5,7 +5,7 @@ - Geometries are made valid after buffer and dissolve. -- The buffer resolution defaults to 50 (geopandas' default is 16). +- The buffer resolution defaults to 30 (geopandas' default is 16). - If 'by' is not specified, the index will be labeled 0, 1, …, n - 1 after exploded, instead of 0, 0, …, 0 as it will with the geopandas defaults. @@ -49,7 +49,7 @@ def buffdissexp( gdf: GeoDataFrame, distance: int | float, *, - resolution: int = 50, + resolution: int = 30, index_parts: bool = False, copy: bool = True, grid_size: float | int | None = None, @@ -68,7 +68,7 @@ def buffdissexp( distance: the distance (meters, degrees, depending on the crs) to buffer the geometry by resolution: The number of segments used to approximate a quarter circle. - Here defaults to 50, as opposed to the default 16 in geopandas. + Here defaults to 30, as opposed to the default 16 in geopandas. index_parts: If False (default), the index after dissolve is respected. If True, an integer index level is added during explode. copy: Whether to copy the GeoDataFrame before buffering. Defaults to True. @@ -101,7 +101,7 @@ def buffdissexp( def buffdiss( gdf: GeoDataFrame, distance: int | float, - resolution: int = 50, + resolution: int = 30, copy: bool = True, n_jobs: int = 1, join_style: int | str = "round", @@ -119,7 +119,7 @@ def buffdiss( distance: the distance (meters, degrees, depending on the crs) to buffer the geometry by resolution: The number of segments used to approximate a quarter circle. - Here defaults to 50, as opposed to the default 16 in geopandas. + Here defaults to 30, as opposed to the default 16 in geopandas. join_style: Buffer join style. copy: Whether to copy the GeoDataFrame before buffering. Defaults to True. n_jobs: Number of threads to use. Defaults to 1. @@ -511,7 +511,7 @@ def buffdissexp_by_cluster( gdf: GeoDataFrame, distance: int | float, *, - resolution: int = 50, + resolution: int = 30, copy: bool = True, n_jobs: int = 1, join_style: int | str = "round", @@ -532,7 +532,7 @@ def buffdissexp_by_cluster( distance: the distance (meters, degrees, depending on the crs) to buffer the geometry by resolution: The number of segments used to approximate a quarter circle. - Here defaults to 50, as opposed to the default 16 in geopandas. + Here defaults to 30, as opposed to the default 16 in geopandas. join_style: Buffer join style. copy: Whether to copy the GeoDataFrame before buffering. Defaults to True. n_jobs: int = 1, @@ -554,7 +554,7 @@ def buffdissexp_by_cluster( def buff( gdf: GeoDataFrame | GeoSeries, distance: int | float, - resolution: int = 50, + resolution: int = 30, copy: bool = True, join_style: int | str = "round", **buffer_kwargs, @@ -566,7 +566,7 @@ def buff( distance: the distance (meters, degrees, depending on the crs) to buffer the geometry by resolution: The number of segments used to approximate a quarter circle. - Here defaults to 50, as opposed to the default 16 in geopandas. + Here defaults to 30, as opposed to the default 16 in geopandas. join_style: Buffer join style. copy: Whether to copy the GeoDataFrame before buffering. Defaults to True. **buffer_kwargs: additional keyword arguments passed to geopandas' buffer. diff --git a/src/sgis/helpers.py b/src/sgis/helpers.py index 2067361d..687f62b7 100644 --- a/src/sgis/helpers.py +++ b/src/sgis/helpers.py @@ -223,30 +223,30 @@ def get_object_name( var: object, start: int = 2, stop: int = 7, ignore_self: bool = True ) -> str: frame = inspect.currentframe() # frame can be FrameType or None - if frame: - try: - for _ in range(start): - frame = frame.f_back if frame else None - for _ in range(start, stop): - if frame: - names = [ - var_name - for var_name, var_val in frame.f_locals.items() - if var_val is var and not (ignore_self and var_name == "self") - ] - names = [name for name in names if not name.startswith("_")] - if names: - if len(names) != 1: - warnings.warn( - "More than one local variable matches the object. Name might be wrong.", - stacklevel=2, - ) - return names[0] - frame = frame.f_back if frame else None - finally: + if not frame: + raise ValueError(f"Couldn't find name for {var}") + try: + for _ in range(start): + frame = frame.f_back if frame else None + for _ in range(start, stop): if frame: - del frame # Explicitly delete frame reference to assist with garbage collection - raise ValueError(f"Couldn't find name for {var}") + names = [ + var_name + for var_name, var_val in frame.f_locals.items() + if var_val is var and not (ignore_self and var_name == "self") + ] + names = [name for name in names if not name.startswith("_")] + if names: + if len(names) != 1: + warnings.warn( + "More than one local variable matches the object. Name might be wrong.", + stacklevel=2, + ) + return names[0] + frame = frame.f_back if frame else None + finally: + if frame: + del frame # Explicitly delete frame reference to assist with garbage collection def make_namedict(gdfs: tuple[GeoDataFrame]) -> dict[int, str]: diff --git a/src/sgis/io/dapla_functions.py b/src/sgis/io/dapla_functions.py index 313b7e81..80672904 100644 --- a/src/sgis/io/dapla_functions.py +++ b/src/sgis/io/dapla_functions.py @@ -15,6 +15,7 @@ import pyarrow import pyarrow.parquet as pq import shapely +from gcsfs import GCSFileSystem from geopandas import GeoDataFrame from geopandas import GeoSeries from geopandas.io.arrow import _geopandas_to_arrow @@ -30,7 +31,7 @@ def read_geopandas( gcs_path: str | Path | list[str | Path] | tuple[str | Path] | GeoSeries, pandas_fallback: bool = False, - file_system: dp.gcs.GCSFileSystem | None = None, + file_system: GCSFileSystem | None = None, mask: GeoSeries | GeoDataFrame | shapely.Geometry | tuple | None = None, threads: int | None = None, **kwargs, @@ -138,8 +139,7 @@ def read_geopandas( raise e.__class__( f"{e.__class__.__name__}: {e} for {gcs_path}." ) from e - df = dp.read_pandas(gcs_path, **kwargs) - + df = pd.read_parquet(file, **kwargs) if pandas_fallback or not len(df): return df else: @@ -157,7 +157,7 @@ def read_geopandas( except ValueError as e: if "Missing geo metadata" not in str(e) and "geometry" not in str(e): raise e - df = dp.read_pandas(gcs_path, **kwargs) + df = pd.read_parquet(file, **kwargs) if pandas_fallback or not len(df): return df @@ -168,7 +168,7 @@ def read_geopandas( ) from e except Exception as e: raise e.__class__( - f"{e.__class__.__name__}: {e} for {df}." + more_txt + f"{e.__class__.__name__}: {e} for {gcs_path}." + more_txt ) from e if mask is not None: @@ -177,7 +177,7 @@ def read_geopandas( def _get_bounds_parquet( - path: str | Path, file_system: dp.gcs.GCSFileSystem, pandas_fallback: bool = False + path: str | Path, file_system: GCSFileSystem, pandas_fallback: bool = False ) -> tuple[list[float], dict] | tuple[None, None]: with file_system.open(path) as f: try: @@ -202,7 +202,7 @@ def _get_bounds_parquet( return meta["bbox"], meta["crs"] -def _get_columns(path: str | Path, file_system: dp.gcs.GCSFileSystem) -> pd.Index: +def _get_columns(path: str | Path, file_system: GCSFileSystem) -> pd.Index: with file_system.open(path) as f: schema = pq.read_schema(f) index_cols = _get_index_cols(schema) @@ -216,7 +216,7 @@ def _get_index_cols(schema: pyarrow.Schema) -> list[str]: def get_bounds_series( paths: list[str | Path] | tuple[str | Path], - file_system: dp.gcs.GCSFileSystem | None = None, + file_system: GCSFileSystem | None = None, threads: int | None = None, pandas_fallback: bool = False, ) -> GeoSeries: @@ -227,7 +227,7 @@ def get_bounds_series( Args: paths: Iterable of file paths in gcs. - file_system: Optional instance of dp.gcs.GCSFileSystem. + file_system: Optional instance of GCSFileSystem. If None, an instance is created within the function. Note that this is slower in long loops. threads: Number of threads to use if reading multiple files. Defaults to @@ -307,7 +307,7 @@ def write_geopandas( gcs_path: str | Path, overwrite: bool = True, pandas_fallback: bool = False, - file_system: dp.gcs.GCSFileSystem | None = None, + file_system: GCSFileSystem | None = None, write_covering_bbox: bool = False, **kwargs, ) -> None: diff --git a/src/sgis/parallel/parallel.py b/src/sgis/parallel/parallel.py index f99f6606..5ab43209 100644 --- a/src/sgis/parallel/parallel.py +++ b/src/sgis/parallel/parallel.py @@ -590,7 +590,7 @@ def write_municipality_data( with_neighbors: bool = False, funcdict: dict[str, Callable] | None = None, file_type: str = "parquet", - muni_number_col: str = "KOMMUNENR", + muni_number_col: str = "komm_nr", strict: bool = False, write_empty: bool = False, id_assign_func: Callable | functools.partial = clean_overlay, @@ -622,7 +622,7 @@ def write_municipality_data( the data is read. file_type: Defaults to parquet. muni_number_col: String column name with municipality - number/identifier. Defaults to KOMMUNENR. If the column is not present + number/identifier. Defaults to komm_nr. If the column is not present in the data to be split, the data will be intersected with the municipalities. strict: If False (default), the dictionaries 'out_data' and 'funcdict' does @@ -761,7 +761,7 @@ def write_municipality_data( out_folder: str, municipalities: GeoDataFrame | list[str] | None = None, with_neighbors: bool = False, - muni_number_col: str = "KOMMUNENR", + muni_number_col: str = "komm_nr", file_type: str = "parquet", func: Callable | None = None, write_empty: bool = False, @@ -840,7 +840,7 @@ def _write_municipality_data( data: str | GeoDataFrame | DataFrame, out_folder: str, municipalities: GeoDataFrame | list[str] | None = None, - muni_number_col: str = "KOMMUNENR", + muni_number_col: str = "komm_nr", file_type: str = "parquet", func: Callable | None = None, write_empty: bool = False, @@ -896,7 +896,7 @@ def _write_neighbor_municipality_data( data: str | GeoDataFrame | DataFrame, out_folder: str, municipalities: GeoDataFrame, - muni_number_col: str = "KOMMUNENR", + muni_number_col: str = "komm_nr", file_type: str = "parquet", func: Callable | None = None, write_empty: bool = False, From 4a4c5127f10e80a63bfa42530431a7ae6cc35f8a Mon Sep 17 00:00:00 2001 From: Morten Letnes Date: Thu, 16 Jan 2025 16:05:01 +0100 Subject: [PATCH 2/8] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0a48ca5c..0760ba61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ssb-sgis" -version = "1.0.15" +version = "1.1.0" description = "GIS functions used at Statistics Norway." authors = ["Morten Letnes "] license = "MIT" From 17c026b7e164e8170ea42dc8ecd51d7d50593748 Mon Sep 17 00:00:00 2001 From: Morten Letnes Date: Thu, 16 Jan 2025 16:47:04 +0100 Subject: [PATCH 3/8] res 30 --- tests/test_xbuffdissexp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_xbuffdissexp.py b/tests/test_xbuffdissexp.py index 1ec2e09c..687fe9b3 100644 --- a/tests/test_xbuffdissexp.py +++ b/tests/test_xbuffdissexp.py @@ -188,7 +188,7 @@ def test_buffdissexp(gdf_fixture): copy = gdf_fixture.copy() # with geopandas - copy["geometry"] = copy.buffer(distance, resolution=50).make_valid() + copy["geometry"] = copy.buffer(distance, resolution=30).make_valid() copy = copy.dissolve(by="txtcol") copy["geometry"] = copy.make_valid() copy = copy.explode(index_parts=False) @@ -215,7 +215,7 @@ def test_buffdiss(gdf_fixture): copy = gdf_fixture.copy() # with geopandas - copy["geometry"] = copy.buffer(distance, resolution=50).make_valid() + copy["geometry"] = copy.buffer(distance, resolution=30).make_valid() copy = copy.dissolve(by="txtcol") copy["geometry"] = copy.make_valid() From 668a69bd698761ab3ad52261ef7e7fa0a8820e4d Mon Sep 17 00:00:00 2001 From: Morten Letnes Date: Thu, 16 Jan 2025 16:47:28 +0100 Subject: [PATCH 4/8] back to previous version --- src/sgis/helpers.py | 46 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/sgis/helpers.py b/src/sgis/helpers.py index 687f62b7..2067361d 100644 --- a/src/sgis/helpers.py +++ b/src/sgis/helpers.py @@ -223,30 +223,30 @@ def get_object_name( var: object, start: int = 2, stop: int = 7, ignore_self: bool = True ) -> str: frame = inspect.currentframe() # frame can be FrameType or None - if not frame: - raise ValueError(f"Couldn't find name for {var}") - try: - for _ in range(start): - frame = frame.f_back if frame else None - for _ in range(start, stop): + if frame: + try: + for _ in range(start): + frame = frame.f_back if frame else None + for _ in range(start, stop): + if frame: + names = [ + var_name + for var_name, var_val in frame.f_locals.items() + if var_val is var and not (ignore_self and var_name == "self") + ] + names = [name for name in names if not name.startswith("_")] + if names: + if len(names) != 1: + warnings.warn( + "More than one local variable matches the object. Name might be wrong.", + stacklevel=2, + ) + return names[0] + frame = frame.f_back if frame else None + finally: if frame: - names = [ - var_name - for var_name, var_val in frame.f_locals.items() - if var_val is var and not (ignore_self and var_name == "self") - ] - names = [name for name in names if not name.startswith("_")] - if names: - if len(names) != 1: - warnings.warn( - "More than one local variable matches the object. Name might be wrong.", - stacklevel=2, - ) - return names[0] - frame = frame.f_back if frame else None - finally: - if frame: - del frame # Explicitly delete frame reference to assist with garbage collection + del frame # Explicitly delete frame reference to assist with garbage collection + raise ValueError(f"Couldn't find name for {var}") def make_namedict(gdfs: tuple[GeoDataFrame]) -> dict[int, str]: From 3bf881058948a09f36067b1442420952616ebfa6 Mon Sep 17 00:00:00 2001 From: Morten Letnes Date: Fri, 17 Jan 2025 09:00:42 +0100 Subject: [PATCH 5/8] resolution --- tests/test_geopandas_utils.py | 2 +- tests/test_img.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_geopandas_utils.py b/tests/test_geopandas_utils.py index ecfb1f49..294bea93 100644 --- a/tests/test_geopandas_utils.py +++ b/tests/test_geopandas_utils.py @@ -86,7 +86,7 @@ def test_points_in_bounds(): def test_area(): gdf = create_all_geometry_types() - gdf = sg.buffdissexp(gdf, 25) + gdf = sg.buffdissexp(gdf, 25, resolution=50) assert round(gdf.area.sum(), 5) == 6270.69379, round(gdf.area.sum(), 5) assert round(gdf.length.sum(), 5) == 332.02674, round(gdf.length.sum(), 5) diff --git a/tests/test_img.py b/tests/test_img.py index ba99d030..61db6ec8 100644 --- a/tests/test_img.py +++ b/tests/test_img.py @@ -1774,8 +1774,8 @@ def _get_metadata_for_one_path(file_path: str, band_endswith: str) -> dict: def main(): - test_merge() test_ndvi() + test_merge() test_explore() test_pixelwise() test_ndvi_predictions() From e42c8d976a7b3f2ecb3d9307ce3501281cc522d4 Mon Sep 17 00:00:00 2001 From: Morten Letnes Date: Fri, 17 Jan 2025 12:32:41 +0100 Subject: [PATCH 6/8] copy --- src/sgis/geopandas_tools/bounds.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sgis/geopandas_tools/bounds.py b/src/sgis/geopandas_tools/bounds.py index 3b424240..5389bff5 100644 --- a/src/sgis/geopandas_tools/bounds.py +++ b/src/sgis/geopandas_tools/bounds.py @@ -669,6 +669,7 @@ def bounds_to_points( 0 MULTIPOINT (1.00000 0.00000, 1.00000 1.00000, ... 1 MULTIPOINT (0.00000 0.00000) """ + gdf = gdf.copy() if copy else gdf as_bounds = bounds_to_polygon(gdf, copy=copy) if isinstance(gdf, GeoSeries): return GeoSeries(extract_unique_points(as_bounds), index=gdf.index) From 115210c5952de388bbf8ba2f9b09a2747151a73a Mon Sep 17 00:00:00 2001 From: Morten Letnes Date: Fri, 17 Jan 2025 12:41:06 +0100 Subject: [PATCH 7/8] add kwargs --- src/sgis/maps/thematicmap.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/sgis/maps/thematicmap.py b/src/sgis/maps/thematicmap.py index caa87603..59ea88e7 100644 --- a/src/sgis/maps/thematicmap.py +++ b/src/sgis/maps/thematicmap.py @@ -280,7 +280,10 @@ def change_cmap(self, cmap: str, start: int = 0, stop: int = 256) -> "ThematicMa return self def add_background( - self, gdf: GeoDataFrame, color: str | None = None + self, + gdf: GeoDataFrame, + color: str | None = None, + **kwargs, ) -> "ThematicMap": """Add a GeoDataFrame as a background layer. @@ -288,6 +291,7 @@ def add_background( gdf: a GeoDataFrame. color: Single color. Defaults to gray (shade depends on whether the map facecolor is black or white). + **kwargs: Keyword arguments sent to GeoDataFrame.plot. """ if color: self.bg_gdf_color = color @@ -299,6 +303,7 @@ def add_background( ) if self.bounds is None: self.bounds = to_bbox(self._gdf.total_bounds) + self.bg_gdf_kwargs = kwargs return self def plot(self, **kwargs) -> None: @@ -515,7 +520,9 @@ def _make_bin_value_dict(self, gdf: GeoDataFrame, classified: np.ndarray) -> dic def _actually_add_background(self) -> None: self.ax.set_xlim([self.minx - self.diffx * 0.03, self.maxx + self.diffx * 0.03]) self.ax.set_ylim([self.miny - self.diffy * 0.03, self.maxy + self.diffy * 0.03]) - self._background_gdfs.plot(ax=self.ax, color=self.bg_gdf_color) + self._background_gdfs.plot( + ax=self.ax, color=self.bg_gdf_color, **self.bg_gdf_kwargs + ) @staticmethod def _get_matplotlib_figure_and_axix( From d5730eeb3973209913b5f58216b8d172ff0e83e1 Mon Sep 17 00:00:00 2001 From: Morten Letnes Date: Fri, 17 Jan 2025 13:19:15 +0100 Subject: [PATCH 8/8] change assert --- tests/test_geopandas_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_geopandas_utils.py b/tests/test_geopandas_utils.py index 294bea93..b86b8ff9 100644 --- a/tests/test_geopandas_utils.py +++ b/tests/test_geopandas_utils.py @@ -45,7 +45,7 @@ def test_drop_inactive(): assert list(gdf.columns) == ["geometry"] -def test__rename_geometry_if(): +def test_rename_geometry_if(): gdf = sg.to_gdf([0, 0]) gdf = gdf.rename_geometry("geom2") gdf.columns = ["geom2"] @@ -87,8 +87,8 @@ def test_points_in_bounds(): def test_area(): gdf = create_all_geometry_types() gdf = sg.buffdissexp(gdf, 25, resolution=50) - assert round(gdf.area.sum(), 5) == 6270.69379, round(gdf.area.sum(), 5) - assert round(gdf.length.sum(), 5) == 332.02674, round(gdf.length.sum(), 5) + assert round(gdf.area.sum(), 5) == 6270.72656, round(gdf.area.sum(), 5) + assert round(gdf.length.sum(), 5) == 332.02452, round(gdf.length.sum(), 5) def test_clean_clip(): @@ -229,6 +229,7 @@ def main(): if __name__ == "__main__": main() + test_area() test_points_in_bounds() test_clean_clip() test_random_points_in_polygons()