From a229f701470c27583223a3461d4a1bd3831982f0 Mon Sep 17 00:00:00 2001 From: Frederique Date: Thu, 28 Sep 2023 16:18:52 +0200 Subject: [PATCH 1/4] Moved a few functions to the new gis.py file --- hydromt_fiat/workflows/exposure_vector.py | 26 +++------- hydromt_fiat/workflows/gis.py | 62 +++++++++++++++++++++++ 2 files changed, 68 insertions(+), 20 deletions(-) create mode 100644 hydromt_fiat/workflows/gis.py diff --git a/hydromt_fiat/workflows/exposure_vector.py b/hydromt_fiat/workflows/exposure_vector.py index 84980bd6..542ed7ee 100644 --- a/hydromt_fiat/workflows/exposure_vector.py +++ b/hydromt_fiat/workflows/exposure_vector.py @@ -7,7 +7,6 @@ import numpy as np import pandas as pd from hydromt.data_catalog import DataCatalog -from hydromt.gis_utils import utm_crs from pyproj import CRS from hydromt_fiat.data_apis.national_structure_inventory import get_assets_from_nsi @@ -20,6 +19,7 @@ from .exposure import Exposure from .utils import detect_delimiter from .vulnerability import Vulnerability +from .gis import get_area, sjoin_largest_area, get_crs_str_from_gdf class ExposureVector(Exposure): @@ -229,8 +229,7 @@ def setup_asset_locations(self, asset_locations: str) -> None: ) # Set the CRS of the exposure data - source_data_authority = assets.crs.to_authority() - self.crs = source_data_authority[0] + ":" + source_data_authority[1] + self.crs = get_crs_str_from_gdf(assets.crs) # Check if the 'Object ID' column exists and if so, is unique if "Object ID" not in assets.columns: @@ -299,13 +298,7 @@ def setup_occupancy_type(self, occupancy_source: str, occupancy_attr: str) -> No # If there is only one exposure geom, do the spatial join with the # occupancy_map. Only take the largest overlapping object from the # occupancy_map. - gdf = gpd.overlay( - self.exposure_geoms[0], occupancy_map[to_keep], how="intersection" - ) - gdf["area"] = gdf.geometry.area - gdf.sort_values(by="area", inplace=True) - gdf.drop_duplicates(subset="Object ID", keep="last", inplace=True) - gdf.drop(columns=["area"], inplace=True) + gdf = sjoin_largest_area(self.exposure_geoms[0], occupancy_map[to_keep]) # Remove the objects that do not have a Primary Object Type, that were not # overlapping with the land use map, or that had a land use type of 'nan'. @@ -463,14 +456,7 @@ def setup_max_potential_damage( # Calculate the area of each object gdf = self.get_full_gdf(self.exposure_db)[["Primary Object Type", "geometry"]] - if gdf.crs.is_geographic: - # If the CRS is geographic, reproject to the nearest UTM zone - nearest_utm = utm_crs(gdf.total_bounds) - gdf_utm = gdf.to_crs(nearest_utm) - gdf["area"] = gdf_utm["geometry"].area - elif gdf.crs.is_projected: - # If the CRS is projected, calculate the area in the same CRS - gdf["area"] = gdf["geometry"].area + gdf = get_area(gdf) # Set the damage values to the exposure data if damage_types is None: @@ -911,7 +897,7 @@ def link_exposure_vulnerability( len_diff_secondary_linking_types = 100000 if "Primary Object Type" in self.exposure_db.columns: unique_types_primary = set( - self.exposure_db["Primary Object Type"].unique() + self.get_primary_object_type() ) diff_primary_linking_types = unique_types_primary - unique_linking_types len_diff_primary_linking_types = len(diff_primary_linking_types) @@ -919,7 +905,7 @@ def link_exposure_vulnerability( unique_types_secondary = set() if "Secondary Object Type" in self.exposure_db.columns: unique_types_secondary = set( - self.exposure_db["Secondary Object Type"].unique() + self.get_secondary_object_type() ) diff_secondary_linking_types = ( unique_types_secondary - unique_linking_types diff --git a/hydromt_fiat/workflows/gis.py b/hydromt_fiat/workflows/gis.py new file mode 100644 index 00000000..11e544b0 --- /dev/null +++ b/hydromt_fiat/workflows/gis.py @@ -0,0 +1,62 @@ +import geopandas as gpd +from hydromt.gis_utils import utm_crs + + +def get_area(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """Adds an area column to a GeoDataFrame. + + Parameters + ---------- + gdf : gpd.GeoDataFrame + The GeoDataFrame to which the area column will be added. + + Returns + ------- + gpd.GeoDataFrame + The GeoDataFrame with an additional column "area". + """ + # Calculate the area of each object + if gdf.crs.is_geographic: + # If the CRS is geographic, reproject to the nearest UTM zone + nearest_utm = utm_crs(gdf.total_bounds) + gdf_utm = gdf.to_crs(nearest_utm) + gdf["area"] = gdf_utm["geometry"].area + elif gdf.crs.is_projected: + # If the CRS is projected, calculate the area in the same CRS + gdf["area"] = gdf["geometry"].area + return gdf + + +def sjoin_largest_area( + left_gdf: gpd.GeoDataFrame, right_gdf: gpd.GeoDataFrame, id_col: str = "Object ID" +) -> gpd.GeoDataFrame: + """Spatial join of two GeoDataFrames, keeping only the joined data from the largest + intersection per object. + + Parameters + ---------- + left_gdf : gpd.GeoDataFrame + The GeoDataFrame to which the data from the right GeoDataFrame will be joined. + right_gdf : gpd.GeoDataFrame + The GeoDataFrame from which the data will be joined to the left GeoDataFrame. + id_col : str, optional + The ID column that will be used to drop the duplicates from overlapping + geometries, by default "Object ID" + + Returns + ------- + gpd.GeoDataFrame + Resulting GeoDataFrame with the joined data from the largest intersection per + object. + """ + gdf = gpd.overlay(left_gdf, right_gdf, how="intersection") + gdf["area"] = gdf.geometry.area + gdf.sort_values(by="area", inplace=True) + gdf.drop_duplicates(subset=id_col, keep="last", inplace=True) + gdf.drop(columns=["area"], inplace=True) + return gdf + + +def get_crs_str_from_gdf(gdf_crs: gpd.GeoDataFrame.crs) -> str: + source_data_authority = gdf_crs.to_authority() + return source_data_authority[0] + ":" + source_data_authority[1] \ No newline at end of file From fa51da4416ee6f36dc7b87fa45bd14f03410d8f4 Mon Sep 17 00:00:00 2001 From: Frederique Date: Thu, 28 Sep 2023 17:41:28 +0200 Subject: [PATCH 2/4] Update with new function to join_spatial_data (generic function for adding ground floor height info, max pot damage, etc.) --- hydromt_fiat/fiat.py | 1 + hydromt_fiat/workflows/exposure_vector.py | 40 ++++++++-- hydromt_fiat/workflows/gis.py | 89 ++++++++++++++++++++++- tests/test_update_ground_floor_height.py | 43 +++++++++++ 4 files changed, 166 insertions(+), 7 deletions(-) create mode 100644 tests/test_update_ground_floor_height.py diff --git a/hydromt_fiat/fiat.py b/hydromt_fiat/fiat.py index 22457b06..4b292fd1 100644 --- a/hydromt_fiat/fiat.py +++ b/hydromt_fiat/fiat.py @@ -682,6 +682,7 @@ def read_tables(self): crs=self.get_config("exposure.geom.crs"), logger=self.logger, unit=self.get_config("exposure.geom.unit"), + data_catalog=self.data_catalog, # TODO: See if this works also when no data catalog is provided ) self.exposure.read_table(exposure_fn) self._tables["exposure"] = self.exposure.exposure_db diff --git a/hydromt_fiat/workflows/exposure_vector.py b/hydromt_fiat/workflows/exposure_vector.py index 542ed7ee..7684b74c 100644 --- a/hydromt_fiat/workflows/exposure_vector.py +++ b/hydromt_fiat/workflows/exposure_vector.py @@ -19,7 +19,7 @@ from .exposure import Exposure from .utils import detect_delimiter from .vulnerability import Vulnerability -from .gis import get_area, sjoin_largest_area, get_crs_str_from_gdf +from .gis import get_area, sjoin_largest_area, get_crs_str_from_gdf, join_spatial_data class ExposureVector(Exposure): @@ -412,8 +412,33 @@ def setup_aggregation_labels(self): def setup_ground_floor_height( self, - ground_floor_height: Union[int, float, None, str, Path], + ground_floor_height: Union[int, float, None, str, Path, List[str], List[Path]], + attr_name: Union[str, List[str], None] = None, + method: Union[str, List[str], None] = "nearest", ) -> None: + """Set the ground floor height of the exposure data. This function overwrites + the existing Ground Floor Height column if it already exists. + + Parameters + ---------- + ground_floor_height : Union[int, float, None, str, Path, List[str], List[Path]] + A number to set the Ground Floor Height of all assets to the same value, a + path to a file that contains the Ground Floor Height of each asset, or a + list of paths to files that contain the Ground Floor Height of each asset, + in the order of preference (the first item in the list gets the highest + priority in assigning the values). + attr_name : Union[str, List[str]], optional + The name of the attribute that contains the Ground Floor Height in the + file(s) that are submitted. If multiple `ground_floor_height` files are + submitted, the attribute names are linked to the files in the same order as + the files are submitted. By default None. + method : Union[str, List[str]], optional + The method to use to assign the Ground Floor Height to the assets. If + multiple `ground_floor_height` files are submitted, the methods are linked + to the files in the same order as the files are submitted. The method can + be either 'nearest' (nearest neighbor) or 'intersection'. By default + 'nearest'. + """ # Set the ground floor height column. # If the Ground Floor Height is input as a number, assign all objects with # the same Ground Floor Height. @@ -422,14 +447,19 @@ def setup_ground_floor_height( ground_floor_height, float ): self.exposure_db["Ground Floor Height"] = ground_floor_height - elif isinstance(ground_floor_height, str): - # TODO: implement the option to add the ground floor height from a file. + elif isinstance(ground_floor_height, str) or isinstance(ground_floor_height, Path): + # A single file is used to assign the ground floor height to the assets + gfh = self.data_catalog.get_geodataframe(ground_floor_height) + gdf = self.get_full_gdf(self.exposure_db) + gdf = join_spatial_data(gdf, gfh, attr_name, method) + elif isinstance(ground_floor_height, list): + # Multiple files are used to assign the ground floor height to the assets NotImplemented else: # Set the Ground Floor Height to 0 if the user did not specify any # Ground Floor Height. self.exposure_db["Ground Floor Height"] = 0 - + def setup_max_potential_damage( self, max_potential_damage: Union[List[float], float], diff --git a/hydromt_fiat/workflows/gis.py b/hydromt_fiat/workflows/gis.py index 11e544b0..63de4149 100644 --- a/hydromt_fiat/workflows/gis.py +++ b/hydromt_fiat/workflows/gis.py @@ -1,5 +1,5 @@ import geopandas as gpd -from hydromt.gis_utils import utm_crs +from hydromt.gis_utils import utm_crs, nearest_merge, nearest def get_area(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: @@ -57,6 +57,91 @@ def sjoin_largest_area( return gdf +def check_geometry_type(gdf: gpd.GeoDataFrame) -> str: + """Check if the geometry type of a GeoDataFrame is homogeneous. + + Parameters + ---------- + gdf : gpd.GeoDataFrame + The GeoDataFrame to check. + + Raises + ------ + ValueError + If the geometry type of the GeoDataFrame is not homogeneous. + """ + if len(gdf.geom_type.unique()) > 1: + raise ValueError( + "The geometry type of the GeoDataFrame is not homogeneous. " + f"Geometry types found: {gdf.geom_type.unique()}" + ) + else: + return gdf.geom_type.unique()[0] + + def get_crs_str_from_gdf(gdf_crs: gpd.GeoDataFrame.crs) -> str: source_data_authority = gdf_crs.to_authority() - return source_data_authority[0] + ":" + source_data_authority[1] \ No newline at end of file + return source_data_authority[0] + ":" + source_data_authority[1] + + +def join_nearest_points(left_gdf, right_gdf, attribute_name, max_dist) -> gpd.GeoDataFrame: + gdf_merged = nearest_merge( + gdf1=left_gdf, gdf2=right_gdf, columns=[attribute_name], max_dist=max_dist + ) + print(gdf_merged.columns) + + # TODO: clean up the geodataframe (remove unnecessary columns, etc.) + + +def join_spatial_data( + left_gdf: gpd.GeoDataFrame, + right_gdf: gpd.GeoDataFrame, + attribute_name: str, + method: str, + max_dist: float = 10, +) -> gpd.GeoDataFrame: + """Join two GeoDataFrames based on their spatial relationship. + + Parameters + ---------- + left_gdf : gpd.GeoDataFrame + The GeoDataFrame to which the data from the right GeoDataFrame will be joined. + right_gdf : gpd.GeoDataFrame + The GeoDataFrame from which the data will be joined to the left GeoDataFrame. + attribute_name : str + The name of the attribute that will be joined. + method : str + The method that will be used to join the data. Either "nearest" or + "intersection". + max_dist : float, optional + The maximum distance for the nearest join measured in meters, by default + 10 (meters). + + Returns + ------- + gpd.GeoDataFrame + The joined GeoDataFrame. + """ + left_gdf_type = check_geometry_type(left_gdf) + right_gdf_type = check_geometry_type(right_gdf) + + if method == "nearest": + if (left_gdf_type == "Point") and (right_gdf_type == "Point"): + gdf = join_nearest_points(left_gdf, right_gdf, attribute_name, max_dist) + elif method == "intersection": + if left_gdf_type == "Polygon": + gdf = sjoin_largest_area(left_gdf, right_gdf) + elif left_gdf_type == "Point": + # TODO: create a separate function for this? + gdf = gpd.sjoin(left_gdf, right_gdf, how="left", predicate="intersects") + else: + raise NotImplementedError( + f"Join method {method} is not implemented for geometry type " + f"{left_gdf_type}" + ) + else: + raise NotImplementedError(f"Join method {method} is not implemented") + + # TODO: use the data from the new column to update the original column + + return gdf diff --git a/tests/test_update_ground_floor_height.py b/tests/test_update_ground_floor_height.py new file mode 100644 index 00000000..c1fc064c --- /dev/null +++ b/tests/test_update_ground_floor_height.py @@ -0,0 +1,43 @@ +from hydromt_fiat.fiat import FiatModel +from hydromt.log import setuplog +from pathlib import Path +import pytest +import shutil + + +EXAMPLEDIR = Path( + "P:/11207949-dhs-phaseii-floodadapt/Model-builder/Delft-FIAT/local_test_database" +) +DATADIR = Path().absolute() / "hydromt_fiat" / "data" + +_cases = { + "update_ground_floor_height": { + "dir": "test_read", + "new_root": EXAMPLEDIR / "test_update_ground_floor_height", + "data_catalog": DATADIR / "hydromt_fiat_catalog_USA.yml", + }, +} + + +@pytest.mark.parametrize("case", list(_cases.keys())) +def test_update_ground_floor_height(case): + # Read model in examples folder. + root = EXAMPLEDIR.joinpath(_cases[case]["dir"]) + logger = setuplog("hydromt_fiat", log_level=10) + + fm = FiatModel(root=root, mode="r", data_libs=[_cases[case]["data_catalog"]], logger=logger) + fm.read() + + ground_floor_height_file = EXAMPLEDIR / "data" / "ground_floor_height" / "fake_elevation_certificates.gpkg" + attribute = "FFE" + method = "nearest" + + fm.exposure.setup_ground_floor_height(ground_floor_height_file, attribute, method) + + # Remove the new root folder if it already exists + if _cases[case]["new_root"].exists(): + shutil.rmtree(_cases[case]["new_root"]) + + # Set the new root and write the model + fm.set_root(_cases[case]["new_root"]) + fm.write() From 5cfaa835a447bb9b763cdb1f1fbf7785757bea2b Mon Sep 17 00:00:00 2001 From: Frederique Date: Thu, 28 Sep 2023 17:49:25 +0200 Subject: [PATCH 3/4] Update function to add data --- hydromt_fiat/workflows/exposure_vector.py | 1 + hydromt_fiat/workflows/gis.py | 26 +++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/hydromt_fiat/workflows/exposure_vector.py b/hydromt_fiat/workflows/exposure_vector.py index 7684b74c..ef9e2b6a 100644 --- a/hydromt_fiat/workflows/exposure_vector.py +++ b/hydromt_fiat/workflows/exposure_vector.py @@ -452,6 +452,7 @@ def setup_ground_floor_height( gfh = self.data_catalog.get_geodataframe(ground_floor_height) gdf = self.get_full_gdf(self.exposure_db) gdf = join_spatial_data(gdf, gfh, attr_name, method) + gdf.loc[gdf[attr_name].notna(), "Ground Floor Height"] = gdf.loc[gdf[attr_name].notna(), attr_name] elif isinstance(ground_floor_height, list): # Multiple files are used to assign the ground floor height to the assets NotImplemented diff --git a/hydromt_fiat/workflows/gis.py b/hydromt_fiat/workflows/gis.py index 63de4149..a490c19b 100644 --- a/hydromt_fiat/workflows/gis.py +++ b/hydromt_fiat/workflows/gis.py @@ -85,13 +85,35 @@ def get_crs_str_from_gdf(gdf_crs: gpd.GeoDataFrame.crs) -> str: def join_nearest_points(left_gdf, right_gdf, attribute_name, max_dist) -> gpd.GeoDataFrame: + """Join two GeoDataFrames based on the nearest distance between their points. + + Parameters + ---------- + left_gdf : gpd.GeoDataFrame + The GeoDataFrame to which the data from the right GeoDataFrame will be joined. + right_gdf : gpd.GeoDataFrame + The GeoDataFrame from which the data will be joined to the left GeoDataFrame. + attribute_name : str + The name of the attribute that will be joined. + max_dist : float + The maximum distance for the nearest join measured in meters. + + Returns + ------- + gpd.GeoDataFrame + The joined GeoDataFrame. + """ + # Use the HydroMT function nearest_merge to join the geodataframes gdf_merged = nearest_merge( gdf1=left_gdf, gdf2=right_gdf, columns=[attribute_name], max_dist=max_dist ) - print(gdf_merged.columns) - # TODO: clean up the geodataframe (remove unnecessary columns, etc.) + # Clean up the geodataframe (remove unnecessary columns) + del gdf_merged["distance_right"] + del gdf_merged["index_right"] + return gdf_merged + def join_spatial_data( left_gdf: gpd.GeoDataFrame, From 59e6b54bd218a57a789c1a6ceb185e81ba6ab86c Mon Sep 17 00:00:00 2001 From: Frederique Date: Fri, 29 Sep 2023 12:58:59 +0200 Subject: [PATCH 4/4] Implemented the join_spatial_data function a bit better with some checks --- hydromt_fiat/workflows/exposure_vector.py | 46 ++++++++------ hydromt_fiat/workflows/gis.py | 74 +++++++++++++++++++---- 2 files changed, 89 insertions(+), 31 deletions(-) diff --git a/hydromt_fiat/workflows/exposure_vector.py b/hydromt_fiat/workflows/exposure_vector.py index ef9e2b6a..bb97ca78 100644 --- a/hydromt_fiat/workflows/exposure_vector.py +++ b/hydromt_fiat/workflows/exposure_vector.py @@ -416,7 +416,7 @@ def setup_ground_floor_height( attr_name: Union[str, List[str], None] = None, method: Union[str, List[str], None] = "nearest", ) -> None: - """Set the ground floor height of the exposure data. This function overwrites + """Set the ground floor height of the exposure data. This function overwrites the existing Ground Floor Height column if it already exists. Parameters @@ -424,8 +424,8 @@ def setup_ground_floor_height( ground_floor_height : Union[int, float, None, str, Path, List[str], List[Path]] A number to set the Ground Floor Height of all assets to the same value, a path to a file that contains the Ground Floor Height of each asset, or a - list of paths to files that contain the Ground Floor Height of each asset, - in the order of preference (the first item in the list gets the highest + list of paths to files that contain the Ground Floor Height of each asset, + in the order of preference (the first item in the list gets the highest priority in assigning the values). attr_name : Union[str, List[str]], optional The name of the attribute that contains the Ground Floor Height in the @@ -433,26 +433,27 @@ def setup_ground_floor_height( submitted, the attribute names are linked to the files in the same order as the files are submitted. By default None. method : Union[str, List[str]], optional - The method to use to assign the Ground Floor Height to the assets. If + The method to use to assign the Ground Floor Height to the assets. If multiple `ground_floor_height` files are submitted, the methods are linked - to the files in the same order as the files are submitted. The method can - be either 'nearest' (nearest neighbor) or 'intersection'. By default + to the files in the same order as the files are submitted. The method can + be either 'nearest' (nearest neighbor) or 'intersection'. By default 'nearest'. """ - # Set the ground floor height column. - # If the Ground Floor Height is input as a number, assign all objects with - # the same Ground Floor Height. if ground_floor_height: if isinstance(ground_floor_height, int) or isinstance( ground_floor_height, float ): + # If the Ground Floor Height is input as a number, assign all objects with + # the same Ground Floor Height. self.exposure_db["Ground Floor Height"] = ground_floor_height - elif isinstance(ground_floor_height, str) or isinstance(ground_floor_height, Path): + elif isinstance(ground_floor_height, str) or isinstance( + ground_floor_height, Path + ): # A single file is used to assign the ground floor height to the assets gfh = self.data_catalog.get_geodataframe(ground_floor_height) gdf = self.get_full_gdf(self.exposure_db) gdf = join_spatial_data(gdf, gfh, attr_name, method) - gdf.loc[gdf[attr_name].notna(), "Ground Floor Height"] = gdf.loc[gdf[attr_name].notna(), attr_name] + gdf = self._set_values_from_other_column(gdf, "Ground Floor Height", attr_name) elif isinstance(ground_floor_height, list): # Multiple files are used to assign the ground floor height to the assets NotImplemented @@ -460,7 +461,7 @@ def setup_ground_floor_height( # Set the Ground Floor Height to 0 if the user did not specify any # Ground Floor Height. self.exposure_db["Ground Floor Height"] = 0 - + def setup_max_potential_damage( self, max_potential_damage: Union[List[float], float], @@ -927,17 +928,13 @@ def link_exposure_vulnerability( len_diff_primary_linking_types = 100000 len_diff_secondary_linking_types = 100000 if "Primary Object Type" in self.exposure_db.columns: - unique_types_primary = set( - self.get_primary_object_type() - ) + unique_types_primary = set(self.get_primary_object_type()) diff_primary_linking_types = unique_types_primary - unique_linking_types len_diff_primary_linking_types = len(diff_primary_linking_types) unique_types_secondary = set() if "Secondary Object Type" in self.exposure_db.columns: - unique_types_secondary = set( - self.get_secondary_object_type() - ) + unique_types_secondary = set(self.get_secondary_object_type()) diff_secondary_linking_types = ( unique_types_secondary - unique_linking_types ) @@ -1298,3 +1295,16 @@ def set_height_relative_to_reference( ) return exposure_to_modify.reset_index(drop=True) + + @staticmethod + def _set_values_from_other_column( + df: Union[pd.DataFrame, gpd.GeoDataFrame], col_to_set: str, col_to_copy: str + ) -> Union[pd.DataFrame, gpd.GeoDataFrame]: + """Sets the values of to where the values of are + nan and deletes . + """ + df.loc[df[col_to_copy].notna(), col_to_set] = df.loc[ + df[col_to_copy].notna(), col_to_copy + ] + del df[col_to_copy] + return df diff --git a/hydromt_fiat/workflows/gis.py b/hydromt_fiat/workflows/gis.py index a490c19b..00876f9a 100644 --- a/hydromt_fiat/workflows/gis.py +++ b/hydromt_fiat/workflows/gis.py @@ -1,5 +1,6 @@ import geopandas as gpd -from hydromt.gis_utils import utm_crs, nearest_merge, nearest +from hydromt.gis_utils import utm_crs, nearest_merge +from typing import List def get_area(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: @@ -84,6 +85,26 @@ def get_crs_str_from_gdf(gdf_crs: gpd.GeoDataFrame.crs) -> str: return source_data_authority[0] + ":" + source_data_authority[1] +def clean_up_gdf(gdf: gpd.GeoDataFrame, columns: List[str]) -> gpd.GeoDataFrame: + """Clean up a GeoDataFrame by removing unnecessary columns. + + Parameters + ---------- + gdf : gpd.GeoDataFrame + The GeoDataFrame to clean up. + + Returns + ------- + gpd.GeoDataFrame + The cleaned up GeoDataFrame. + """ + # Remove unnecessary columns + for col in columns: + del gdf[col] + + return gdf + + def join_nearest_points(left_gdf, right_gdf, attribute_name, max_dist) -> gpd.GeoDataFrame: """Join two GeoDataFrames based on the nearest distance between their points. @@ -109,11 +130,19 @@ def join_nearest_points(left_gdf, right_gdf, attribute_name, max_dist) -> gpd.Ge ) # Clean up the geodataframe (remove unnecessary columns) - del gdf_merged["distance_right"] - del gdf_merged["index_right"] + gdf_merged = clean_up_gdf(gdf_merged, ["distance_right", "index_right"]) return gdf_merged + +def intersect_points_polygons(left_gdf, right_gdf, attribute_name) -> gpd.GeoDataFrame: + gdf_merged = gpd.sjoin(left_gdf, right_gdf, how="left", predicate="intersects") + + # Clean up the geodataframe (remove unnecessary columns) + gdf_merged = clean_up_gdf(gdf_merged, ["index_right"]) + + return gdf_merged + def join_spatial_data( left_gdf: gpd.GeoDataFrame, @@ -144,26 +173,45 @@ def join_spatial_data( gpd.GeoDataFrame The joined GeoDataFrame. """ + assert left_gdf.crs == right_gdf.crs, ( + "The CRS of the GeoDataFrames to join do not match. " + f"Left CRS: {get_crs_str_from_gdf(left_gdf.crs)}, " + f"Right CRS: {get_crs_str_from_gdf(right_gdf.crs)}" + ) + left_gdf_type = check_geometry_type(left_gdf) right_gdf_type = check_geometry_type(right_gdf) + assert (left_gdf_type == "Polygon") or (left_gdf_type == "Point"), ( + "The left GeoDataFrame should contain either polygons or points." + ) + + assert (right_gdf_type == "Polygon") or (right_gdf_type == "Point"), ( + "The right GeoDataFrame should contain either polygons or points." + ) + if method == "nearest": - if (left_gdf_type == "Point") and (right_gdf_type == "Point"): - gdf = join_nearest_points(left_gdf, right_gdf, attribute_name, max_dist) - elif method == "intersection": if left_gdf_type == "Polygon": + left_gdf_geom_original = left_gdf.geometry + left_gdf.geometry = left_gdf.geometry.centroid + + if right_gdf_type == "Polygon": + right_gdf.geometry = right_gdf.geometry.centroid + + gdf = join_nearest_points(left_gdf, right_gdf, attribute_name, max_dist) + left_gdf.geometry = left_gdf_geom_original # TODO: Check if this works well + + elif method == "intersection": + if (left_gdf_type == "Polygon") and (right_gdf_type == "Polygon"): gdf = sjoin_largest_area(left_gdf, right_gdf) - elif left_gdf_type == "Point": - # TODO: create a separate function for this? - gdf = gpd.sjoin(left_gdf, right_gdf, how="left", predicate="intersects") + elif (left_gdf_type == "Point") and (right_gdf_type == "Polygon"): + gdf = intersect_points_polygons(left_gdf, right_gdf, attribute_name) else: raise NotImplementedError( - f"Join method {method} is not implemented for geometry type " - f"{left_gdf_type}" + f"Join method {method} is not implemented for joining data of geometry " + f"type {left_gdf_type} and {right_gdf_type}" ) else: raise NotImplementedError(f"Join method {method} is not implemented") - # TODO: use the data from the new column to update the original column - return gdf