Skip to content

Commit

Permalink
Merge branch '#146-and-#125-gis-functions-in-separate-file-and-ground…
Browse files Browse the repository at this point in the history
…-floor-height-updating' into fiat_integrator
  • Loading branch information
frederique-hub authored Oct 10, 2023
2 parents bce3b93 + 59e6b54 commit fb2670e
Show file tree
Hide file tree
Showing 4 changed files with 318 additions and 30 deletions.
1 change: 1 addition & 0 deletions hydromt_fiat/fiat.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ def read_tables(self):
crs=self.get_config("exposure.geom.crs"),
logger=self.logger,
unit=self.get_config("exposure.geom.unit"),
data_catalog=self.data_catalog, # TODO: See if this works also when no data catalog is provided
)
self.exposure.read_table(exposure_fn)
self._tables["exposure"] = self.exposure.exposure_db
Expand Down
87 changes: 57 additions & 30 deletions hydromt_fiat/workflows/exposure_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import numpy as np
import pandas as pd
from hydromt.data_catalog import DataCatalog
from hydromt.gis_utils import utm_crs
from pyproj import CRS

from hydromt_fiat.data_apis.national_structure_inventory import get_assets_from_nsi
Expand All @@ -20,6 +19,7 @@
from hydromt_fiat.workflows.exposure import Exposure
from hydromt_fiat.workflows.utils import detect_delimiter
from hydromt_fiat.workflows.vulnerability import Vulnerability
from hydromt_fiat.workflows.gis import get_area, sjoin_largest_area, get_crs_str_from_gdf, join_spatial_data


class ExposureVector(Exposure):
Expand Down Expand Up @@ -229,8 +229,7 @@ def setup_asset_locations(self, asset_locations: str) -> None:
)

# Set the CRS of the exposure data
source_data_authority = assets.crs.to_authority()
self.crs = source_data_authority[0] + ":" + source_data_authority[1]
self.crs = get_crs_str_from_gdf(assets.crs)

# Check if the 'Object ID' column exists and if so, is unique
if "Object ID" not in assets.columns:
Expand Down Expand Up @@ -299,13 +298,7 @@ def setup_occupancy_type(self, occupancy_source: str, occupancy_attr: str) -> No
# If there is only one exposure geom, do the spatial join with the
# occupancy_map. Only take the largest overlapping object from the
# occupancy_map.
gdf = gpd.overlay(
self.exposure_geoms[0], occupancy_map[to_keep], how="intersection"
)
gdf["area"] = gdf.geometry.area
gdf.sort_values(by="area", inplace=True)
gdf.drop_duplicates(subset="Object ID", keep="last", inplace=True)
gdf.drop(columns=["area"], inplace=True)
gdf = sjoin_largest_area(self.exposure_geoms[0], occupancy_map[to_keep])

# Remove the objects that do not have a Primary Object Type, that were not
# overlapping with the land use map, or that had a land use type of 'nan'.
Expand Down Expand Up @@ -419,18 +412,50 @@ def setup_aggregation_labels(self):

def setup_ground_floor_height(
self,
ground_floor_height: Union[int, float, None, str, Path],
ground_floor_height: Union[int, float, None, str, Path, List[str], List[Path]],
attr_name: Union[str, List[str], None] = None,
method: Union[str, List[str], None] = "nearest",
) -> None:
# Set the ground floor height column.
# If the Ground Floor Height is input as a number, assign all objects with
# the same Ground Floor Height.
"""Set the ground floor height of the exposure data. This function overwrites
the existing Ground Floor Height column if it already exists.
Parameters
----------
ground_floor_height : Union[int, float, None, str, Path, List[str], List[Path]]
A number to set the Ground Floor Height of all assets to the same value, a
path to a file that contains the Ground Floor Height of each asset, or a
list of paths to files that contain the Ground Floor Height of each asset,
in the order of preference (the first item in the list gets the highest
priority in assigning the values).
attr_name : Union[str, List[str]], optional
The name of the attribute that contains the Ground Floor Height in the
file(s) that are submitted. If multiple `ground_floor_height` files are
submitted, the attribute names are linked to the files in the same order as
the files are submitted. By default None.
method : Union[str, List[str]], optional
The method to use to assign the Ground Floor Height to the assets. If
multiple `ground_floor_height` files are submitted, the methods are linked
to the files in the same order as the files are submitted. The method can
be either 'nearest' (nearest neighbor) or 'intersection'. By default
'nearest'.
"""
if ground_floor_height:
if isinstance(ground_floor_height, int) or isinstance(
ground_floor_height, float
):
# If the Ground Floor Height is input as a number, assign all objects with
# the same Ground Floor Height.
self.exposure_db["Ground Floor Height"] = ground_floor_height
elif isinstance(ground_floor_height, str):
# TODO: implement the option to add the ground floor height from a file.
elif isinstance(ground_floor_height, str) or isinstance(
ground_floor_height, Path
):
# A single file is used to assign the ground floor height to the assets
gfh = self.data_catalog.get_geodataframe(ground_floor_height)
gdf = self.get_full_gdf(self.exposure_db)
gdf = join_spatial_data(gdf, gfh, attr_name, method)
gdf = self._set_values_from_other_column(gdf, "Ground Floor Height", attr_name)
elif isinstance(ground_floor_height, list):
# Multiple files are used to assign the ground floor height to the assets
NotImplemented
else:
# Set the Ground Floor Height to 0 if the user did not specify any
Expand Down Expand Up @@ -463,14 +488,7 @@ def setup_max_potential_damage(

# Calculate the area of each object
gdf = self.get_full_gdf(self.exposure_db)[["Primary Object Type", "geometry"]]
if gdf.crs.is_geographic:
# If the CRS is geographic, reproject to the nearest UTM zone
nearest_utm = utm_crs(gdf.total_bounds)
gdf_utm = gdf.to_crs(nearest_utm)
gdf["area"] = gdf_utm["geometry"].area
elif gdf.crs.is_projected:
# If the CRS is projected, calculate the area in the same CRS
gdf["area"] = gdf["geometry"].area
gdf = get_area(gdf)

# Set the damage values to the exposure data
if damage_types is None:
Expand Down Expand Up @@ -910,17 +928,13 @@ def link_exposure_vulnerability(
len_diff_primary_linking_types = 100000
len_diff_secondary_linking_types = 100000
if "Primary Object Type" in self.exposure_db.columns:
unique_types_primary = set(
self.exposure_db["Primary Object Type"].unique()
)
unique_types_primary = set(self.get_primary_object_type())
diff_primary_linking_types = unique_types_primary - unique_linking_types
len_diff_primary_linking_types = len(diff_primary_linking_types)

unique_types_secondary = set()
if "Secondary Object Type" in self.exposure_db.columns:
unique_types_secondary = set(
self.exposure_db["Secondary Object Type"].unique()
)
unique_types_secondary = set(self.get_secondary_object_type())
diff_secondary_linking_types = (
unique_types_secondary - unique_linking_types
)
Expand Down Expand Up @@ -1281,3 +1295,16 @@ def set_height_relative_to_reference(
)

return exposure_to_modify.reset_index(drop=True)

@staticmethod
def _set_values_from_other_column(
df: Union[pd.DataFrame, gpd.GeoDataFrame], col_to_set: str, col_to_copy: str
) -> Union[pd.DataFrame, gpd.GeoDataFrame]:
"""Sets the values of <col_to_set> to where the values of <col_to_copy> are
nan and deletes <col_to_copy>.
"""
df.loc[df[col_to_copy].notna(), col_to_set] = df.loc[
df[col_to_copy].notna(), col_to_copy
]
del df[col_to_copy]
return df
217 changes: 217 additions & 0 deletions hydromt_fiat/workflows/gis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
import geopandas as gpd
from hydromt.gis_utils import utm_crs, nearest_merge
from typing import List


def get_area(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
"""Adds an area column to a GeoDataFrame.
Parameters
----------
gdf : gpd.GeoDataFrame
The GeoDataFrame to which the area column will be added.
Returns
-------
gpd.GeoDataFrame
The GeoDataFrame with an additional column "area".
"""
# Calculate the area of each object
if gdf.crs.is_geographic:
# If the CRS is geographic, reproject to the nearest UTM zone
nearest_utm = utm_crs(gdf.total_bounds)
gdf_utm = gdf.to_crs(nearest_utm)
gdf["area"] = gdf_utm["geometry"].area
elif gdf.crs.is_projected:
# If the CRS is projected, calculate the area in the same CRS
gdf["area"] = gdf["geometry"].area
return gdf


def sjoin_largest_area(
left_gdf: gpd.GeoDataFrame, right_gdf: gpd.GeoDataFrame, id_col: str = "Object ID"
) -> gpd.GeoDataFrame:
"""Spatial join of two GeoDataFrames, keeping only the joined data from the largest
intersection per object.
Parameters
----------
left_gdf : gpd.GeoDataFrame
The GeoDataFrame to which the data from the right GeoDataFrame will be joined.
right_gdf : gpd.GeoDataFrame
The GeoDataFrame from which the data will be joined to the left GeoDataFrame.
id_col : str, optional
The ID column that will be used to drop the duplicates from overlapping
geometries, by default "Object ID"
Returns
-------
gpd.GeoDataFrame
Resulting GeoDataFrame with the joined data from the largest intersection per
object.
"""
gdf = gpd.overlay(left_gdf, right_gdf, how="intersection")
gdf["area"] = gdf.geometry.area
gdf.sort_values(by="area", inplace=True)
gdf.drop_duplicates(subset=id_col, keep="last", inplace=True)
gdf.drop(columns=["area"], inplace=True)
return gdf


def check_geometry_type(gdf: gpd.GeoDataFrame) -> str:
"""Check if the geometry type of a GeoDataFrame is homogeneous.
Parameters
----------
gdf : gpd.GeoDataFrame
The GeoDataFrame to check.
Raises
------
ValueError
If the geometry type of the GeoDataFrame is not homogeneous.
"""
if len(gdf.geom_type.unique()) > 1:
raise ValueError(
"The geometry type of the GeoDataFrame is not homogeneous. "
f"Geometry types found: {gdf.geom_type.unique()}"
)
else:
return gdf.geom_type.unique()[0]


def get_crs_str_from_gdf(gdf_crs: gpd.GeoDataFrame.crs) -> str:
source_data_authority = gdf_crs.to_authority()
return source_data_authority[0] + ":" + source_data_authority[1]


def clean_up_gdf(gdf: gpd.GeoDataFrame, columns: List[str]) -> gpd.GeoDataFrame:
"""Clean up a GeoDataFrame by removing unnecessary columns.
Parameters
----------
gdf : gpd.GeoDataFrame
The GeoDataFrame to clean up.
Returns
-------
gpd.GeoDataFrame
The cleaned up GeoDataFrame.
"""
# Remove unnecessary columns
for col in columns:
del gdf[col]

return gdf


def join_nearest_points(left_gdf, right_gdf, attribute_name, max_dist) -> gpd.GeoDataFrame:
"""Join two GeoDataFrames based on the nearest distance between their points.
Parameters
----------
left_gdf : gpd.GeoDataFrame
The GeoDataFrame to which the data from the right GeoDataFrame will be joined.
right_gdf : gpd.GeoDataFrame
The GeoDataFrame from which the data will be joined to the left GeoDataFrame.
attribute_name : str
The name of the attribute that will be joined.
max_dist : float
The maximum distance for the nearest join measured in meters.
Returns
-------
gpd.GeoDataFrame
The joined GeoDataFrame.
"""
# Use the HydroMT function nearest_merge to join the geodataframes
gdf_merged = nearest_merge(
gdf1=left_gdf, gdf2=right_gdf, columns=[attribute_name], max_dist=max_dist
)

# Clean up the geodataframe (remove unnecessary columns)
gdf_merged = clean_up_gdf(gdf_merged, ["distance_right", "index_right"])

return gdf_merged


def intersect_points_polygons(left_gdf, right_gdf, attribute_name) -> gpd.GeoDataFrame:
gdf_merged = gpd.sjoin(left_gdf, right_gdf, how="left", predicate="intersects")

# Clean up the geodataframe (remove unnecessary columns)
gdf_merged = clean_up_gdf(gdf_merged, ["index_right"])

return gdf_merged


def join_spatial_data(
left_gdf: gpd.GeoDataFrame,
right_gdf: gpd.GeoDataFrame,
attribute_name: str,
method: str,
max_dist: float = 10,
) -> gpd.GeoDataFrame:
"""Join two GeoDataFrames based on their spatial relationship.
Parameters
----------
left_gdf : gpd.GeoDataFrame
The GeoDataFrame to which the data from the right GeoDataFrame will be joined.
right_gdf : gpd.GeoDataFrame
The GeoDataFrame from which the data will be joined to the left GeoDataFrame.
attribute_name : str
The name of the attribute that will be joined.
method : str
The method that will be used to join the data. Either "nearest" or
"intersection".
max_dist : float, optional
The maximum distance for the nearest join measured in meters, by default
10 (meters).
Returns
-------
gpd.GeoDataFrame
The joined GeoDataFrame.
"""
assert left_gdf.crs == right_gdf.crs, (
"The CRS of the GeoDataFrames to join do not match. "
f"Left CRS: {get_crs_str_from_gdf(left_gdf.crs)}, "
f"Right CRS: {get_crs_str_from_gdf(right_gdf.crs)}"
)

left_gdf_type = check_geometry_type(left_gdf)
right_gdf_type = check_geometry_type(right_gdf)

assert (left_gdf_type == "Polygon") or (left_gdf_type == "Point"), (
"The left GeoDataFrame should contain either polygons or points."
)

assert (right_gdf_type == "Polygon") or (right_gdf_type == "Point"), (
"The right GeoDataFrame should contain either polygons or points."
)

if method == "nearest":
if left_gdf_type == "Polygon":
left_gdf_geom_original = left_gdf.geometry
left_gdf.geometry = left_gdf.geometry.centroid

if right_gdf_type == "Polygon":
right_gdf.geometry = right_gdf.geometry.centroid

gdf = join_nearest_points(left_gdf, right_gdf, attribute_name, max_dist)
left_gdf.geometry = left_gdf_geom_original # TODO: Check if this works well

elif method == "intersection":
if (left_gdf_type == "Polygon") and (right_gdf_type == "Polygon"):
gdf = sjoin_largest_area(left_gdf, right_gdf)
elif (left_gdf_type == "Point") and (right_gdf_type == "Polygon"):
gdf = intersect_points_polygons(left_gdf, right_gdf, attribute_name)
else:
raise NotImplementedError(
f"Join method {method} is not implemented for joining data of geometry "
f"type {left_gdf_type} and {right_gdf_type}"
)
else:
raise NotImplementedError(f"Join method {method} is not implemented")

return gdf
Loading

0 comments on commit fb2670e

Please sign in to comment.