Merge pull request #8 from IGNF/virtual_point

Z on Virtual points
IGNF · Jul 18, 2024 · 1b9fde5 · 1b9fde5
2 parents 6c63990 + a0192f6
commit 1b9fde5
Show file tree

Hide file tree

Showing 28 changed files with 901 additions and 40 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,3 @@
 **/__pycache__
 tmp
-main.log
+**.log
diff --git a/configs/configs_lidro.yaml b/configs/configs_lidro.yaml
@@ -8,6 +8,8 @@ work_dir: ${hydra:runtime.cwd}
 
 io:
   input_filename: null
+  input_mask_hydro: null
+  input_skeleton: null
   input_dir: null
   output_dir: null
   srid: 2154
@@ -17,24 +19,33 @@ io:
   no_data_value: -9999
   tile_size: 1000
 
-raster:
-  # size for dilatation
-  dilation_size: 3
+mask_generation:
+  raster:
+    # size for dilatation
+    dilation_size: 3
+  filter:
+    # Classes to be considered as "non-water"
+    keep_classes: [0, 1, 2, 3, 4, 5, 6, 17, 64, 65, 66, 67] # All classes
+  vector:
+    # Filter water's area (m²)
+    min_water_area: 150
+    # Parameters for buffer
+    buffer_positive: 1
+    buffer_negative: -1.5 # negative buffer should be bigger than positive buffer to prevent protruding over the banks
+    # Tolerance from Douglas-Peucker
+    tolerance: 1
 
-vector:
-  # Filter water's area (m²)
-  min_water_area: 150
-  # Parameters for buffer
-  buffer_positive: 1
-  buffer_negative: -1.5 # negative buffer should be bigger than positive buffer to prevent protruding over the banks
-  # Tolerance from Douglas-Peucker
-  tolerance: 1
-  # distance in meters between 2 consecutive points 
-  distance_meters: 2
-
-filter:
-  # Classes to be considered as "non-water"
-  keep_classes: [0, 1, 2, 3, 4, 5, 6, 17, 64, 65, 66, 67] # All classes
+virtual_point:
+  filter:
+    # Keep ground and water pointclouds between Hydro Mask and Hydro Mask buffer
+    keep_neighbors_classes: [2, 9]
+  vector:
+    # distance in meters between 2 consecutive points 
+    distance_meters: 2
+    # buffer for searching the points classification (default. "2") of the input LAS/LAZ file
+    buffer: 2
+    # The number of nearest neighbors to find with KNeighbors
+    k: 20
 
 hydra:
   output_subdir: null

diff --git a/environment.yml b/environment.yml
@@ -11,7 +11,7 @@ dependencies:
   - scipy
   - geojson
   - rasterio
-  - geopandas
+  - geopandas==0.*
   - pyproj
   - pdal>=2.6
   - python-pdal>=3.2.1
@@ -24,4 +24,6 @@ dependencies:
   - flake8
   - isort
   - pre-commit
+  - pip:
+    - -r requirements.txt
 
diff --git a/lidro/create_virtual_point/pointcloud/crop_las.py b/lidro/create_virtual_point/pointcloud/crop_las.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+""" Filter pointcloud to keep only the ground class """
+from typing import List
+
+import numpy as np
+import pdal
+from shapely.geometry import MultiPolygon
+
+
+def read_filter_and_crop_pointcloud(input_points: str, geom: MultiPolygon, classes: List[int]) -> np.array:
+    """Filter pointcloud to keep only the ground class
+
+    Args:
+        input_points (str): Path to the input LAS/LAZ file
+        geom (MultiPolygon):  An array of WKT or GeoJSON 2D MultiPolygon (Mask Hydro with buffer)
+        classes (List[int]): Classification values to keep for input points
+
+    Returns:
+        np.array : Numpy array containing point coordinates (X, Y, Z) after filtering and croping
+    """
+    # Crop pointcloud by point
+    pipeline = (
+        pdal.Reader.las(filename=input_points, nosrs=True)
+        | pdal.Filter.range(
+            limits=",".join(f"Classification[{v}:{v}]" for v in classes),
+        )
+        | pdal.Filter.crop(
+            polygon=geom,
+        )
+    )
+    pipeline.execute()
+    # extract points
+    cropped_points = pipeline.arrays[0]
+
+    return np.array([cropped_points["X"], cropped_points["Y"], cropped_points["Z"]]).T
diff --git a/lidro/create_virtual_point/stats/calculate_stat.py b/lidro/create_virtual_point/stats/calculate_stat.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+""" Calculates statistics from np.array (ex. pointcloud) """
+import numpy as np
+
+
+def calculate_percentile(points: np.array, q: int) -> float:
+    """
+    Calculates the quartile's value of Z coordinates
+
+    Parameters:
+        - points (numpy.ndarray): An array of 3D points [x, y, z]
+        - q (int): Percentage for the percentile to compute.
+                   Values must be between 0 and 100 inclusive.
+
+    Returns:
+        - float: The quartile of Z coordinates
+    """
+    altitudes = points[:, 2]  # Extract the altitude column
+    n_quartile = np.round(np.percentile(altitudes, q), 2)
+
+    return n_quartile
+
+
+def calculate_median(points: np.array) -> float:
+    """
+    Calculates the median's value of Z coordinates
+
+    Parameters:
+        - points (numpy.ndarray): An array of 3D points [x, y, z]
+
+    Returns:
+        - float: The median of Z coordinates
+    """
+    altitudes = points[:, 2]  # Extract the altitude column
+    n_median = np.round(np.median(altitudes), 2)
+
+    return n_median
diff --git a/lidro/create_virtual_point/stats/knn_distance.py b/lidro/create_virtual_point/stats/knn_distance.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+""" KNN """
+import numpy as np
+from sklearn.neighbors import NearestNeighbors
+
+
+def find_k_nearest_neighbors(points_skeleton: np.array, points_ground_3d: np.array, k: int) -> np.array:
+    """Finds the K nearest neighbors of a given point from a list of 3D points
+
+    Args:
+        points_skeleton (np.array): a single point representing the skeleton Hydro
+                                    an array of 2D point [x, y]
+        points_ground_3d (np.array): ground's points from LIDAR.
+                                    an array of 3D points [x, y, z]
+        k (int): the number of nearest neighbors to find
+
+    Returns:
+        numpy.ndarray: an array of the K nearest 3D points [x, y, z]
+    """
+    # Extract 2D coordinates (x, y) from the 3D points
+    points_ground_2d = points_ground_3d[:, :2]
+
+    # Convert point_2d to nump.array
+    points_skeleton_array = np.array(points_skeleton).reshape(1, -1)
+
+    # Initialize and fit the NearestNeighbors model
+    nbrs = NearestNeighbors(n_neighbors=k, algorithm="auto").fit(points_ground_2d)
+
+    # Find the distances and indices of the nearest neighbors
+    indices = nbrs.kneighbors(points_skeleton_array, return_distance=False)
+
+    # Retrieve the points corresponding to the indices
+    # Use indices  to retrieve the closest 3D points
+    k_nearest_points = points_ground_3d[indices[0]]
+    # = points_3d[indices.flatten()]
+
+    return k_nearest_points
diff --git a/lidro/create_virtual_point/vectors/clip_points_with_bounding_box.py b/lidro/create_virtual_point/vectors/clip_points_with_bounding_box.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+""" Clip Skeleton points by tile (pointcloud)
+"""
+import geopandas as gpd
+from shapely.geometry import box
+
+
+def clip_points_with_box(gdf_points: gpd.GeoDataFrame, bbox: tuple) -> gpd.GeoDataFrame:
+    """Clip skeleton points by tile (bounding box)
+
+    Args:
+        gdf_points (gpd.GeoDataFrame): Points every 2 meters (by default) along skeleton hydro
+        bbox (tuple): bounding box from tile (pointcloud)
+
+    Returns:
+       gpd.GeoDataframe : Points every 2 meters (by default) along skeleton hydro by tile
+    """
+    # Extract the bounding box limits
+    xmin = bbox[0][0]
+    xmax = bbox[0][1]
+    ymin = bbox[1][0]
+    ymax = bbox[1][1]
+
+    # Create a polygon representing the bounding box
+    bounding_box = box(xmin, ymin, xmax, ymax)
+
+    # Clip points using the bounding box
+    clipped_points = gdf_points[gdf_points.within(bounding_box)]
+
+    return clipped_points
diff --git a/lidro/create_virtual_point/vectors/extract_points_around_skeleton.py b/lidro/create_virtual_point/vectors/extract_points_around_skeleton.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+""" Extract points around skeleton by tile
+"""
+import logging
+import os
+from typing import List
+
+import geopandas as gpd
+from pdaltools.las_info import las_get_xy_bounds
+from shapely.geometry import Point
+
+from lidro.create_virtual_point.pointcloud.crop_las import (
+    read_filter_and_crop_pointcloud,
+)
+from lidro.create_virtual_point.vectors.clip_points_with_bounding_box import (
+    clip_points_with_box,
+)
+from lidro.create_virtual_point.vectors.las_around_point import filter_las_around_point
+
+
+def extract_points_around_skeleton_points_one_tile(
+    filename: str,
+    input_dir: str,
+    input_mask_hydro_buffer: gpd.GeoDataFrame,
+    points_skeleton_gdf: gpd.GeoDataFrame,
+    classes: List[int:int],
+    k: int,
+):
+    """Severals steps :
+        1-  Crop filtered pointcloud by Mask Hydro with buffer
+        2-  Extract a Z elevation value along the hydrographic skeleton
+
+    Args:
+        filename (str): filename to the LAS file
+        input_dir (str): input folder
+        input_mask_hydro_buffer (gpd.GeoDataFrame): hydro mask with buffer
+        points_skeleton_gdf (gpd.GeoDataFrame): Points every 2 meters (by default) along skeleton hydro
+        classes (List):  List of classes to use for the filtering
+        k (int): the number of nearest neighbors to find
+
+    Returns:
+        points_clip (np.array) : Numpy array containing point coordinates (X, Y, Z) after filtering and croping
+    """
+    # Step 1 : Crop filtered pointcloud by Mask Hydro with buffer
+    input_dir_points = os.path.join(input_dir, "pointcloud")
+    tilename = os.path.splitext(filename)[0]  # filename to the LAS file
+    input_pointcloud = os.path.join(input_dir_points, filename)  # path to the LAS file
+    logging.info(f"\nCroping filtered pointcloud by Mask Hydro with buffer for tile : {tilename}")
+    # Croping filtered pointcloud by Mask Hydro with buffer for tile
+    points_clip = read_filter_and_crop_pointcloud(input_pointcloud, str(input_mask_hydro_buffer), classes)
+    logging.info(f"\nCropping skeleton points for tile: {tilename}")
+    # Extract bounding box for clipping points by tile
+    bbox = las_get_xy_bounds(input_pointcloud)
+    points_skeleton_clip = clip_points_with_box(points_skeleton_gdf, bbox)
+    # Create list with Skeleton's points with Z for step 4
+    points_skeleton_with_z_clip = [
+        ([geom.x, geom.y]) for geom in points_skeleton_clip.geometry if isinstance(geom, Point)
+    ]
+    # Step 2 : Extract a Z elevation value along the hydrographic skeleton
+    logging.info(f"\nExtract a Z elevation value along the hydrographic skeleton for tile : {tilename}")
+    result = filter_las_around_point(points_skeleton_with_z_clip, points_clip, k)
+
+    return result
diff --git a/lidro/create_virtual_point/vectors/las_around_point.py b/lidro/create_virtual_point/vectors/las_around_point.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+""" Extract a Z elevation value every N meters along the hydrographic skeleton
+"""
+from typing import List
+
+import numpy as np
+from shapely.geometry import Point
+
+from lidro.create_virtual_point.stats.calculate_stat import calculate_percentile
+from lidro.create_virtual_point.stats.knn_distance import find_k_nearest_neighbors
+
+
+def filter_las_around_point(points_skeleton: List, points_clip: np.array, k: int) -> List:
+    """Extract a Z elevation value every N meters along the hydrographic skeleton
+
+    Args:
+        points_skeleton (list) : points every N meters (by default: 2) along skeleton Hydro
+        points_clip (np.array): Numpy array containing point coordinates (X, Y, Z) after filtering and croping
+        k (int): The number of nearest neighbors to find
+
+    Returns:
+        List : Result {'geometry': Point 3D, 'z_q1': points KNN}
+    """
+    # Finds the K nearest neighbors of a given point from a list of 3D points
+    points_knn_list = [
+        ({"geometry": Point(point), "points_knn": find_k_nearest_neighbors(point, points_clip, k)})
+        for point in points_skeleton
+    ]
+
+    # Calcule Z "Q1" for each points every N meters along skeleton hydro
+    results = [
+        ({"geometry": p["geometry"], "z_q1": calculate_percentile(p["points_knn"], 10)})
+        for p in points_knn_list
+        if not np.all(np.isnan(p["points_knn"]))
+    ]
+
+    return results
diff --git a/lidro/create_virtual_point/vectors/mask_hydro_with_buffer.py b/lidro/create_virtual_point/vectors/mask_hydro_with_buffer.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+""" Extract a Z elevation value every N meters along the hydrographic skeleton
+"""
+import geopandas as gpd
+from shapely.geometry import CAP_STYLE
+
+
+def import_mask_hydro_with_buffer(file_mask: str, buffer: float, crs: str | int) -> gpd.GeoDataFrame:
+    """Apply buffer (2 meters by default) from Mask Hydro
+
+    Args:
+        file_mask (str): Path from Mask Hydro
+        buffer (float): buffer to apply to the input geometry
+        crs (str | int): Make a CRS from an EPSG code : CRS WKT string
+
+    Returns:
+        gpd.GeoDataFrame : geometry columns are encoded to WKT
+    """
+    # Read Mask Hydro merged
+    gdf = gpd.read_file(file_mask, crs=crs).unary_union
+
+    # Apply buffer (2 meters by default) from Mask Hydro
+    gdf_buffer = gdf.buffer(buffer, cap_style=CAP_STYLE.square)
+
+    # Return a polygon representing the limit of the bank with a buffer of N meters
+    limit_bank = gdf_buffer.difference(gdf)
+
+    return limit_bank
diff --git a/lidro/main_create_mask.py b/lidro/main_create_mask.py
@@ -45,8 +45,8 @@ def main(config: DictConfig):
     pixel_size = config.io.pixel_size
     tile_size = config.io.tile_size
     crs = CRS.from_user_input(config.io.srid)
-    classe = config.filter.keep_classes
-    dilation_size = config.raster.dilation_size
+    classe = config.mask_generation.filter.keep_classes
+    dilation_size = config.mask_generation.raster.dilation_size
 
     def main_on_one_tile(filename):
         """Lauch main.py on one tile