1 change: 1 addition & 0 deletions .gitignore
@@ -174,5 +174,6 @@ test_data/output.tif

# tifs created from tests
*.tif
+!docs/example_data/USGS_1M_4_x83y219_HI_Hawaii_Island_Lidar_NOAA_2017_B17_cropped_3857.tif

docs/example_data/ept
Binary file modified docs/example_data/test_segment.gpkg
Binary file not shown.
47 changes: 30 additions & 17 deletions docs/examples/calculate-forest-metrics.ipynb

Large diffs are not rendered by default.

@@ -138,7 +138,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 7,
+  "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -147,7 +147,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 8,
+  "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -163,16 +163,16 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 9,
+  "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
-   "create_geotiff(chm, \"../example_data/20191210_5QKB020880_DS05_chm.tif\", \"EPSG:32605\", extent)"
+   "create_geotiff(chm, \"/Users/iosefa/repos/obia/docs/example_data/chm037.tif\", \"EPSG:32605\", extent)"
   ]
  },
  {
   "cell_type": "code",
-  "execution_count": 10,
+  "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
@@ -189,6 +189,13 @@
   "source": [
    "plot_metric(\"Canopy Height Model\", chm, extent, metric_name='Height (m)', cmap='viridis', fig_size=None)"
   ]
-  }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
  ],
 "metadata": {
266 changes: 227 additions & 39 deletions docs/examples/working-with-large-point-clouds.ipynb

Large diffs are not rendered by default.

125 changes: 79 additions & 46 deletions pyforestscan/process.py
@@ -1,49 +1,55 @@
import json

-from pyforestscan.calculate import calculate_fhd, calculate_pad, calculate_pai, assign_voxels, calculate_chm
-from pyforestscan.handlers import create_geotiff
-
-import pdal
import numpy as np
import os
+import pdal

from tqdm import tqdm

+from pyforestscan.calculate import calculate_fhd, calculate_pad, calculate_pai, assign_voxels, calculate_chm
+from pyforestscan.filters import remove_outliers_and_clean
+from pyforestscan.handlers import create_geotiff
from pyforestscan.pipeline import _hag_raster, _hag_delaunay
+from pyforestscan.utils import get_bounds_from_ept, get_srs_from_ept
+
+import tempfile
+import os
+import rasterio
+from rasterio.windows import from_bounds


-def get_bounds(ept_file):
-    """
-    Extracts the spatial bounds of a point cloud from an ept file using PDAL.
-
-    :param ept_file: Path to the ept file containing the point cloud data.
-    :return: A tuple with bounds in the format (min_x, max_x, min_y, max_y).
-    """
-    pipeline_json = f"""
-    {{
-        "pipeline": [
-            "{ept_file}",
-            {{
-                "type": "filters.info"
-            }}
-        ]
-    }}
-    """
-    pipeline = pdal.Pipeline(pipeline_json)
-    pipeline.execute()
-    metadata = pipeline.metadata['metadata']
-    try:
-        min_x = metadata['filters.info']['bbox']['minx']
-        max_x = metadata['filters.info']['bbox']['maxx']
-        min_y = metadata['filters.info']['bbox']['miny']
-        max_y = metadata['filters.info']['bbox']['maxy']
-        return min_x, max_x, min_y, max_y
-    except KeyError:
-        raise KeyError("Bounds information is not available in the metadata.")
+def _crop_dtm(dtm_path, tile_min_x, tile_min_y, tile_max_x, tile_max_y):
+    """
+    Crops the input DTM TIFF to the given bounding box (in the same CRS),
+    saves it to a temporary file, and returns the path to that temp file.
+    """
+    with rasterio.open(dtm_path) as src:
+        window = from_bounds(
+            left=tile_min_x, bottom=tile_min_y,
+            right=tile_max_x, top=tile_max_y,
+            transform=src.transform
+        )
+        data = src.read(1, window=window)
+        new_transform = src.window_transform(window)
+
+        fd, cropped_path = tempfile.mkstemp(suffix=".tif")
+        os.close(fd)
+
+        new_profile = src.profile.copy()
+        new_profile.update({
+            "height": data.shape[0],
+            "width": data.shape[1],
+            "transform": new_transform
+        })
+
+    with rasterio.open(cropped_path, "w", **new_profile) as dst:
+        dst.write(data, 1)
+
+    return cropped_path
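For orientation, a minimal usage sketch of the new _crop_dtm helper (the DTM path and coordinates below are hypothetical, not from this PR). It returns a temporary GeoTIFF clipped to one tile's bounds and leaves cleanup to the caller:

    # Hypothetical inputs: a DTM in the same CRS as the tile bounds.
    cropped = _crop_dtm("dtm.tif", 500000.0, 2150000.0, 500250.0, 2150250.0)
    # ... feed it to a PDAL HAG stage, e.g. _hag_raster(cropped), and run the pipeline ...
    os.remove(cropped)  # the temp file is never deleted automatically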


def process_with_tiles(ept_file, tile_size, output_path, metric, voxel_size, buffer_size=0.1, srs=None, hag=False,
-                       hag_dtm=False, dtm=None, bounds=None):
+                       hag_dtm=False, dtm=None, bounds=None, interpolation=None, remove_outliers=False):
    """
    Processes a large EPT point cloud by tiling, calculates CHM or other metrics for each tile,
    and writes the results to the specified output path.
@@ -58,8 +64,25 @@ def process_with_tiles(ept_file, tile_size, output_path, metric, voxel_size, buf
    :param hag: Boolean indicating whether to compute Height Above Ground using Delaunay triangulation.
    :param hag_dtm: Boolean indicating whether to compute Height Above Ground using a provided DTM raster.
    :param dtm: Path to the DTM raster file (required if hag_dtm is True).
+    :param interpolation: Interpolation method to use for CHM calculation (e.g., "linear", "cubic", "nearest", or None).
    :param bounds: Bounds within which to crop the data. Must be of the form: ([xmin, xmax], [ymin, ymax], [zmin, zmax]) or ([xmin, xmax], [ymin, ymax]). If none is given, tiling will happen on the entire dataset.
+    :param remove_outliers: Boolean indicating whether to remove statistical outliers before calculating metrics.
    """
-    min_x, max_x, min_y, max_y = get_bounds(ept_file)
+    if metric not in ["chm", "fhd", "pai"]:
+        raise ValueError(f"Unsupported metric: {metric}")
+
+    (min_z, max_z) = (None, None)
+    if bounds:
+        if len(bounds) == 2:
+            (min_x, max_x), (min_y, max_y) = bounds
+        else:
+            (min_x, max_x), (min_y, max_y), (min_z, max_z) = bounds
+    else:
+        min_x, max_x, min_y, max_y, min_z, max_z = get_bounds_from_ept(ept_file)
+
+    if not srs:
+        srs = get_srs_from_ept(ept_file)
+
    num_tiles_x = int(np.ceil((max_x - min_x) / tile_size[0]))
    num_tiles_y = int(np.ceil((max_y - min_y) / tile_size[1]))
    total_tiles = num_tiles_x * num_tiles_y
@@ -92,30 +115,38 @@ def process_with_tiles(ept_file, tile_size, output_path, metric, voxel_size, buf
                pbar.update(1)
                continue

-            tile_pipeline_stages = [
-                {
-                    "type": "filters.crop",
-                    "bounds": f"([{tile_min_x},{tile_max_x}], [{tile_min_y},{tile_max_y}])"
-                }
-            ]
+            if min_z and max_z:
+                tile_bounds = ([tile_min_x, tile_max_x], [tile_min_y, tile_max_y], [min_z, max_z])
+            else:
+                tile_bounds = ([tile_min_x, tile_max_x], [tile_min_y, tile_max_y])
+            tile_pipeline_stages = []

            if hag:
                tile_pipeline_stages.append(_hag_delaunay())
            elif hag_dtm:
                if not dtm or not os.path.isfile(dtm):
                    raise FileNotFoundError(f"DTM file is required for HAG calculation using DTM: {dtm}")
-                tile_pipeline_stages.append(_hag_raster(dtm))
-
-            base_pipeline = {"type": "readers.ept", "filename": ept_file}
-            if bounds:
-                base_pipeline["bounds"] = f"{bounds}"
+                cropped_dtm_path = _crop_dtm(
+                    dtm,
+                    tile_min_x, tile_min_y,
+                    tile_max_x, tile_max_y
+                )
+                tile_pipeline_stages.append(_hag_raster(cropped_dtm_path))
+
+            base_pipeline = {
+                "type": "readers.ept",
+                "filename": ept_file,
+                "bounds": f"{tile_bounds}",
+            }
            tile_pipeline_json = {
                "pipeline": [base_pipeline] + tile_pipeline_stages
            }

            tile_pipeline = pdal.Pipeline(json.dumps(tile_pipeline_json))
            tile_pipeline.execute()
-            tile_points = tile_pipeline.arrays[0]
+            if remove_outliers:
+                tile_points = remove_outliers_and_clean(tile_pipeline.arrays)[0]
+            else:
+                tile_points = tile_pipeline.arrays[0]

            if tile_points.size == 0:
                print(f"Warning: No data in tile ({i}, {j}). Skipping.")
@@ -126,7 +157,7 @@ def process_with_tiles(ept_file, tile_size, output_path, metric, voxel_size, buf
            buffer_pixels_y = int(np.ceil(buffer_y / voxel_size[1]))

            if metric == "chm":
-                chm, extent = calculate_chm(tile_points, voxel_size)
+                chm, extent = calculate_chm(tile_points, voxel_size, interpolation=interpolation)

                if buffer_pixels_x * 2 >= chm.shape[1] or buffer_pixels_y * 2 >= chm.shape[0]:
                    print(
@@ -162,7 +193,9 @@ def process_with_tiles(ept_file, tile_size, output_path, metric, voxel_size, buf
            if current_buffer_size > 0:
                if buffer_pixels_x * 2 >= result.shape[1] or buffer_pixels_y * 2 >= result.shape[0]:
                    print(
-                        f"Warning: Buffer size exceeds {metric.upper()} dimensions for tile ({i}, {j}). Adjusting buffer size.")
+                        f"Warning: Buffer size exceeds {metric.upper()} dimensions for tile ({i}, {j}). "
+                        f"Adjusting buffer size."
+                    )
                buffer_pixels_x = max(0, result.shape[1] // 2 - 1)
                buffer_pixels_y = max(0, result.shape[0] // 2 - 1)
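Putting the pieces together, a hedged sketch of a call to the updated entry point using the new parameters (the EPT, DTM, and output paths are placeholders, not from this PR):

    process_with_tiles(
        ept_file="ept/ept.json",      # placeholder EPT root
        tile_size=(250.0, 250.0),     # tile width/height in CRS units
        output_path="metric_tiles",   # placeholder output location
        metric="chm",                 # must be one of "chm", "fhd", "pai"
        voxel_size=(1.0, 1.0, 1.0),
        hag_dtm=True,
        dtm="dtm.tif",                # now cropped per tile via _crop_dtm
        interpolation="linear",       # new: forwarded to calculate_chm
        remove_outliers=True          # new: per-tile remove_outliers_and_clean
    )

With srs and bounds omitted, both are now derived from the EPT metadata via get_srs_from_ept and get_bounds_from_ept.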
142 changes: 142 additions & 0 deletions pyforestscan/utils.py
@@ -0,0 +1,142 @@
import json
import os
import math
import numpy as np
import requests

from pyforestscan.handlers import read_lidar, write_las


def _read_ept_json(ept_source):
    if any(ept_source.lower().startswith(proto) for proto in ["http://", "https://", "s3://"]):
        r = requests.get(ept_source)
        r.raise_for_status()
        ept_json = r.json()
    else:
        if not os.path.isfile(ept_source):
            raise FileNotFoundError(f"EPT JSON file not found at {ept_source}")
        with open(ept_source, "r") as f:
            ept_json = json.load(f)
    return ept_json


def get_srs_from_ept(ept_file):
    """
    Extracts the Spatial Reference System (SRS) from an EPT (Entwine Point Tile) file.

    This function reads the EPT JSON file and retrieves the SRS information, if available.
    The SRS is returned in the format '{authority}:{horizontal}'.

    :param ept_file: Path to the ept file containing the point cloud data.
    :return: The SRS string in the format '{authority}:{horizontal}' if available,
        otherwise None.
    """
    ept_json = _read_ept_json(ept_file)

    srs_obj = ept_json.get("srs", {})
    authority = srs_obj.get("authority", "")
    horizontal = srs_obj.get("horizontal", "")
    if authority and horizontal:
        return f"{authority}:{horizontal}"
    else:
        return None
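# Illustration (hypothetical values): an EPT "srs" block such as
#   {"authority": "EPSG", "horizontal": "32605"}
# yields "EPSG:32605" here; if either key is missing, None is returned.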


def get_bounds_from_ept(ept_file):
    """
    Extracts the spatial bounds of a point cloud from an ept file.

    :param ept_file: Path to the ept file containing the point cloud data.
    :return: A tuple with bounds in the format (min_x, max_x, min_y, max_y, min_z, max_z).
    """
    ept_json = _read_ept_json(ept_file)

    try:
        bounds = ept_json["bounds"]
    except KeyError:
        raise KeyError("Bounds information is not available in the ept metadata.")
    # EPT stores bounds as [xmin, ymin, zmin, xmax, ymax, zmax]; reorder to
    # match the documented return format and the unpacking in process.py.
    min_x, min_y, min_z, max_x, max_y, max_z = bounds
    return min_x, max_x, min_y, max_y, min_z, max_z
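# Note (hypothetical values): EPT metadata such as
#   {"bounds": [500000, 2150000, 0, 501000, 2151000, 120]}
# is returned here as (500000, 501000, 2150000, 2151000, 0, 120).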


def tile_las_in_memory(
    las_file,
    tile_width,
    tile_height,
    overlap,
    output_dir,
    srs=None
):
    """
    Reads the entire LAS file into memory, then subdivides it into tiles
    with a specified overlap on the right and bottom edges.

    :param las_file: Path to the source .las/.laz/.copc/.copc.laz file
    :param tile_width: Tile width in map units (meters if in UTM)
    :param tile_height: Tile height in map units
    :param overlap: Overlap (meters) to apply on the right & bottom edges
    :param output_dir: Directory where the tiled outputs will be written
    :param srs: Spatial reference for the input data (e.g., 'EPSG:32610').
        Pass None if not needed or if the file already has it.
    """
    arrays = read_lidar(
        input_file=las_file,
        srs=srs,
        bounds=None,
        thin_radius=None,
        hag=False,
        hag_dtm=False,
        dtm=None,
        crop_poly=False,
        poly=None
    )
    if not arrays or len(arrays) == 0 or arrays[0].size == 0:
        print(f"No data found in {las_file}. Exiting.")
        return

    big_cloud = arrays[0]

    min_x = np.min(big_cloud['X'])
    max_x = np.max(big_cloud['X'])
    min_y = np.min(big_cloud['Y'])
    max_y = np.max(big_cloud['Y'])

    total_width = max_x - min_x
    total_height = max_y - min_y

    step_x = tile_width - overlap
    step_y = tile_height - overlap
    num_tiles_x = max(1, math.ceil(total_width / step_x))
    num_tiles_y = max(1, math.ceil(total_height / step_y))

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    tile_index = 0
    for i in range(num_tiles_x):
        tile_min_x = min_x + i * step_x
        tile_max_x = tile_min_x + tile_width
        if tile_max_x > max_x:
            tile_max_x = max_x
        if tile_min_x >= max_x:
            break
        for j in range(num_tiles_y):
            tile_min_y = min_y + j * step_y
            tile_max_y = tile_min_y + tile_height
            if tile_max_y > max_y:
                tile_max_y = max_y
            if tile_min_y >= max_y:
                break
            in_tile = (
                (big_cloud['X'] >= tile_min_x) & (big_cloud['X'] < tile_max_x) &
                (big_cloud['Y'] >= tile_min_y) & (big_cloud['Y'] < tile_max_y)
            )
            tile_points = big_cloud[in_tile]
            if tile_points.size == 0:
                continue

            tile_index += 1
            out_path = os.path.join(output_dir, f"tile_{tile_index}.las")

            write_las([tile_points], out_path, srs=srs, compress=False)
            print(f"Created tile: {out_path}")
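A short usage sketch for the in-memory tiler (file and directory names are hypothetical):

    tile_las_in_memory(
        las_file="survey.las",   # placeholder input point cloud
        tile_width=500.0,
        tile_height=500.0,
        overlap=20.0,            # map units retained on the right/bottom edges
        output_dir="tiles",      # created if it does not exist
        srs="EPSG:32605"         # only needed if the file lacks CRS metadata
    )

Because the whole cloud is loaded at once, this path suits files that fit in RAM; the EPT tiling in process.py remains the route for larger datasets.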
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,4 +1,5 @@
setuptools~=75.1.0
+requests~=2.32.3
rasterio~=1.3.11
pdal~=3.4.5
geopandas~=1.0.1