diff --git a/docs/examples/getting-started-importing-preprocessing-dtm-chm.ipynb b/docs/examples/getting-started-importing-preprocessing-dtm-chm.ipynb index bcaa467..7bd8613 100644 --- a/docs/examples/getting-started-importing-preprocessing-dtm-chm.ipynb +++ b/docs/examples/getting-started-importing-preprocessing-dtm-chm.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2024-09-20T09:26:40.870714Z", @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -110,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -138,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -172,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -207,7 +207,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/pyforestscan/calculate.py b/pyforestscan/calculate.py index 76f7ee6..1d2918a 100644 --- a/pyforestscan/calculate.py +++ b/pyforestscan/calculate.py @@ -100,11 +100,13 @@ def calculate_pad(voxel_returns, voxel_height, beer_lambert_constant=None): """ Calculate the Plant Area Density (PAD) using the Beer-Lambert Law. - :param voxel_returns: numpy array representing the returns from each voxel (x, y, z). + :param voxel_returns: numpy array of shape (X, Y, Z) representing + the returns in each voxel column. :param voxel_height: float, height of each voxel. - :param beer_lambert_constant: Optional Beer-Lambert constant, defaults to 1 if not provided. + :param beer_lambert_constant: optional float. If not provided, defaults to 1. - :return: numpy array containing PAD values for each voxel. + :return: A numpy array containing PAD values for each voxel (same shape as voxel_returns). + Columns that have zero returns across all Z are set to NaN. """ shots_in = np.cumsum(voxel_returns[::-1], axis=2)[::-1] shots_through = shots_in - voxel_returns @@ -117,6 +119,9 @@ def calculate_pad(voxel_returns, voxel_height, beer_lambert_constant=None): pad = np.log(division_result) * (1 / (beer_lambert_constant or 1) / voxel_height) pad = np.where(np.isfinite(pad) & (pad > 0), pad, 0) + sum_across_z = np.sum(voxel_returns, axis=2, keepdims=True) + empty_mask = (sum_across_z == 0) + pad = np.where(empty_mask, np.nan, pad) return pad @@ -136,7 +141,11 @@ def calculate_pai(pad, min_height=1, max_height=None): if min_height >= max_height: raise ValueError("Minimum height index must be less than maximum height index.") - pai = np.nansum(pad[:, :, min_height:max_height], axis=2) + slice_3d = pad[:, :, min_height:max_height] + + sum_vals = np.nansum(slice_3d, axis=2) + count_non_nan = np.sum(~np.isnan(slice_3d), axis=2) + pai = np.where(count_non_nan == 0, np.nan, sum_vals) return pai diff --git a/pyforestscan/filters.py b/pyforestscan/filters.py index eb48890..64adfa5 100644 --- a/pyforestscan/filters.py +++ b/pyforestscan/filters.py @@ -47,22 +47,31 @@ def filter_select_ground(arrays): return pipeline.arrays -def remove_outliers_and_clean(arrays, mean_k=8, multiplier=3.0): +def remove_outliers_and_clean(arrays, mean_k=8, multiplier=3.0, remove=False): """ - Removes statistical outliers from the point cloud to enhance data quality. - - :param arrays: list - List of point cloud arrays obtained from read_lidar. - :type point_cloud_arrays: list - :param mean_k: int, number of nearest neighbors for outlier removal. - :param multiplier: float, standard deviation multiplier for outlier removal. - :return: list - Cleaned array of point cloud data without outliers. - :rtype: list - :raises RuntimeError: - If there's an error during the pipeline execution. - :raises ValueError: - If no data is returned after outlier removal. + Processes input arrays by removing statistical outliers and optionally cleans + the data, filtering out specific classifications. + By default, this function only labels outliers as "7" (outlier). + + # todo: this function will be renamed in a future release. + + Args: + arrays: The input arrays to process, typically a list of structured arrays or + similar data types. + mean_k (int, optional): The number of neighbors to analyze for each point for + statistical outlier removal. Default is 8. + multiplier (float, optional): The multiplication factor for the standard deviation + threshold to classify an outlier. Default is 3.0. + remove (bool, optional): If True, additional cleaning is performed to remove points + specifically classified as outliers. Defaults to False. + + Returns: + list: A list of processed arrays after outlier removal, and optionally after + cleaning, if specified. + + Raises: + RuntimeError: If the outlier removal pipeline fails during execution. + ValueError: If no data is returned after applying outlier removal. """ pipeline_stages = [ _filter_statistical_outlier(mean_k=mean_k, multiplier=multiplier) @@ -73,7 +82,15 @@ def remove_outliers_and_clean(arrays, mean_k=8, multiplier=3.0): except RuntimeError as e: raise RuntimeError(f"Outlier Removal Pipeline Failed: {e}") - processed_arrays = pipeline.arrays + if remove: + clean_arrays = [] + for arr in pipeline.arrays: + mask = (arr["Classification"] != 7) + cleaned = arr[mask] + clean_arrays.append(cleaned) + processed_arrays = clean_arrays + else: + processed_arrays = pipeline.arrays if not processed_arrays: raise ValueError("No data returned after outlier removal.") diff --git a/pyforestscan/visualize.py b/pyforestscan/visualize.py index 676502d..fd7b6c2 100644 --- a/pyforestscan/visualize.py +++ b/pyforestscan/visualize.py @@ -2,54 +2,86 @@ import numpy as np -def plot_2d(points, x_dim='X', y_dim='Z', color_map='viridis', alpha=1.0, point_size=1, fig_size=None): +def plot_2d(points, x_dim='X', + y_dim='Z', color_by='HeightAboveGround', + color_map='viridis', colorbar_label=None, + alpha=1.0, point_size=1, + fig_size=None, fig_title=None, + slice_dim=None, slice_val=0.0, + slice_tolerance=5 +): """ - Plots a 2D scatter plot from a set of points with customizable dimensions, colors, and sizes. - - :param points: A DataFrame containing point data with dimensions and 'HeightAboveGround'. - :type points: pandas.DataFrame - :param x_dim: The dimension for the X-axis ('X', 'Y', 'Z', or 'HeightAboveGround'). - :type x_dim: str - :param y_dim: The dimension for the Y-axis ('X', 'Y', 'Z', or 'HeightAboveGround'). - :type y_dim: str - :param color_map: The colormap used to color the points according to 'HeightAboveGround'. - :type color_map: str - :param alpha: The transparency level of the points. - :type alpha: float - :param point_size: The size of each point in the scatter plot. - :type point_size: float - :param fig_size: The size of the figure as a tuple (width, height). If None, it is computed based on the aspect ratio. - :type fig_size: tuple or None - :return: None - :rtype: None - :raises ValueError: If the provided dimensions are not valid. + Plots a 2D scatter plot of data points with customizable axes, coloring, and display settings. + + Args: + points (np.ndarray): A structured array with named columns like 'X','Y','Z','Classification', etc. + x_dim (str): The column name for the horizontal axis. Defaults to 'X'. + y_dim (str): The column name for the vertical axis. Defaults to 'Z'. + color_by (str): The column name used to color the points. Defaults to 'HeightAboveGround'. + If 'HeightAboveGround' doesn't exist, pick e.g. 'Classification' or another column. + color_map (str): A valid matplotlib colormap name. Defaults to 'viridis'. + colorbar_label (str): Label for the colorbar. If None, uses color_by. + alpha (float): Transparency of the points (0 to 1). Defaults to 1.0 (opaque). + point_size (int): Size of the scatter points. Defaults to 1. + fig_size (tuple): (width, height) in inches. If None, auto-derive from data aspect ratio. + fig_title (str): Title of the plot. If None, auto-generated. + slice_dim (str): If provided, dimension to "fix" at a slice_val (e.g. 'Y'). + slice_val (float): The coordinate value at which to slice the slice_dim dimension. + slice_tolerance (float): Allowed absolute difference when matching slice_val. + (Helps with floating-point comparisons.) + + Returns: + None + + Raises: + ValueError: If the `x_dim` or `y_dim` parameter is not one of ['X', 'Y', 'Z', + 'HeightAboveGround']. """ valid_dims = ['X', 'Y', 'Z', 'HeightAboveGround'] if x_dim not in valid_dims or y_dim not in valid_dims: raise ValueError(f"Invalid dimensions. Choose from: {valid_dims}") + required_dims = [x_dim, y_dim, color_by] + if slice_dim is not None: + required_dims.append(slice_dim) + + for dim in required_dims: + if dim not in points.dtype.names: + raise ValueError(f"'{dim}' not found in array dtype names: {points.dtype.names}") + + if slice_dim and slice_dim in points.dtype.names: + mask = np.isclose(points[slice_dim], slice_val, atol=slice_tolerance, rtol=0) + points = points[mask] + + if colorbar_label is None: + colorbar_label = color_by + x = points[x_dim] y = points[y_dim] - colors = points['HeightAboveGround'] + colors = points[color_by] if fig_size is None: aspect_ratio = (np.max(x) - np.min(x)) / (np.max(y) - np.min(y)) fig_size = (10 * aspect_ratio, 10) - max_fig_size = 20 # inches + max_fig_size = 20 if max(fig_size) > max_fig_size: scale_factor = max_fig_size / max(fig_size) fig_size = (fig_size[0] * scale_factor, fig_size[1] * scale_factor) + if fig_title is None: + fig_title = f'{x_dim} vs {y_dim} Colored by {color_by}' + plt.figure(figsize=fig_size) plt.scatter(x, y, c=colors, cmap=color_map, alpha=alpha, s=point_size) plt.xlabel(x_dim) plt.ylabel(y_dim) - plt.title(f'{x_dim} vs {y_dim} Colored by Height Above Ground') - plt.colorbar(label='Height Above Ground (m)') + plt.title(fig_title) + plt.colorbar(label=colorbar_label) plt.show() + def plot_metric(title, metric, extent, metric_name=None, cmap='viridis', fig_size=None): """ Plots a given metric using the provided data and configuration. @@ -93,57 +125,85 @@ def plot_metric(title, metric, extent, metric_name=None, cmap='viridis', fig_siz plt.ylabel('Y') plt.show() -def plot_pad(pad, slice_index, axis='x', cmap='viridis', hag_values=None, horizontal_values=None): + +def plot_pad(pad, slice_index=None, axis='x', cmap='viridis', + hag_values=None, horizontal_values=None, title=None): """ - Plots a 2D slice of Plant Area Density (PAD) data with dZ HAG on the Y-axis. - - :param pad: numpy.ndarray - The 3D Plant Area Density data with shape (X, Y, HAG). - :param slice_index: int - The index of the slice to be plotted along the specified axis. - :param axis: str, optional - The axis along which to slice the data. Default is 'x'. Choose from 'x', 'y'. - :param cmap: str, optional - The colormap to be used for plotting. Default is 'viridis'. - :param hag_values: numpy.ndarray, optional - Array of HAG (Height Above Ground) values corresponding to the third dimension. - If None, will use indices. - :param horizontal_values: numpy.ndarray, optional - Array of horizontal (X or Y) values corresponding to the first or second dimension. - If None, will use indices. - :raises ValueError: If an invalid axis is provided or slice_index is out of bounds. - :return: None - :rtype: None + Plots the plant area density (PAD) data as a 2D image visualization. + + This function projects or slices 3D PAD data based on the specified axis and + optional slice index. It visualizes the density using a colormap and displays + the data with respect to the specified coordinates and labels. + + Args: + pad: A 3D numpy array representing PAD values (shape: dZ x Y x X). + slice_index: Optional; An integer specifying the index of the 2D slice + along the given axis. If None, the PAD data will be collapsed along + the axis using the maximum value. + axis: A string specifying the axis ('x' or 'y') to use for slicing + or projection. Defaults to 'x'. + cmap: A string specifying the colormap to use for visualization. + Defaults to 'viridis'. + hag_values: Optional; A 1D numpy array representing height above ground + (dZ) values. If None, an array from 0 to the size of the dZ axis + will be used. + horizontal_values: Optional; A 1D numpy array representing horizontal + axis values (X or Y, depending on `axis`). If None, an array from + 0 to the corresponding size of the horizontal axis will be used. + title: Optional; A string specifying the title of the plot. If None, an + appropriate default title will be generated based on the input + parameters. + + Returns: + None. The function visualizes the PAD data using matplotlib. + + Raises: + ValueError: If `axis` is not 'x' or 'y'. + ValueError: If `slice_index` is out of the valid range for the specified + axis. + ValueError: If the length of `horizontal_values` does not match the + dimension of the specified axis. """ - # Validate the axis parameter + # Validate axis if axis not in ['x', 'y']: raise ValueError(f"Invalid axis: '{axis}'. Choose from 'x' or 'y'.") + hag_values = hag_values if hag_values is not None else np.arange(pad.shape[2]) + if axis == 'x': - if slice_index < 0 or slice_index >= pad.shape[0]: - raise ValueError(f"slice_index {slice_index} out of range for axis 'x' with size {pad.shape[0]}") - pad_2d = pad[slice_index, :, :] - horizontal_axis_label = 'Y' - horizontal_axis_values = horizontal_values if horizontal_values is not None else np.arange(pad.shape[1]) - else: # axis == 'y' - if slice_index < 0 or slice_index >= pad.shape[1]: - raise ValueError(f"slice_index {slice_index} out of range for axis 'y' with size {pad.shape[1]}") - pad_2d = pad[:, slice_index, :] - horizontal_axis_label = 'X' - horizontal_axis_values = horizontal_values if horizontal_values is not None else np.arange(pad.shape[0]) + if slice_index is None: + pad_2d = pad.max(axis=0) + horizontal_axis_label = 'Y' + horizontal_count = pad.shape[1] + if horizontal_values is not None and len(horizontal_values) != horizontal_count: + raise ValueError("Length of horizontal_values does not match the Y dimension of pad.") + horizontal_axis_values = horizontal_values if horizontal_values is not None else np.arange(horizontal_count) + + else: + if slice_index < 0 or slice_index >= pad.shape[0]: + raise ValueError(f"slice_index {slice_index} out of range for axis 'x' with size {pad.shape[0]}") + pad_2d = pad[slice_index, :, :] + horizontal_axis_label = 'Y' + horizontal_axis_values = horizontal_values if horizontal_values is not None else np.arange(pad.shape[1]) - hag_values = hag_values if hag_values is not None else np.arange(pad.shape[2]) + else: # axis == 'y' + if slice_index is None: + pad_2d = np.nanmax(pad, axis=1) + horizontal_axis_label = 'X' + horizontal_count = pad.shape[0] + if horizontal_values is not None and len(horizontal_values) != horizontal_count: + raise ValueError("Length of horizontal_values does not match the X dimension of pad.") + horizontal_axis_values = horizontal_values if horizontal_values is not None else np.arange(horizontal_count) + + else: + if slice_index < 0 or slice_index >= pad.shape[1]: + raise ValueError(f"slice_index {slice_index} out of range for axis 'y' with size {pad.shape[1]}") + pad_2d = pad[:, slice_index, :] + horizontal_axis_label = 'X' + horizontal_axis_values = horizontal_values if horizontal_values is not None else np.arange(pad.shape[0]) pad_2d = pad_2d.T - if horizontal_values is not None: - if axis == 'x' and len(horizontal_values) != pad.shape[1]: - raise ValueError("Length of horizontal_values does not match the Y dimension of pad.") - if axis == 'y' and len(horizontal_values) != pad.shape[0]: - raise ValueError("Length of horizontal_values does not match the X dimension of pad.") - else: - horizontal_axis_values = np.arange(pad.shape[1]) if axis == 'x' else np.arange(pad.shape[0]) - plt.figure(figsize=(10, 6)) img = plt.imshow( pad_2d, @@ -158,7 +218,12 @@ def plot_pad(pad, slice_index, axis='x', cmap='viridis', hag_values=None, horizo aspect='auto' ) plt.colorbar(img, label='PAD') - plt.title(f'Plant Area Density (PAD) - {horizontal_axis_label} vs dZ at Slice {slice_index}') + if title is None: + if slice_index is None: + title = f'Plant Area Density (PAD) - Collapsed by max along axis {axis}' + else: + title = f'Plant Area Density (PAD) - {horizontal_axis_label} vs dZ at slice {slice_index}' + plt.title(title) plt.xlabel(horizontal_axis_label) plt.ylabel('dZ') plt.tight_layout() diff --git a/tests/test_calculate.py b/tests/test_calculate.py index dbeda26..91024c6 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -140,8 +140,8 @@ def test_calculate_pad_with_zero_shots(): pad = calculate_pad(voxel_returns, voxel_height) assert isinstance(pad, np.ndarray) assert pad.shape == voxel_returns.shape - # PAD should be zero where shots_through == 0 - assert np.all(pad == 0) + # PAD should be nan where shots_through == 0 + assert np.all(np.isnan(pad)) def test_calculate_pad_with_inf_nan_values():