iosefa · iosefa · Mar 23, 2025 · Mar 17, 2025 · Mar 23, 2025 · Mar 23, 2025
diff --git a/docs/examples/getting-started-importing-preprocessing-dtm-chm.ipynb b/docs/examples/getting-started-importing-preprocessing-dtm-chm.ipynb
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2024-09-20T09:26:40.870714Z",
@@ -77,7 +77,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -94,7 +94,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -110,7 +110,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -138,7 +138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -147,7 +147,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -163,7 +163,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -172,7 +172,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -207,7 +207,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.12.9"
   }
  },
  "nbformat": 4,

diff --git a/pyforestscan/calculate.py b/pyforestscan/calculate.py
@@ -100,11 +100,13 @@ def calculate_pad(voxel_returns, voxel_height, beer_lambert_constant=None):
     """
     Calculate the Plant Area Density (PAD) using the Beer-Lambert Law.
 
-    :param voxel_returns: numpy array representing the returns from each voxel (x, y, z).
+    :param voxel_returns: numpy array of shape (X, Y, Z) representing
+                          the returns in each voxel column.
     :param voxel_height: float, height of each voxel.
-    :param beer_lambert_constant: Optional Beer-Lambert constant, defaults to 1 if not provided.
+    :param beer_lambert_constant: optional float. If not provided, defaults to 1.
 
-    :return: numpy array containing PAD values for each voxel.
+    :return: A numpy array containing PAD values for each voxel (same shape as voxel_returns).
+             Columns that have zero returns across all Z are set to NaN.
     """
     shots_in = np.cumsum(voxel_returns[::-1], axis=2)[::-1]
     shots_through = shots_in - voxel_returns
@@ -117,6 +119,9 @@ def calculate_pad(voxel_returns, voxel_height, beer_lambert_constant=None):
         pad = np.log(division_result) * (1 / (beer_lambert_constant or 1) / voxel_height)
 
     pad = np.where(np.isfinite(pad) & (pad > 0), pad, 0)
+    sum_across_z = np.sum(voxel_returns, axis=2, keepdims=True)
+    empty_mask = (sum_across_z == 0)
+    pad = np.where(empty_mask, np.nan, pad)
 
     return pad
 
@@ -136,7 +141,11 @@ def calculate_pai(pad, min_height=1, max_height=None):
     if min_height >= max_height:
         raise ValueError("Minimum height index must be less than maximum height index.")
 
-    pai = np.nansum(pad[:, :, min_height:max_height], axis=2)
+    slice_3d = pad[:, :, min_height:max_height]
+
+    sum_vals = np.nansum(slice_3d, axis=2)
+    count_non_nan = np.sum(~np.isnan(slice_3d), axis=2)
+    pai = np.where(count_non_nan == 0, np.nan, sum_vals)
 
     return pai
 

diff --git a/pyforestscan/filters.py b/pyforestscan/filters.py
@@ -47,22 +47,31 @@ def filter_select_ground(arrays):
     return pipeline.arrays
 
 
-def remove_outliers_and_clean(arrays, mean_k=8, multiplier=3.0):
+def remove_outliers_and_clean(arrays, mean_k=8, multiplier=3.0, remove=False):
     """
-    Removes statistical outliers from the point cloud to enhance data quality.
-
-    :param arrays: list
-        List of point cloud arrays obtained from read_lidar.
-    :type point_cloud_arrays: list
-    :param mean_k: int, number of nearest neighbors for outlier removal.
-    :param multiplier: float, standard deviation multiplier for outlier removal.
-    :return: list
-        Cleaned array of point cloud data without outliers.
-    :rtype: list
-    :raises RuntimeError:
-        If there's an error during the pipeline execution.
-    :raises ValueError:
-        If no data is returned after outlier removal.
+    Processes input arrays by removing statistical outliers and optionally cleans
+    the data, filtering out specific classifications.
+    By default, this function only labels outliers as "7" (outlier).
+
+    # todo: this function will be renamed in a future release.
+
+    Args:
+        arrays: The input arrays to process, typically a list of structured arrays or
+            similar data types.
+        mean_k (int, optional): The number of neighbors to analyze for each point for
+            statistical outlier removal. Default is 8.
+        multiplier (float, optional): The multiplication factor for the standard deviation
+            threshold to classify an outlier. Default is 3.0.
+        remove (bool, optional): If True, additional cleaning is performed to remove points
+            specifically classified as outliers. Defaults to False.
+
+    Returns:
+        list: A list of processed arrays after outlier removal, and optionally after
+        cleaning, if specified.
+
+    Raises:
+        RuntimeError: If the outlier removal pipeline fails during execution.
+        ValueError: If no data is returned after applying outlier removal.
     """
     pipeline_stages = [
         _filter_statistical_outlier(mean_k=mean_k, multiplier=multiplier)
@@ -73,7 +82,15 @@ def remove_outliers_and_clean(arrays, mean_k=8, multiplier=3.0):
     except RuntimeError as e:
         raise RuntimeError(f"Outlier Removal Pipeline Failed: {e}")
 
-    processed_arrays = pipeline.arrays
+    if remove:
+        clean_arrays = []
+        for arr in pipeline.arrays:
+            mask = (arr["Classification"] != 7)
+            cleaned = arr[mask]
+            clean_arrays.append(cleaned)
+        processed_arrays = clean_arrays
+    else:
+        processed_arrays = pipeline.arrays
 
     if not processed_arrays:
         raise ValueError("No data returned after outlier removal.")