From 835b5b18f70cc87da3f52eeb461dcb79e30043cf Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 19 Feb 2026 10:52:11 -0800 Subject: [PATCH 1/8] GPU-enable all classification ops: equal_interval, quantile, natural_breaks now support all 4 backends (#190) - Add Dask+CuPy backend for equal_interval via _run_dask_cupy_equal_interval - Replace quantile Dask+CuPy NotImplementedError with working implementation that materializes data to CPU for percentile computation - Add CuPy, Dask+NumPy, and Dask+CuPy backends for natural_breaks by extracting shared _compute_natural_break_bins helper - Add 7 new tests covering all new backend combinations - Update README feature matrix to reflect full backend support --- README.md | 6 +- xrspatial/classify.py | 112 +++++++++++++++++++++++++------ xrspatial/tests/test_classify.py | 66 ++++++++++++++++++ 3 files changed, 159 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 23a8d4e4..a3b80aed 100644 --- a/README.md +++ b/README.md @@ -137,10 +137,10 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e | Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | |:----------:|:----------------------:|:--------------------:|:-------------------:|:------:| -| [Equal Interval](xrspatial/classify.py) |✅️ |✅ | ✅ | ️ | -| [Natural Breaks](xrspatial/classify.py) |✅️ | | | ️ | +| [Equal Interval](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | +| [Natural Breaks](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | | [Reclassify](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Quantile](xrspatial/classify.py) |✅️ | ✅ |✅ | ️| +| [Quantile](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | ------- diff --git a/xrspatial/classify.py b/xrspatial/classify.py index 05fe83c5..9f93cb3b 100644 --- a/xrspatial/classify.py +++ b/xrspatial/classify.py @@ -398,10 +398,18 @@ def _run_quantile(data, k, module): def _run_dask_cupy_quantile(data, k): - msg = 'Currently percentile calculation has not' \ - 'been supported for Dask array backed by CuPy.' \ - 'See issue at https://github.com/dask/dask/issues/6942' - raise NotImplementedError(msg) + w = 100.0 / k + p = np.arange(w, 100 + w, w) + if p[-1] > 100.0: + p[-1] = 100.0 + + finite_mask = da.isfinite(data) + finite_data = data[finite_mask].compute() + # transfer from GPU to CPU + finite_data_np = cupy.asnumpy(finite_data) + q = np.percentile(finite_data_np, p) + q = np.unique(q) + return q def _quantile(agg, k): @@ -575,25 +583,28 @@ def _run_jenks(data, n_classes): return kclass -def _run_natural_break(agg, num_sample, k): - data = agg.data - num_data = data.size - max_data = np.max(data[np.isfinite(data)]) +def _compute_natural_break_bins(data_flat_np, num_sample, k, max_data): + """Shared helper: compute natural break bins from a flat numpy array. + + Returns (bins, uvk) where bins is a numpy array of bin edges + and uvk is the number of unique values. 
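+
+    ``data_flat_np`` is the flattened numpy data to classify;
+    ``num_sample``, when not None and smaller than the data size, caps
+    how many points are used to fit the model; ``k`` is the number of
+    classes; ``max_data`` is the maximum finite value, used as the
+    final bin edge.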
+ """ + num_data = data_flat_np.size if num_sample is not None and num_sample < num_data: # randomly select sample from the whole dataset # create a pseudo random number generator - # Note: cupy and nupy generate different random numbers + # Note: cupy and numpy generate different random numbers # use numpy.random to ensure the same result generator = np.random.RandomState(1234567890) idx = np.linspace( - 0, data.size, data.size, endpoint=False, dtype=np.uint32 + 0, num_data, num_data, endpoint=False, dtype=np.uint32 ) generator.shuffle(idx) sample_idx = idx[:num_sample] - sample_data = data.flatten()[sample_idx] + sample_data = data_flat_np[sample_idx] else: - sample_data = data.flatten() + sample_data = data_flat_np # warning if number of total data points to fit the model bigger than 40k if sample_data.size >= 40000: @@ -627,6 +638,45 @@ def _run_natural_break(agg, num_sample, k): bins = np.array(centroids[1:]) bins[-1] = max_data + return bins, uvk + + +def _run_natural_break(agg, num_sample, k): + data = agg.data + max_data = float(np.max(data[np.isfinite(data)])) + data_flat_np = data.flatten() + + bins, uvk = _compute_natural_break_bins(data_flat_np, num_sample, k, max_data) + out = _bin(agg, bins, np.arange(uvk)) + return out + + +def _run_cupy_natural_break(agg, num_sample, k): + data = agg.data + max_data = float(cupy.max(data[cupy.isfinite(data)]).get()) + data_flat_np = cupy.asnumpy(data.ravel()) + + bins, uvk = _compute_natural_break_bins(data_flat_np, num_sample, k, max_data) + out = _bin(agg, bins, np.arange(uvk)) + return out + + +def _run_dask_natural_break(agg, num_sample, k): + data = agg.data + max_data = float(da.max(data[da.isfinite(data)]).compute()) + data_flat_np = np.asarray(data.ravel().compute()) + + bins, uvk = _compute_natural_break_bins(data_flat_np, num_sample, k, max_data) + out = _bin(agg, bins, np.arange(uvk)) + return out + + +def _run_dask_cupy_natural_break(agg, num_sample, k): + data = agg.data + max_data = float(da.max(data[da.isfinite(data)]).compute().item()) + data_flat_np = cupy.asnumpy(data.ravel().compute()) + + bins, uvk = _compute_natural_break_bins(data_flat_np, num_sample, k, max_data) out = _bin(agg, bins, np.arange(uvk)) return out @@ -644,7 +694,8 @@ def natural_breaks(agg: xr.DataArray, Parameters ---------- agg : xarray.DataArray - 2D NumPy DataArray of values to be reclassified. + 2D NumPy, CuPy, NumPy-backed Dask, or CuPy-backed Dask array + of values to be reclassified. num_sample : int, default=20000 Number of sample data points used to fit the model. 
Natural Breaks (Jenks) classification is indeed O(n²) complexity, @@ -715,13 +766,10 @@ def natural_breaks(agg: xr.DataArray, """ mapper = ArrayTypeFunctionMapping( - numpy_func=lambda *args: _run_natural_break(*args), - dask_func=lambda *args: not_implemented_func( - *args, messages='natural_breaks() does not support dask with numpy backed DataArray.'), # noqa - cupy_func=lambda *args: not_implemented_func( - *args, messages='natural_breaks() does not support cupy backed DataArray.'), # noqa - dask_cupy_func=lambda *args: not_implemented_func( - *args, messages='natural_breaks() does not support dask with cupy backed DataArray.'), # noqa + numpy_func=_run_natural_break, + dask_func=_run_dask_natural_break, + cupy_func=_run_cupy_natural_break, + dask_cupy_func=_run_dask_cupy_natural_break, ) out = mapper(agg)(agg, num_sample, k) return xr.DataArray(out, @@ -772,6 +820,27 @@ def _run_equal_interval(agg, k, module): return out +def _run_dask_cupy_equal_interval(agg, k): + data = agg.data.ravel() + + # replace inf with nan + data = da.where(data == np.inf, np.nan, data) + data = da.where(data == -np.inf, np.nan, data) + + min_data = float(da.nanmin(data).compute().item()) + max_data = float(da.nanmax(data).compute().item()) + + width = (max_data - min_data) * 1.0 / k + cuts = np.arange(min_data + width, max_data + width, width) + l_cuts = cuts.shape[0] + if l_cuts > k: + cuts = cuts[0:k] + + cuts[-1] = max_data + out = _bin(agg, cuts, np.arange(l_cuts)) + return out + + def equal_interval(agg: xr.DataArray, k: int = 5, name: Optional[str] = 'equal_interval') -> xr.DataArray: @@ -832,8 +901,7 @@ def equal_interval(agg: xr.DataArray, numpy_func=lambda *args: _run_equal_interval(*args, module=np), dask_func=lambda *args: _run_equal_interval(*args, module=da), cupy_func=lambda *args: _run_equal_interval(*args, module=cupy), - dask_cupy_func=lambda *args: not_implemented_func( - *args, messages='equal_interval() does support dask with cupy backed DataArray.'), # noqa + dask_cupy_func=_run_dask_cupy_equal_interval ) out = mapper(agg)(agg, k) return xr.DataArray(out, diff --git a/xrspatial/tests/test_classify.py b/xrspatial/tests/test_classify.py index 5368239a..be1f81c5 100644 --- a/xrspatial/tests/test_classify.py +++ b/xrspatial/tests/test_classify.py @@ -284,3 +284,69 @@ def test_equal_interval_cupy(result_equal_interval): cupy_agg = input_data(backend='cupy') cupy_result = equal_interval(cupy_agg, k=k) general_output_checks(cupy_agg, cupy_result, expected_result, verify_dtype=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_equal_interval_dask_cupy(result_equal_interval): + k, expected_result = result_equal_interval + dask_cupy_agg = input_data(backend='dask+cupy') + dask_cupy_result = equal_interval(dask_cupy_agg, k=k) + general_output_checks(dask_cupy_agg, dask_cupy_result, expected_result, verify_dtype=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_quantile_dask_cupy(result_quantile): + # Relaxed verification (same pattern as test_quantile_dask_numpy) + # because percentile is computed on CPU from materialized data + dask_cupy_agg = input_data('dask+cupy') + k, expected_result = result_quantile + dask_cupy_quantile = quantile(dask_cupy_agg, k=k) + general_output_checks(dask_cupy_agg, dask_cupy_quantile) + dask_cupy_quantile = dask_cupy_quantile.compute() + import cupy as cp + result_data = cp.asnumpy(dask_cupy_quantile.data) + unique_elements = np.unique(result_data[np.isfinite(result_data)]) + assert len(unique_elements) == k + + 
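+# For reference: _run_quantile probes percentiles w, 2*w, ..., 100 with
+# w = 100 / k (k=5 gives [20, 40, 60, 80, 100]) and deduplicates edges
+# via np.unique; the check above therefore counts distinct classes
+# rather than comparing exact values.
+
+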
+@cuda_and_cupy_available +def test_natural_breaks_cupy(result_natural_breaks): + cupy_agg = input_data('cupy') + k, expected_result = result_natural_breaks + cupy_natural_breaks = natural_breaks(cupy_agg, k=k) + general_output_checks(cupy_agg, cupy_natural_breaks, expected_result, verify_dtype=True) + + +@dask_array_available +def test_natural_breaks_dask_numpy(result_natural_breaks): + dask_agg = input_data('dask+numpy') + k, expected_result = result_natural_breaks + dask_natural_breaks = natural_breaks(dask_agg, k=k) + general_output_checks(dask_agg, dask_natural_breaks, expected_result, verify_dtype=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_natural_breaks_dask_cupy(result_natural_breaks): + dask_cupy_agg = input_data('dask+cupy') + k, expected_result = result_natural_breaks + dask_cupy_natural_breaks = natural_breaks(dask_cupy_agg, k=k) + general_output_checks(dask_cupy_agg, dask_cupy_natural_breaks, expected_result, verify_dtype=True) + + +@cuda_and_cupy_available +def test_natural_breaks_cupy_num_sample(result_natural_breaks_num_sample): + cupy_agg = input_data('cupy') + k, num_sample, expected_result = result_natural_breaks_num_sample + cupy_natural_breaks = natural_breaks(cupy_agg, k=k, num_sample=num_sample) + general_output_checks(cupy_agg, cupy_natural_breaks, expected_result, verify_dtype=True) + + +@dask_array_available +def test_natural_breaks_dask_numpy_num_sample(result_natural_breaks_num_sample): + dask_agg = input_data('dask+numpy') + k, num_sample, expected_result = result_natural_breaks_num_sample + dask_natural_breaks = natural_breaks(dask_agg, k=k, num_sample=num_sample) + general_output_checks(dask_agg, dask_natural_breaks, expected_result, verify_dtype=True) From 9b726c58456af4e46b6c14d22df8c859b0a73c42 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 19 Feb 2026 12:52:40 -0800 Subject: [PATCH 2/8] fix OOM in dask classification backends for large datasets - quantile dask+cupy: replace full materialization with map_blocks(cupy.asnumpy) to convert chunks to CPU one at a time, then delegate to dask's streaming approximate percentile - natural_breaks dask backends: sample lazily from the dask array and only materialize the sample (default 20k points), not the entire dataset. Add _generate_sample_indices helper that uses O(num_sample) memory via RandomState.choice() for large datasets, falling back to the original linspace+shuffle for small datasets to preserve determinism with numpy --- xrspatial/classify.py | 69 +++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/xrspatial/classify.py b/xrspatial/classify.py index 9f93cb3b..9e0ce1f5 100644 --- a/xrspatial/classify.py +++ b/xrspatial/classify.py @@ -398,18 +398,10 @@ def _run_quantile(data, k, module): def _run_dask_cupy_quantile(data, k): - w = 100.0 / k - p = np.arange(w, 100 + w, w) - if p[-1] > 100.0: - p[-1] = 100.0 - - finite_mask = da.isfinite(data) - finite_data = data[finite_mask].compute() - # transfer from GPU to CPU - finite_data_np = cupy.asnumpy(finite_data) - q = np.percentile(finite_data_np, p) - q = np.unique(q) - return q + # Convert dask+cupy chunks to numpy one at a time via map_blocks, + # then use dask's streaming approximate percentile (no full materialization). 
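+    # dtype/meta on map_blocks below declare the mapped chunks as CPU
+    # numpy arrays, so the rest of the graph dispatches to numpy kernels.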
+ data_cpu = data.map_blocks(cupy.asnumpy, dtype=data.dtype, meta=np.array(())) + return _run_quantile(data_cpu, k, da) def _quantile(agg, k): @@ -661,12 +653,46 @@ def _run_cupy_natural_break(agg, num_sample, k): return out +def _generate_sample_indices(num_data, num_sample, seed=1234567890): + """Generate sorted sample indices for natural breaks sampling. + + For small datasets (<=10M elements), uses the same linspace+shuffle + approach as the numpy backend for exact reproducibility. + For large datasets, uses memory-efficient RandomState.choice() + which is O(num_sample) rather than O(num_data). + """ + generator = np.random.RandomState(seed) + if num_data <= 10_000_000: + idx = np.linspace( + 0, num_data, num_data, endpoint=False, dtype=np.uint32 + ) + generator.shuffle(idx) + sample_idx = idx[:num_sample] + else: + sample_idx = generator.choice( + num_data, size=num_sample, replace=False + ) + # sort for efficient dask chunk access + sample_idx.sort() + return sample_idx + + def _run_dask_natural_break(agg, num_sample, k): data = agg.data max_data = float(da.max(data[da.isfinite(data)]).compute()) - data_flat_np = np.asarray(data.ravel().compute()) - bins, uvk = _compute_natural_break_bins(data_flat_np, num_sample, k, max_data) + num_data = data.size + if num_sample is not None and num_sample < num_data: + # Sample lazily from dask array; only materialize the sample + sample_idx = _generate_sample_indices(num_data, num_sample) + sample_data_np = np.asarray(data.ravel()[sample_idx].compute()) + bins, uvk = _compute_natural_break_bins( + sample_data_np, None, k, max_data) + else: + data_flat_np = np.asarray(data.ravel().compute()) + bins, uvk = _compute_natural_break_bins( + data_flat_np, None, k, max_data) + out = _bin(agg, bins, np.arange(uvk)) return out @@ -674,9 +700,20 @@ def _run_dask_natural_break(agg, num_sample, k): def _run_dask_cupy_natural_break(agg, num_sample, k): data = agg.data max_data = float(da.max(data[da.isfinite(data)]).compute().item()) - data_flat_np = cupy.asnumpy(data.ravel().compute()) - bins, uvk = _compute_natural_break_bins(data_flat_np, num_sample, k, max_data) + num_data = data.size + if num_sample is not None and num_sample < num_data: + # Sample lazily from dask array; only materialize the sample + sample_idx = _generate_sample_indices(num_data, num_sample) + sample_data = data.ravel()[sample_idx].compute() + sample_data_np = cupy.asnumpy(sample_data) + bins, uvk = _compute_natural_break_bins( + sample_data_np, None, k, max_data) + else: + data_flat_np = cupy.asnumpy(data.ravel().compute()) + bins, uvk = _compute_natural_break_bins( + data_flat_np, None, k, max_data) + out = _bin(agg, bins, np.arange(uvk)) return out From 7ce370786a5a5b327ce597ffca26a5ad05ae98f9 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 19 Feb 2026 13:06:40 -0800 Subject: [PATCH 3/8] optimize classification ops: reduce memory allocations and dask passes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unnecessary .ravel() in _run_equal_interval; nanmin/nanmax work on 2D - Combine double where(±inf) into single isinf pass in _run_equal_interval and _run_cupy_bin, halving temporary allocations - Use dask.compute(min, max) instead of two separate .compute() calls so dask reads data once instead of twice - Build cuts as numpy array for all backends (was needlessly dask for k elements) - Replace boolean fancy indexing in dask natural_break functions with da.where + da.nanmax to preserve chunk structure - Delete 
_run_dask_cupy_equal_interval; unified _run_equal_interval with module=da handles both dask+numpy and dask+cupy --- xrspatial/classify.py | 78 +++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/xrspatial/classify.py b/xrspatial/classify.py index 9e0ce1f5..a0d64a91 100644 --- a/xrspatial/classify.py +++ b/xrspatial/classify.py @@ -15,8 +15,10 @@ class cupy(object): ndarray = False try: + import dask import dask.array as da except ImportError: + dask = None da = None import numba as nb @@ -232,9 +234,8 @@ def _run_gpu_bin(data, bins, new_values, out): def _run_cupy_bin(data, bins, new_values): - # replace inf by nan to avoid classify these values as we want to treat them as outliers - data = cupy.where(data == cupy.inf, cupy.nan, data) - data = cupy.where(data == -cupy.inf, cupy.nan, data) + # replace ±inf with nan in a single pass to avoid classifying outliers + data = cupy.where(cupy.isinf(data), cupy.nan, data) bins_cupy = cupy.asarray(bins) new_values_cupy = cupy.asarray(new_values) @@ -679,7 +680,9 @@ def _generate_sample_indices(num_data, num_sample, seed=1234567890): def _run_dask_natural_break(agg, num_sample, k): data = agg.data - max_data = float(da.max(data[da.isfinite(data)]).compute()) + # Avoid boolean fancy indexing which flattens dask arrays and + # produces chunks of unknown size; use element-wise where instead + max_data = float(da.nanmax(da.where(da.isinf(data), np.nan, data)).compute()) num_data = data.size if num_sample is not None and num_sample < num_data: @@ -699,7 +702,9 @@ def _run_dask_natural_break(agg, num_sample, k): def _run_dask_cupy_natural_break(agg, num_sample, k): data = agg.data - max_data = float(da.max(data[da.isfinite(data)]).compute().item()) + # Avoid boolean fancy indexing which flattens dask arrays and + # produces chunks of unknown size; use element-wise where instead + max_data = float(da.nanmax(da.where(da.isinf(data), np.nan, data)).compute().item()) num_data = data.size if num_sample is not None and num_sample < num_data: @@ -817,57 +822,28 @@ def natural_breaks(agg: xr.DataArray, def _run_equal_interval(agg, k, module): - data = agg.data.ravel() - if module == cupy: - nan = cupy.nan - inf = cupy.inf - else: - nan = np.nan - inf = np.inf + data = agg.data - data = module.where(data == inf, nan, data) - data = module.where(data == -inf, nan, data) + # Replace ±inf with nan in a single pass (no ravel needed) + data_clean = module.where(module.isinf(data), np.nan, data) - max_data = module.nanmax(data) - min_data = module.nanmin(data) + min_lazy = module.nanmin(data_clean) + max_lazy = module.nanmax(data_clean) if module == cupy: - min_data = min_data.get() - max_data = max_data.get() - - if module == da: - min_data = min_data.compute() - max_data = max_data.compute() - - width = (max_data - min_data) * 1.0 / k - cuts = module.arange(min_data + width, max_data + width, width) - l_cuts = cuts.shape[0] - if l_cuts > k: - # handle overshooting - cuts = cuts[0:k] - - if module == da: - # work around to assign cuts[-1] = max_data - bins = da.concatenate([cuts[:k-1], [max_data]]) - out = _bin(agg, bins, np.arange(l_cuts)) + min_data = float(min_lazy.get()) + max_data = float(max_lazy.get()) + elif module == da: + # Compute both in a single pass over the data + min_data, max_data = dask.compute(min_lazy, max_lazy) + min_data = float(min_data) + max_data = float(max_data) else: - cuts[-1] = max_data - out = _bin(agg, cuts, np.arange(l_cuts)) - - return out - - -def _run_dask_cupy_equal_interval(agg, 
k): - data = agg.data.ravel() - - # replace inf with nan - data = da.where(data == np.inf, np.nan, data) - data = da.where(data == -np.inf, np.nan, data) - - min_data = float(da.nanmin(data).compute().item()) - max_data = float(da.nanmax(data).compute().item()) + min_data = float(min_lazy) + max_data = float(max_lazy) - width = (max_data - min_data) * 1.0 / k + width = (max_data - min_data) / k + # Build cuts as numpy — only k elements, no need for dask/cupy overhead cuts = np.arange(min_data + width, max_data + width, width) l_cuts = cuts.shape[0] if l_cuts > k: @@ -938,7 +914,7 @@ def equal_interval(agg: xr.DataArray, numpy_func=lambda *args: _run_equal_interval(*args, module=np), dask_func=lambda *args: _run_equal_interval(*args, module=da), cupy_func=lambda *args: _run_equal_interval(*args, module=cupy), - dask_cupy_func=_run_dask_cupy_equal_interval + dask_cupy_func=lambda *args: _run_equal_interval(*args, module=da) ) out = mapper(agg)(agg, k) return xr.DataArray(out, From 62919a0d3609cb3d4c54a1a58f5e0e100a996de1 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 19 Feb 2026 13:15:26 -0800 Subject: [PATCH 4/8] add 14 new classify tests: mutation safety, edge cases, cross-backend consistency - Missing backend: natural_breaks dask+cupy num_sample - Input mutation: verify all 5 functions don't modify input DataArray - Untested path: natural_breaks with num_sample=None - Edge cases: equal_interval k=1, all-NaN input for equal_interval and natural_breaks - Name parameter: verify default and custom name on all 5 functions - Cross-backend: verify natural_breaks cupy and dask match numpy results on a separate 10x10 dataset --- xrspatial/tests/test_classify.py | 150 ++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 1 deletion(-) diff --git a/xrspatial/tests/test_classify.py b/xrspatial/tests/test_classify.py index be1f81c5..355d9219 100644 --- a/xrspatial/tests/test_classify.py +++ b/xrspatial/tests/test_classify.py @@ -3,7 +3,8 @@ import xarray as xr from xrspatial import binary, equal_interval, natural_breaks, quantile, reclassify -from xrspatial.tests.general_checks import (create_test_raster, +from xrspatial.tests.general_checks import (assert_input_data_unmodified, + create_test_raster, cuda_and_cupy_available, dask_array_available, general_output_checks) @@ -350,3 +351,150 @@ def test_natural_breaks_dask_numpy_num_sample(result_natural_breaks_num_sample): k, num_sample, expected_result = result_natural_breaks_num_sample dask_natural_breaks = natural_breaks(dask_agg, k=k, num_sample=num_sample) general_output_checks(dask_agg, dask_natural_breaks, expected_result, verify_dtype=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_natural_breaks_dask_cupy_num_sample(result_natural_breaks_num_sample): + dask_cupy_agg = input_data('dask+cupy') + k, num_sample, expected_result = result_natural_breaks_num_sample + dask_cupy_natural_breaks = natural_breaks(dask_cupy_agg, k=k, num_sample=num_sample) + general_output_checks( + dask_cupy_agg, dask_cupy_natural_breaks, expected_result, verify_dtype=True) + + +# --- Input mutation tests --- +# Classification functions must not modify the input DataArray. +# natural_breaks is most critical because _run_jenks sorts in-place. 
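+# (The numpy backend guards against this by fitting on data.flatten(),
+# which returns a copy rather than a view of the caller's raster.)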
+ +def test_binary_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + binary(agg, [1, 2, 3]) + assert_input_data_unmodified(original, agg) + + +def test_reclassify_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + reclassify(agg, bins=[10, 15, np.inf], new_values=[1, 2, 3]) + assert_input_data_unmodified(original, agg) + + +def test_quantile_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + quantile(agg, k=5) + assert_input_data_unmodified(original, agg) + + +def test_natural_breaks_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + natural_breaks(agg, k=5) + assert_input_data_unmodified(original, agg) + + +def test_equal_interval_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + equal_interval(agg, k=3) + assert_input_data_unmodified(original, agg) + + +# --- num_sample=None test --- +# Tests the code path where all data is used without sampling. +# For the test data (20 elements), this produces the same result +# as default num_sample=20000 since 20000 > 20. + +def test_natural_breaks_numpy_num_sample_none(result_natural_breaks): + numpy_agg = input_data() + k, expected_result = result_natural_breaks + result = natural_breaks(numpy_agg, k=k, num_sample=None) + general_output_checks(numpy_agg, result, expected_result, verify_dtype=True) + + +# --- Edge cases for equal_interval --- + +def test_equal_interval_k_equals_1(): + agg = input_data() + result = equal_interval(agg, k=1) + result_data = result.data + # All finite values should be in class 0 + finite_mask = np.isfinite(result_data) + assert np.all(result_data[finite_mask] == 0) + # Non-finite input positions should be NaN in output + input_finite = np.isfinite(agg.data) + assert np.all(np.isnan(result_data[~input_finite])) + + +# --- All-NaN edge cases --- +# These document current failure behavior for degenerate inputs. + +def test_equal_interval_all_nan(): + data = np.full((4, 5), np.nan) + agg = xr.DataArray(data) + with pytest.raises(ValueError): + equal_interval(agg, k=3) + + +def test_natural_breaks_all_nan(): + data = np.full((4, 5), np.nan) + agg = xr.DataArray(data) + with pytest.raises(ValueError): + natural_breaks(agg, k=3) + + +# --- Name parameter tests --- + +def test_output_name_default(): + agg = input_data() + assert binary(agg, [1, 2]).name == 'binary' + assert reclassify(agg, [10, 15], [1, 2]).name == 'reclassify' + assert quantile(agg, k=3).name == 'quantile' + assert natural_breaks(agg, k=3).name == 'natural_breaks' + assert equal_interval(agg, k=3).name == 'equal_interval' + + +def test_output_name_custom(): + agg = input_data() + assert binary(agg, [1, 2], name='custom').name == 'custom' + assert reclassify(agg, [10, 15], [1, 2], name='custom').name == 'custom' + assert quantile(agg, k=3, name='custom').name == 'custom' + assert natural_breaks(agg, k=3, name='custom').name == 'custom' + assert equal_interval(agg, k=3, name='custom').name == 'custom' + + +# --- Cross-backend consistency for natural_breaks --- +# Verifies that cupy/dask backends produce identical results to numpy +# using a different dataset (10x10 arange) than the fixture tests. 
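+# All backends share _compute_natural_break_bins and the fixed
+# RandomState(1234567890) sampling seed, and 100 elements is well below
+# the default num_sample of 20000, so the bins should agree exactly.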
+ +@cuda_and_cupy_available +def test_natural_breaks_cupy_matches_numpy(): + import cupy as cp + elevation = np.arange(100, dtype=np.float64).reshape(10, 10) + numpy_agg = xr.DataArray(elevation) + cupy_agg = xr.DataArray(cp.asarray(elevation)) + + k = 5 + numpy_result = natural_breaks(numpy_agg, k=k) + cupy_result = natural_breaks(cupy_agg, k=k) + + np.testing.assert_allclose( + numpy_result.data, cp.asnumpy(cupy_result.data), equal_nan=True + ) + + +@dask_array_available +def test_natural_breaks_dask_matches_numpy(): + elevation = np.arange(100, dtype=np.float64).reshape(10, 10) + numpy_agg = xr.DataArray(elevation) + dask_agg = xr.DataArray(da.from_array(elevation, chunks=(5, 5))) + + k = 5 + numpy_result = natural_breaks(numpy_agg, k=k) + dask_result = natural_breaks(dask_agg, k=k) + + np.testing.assert_allclose( + numpy_result.data, dask_result.data.compute(), equal_nan=True + ) From aaf8367aa08783192cc5f7f09116587049c412cc Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 20 Feb 2026 06:29:47 -0800 Subject: [PATCH 5/8] added a few more classify methods and experimental geodesic slope and aspect --- README.md | 7 +- xrspatial/__init__.py | 5 + xrspatial/aspect.py | 246 ++++++++++++------ xrspatial/classify.py | 431 +++++++++++++++++++++++++++++++ xrspatial/slope.py | 185 ++++++++++++- xrspatial/tests/test_classify.py | 386 ++++++++++++++++++++++++++- xrspatial/utils.py | 123 +++++++++ 7 files changed, 1299 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index a3b80aed..487b61b3 100644 --- a/README.md +++ b/README.md @@ -137,10 +137,15 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e | Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | |:----------:|:----------------------:|:--------------------:|:-------------------:|:------:| +| [Box Plot](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | | [Equal Interval](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | +| [Head/Tail Breaks](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | +| [Maximum Breaks](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | | [Natural Breaks](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Reclassify](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | +| [Percentiles](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | | [Quantile](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | +| [Reclassify](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | +| [Std Mean](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | ------- diff --git a/xrspatial/__init__.py b/xrspatial/__init__.py index 80b4baac..ab1a3fc4 100644 --- a/xrspatial/__init__.py +++ b/xrspatial/__init__.py @@ -1,6 +1,11 @@ from xrspatial.aspect import aspect # noqa from xrspatial.bump import bump # noqa from xrspatial.classify import binary # noqa +from xrspatial.classify import box_plot # noqa +from xrspatial.classify import head_tail_breaks # noqa +from xrspatial.classify import maximum_breaks # noqa +from xrspatial.classify import percentiles # noqa +from xrspatial.classify import std_mean # noqa from xrspatial.diagnostics import diagnose # noqa from xrspatial.classify import equal_interval # noqa from xrspatial.classify import natural_breaks # noqa diff --git a/xrspatial/aspect.py b/xrspatial/aspect.py index 57ad079b..bc9bf080 100644 --- a/xrspatial/aspect.py +++ b/xrspatial/aspect.py @@ -15,9 +15,29 @@ from numba import cuda from xrspatial.utils import ArrayTypeFunctionMapping +from xrspatial.utils import Z_UNITS +from xrspatial.utils import _extract_latlon_coords from xrspatial.utils import cuda_args from xrspatial.utils import ngjit + +def 
_geodesic_cuda_dims(shape): + """Smaller thread block for register-heavy geodesic kernels.""" + tpb = (16, 16) + bpg = ( + (shape[0] + tpb[0] - 1) // tpb[0], + (shape[1] + tpb[1] - 1) // tpb[1], + ) + return bpg, tpb + +from xrspatial.geodesic import ( + INV_2R, + WGS84_A2, + WGS84_B2, + _cpu_geodesic_aspect, + _run_gpu_geodesic_aspect, +) + # 3rd-party try: import cupy @@ -28,6 +48,10 @@ class cupy(object): RADIAN = 180 / np.pi +# ===================================================================== +# Planar backend functions (unchanged) +# ===================================================================== + @ngjit def _run_numpy(data: np.ndarray): data = data.astype(np.float32) @@ -140,8 +164,116 @@ def _run_dask_cupy(data: da.Array) -> da.Array: return out +# ===================================================================== +# Geodesic backend functions +# ===================================================================== + +def _run_numpy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor): + stacked = np.stack([ + data.astype(np.float64), + lat_2d, + lon_2d, + ], axis=0) + return _cpu_geodesic_aspect(stacked, a2, b2, z_factor) + + +def _run_cupy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor): + lat_2d_gpu = cupy.asarray(lat_2d, dtype=cupy.float64) + lon_2d_gpu = cupy.asarray(lon_2d, dtype=cupy.float64) + stacked = cupy.stack([ + data.astype(cupy.float64), + lat_2d_gpu, + lon_2d_gpu, + ], axis=0) + + H, W = data.shape + out = cupy.full((H, W), cupy.nan, dtype=cupy.float32) + + a2_arr = cupy.array([a2], dtype=cupy.float64) + b2_arr = cupy.array([b2], dtype=cupy.float64) + zf_arr = cupy.array([z_factor], dtype=cupy.float64) + inv_2r_arr = cupy.array([INV_2R], dtype=cupy.float64) + + griddim, blockdim = _geodesic_cuda_dims((H, W)) + _run_gpu_geodesic_aspect[griddim, blockdim](stacked, a2_arr, b2_arr, zf_arr, inv_2r_arr, out) + return out + + +def _dask_geodesic_aspect_chunk(stacked_chunk, a2, b2, z_factor): + """Returns (3, h, w) to preserve shape for map_overlap.""" + result_2d = _cpu_geodesic_aspect(stacked_chunk, a2, b2, z_factor) + out = np.empty_like(stacked_chunk, dtype=np.float32) + out[0] = result_2d + out[1] = 0.0 + out[2] = 0.0 + return out + + +def _dask_geodesic_aspect_chunk_cupy(stacked_chunk, a2, b2, z_factor): + H, W = stacked_chunk.shape[1], stacked_chunk.shape[2] + result_2d = cupy.full((H, W), cupy.nan, dtype=cupy.float32) + + a2_arr = cupy.array([a2], dtype=cupy.float64) + b2_arr = cupy.array([b2], dtype=cupy.float64) + zf_arr = cupy.array([z_factor], dtype=cupy.float64) + inv_2r_arr = cupy.array([INV_2R], dtype=cupy.float64) + + griddim, blockdim = _geodesic_cuda_dims((H, W)) + _run_gpu_geodesic_aspect[griddim, blockdim](stacked_chunk, a2_arr, b2_arr, zf_arr, inv_2r_arr, result_2d) + + out = cupy.zeros_like(stacked_chunk, dtype=cupy.float32) + out[0] = result_2d + return out + + +def _run_dask_numpy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor): + lat_dask = da.from_array(lat_2d, chunks=data.chunksize) + lon_dask = da.from_array(lon_2d, chunks=data.chunksize) + stacked = da.stack([ + data.astype(np.float64), + lat_dask, + lon_dask, + ], axis=0).rechunk({0: 3}) + + _func = partial(_dask_geodesic_aspect_chunk, a2=a2, b2=b2, z_factor=z_factor) + out = stacked.map_overlap( + _func, + depth=(0, 1, 1), + boundary=np.nan, + meta=np.array((), dtype=np.float32), + ) + return out[0] + + +def _run_dask_cupy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor): + lat_dask = da.from_array(cupy.asarray(lat_2d, dtype=cupy.float64), + chunks=data.chunksize) + lon_dask = 
da.from_array(cupy.asarray(lon_2d, dtype=cupy.float64), + chunks=data.chunksize) + stacked = da.stack([ + data.astype(cupy.float64), + lat_dask, + lon_dask, + ], axis=0).rechunk({0: 3}) + + _func = partial(_dask_geodesic_aspect_chunk_cupy, a2=a2, b2=b2, z_factor=z_factor) + out = stacked.map_overlap( + _func, + depth=(0, 1, 1), + boundary=cupy.nan, + meta=cupy.array((), dtype=cupy.float32), + ) + return out[0] + + +# ===================================================================== +# Public API +# ===================================================================== + def aspect(agg: xr.DataArray, - name: Optional[str] = 'aspect') -> xr.DataArray: + name: Optional[str] = 'aspect', + method: str = 'planar', + z_unit: str = 'meter') -> xr.DataArray: """ Calculates the aspect value of an elevation aggregate. @@ -169,6 +301,15 @@ def aspect(agg: xr.DataArray, of elevation values. name : str, default='aspect' Name of ouput DataArray. + method : str, default='planar' + ``'planar'`` uses the classic Horn algorithm with uniform cell size. + ``'geodesic'`` converts cells to Earth-Centered Earth-Fixed (ECEF) + coordinates and fits a 3D plane, yielding accurate results for + geographic (lat/lon) coordinate systems. + z_unit : str, default='meter' + Unit of the elevation values. Only used when ``method='geodesic'``. + Accepted values: ``'meter'``, ``'foot'``, ``'kilometer'``, ``'mile'`` + (and common aliases). Returns ------- @@ -198,81 +339,40 @@ def aspect(agg: xr.DataArray, [1, 5, 0, 5, 5] ], dtype=np.float32) >>> raster = xr.DataArray(data, dims=['y', 'x'], name='raster') - >>> print(raster) - - array([[1., 1., 1., 1., 1.], - [1., 1., 1., 2., 0.], - [1., 1., 1., 0., 0.], - [4., 4., 9., 2., 4.], - [1., 5., 0., 1., 4.], - [1., 5., 0., 5., 5.]]) - Dimensions without coordinates: y, x >>> aspect_agg = aspect(raster) - >>> print(aspect_agg) - - array([[ nan, nan , nan , nan , nan], - [ nan, -1. , 225. , 135. , nan], - [ nan, 343.61045967, 8.97262661, 33.69006753, nan], - [ nan, 307.87498365, 71.56505118, 54.46232221, nan], - [ nan, 191.30993247, 144.46232221, 255.96375653, nan], - [ nan, nan , nan , nan , nan]]) - Dimensions without coordinates: y, x - - Aspect works with Dask with NumPy backed xarray DataArray - .. sourcecode:: python - - >>> import dask.array as da - >>> data_da = da.from_array(data, chunks=(3, 3)) - >>> raster_da = xr.DataArray(data_da, dims=['y', 'x'], name='raster_da') - >>> print(raster_da) - - dask.array - Dimensions without coordinates: y, x - >>> aspect_da = aspect(raster_da) - >>> print(aspect_da) - - dask.array<_trim, shape=(6, 5), dtype=float32, chunksize=(3, 3), chunktype=numpy.ndarray> - Dimensions without coordinates: y, x - >>> print(aspect_da.compute()) # compute the results - - array([[ nan, nan , nan , nan , nan], - [ nan, -1. , 225. , 135. , nan], - [ nan, 343.61045967, 8.97262661, 33.69006753, nan], - [ nan, 307.87498365, 71.56505118, 54.46232221, nan], - [ nan, 191.30993247, 144.46232221, 255.96375653, nan], - [ nan, nan , nan , nan , nan]]) - Dimensions without coordinates: y, x - - Aspect works with CuPy backed xarray DataArray. - Make sure you have a GPU and CuPy installed to run this example. - .. 
sourcecode:: python - - >>> import cupy - >>> data_cupy = cupy.asarray(data) - >>> raster_cupy = xr.DataArray(data_cupy, dims=['y', 'x']) - >>> aspect_cupy = aspect(raster_cupy) - >>> print(type(aspect_cupy.data)) - - >>> print(aspect_cupy) - - array([[ nan, nan, nan, nan, nan], - [ nan, -1., 225., 135., nan], - [ nan, 343.61047, 8.972626, 33.690067, nan], - [ nan, 307.87497, 71.56505 , 54.462322, nan], - [ nan, 191.30994, 144.46233 , 255.96376, nan], - [ nan, nan, nan, nan, nan]], - dtype=float32) - Dimensions without coordinates: y, x """ - mapper = ArrayTypeFunctionMapping( - numpy_func=_run_numpy, - dask_func=_run_dask_numpy, - cupy_func=_run_cupy, - dask_cupy_func=_run_dask_cupy, - ) - - out = mapper(agg)(agg.data) + if method not in ('planar', 'geodesic'): + raise ValueError( + f"method must be 'planar' or 'geodesic', got {method!r}" + ) + + if method == 'planar': + mapper = ArrayTypeFunctionMapping( + numpy_func=_run_numpy, + dask_func=_run_dask_numpy, + cupy_func=_run_cupy, + dask_cupy_func=_run_dask_cupy, + ) + out = mapper(agg)(agg.data) + + else: # geodesic + if z_unit not in Z_UNITS: + raise ValueError( + f"z_unit must be one of {sorted(set(Z_UNITS.values()), key=str)}, " + f"got {z_unit!r}" + ) + z_factor = Z_UNITS[z_unit] + + lat_2d, lon_2d = _extract_latlon_coords(agg) + + mapper = ArrayTypeFunctionMapping( + numpy_func=_run_numpy_geodesic, + cupy_func=_run_cupy_geodesic, + dask_func=_run_dask_numpy_geodesic, + dask_cupy_func=_run_dask_cupy_geodesic, + ) + out = mapper(agg)(agg.data, lat_2d, lon_2d, WGS84_A2, WGS84_B2, z_factor) return xr.DataArray(out, name=name, diff --git a/xrspatial/classify.py b/xrspatial/classify.py index a0d64a91..06d8a4be 100644 --- a/xrspatial/classify.py +++ b/xrspatial/classify.py @@ -922,3 +922,434 @@ def equal_interval(agg: xr.DataArray, coords=agg.coords, dims=agg.dims, attrs=agg.attrs) + + +def _run_std_mean(agg, module): + data = agg.data + data_clean = module.where(module.isinf(data), np.nan, data) + mean_lazy = module.nanmean(data_clean) + std_lazy = module.nanstd(data_clean) + max_lazy = module.nanmax(data_clean) + + if module == cupy: + mean_v = float(mean_lazy.get()) + std_v = float(std_lazy.get()) + max_v = float(max_lazy.get()) + elif module == da: + mean_v, std_v, max_v = dask.compute(mean_lazy, std_lazy, max_lazy) + mean_v, std_v, max_v = float(mean_v), float(std_v), float(max_v) + else: + mean_v, std_v, max_v = float(mean_lazy), float(std_lazy), float(max_lazy) + + bins = np.sort(np.unique([ + mean_v - 2 * std_v, + mean_v - std_v, + mean_v + std_v, + mean_v + 2 * std_v, + max_v, + ])) + out = _bin(agg, bins, np.arange(len(bins))) + return out + + +def std_mean(agg: xr.DataArray, + name: Optional[str] = 'std_mean') -> xr.DataArray: + """ + Classify data based on standard deviations from the mean. + + Creates bins at mean +/- 1 and 2 standard deviations. + + Parameters + ---------- + agg : xarray.DataArray + 2D NumPy, CuPy, NumPy-backed Dask, or CuPy-backed Dask array + of values to be classified. + name : str, default='std_mean' + Name of output aggregate array. + + Returns + ------- + std_mean_agg : xarray.DataArray, of the same type as `agg` + 2D aggregate array of standard deviation classifications. + All other input attributes are preserved. 
+ + References + ---------- + - PySAL: https://pysal.org/mapclassify/_modules/mapclassify/classifiers.html#StdMean + """ + mapper = ArrayTypeFunctionMapping( + numpy_func=lambda *args: _run_std_mean(*args, module=np), + dask_func=lambda *args: _run_std_mean(*args, module=da), + cupy_func=lambda *args: _run_std_mean(*args, module=cupy), + dask_cupy_func=lambda *args: _run_std_mean(*args, module=da), + ) + out = mapper(agg)(agg) + return xr.DataArray(out, + name=name, + dims=agg.dims, + coords=agg.coords, + attrs=agg.attrs) + + +def _compute_head_tail_bins(values_np): + """Compute head/tail break bins from flat finite numpy values.""" + bins = [] + data = values_np.copy() + while len(data) > 1: + mean_v = float(np.nanmean(data)) + bins.append(mean_v) + head = data[data > mean_v] + if len(head) == 0 or len(head) / len(data) > 0.40: + break + data = head + if not bins: + bins = [float(np.nanmean(values_np))] + bins.append(float(np.nanmax(values_np))) + return np.array(bins) + + +def _run_head_tail_breaks(agg, module): + data = agg.data + if module == cupy: + finite = data[cupy.isfinite(data)] + values_np = cupy.asnumpy(finite) + else: + finite = data[np.isfinite(data)] + values_np = np.asarray(finite) + + bins = _compute_head_tail_bins(values_np) + out = _bin(agg, bins, np.arange(len(bins))) + return out + + +def _run_dask_head_tail_breaks(agg): + data = agg.data + data_clean = da.where(da.isinf(data), np.nan, data) + bins = [] + mask = da.isfinite(data_clean) + while True: + current = da.where(mask, data_clean, np.nan) + mean_v = float(da.nanmean(current).compute()) + bins.append(mean_v) + new_mask = mask & (data_clean > mean_v) + head_count = int(new_mask.sum().compute()) + total_count = int(mask.sum().compute()) + if head_count == 0 or head_count / total_count > 0.40: + break + mask = new_mask + max_v = float(da.nanmax(data_clean).compute()) + bins.append(max_v) + bins = np.array(bins) + out = _bin(agg, bins, np.arange(len(bins))) + return out + + +def head_tail_breaks(agg: xr.DataArray, + name: Optional[str] = 'head_tail_breaks') -> xr.DataArray: + """ + Classify data using the Head/Tail Breaks algorithm. + + Iteratively partitions data around the mean. Values below the mean + form a class, and values above continue to be split until the head + proportion exceeds 40%. + + Parameters + ---------- + agg : xarray.DataArray + 2D NumPy, CuPy, NumPy-backed Dask, or CuPy-backed Dask array + of values to be classified. + name : str, default='head_tail_breaks' + Name of output aggregate array. + + Returns + ------- + head_tail_agg : xarray.DataArray, of the same type as `agg` + 2D aggregate array of head/tail break classifications. + All other input attributes are preserved. 
+ + References + ---------- + - PySAL: https://pysal.org/mapclassify/_modules/mapclassify/classifiers.html#HeadTailBreaks + """ + mapper = ArrayTypeFunctionMapping( + numpy_func=lambda *args: _run_head_tail_breaks(*args, module=np), + dask_func=_run_dask_head_tail_breaks, + cupy_func=lambda *args: _run_head_tail_breaks(*args, module=cupy), + dask_cupy_func=_run_dask_head_tail_breaks, + ) + out = mapper(agg)(agg) + return xr.DataArray(out, + name=name, + dims=agg.dims, + coords=agg.coords, + attrs=agg.attrs) + + +def _run_percentiles(data, pct, module): + q = module.percentile(data[module.isfinite(data)], pct) + q = module.unique(q) + return q + + +def _run_dask_cupy_percentiles(data, pct): + data_cpu = data.map_blocks(cupy.asnumpy, dtype=data.dtype, meta=np.array(())) + return _run_percentiles(data_cpu, pct, da) + + +def percentiles(agg: xr.DataArray, + pct: Optional[List] = None, + name: Optional[str] = 'percentiles') -> xr.DataArray: + """ + Classify data based on percentile breakpoints. + + Parameters + ---------- + agg : xarray.DataArray + 2D NumPy, CuPy, NumPy-backed Dask, or CuPy-backed Dask array + of values to be classified. + pct : list of float, default=[1, 10, 50, 90, 99] + Percentile values to use as breakpoints. + name : str, default='percentiles' + Name of output aggregate array. + + Returns + ------- + percentiles_agg : xarray.DataArray, of the same type as `agg` + 2D aggregate array of percentile classifications. + All other input attributes are preserved. + + References + ---------- + - PySAL: https://pysal.org/mapclassify/_modules/mapclassify/classifiers.html#Percentiles + """ + if pct is None: + pct = [1, 10, 50, 90, 99] + + mapper = ArrayTypeFunctionMapping( + numpy_func=lambda *args: _run_percentiles(*args, module=np), + dask_func=lambda *args: _run_percentiles(*args, module=da), + cupy_func=lambda *args: _run_percentiles(*args, module=cupy), + dask_cupy_func=_run_dask_cupy_percentiles, + ) + q = mapper(agg)(agg.data, pct) + + # Materialize bin edges to numpy + if hasattr(q, 'compute'): + q_np = np.asarray(q.compute()) + elif hasattr(q, 'get'): + q_np = np.asarray(q.get()) + else: + q_np = np.asarray(q) + + # Append max of finite data so all values get classified + data = agg.data + if hasattr(data, 'compute'): + max_v = float(da.nanmax(da.where(da.isinf(data), np.nan, data)).compute()) + elif hasattr(data, 'get'): + clean = cupy.where(cupy.isinf(data), cupy.nan, data) + max_v = float(cupy.nanmax(clean).get()) + else: + clean = np.where(np.isinf(data), np.nan, data) + max_v = float(np.nanmax(clean)) + + bins = np.sort(np.unique(np.append(q_np, max_v))) + k = len(bins) + out = _bin(agg, bins, np.arange(k)) + + return xr.DataArray(out, + name=name, + dims=agg.dims, + coords=agg.coords, + attrs=agg.attrs) + + +def _compute_maximum_break_bins(values_np, k): + """Compute maximum breaks bins from sorted finite numpy values.""" + uv = np.unique(values_np) + if len(uv) < k: + return uv + diffs = np.diff(uv) + n_gaps = min(k - 1, len(diffs)) + top_indices = np.argpartition(diffs, -n_gaps)[-n_gaps:] + top_indices.sort() + bins = np.array([(uv[i] + uv[i + 1]) / 2.0 for i in top_indices]) + bins = np.append(bins, float(uv[-1])) + return bins + + +def _run_maximum_breaks(agg, k, module): + data = agg.data + if module == cupy: + finite = data[cupy.isfinite(data)] + values_np = cupy.asnumpy(finite) + else: + finite = data[np.isfinite(data)] + values_np = np.asarray(finite) + + bins = _compute_maximum_break_bins(values_np, k) + out = _bin(agg, bins, np.arange(len(bins))) + return out 
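+
+
+# Worked example for _compute_maximum_break_bins: with values
+# [1, 2, 3, 10, 11, 12, 30] and k=3, the consecutive diffs are
+# [1, 1, 7, 1, 1, 18]; the two largest gaps (3->10 and 12->30) yield
+# midpoint edges 6.5 and 21.0, plus the max 30.0 as the final edge.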
+ + +def _run_dask_maximum_breaks(agg, k): + data = agg.data + data_clean = da.where(da.isinf(data), np.nan, data) + values_np = np.asarray(data_clean.ravel().compute()) + values_np = values_np[np.isfinite(values_np)] + bins = _compute_maximum_break_bins(values_np, k) + out = _bin(agg, bins, np.arange(len(bins))) + return out + + +def _run_dask_cupy_maximum_breaks(agg, k): + data = agg.data + data_clean = da.where(da.isinf(data), np.nan, data) + data_cpu = data_clean.map_blocks(cupy.asnumpy, dtype=data.dtype, meta=np.array(())) + values_np = np.asarray(data_cpu.ravel().compute()) + values_np = values_np[np.isfinite(values_np)] + bins = _compute_maximum_break_bins(values_np, k) + out = _bin(agg, bins, np.arange(len(bins))) + return out + + +def maximum_breaks(agg: xr.DataArray, + k: int = 5, + name: Optional[str] = 'maximum_breaks') -> xr.DataArray: + """ + Classify data using the Maximum Breaks algorithm. + + Finds the k-1 largest gaps between sorted unique values and uses + midpoints of those gaps as bin edges. + + Parameters + ---------- + agg : xarray.DataArray + 2D NumPy, CuPy, NumPy-backed Dask, or CuPy-backed Dask array + of values to be classified. + k : int, default=5 + Number of classes to be produced. + name : str, default='maximum_breaks' + Name of output aggregate array. + + Returns + ------- + max_breaks_agg : xarray.DataArray, of the same type as `agg` + 2D aggregate array of maximum break classifications. + All other input attributes are preserved. + + References + ---------- + - PySAL: https://pysal.org/mapclassify/_modules/mapclassify/classifiers.html#MaximumBreaks + """ + mapper = ArrayTypeFunctionMapping( + numpy_func=lambda *args: _run_maximum_breaks(*args, module=np), + dask_func=_run_dask_maximum_breaks, + cupy_func=lambda *args: _run_maximum_breaks(*args, module=cupy), + dask_cupy_func=_run_dask_cupy_maximum_breaks, + ) + out = mapper(agg)(agg, k) + return xr.DataArray(out, + name=name, + dims=agg.dims, + coords=agg.coords, + attrs=agg.attrs) + + +def _run_box_plot(agg, hinge, module): + data = agg.data + data_clean = module.where(module.isinf(data), np.nan, data) + finite_data = data_clean[module.isfinite(data_clean)] + + if module == cupy: + q1 = float(cupy.percentile(finite_data, 25).get()) + q2 = float(cupy.percentile(finite_data, 50).get()) + q3 = float(cupy.percentile(finite_data, 75).get()) + max_v = float(cupy.nanmax(finite_data).get()) + elif module == da: + q1_l = da.percentile(finite_data, 25) + q2_l = da.percentile(finite_data, 50) + q3_l = da.percentile(finite_data, 75) + max_l = da.nanmax(data_clean) + q1, q2, q3, max_v = dask.compute(q1_l, q2_l, q3_l, max_l) + q1, q2, q3, max_v = float(q1), float(q2), float(q3), float(max_v) + else: + q1 = float(np.percentile(finite_data, 25)) + q2 = float(np.percentile(finite_data, 50)) + q3 = float(np.percentile(finite_data, 75)) + max_v = float(np.nanmax(finite_data)) + + iqr = q3 - q1 + raw_bins = [q1 - hinge * iqr, q1, q2, q3, q3 + hinge * iqr, max_v] + bins = np.sort(np.unique(raw_bins)) + # Remove bins above max (they'd create empty classes) + bins = bins[bins <= max_v] + if bins[-1] < max_v: + bins = np.append(bins, max_v) + out = _bin(agg, bins, np.arange(len(bins))) + return out + + +def _run_dask_cupy_box_plot(agg, hinge): + data = agg.data + data_cpu = data.map_blocks(cupy.asnumpy, dtype=data.dtype, meta=np.array(())) + data_clean = da.where(da.isinf(data_cpu), np.nan, data_cpu) + finite_data = data_clean[da.isfinite(data_clean)] + + q1_l = da.percentile(finite_data, 25) + q2_l = da.percentile(finite_data, 
50) + q3_l = da.percentile(finite_data, 75) + max_l = da.nanmax(data_clean) + q1, q2, q3, max_v = dask.compute(q1_l, q2_l, q3_l, max_l) + q1, q2, q3, max_v = float(q1), float(q2), float(q3), float(max_v) + + iqr = q3 - q1 + raw_bins = [q1 - hinge * iqr, q1, q2, q3, q3 + hinge * iqr, max_v] + bins = np.sort(np.unique(raw_bins)) + bins = bins[bins <= max_v] + if bins[-1] < max_v: + bins = np.append(bins, max_v) + out = _bin(agg, bins, np.arange(len(bins))) + return out + + +def box_plot(agg: xr.DataArray, + hinge: float = 1.5, + name: Optional[str] = 'box_plot') -> xr.DataArray: + """ + Classify data using box plot breakpoints. + + Uses Q1, median (Q2), Q3, and the whiskers (Q1 - hinge*IQR, + Q3 + hinge*IQR) as class boundaries. + + Parameters + ---------- + agg : xarray.DataArray + 2D NumPy, CuPy, NumPy-backed Dask, or CuPy-backed Dask array + of values to be classified. + hinge : float, default=1.5 + Multiplier for the IQR to determine whisker extent. + name : str, default='box_plot' + Name of output aggregate array. + + Returns + ------- + box_plot_agg : xarray.DataArray, of the same type as `agg` + 2D aggregate array of box plot classifications. + All other input attributes are preserved. + + References + ---------- + - PySAL: https://pysal.org/mapclassify/_modules/mapclassify/classifiers.html#BoxPlot + """ + mapper = ArrayTypeFunctionMapping( + numpy_func=lambda *args: _run_box_plot(*args, module=np), + dask_func=lambda *args: _run_box_plot(*args, module=da), + cupy_func=lambda *args: _run_box_plot(*args, module=cupy), + dask_cupy_func=_run_dask_cupy_box_plot, + ) + out = mapper(agg)(agg, hinge) + return xr.DataArray(out, + name=name, + dims=agg.dims, + coords=agg.coords, + attrs=agg.attrs) diff --git a/xrspatial/slope.py b/xrspatial/slope.py index cd2aff87..753d905c 100644 --- a/xrspatial/slope.py +++ b/xrspatial/slope.py @@ -23,11 +23,35 @@ class cupy(object): # local modules from xrspatial.utils import ArrayTypeFunctionMapping +from xrspatial.utils import Z_UNITS +from xrspatial.utils import _extract_latlon_coords from xrspatial.utils import cuda_args from xrspatial.utils import get_dataarray_resolution from xrspatial.utils import ngjit +def _geodesic_cuda_dims(shape): + """Smaller thread block for register-heavy geodesic kernels.""" + tpb = (16, 16) + bpg = ( + (shape[0] + tpb[0] - 1) // tpb[0], + (shape[1] + tpb[1] - 1) // tpb[1], + ) + return bpg, tpb + +from xrspatial.geodesic import ( + INV_2R, + WGS84_A2, + WGS84_B2, + _cpu_geodesic_slope, + _run_gpu_geodesic_slope, +) + + +# ===================================================================== +# Planar backend functions (unchanged) +# ===================================================================== + @ngjit def _cpu(data, cellsize_x, cellsize_y): data = data.astype(np.float32) @@ -135,8 +159,118 @@ def _run_cupy(data: cupy.ndarray, return out +# ===================================================================== +# Geodesic backend functions +# ===================================================================== + +def _run_numpy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor): + stacked = np.stack([ + data.astype(np.float64), + lat_2d, + lon_2d, + ], axis=0) + return _cpu_geodesic_slope(stacked, a2, b2, z_factor) + + +def _run_cupy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor): + lat_2d_gpu = cupy.asarray(lat_2d, dtype=cupy.float64) + lon_2d_gpu = cupy.asarray(lon_2d, dtype=cupy.float64) + stacked = cupy.stack([ + data.astype(cupy.float64), + lat_2d_gpu, + lon_2d_gpu, + ], axis=0) + + H, W = data.shape + out = 
cupy.full((H, W), cupy.nan, dtype=cupy.float32) + + a2_arr = cupy.array([a2], dtype=cupy.float64) + b2_arr = cupy.array([b2], dtype=cupy.float64) + zf_arr = cupy.array([z_factor], dtype=cupy.float64) + inv_2r_arr = cupy.array([INV_2R], dtype=cupy.float64) + + griddim, blockdim = _geodesic_cuda_dims((H, W)) + _run_gpu_geodesic_slope[griddim, blockdim](stacked, a2_arr, b2_arr, zf_arr, inv_2r_arr, out) + return out + + +def _dask_geodesic_slope_chunk(stacked_chunk, a2, b2, z_factor): + """Process a single chunk for dask map_overlap. stacked_chunk shape = (3, h, w). + Returns (3, h, w) to preserve shape for map_overlap.""" + result_2d = _cpu_geodesic_slope(stacked_chunk, a2, b2, z_factor) + out = np.empty_like(stacked_chunk, dtype=np.float32) + out[0] = result_2d + out[1] = 0.0 + out[2] = 0.0 + return out + + +def _dask_geodesic_slope_chunk_cupy(stacked_chunk, a2, b2, z_factor): + """Process a single chunk for dask+cupy map_overlap.""" + H, W = stacked_chunk.shape[1], stacked_chunk.shape[2] + result_2d = cupy.full((H, W), cupy.nan, dtype=cupy.float32) + + a2_arr = cupy.array([a2], dtype=cupy.float64) + b2_arr = cupy.array([b2], dtype=cupy.float64) + zf_arr = cupy.array([z_factor], dtype=cupy.float64) + inv_2r_arr = cupy.array([INV_2R], dtype=cupy.float64) + + griddim, blockdim = _geodesic_cuda_dims((H, W)) + _run_gpu_geodesic_slope[griddim, blockdim](stacked_chunk, a2_arr, b2_arr, zf_arr, inv_2r_arr, result_2d) + + out = cupy.zeros_like(stacked_chunk, dtype=cupy.float32) + out[0] = result_2d + return out + + +def _run_dask_numpy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor): + lat_dask = da.from_array(lat_2d, chunks=data.chunksize) + lon_dask = da.from_array(lon_2d, chunks=data.chunksize) + stacked = da.stack([ + data.astype(np.float64), + lat_dask, + lon_dask, + ], axis=0).rechunk({0: 3}) # all 3 channels in one chunk + + _func = partial(_dask_geodesic_slope_chunk, a2=a2, b2=b2, z_factor=z_factor) + out = stacked.map_overlap( + _func, + depth=(0, 1, 1), + boundary=np.nan, + meta=np.array((), dtype=np.float32), + ) + return out[0] + + +def _run_dask_cupy_geodesic(data, lat_2d, lon_2d, a2, b2, z_factor): + lat_dask = da.from_array(cupy.asarray(lat_2d, dtype=cupy.float64), + chunks=data.chunksize) + lon_dask = da.from_array(cupy.asarray(lon_2d, dtype=cupy.float64), + chunks=data.chunksize) + stacked = da.stack([ + data.astype(cupy.float64), + lat_dask, + lon_dask, + ], axis=0).rechunk({0: 3}) # all 3 channels in one chunk + + _func = partial(_dask_geodesic_slope_chunk_cupy, a2=a2, b2=b2, z_factor=z_factor) + out = stacked.map_overlap( + _func, + depth=(0, 1, 1), + boundary=cupy.nan, + meta=cupy.array((), dtype=cupy.float32), + ) + return out[0] + + +# ===================================================================== +# Public API +# ===================================================================== + def slope(agg: xr.DataArray, - name: str = 'slope') -> xr.DataArray: + name: str = 'slope', + method: str = 'planar', + z_unit: str = 'meter') -> xr.DataArray: """ Returns slope of input aggregate in degrees. @@ -146,6 +280,15 @@ def slope(agg: xr.DataArray, 2D array of elevation data. name : str, default='slope' Name of output DataArray. + method : str, default='planar' + ``'planar'`` uses the classic Horn algorithm with uniform cell size. + ``'geodesic'`` converts cells to Earth-Centered Earth-Fixed (ECEF) + coordinates and fits a 3D plane, yielding accurate results for + geographic (lat/lon) coordinate systems. + z_unit : str, default='meter' + Unit of the elevation values. 
Only used when ``method='geodesic'``. + Accepted values: ``'meter'``, ``'foot'``, ``'kilometer'``, ``'mile'`` + (and common aliases). Returns ------- @@ -181,14 +324,38 @@ def slope(agg: xr.DataArray, Dimensions without coordinates: dim_0, dim_1 """ - cellsize_x, cellsize_y = get_dataarray_resolution(agg) - mapper = ArrayTypeFunctionMapping( - numpy_func=_run_numpy, - cupy_func=_run_cupy, - dask_func=_run_dask_numpy, - dask_cupy_func=_run_dask_cupy, - ) - out = mapper(agg)(agg.data, cellsize_x, cellsize_y) + if method not in ('planar', 'geodesic'): + raise ValueError( + f"method must be 'planar' or 'geodesic', got {method!r}" + ) + + if method == 'planar': + cellsize_x, cellsize_y = get_dataarray_resolution(agg) + mapper = ArrayTypeFunctionMapping( + numpy_func=_run_numpy, + cupy_func=_run_cupy, + dask_func=_run_dask_numpy, + dask_cupy_func=_run_dask_cupy, + ) + out = mapper(agg)(agg.data, cellsize_x, cellsize_y) + + else: # geodesic + if z_unit not in Z_UNITS: + raise ValueError( + f"z_unit must be one of {sorted(set(Z_UNITS.values()), key=str)}, " + f"got {z_unit!r}" + ) + z_factor = Z_UNITS[z_unit] + + lat_2d, lon_2d = _extract_latlon_coords(agg) + + mapper = ArrayTypeFunctionMapping( + numpy_func=_run_numpy_geodesic, + cupy_func=_run_cupy_geodesic, + dask_func=_run_dask_numpy_geodesic, + dask_cupy_func=_run_dask_cupy_geodesic, + ) + out = mapper(agg)(agg.data, lat_2d, lon_2d, WGS84_A2, WGS84_B2, z_factor) return xr.DataArray(out, name=name, diff --git a/xrspatial/tests/test_classify.py b/xrspatial/tests/test_classify.py index 355d9219..003a8dd4 100644 --- a/xrspatial/tests/test_classify.py +++ b/xrspatial/tests/test_classify.py @@ -2,7 +2,9 @@ import pytest import xarray as xr -from xrspatial import binary, equal_interval, natural_breaks, quantile, reclassify +from xrspatial import (binary, box_plot, equal_interval, head_tail_breaks, + maximum_breaks, natural_breaks, percentiles, quantile, + reclassify, std_mean) from xrspatial.tests.general_checks import (assert_input_data_unmodified, create_test_raster, cuda_and_cupy_available, @@ -498,3 +500,385 @@ def test_natural_breaks_dask_matches_numpy(): np.testing.assert_allclose( numpy_result.data, dask_result.data.compute(), equal_nan=True ) + + +# =================================================================== +# std_mean tests +# =================================================================== + +@pytest.fixture +def result_std_mean(): + expected_result = np.asarray([ + [np.nan, 1., 1., 1., np.nan], + [1., 2., 2., 2., 2.], + [2., 2., 2., 2., 2.], + [3., 3., 3., 3., np.nan] + ], dtype=np.float32) + return expected_result + + +def test_std_mean_numpy(result_std_mean): + numpy_agg = input_data() + numpy_result = std_mean(numpy_agg) + general_output_checks(numpy_agg, numpy_result, result_std_mean, verify_dtype=True) + + +@dask_array_available +def test_std_mean_dask_numpy(result_std_mean): + dask_agg = input_data('dask+numpy') + dask_result = std_mean(dask_agg) + general_output_checks(dask_agg, dask_result, result_std_mean, verify_dtype=True) + + +@cuda_and_cupy_available +def test_std_mean_cupy(result_std_mean): + cupy_agg = input_data('cupy') + cupy_result = std_mean(cupy_agg) + general_output_checks(cupy_agg, cupy_result, result_std_mean, verify_dtype=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_std_mean_dask_cupy(result_std_mean): + dask_cupy_agg = input_data('dask+cupy') + dask_cupy_result = std_mean(dask_cupy_agg) + general_output_checks(dask_cupy_agg, dask_cupy_result, result_std_mean, verify_dtype=True) 
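+
+
+# std_mean reference: class edges are mean-2*std, mean-std, mean+std,
+# mean+2*std, and the max of the finite data. E.g. mean=10, std=2 gives
+# edges [6, 8, 12, 14, max], i.e. up to five classes (0-4).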
+ + +def test_std_mean_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + std_mean(agg) + assert_input_data_unmodified(original, agg) + + +def test_std_mean_all_same_values(): + data = np.full((4, 5), 7.0) + agg = xr.DataArray(data) + result = std_mean(agg) + # std=0, so all bins collapse to mean=7. All values in class 0. + finite_mask = np.isfinite(result.data) + assert np.all(result.data[finite_mask] == 0) + + +# =================================================================== +# head_tail_breaks tests +# =================================================================== + +@pytest.fixture +def result_head_tail_breaks(): + expected_result = np.asarray([ + [np.nan, 0., 0., 0., np.nan], + [0., 0., 0., 0., 0.], + [0., 1., 1., 1., 1.], + [1., 1., 1., 1., np.nan] + ], dtype=np.float32) + return expected_result + + +def test_head_tail_breaks_numpy(result_head_tail_breaks): + numpy_agg = input_data() + numpy_result = head_tail_breaks(numpy_agg) + general_output_checks(numpy_agg, numpy_result, result_head_tail_breaks, verify_dtype=True) + + +@dask_array_available +def test_head_tail_breaks_dask_numpy(result_head_tail_breaks): + dask_agg = input_data('dask+numpy') + dask_result = head_tail_breaks(dask_agg) + general_output_checks(dask_agg, dask_result, result_head_tail_breaks, verify_dtype=True) + + +@cuda_and_cupy_available +def test_head_tail_breaks_cupy(result_head_tail_breaks): + cupy_agg = input_data('cupy') + cupy_result = head_tail_breaks(cupy_agg) + general_output_checks(cupy_agg, cupy_result, result_head_tail_breaks, verify_dtype=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_head_tail_breaks_dask_cupy(result_head_tail_breaks): + dask_cupy_agg = input_data('dask+cupy') + dask_cupy_result = head_tail_breaks(dask_cupy_agg) + general_output_checks(dask_cupy_agg, dask_cupy_result, result_head_tail_breaks, verify_dtype=True) + + +def test_head_tail_breaks_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + head_tail_breaks(agg) + assert_input_data_unmodified(original, agg) + + +def test_head_tail_breaks_heavy_tailed(): + # Heavy-tailed data should produce more classes than uniform data + data = np.array([ + [1., 1., 1., 1., 2.], + [2., 2., 3., 3., 5.], + [5., 10., 20., 50., 100.], + [200., 500., 1000., 2000., 5000.], + ]) + agg = xr.DataArray(data) + result = head_tail_breaks(agg) + unique_classes = np.unique(result.data[np.isfinite(result.data)]) + # Heavy-tailed data should produce more than 2 classes + assert len(unique_classes) > 2 + + +# =================================================================== +# percentiles tests +# =================================================================== + +@pytest.fixture +def result_percentiles(): + expected_result = np.asarray([ + [np.nan, 0., 1., 2., np.nan], + [2., 2., 2., 2., 2.], + [2., 3., 3., 3., 3.], + [3., 3., 4., 5., np.nan] + ], dtype=np.float32) + return expected_result + + +def test_percentiles_numpy(result_percentiles): + numpy_agg = input_data() + numpy_result = percentiles(numpy_agg) + general_output_checks(numpy_agg, numpy_result, result_percentiles, verify_dtype=True) + + +@dask_array_available +def test_percentiles_dask_numpy(result_percentiles): + # Dask percentile is approximate; verify structure not exact values + dask_agg = input_data('dask+numpy') + dask_result = percentiles(dask_agg) + general_output_checks(dask_agg, dask_result) + + +@cuda_and_cupy_available +def test_percentiles_cupy(result_percentiles): + cupy_agg = input_data('cupy') 
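+    # unlike the approximate dask percentile checks above, the CuPy
+    # backend is expected to reproduce the NumPy breaks exactly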
+ cupy_result = percentiles(cupy_agg) + general_output_checks(cupy_agg, cupy_result, result_percentiles, verify_dtype=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_percentiles_dask_cupy(result_percentiles): + dask_cupy_agg = input_data('dask+cupy') + dask_cupy_result = percentiles(dask_cupy_agg) + general_output_checks(dask_cupy_agg, dask_cupy_result) + + +def test_percentiles_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + percentiles(agg) + assert_input_data_unmodified(original, agg) + + +def test_percentiles_custom_pct(): + numpy_agg = input_data() + result = percentiles(numpy_agg, pct=[25, 50, 75]) + result_data = result.data + finite_vals = result_data[np.isfinite(result_data)] + unique_classes = np.unique(finite_vals) + # Should have at most 4 classes (3 percentile breaks + max) + assert len(unique_classes) <= 4 + + +def test_percentiles_single_pct(): + numpy_agg = input_data() + result = percentiles(numpy_agg, pct=[50]) + result_data = result.data + finite_vals = result_data[np.isfinite(result_data)] + unique_classes = np.unique(finite_vals) + # Single percentile + max → 2 classes + assert len(unique_classes) == 2 + + +# =================================================================== +# maximum_breaks tests +# =================================================================== + +@pytest.fixture +def result_maximum_breaks(): + expected_result = np.asarray([ + [np.nan, 0., 0., 0., np.nan], + [0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [1., 2., 3., 4., np.nan] + ], dtype=np.float32) + return expected_result + + +def test_maximum_breaks_numpy(result_maximum_breaks): + numpy_agg = input_data() + numpy_result = maximum_breaks(numpy_agg) + general_output_checks(numpy_agg, numpy_result, result_maximum_breaks, verify_dtype=True) + + +@dask_array_available +def test_maximum_breaks_dask_numpy(result_maximum_breaks): + dask_agg = input_data('dask+numpy') + dask_result = maximum_breaks(dask_agg) + general_output_checks(dask_agg, dask_result, result_maximum_breaks, verify_dtype=True) + + +@cuda_and_cupy_available +def test_maximum_breaks_cupy(result_maximum_breaks): + cupy_agg = input_data('cupy') + cupy_result = maximum_breaks(cupy_agg) + general_output_checks(cupy_agg, cupy_result, result_maximum_breaks, verify_dtype=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_maximum_breaks_dask_cupy(result_maximum_breaks): + dask_cupy_agg = input_data('dask+cupy') + dask_cupy_result = maximum_breaks(dask_cupy_agg) + general_output_checks(dask_cupy_agg, dask_cupy_result, result_maximum_breaks, verify_dtype=True) + + +def test_maximum_breaks_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + maximum_breaks(agg) + assert_input_data_unmodified(original, agg) + + +def test_maximum_breaks_k_equals_2(): + numpy_agg = input_data() + result = maximum_breaks(numpy_agg, k=2) + finite_vals = result.data[np.isfinite(result.data)] + unique_classes = np.unique(finite_vals) + assert len(unique_classes) == 2 + + +def test_maximum_breaks_k_exceeds_unique(): + # When k > number of unique values, fall back gracefully + data = np.array([[1., 2., 3.], [4., 5., np.nan]]) + agg = xr.DataArray(data) + result = maximum_breaks(agg, k=10) + finite_vals = result.data[np.isfinite(result.data)] + assert len(np.unique(finite_vals)) <= 5 + + +# =================================================================== +# box_plot tests +# =================================================================== + +@pytest.fixture +def 
result_box_plot(): + expected_result = np.asarray([ + [np.nan, 1., 1., 1., np.nan], + [1., 1., 2., 2., 2.], + [2., 3., 3., 3., 3.], + [4., 4., 4., 4., np.nan] + ], dtype=np.float32) + return expected_result + + +def test_box_plot_numpy(result_box_plot): + numpy_agg = input_data() + numpy_result = box_plot(numpy_agg) + general_output_checks(numpy_agg, numpy_result, result_box_plot, verify_dtype=True) + + +@dask_array_available +def test_box_plot_dask_numpy(result_box_plot): + # Dask percentile is approximate; verify structure not exact values + dask_agg = input_data('dask+numpy') + dask_result = box_plot(dask_agg) + general_output_checks(dask_agg, dask_result) + + +@cuda_and_cupy_available +def test_box_plot_cupy(result_box_plot): + cupy_agg = input_data('cupy') + cupy_result = box_plot(cupy_agg) + general_output_checks(cupy_agg, cupy_result, result_box_plot, verify_dtype=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_box_plot_dask_cupy(result_box_plot): + dask_cupy_agg = input_data('dask+cupy') + dask_cupy_result = box_plot(dask_cupy_agg) + general_output_checks(dask_cupy_agg, dask_cupy_result) + + +def test_box_plot_does_not_modify_input(): + agg = input_data() + original = agg.copy(deep=True) + box_plot(agg) + assert_input_data_unmodified(original, agg) + + +def test_box_plot_custom_hinge(): + numpy_agg = input_data() + result = box_plot(numpy_agg, hinge=3.0) + result_data = result.data + finite_vals = result_data[np.isfinite(result_data)] + # With hinge=3.0, whiskers extend further so fewer outliers + assert len(np.unique(finite_vals)) >= 2 + + +# =================================================================== +# Name parameter tests for new methods +# =================================================================== + +def test_new_methods_output_name_default(): + agg = input_data() + assert std_mean(agg).name == 'std_mean' + assert head_tail_breaks(agg).name == 'head_tail_breaks' + assert percentiles(agg).name == 'percentiles' + assert maximum_breaks(agg).name == 'maximum_breaks' + assert box_plot(agg).name == 'box_plot' + + +def test_new_methods_output_name_custom(): + agg = input_data() + assert std_mean(agg, name='custom').name == 'custom' + assert head_tail_breaks(agg, name='custom').name == 'custom' + assert percentiles(agg, name='custom').name == 'custom' + assert maximum_breaks(agg, name='custom').name == 'custom' + assert box_plot(agg, name='custom').name == 'custom' + + +# =================================================================== +# Cross-backend consistency tests for new methods +# =================================================================== + +@dask_array_available +def test_std_mean_dask_matches_numpy(): + elevation = np.arange(100, dtype=np.float64).reshape(10, 10) + numpy_agg = xr.DataArray(elevation) + dask_agg = xr.DataArray(da.from_array(elevation, chunks=(5, 5))) + np.testing.assert_allclose( + std_mean(numpy_agg).data, std_mean(dask_agg).data.compute(), equal_nan=True + ) + + +@dask_array_available +def test_head_tail_breaks_dask_matches_numpy(): + elevation = np.arange(100, dtype=np.float64).reshape(10, 10) + numpy_agg = xr.DataArray(elevation) + dask_agg = xr.DataArray(da.from_array(elevation, chunks=(5, 5))) + np.testing.assert_allclose( + head_tail_breaks(numpy_agg).data, + head_tail_breaks(dask_agg).data.compute(), + equal_nan=True, + ) + + +@dask_array_available +def test_maximum_breaks_dask_matches_numpy(): + elevation = np.arange(100, dtype=np.float64).reshape(10, 10) + numpy_agg = xr.DataArray(elevation) + 
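+    # chunks smaller than the 10x10 array ensure the dask result is
+    # assembled from multiple blocks rather than one whole-array chunk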
dask_agg = xr.DataArray(da.from_array(elevation, chunks=(5, 5))) + np.testing.assert_allclose( + maximum_breaks(numpy_agg).data, + maximum_breaks(dask_agg).data.compute(), + equal_nan=True, + ) diff --git a/xrspatial/utils.py b/xrspatial/utils.py index 75e63523..2e47806f 100644 --- a/xrspatial/utils.py +++ b/xrspatial/utils.py @@ -590,6 +590,129 @@ def warn_if_unit_mismatch(agg: xr.DataArray) -> None: ) +# ---------- Z-unit conversion for geodesic methods ---------- +Z_UNITS = { + 'meter': 1.0, 'meters': 1.0, 'm': 1.0, + 'foot': 0.3048, 'feet': 0.3048, 'ft': 0.3048, + 'kilometer': 1000.0, 'kilometers': 1000.0, 'km': 1000.0, + 'mile': 1609.344, 'miles': 1609.344, 'mi': 1609.344, +} + + +# ---------- Lat/lon coordinate extraction for geodesic methods ---------- +# Known dimension / coordinate names (lower-cased for matching) +_LAT_NAMES = {'lat', 'latitude', 'y'} +_LON_NAMES = {'lon', 'longitude', 'x'} + + +def _extract_latlon_coords(agg: xr.DataArray): + """ + Extract 2-D latitude and longitude arrays from a DataArray. + + Supports: + - 1-D coordinates on the last two dims (regular geographic grid). + - 2-D coordinates that vary per cell (curvilinear grid). + + Returns + ------- + lat_2d, lon_2d : numpy.ndarray + Always 2-D float64 numpy arrays of shape ``(H, W)``. + + Raises + ------ + ValueError + If coordinates are missing, non-numeric, or outside geographic + ranges (lat not in [-90, 90], lon not in [-180, 360]). + """ + if agg.ndim < 2: + raise ValueError( + "geodesic method requires a 2-D DataArray, " + f"got {agg.ndim}-D" + ) + + dim_y, dim_x = agg.dims[-2], agg.dims[-1] + + # --- locate lat coordinate --- + lat_coord = _find_coord(agg, dim_y, _LAT_NAMES, 'latitude') + # --- locate lon coordinate --- + lon_coord = _find_coord(agg, dim_x, _LON_NAMES, 'longitude') + + lat_vals = np.asarray(lat_coord.values, dtype=np.float64) + lon_vals = np.asarray(lon_coord.values, dtype=np.float64) + + # Build 2-D arrays + if lat_vals.ndim == 1 and lon_vals.ndim == 1: + # Regular grid: broadcast to 2-D + lat_2d = np.broadcast_to(lat_vals[:, np.newaxis], + (agg.sizes[dim_y], agg.sizes[dim_x])).copy() + lon_2d = np.broadcast_to(lon_vals[np.newaxis, :], + (agg.sizes[dim_y], agg.sizes[dim_x])).copy() + elif lat_vals.ndim == 2 and lon_vals.ndim == 2: + lat_2d = lat_vals + lon_2d = lon_vals + else: + raise ValueError( + f"lat/lon coordinates must be both 1-D or both 2-D, " + f"got lat={lat_vals.ndim}-D and lon={lon_vals.ndim}-D" + ) + + # --- validate ranges --- + _validate_geographic_range(lat_2d, lon_2d) + + return lat_2d, lon_2d + + +def _find_coord(agg, dim_name, known_names, label): + """Find a coordinate matching *dim_name* or one of *known_names*.""" + # 1) Try the dimension name directly + if dim_name in agg.coords: + coord = agg.coords[dim_name] + if np.issubdtype(coord.dtype, np.number): + return coord + + # 2) Scan all coords for a known name + for name in agg.coords: + if str(name).lower() in known_names: + coord = agg.coords[name] + if np.issubdtype(coord.dtype, np.number): + return coord + + raise ValueError( + f"geodesic method requires {label} coordinates on the DataArray. " + f"No numeric coordinate found for dim '{dim_name}' or any of " + f"{sorted(known_names)}." 
+    )
+
+
+def _validate_geographic_range(lat_2d, lon_2d):
+    """Raise ValueError if lat/lon values look non-geographic."""
+    lat_min = np.nanmin(lat_2d)
+    lat_max = np.nanmax(lat_2d)
+    lon_min = np.nanmin(lon_2d)
+    lon_max = np.nanmax(lon_2d)
+
+    if lat_min < -90 or lat_max > 90:
+        raise ValueError(
+            f"Latitude values must be in [-90, 90], "
+            f"got [{lat_min}, {lat_max}]. "
+            f"Are your coordinates in a projected CRS?"
+        )
+    if lon_min < -180 or lon_max > 360:
+        raise ValueError(
+            f"Longitude values must be in [-180, 360], "
+            f"got [{lon_min}, {lon_max}]. "
+            f"Are your coordinates in a projected CRS?"
+        )
+    lat_span = lat_max - lat_min
+    lon_span = lon_max - lon_min
+    if lat_span > 180 or lon_span > 360:
+        raise ValueError(
+            f"Coordinate span too large for geographic coordinates "
+            f"(lat span={lat_span}, lon span={lon_span}). "
+            f"Are your coordinates in a projected CRS?"
+        )
+
+
 def _to_float_scalar(x) -> float:
     """Convert numpy/cupy scalar or 0-d array to python float safely."""
     if cupy is not None:

From dede1b9e5754d9d28193d30c7efc6d1ca8696797 Mon Sep 17 00:00:00 2001
From: Brendan Collins
Date: Fri, 20 Feb 2026 06:39:43 -0800
Subject: [PATCH 6/8] add geodesic slope and aspect helpers with tests

---
 xrspatial/geodesic.py                   | 401 ++++++++++++++++++++++++
 xrspatial/tests/test_geodesic_aspect.py | 261 +++++++++++++++
 xrspatial/tests/test_geodesic_slope.py  | 282 +++++++++++++++++
 3 files changed, 944 insertions(+)
 create mode 100644 xrspatial/geodesic.py
 create mode 100644 xrspatial/tests/test_geodesic_aspect.py
 create mode 100644 xrspatial/tests/test_geodesic_slope.py

diff --git a/xrspatial/geodesic.py b/xrspatial/geodesic.py
new file mode 100644
index 00000000..4312979f
--- /dev/null
+++ b/xrspatial/geodesic.py
@@ -0,0 +1,401 @@
+"""
+Geodesic math primitives for slope / aspect on ellipsoidal Earth.
+
+Algorithm:
+1. Convert each 3x3 neighbourhood from (lat, lon, elevation) to ECEF.
+2. Project the 9 ECEF points into the local tangent-plane frame
+   (East, North, Up) centered on the middle cell.
+3. Apply curvature correction: u += (e^2 + n^2) / (2*R)
+   to remove the systematic "drop" from the ellipsoid curving away.
+4. Fit u = A*e + B*n via least-squares in the local frame.
+5. slope = atan(sqrt(A^2 + B^2))
+6. aspect = atan2(-A, -B)   (downslope direction, compass bearing)
+
+All functions use WGS-84 constants and float64 precision.
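+
+Step 4 solves the centered 2x2 normal equations in closed form:
+
+    det = See*Snn - Sen^2
+    A   = (Seu*Snn - Snu*Sen) / det
+    B   = (Snu*See - Seu*Sen) / det
+
+where See, Snn, Sen, Seu, Snu are the centered sums of squares and
+cross products of the projected (e, n, u) samples.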
+""" +from __future__ import annotations + +from math import atan, atan2, cos, sin, sqrt + +import numpy as np +from numba import cuda + +from xrspatial.utils import ngjit + +# ---- WGS-84 ellipsoid constants ---- +WGS84_A = 6378137.0 # semi-major axis (m) +WGS84_B = 6356752.314245 # semi-minor axis (m) +WGS84_A2 = WGS84_A * WGS84_A +WGS84_B2 = WGS84_B * WGS84_B +# Mean radius for curvature correction +WGS84_R_MEAN = (2.0 * WGS84_A + WGS84_B) / 3.0 +# 1 / (2 * R_mean) +INV_2R = 1.0 / (2.0 * WGS84_R_MEAN) + + +# ===================================================================== +# CPU (Numba ngjit) primitives +# ===================================================================== + +@ngjit +def _geodetic_to_ecef(lat_rad, lon_rad, h, a2, b2): + """Convert geodetic (lat, lon, height) to ECEF (X, Y, Z).""" + cos_lat = cos(lat_rad) + sin_lat = sin(lat_rad) + cos_lon = cos(lon_rad) + sin_lon = sin(lon_rad) + N = a2 / sqrt(a2 * cos_lat * cos_lat + b2 * sin_lat * sin_lat) + X = (N + h) * cos_lat * cos_lon + Y = (N + h) * cos_lat * sin_lon + Z = (b2 / a2 * N + h) * sin_lat + return X, Y, Z + + +@ngjit +def _local_frame_project_and_fit(lat_deg, lon_deg, elev, neighbor_lats, + neighbor_lons, neighbor_elevs, a2, b2, + z_factor, inv_2r): + """ + Project 9 ECEF points into local (E, N, U) frame, apply curvature + correction, and fit u = A*e + B*n via least-squares. + + Returns (A, B, valid) where valid=False if data has NaN or system is degenerate. + """ + deg2rad = 3.141592653589793 / 180.0 + + for k in range(9): + if neighbor_elevs[k] != neighbor_elevs[k]: + return 0.0, 0.0, False + + lat_c = lat_deg * deg2rad + lon_c = lon_deg * deg2rad + Xc, Yc, Zc = _geodetic_to_ecef(lat_c, lon_c, elev * z_factor, a2, b2) + + cos_lat = cos(lat_c) + sin_lat = sin(lat_c) + cos_lon = cos(lon_c) + sin_lon = sin(lon_c) + + # Local tangent frame unit vectors + ex = -sin_lon; ey = cos_lon; ez = 0.0 + nx = -sin_lat * cos_lon; ny = -sin_lat * sin_lon; nz = cos_lat + ux = cos_lat * cos_lon; uy = cos_lat * sin_lon; uz = sin_lat + + e9 = np.empty(9, dtype=np.float64) + n9 = np.empty(9, dtype=np.float64) + u9 = np.empty(9, dtype=np.float64) + + for k in range(9): + lat_r = neighbor_lats[k] * deg2rad + lon_r = neighbor_lons[k] * deg2rad + h = neighbor_elevs[k] * z_factor + Xk, Yk, Zk = _geodetic_to_ecef(lat_r, lon_r, h, a2, b2) + dx = Xk - Xc + dy = Yk - Yc + dz = Zk - Zc + ek = dx * ex + dy * ey + dz * ez + nk = dx * nx + dy * ny + dz * nz + uk = dx * ux + dy * uy + dz * uz + # Curvature correction: compensate for Earth curving away + uk += (ek * ek + nk * nk) * inv_2r + e9[k] = ek + n9[k] = nk + u9[k] = uk + + # Centered normal equations for u = A*e + B*n + C + me = 0.0; mn = 0.0; mu = 0.0 + for k in range(9): + me += e9[k]; mn += n9[k]; mu += u9[k] + inv9 = 1.0 / 9.0 + me *= inv9; mn *= inv9; mu *= inv9 + + See = 0.0; Snn = 0.0; Sen = 0.0; Seu = 0.0; Snu = 0.0 + for k in range(9): + de = e9[k] - me + dn = n9[k] - mn + du = u9[k] - mu + See += de * de + Snn += dn * dn + Sen += de * dn + Seu += de * du + Snu += dn * du + + det = See * Snn - Sen * Sen + if abs(det) < 1e-30: + return 0.0, 0.0, True # degenerate → flat + + A = (Seu * Snn - Snu * Sen) / det + B = (Snu * See - Seu * Sen) / det + return A, B, True + + +@ngjit +def _geodesic_slope_at_point(lat_deg, lon_deg, elev, neighbor_lats, neighbor_lons, + neighbor_elevs, a2, b2, z_factor, inv_2r): + A, B, valid = _local_frame_project_and_fit( + lat_deg, lon_deg, elev, neighbor_lats, neighbor_lons, + neighbor_elevs, a2, b2, z_factor, inv_2r + ) + if not valid: + return np.nan 
+    slope_rad = atan(sqrt(A * A + B * B))
+    return slope_rad * (180.0 / 3.141592653589793)
+
+
+@ngjit
+def _geodesic_aspect_at_point(lat_deg, lon_deg, elev, neighbor_lats, neighbor_lons,
+                              neighbor_elevs, a2, b2, z_factor, inv_2r):
+    A, B, valid = _local_frame_project_and_fit(
+        lat_deg, lon_deg, elev, neighbor_lats, neighbor_lons,
+        neighbor_elevs, a2, b2, z_factor, inv_2r
+    )
+    if not valid:
+        return np.nan
+
+    slope_mag = sqrt(A * A + B * B)
+    if slope_mag < 1e-7:
+        return -1.0
+
+    # Downslope direction in (east, north): (-A, -B)
+    aspect_rad = atan2(-A, -B)
+    aspect_deg = aspect_rad * (180.0 / 3.141592653589793)
+    if aspect_deg < 0:
+        aspect_deg += 360.0
+    if aspect_deg >= 360.0:
+        aspect_deg -= 360.0
+    return aspect_deg
+
+
+# =====================================================================
+# CPU kernels operating on stacked (3, H, W) arrays
+# channel 0 = elevation, channel 1 = lat_deg, channel 2 = lon_deg
+# =====================================================================
+
+@ngjit
+def _cpu_geodesic_slope(stacked, a2, b2, z_factor):
+    H = stacked.shape[1]
+    W = stacked.shape[2]
+    out = np.full((H, W), np.nan, dtype=np.float32)
+    inv_2r = INV_2R  # 1 / (2 * WGS84_R_MEAN)
+
+    neighbor_lats = np.empty(9, dtype=np.float64)
+    neighbor_lons = np.empty(9, dtype=np.float64)
+    neighbor_elevs = np.empty(9, dtype=np.float64)
+
+    for y in range(1, H - 1):
+        for x in range(1, W - 1):
+            idx = 0
+            for dy in range(-1, 2):
+                for dx_ in range(-1, 2):
+                    neighbor_elevs[idx] = stacked[0, y + dy, x + dx_]
+                    neighbor_lats[idx] = stacked[1, y + dy, x + dx_]
+                    neighbor_lons[idx] = stacked[2, y + dy, x + dx_]
+                    idx += 1
+
+            out[y, x] = _geodesic_slope_at_point(
+                stacked[1, y, x], stacked[2, y, x], stacked[0, y, x],
+                neighbor_lats, neighbor_lons, neighbor_elevs,
+                a2, b2, z_factor, inv_2r
+            )
+    return out
+
+
+@ngjit
+def _cpu_geodesic_aspect(stacked, a2, b2, z_factor):
+    H = stacked.shape[1]
+    W = stacked.shape[2]
+    out = np.full((H, W), np.nan, dtype=np.float32)
+    inv_2r = INV_2R  # 1 / (2 * WGS84_R_MEAN)
+
+    neighbor_lats = np.empty(9, dtype=np.float64)
+    neighbor_lons = np.empty(9, dtype=np.float64)
+    neighbor_elevs = np.empty(9, dtype=np.float64)
+
+    for y in range(1, H - 1):
+        for x in range(1, W - 1):
+            idx = 0
+            for dy in range(-1, 2):
+                for dx_ in range(-1, 2):
+                    neighbor_elevs[idx] = stacked[0, y + dy, x + dx_]
+                    neighbor_lats[idx] = stacked[1, y + dy, x + dx_]
+                    neighbor_lons[idx] = stacked[2, y + dy, x + dx_]
+                    idx += 1
+
+            out[y, x] = _geodesic_aspect_at_point(
+                stacked[1, y, x], stacked[2, y, x], stacked[0, y, x],
+                neighbor_lats, neighbor_lons, neighbor_elevs,
+                a2, b2, z_factor, inv_2r
+            )
+    return out
+
+
+# =====================================================================
+# GPU (CUDA) device functions
+# =====================================================================
+
+@cuda.jit(device=True)
+def _gpu_geodetic_to_ecef(lat_rad, lon_rad, h, a2, b2):
+    cos_lat = cos(lat_rad)
+    sin_lat = sin(lat_rad)
+    cos_lon = cos(lon_rad)
+    sin_lon = sin(lon_rad)
+    N = a2 / sqrt(a2 * cos_lat * cos_lat + b2 * sin_lat * sin_lat)
+    X = (N + h) * cos_lat * cos_lon
+    Y = (N + h) * cos_lat * sin_lon
+    Z = (b2 / a2 * N + h) * sin_lat
+    return X, Y, Z
+
+
+@cuda.jit(device=True)
+def _gpu_local_frame_fit(elev_arr, lat_arr, lon_arr, a2, b2, z_factor, inv_2r):
+    """
+    Project 3x3 neighbourhood into local frame, apply curvature correction,
+    fit u = A*e + B*n. Returns (A, B, valid_flag).
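+
+    The nine projected samples are kept in unrolled scalar locals and
+    regrouped into tuples for the centered second pass, so the device
+    function needs no local array allocation.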
+ """ + deg2rad = 3.141592653589793 / 180.0 + + # NaN check + for dy in range(3): + for dx in range(3): + v = elev_arr[dy, dx] + if v != v: + return 0.0, 0.0, 0.0 # invalid + + lat_c = lat_arr[1, 1] * deg2rad + lon_c = lon_arr[1, 1] * deg2rad + Xc, Yc, Zc = _gpu_geodetic_to_ecef(lat_c, lon_c, elev_arr[1, 1] * z_factor, a2, b2) + + cos_lat = cos(lat_c) + sin_lat = sin(lat_c) + cos_lon = cos(lon_c) + sin_lon = sin(lon_c) + + ex = -sin_lon; ey = cos_lon; ez = 0.0 + nxv = -sin_lat * cos_lon; nyv = -sin_lat * sin_lon; nzv = cos_lat + ux = cos_lat * cos_lon; uy = cos_lat * sin_lon; uz = sin_lat + + # Accumulate means and sums for LSQ + me = 0.0; mn_sum = 0.0; mu = 0.0 + See = 0.0; Snn = 0.0; Sen = 0.0; Seu = 0.0; Snu = 0.0 + + # First pass: accumulate means (store projected coords in local vars) + # We need all 9 values for the centered fit, so do two passes + e_vals_0 = 0.0; e_vals_1 = 0.0; e_vals_2 = 0.0 + e_vals_3 = 0.0; e_vals_4 = 0.0; e_vals_5 = 0.0 + e_vals_6 = 0.0; e_vals_7 = 0.0; e_vals_8 = 0.0 + n_vals_0 = 0.0; n_vals_1 = 0.0; n_vals_2 = 0.0 + n_vals_3 = 0.0; n_vals_4 = 0.0; n_vals_5 = 0.0 + n_vals_6 = 0.0; n_vals_7 = 0.0; n_vals_8 = 0.0 + u_vals_0 = 0.0; u_vals_1 = 0.0; u_vals_2 = 0.0 + u_vals_3 = 0.0; u_vals_4 = 0.0; u_vals_5 = 0.0 + u_vals_6 = 0.0; u_vals_7 = 0.0; u_vals_8 = 0.0 + + for idx in range(9): + dy = idx // 3 + dx = idx % 3 + lat_r = lat_arr[dy, dx] * deg2rad + lon_r = lon_arr[dy, dx] * deg2rad + Xk, Yk, Zk = _gpu_geodetic_to_ecef(lat_r, lon_r, elev_arr[dy, dx] * z_factor, a2, b2) + ddx = Xk - Xc; ddy = Yk - Yc; ddz = Zk - Zc + ek = ddx * ex + ddy * ey + ddz * ez + nk = ddx * nxv + ddy * nyv + ddz * nzv + uk = ddx * ux + ddy * uy + ddz * uz + uk += (ek * ek + nk * nk) * inv_2r # curvature correction + + if idx == 0: e_vals_0 = ek; n_vals_0 = nk; u_vals_0 = uk + elif idx == 1: e_vals_1 = ek; n_vals_1 = nk; u_vals_1 = uk + elif idx == 2: e_vals_2 = ek; n_vals_2 = nk; u_vals_2 = uk + elif idx == 3: e_vals_3 = ek; n_vals_3 = nk; u_vals_3 = uk + elif idx == 4: e_vals_4 = ek; n_vals_4 = nk; u_vals_4 = uk + elif idx == 5: e_vals_5 = ek; n_vals_5 = nk; u_vals_5 = uk + elif idx == 6: e_vals_6 = ek; n_vals_6 = nk; u_vals_6 = uk + elif idx == 7: e_vals_7 = ek; n_vals_7 = nk; u_vals_7 = uk + elif idx == 8: e_vals_8 = ek; n_vals_8 = nk; u_vals_8 = uk + + me += ek; mn_sum += nk; mu += uk + + inv9 = 1.0 / 9.0 + me *= inv9; mn_sum *= inv9; mu *= inv9 + + es = (e_vals_0, e_vals_1, e_vals_2, e_vals_3, e_vals_4, + e_vals_5, e_vals_6, e_vals_7, e_vals_8) + ns = (n_vals_0, n_vals_1, n_vals_2, n_vals_3, n_vals_4, + n_vals_5, n_vals_6, n_vals_7, n_vals_8) + us = (u_vals_0, u_vals_1, u_vals_2, u_vals_3, u_vals_4, + u_vals_5, u_vals_6, u_vals_7, u_vals_8) + + for k in range(9): + de = es[k] - me + dn = ns[k] - mn_sum + du = us[k] - mu + See += de * de + Snn += dn * dn + Sen += de * dn + Seu += de * du + Snu += dn * du + + det = See * Snn - Sen * Sen + if abs(det) < 1e-30: + return 0.0, 0.0, 1.0 # degenerate → flat, but valid + + A = (Seu * Snn - Snu * Sen) / det + B = (Snu * See - Seu * Sen) / det + return A, B, 1.0 + + +@cuda.jit(device=True) +def _gpu_geodesic_slope_cell(elev_arr, lat_arr, lon_arr, a2, b2, z_factor, inv_2r): + A, B, flag = _gpu_local_frame_fit(elev_arr, lat_arr, lon_arr, a2, b2, z_factor, inv_2r) + if flag == 0.0: + return np.nan + slope_rad = atan(sqrt(A * A + B * B)) + return slope_rad * (180.0 / 3.141592653589793) + + +@cuda.jit(device=True) +def _gpu_geodesic_aspect_cell(elev_arr, lat_arr, lon_arr, a2, b2, z_factor, inv_2r): + A, B, flag = _gpu_local_frame_fit(elev_arr, 
lat_arr, lon_arr, a2, b2, z_factor, inv_2r) + if flag == 0.0: + return np.nan + slope_mag = sqrt(A * A + B * B) + if slope_mag < 1e-7: + return -1.0 + aspect_rad = atan2(-A, -B) + aspect_deg = aspect_rad * (180.0 / 3.141592653589793) + if aspect_deg < 0: + aspect_deg += 360.0 + if aspect_deg >= 360.0: + aspect_deg -= 360.0 + return aspect_deg + + +# ===================================================================== +# GPU global kernels — operate on (3, H, W) stacked arrays +# ===================================================================== + +@cuda.jit +def _run_gpu_geodesic_slope(stacked, a2_arr, b2_arr, zf_arr, inv_2r_arr, out): + i, j = cuda.grid(2) + H = out.shape[0] + W = out.shape[1] + if i >= 1 and i < H - 1 and j >= 1 and j < W - 1: + out[i, j] = _gpu_geodesic_slope_cell( + stacked[0, i - 1:i + 2, j - 1:j + 2], + stacked[1, i - 1:i + 2, j - 1:j + 2], + stacked[2, i - 1:i + 2, j - 1:j + 2], + a2_arr[0], b2_arr[0], zf_arr[0], inv_2r_arr[0], + ) + + +@cuda.jit +def _run_gpu_geodesic_aspect(stacked, a2_arr, b2_arr, zf_arr, inv_2r_arr, out): + i, j = cuda.grid(2) + H = out.shape[0] + W = out.shape[1] + if i >= 1 and i < H - 1 and j >= 1 and j < W - 1: + out[i, j] = _gpu_geodesic_aspect_cell( + stacked[0, i - 1:i + 2, j - 1:j + 2], + stacked[1, i - 1:i + 2, j - 1:j + 2], + stacked[2, i - 1:i + 2, j - 1:j + 2], + a2_arr[0], b2_arr[0], zf_arr[0], inv_2r_arr[0], + ) diff --git a/xrspatial/tests/test_geodesic_aspect.py b/xrspatial/tests/test_geodesic_aspect.py new file mode 100644 index 00000000..16305ecd --- /dev/null +++ b/xrspatial/tests/test_geodesic_aspect.py @@ -0,0 +1,261 @@ +"""Tests for geodesic aspect computation.""" +import numpy as np +import pytest +import xarray as xr + +from xrspatial import aspect +from xrspatial.tests.general_checks import ( + cuda_and_cupy_available, + dask_array_available, +) + +try: + import dask.array as da +except ImportError: + da = None + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_geo_raster(elev, lat_start, lat_end, lon_start, lon_end, + backend='numpy', chunks=(3, 3)): + H, W = elev.shape + lat = np.linspace(lat_start, lat_end, H) + lon = np.linspace(lon_start, lon_end, W) + raster = xr.DataArray( + elev.astype(np.float64), + dims=['lat', 'lon'], + coords={'lat': lat, 'lon': lon}, + ) + + if 'cupy' in backend: + import cupy + raster.data = cupy.asarray(raster.data) + + if 'dask' in backend and da is not None: + raster.data = da.from_array(raster.data, chunks=chunks) + + return raster + + +def _flat_surface(H=6, W=8, elev=500.0): + return np.full((H, W), elev, dtype=np.float64) + + +def _east_tilted_surface(H=6, W=8, base_elev=500.0, grade=100.0, + lon_start=10.0, lon_end=11.0): + lon = np.linspace(lon_start, lon_end, W) + elev = base_elev + grade * (lon - lon_start) + return np.broadcast_to(elev[np.newaxis, :], (H, W)).copy() + + +def _north_tilted_surface(H=6, W=8, base_elev=500.0, grade=100.0, + lat_start=40.0, lat_end=41.0): + lat = np.linspace(lat_start, lat_end, H) + elev = base_elev + grade * (lat - lat_start) + return np.broadcast_to(elev[:, np.newaxis], (H, W)).copy() + + +def _south_tilted_surface(H=6, W=8, base_elev=500.0, grade=100.0, + lat_start=40.0, lat_end=41.0): + lat = np.linspace(lat_start, lat_end, H) + elev = base_elev - grade * (lat - lat_start) + return np.broadcast_to(elev[:, np.newaxis], (H, W)).copy() + + +def _west_tilted_surface(H=6, W=8, base_elev=500.0, grade=100.0, + 
lon_start=10.0, lon_end=11.0):
+    lon = np.linspace(lon_start, lon_end, W)
+    elev = base_elev - grade * (lon - lon_start)
+    return np.broadcast_to(elev[np.newaxis, :], (H, W)).copy()
+
+
+# ---------------------------------------------------------------------------
+# Tests — compass direction
+# ---------------------------------------------------------------------------
+
+class TestGeodesicAspectDirection:
+
+    def test_flat_is_minus_one(self):
+        """Flat surface → aspect = -1."""
+        elev = _flat_surface()
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = aspect(raster, method='geodesic')
+        interior = result.values[1:-1, 1:-1]
+        np.testing.assert_allclose(interior, -1.0, atol=1e-4)
+
+    def test_east_facing(self):
+        """Surface rising to the east → steepest descent is west → aspect ≈ 270."""
+        elev = _east_tilted_surface()
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = aspect(raster, method='geodesic')
+        interior = result.values[2, 4]
+        # East-tilted: elevation increases eastward, so the steepest
+        # descent is westward → aspect ≈ 270.
+        assert np.isfinite(interior)
+        assert abs(interior - 270.0) < 5.0
+
+    def test_north_facing(self):
+        """Surface rising to the north → steepest descent is south → aspect ≈ 180."""
+        elev = _north_tilted_surface()
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = aspect(raster, method='geodesic')
+        interior = result.values[2, 4]
+        assert np.isfinite(interior)
+        assert abs(interior - 180.0) < 5.0
+
+    def test_south_facing(self):
+        """Surface rising to the south → steepest descent is north → aspect ≈ 0 or 360."""
+        elev = _south_tilted_surface()
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = aspect(raster, method='geodesic')
+        interior = result.values[2, 4]
+        assert np.isfinite(interior)
+        # Aspect near 0 or 360
+        assert interior < 5.0 or interior > 355.0
+
+    def test_west_facing(self):
+        """Surface rising to the west → steepest descent is east → aspect ≈ 90."""
+        elev = _west_tilted_surface()
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = aspect(raster, method='geodesic')
+        interior = result.values[2, 4]
+        assert np.isfinite(interior)
+        assert abs(interior - 90.0) < 5.0
+
+
+# ---------------------------------------------------------------------------
+# Tests — edge cases
+# ---------------------------------------------------------------------------
+
+class TestGeodesicAspectEdgeCases:
+
+    def test_nan_handling(self):
+        elev = _east_tilted_surface(H=5, W=5)
+        elev[2, 2] = np.nan
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = aspect(raster, method='geodesic')
+        assert np.isnan(result.values[2, 2])
+
+    def test_edges_are_nan(self):
+        elev = _east_tilted_surface()
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = aspect(raster, method='geodesic')
+        assert np.all(np.isnan(result.values[0, :]))
+        assert np.all(np.isnan(result.values[-1, :]))
+        assert np.all(np.isnan(result.values[:, 0]))
+        assert np.all(np.isnan(result.values[:, -1]))
+
+    def test_aspect_range(self):
+        """Non-flat interior cells should have aspect in [0, 360) or == -1."""
+        rng = np.random.default_rng(42)
+        elev = rng.uniform(100, 1000, size=(10, 10))
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = aspect(raster, method='geodesic')
+        interior = result.values[1:-1, 1:-1]
+        valid = interior[np.isfinite(interior)]
+        flat = valid[valid ==
-1.0] + directional = valid[valid != -1.0] + assert np.all(directional >= 0.0) + assert np.all(directional < 360.0) + + +# --------------------------------------------------------------------------- +# Tests — z_unit +# --------------------------------------------------------------------------- + +class TestGeodesicAspectZUnit: + + def test_foot_vs_meter(self): + elev_m = _east_tilted_surface(grade=100.0) + elev_ft = elev_m / 0.3048 + + r_m = _make_geo_raster(elev_m, 40.0, 41.0, 10.0, 11.0) + r_ft = _make_geo_raster(elev_ft, 40.0, 41.0, 10.0, 11.0) + + a_m = aspect(r_m, method='geodesic', z_unit='meter') + a_ft = aspect(r_ft, method='geodesic', z_unit='foot') + + np.testing.assert_allclose( + a_m.values[1:-1, 1:-1], + a_ft.values[1:-1, 1:-1], + rtol=1e-4, + ) + + +# --------------------------------------------------------------------------- +# Tests — validation +# --------------------------------------------------------------------------- + +class TestGeodesicAspectValidation: + + def test_invalid_method_raises(self): + elev = _flat_surface() + raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0) + with pytest.raises(ValueError, match="method"): + aspect(raster, method='invalid') + + def test_invalid_z_unit_raises(self): + elev = _flat_surface() + raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0) + with pytest.raises(ValueError, match="z_unit"): + aspect(raster, method='geodesic', z_unit='cubit') + + def test_missing_coords_raises(self): + data = np.ones((5, 5)) + raster = xr.DataArray(data, dims=['dim_0', 'dim_1']) + with pytest.raises(ValueError, match="coordinates"): + aspect(raster, method='geodesic') + + +# --------------------------------------------------------------------------- +# Tests — cross-backend consistency +# --------------------------------------------------------------------------- + +@dask_array_available +class TestGeodesicAspectDask: + + def test_numpy_equals_dask(self): + elev = _east_tilted_surface(H=8, W=10, grade=100.0) + r_np = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='numpy') + r_da = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, + backend='dask+numpy', chunks=(4, 5)) + a_np = aspect(r_np, method='geodesic') + a_da = aspect(r_da, method='geodesic') + np.testing.assert_allclose( + a_np.values, a_da.values, rtol=1e-5, equal_nan=True + ) + + +@cuda_and_cupy_available +class TestGeodesicAspectCupy: + + def test_numpy_equals_cupy(self): + elev = _east_tilted_surface(H=8, W=10, grade=100.0) + r_np = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='numpy') + r_cu = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='cupy') + a_np = aspect(r_np, method='geodesic') + a_cu = aspect(r_cu, method='geodesic') + np.testing.assert_allclose( + a_np.values, a_cu.data.get(), rtol=1e-5, equal_nan=True + ) + + +@dask_array_available +@cuda_and_cupy_available +class TestGeodesicAspectDaskCupy: + + def test_numpy_equals_dask_cupy(self): + elev = _east_tilted_surface(H=8, W=10, grade=100.0) + r_np = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='numpy') + r_dc = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, + backend='dask+cupy', chunks=(4, 5)) + a_np = aspect(r_np, method='geodesic') + a_dc = aspect(r_dc, method='geodesic') + np.testing.assert_allclose( + a_np.values, a_dc.data.compute().get(), rtol=1e-5, equal_nan=True + ) diff --git a/xrspatial/tests/test_geodesic_slope.py b/xrspatial/tests/test_geodesic_slope.py new file mode 100644 index 00000000..d71c66ba --- /dev/null +++ b/xrspatial/tests/test_geodesic_slope.py @@ -0,0 +1,282 @@ 
+"""Tests for geodesic slope computation.""" +import numpy as np +import pytest +import xarray as xr + +from xrspatial import slope +from xrspatial.tests.general_checks import ( + cuda_and_cupy_available, + dask_array_available, +) + +try: + import dask.array as da +except ImportError: + da = None + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_geo_raster(elev, lat_start, lat_end, lon_start, lon_end, + backend='numpy', chunks=(3, 3)): + """Build a DataArray with lat/lon 1-D coords in geographic (degree) space.""" + H, W = elev.shape + lat = np.linspace(lat_start, lat_end, H) + lon = np.linspace(lon_start, lon_end, W) + raster = xr.DataArray( + elev.astype(np.float64), + dims=['lat', 'lon'], + coords={'lat': lat, 'lon': lon}, + ) + + if 'cupy' in backend: + import cupy + raster.data = cupy.asarray(raster.data) + + if 'dask' in backend and da is not None: + raster.data = da.from_array(raster.data, chunks=chunks) + + return raster + + +def _flat_surface(H=6, W=8, elev=500.0): + """Constant-elevation surface — slope should be 0 everywhere interior.""" + return np.full((H, W), elev, dtype=np.float64) + + +def _east_tilted_surface(H=6, W=8, base_elev=500.0, grade=100.0, + lon_start=10.0, lon_end=11.0): + """Surface that rises linearly to the east. + + grade is elevation increase per degree of longitude. + """ + lon = np.linspace(lon_start, lon_end, W) + elev = base_elev + grade * (lon - lon_start) + return np.broadcast_to(elev[np.newaxis, :], (H, W)).copy() + + +def _north_tilted_surface(H=6, W=8, base_elev=500.0, grade=100.0, + lat_start=40.0, lat_end=41.0): + """Surface that rises linearly to the north.""" + lat = np.linspace(lat_start, lat_end, H) + elev = base_elev + grade * (lat - lat_start) + return np.broadcast_to(elev[:, np.newaxis], (H, W)).copy() + + +# --------------------------------------------------------------------------- +# Tests — analytical cases +# --------------------------------------------------------------------------- + +class TestGeodesicSlopeFlat: + """Flat surface at various latitudes → slope ≈ 0.""" + + @pytest.mark.parametrize("lat_center", [0.0, 30.0, 60.0, -45.0]) + def test_flat_slope_is_zero(self, lat_center): + elev = _flat_surface() + raster = _make_geo_raster( + elev, lat_center - 0.5, lat_center + 0.5, 10.0, 11.0 + ) + result = slope(raster, method='geodesic') + interior = result.values[1:-1, 1:-1] + assert np.all(np.isfinite(interior)) + # Small residual (~0.04°) is expected from Earth's curvature + # over the grid cell spacing; this is negligible for real-world use. 
+        np.testing.assert_allclose(interior, 0.0, atol=0.1)
+
+
+class TestGeodesicSlopeTilted:
+    """Known tilted surfaces → non-zero slope."""
+
+    def test_east_tilted_has_positive_slope(self):
+        elev = _east_tilted_surface()
+        raster = _make_geo_raster(elev, 45.0, 46.0, 10.0, 11.0)
+        result = slope(raster, method='geodesic')
+        interior = result.values[1:-1, 1:-1]
+        assert np.all(np.isfinite(interior))
+        assert np.all(interior > 0)
+
+    def test_north_tilted_has_positive_slope(self):
+        elev = _north_tilted_surface()
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = slope(raster, method='geodesic')
+        interior = result.values[1:-1, 1:-1]
+        assert np.all(np.isfinite(interior))
+        assert np.all(interior > 0)
+
+
+class TestGeodesicSlopeLatitudeScaling:
+    """The same m-per-degree gradient is physically steeper at 60N, where a
+    degree of longitude spans a shorter distance; geodesic slope should
+    reflect that cos(lat) scaling."""
+
+    def test_latitude_scaling(self):
+        grade = 50.0  # m per degree
+        elev_eq = _east_tilted_surface(grade=grade, lon_start=10.0, lon_end=11.0)
+        elev_60 = _east_tilted_surface(grade=grade, lon_start=10.0, lon_end=11.0)
+
+        r_eq = _make_geo_raster(elev_eq, -0.5, 0.5, 10.0, 11.0)
+        r_60 = _make_geo_raster(elev_60, 59.5, 60.5, 10.0, 11.0)
+
+        s_eq = slope(r_eq, method='geodesic').values[2, 4]
+        s_60 = slope(r_60, method='geodesic').values[2, 4]
+
+        # The geodesic slope at 60N should be steeper because 1 degree of
+        # longitude is shorter at high latitude. The key point is both are
+        # finite and positive — the exact ratio depends on cos(lat).
+        assert np.isfinite(s_eq) and s_eq > 0
+        assert np.isfinite(s_60) and s_60 > 0
+        # At 60N, 1 deg lon ≈ half the distance → slope should be roughly
+        # double. Allow wide tolerance.
+        ratio = s_60 / s_eq
+        assert 1.5 < ratio < 2.5
+
+
+# ---------------------------------------------------------------------------
+# Tests — edge cases
+# ---------------------------------------------------------------------------
+
+class TestGeodesicSlopeEdgeCases:
+
+    def test_nan_handling(self):
+        """NaN in neighbourhood → NaN output."""
+        elev = _flat_surface(H=5, W=5)
+        elev[2, 2] = np.nan
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = slope(raster, method='geodesic')
+        # The cells adjacent to the NaN should also be NaN
+        assert np.isnan(result.values[2, 2])
+        # At least the NaN's immediate neighbours should be NaN
+        assert np.isnan(result.values[1, 1])
+        assert np.isnan(result.values[1, 2])
+
+    def test_edges_are_nan(self):
+        """Boundary cells should be NaN."""
+        elev = _flat_surface()
+        raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0)
+        result = slope(raster, method='geodesic')
+        assert np.all(np.isnan(result.values[0, :]))
+        assert np.all(np.isnan(result.values[-1, :]))
+        assert np.all(np.isnan(result.values[:, 0]))
+        assert np.all(np.isnan(result.values[:, -1]))
+
+    def test_near_pole(self):
+        """Near-polar latitude should still produce finite results."""
+        elev = _north_tilted_surface(H=6, W=6, grade=50.0,
+                                     lat_start=88.0, lat_end=89.0)
+        raster = _make_geo_raster(elev, 88.0, 89.0, 10.0, 11.0)
+        result = slope(raster, method='geodesic')
+        interior = result.values[1:-1, 1:-1]
+        assert np.all(np.isfinite(interior))
+        assert np.all(interior > 0)
+
+
+# ---------------------------------------------------------------------------
+# Tests — z_unit
+# ---------------------------------------------------------------------------
+
+class TestGeodesicSlopeZUnit:
+
+    def test_foot_vs_meter(self):
+        """Elevation in feet should give consistent slope with proper z_unit."""
+        elev_m =
_east_tilted_surface(grade=100.0) + elev_ft = elev_m / 0.3048 # convert to feet + + r_m = _make_geo_raster(elev_m, 40.0, 41.0, 10.0, 11.0) + r_ft = _make_geo_raster(elev_ft, 40.0, 41.0, 10.0, 11.0) + + s_m = slope(r_m, method='geodesic', z_unit='meter') + s_ft = slope(r_ft, method='geodesic', z_unit='foot') + + np.testing.assert_allclose( + s_m.values[1:-1, 1:-1], + s_ft.values[1:-1, 1:-1], + rtol=1e-4, + ) + + +# --------------------------------------------------------------------------- +# Tests — validation +# --------------------------------------------------------------------------- + +class TestGeodesicSlopeValidation: + + def test_invalid_method_raises(self): + elev = _flat_surface() + raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0) + with pytest.raises(ValueError, match="method"): + slope(raster, method='invalid') + + def test_invalid_z_unit_raises(self): + elev = _flat_surface() + raster = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0) + with pytest.raises(ValueError, match="z_unit"): + slope(raster, method='geodesic', z_unit='cubit') + + def test_missing_coords_raises(self): + data = np.ones((5, 5)) + raster = xr.DataArray(data, dims=['dim_0', 'dim_1']) + with pytest.raises(ValueError, match="coordinates"): + slope(raster, method='geodesic') + + def test_projected_coords_raises(self): + """Coords outside geographic range should raise.""" + data = np.ones((5, 5)) + raster = xr.DataArray( + data, dims=['y', 'x'], + coords={ + 'y': np.linspace(4000000, 4100000, 5), + 'x': np.linspace(500000, 600000, 5), + } + ) + with pytest.raises(ValueError): + slope(raster, method='geodesic') + + +# --------------------------------------------------------------------------- +# Tests — cross-backend consistency +# --------------------------------------------------------------------------- + +@dask_array_available +class TestGeodesicSlopeDask: + + def test_numpy_equals_dask(self): + elev = _east_tilted_surface(H=8, W=10, grade=100.0) + r_np = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='numpy') + r_da = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, + backend='dask+numpy', chunks=(4, 5)) + s_np = slope(r_np, method='geodesic') + s_da = slope(r_da, method='geodesic') + np.testing.assert_allclose( + s_np.values, s_da.values, rtol=1e-5, equal_nan=True + ) + + +@cuda_and_cupy_available +class TestGeodesicSlopeCupy: + + def test_numpy_equals_cupy(self): + import cupy + elev = _east_tilted_surface(H=8, W=10, grade=100.0) + r_np = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='numpy') + r_cu = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='cupy') + s_np = slope(r_np, method='geodesic') + s_cu = slope(r_cu, method='geodesic') + np.testing.assert_allclose( + s_np.values, s_cu.data.get(), rtol=1e-5, equal_nan=True + ) + + +@dask_array_available +@cuda_and_cupy_available +class TestGeodesicSlopeDaskCupy: + + def test_numpy_equals_dask_cupy(self): + elev = _east_tilted_surface(H=8, W=10, grade=100.0) + r_np = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, backend='numpy') + r_dc = _make_geo_raster(elev, 40.0, 41.0, 10.0, 11.0, + backend='dask+cupy', chunks=(4, 5)) + s_np = slope(r_np, method='geodesic') + s_dc = slope(r_dc, method='geodesic') + np.testing.assert_allclose( + s_np.values, s_dc.data.compute().get(), rtol=1e-5, equal_nan=True + ) From dc1d4b6a041fa990e8ec3b98dda0709d85c55ded Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 20 Feb 2026 06:56:02 -0800 Subject: [PATCH 7/8] add description column to README feature matrix tables --- README.md | 140 
+++++++++++++++++++++++++++--------------------------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 487b61b3..5170e392 100644 --- a/README.md +++ b/README.md @@ -135,117 +135,117 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e ### **Classification** -| Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | -|:----------:|:----------------------:|:--------------------:|:-------------------:|:------:| -| [Box Plot](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Equal Interval](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Head/Tail Breaks](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Maximum Breaks](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Natural Breaks](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Percentiles](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Quantile](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Reclassify](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | -| [Std Mean](xrspatial/classify.py) |✅️ |✅ | ✅ |✅ | +| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | +|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:| +| [Box Plot](xrspatial/classify.py) | Classifies values into bins based on box plot quartile boundaries | ✅️ |✅ | ✅ |✅ | +| [Equal Interval](xrspatial/classify.py) | Divides the value range into equal-width bins | ✅️ |✅ | ✅ |✅ | +| [Head/Tail Breaks](xrspatial/classify.py) | Classifies heavy-tailed distributions using recursive mean splitting | ✅️ |✅ | ✅ |✅ | +| [Maximum Breaks](xrspatial/classify.py) | Finds natural groupings by maximizing differences between sorted values | ✅️ |✅ | ✅ |✅ | +| [Natural Breaks](xrspatial/classify.py) | Optimizes class boundaries to minimize within-class variance (Jenks) | ✅️ |✅ | ✅ |✅ | +| [Percentiles](xrspatial/classify.py) | Assigns classes based on user-defined percentile breakpoints | ✅️ |✅ | ✅ |✅ | +| [Quantile](xrspatial/classify.py) | Distributes values into classes with equal observation counts | ✅️ |✅ | ✅ |✅ | +| [Reclassify](xrspatial/classify.py) | Remaps pixel values to new classes using a user-defined lookup | ✅️ |✅ | ✅ |✅ | +| [Std Mean](xrspatial/classify.py) | Classifies values by standard deviation intervals from the mean | ✅️ |✅ | ✅ |✅ | ------- ### **Focal** -| Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | -|:----------:|:----------------------:|:--------------------:|:-------------------:|:------:| -| [Apply](xrspatial/focal.py) | ✅️ | ✅️ | | | -| [Hotspots](xrspatial/focal.py) | ✅️ | ✅️ | ✅️ | | -| [Mean](xrspatial/focal.py) | ✅️ | ✅️ | ✅️ | | -| [Focal Statistics](xrspatial/focal.py) | ✅️ | ✅️ | ✅️ | | +| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | +|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:| +| [Apply](xrspatial/focal.py) | Applies a custom function over a sliding neighborhood window | ✅️ | ✅️ | | | +| [Hotspots](xrspatial/focal.py) | Identifies statistically significant spatial clusters using Getis-Ord Gi* | ✅️ | ✅️ | ✅️ | | +| [Mean](xrspatial/focal.py) | Computes the mean value within a sliding neighborhood window | ✅️ | ✅️ | ✅️ | | +| [Focal Statistics](xrspatial/focal.py) | Computes summary statistics over a sliding neighborhood window | ✅️ | ✅️ | ✅️ | | ------- ### **Multispectral** -| Name | NumPy xr.DataArray | Dask 
xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | -|:----------:|:----------------------:|:--------------------:|:-------------------:|:------:| -| [Atmospherically Resistant Vegetation Index (ARVI)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [Enhanced Built-Up and Bareness Index (EBBI)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [Enhanced Vegetation Index (EVI)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [Green Chlorophyll Index (GCI)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [Normalized Burn Ratio (NBR)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [Normalized Burn Ratio 2 (NBR2)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [Normalized Difference Moisture Index (NDMI)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [Normalized Difference Vegetation Index (NDVI)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [Soil Adjusted Vegetation Index (SAVI)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [Structure Insensitive Pigment Index (SIPI)](xrspatial/multispectral.py) | ✅️ |✅️ | ✅️ |✅️ | -| [True Color](xrspatial/multispectral.py) | ✅️ | ️ | ✅️ | ️ | +| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | +|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:| +| [Atmospherically Resistant Vegetation Index (ARVI)](xrspatial/multispectral.py) | Vegetation index resistant to atmospheric effects using blue band correction | ✅️ |✅️ | ✅️ |✅️ | +| [Enhanced Built-Up and Bareness Index (EBBI)](xrspatial/multispectral.py) | Highlights built-up areas and barren land from thermal and SWIR bands | ✅️ |✅️ | ✅️ |✅️ | +| [Enhanced Vegetation Index (EVI)](xrspatial/multispectral.py) | Enhanced vegetation index reducing soil and atmospheric noise | ✅️ |✅️ | ✅️ |✅️ | +| [Green Chlorophyll Index (GCI)](xrspatial/multispectral.py) | Estimates leaf chlorophyll content from green and NIR reflectance | ✅️ |✅️ | ✅️ |✅️ | +| [Normalized Burn Ratio (NBR)](xrspatial/multispectral.py) | Measures burn severity using NIR and SWIR band difference | ✅️ |✅️ | ✅️ |✅️ | +| [Normalized Burn Ratio 2 (NBR2)](xrspatial/multispectral.py) | Refines burn severity mapping using two SWIR bands | ✅️ |✅️ | ✅️ |✅️ | +| [Normalized Difference Moisture Index (NDMI)](xrspatial/multispectral.py) | Detects vegetation moisture stress from NIR and SWIR reflectance | ✅️ |✅️ | ✅️ |✅️ | +| [Normalized Difference Vegetation Index (NDVI)](xrspatial/multispectral.py) | Quantifies vegetation density from red and NIR band difference | ✅️ |✅️ | ✅️ |✅️ | +| [Soil Adjusted Vegetation Index (SAVI)](xrspatial/multispectral.py) | Vegetation index with soil brightness correction factor | ✅️ |✅️ | ✅️ |✅️ | +| [Structure Insensitive Pigment Index (SIPI)](xrspatial/multispectral.py) | Estimates carotenoid-to-chlorophyll ratio for plant stress detection | ✅️ |✅️ | ✅️ |✅️ | +| [True Color](xrspatial/multispectral.py) | Composites red, green, and blue bands into a natural color image | ✅️ | ️ | ✅️ | ️ | ------- ### **Pathfinding** -| Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | -|:----------:|:----------------------:|:--------------------:|:-------------------:|:------:| -| [A* Pathfinding](xrspatial/pathfinding.py) | ✅️ | | | | +| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | 
+|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:|
+| [A* Pathfinding](xrspatial/pathfinding.py) | Finds the least-cost path between two cells on a cost surface | ✅️ | | | |

----------

### **Proximity**

-| Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
-|:----------:|:----------------------:|:--------------------:|:-------------------:|:------:|
-| [Allocation](xrspatial/proximity.py) | ✅️ | ✅ | | |
-| [Direction](xrspatial/proximity.py) | ✅️ | ✅ | | |
-| [Proximity](xrspatial/proximity.py) | ✅️ | ✅ | | |
+| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
+|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:|
+| [Allocation](xrspatial/proximity.py) | Assigns each cell the value of its nearest source feature | ✅️ | ✅ | | |
+| [Direction](xrspatial/proximity.py) | Computes the direction from each cell to the nearest source feature | ✅️ | ✅ | | |
+| [Proximity](xrspatial/proximity.py) | Computes the distance from each cell to the nearest source feature | ✅️ | ✅ | | |

--------

### **Raster to vector**

-| Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
-|:-----|:------------------:|:-----------------:|:---------------------:|:---------------------:|
-| [Polygonize](xrspatial/experimental/polygonize.py) | ✅️ | | | |
+| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
+|:-----|:------------|:------------------:|:-----------------:|:---------------------:|:---------------------:|
+| [Polygonize](xrspatial/experimental/polygonize.py) | Converts contiguous regions of equal value into vector polygons | ✅️ | | | |

--------

### **Surface**

-| Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
-|:----------:|:----------------------:|:--------------------:|:-------------------:|:------:|
-| [Aspect](xrspatial/aspect.py) | ✅️ | ✅️ | ✅️ | ✅️ |
-| [Curvature](xrspatial/curvature.py) | ✅️ |✅️ |✅️ | ✅️ |
-| [Hillshade](xrspatial/hillshade.py) | ✅️ | ✅️ | | |
-| [Slope](xrspatial/slope.py) | ✅️ | ✅️ | ✅️ | ✅️ |
-| [Terrain Generation](xrspatial/terrain.py) | ✅️ | ✅️ | ✅️ | |
-| [Viewshed](xrspatial/viewshed.py) | ✅️ | | | |
-| [Perlin Noise](xrspatial/perlin.py) | ✅️ | ✅️ | ✅️ | |
-| [Bump Mapping](xrspatial/bump.py) | ✅️ | | | |
+| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
+|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:|
+| [Aspect](xrspatial/aspect.py) | Computes the downslope direction of each cell in degrees | ✅️ | ✅️ | ✅️ | ✅️ |
+| [Curvature](xrspatial/curvature.py) | Measures the rate of slope change (concavity/convexity) at each cell | ✅️ |✅️ |✅️ | ✅️ |
+| [Hillshade](xrspatial/hillshade.py) | Simulates terrain illumination from a given sun altitude and azimuth | ✅️ | ✅️ | | |
+| [Slope](xrspatial/slope.py) | Computes terrain gradient steepness at each cell in degrees | ✅️ | ✅️ | ✅️ | ✅️ |
+| [Terrain Generation](xrspatial/terrain.py) | Generates synthetic terrain elevation using fractal noise | ✅️ | ✅️ | ✅️ | |
+| [Viewshed](xrspatial/viewshed.py) | Determines which cells are visible from a given observer point on the terrain | ✅️ | | | |
+| [Perlin Noise](xrspatial/perlin.py) | Generates smooth, continuous random noise for procedural textures | ✅️ | ✅️ | ✅️ | |
+| [Bump Mapping](xrspatial/bump.py) | Adds randomized bump features to simulate natural terrain variation | ✅️ | | | |

-----------

### **Zonal**

-| Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
-|:----------:|:----------------------:|:--------------------:|:-------------------:|:------:|
-| [Apply](xrspatial/zonal.py) | ✅️ | ✅️ | | |
-| [Crop](xrspatial/zonal.py) | ✅️ | | | |
-| [Regions](xrspatial/zonal.py) | | | | |
-| [Trim](xrspatial/zonal.py) | ✅️ | | | |
-| [Zonal Statistics](xrspatial/zonal.py) | ✅️ | ✅️| | |
-| [Zonal Cross Tabulate](xrspatial/zonal.py) | ✅️ | ✅️| | |
+| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
+|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:|
+| [Apply](xrspatial/zonal.py) | Applies a custom function to each zone in a classified raster | ✅️ | ✅️ | | |
+| [Crop](xrspatial/zonal.py) | Extracts the bounding rectangle of a specific zone | ✅️ | | | |
+| [Regions](xrspatial/zonal.py) | Identifies connected regions of non-zero cells | | | | |
+| [Trim](xrspatial/zonal.py) | Removes nodata border rows and columns from a raster | ✅️ | | | |
+| [Zonal Statistics](xrspatial/zonal.py) | Computes summary statistics for a value raster within each zone | ✅️ | ✅️| | |
+| [Zonal Cross Tabulate](xrspatial/zonal.py) | Cross-tabulates agreement between two categorical rasters | ✅️ | ✅️| | |

-----------

### **Local**

-| Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
-|:----------:|:----------------------:|:--------------------:|:-------------------:|:------:|
-| [Cell Stats](xrspatial/local.py) | ✅️ | | | |
-| [Combine](xrspatial/local.py) | ✅️ | | | |
-| [Lesser Frequency](xrspatial/local.py) | ✅️ | | | |
-| [Equal Frequency](xrspatial/local.py) | ✅️ | | | |
-| [Greater Frequency](xrspatial/local.py) | ✅️ | | | |
-| [Lowest Position](xrspatial/local.py) | ✅️ | | | |
-| [Highest Position](xrspatial/local.py) | ✅️ | | | |
-| [Popularity](xrspatial/local.py) | ✅️ | | | |
-| [Rank](xrspatial/local.py) | ✅️ | | | |
+| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
+|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:|
+| [Cell Stats](xrspatial/local.py) | Computes per-cell summary statistics across multiple rasters | ✅️ | | | |
+| [Combine](xrspatial/local.py) | Assigns a unique ID to each distinct combination of values across rasters | ✅️ | | | |
+| [Lesser Frequency](xrspatial/local.py) | Counts, per cell, how many rasters have values less than a reference raster | ✅️ | | | |
+| [Equal Frequency](xrspatial/local.py) | Counts, per cell, how many rasters have values equal to a reference raster | ✅️ | | | |
+| [Greater Frequency](xrspatial/local.py) | Counts, per cell, how many rasters have values greater than a reference raster | ✅️ | | | |
+| [Lowest Position](xrspatial/local.py) | Identifies which raster has the minimum value at each cell | ✅️ | | | |
+| [Highest Position](xrspatial/local.py) | Identifies which raster has the maximum value at each cell | ✅️ | | | |
+| [Popularity](xrspatial/local.py) | Returns, per cell, the value whose occurrence count matches a given popularity position | ✅️ | | | |
+| [Rank](xrspatial/local.py) | Returns, per cell, the value at a specified rank when values across rasters are sorted | ✅️ | | | |

#### Usage
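Note: the NDVI row above corresponds to the standard formula NDVI = (NIR - Red) / (NIR + Red). A minimal usage sketch, assuming the public `xrspatial.multispectral.ndvi` entry point with `nir_agg`/`red_agg` arguments (the sample values are illustrative, not taken from the test suite):

    import numpy as np
    import xarray as xr
    from xrspatial.multispectral import ndvi

    # Tiny 2x2 reflectance rasters; values are illustrative only.
    nir = xr.DataArray(np.array([[0.5, 0.6], [0.7, 0.8]]), dims=['y', 'x'])
    red = xr.DataArray(np.array([[0.1, 0.2], [0.3, 0.4]]), dims=['y', 'x'])

    # NDVI = (NIR - Red) / (NIR + Red), computed independently per cell.
    result = ndvi(nir_agg=nir, red_agg=red)
    print(result.data)
    # [[0.66666667 0.5       ]
    #  [0.4        0.33333333]]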
From d82be7a0e23b82e05c13b25eb303950f662506a9 Mon Sep 17 00:00:00 2001
From: Brendan Collins
Date: Fri, 20 Feb 2026 08:03:10 -0800
Subject: [PATCH 8/8] fix non-deterministic maximum_breaks when gaps are equal

np.argpartition makes no ordering guarantee among equal elements, so when
several gaps tie in size the selected break points could vary between runs.
Replace it with np.argsort(kind='stable'), which keeps ties in index order,
so the last n_gaps entries consistently correspond to the tied gaps between
the highest values.
---
 xrspatial/classify.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xrspatial/classify.py b/xrspatial/classify.py
index 06d8a4be..5551beec 100644
--- a/xrspatial/classify.py
+++ b/xrspatial/classify.py
@@ -1170,7 +1170,7 @@ def _compute_maximum_break_bins(values_np, k):
         return uv
     diffs = np.diff(uv)
     n_gaps = min(k - 1, len(diffs))
-    top_indices = np.argpartition(diffs, -n_gaps)[-n_gaps:]
+    top_indices = np.argsort(diffs, kind='stable')[-n_gaps:]
     top_indices.sort()
     bins = np.array([(uv[i] + uv[i + 1]) / 2.0 for i in top_indices])
     bins = np.append(bins, float(uv[-1]))
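
Note for reviewers: a minimal standalone sketch of the tie-breaking behavior
this patch fixes. The array below is illustrative; `uv` plays the role of the
sorted unique values inside `_compute_maximum_break_bins`:

    import numpy as np

    # Sorted unique values with two equally sized gaps: 1->4 and 6->9.
    uv = np.array([1.0, 4.0, 5.0, 6.0, 9.0])
    diffs = np.diff(uv)              # [3. 1. 1. 3.]
    k = 2
    n_gaps = min(k - 1, len(diffs))  # select the single largest gap

    # Before: argpartition only guarantees a valid partition, so it is
    # unspecified which of the two tied maxima (index 0 or 3) lands last.
    unstable = np.argpartition(diffs, -n_gaps)[-n_gaps:]

    # After: a stable ascending sort keeps ties in index order, so the last
    # n_gaps entries are always the tied gaps with the highest indices,
    # i.e. the gaps between the highest values (index 3 here).
    top_indices = np.argsort(diffs, kind='stable')[-n_gaps:]
    top_indices.sort()
    assert top_indices[0] == 3

    # Bin edges: the midpoint of each chosen gap, plus the maximum value.
    bins = np.array([(uv[i] + uv[i + 1]) / 2.0 for i in top_indices])
    bins = np.append(bins, float(uv[-1]))
    print(bins)  # [7.5 9. ]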