From b23a97324bcbf5d94a69c0ca1b427ca461578019 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sun, 17 Mar 2024 13:37:50 +1300 Subject: [PATCH] :sparkles: Implement PyCogReader struct with new and to_numpy methods (#12) * :sparkles: Implement PyCogReader struct with new and data methods A (Py)CogReader class for Python that wraps around the CogReader struct! Have implemented the new (i.e. __init__) and data methods, and used them in the read_geotiff_py function. Needed to set CogReader struct visibility to pub(crate). Renamed vec_data to array_data since they contain arrays, not Vec. Also updated the regex match for one of the unit tests. * :memo: Add Python docstrings for CogReader class and methods Numpydoc style docstrings for the CogReader class and .data method. Included some example code on how to use the class and methods to read a sample GeoTIFF too. * :white_check_mark: Add unit test for CogReader's data method Ensure that the `.data()` method returns a numpy.ndarray output with the correct shape and values. * :truck: Rename data method as to_numpy Originally went with `.data()` to match https://docs.xarray.dev/en/v2024.02.0/generated/xarray.DataArray.data.html, but realized that xarray's `.data` is a property, not a method. Now going with `.to_numpy()` instead, following what's used by xarray, pandas and pyarrow. --- python/cog3pio/__init__.py | 2 +- python/tests/test_io_geotiff.py | 23 ++++++++-- src/io/geotiff.rs | 17 +++---- src/python/adapters.rs | 80 ++++++++++++++++++++++++++++----- 4 files changed, 100 insertions(+), 22 deletions(-) diff --git a/python/cog3pio/__init__.py b/python/cog3pio/__init__.py index b1217f8..3910bbd 100644 --- a/python/cog3pio/__init__.py +++ b/python/cog3pio/__init__.py @@ -4,7 +4,7 @@ from importlib.metadata import version -from .cog3pio import read_geotiff +from .cog3pio import CogReader, read_geotiff # noqa: F401 __doc__ = cog3pio.__doc__ __version__ = version("cog3pio") # e.g. 
0.1.2.dev3+g0ab3cd78 diff --git a/python/tests/test_io_geotiff.py b/python/tests/test_io_geotiff.py index 3ff5cf5..25563b8 100644 --- a/python/tests/test_io_geotiff.py +++ b/python/tests/test_io_geotiff.py @@ -5,9 +5,10 @@ import tempfile import urllib.request +import numpy as np import pytest -from cog3pio import read_geotiff +from cog3pio import CogReader, read_geotiff # %% @@ -81,9 +82,25 @@ def test_read_geotiff_unsupported_dtype(): """ with pytest.raises( ValueError, - match="Cannot read GeoTIFF because: " - "The Decoder does not support the image format ", + match="The Decoder does not support the image format ", ): read_geotiff( path="https://github.com/corteva/rioxarray/raw/0.15.1/test/test_data/input/cint16.tif" ) + + +def test_CogReader_to_numpy(): + """ + Ensure that the CogReader class's `to_numpy` method produces a numpy.ndarray output. + """ + reader = CogReader( + path="https://github.com/rasterio/rasterio/raw/1.3.9/tests/data/float32.tif" + ) + array = reader.to_numpy() + assert array.shape == (1, 2, 3) # band, height, width + np.testing.assert_equal( + actual=array, + desired=np.array( + [[[1.41, 1.23, 0.78], [0.32, -0.23, -1.88]]], dtype=np.float32 + ), + ) diff --git a/src/io/geotiff.rs b/src/io/geotiff.rs index c457229..ca810fd 100644 --- a/src/io/geotiff.rs +++ b/src/io/geotiff.rs @@ -7,13 +7,14 @@ use tiff::tags::Tag; use tiff::{TiffError, TiffFormatError, TiffResult}; /// Cloud-optimized GeoTIFF reader -struct CogReader { - decoder: Decoder, +pub(crate) struct CogReader { + /// TIFF decoder + pub decoder: Decoder, } impl CogReader { /// Create a new GeoTIFF decoder that decodes from a stream buffer - fn new(stream: R) -> TiffResult { + pub fn new(stream: R) -> TiffResult { // Open TIFF stream with decoder let mut decoder = Decoder::new(stream)?; decoder = decoder.with_limits(Limits::unlimited()); @@ -22,7 +23,7 @@ impl CogReader { } /// Decode GeoTIFF image to an [`ndarray::Array`] - fn ndarray(&mut self) -> TiffResult> { + pub fn 
ndarray(&mut self) -> TiffResult> { // Get image dimensions let (width, height): (u32, u32) = self.decoder.dimensions()?; @@ -34,10 +35,10 @@ impl CogReader { }; // Put image pixel data into an ndarray - let vec_data = Array3::from_shape_vec((1, height as usize, width as usize), image_data) + let array_data = Array3::from_shape_vec((1, height as usize, width as usize), image_data) .map_err(|_| TiffFormatError::InvalidDimensions(height, width))?; - Ok(vec_data) + Ok(array_data) } /// Affine transformation for 2D matrix extracted from TIFF tag metadata, used to transform @@ -96,9 +97,9 @@ pub fn read_geotiff(stream: R) -> TiffResult> { let mut reader = CogReader::new(stream)?; // Decode TIFF into ndarray - let vec_data: Array3 = reader.ndarray()?; + let array_data: Array3 = reader.ndarray()?; - Ok(vec_data) + Ok(array_data) } #[cfg(test)] diff --git a/src/python/adapters.rs b/src/python/adapters.rs index 5a05684..436d794 100644 --- a/src/python/adapters.rs +++ b/src/python/adapters.rs @@ -1,15 +1,75 @@ use std::io::Cursor; use bytes::Bytes; +use ndarray::Array3; use numpy::{PyArray3, ToPyArray}; use object_store::{parse_url, ObjectStore}; use pyo3::exceptions::{PyBufferError, PyFileNotFoundError, PyValueError}; -use pyo3::prelude::{pyfunction, pymodule, PyModule, PyResult, Python}; +use pyo3::prelude::{pyclass, pyfunction, pymethods, pymodule, PyModule, PyResult, Python}; use pyo3::wrap_pyfunction; use pyo3::PyErr; use url::Url; -use crate::io::geotiff::read_geotiff; +use crate::io::geotiff::CogReader; + +/// Python class interface to a Cloud-optimized GeoTIFF reader. +/// +/// Parameters +/// ---------- +/// path : str +/// The path to the file, or a url to a remote file. +/// +/// Returns +/// ------- +/// reader : cog3pio.CogReader +/// A new CogReader instance for decoding GeoTIFF files. 
+/// +/// Examples +/// -------- +/// >>> import numpy as np +/// >>> from cog3pio import CogReader +/// >>> +/// >>> reader = CogReader( +/// >>> path="https://github.com/rasterio/rasterio/raw/1.3.9/tests/data/float32.tif" +/// >>> ) +/// >>> array: np.ndarray = reader.to_numpy() +/// >>> array.shape +/// >>> (1, 2, 3) +/// >>> array.dtype +/// >>> dtype('float32') +#[pyclass] +#[pyo3(name = "CogReader")] +struct PyCogReader { + inner: CogReader>, +} + +#[pymethods] +impl PyCogReader { + #[new] + fn new(path: &str) -> PyResult { + let stream: Cursor = path_to_stream(path)?; + let reader = + CogReader::new(stream).map_err(|err| PyValueError::new_err(err.to_string()))?; + + Ok(Self { inner: reader }) + } + + /// Get image pixel data from GeoTIFF as a numpy.ndarray + /// + /// Returns + /// ------- + /// array : np.ndarray + /// 3D array of shape (band, height, width) containing the GeoTIFF pixel data. + fn to_numpy<'py>(&mut self, py: Python<'py>) -> PyResult<&'py PyArray3> { + let array_data: Array3 = self + .inner + .ndarray() + .map_err(|err| PyValueError::new_err(err.to_string()))?; + + // Convert from ndarray (Rust) to numpy ndarray (Python) + Ok(array_data.to_pyarray(py)) + } +} /// Read from a filepath or url into a byte stream fn path_to_stream(path: &str) -> PyResult> { @@ -54,7 +114,7 @@ fn path_to_stream(path: &str) -> PyResult> { /// Returns /// ------- /// array : np.ndarray -/// 2D array containing the GeoTIFF pixel data. +/// 3D array of shape (band, height, width) containing the GeoTIFF pixel data. 
/// /// Examples /// -------- @@ -65,15 +125,13 @@ fn path_to_stream(path: &str) -> PyResult> { #[pyfunction] #[pyo3(name = "read_geotiff")] fn read_geotiff_py<'py>(path: &str, py: Python<'py>) -> PyResult<&'py PyArray3> { - // Parse URL into byte stream - let stream = path_to_stream(path)?; + // Open URL with TIFF decoder + let mut reader = PyCogReader::new(path)?; - // Get image pixel data as an ndarray - let vec_data = read_geotiff(stream) - .map_err(|err| PyValueError::new_err(format!("Cannot read GeoTIFF because: {err}")))?; + // Decode TIFF into numpy ndarray + let array_data = reader.to_numpy(py)?; - // Convert from ndarray (Rust) to numpy ndarray (Python) - Ok(vec_data.to_pyarray(py)) + Ok(array_data) } /// A Python module implemented in Rust. The name of this function must match @@ -81,6 +139,8 @@ fn read_geotiff_py<'py>(path: &str, py: Python<'py>) -> PyResult<&'py PyArray3 PyResult<()> { + // Register Python classes + m.add_class::()?; // Register Python functions m.add_function(wrap_pyfunction!(read_geotiff_py, m)?)?; Ok(())