Skip to content

Commit

Permalink
✨ Implement PyCogReader struct with new and to_numpy methods (#12)
Browse files Browse the repository at this point in the history
* ✨ Implement PyCogReader struct with new and data methods

A (Py)CogReader class for Python that wraps around the CogReader struct! Have implemented the new (i.e. __init__) and data methods, and used them in the read_geotiff_py function. Needed to set CogReader struct visibility to pub(crate). Renamed vec_data to array_data since they contain arrays, not Vec. Also updated the regex match for one of the unit tests.

* 📝 Add Python docstrings for CogReader class and methods

Numpydoc style docstrings for the CogReader class and .data method. Included some example code on how to use the class and methods to read a sample GeoTIFF too.

* ✅ Add unit test for CogReader's data method

Ensure that the `.data()` method returns a numpy.ndarray output with the correct shape and values.

* 🚚 Rename data method as to_numpy

Originally went with `.data()` to match https://docs.xarray.dev/en/v2024.02.0/generated/xarray.DataArray.data.html, but realized that xarray's `.data` is a property, not a method. Now going with `.to_numpy()` instead, following what's used by xarray, pandas and pyarrow.
  • Loading branch information
weiji14 authored Mar 17, 2024
1 parent bca4c18 commit b23a973
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 22 deletions.
2 changes: 1 addition & 1 deletion python/cog3pio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from importlib.metadata import version

from .cog3pio import read_geotiff
from .cog3pio import CogReader, read_geotiff # noqa: F401

__doc__ = cog3pio.__doc__
__version__ = version("cog3pio") # e.g. 0.1.2.dev3+g0ab3cd78
Expand Down
23 changes: 20 additions & 3 deletions python/tests/test_io_geotiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import tempfile
import urllib.request

import numpy as np
import pytest

from cog3pio import read_geotiff
from cog3pio import CogReader, read_geotiff


# %%
Expand Down Expand Up @@ -81,9 +82,25 @@ def test_read_geotiff_unsupported_dtype():
"""
with pytest.raises(
ValueError,
match="Cannot read GeoTIFF because: "
"The Decoder does not support the image format ",
match="The Decoder does not support the image format ",
):
read_geotiff(
path="https://github.com/corteva/rioxarray/raw/0.15.1/test/test_data/input/cint16.tif"
)


def test_CogReader_to_numpy():
    """
    Check that `CogReader.to_numpy` returns a numpy.ndarray holding the expected
    shape and pixel values for a small sample GeoTIFF.
    """
    url = "https://github.com/rasterio/rasterio/raw/1.3.9/tests/data/float32.tif"
    expected = np.array(
        [[[1.41, 1.23, 0.78], [0.32, -0.23, -1.88]]], dtype=np.float32
    )

    pixels = CogReader(path=url).to_numpy()

    assert pixels.shape == (1, 2, 3)  # band, height, width
    np.testing.assert_equal(actual=pixels, desired=expected)
17 changes: 9 additions & 8 deletions src/io/geotiff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ use tiff::tags::Tag;
use tiff::{TiffError, TiffFormatError, TiffResult};

/// Cloud-optimized GeoTIFF reader
struct CogReader<R: Read + Seek> {
decoder: Decoder<R>,
pub(crate) struct CogReader<R: Read + Seek> {
/// TIFF decoder
pub decoder: Decoder<R>,
}

impl<R: Read + Seek> CogReader<R> {
/// Create a new GeoTIFF decoder that decodes from a stream buffer
fn new(stream: R) -> TiffResult<Self> {
pub fn new(stream: R) -> TiffResult<Self> {
// Open TIFF stream with decoder
let mut decoder = Decoder::new(stream)?;
decoder = decoder.with_limits(Limits::unlimited());
Expand All @@ -22,7 +23,7 @@ impl<R: Read + Seek> CogReader<R> {
}

/// Decode GeoTIFF image to an [`ndarray::Array`]
fn ndarray(&mut self) -> TiffResult<Array3<f32>> {
pub fn ndarray(&mut self) -> TiffResult<Array3<f32>> {
// Get image dimensions
let (width, height): (u32, u32) = self.decoder.dimensions()?;

Expand All @@ -34,10 +35,10 @@ impl<R: Read + Seek> CogReader<R> {
};

// Put image pixel data into an ndarray
let vec_data = Array3::from_shape_vec((1, height as usize, width as usize), image_data)
let array_data = Array3::from_shape_vec((1, height as usize, width as usize), image_data)
.map_err(|_| TiffFormatError::InvalidDimensions(height, width))?;

Ok(vec_data)
Ok(array_data)
}

/// Affine transformation for 2D matrix extracted from TIFF tag metadata, used to transform
Expand Down Expand Up @@ -96,9 +97,9 @@ pub fn read_geotiff<R: Read + Seek>(stream: R) -> TiffResult<Array3<f32>> {
let mut reader = CogReader::new(stream)?;

// Decode TIFF into ndarray
let vec_data: Array3<f32> = reader.ndarray()?;
let array_data: Array3<f32> = reader.ndarray()?;

Ok(vec_data)
Ok(array_data)
}

#[cfg(test)]
Expand Down
80 changes: 70 additions & 10 deletions src/python/adapters.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,75 @@
use std::io::Cursor;

use bytes::Bytes;
use ndarray::Array3;
use numpy::{PyArray3, ToPyArray};
use object_store::{parse_url, ObjectStore};
use pyo3::exceptions::{PyBufferError, PyFileNotFoundError, PyValueError};
use pyo3::prelude::{pyfunction, pymodule, PyModule, PyResult, Python};
use pyo3::prelude::{pyclass, pyfunction, pymethods, pymodule, PyModule, PyResult, Python};
use pyo3::wrap_pyfunction;
use pyo3::PyErr;
use url::Url;

use crate::io::geotiff::read_geotiff;
use crate::io::geotiff::CogReader;

/// Python class interface to a Cloud-optimized GeoTIFF reader.
///
/// Parameters
/// ----------
/// path : str
///     The path to the file, or a url to a remote file.
///
/// Returns
/// -------
/// reader : cog3pio.CogReader
///     A new CogReader instance for decoding GeoTIFF files.
///
/// Examples
/// --------
/// >>> import numpy as np
/// >>> from cog3pio import CogReader
/// >>>
/// >>> reader = CogReader(
/// ...     path="https://github.com/rasterio/rasterio/raw/1.3.9/tests/data/float32.tif"
/// ... )
/// >>> array: np.ndarray = reader.to_numpy()
/// >>> array.shape
/// (1, 2, 3)
/// >>> array.dtype
/// dtype('float32')
#[pyclass]
#[pyo3(name = "CogReader")]
struct PyCogReader {
// Wrapped Rust-side reader decoding from an in-memory byte cursor. The field carries no
// pyo3 getter attribute, so it is not exposed as a Python attribute.
inner: CogReader<Cursor<Bytes>>,
}

#[pymethods]
impl PyCogReader {
    // Constructor exposed to Python: turn `path` into a byte stream via `path_to_stream`,
    // then open that stream with the Rust `CogReader`. A failure while opening the decoder
    // is re-raised as a Python ValueError carrying the decoder's error message.
    #[new]
    fn new(path: &str) -> PyResult<Self> {
        let stream: Cursor<Bytes> = path_to_stream(path)?;

        match CogReader::new(stream) {
            Ok(inner) => Ok(Self { inner }),
            Err(err) => Err(PyValueError::new_err(err.to_string())),
        }
    }

    /// Get image pixel data from GeoTIFF as a numpy.ndarray
    ///
    /// Returns
    /// -------
    /// array : np.ndarray
    ///     3D array of shape (band, height, width) containing the GeoTIFF pixel data.
    fn to_numpy<'py>(&mut self, py: Python<'py>) -> PyResult<&'py PyArray3<f32>> {
        // Decode on the Rust side first; a decoding error becomes a Python ValueError
        let pixels: Array3<f32> = match self.inner.ndarray() {
            Ok(decoded) => decoded,
            Err(err) => return Err(PyValueError::new_err(err.to_string())),
        };

        // Hand the ndarray (Rust) over to Python as a numpy ndarray
        Ok(pixels.to_pyarray(py))
    }
}

/// Read from a filepath or url into a byte stream
fn path_to_stream(path: &str) -> PyResult<Cursor<Bytes>> {
Expand Down Expand Up @@ -54,7 +114,7 @@ fn path_to_stream(path: &str) -> PyResult<Cursor<Bytes>> {
/// Returns
/// -------
/// array : np.ndarray
/// 2D array containing the GeoTIFF pixel data.
/// 3D array of shape (band, height, width) containing the GeoTIFF pixel data.
///
/// Examples
/// --------
Expand All @@ -65,22 +125,22 @@ fn path_to_stream(path: &str) -> PyResult<Cursor<Bytes>> {
#[pyfunction]
#[pyo3(name = "read_geotiff")]
fn read_geotiff_py<'py>(path: &str, py: Python<'py>) -> PyResult<&'py PyArray3<f32>> {
// Parse URL into byte stream
let stream = path_to_stream(path)?;
// Open URL with TIFF decoder
let mut reader = PyCogReader::new(path)?;

// Get image pixel data as an ndarray
let vec_data = read_geotiff(stream)
.map_err(|err| PyValueError::new_err(format!("Cannot read GeoTIFF because: {err}")))?;
// Decode TIFF into numpy ndarray
let array_data = reader.to_numpy(py)?;

// Convert from ndarray (Rust) to numpy ndarray (Python)
Ok(vec_data.to_pyarray(py))
Ok(array_data)
}

/// A Python module implemented in Rust. The name of this function must match
/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
/// import the module.
#[pymodule]
fn cog3pio(_py: Python, m: &PyModule) -> PyResult<()> {
// Register Python classes
m.add_class::<PyCogReader>()?;
// Register Python functions
m.add_function(wrap_pyfunction!(read_geotiff_py, m)?)?;
Ok(())
Expand Down

0 comments on commit b23a973

Please sign in to comment.