From d02dd138b592b68913732396ca3780e047ec9f92 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 14 Mar 2024 11:13:08 +1300 Subject: [PATCH] :truck: Move pyo3 functions under src/python/adapters.rs (#9) Removing all pyo3 wrapper code from src/lib.rs, and putting them in src/python/adapters.rs instead to more clearly separate Rust and Python bindings. Also created a path_to_stream function to isolate the object_store code from the ndarray conversion code. --- python/tests/test_io_geotiff.py | 4 +- src/lib.rs | 83 +------------------------------ src/python/adapters.rs | 87 +++++++++++++++++++++++++++++++++ src/python/mod.rs | 2 + 4 files changed, 94 insertions(+), 82 deletions(-) create mode 100644 src/python/adapters.rs create mode 100644 src/python/mod.rs diff --git a/python/tests/test_io_geotiff.py b/python/tests/test_io_geotiff.py index 0a461b7..4adfd73 100644 --- a/python/tests/test_io_geotiff.py +++ b/python/tests/test_io_geotiff.py @@ -1,13 +1,15 @@ """ Test I/O on GeoTIFF files. """ +import os import tempfile import urllib.request -import os + import pytest from cog3pio import read_geotiff + # %% @pytest.fixture(scope="module", name="geotiff_path") def fixture_geotiff_path(): diff --git a/src/lib.rs b/src/lib.rs index 2f066f2..dc1746f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,84 +46,5 @@ /// Modules for handling Input/Output of GeoTIFF data pub mod io; - -use std::io::Cursor; - -use bytes::Bytes; -use ndarray::Dim; -use numpy::{PyArray, ToPyArray}; -use object_store::{parse_url, ObjectStore}; -use pyo3::exceptions::{PyBufferError, PyFileNotFoundError, PyValueError}; -use pyo3::prelude::{pyfunction, pymodule, PyModule, PyResult, Python}; -use pyo3::{wrap_pyfunction, PyErr}; -use url::Url; - -/// Read a GeoTIFF file from a path on disk into an ndarray -/// -/// Parameters -/// ---------- -/// path : str -/// The path to the file, or a url to a remote file. 
-/// -/// Returns -/// ------- -/// array : np.ndarray -/// 2D array containing the GeoTIFF pixel data. -/// -/// Examples -/// -------- -/// from cog3pio import read_geotiff -/// -/// array = read_geotiff("https://github.com/pka/georaster/raw/v0.1.0/data/tiff/float32.tif") -/// assert array.shape == (20, 20) -#[pyfunction] -#[pyo3(name = "read_geotiff")] -fn read_geotiff_py<'py>( - path: &str, - py: Python<'py>, -) -> PyResult<&'py PyArray<f32, Dim<[usize; 2]>>> { - // Parse URL into ObjectStore and path - let file_or_url = match Url::from_file_path(path) { - // Parse local filepath - Ok(filepath) => filepath, - // Parse remote URL - Err(_) => Url::parse(path) - .map_err(|_| PyValueError::new_err(format!("Cannot parse path: {path}")))?, - }; - let (store, location) = parse_url(&file_or_url) - .map_err(|_| PyValueError::new_err(format!("Cannot parse url: {file_or_url}")))?; - - // Initialize async runtime - let runtime = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build()?; - - // Get TIFF file stream asynchronously - let stream = runtime.block_on(async { - let result = store - .get(&location) - .await - .map_err(|_| PyFileNotFoundError::new_err(format!("Cannot find file: {path}")))?; - let bytes = result.bytes().await.map_err(|_| { - PyBufferError::new_err(format!("Failed to stream data from {path} into bytes.")) - })?; - // Return cursor to in-memory buffer - Ok::<Cursor<Bytes>, PyErr>(Cursor::new(bytes)) - })?; - - // Get image pixel data as an ndarray - let vec_data = io::geotiff::read_geotiff(stream) - .map_err(|err| PyValueError::new_err(format!("Cannot read GeoTIFF because: {err}")))?; - - // Convert from ndarray (Rust) to numpy ndarray (Python) - Ok(vec_data.to_pyarray(py)) -} - -/// A Python module implemented in Rust. The name of this function must match -/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to -/// import the module. 
-#[pymodule] -fn cog3pio(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(read_geotiff_py, m)?)?; - Ok(()) -} +/// Modules for Python to interface with Rust code +pub mod python; diff --git a/src/python/adapters.rs b/src/python/adapters.rs new file mode 100644 index 0000000..99c079f --- /dev/null +++ b/src/python/adapters.rs @@ -0,0 +1,87 @@ +use std::io::Cursor; + +use bytes::Bytes; +use numpy::{PyArray2, ToPyArray}; +use object_store::{parse_url, ObjectStore}; +use pyo3::exceptions::{PyBufferError, PyFileNotFoundError, PyValueError}; +use pyo3::prelude::{pyfunction, pymodule, PyModule, PyResult, Python}; +use pyo3::wrap_pyfunction; +use pyo3::PyErr; +use url::Url; + +use crate::io::geotiff::read_geotiff; + +/// Read from a filepath or url into a byte stream +fn path_to_stream(path: &str) -> PyResult<Cursor<Bytes>> { + // Parse URL into ObjectStore and path + let file_or_url = match Url::from_file_path(path) { + // Parse local filepath + Ok(filepath) => filepath, + // Parse remote URL + Err(_) => Url::parse(path) + .map_err(|_| PyValueError::new_err(format!("Cannot parse path: {path}")))?, + }; + let (store, location) = parse_url(&file_or_url) + .map_err(|_| PyValueError::new_err(format!("Cannot parse url: {file_or_url}")))?; + + // Initialize async runtime + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?; + + // Get TIFF file stream asynchronously + let stream = runtime.block_on(async { + let result = store + .get(&location) + .await + .map_err(|_| PyFileNotFoundError::new_err(format!("Cannot find file: {path}")))?; + let bytes = result.bytes().await.map_err(|_| { + PyBufferError::new_err(format!("Failed to stream data from {path} into bytes.")) + })?; + // Return cursor to in-memory buffer + Ok::<Cursor<Bytes>, PyErr>(Cursor::new(bytes)) + })?; + Ok(stream) +} + +/// Read a GeoTIFF file from a path on disk into an ndarray +/// +/// Parameters +/// ---------- +/// path : str +/// The path to the file, or a url to a 
remote file. +/// +/// Returns +/// ------- +/// array : np.ndarray +/// 2D array containing the GeoTIFF pixel data. +/// +/// Examples +/// -------- +/// from cog3pio import read_geotiff +/// +/// array = read_geotiff("https://github.com/pka/georaster/raw/v0.1.0/data/tiff/float32.tif") +/// assert array.shape == (20, 20) +#[pyfunction] +#[pyo3(name = "read_geotiff")] +fn read_geotiff_py<'py>(path: &str, py: Python<'py>) -> PyResult<&'py PyArray2<f32>> { + // Parse URL into byte stream + let stream = path_to_stream(path)?; + + // Get image pixel data as an ndarray + let vec_data = read_geotiff(stream) + .map_err(|err| PyValueError::new_err(format!("Cannot read GeoTIFF because: {err}")))?; + + // Convert from ndarray (Rust) to numpy ndarray (Python) + Ok(vec_data.to_pyarray(py)) +} + +/// A Python module implemented in Rust. The name of this function must match +/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to +/// import the module. +#[pymodule] +fn cog3pio(_py: Python, m: &PyModule) -> PyResult<()> { + // Register Python functions + m.add_function(wrap_pyfunction!(read_geotiff_py, m)?)?; + Ok(()) +} diff --git a/src/python/mod.rs b/src/python/mod.rs new file mode 100644 index 0000000..3fc3fc6 --- /dev/null +++ b/src/python/mod.rs @@ -0,0 +1,2 @@ +/// Adapter interface from Rust to Python +pub mod adapters;