From 0dd4e4d4232aa3f5ee65a121239bfc6b31677b5c Mon Sep 17 00:00:00 2001
From: Carl Kadie
Date: Mon, 29 Jan 2024 15:55:42 -0800
Subject: [PATCH] rename to max_chunk_bytes

---
 CHANGELOG.md                            |  2 +-
 Cargo.toml                              |  2 +-
 bed_reader/_open_bed.py                 | 34 +++++-----
 docs/_modules/bed_reader/_open_bed.html | 34 +++++-----
 docs/index.html                         | 12 ++--
 src/bed_cloud.rs                        | 88 +++---------------------
 src/lib.rs                              | 20 +++---
 src/python_module.rs                    | 12 ++--
 tests/tests_api_cloud.rs                |  4 +-
 9 files changed, 68 insertions(+), 140 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b9dc15..d48e310 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [1.0.1] - 2024-4-16
 
-cmk
+- Add support for cloud files to both Rust and Python.
 
 ## [1.0.0] - 2023-11-5
 
diff --git a/Cargo.toml b/Cargo.toml
index b05f3a4..b036375 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,7 +28,7 @@ crate-type = ["cdylib", "rlib"]
 [features]
 extension-module = ["pyo3/extension-module", "pyo3-asyncio", "cloud"]
 cloud = [
-    "cloud-file", # cmk does this imply object_store?
+    "cloud-file",
     "object_store",
     "pyo3-asyncio",
     "tokio/full",
diff --git a/bed_reader/_open_bed.py b/bed_reader/_open_bed.py
index f55ecb7..9ebf15c 100644
--- a/bed_reader/_open_bed.py
+++ b/bed_reader/_open_bed.py
@@ -98,9 +98,9 @@ def get_max_concurrent_requests(max_concurrent_requests=None):
     return 10
 
 
-def get_max_chunk_size(max_chunk_size=None):
-    if max_chunk_size is not None:
-        return max_chunk_size
+def get_max_chunk_bytes(max_chunk_bytes=None):
+    if max_chunk_bytes is not None:
+        return max_chunk_bytes
     return 8_000_000
 
 
@@ -162,7 +162,7 @@ class open_bed:
     max_concurrent_requests: None or int, optional
         The maximum number of concurrent requests to make to the cloud storage service.
         Defaults to 10.
-    max_chunk_size: None or int, optional
+    max_chunk_bytes: None or int, optional
        The maximum number of bytes to read in a single request to the cloud storage
        service. Defaults to 8MB.
     filepath: same as location
@@ -259,7 +259,7 @@ def __init__(
         bim_location: Union[str, Path, UrlParseResult] = None,
         cloud_options: Mapping[str, str] = {},
         max_concurrent_requests: Optional[int] = None,
-        max_chunk_size: Optional[int] = None,
+        max_chunk_bytes: Optional[int] = None,
         # accept old keywords
         filepath: Union[str, Path] = None,
         fam_filepath: Union[str, Path] = None,
@@ -278,7 +278,7 @@ def __init__(
         self.count_A1 = count_A1
         self._num_threads = num_threads
         self._max_concurrent_requests = max_concurrent_requests
-        self._max_chunk_size = max_chunk_size
+        self._max_chunk_bytes = max_chunk_bytes
         self.skip_format_check = skip_format_check
         self._fam_location = (
             self._path_or_url(fam_location)
@@ -362,7 +362,7 @@ def read(
         force_python_only: Optional[bool] = False,
         num_threads=None,
         max_concurrent_requests=None,
-        max_chunk_size=None,
+        max_chunk_bytes=None,
     ) -> np.ndarray:
         """
         Read genotype information.
@@ -398,7 +398,7 @@ def read(
             The maximum number of concurrent requests to make to the cloud storage
             service. Defaults to 10.
 
-        max_chunk_size: None or int, optional
+        max_chunk_bytes: None or int, optional
             The maximum number of bytes to read in a single request to the cloud
             storage service. Defaults to 8MB.
 
@@ -510,8 +510,8 @@ def read(
                 if max_concurrent_requests is None
                 else max_concurrent_requests
             )
-            max_chunk_size = get_max_chunk_size(
-                self._max_chunk_size if max_chunk_size is None else max_chunk_size
+            max_chunk_bytes = get_max_chunk_bytes(
+                self._max_chunk_bytes if max_chunk_bytes is None else max_chunk_bytes
             )
 
             val = np.zeros((len(iid_index), len(sid_index)), order=order, dtype=dtype)
@@ -545,7 +545,7 @@ def read(
                         val=val,
                         num_threads=num_threads,
                         max_concurrent_requests=max_concurrent_requests,
-                        max_chunk_size=max_chunk_size,
+                        max_chunk_bytes=max_chunk_bytes,
                     )
 
         else:
@@ -1390,7 +1390,7 @@ def read_sparse(
         format: Optional[str] = "csc",
         num_threads=None,
         max_concurrent_requests=None,
-        max_chunk_size=None,
+        max_chunk_bytes=None,
     ) -> (Union[sparse.csc_matrix, sparse.csr_matrix]) if sparse is not None else None:
         """
         Read genotype information into a :mod:`scipy.sparse` matrix. Sparse matrices
@@ -1431,7 +1431,7 @@ def read_sparse(
         max_concurrent_requests: None or int, optional
             The maximum number of concurrent requests to make to the cloud storage
             service. Defaults to 10.
-        max_chunk_size: None or int, optional
+        max_chunk_bytes: None or int, optional
             The maximum number of bytes to read in a single request to the cloud
             storage service. Defaults to 8MB.
 
@@ -1587,8 +1587,8 @@ def read_sparse(
             if max_concurrent_requests is None
             else max_concurrent_requests
         )
-        max_chunk_size = get_max_chunk_size(
-            self._max_chunk_size if max_chunk_size is None else max_chunk_size
+        max_chunk_bytes = get_max_chunk_bytes(
+            self._max_chunk_bytes if max_chunk_bytes is None else max_chunk_bytes
         )
 
         if format == "csc":
@@ -1648,7 +1648,7 @@ def read_sparse(
                             val=val,
                             num_threads=num_threads,
                             max_concurrent_requests=max_concurrent_requests,
-                            max_chunk_size=max_chunk_size,
+                            max_chunk_bytes=max_chunk_bytes,
                         )
 
                     self.sparsify(
@@ -1697,7 +1697,7 @@ def read_sparse(
                             val=val,
                             num_threads=num_threads,
                             max_concurrent_requests=max_concurrent_requests,
-                            max_chunk_size=max_chunk_size,
+                            max_chunk_bytes=max_chunk_bytes,
                         )
 
                     self.sparsify(
diff --git a/docs/_modules/bed_reader/_open_bed.html b/docs/_modules/bed_reader/_open_bed.html
index d01083d..5876fa5 100644
--- a/docs/_modules/bed_reader/_open_bed.html
+++ b/docs/_modules/bed_reader/_open_bed.html
@@ -177,9 +177,9 @@
     return 10
 
 
-def get_max_chunk_size(max_chunk_size=None):
-    if max_chunk_size is not None:
-        return max_chunk_size
+def get_max_chunk_bytes(max_chunk_bytes=None):
+    if max_chunk_bytes is not None:
+        return max_chunk_bytes
     return 8_000_000
 
 
@@ -241,7 +241,7 @@
     max_concurrent_requests: None or int, optional
         The maximum number of concurrent requests to make to the cloud storage service.
         Defaults to 10.
-    max_chunk_size: None or int, optional
+    max_chunk_bytes: None or int, optional
         The maximum number of bytes to read in a single request to the cloud storage
         service. Defaults to 8MB.
     filepath: same as location
@@ -338,7 +338,7 @@
         bim_location: Union[str, Path, UrlParseResult] = None,
         cloud_options: Mapping[str, str] = {},
         max_concurrent_requests: Optional[int] = None,
-        max_chunk_size: Optional[int] = None,
+        max_chunk_bytes: Optional[int] = None,
         # accept old keywords
         filepath: Union[str, Path] = None,
         fam_filepath: Union[str, Path] = None,
@@ -357,7 +357,7 @@
         self.count_A1 = count_A1
         self._num_threads = num_threads
         self._max_concurrent_requests = max_concurrent_requests
-        self._max_chunk_size = max_chunk_size
+        self._max_chunk_bytes = max_chunk_bytes
         self.skip_format_check = skip_format_check
         self._fam_location = (
             self._path_or_url(fam_location)
@@ -441,7 +441,7 @@
         force_python_only: Optional[bool] = False,
         num_threads=None,
         max_concurrent_requests=None,
-        max_chunk_size=None,
+        max_chunk_bytes=None,
     ) -> np.ndarray:
         """
         Read genotype information.
@@ -477,7 +477,7 @@
             The maximum number of concurrent requests to make to the cloud storage
             service. Defaults to 10.
 
-        max_chunk_size: None or int, optional
+        max_chunk_bytes: None or int, optional
             The maximum number of bytes to read in a single request to the cloud
             storage service. Defaults to 8MB.
 
@@ -589,8 +589,8 @@
                 if max_concurrent_requests is None
                 else max_concurrent_requests
             )
-            max_chunk_size = get_max_chunk_size(
-                self._max_chunk_size if max_chunk_size is None else max_chunk_size
+            max_chunk_bytes = get_max_chunk_bytes(
+                self._max_chunk_bytes if max_chunk_bytes is None else max_chunk_bytes
             )
 
             val = np.zeros((len(iid_index), len(sid_index)), order=order, dtype=dtype)
@@ -624,7 +624,7 @@
                         val=val,
                         num_threads=num_threads,
                         max_concurrent_requests=max_concurrent_requests,
-                        max_chunk_size=max_chunk_size,
+                        max_chunk_bytes=max_chunk_bytes,
                     )
 
         else:
@@ -1469,7 +1469,7 @@
         format: Optional[str] = "csc",
         num_threads=None,
         max_concurrent_requests=None,
-        max_chunk_size=None,
+        max_chunk_bytes=None,
     ) -> (Union[sparse.csc_matrix, sparse.csr_matrix]) if sparse is not None else None:
         """
         Read genotype information into a :mod:`scipy.sparse` matrix. Sparse matrices
@@ -1510,7 +1510,7 @@
         max_concurrent_requests: None or int, optional
             The maximum number of concurrent requests to make to the cloud storage
             service. Defaults to 10.
-        max_chunk_size: None or int, optional
+        max_chunk_bytes: None or int, optional
             The maximum number of bytes to read in a single request to the cloud
             storage service. Defaults to 8MB.
 
@@ -1666,8 +1666,8 @@
             if max_concurrent_requests is None
             else max_concurrent_requests
         )
-        max_chunk_size = get_max_chunk_size(
-            self._max_chunk_size if max_chunk_size is None else max_chunk_size
+        max_chunk_bytes = get_max_chunk_bytes(
+            self._max_chunk_bytes if max_chunk_bytes is None else max_chunk_bytes
         )
 
         if format == "csc":
@@ -1727,7 +1727,7 @@
                             val=val,
                             num_threads=num_threads,
                             max_concurrent_requests=max_concurrent_requests,
-                            max_chunk_size=max_chunk_size,
+                            max_chunk_bytes=max_chunk_bytes,
                         )
 
                     self.sparsify(
@@ -1776,7 +1776,7 @@
                             val=val,
                             num_threads=num_threads,
                             max_concurrent_requests=max_concurrent_requests,
-                            max_chunk_size=max_chunk_size,
+                            max_chunk_bytes=max_chunk_bytes,
                         )
 
                     self.sparsify(
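
[Editor's note, not part of the patch: a minimal sketch of the renamed keyword in use, assuming a bed_reader build with this change applied. sample_file ships with the package; the two byte caps below are arbitrary demo values.]

    from bed_reader import open_bed, sample_file

    file_name = sample_file("small.bed")  # fetches/caches a tiny sample .bed file
    # The cap set here applies to every cloud read made through this handle.
    with open_bed(file_name, max_chunk_bytes=1_000_000) as bed:
        # A per-call value overrides the handle-level value.
        val = bed.read(dtype="int8", max_chunk_bytes=500_000)
        print(val.shape)
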
diff --git a/docs/index.html b/docs/index.html
index 557847c..6b37149 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -267,7 +267,7 @@
 
-class bed_reader.open_bed(location: str | Path | ParseResult, iid_count: int | None = None, sid_count: int | None = None, properties: Mapping[str, List[Any]] = {}, count_A1: bool = True, num_threads: int | None = None, skip_format_check: bool = False, fam_location: str | Path | ParseResult = None, bim_location: str | Path | ParseResult = None, cloud_options: Mapping[str, str] = {}, max_concurrent_requests: int | None = None, max_chunk_size: int | None = None, filepath: str | Path = None, fam_filepath: str | Path = None, bim_filepath: str | Path = None)[source]
+class bed_reader.open_bed(location: str | Path | ParseResult, iid_count: int | None = None, sid_count: int | None = None, properties: Mapping[str, List[Any]] = {}, count_A1: bool = True, num_threads: int | None = None, skip_format_check: bool = False, fam_location: str | Path | ParseResult = None, bim_location: str | Path | ParseResult = None, cloud_options: Mapping[str, str] = {}, max_concurrent_requests: int | None = None, max_chunk_bytes: int | None = None, filepath: str | Path = None, fam_filepath: str | Path = None, bim_filepath: str | Path = None)[source]
 
 Open a PLINK .bed file, local or cloud, for reading.
 
 Parameters:
@@ -313,7 +313,7 @@
 
   • cloud_options (dict, optional) – A dictionary of options for reading from cloud storage. The default is an empty dictionary.
 
   • max_concurrent_requests (None or int, optional) – The maximum number of concurrent requests to make to the cloud storage service. Defaults to 10.
 
-  • max_chunk_size (None or int, optional) – The maximum number of bytes to read in a single request to the cloud storage service. Defaults to 8MB.
+  • max_chunk_bytes (None or int, optional) – The maximum number of bytes to read in a single request to the cloud storage service. Defaults to 8MB.
 
   • filepath (same as location) – Deprecated. Use location instead.
 
   • fam_filepath (same as fam_location) – Deprecated. Use fam_location instead.
 
@@ -723,7 +723,7 @@ open_bed
-read(index: Any | None = None, dtype: type | str | None = 'float32', order: str | None = 'F', force_python_only: bool | None = False, num_threads=None, max_concurrent_requests=None, max_chunk_size=None) → ndarray[source]
+read(index: Any | None = None, dtype: type | str | None = 'float32', order: str | None = 'F', force_python_only: bool | None = False, num_threads=None, max_concurrent_requests=None, max_chunk_bytes=None) → ndarray[source]
 
 Read genotype information.
 
 Parameters:
@@ -747,7 +747,7 @@
 
   • max_concurrent_requests (None or int, optional) – The maximum number of concurrent requests to make to the cloud storage service. Defaults to 10.
 
-  • max_chunk_size (None or int, optional) – The maximum number of bytes to read in a single request to the cloud storage service. Defaults to 8MB.
+  • max_chunk_bytes (None or int, optional) – The maximum number of bytes to read in a single request to the cloud storage service. Defaults to 8MB.
 
@@ -821,7 +821,7 @@ open_bed
-read_sparse(index: Any | None = None, dtype: type | str | None = 'float32', batch_size: int | None = None, format: str | None = 'csc', num_threads=None, max_concurrent_requests=None, max_chunk_size=None) → csc_matrix | csr_matrix[source]
+read_sparse(index: Any | None = None, dtype: type | str | None = 'float32', batch_size: int | None = None, format: str | None = 'csc', num_threads=None, max_concurrent_requests=None, max_chunk_bytes=None) → csc_matrix | csr_matrix[source]
 
 Read genotype information into a scipy.sparse matrix. Sparse matrices may be useful when the data is mostly zeros.
 
@@ -853,7 +853,7 @@
 
   • max_concurrent_requests (None or int, optional) – The maximum number of concurrent requests to make to the cloud storage service. Defaults to 10.
 
-  • max_chunk_size (None or int, optional) – The maximum number of bytes to read in a single request to the cloud storage service. Defaults to 8MB.
+  • max_chunk_bytes (None or int, optional) – The maximum number of bytes to read in a single request to the cloud storage service. Defaults to 8MB.
 
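
[Editor's note, not part of the patch: the defaulting rule that the renamed doc entries above all describe, restated as runnable Python. get_max_chunk_bytes mirrors the helper in bed_reader/_open_bed.py; the two demo variables are hypothetical.]

    def get_max_chunk_bytes(max_chunk_bytes=None):
        # An explicit value wins; otherwise fall back to the 8 MB default.
        if max_chunk_bytes is not None:
            return max_chunk_bytes
        return 8_000_000

    # read()/read_sparse() resolve the cap as: per-call argument, else the
    # value given to open_bed(), else 8_000_000.
    constructor_value = None  # hypothetical: what was passed to open_bed(...)
    call_value = None         # hypothetical: what was passed to read(...)
    resolved = get_max_chunk_bytes(
        constructor_value if call_value is None else call_value
    )
    assert resolved == 8_000_000
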
diff --git a/src/bed_cloud.rs b/src/bed_cloud.rs
index 422532e..443e4e6 100644
--- a/src/bed_cloud.rs
+++ b/src/bed_cloud.rs
@@ -15,7 +15,7 @@ use std::ops::Range;
 use std::path::PathBuf;
 
 use crate::{
-    check_and_precompute_iid_index, compute_max_chunk_size, compute_max_concurrent_requests,
+    check_and_precompute_iid_index, compute_max_chunk_bytes, compute_max_concurrent_requests,
     set_up_two_bits_to_value, try_div_4, BedError, BedErrorPlus, BedVal, FromStringArray, Hold,
     Metadata, ReadOptions, BED_FILE_MAGIC1, BED_FILE_MAGIC2, STATIC_FETCH_DATA,
 };
@@ -158,7 +158,7 @@ async fn internal_read_no_alloc(
     sid_index: &[isize],
     missing_value: TVal,
     max_concurrent_requests: usize,
-    max_chunk_size: usize,
+    max_chunk_bytes: usize,
     out_val: &mut nd::ArrayViewMut2<'_, TVal>,
 ) -> Result<(), Box<BedErrorPlus>> {
     // compute numbers outside of the loop
@@ -168,7 +168,7 @@ async fn internal_read_no_alloc(
     if i_div_4_len == 0 {
         return Ok(()); // we must return early because the chucks method doesn't work with size 0
     }
-    let chunk_count = max(1, max_chunk_size / i_div_4_len as usize);
+    let chunk_count = max(1, max_chunk_bytes / i_div_4_len as usize);
     let from_two_bits_to_value = set_up_two_bits_to_value(is_a1_counted, missing_value);
     let lower_sid_count = -(in_sid_count as isize);
     let upper_sid_count: isize = (in_sid_count as isize) - 1;
@@ -298,7 +298,7 @@ async fn read_no_alloc(
     sid_index: &[isize],
     missing_value: TVal,
     max_concurrent_requests: usize,
-    max_chunk_size: usize,
+    max_chunk_bytes: usize,
     val: &mut nd::ArrayViewMut2<'_, TVal>,
     //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
 ) -> Result<(), Box<BedErrorPlus>> {
@@ -319,7 +319,7 @@ async fn read_no_alloc(
             iid_index,
             missing_value,
             max_concurrent_requests,
-            max_chunk_size,
+            max_chunk_bytes,
             &mut val_t,
         )
         .await?;
@@ -335,7 +335,7 @@ async fn read_no_alloc(
             sid_index,
             missing_value,
             max_concurrent_requests,
-            max_chunk_size,
+            max_chunk_bytes,
             val,
         )
         .await?;
@@ -2002,7 +2002,7 @@ impl BedCloud {
         let max_concurrent_requests =
             compute_max_concurrent_requests(read_options.max_concurrent_requests)?;
 
-        let max_chunk_size = compute_max_chunk_size(read_options.max_chunk_size)?;
+        let max_chunk_bytes = compute_max_chunk_bytes(read_options.max_chunk_bytes)?;
 
         // If we already have a Vec, reference it. If we don't, create one and reference it.
         let iid_hold = Hold::new(&read_options.iid_index, iid_count)?;
@@ -2029,7 +2029,7 @@ impl BedCloud {
             sid_index,
             read_options.missing_value,
             max_concurrent_requests,
-            max_chunk_size,
+            max_chunk_bytes,
             &mut val.view_mut(),
         )
         .await
@@ -2195,78 +2195,6 @@ impl BedCloud {
     }
 }
 
-// // cmk remove after no longer needed
-// /// Returns the cloud locations of a .bed file as an [`CloudFile`](struct.CloudFile.html).
-// ///
-// /// Behind the scenes, the "cloud location" will actually be local.
-// /// If necessary, the file will be downloaded.
-// /// The .fam and .bim files will also be downloaded, if they are not already present.
-// /// SHA256 hashes are used to verify that the files are correct.
-// /// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
-// /// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
-// #[anyinput]
-// pub fn sample_bed_url(bed_path: AnyPath) -> Result<CloudFile, Box<BedErrorPlus>> {
-//     let mut path_list: Vec<PathBuf> = Vec::new();
-//     for ext in &["bed", "bim", "fam"] {
-//         let file_path = bed_path.with_extension(ext);
-//         path_list.push(file_path);
-//     }
-
-//     let mut vec = sample_cloud_files(path_list)?;
-//     debug_assert!(vec.len() == 3);
-//     Ok(vec.swap_remove(0))
-// }
-
-// /// Returns the cloud locations of a file as an [`CloudFile`](struct.CloudFile.html).
-// ///
-// /// Behind the scenes, the "cloud location" will actually be local.
-// /// If necessary, the file will be downloaded.
-// /// A SHA256 hash is used to verify that the file is correct.
-// /// The file will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
-// /// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
-// #[anyinput]
-// pub fn sample_cloud_file(path: AnyPath) -> Result<CloudFile, Box<BedErrorPlus>> {
-//     let object_store = Arc::new(LocalFileSystem::new());

-//     let file_path = STATIC_FETCH_DATA
-//         .fetch_file(path)
-//         .map_err(|e| BedError::SampleFetch(e.to_string()))?;
-//     let store_path = StorePath::from_filesystem_path(file_path)?;
-//     let cloud_file = CloudFile {
-//         arc_object_store: &object_store,
-//         store_path,
-//     };
-//     Ok(cloud_file)
-// }

-// /// Returns the cloud locations of a list of files as [`CloudFile`](struct.CloudFile.html)s.
-// ///
-// /// Behind the scenes, the "cloud location" will actually be local.
-// /// If necessary, the file will be downloaded.
-// /// SHA256 hashes are used to verify that the files are correct.
-// /// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
-// /// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
-// #[anyinput]
-// pub fn sample_cloud_files(
-//     path_list: AnyIter<AnyPath>,
-// ) -> Result<Vec<CloudFile>, Box<BedErrorPlus>> {
-//     let arc_object_store = Arc::new(LocalFileSystem::new());

-//     let file_paths = STATIC_FETCH_DATA
-//         .fetch_files(path_list)
-//         .map_err(|e| BedError::SampleFetch(e.to_string()))?;
-//     file_paths
-//         .iter()
-//         .map(|file_path| {
-//             let store_path = StorePath::from_filesystem_path(file_path)?;
-//             Ok(CloudFile {
-//                 arc_object_store: arc_object_store.clone(),
-//                 store_path,
-//             })
-//         })
-//         .collect()
-// }
-
 /// Returns the cloud location of a sample .bed file as a URL string.
 ///
 /// Behind the scenes, the "cloud location" will actually be local.
diff --git a/src/lib.rs b/src/lib.rs
index c3b9d88..97b3ac6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -71,7 +71,7 @@
 //! | [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) | Is allele 1 counted? (defaults to true) |
 //! | [`num_threads`](struct.ReadOptionsBuilder.html#method.num_threads) | Number of threads to use (defaults to all processors) |
 //! | [`max_concurrent_requests`](struct.ReadOptionsBuilder.html#method.max_concurrent_requests) | Maximum number of concurrent async requests (defaults to 10) -- Used by [`BedCloud`](struct.BedCloud.html). |
-//! | [`max_chunk_size`](struct.ReadOptionsBuilder.html#method.max_chunk_size) | Maximum chunk size of async requests (defaults to 8_000_000 bytes) -- Used by [`BedCloud`](struct.BedCloud.html). |
+//! | [`max_chunk_bytes`](struct.ReadOptionsBuilder.html#method.max_chunk_bytes) | Maximum chunk size of async requests (defaults to 8_000_000 bytes) -- Used by [`BedCloud`](struct.BedCloud.html). |
 //!
 //! ### [`Index`](enum.Index.html) Expressions
 //!
@@ -3161,11 +3161,11 @@ fn compute_max_concurrent_requests(
 
 #[allow(clippy::unnecessary_wraps)]
 #[cfg(feature = "cloud")]
-fn compute_max_chunk_size(
-    option_max_chunk_size: Option<usize>,
+fn compute_max_chunk_bytes(
+    option_max_chunk_bytes: Option<usize>,
 ) -> Result<usize, Box<BedErrorPlus>> {
-    let max_chunk_size = if let Some(max_chunk_size) = option_max_chunk_size {
-        max_chunk_size
+    let max_chunk_bytes = if let Some(max_chunk_bytes) = option_max_chunk_bytes {
+        max_chunk_bytes
     // } else if let Ok(num_threads) = env::var("BED_READER_NUM_THREADS") {
     //     num_threads.parse::<usize>()?
     // } else if let Ok(num_threads) = env::var("NUM_THREADS") {
@@ -3173,8 +3173,8 @@ fn compute_max_chunk_size(
     } else {
         8_000_000
     };
-    Ok(max_chunk_size)
-} // cmk rename max_chunk_bytes?
+    Ok(max_chunk_bytes)
+}
 
 impl Index {
     // We can't define a 'From' because we want to add count at the last moment.
@@ -4033,7 +4033,7 @@ pub struct ReadOptions {
     /// # Runtime::new().unwrap().block_on(async {
     /// let url = sample_bed_url("small.bed")?;
     /// let mut bed_cloud = BedCloud::new(&url, EMPTY_OPTIONS).await?;
-    /// let val = ReadOptions::builder().max_chunk_size(1_000_000).i8().read_cloud(&mut bed_cloud).await?;
+    /// let val = ReadOptions::builder().max_chunk_bytes(1_000_000).i8().read_cloud(&mut bed_cloud).await?;
     ///
     /// assert_eq_nan(
     ///     &val,
@@ -4047,7 +4047,7 @@ pub struct ReadOptions {
     /// ```
     #[builder(default, setter(strip_option))]
     #[allow(dead_code)]
-    max_chunk_size: Option<usize>,
+    max_chunk_bytes: Option<usize>,
 }
 
 impl ReadOptions {
@@ -6762,7 +6762,7 @@ impl Metadata {
         let line_chunk = line_chunk.map_err(CloudFileError::ObjectStoreError)?;
         let lines = std::str::from_utf8(&line_chunk)?.split_terminator('\n');
         for line in lines {
-            count += 1; // cmk do the iterator trick here
+            count += 1;
 
             let fields: Vec<&str> = if is_split_whitespace {
                 line.split_whitespace().collect()
diff --git a/src/python_module.rs b/src/python_module.rs
index b0b2055..20dd5c0 100644
--- a/src/python_module.rs
+++ b/src/python_module.rs
@@ -196,7 +196,7 @@ fn bed_reader(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
         val: &PyArray2,
         num_threads: usize,
         max_concurrent_requests: usize,
-        max_chunk_size: usize,
+        max_chunk_bytes: usize,
     ) -> Result<(), PyErr> {
         let iid_index = iid_index.readonly();
         let sid_index = sid_index.readonly();
@@ -220,7 +220,7 @@ fn bed_reader(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
             .is_a1_counted(is_a1_counted)
             .num_threads(num_threads)
             .max_concurrent_requests(max_concurrent_requests)
-            .max_chunk_size(max_chunk_size)
+            .max_chunk_bytes(max_chunk_bytes)
             .read_and_fill_cloud(&mut bed_cloud, &mut val.view_mut())
             .await?;
@@ -241,7 +241,7 @@ fn bed_reader(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
         val: &PyArray2,
         num_threads: usize,
         max_concurrent_requests: usize,
-        max_chunk_size: usize,
+        max_chunk_bytes: usize,
     ) -> Result<(), PyErr> {
         let iid_index = iid_index.readonly();
         let sid_index = sid_index.readonly();
@@ -265,7 +265,7 @@ fn bed_reader(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
             .is_a1_counted(is_a1_counted)
             .num_threads(num_threads)
             .max_concurrent_requests(max_concurrent_requests)
-            .max_chunk_size(max_chunk_size)
+            .max_chunk_bytes(max_chunk_bytes)
             .read_and_fill_cloud(&mut bed_cloud, &mut val.view_mut())
             .await?;
@@ -286,7 +286,7 @@ fn bed_reader(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
         val: &PyArray2,
         num_threads: usize,
         max_concurrent_requests: usize,
-        max_chunk_size: usize,
+        max_chunk_bytes: usize,
     ) -> Result<(), PyErr> {
         let iid_index = iid_index.readonly();
         let sid_index = sid_index.readonly();
@@ -310,7 +310,7 @@ fn bed_reader(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
             .is_a1_counted(is_a1_counted)
             .num_threads(num_threads)
             .max_concurrent_requests(max_concurrent_requests)
-            .max_chunk_size(max_chunk_size)
+            .max_chunk_bytes(max_chunk_bytes)
             .read_and_fill_cloud(&mut bed_cloud, &mut val.view_mut())
             .await?;
diff --git a/tests/tests_api_cloud.rs b/tests/tests_api_cloud.rs
index 50db66a..8e45d71 100644
--- a/tests/tests_api_cloud.rs
+++ b/tests/tests_api_cloud.rs
@@ -525,7 +525,7 @@ async fn max_concurrent_requests() -> Result<(), Box<BedErrorPlus>> {
     // Read data with specified number of threads (or equivalent parallel processing setting)
     let val = ReadOptions::builder()
         .max_concurrent_requests(1)
-        .max_chunk_size(1_000_000)
+        .max_chunk_bytes(1_000_000)
         .i8()
         .read_cloud(&mut bed_cloud)
         .await?;
@@ -2429,7 +2429,7 @@ fn http_cloud_urls_md_3() -> Result<(), Box<BedErrorPlus>> {
     Ok::<(), Box<BedErrorPlus>>(())
 }
 
-// cmk removed this test because it is too slow
+// NOTE: removed this test because it is too slow
 // #[tokio::test]
 // async fn http_cloud_column_speed_test() -> Result<(), Box<BedErrorPlus>> {
 //     let mut bed_cloud = BedCloud::builder(
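
[Editor's note, not part of the patch: why the parameter counts bytes. In internal_read_no_alloc above, max_chunk_bytes caps the size of each cloud request; a .bed file packs four two-bit genotypes per byte, so the reader fetches max(1, max_chunk_bytes / bytes_per_column) SNP columns per request. Below is a Python transcription of that arithmetic with illustrative names; the Rust code calls the per-column byte count i_div_4_len and the result chunk_count.]

    def columns_per_request(iid_count: int, max_chunk_bytes: int = 8_000_000) -> int:
        # One SNP column for iid_count individuals occupies ceil(iid_count / 4) bytes.
        bytes_per_column = (iid_count + 3) // 4
        # max(1, ...): always fetch at least one column per request, even when
        # a single column is larger than the byte cap.
        return max(1, max_chunk_bytes // bytes_per_column)

    print(columns_per_request(500_000))  # -> 64 columns per ~8 MB request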