diff --git a/Cargo.lock b/Cargo.lock index 336efd4..f0e943c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -101,17 +101,6 @@ dependencies = [ "syn 2.0.48", ] -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -165,9 +154,7 @@ dependencies = [ "ndarray-rand", "num-traits", "numpy", - "object_store", "pyo3", - "pyo3-asyncio", "pyo3-build-config", "rayon", "rusoto_credential", @@ -176,7 +163,6 @@ dependencies = [ "thiserror", "thousands", "tokio", - "url", ] [[package]] @@ -252,30 +238,6 @@ dependencies = [ "windows-targets 0.48.0", ] -[[package]] -name = "clap" -version = "3.2.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" -dependencies = [ - "atty", - "bitflags", - "clap_lex", - "indexmap 1.9.3", - "strsim", - "termcolor", - "textwrap", -] - -[[package]] -name = "clap_lex" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] - [[package]] name = "cloud-file" version = "0.1.0-beta.2" @@ -735,19 +697,13 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 2.1.0", + "indexmap", "slab", "tokio", "tokio-util", "tracing", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.3" @@ -886,16 +842,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.1.0" @@ -903,7 +849,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" dependencies = [ "equivalent", - "hashbrown 0.14.3", + "hashbrown", ] [[package]] @@ -912,12 +858,6 @@ version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" -[[package]] -name = "inventory" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8573b2b1fb643a372c73b23f4da5f888677feef3305146d68a539250a9bccc7" - [[package]] name = "ipnet" version = "2.9.0" @@ -1238,12 +1178,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" -[[package]] -name = "os_str_bytes" -version = "6.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" - [[package]] name = "parking_lot" version = "0.12.1" @@ -1404,33 +1338,6 @@ dependencies = [ "unindent", ] -[[package]] -name = "pyo3-asyncio" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea6b68e93db3622f3bb3bf363246cf948ed5375afe7abff98ccbdd50b184995" -dependencies = [ - "clap", - "futures", - "inventory", - "once_cell", - "pin-project-lite", - "pyo3", - "pyo3-asyncio-macros", - "tokio", -] - -[[package]] -name = "pyo3-asyncio-macros" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c467178e1da6252c95c29ecf898b133f742e9181dca5def15dc24e19d45a39" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.102", -] - [[package]] name = "pyo3-build-config" version = "0.20.0" @@ -2068,21 +1975,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "921f1e9c427802414907a48b21a6504ff6b3a15a1a3cf37e699590949ad9befc" -[[package]] -name = "termcolor" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" - [[package]] name = "thiserror" version = "1.0.56" diff --git a/Cargo.toml b/Cargo.toml index 926b1ab..9cb96c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ repository = "https://github.com/fastlmm/bed-reader" readme = "README-rust.md" documentation = "https://docs.rs/bed-reader/latest/bed_reader/" authors = ["FaST-LMM Team "] -license = "Apache-2.0" # toml-ignore +license = "Apache-2.0" # toml-ignore keywords = ["bioinformatics", "plink", "genomics", "genotype", "snps"] categories = ["science"] edition = "2021" @@ -26,19 +26,8 @@ crate-type = ["cdylib", "rlib"] # https://github.com/PyO3/pyo3/discussions/2271 # https://pyo3.rs/latest/faq.html#i-cant-run-cargo-test-or-i-cant-build-in-a-cargo-workspace-im-having-linker-issues-like-symbol-not-found-or-undefined-reference-to-_pyexc_systemerror [features] -extension-module = ["pyo3/extension-module", "pyo3-asyncio", "cloud", "tokio/full"] -cloud = [ - "cloud-file", - "object_store", - "itertools", - "futures-util", - "bytes", - # "pyo3-asyncio", - # "tokio/full", - # "bytecount", - # "url" -] -default = ["cloud", "extension-module"] # remove extension-module from default +extension-module = ["pyo3/extension-module", "tokio/full"] +default = [ "extension-module"] # cmk remove extension-module from default [dependencies] thiserror = "1.0.40" @@ -49,36 +38,24 @@ numpy = "0.20.0" ndarray = { version = "0.15.6", features = ["approx", "rayon"] } statrs = "0.16.0" byteorder = { version = "1.4.3", default-features = false } -pyo3 = { version = "0.20.0", features = ["extension-module"], optional = true } dpc-pariter = "0.4.0" # // pariter = "0.5.1" derive_builder = "0.13.0" anyinput = { version = "0.1.6", features = ["ndarray"] } fetch-data = "0.1.6" - -# Dependencies for cloud feature -pyo3-asyncio = { version = "0.20.0", features = ["tokio-runtime"], optional = true } -# when object_store goes to 0.9.1 check if can remove http cloud options work around code. -object_store = { version = "0.9.0", optional = true } -tokio = { version = "1.35.0", features = ["full"], optional = true } -futures-util = { version = "0.3.29", optional = true } -bytecount = { version = "0.6.7", optional = true } -itertools = { version = "0.12.0", optional = true } -bytes = { version = "1.5.0", optional = true } -url = { version = "2.5.0", optional = true } +futures-util = { version = "0.3.29"} +bytecount = { version = "0.6.7"} +itertools = { version = "0.12.0"} +bytes = { version = "1.5.0"} # cmk cloud-file = { version = "0.1.0-beta.2", optional = true } -cloud-file = {path="../cloud-file", optional = true} +cloud-file = {path="../cloud-file"} +pyo3 = { version = "0.20.0", features = ["extension-module"], optional = true } +tokio = { version = "1.35.0", features = ["full"], optional = true } [dev-dependencies] ndarray-rand = "0.14.0" anyhow = "1.0.75" rusoto_credential = "0.48.0" -pyo3-asyncio = { version = "0.20.0", features = [ - "tokio-runtime", - "attributes", - "testing", -] } temp_testdir = "0.2.3" -object_store = { version = "0.9.0", features = ["aws","http"]} thousands = "0.2.0" diff --git a/README-rust.md b/README-rust.md index 839b01f..400c338 100644 --- a/README-rust.md +++ b/README-rust.md @@ -106,8 +106,7 @@ at index position 2. (See ["Cloud URLs and `CloudFile` Examples"](supplemental_d for details specifying a file in the cloud.) ```rust -# #[cfg(feature = "cloud")] // '#' needed for doctest -# { use {bed_reader::BedErrorPlus, tokio::runtime::Runtime}; +# { use {bed_reader::BedErrorPlus, tokio::runtime::Runtime}; // '#' needed for doctest use ndarray as nd; use bed_reader::{assert_eq_nan, BedCloud, ReadOptions}; # Runtime::new().unwrap().block_on(async { diff --git a/examples/cloud.rs b/examples/cloud.rs new file mode 100644 index 0000000..8b0a23a --- /dev/null +++ b/examples/cloud.rs @@ -0,0 +1,25 @@ +use bed_reader::{BedCloud, BedErrorPlus, ReadOptions}; +use ndarray::s; +use std::collections::HashSet; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/some_missing.bed"; + let cloud_options = [("timeout", "10s")]; + + let mut bed_cloud = BedCloud::new_with_options(url, cloud_options).await?; + println!("{:?}", bed_cloud.iid().await?.slice(s![..5])); // Outputs ndarray: ["iid_0", "iid_1", "iid_2", "iid_3", "iid_4"] + println!("{:?}", bed_cloud.sid().await?.slice(s![..5])); // Outputs ndarray: ["sid_0", "sid_1", "sid_2", "sid_3", "sid_4"] + println!( + "{:?}", + bed_cloud.chromosome().await?.iter().collect::>() + ); + // Outputs: {"12", "10", "4", "8", "19", "21", "9", "15", "6", "16", "13", "7", "17", "18", "1", "22", "11", "2", "20", "3", "5", "14"} + let _ = ReadOptions::builder() + .sid_index(bed_cloud.chromosome().await?.map(|elem| elem == "5")) + .f64() + .read_cloud(&mut bed_cloud) + .await?; + + Ok(()) +} diff --git a/examples/no_cloud.rs b/examples/no_cloud.rs new file mode 100644 index 0000000..fe03b69 --- /dev/null +++ b/examples/no_cloud.rs @@ -0,0 +1,19 @@ +use bed_reader::{sample_bed_file, Bed, BedErrorPlus, ReadOptions}; +use ndarray::s; +use std::collections::HashSet; + +fn main() -> Result<(), Box> { + let file_name = sample_bed_file("some_missing.bed")?; + + let mut bed = Bed::new(file_name)?; + println!("{:?}", bed.iid()?.slice(s![..5])); // Outputs ndarray: ["iid_0", "iid_1", "iid_2", "iid_3", "iid_4"] + println!("{:?}", bed.sid()?.slice(s![..5])); // Outputs ndarray: ["sid_0", "sid_1", "sid_2", "sid_3", "sid_4"] + println!("{:?}", bed.chromosome()?.iter().collect::>()); + // Outputs: {"12", "10", "4", "8", "19", "21", "9", "15", "6", "16", "13", "7", "17", "18", "1", "22", "11", "2", "20", "3", "5", "14"} + let _ = ReadOptions::builder() + .sid_index(bed.chromosome()?.map(|elem| elem == "5")) + .f64() + .read(&mut bed)?; + + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index 24bfa59..1df1051 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -111,20 +111,16 @@ //! Any requested sample file will be downloaded to this directory. If the environment variable is not set, //! a cache folder, appropriate to the OS, will be used. -#[cfg(feature = "cloud")] mod python_module; mod tests; use anyinput::anyinput; -#[cfg(feature = "cloud")] pub use bed_cloud::{sample_bed_url, sample_url, sample_urls, BedCloud, BedCloudBuilder}; use byteorder::{LittleEndian, ReadBytesExt}; -#[cfg(feature = "cloud")] pub use cloud_file::{CloudFile, CloudFileError, EMPTY_OPTIONS}; use core::fmt::Debug; use derive_builder::Builder; use dpc_pariter::{scope, IteratorExt}; use fetch_data::FetchData; -#[cfg(feature = "cloud")] use futures_util::StreamExt; use nd::ShapeBuilder; use ndarray as nd; @@ -152,7 +148,6 @@ use std::{ path::{Path, PathBuf}, }; use thiserror::Error; -#[cfg(feature = "cloud")] mod bed_cloud; const BED_FILE_MAGIC1: u8 = 0x6C; // 0b01101100 or 'l' (lowercase 'L') @@ -193,15 +188,10 @@ pub enum BedErrorPlus { #[error(transparent)] ParseFloatError(#[from] ParseFloatError), - #[cfg(feature = "cloud")] #[allow(missing_docs)] #[error(transparent)] CloudFileError(#[from] CloudFileError), - // #[cfg(feature = "cloud")] - // #[allow(missing_docs)] - // #[error(transparent)] - // JoinError(#[from] tokio::task::JoinError), #[allow(missing_docs)] #[error(transparent)] Utf8Error(#[from] Utf8Error), @@ -459,7 +449,6 @@ impl From<::derive_builder::UninitializedFieldError> for BedErrorPlus { } } -#[cfg(feature = "cloud")] impl From for Box { fn from(err: CloudFileError) -> Self { Box::new(BedErrorPlus::CloudFileError(err)) @@ -3140,7 +3129,6 @@ fn compute_num_threads(option_num_threads: Option) -> Result, ) -> Result> { @@ -3158,7 +3146,6 @@ fn compute_max_concurrent_requests( } #[allow(clippy::unnecessary_wraps)] -#[cfg(feature = "cloud")] fn compute_max_chunk_bytes( option_max_chunk_bytes: Option, ) -> Result> { @@ -3992,7 +3979,6 @@ pub struct ReadOptions { /// /// In this example, we read using only request at a time. /// ``` - /// # #[cfg(feature = "cloud")] /// # { use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// use ndarray as nd; /// use bed_reader::{BedCloud, ReadOptions, sample_bed_url, EMPTY_OPTIONS}; @@ -4022,7 +4008,6 @@ pub struct ReadOptions { /// /// In this example, we read using only 1_000_000 bytes per request. /// ``` - /// # #[cfg(feature = "cloud")] /// # { use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// use ndarray as nd; /// use bed_reader::{BedCloud, ReadOptions, sample_bed_url, EMPTY_OPTIONS}; @@ -4345,7 +4330,6 @@ impl ReadOptionsBuilder { /// # Ok::<(), Box>(())}).unwrap(); /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// ``` - #[cfg(feature = "cloud")] pub async fn read_cloud( &self, bed_cloud: &mut BedCloud, @@ -4428,7 +4412,6 @@ impl ReadOptionsBuilder { /// # Ok::<(), Box>(())}).unwrap(); /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// ``` - #[cfg(feature = "cloud")] pub async fn read_and_fill_cloud( &self, bed_cloud: &mut BedCloud, @@ -6452,7 +6435,6 @@ impl Metadata { /// # Ok::<(), Box>(())}).unwrap(); /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// ``` - #[cfg(feature = "cloud")] pub async fn read_fam_cloud( &self, cloud_file: &CloudFile, @@ -6635,7 +6617,6 @@ impl Metadata { /// # Ok::<(), Box>(())}).unwrap(); /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// ``` - #[cfg(feature = "cloud")] pub async fn read_bim_cloud( &self, cloud_file: &CloudFile, @@ -6745,7 +6726,6 @@ impl Metadata { Ok((vec_of_vec, count)) } - #[cfg(feature = "cloud")] async fn read_fam_or_bim_cloud( &self, field_vec: &[usize], @@ -7179,12 +7159,10 @@ where .map_err(|e| BedError::SampleFetch(e.to_string()))?) } -#[cfg(feature = "cloud")] pub mod supplemental_document_options { #![doc = include_str!("supplemental_documents/options_etc.md")] } -#[cfg(feature = "cloud")] pub mod supplemental_document_cloud_urls { #![doc = include_str!("supplemental_documents/cloud_urls_etc.md")] } diff --git a/tests/tests_api_cloud.rs b/tests/tests_api_cloud.rs index d2daa24..345135a 100644 --- a/tests/tests_api_cloud.rs +++ b/tests/tests_api_cloud.rs @@ -1,5 +1,3 @@ -#![cfg(feature = "cloud")] - use bed_reader::allclose; use bed_reader::assert_eq_nan; use bed_reader::assert_error_variant;