From 53253528d311125e077127f020b4e44bbf03bb4b Mon Sep 17 00:00:00 2001 From: Carl Kadie Date: Wed, 20 Dec 2023 08:41:22 -0800 Subject: [PATCH] Simplify generic types for BedCloud --- .gitignore | 2 + src/bed_cloud.rs | 247 ++++++++++++++++++++++++++++------------------- src/lib.rs | 47 ++++----- 3 files changed, 173 insertions(+), 123 deletions(-) diff --git a/.gitignore b/.gitignore index 1fa88a63..ffc34897 100644 --- a/.gitignore +++ b/.gitignore @@ -148,3 +148,5 @@ old4cargo.lock old5Cargo.lock oldCargo.lock bed_reader/Untitled-1.ipynb +del.rs +src/orig.rs diff --git a/src/bed_cloud.rs b/src/bed_cloud.rs index 1c8e4650..8f95998b 100644 --- a/src/bed_cloud.rs +++ b/src/bed_cloud.rs @@ -13,7 +13,6 @@ use object_store::ObjectStore; use object_store::{GetOptions, GetResult, ObjectMeta}; use std::cmp::max; use std::collections::HashSet; -use std::ops::Deref; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -55,22 +54,21 @@ use crate::{MetadataFields, CB_HEADER_U64}; /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// ``` #[derive(Clone, Debug, Builder)] -#[builder(build_fn(private, name = "build_no_file_check", error = "BedErrorPlus"))] -pub struct BedCloud +#[builder(build_fn(skip))] +pub struct BedCloud where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + TObjectStore: ObjectStore, { #[builder(setter(custom))] - object_path: ObjectPath, + object_path: ObjectPath, #[builder(setter(custom))] #[builder(default = "None")] - fam_object_path: Option>, + fam_object_path: Option>, #[builder(setter(custom))] #[builder(default = "None")] - bim_object_path: Option>, + bim_object_path: Option>, #[builder(setter(custom))] #[builder(default = "true")] @@ -91,6 +89,62 @@ where skip_set: HashSet, } +// We need to define our own build_no_file_check +// because otherwise derive_builder (needlessly) requires ObjectStore: Clone +impl BedCloudBuilder +where + TObjectStore: ObjectStore, +{ + fn build_no_file_check(&self) -> Result, Box> 
{ + Ok(BedCloud { + object_path: match self.object_path { + Some(ref value) => Clone::clone(value), + None => { + return Result::Err(Into::into( + ::derive_builder::UninitializedFieldError::from("object_path"), + )); + } + }, + fam_object_path: match self.fam_object_path { + Some(ref value) => Clone::clone(value), + None => None, + }, + bim_object_path: match self.bim_object_path { + Some(ref value) => Clone::clone(value), + None => None, + }, + is_checked_early: match self.is_checked_early { + Some(ref value) => Clone::clone(value), + None => true, + }, + iid_count: match self.iid_count { + Some(ref value) => Clone::clone(value), + None => None, + }, + sid_count: match self.sid_count { + Some(ref value) => Clone::clone(value), + None => None, + }, + metadata: match self.metadata { + Some(ref value) => Clone::clone(value), + None => { + return Result::Err(Into::into( + ::derive_builder::UninitializedFieldError::from("metadata"), + )); + } + }, + skip_set: match self.skip_set { + Some(ref value) => Clone::clone(value), + None => { + return Result::Err(Into::into( + ::derive_builder::UninitializedFieldError::from("skip_set"), + )); + } + }, + }) + } +} + fn convert_negative_sid_index( in_sid_i_signed: isize, upper_sid_count: isize, @@ -111,8 +165,8 @@ fn convert_negative_sid_index( // cmk we should turn sid_index into a slice of ranges. 
#[allow(clippy::too_many_arguments)] -async fn internal_read_no_alloc( - object_path: &ObjectPath, +async fn internal_read_no_alloc( + object_path: &ObjectPath, object_meta: &ObjectMeta, in_iid_count: usize, in_sid_count: usize, @@ -125,8 +179,8 @@ async fn internal_read_no_alloc( out_val: &mut nd::ArrayViewMut2<'_, TVal>, ) -> Result<(), Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { // compute numbers outside of the loop let (in_iid_count_div4, in_iid_count_div4_u64) = @@ -234,16 +288,16 @@ fn decode_bytes_into_columns( } } -fn check_file_length( +fn check_file_length( in_iid_count: usize, in_sid_count: usize, object_meta: &ObjectMeta, object_path: I, ) -> Result<(usize, u64), Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, - I: Into>, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, + I: Into>, { let (in_iid_count_div4, in_iid_count_div4_u64) = try_div_4(in_iid_count, in_sid_count, CB_HEADER_U64)?; @@ -259,7 +313,7 @@ where #[inline] #[allow(clippy::too_many_arguments)] -async fn read_no_alloc( +async fn read_no_alloc( object_path: I, iid_count: usize, sid_count: usize, @@ -273,9 +327,9 @@ async fn read_no_alloc( val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector. 
) -> Result<(), Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, - I: Into>, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, + I: Into>, { let object_path = object_path.into(); let (object_meta, bytes) = open_and_check(&object_path).await?; @@ -321,13 +375,13 @@ where Ok(()) } -async fn open_and_check( +async fn open_and_check( object_path: I, ) -> Result<(ObjectMeta, Bytes), Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, - I: Into>, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, + I: Into>, { let object_path = object_path.into(); let get_options = GetOptions { @@ -350,15 +404,15 @@ where Ok((object_meta, bytes)) } -impl BedCloudBuilder +impl BedCloudBuilder where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { // #[anyinput] fn new(object_path: I) -> Self where - I: Into>, + I: Into>, { Self { object_path: Some(object_path.into()), @@ -377,7 +431,7 @@ where /// Create [`BedCloud`](struct.BedCloud.html) from the builder. /// /// > See [`BedCloud::builder`](struct.BedCloud.html#method.builder) for more details and examples. - pub async fn build(&self) -> Result, Box> { + pub async fn build(&self) -> Result, Box> { let mut bed_cloud = self.build_no_file_check()?; // cmk is this unwrap OK? 
@@ -639,7 +693,7 @@ where /// ``` pub fn fam_object_path(mut self, object_path: I) -> Self where - I: Into>, + I: Into>, { self.fam_object_path = Some(Some(object_path.into())); self @@ -668,7 +722,7 @@ where // #[anyinput] pub fn bim_object_path(mut self, object_path: I) -> Self where - I: Into>, + I: Into>, { let object_path = object_path.into(); self.bim_object_path = Some(Some(object_path)); @@ -880,10 +934,10 @@ where } } -impl BedCloud +impl BedCloud where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { /// Attempts to open a PLINK .bed file in the cloud for reading. Supports options. /// @@ -990,9 +1044,9 @@ where /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// ``` /// - pub fn builder(object_path: I) -> BedCloudBuilder + pub fn builder(object_path: I) -> BedCloudBuilder where - I: Into>, + I: Into>, { let object_path = object_path.into(); BedCloudBuilder::new(object_path) @@ -1053,7 +1107,7 @@ where /// ``` pub async fn new(object_path: I) -> Result> where - I: Into>, + I: Into>, { let object_path = object_path.into(); BedCloud::builder(object_path).build().await @@ -1538,12 +1592,12 @@ where } /// Return the object_path of the .bed file. - pub fn object_path(&self) -> &ObjectPath { + pub fn object_path(&self) -> &ObjectPath { &self.object_path } /// Return the cloud location of the .fam file. - pub fn fam_object_path(&mut self) -> Result, Box> { + pub fn fam_object_path(&mut self) -> Result, Box> { // We need to clone the object_path because self might mutate later if let Some(fam_object_path) = &self.fam_object_path { Ok(fam_object_path.clone()) @@ -1556,7 +1610,7 @@ where } /// Return the cloud location of the .bim file. 
- pub fn bim_object_path(&mut self) -> Result, Box> { + pub fn bim_object_path(&mut self) -> Result, Box> { // We need to clone the object_path because self might mutate later if let Some(bim_object_path) = &self.bim_object_path { Ok(bim_object_path.clone()) @@ -1979,7 +2033,7 @@ where #[anyinput] pub fn sample_bed_object_path( bed_path: AnyPath, -) -> Result>, Box> { +) -> Result, Box> { use std::path::PathBuf; let mut path_list: Vec = Vec::new(); @@ -2001,9 +2055,7 @@ pub fn sample_bed_object_path( /// The file will be in a directory determined by environment variable `BED_READER_DATA_DIR`. /// If that environment variable is not set, a cache folder, appropriate to the OS, will be used. #[anyinput] -pub fn sample_object_path( - path: AnyPath, -) -> Result>, Box> { +pub fn sample_object_path(path: AnyPath) -> Result, Box> { let object_store = Arc::new(LocalFileSystem::new()); let file_path = STATIC_FETCH_DATA @@ -2024,7 +2076,7 @@ pub fn sample_object_path( #[anyinput] pub fn sample_object_paths( path_list: AnyIter, -) -> Result>>, Box> { +) -> Result>, Box> { let object_store = Arc::new(LocalFileSystem::new()); let file_paths = STATIC_FETCH_DATA @@ -2039,35 +2091,35 @@ pub fn sample_object_paths( .collect() } +#[derive(Debug)] /// cmk doc -pub trait ArcStore: Clone + Deref + Send + Sync + 'static {} -/// cmk doc -pub trait ArcStoreTarget: ObjectStore + Send + Sync {} - -impl ArcStore for Arc where T: ArcStoreTarget {} - -impl ArcStoreTarget for T where T: ObjectStore {} -// impl ArcStoreTarget for LocalFileSystem {} - -#[derive(Clone, Debug)] -/// cmk doc -pub struct ObjectPath +pub struct ObjectPath where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + TObjectStore: ObjectStore, { - object_store: TArcStore, + object_store: Arc, path: StorePath, } -impl ObjectPath +impl Clone for ObjectPath +where + TObjectStore: ObjectStore, +{ + fn clone(&self) -> Self { + ObjectPath { + object_store: self.object_store.clone(), + path: self.path.clone(), + } + } 
+} + +impl ObjectPath where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + TObjectStore: ObjectStore, { /// cmk doc - pub fn new(object_store: TArcStore, path: StorePath) -> Self { + pub fn new(object_store: Arc, path: StorePath) -> Self { ObjectPath { object_store, path } } @@ -2112,83 +2164,84 @@ where } } +// cmk00 should be make them pass in an Arc or can we do that? // Implementing From trait for ObjectPath to allow tuple conversions. -impl From<(TArcStore, StorePath)> for ObjectPath +impl From<(Arc, StorePath)> for ObjectPath where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { - fn from(tuple: (TArcStore, StorePath)) -> Self { + fn from(tuple: (Arc, StorePath)) -> Self { ObjectPath { object_store: tuple.0, path: tuple.1, } } } -impl From<(&TArcStore, &StorePath)> for ObjectPath +impl From<(&Arc, &StorePath)> for ObjectPath where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { - fn from(tuple: (&TArcStore, &StorePath)) -> Self { + fn from(tuple: (&Arc, &StorePath)) -> Self { ObjectPath { object_store: tuple.0.clone(), path: tuple.1.clone(), } } } -impl From<(TArcStore, &StorePath)> for ObjectPath +impl From<(Arc, &StorePath)> for ObjectPath where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { - fn from(tuple: (TArcStore, &StorePath)) -> Self { + fn from(tuple: (Arc, &StorePath)) -> Self { ObjectPath { object_store: tuple.0, path: tuple.1.clone(), } } } -impl From<(&TArcStore, StorePath)> for ObjectPath +impl From<(&Arc, StorePath)> for ObjectPath where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { - fn from(tuple: (&TArcStore, StorePath)) -> Self { + fn from(tuple: (&Arc, StorePath)) -> Self { ObjectPath { object_store: 
tuple.0.clone(), path: tuple.1, } } } -impl From<&ObjectPath> for ObjectPath +impl From<&ObjectPath> for ObjectPath where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { - fn from(ref_thing: &ObjectPath) -> Self { + fn from(ref_thing: &ObjectPath) -> Self { ref_thing.clone() } } -impl fmt::Display for ObjectPath +impl fmt::Display for ObjectPath where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "ObjectPath: {:?}", self.path) } } -fn to_metadata_path( - bed_object_path: &ObjectPath, - metadata_object_path: &Option>, +fn to_metadata_path( + bed_object_path: &ObjectPath, + metadata_object_path: &Option>, extension: &str, -) -> Result, Box> +) -> Result, Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, { if let Some(metadata_object_path) = metadata_object_path { Ok(metadata_object_path.clone()) @@ -2201,11 +2254,11 @@ where } } -async fn count_lines(object_path: I) -> Result> +async fn count_lines(object_path: I) -> Result> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, - I: Into>, + // cmk00 TObjectStore: ArcStore, + TObjectStore: ObjectStore, + I: Into>, { let stream = object_path .into() diff --git a/src/lib.rs b/src/lib.rs index 8cd884a5..1c19d657 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -101,8 +101,6 @@ mod python_module; mod tests; use anyinput::anyinput; -use bed_cloud::ArcStore; -use bed_cloud::ArcStoreTarget; use bed_cloud::ObjectPath; pub use bed_cloud::{sample_bed_object_path, sample_object_path, sample_object_paths, BedCloud}; use core::fmt::Debug; @@ -113,6 +111,7 @@ use futures_util::stream::StreamExt; use nd::ShapeBuilder; use ndarray as nd; use object_store::delimited::newline_delimited_stream; +use 
object_store::ObjectStore; use std::cmp::Ordering; use std::collections::HashSet; use std::convert::TryFrom; @@ -4290,13 +4289,12 @@ impl ReadOptionsBuilder { } /// cmk - pub async fn read_cloud( + pub async fn read_cloud( &self, - bed_cloud: &mut BedCloud, + bed_cloud: &mut BedCloud, ) -> Result, Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + TObjectStore: ObjectStore, { let read_options = self.build()?; bed_cloud.read_with_options(&read_options).await @@ -4376,14 +4374,13 @@ impl ReadOptionsBuilder { /// # Ok::<(), Box>(())}).unwrap(); /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// ``` - pub async fn read_and_fill_cloud( + pub async fn read_and_fill_cloud( &self, - bed_cloud: &mut BedCloud, + bed_cloud: &mut BedCloud, val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector. ) -> Result<(), Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + TObjectStore: ObjectStore, { let read_options = self.build()?; bed_cloud @@ -6356,9 +6353,9 @@ impl Metadata { /// let skip_set = HashSet::::new(); /// let metadata_empty = Metadata::new(); /// let (metadata_fam, iid_count) = - /// metadata_empty.read_fam_cloud(sample_object_path("small.fam")?, &skip_set)?; + /// metadata_empty.read_fam_cloud(sample_object_path("small.fam")?, &skip_set).await?; /// let (metadata_bim, sid_count) = - /// metadata_fam.read_bim_cloud(sample_object_path("small.bim")?, &skip_set)?; + /// metadata_fam.read_bim_cloud(sample_object_path("small.bim")?, &skip_set).await?; /// assert_eq!(iid_count, 3); /// assert_eq!(sid_count, 4); /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...) 
@@ -6367,15 +6364,14 @@ impl Metadata { /// # Ok::<(), Box>(())}).unwrap(); /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// ``` - pub async fn read_fam_cloud( + pub async fn read_fam_cloud( &self, object_path: I, skip_set: &HashSet, ) -> Result<(Metadata, usize), Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, - I: Into>, + TObjectStore: ObjectStore, + I: Into>, { let object_path = object_path.into(); let mut field_vec: Vec = Vec::new(); @@ -6542,9 +6538,9 @@ impl Metadata { /// let skip_set = HashSet::::new(); /// let metadata_empty = Metadata::new(); /// let (metadata_fam, iid_count) = - /// metadata_empty.read_fam_cloud(sample_object_path("small.fam")?, &skip_set)?; + /// metadata_empty.read_fam_cloud(sample_object_path("small.fam")?, &skip_set).await?; /// let (metadata_bim, sid_count) = - /// metadata_fam.read_bim_cloud(sample_object_path("small.bim")?, &skip_set)?; + /// metadata_fam.read_bim_cloud(sample_object_path("small.bim")?, &skip_set).await?; /// assert_eq!(iid_count, 3); /// assert_eq!(sid_count, 4); /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...) 
@@ -6553,15 +6549,15 @@ impl Metadata { /// # Ok::<(), Box>(())}).unwrap(); /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus}; /// ``` - pub async fn read_bim_cloud( + pub async fn read_bim_cloud( &self, object_path: I, skip_set: &HashSet, ) -> Result<(Metadata, usize), Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, - I: Into>, + TObjectStore: ObjectStore, + + I: Into>, { let object_path = object_path.into(); let mut field_vec: Vec = Vec::new(); @@ -6670,15 +6666,14 @@ impl Metadata { Ok((vec_of_vec, count)) } - async fn read_fam_or_bim_cloud( + async fn read_fam_or_bim_cloud( &self, field_vec: &[usize], is_split_whitespace: bool, - object_path: &ObjectPath, + object_path: &ObjectPath, ) -> Result<(Vec>, usize), Box> where - TArcStore: ArcStore, - TArcStore::Target: ArcStoreTarget, + TObjectStore: ObjectStore, { let mut vec_of_vec = vec![vec![]; field_vec.len()];