|
| 1 | +#[cfg(not(target_pointer_width = "64"))] |
| 2 | +compile_error!( |
| 3 | + "This code requires a 64-bit target architecture because the cloud-library assumes it." |
| 4 | +); |
| 5 | + |
1 | 6 | use anyinput::anyinput;
|
2 | 7 | use bytes::Bytes;
|
3 | 8 | use core::fmt;
|
@@ -156,16 +161,14 @@ fn convert_negative_sid_index(
|
156 | 161 | Ok(in_sid_i_signed as u64)
|
157 | 162 | } else if (lower_sid_count..=-1).contains(&in_sid_i_signed) {
|
158 | 163 | #[allow(clippy::cast_sign_loss)]
|
159 |
| - Ok((in_sid_i_signed - lower_sid_count) as u64) // cmk not sure about overflow |
| 164 | + Ok((in_sid_i_signed - lower_sid_count) as u64) |
160 | 165 | } else {
|
161 | 166 | Err(Box::new(BedErrorPlus::BedError(BedError::SidIndexTooBig(
|
162 | 167 | in_sid_i_signed,
|
163 | 168 | ))))
|
164 | 169 | }
|
165 | 170 | }
|
166 | 171 |
|
167 |
| -// cmk somehow we must only compile if size(usize) is 64 bits. |
168 |
| - |
169 | 172 | #[allow(clippy::too_many_arguments)]
|
170 | 173 | #[allow(clippy::similar_names)]
|
171 | 174 | async fn internal_read_no_alloc<TVal: BedVal, TObjectStore>(
|
@@ -272,11 +275,7 @@ fn decode_bytes_into_columns<TVal: BedVal>(
|
272 | 275 | ) {
|
273 | 276 | for (bytes, out_sid_i) in bytes_slice.iter().zip(out_sid_i_vec.into_iter()) {
|
274 | 277 | let mut col = out_val.column_mut(out_sid_i);
|
275 |
| - // // cmk In parallel, decompress the iid info and put it in its column |
276 |
| - // // cmk .par_bridge() // This seems faster that parallel zip |
277 |
| - // .try_for_each(|(bytes_vector_result, mut col)| match bytes_vector_result { |
278 |
| - // Err(e) => Err(e), |
279 |
| - // Ok(bytes_vector) => { |
| 278 | + // LATER: Consider doing this in parallel as in the non-cloud version. |
280 | 279 | for out_iid_i in 0..iid_index.len() {
|
281 | 280 | let i_div_4 = i_div_4_array[out_iid_i];
|
282 | 281 | let i_mod_4_times_2: u8 = i_mod_4_times_2_array[out_iid_i];
|
@@ -455,7 +454,6 @@ where
|
455 | 454 | Ok(bed_cloud)
|
456 | 455 | }
|
457 | 456 |
|
458 |
| - /// cmk update docs |
459 | 457 | /// Create [`BedCloud`](struct.BedCloud.html) from the builder.
|
460 | 458 | ///
|
461 | 459 | /// > See [`BedCloud::builder`](struct.BedCloud.html#method.builder) for more details and examples.
|
@@ -1223,7 +1221,7 @@ where
|
1223 | 1221 | /// assert!(dim == (3,4));
|
1224 | 1222 | /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
|
1225 | 1223 | /// # use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
|
1226 |
| - // cmk call these at the same time? |
| 1224 | + // LATER: Could these be called at the same time, async? |
1227 | 1225 | pub async fn dim(&mut self) -> Result<(usize, usize), Box<BedErrorPlus>> {
|
1228 | 1226 | Ok((self.iid_count().await?, self.sid_count().await?))
|
1229 | 1227 | }
|
@@ -1767,52 +1765,6 @@ where
|
1767 | 1765 | .await
|
1768 | 1766 | }
|
1769 | 1767 |
|
1770 |
| - /// cmk doc |
1771 |
| - // have read_and_fill_with_options call this |
1772 |
| - pub async fn read_and_fill_with_options_no_mut<TVal: BedVal>( |
1773 |
| - &self, |
1774 |
| - iid_count: usize, |
1775 |
| - sid_count: usize, |
1776 |
| - val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector., |
1777 |
| - read_options: &ReadOptions<TVal>, |
1778 |
| - ) -> Result<(), Box<BedErrorPlus>> { |
1779 |
| - // // must do these one-at-a-time because they mutate self to cache the results |
1780 |
| - // let iid_count = self.iid_count().await?; |
1781 |
| - // let sid_count = self.sid_count().await?; |
1782 |
| - |
1783 |
| - let max_concurrent_requests = |
1784 |
| - compute_max_concurrent_requests(read_options.max_concurrent_requests)?; |
1785 |
| - |
1786 |
| - let max_chunk_size = compute_max_chunk_size(read_options.max_chunk_size)?; |
1787 |
| - |
1788 |
| - // If we already have a Vec<isize>, reference it. If we don't, create one and reference it. |
1789 |
| - let iid_hold = Hold::new(&read_options.iid_index, iid_count)?; |
1790 |
| - let iid_index = iid_hold.as_ref(); |
1791 |
| - let sid_hold = Hold::new(&read_options.sid_index, sid_count)?; |
1792 |
| - let sid_index = sid_hold.as_ref(); |
1793 |
| - |
1794 |
| - let dim = val.dim(); |
1795 |
| - if dim != (iid_index.len(), sid_index.len()) { |
1796 |
| - return Err(Box::new( |
1797 |
| - BedError::InvalidShape(iid_index.len(), sid_index.len(), dim.0, dim.1).into(), |
1798 |
| - )); |
1799 |
| - } |
1800 |
| - |
1801 |
| - read_no_alloc( |
1802 |
| - &self.object_path, |
1803 |
| - iid_count, |
1804 |
| - sid_count, |
1805 |
| - read_options.is_a1_counted, |
1806 |
| - iid_index, |
1807 |
| - sid_index, |
1808 |
| - read_options.missing_value, |
1809 |
| - max_concurrent_requests, |
1810 |
| - max_chunk_size, |
1811 |
| - &mut val.view_mut(), |
1812 |
| - ) |
1813 |
| - .await |
1814 |
| - } |
1815 |
| - |
1816 | 1768 | /// Read all genotype data into a preallocated array.
|
1817 | 1769 | ///
|
1818 | 1770 | /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
|
@@ -1896,122 +1848,7 @@ where
|
1896 | 1848 | Ok(val)
|
1897 | 1849 | }
|
1898 | 1850 |
|
1899 |
| - /// Write genotype data with default metadata. |
1900 |
| - /// |
1901 |
| - /// > Also see [`WriteOptions::builder`](struct.WriteOptions.html#method.builder), which supports metadata and options. |
1902 |
| - /// |
1903 |
| - /// # Errors |
1904 |
| - /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html) |
1905 |
| - /// for all possible errors. |
1906 |
| - /// |
1907 |
| - /// # Example |
1908 |
| - /// In this example, write genotype data using default metadata. |
1909 |
| - /// ```ignore // cmk |
1910 |
| - /// use ndarray as nd; |
1911 |
| - /// use bed_reader::{BedCloud, WriteOptions}; |
1912 |
| - /// |
1913 |
| - /// let output_folder = temp_testdir::TempDir::default(); |
1914 |
| - /// let output_file = output_folder.join("small.bed"); |
1915 |
| - /// |
1916 |
| - /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]]; |
1917 |
| - /// BedCloud::write(&val, &output_file)?; |
1918 |
| - /// |
1919 |
| - /// // If we then read the new file and list the chromosome property, |
1920 |
| - /// // it is an array of zeros, the default chromosome value. |
1921 |
| - /// let mut bed_cloud2 = BedCloud::new(&output_file)?; |
1922 |
| - /// println!("{:?}", bed_cloud2.chromosome().await?); // Outputs ndarray ["0", "0", "0", "0"] |
1923 |
| - /// # use bed_reader::BedErrorPlus; |
1924 |
| - /// # Ok::<(), Box<BedErrorPlus>>(()) |
1925 |
| - /// ``` |
1926 |
| - // cmk need to do 'write' |
1927 |
| - // pub fn write<S: nd::Data<Elem = TVal>, TVal: BedVal>( |
1928 |
| - // val: &nd::ArrayBase<S, nd::Ix2>, |
1929 |
| - // path: &Path, |
1930 |
| - // ) -> Result<(), Box<BedErrorPlus>> { |
1931 |
| - // WriteOptions::builder(path).write(val) |
1932 |
| - // } |
1933 |
| - |
1934 |
| - /// Given an 2D array of genotype data and a [`WriteOptions`](struct.WriteOptionsBuilder.html), write to a .bed file. |
1935 |
| - /// |
1936 |
| - /// > Also see [`WriteOptionsBuilder::write`](struct.WriteOptionsBuilder.html#method.write), which creates |
1937 |
| - /// > a [`WriteOptions`](struct.WriteOptionsBuilder.html) and writes to file in one step. |
1938 |
| - /// |
1939 |
| - /// # Example |
1940 |
| - /// ```ignore // cmk |
1941 |
| - /// use ndarray as nd; |
1942 |
| - /// use bed_reader::{BedCloud, WriteOptions}; |
1943 |
| - /// |
1944 |
| - /// let val = nd::array![ |
1945 |
| - /// [1.0, 0.0, f64::NAN, 0.0], |
1946 |
| - /// [2.0, 0.0, f64::NAN, 2.0], |
1947 |
| - /// [0.0, 1.0, 2.0, 0.0] |
1948 |
| - /// ]; |
1949 |
| - /// |
1950 |
| - /// let output_folder = temp_testdir::TempDir::default(); |
1951 |
| - /// let output_file = output_folder.join("small.bed"); |
1952 |
| - /// let write_options = WriteOptions::builder(output_file) |
1953 |
| - /// .iid(["iid1", "iid2", "iid3"]) |
1954 |
| - /// .sid(["sid1", "sid2", "sid3", "sid4"]) |
1955 |
| - /// .build(3,4)?; |
1956 |
| - /// |
1957 |
| - /// BedCloud::write_with_options(&val, &write_options)?; |
1958 |
| - /// # use bed_reader::BedErrorPlus; |
1959 |
| - /// # Ok::<(), Box<BedErrorPlus>>(()) |
1960 |
| - /// ``` |
1961 |
| - // cmk need to do 'write_with_options' |
1962 |
| - // pub fn write_with_options<S, TVal>( |
1963 |
| - // val: &nd::ArrayBase<S, nd::Ix2>, |
1964 |
| - // write_options: &WriteOptions<TVal>, |
1965 |
| - // ) -> Result<(), Box<BedErrorPlus>> |
1966 |
| - // where |
1967 |
| - // S: nd::Data<Elem = TVal>, |
1968 |
| - // TVal: BedVal, |
1969 |
| - // { |
1970 |
| - // let (iid_count, sid_count) = val.dim(); |
1971 |
| - // if iid_count != write_options.iid_count() { |
1972 |
| - // return Err(BedError::InconsistentCount( |
1973 |
| - // "iid".to_string(), |
1974 |
| - // write_options.iid_count(), |
1975 |
| - // iid_count, |
1976 |
| - // ) |
1977 |
| - // .into()); |
1978 |
| - // } |
1979 |
| - // if sid_count != write_options.sid_count() { |
1980 |
| - // return Err(BedError::InconsistentCount( |
1981 |
| - // "sid".to_string(), |
1982 |
| - // write_options.sid_count(), |
1983 |
| - // sid_count, |
1984 |
| - // ) |
1985 |
| - // .into()); |
1986 |
| - // } |
1987 |
| - |
1988 |
| - // let num_threads = compute_num_threads(write_options.num_threads)?; |
1989 |
| - // write_val( |
1990 |
| - // &write_options.path, |
1991 |
| - // val, |
1992 |
| - // write_options.is_a1_counted, |
1993 |
| - // write_options.missing_value, |
1994 |
| - // num_threads, |
1995 |
| - // )?; |
1996 |
| - |
1997 |
| - // if !write_options.skip_fam() { |
1998 |
| - // if let Err(e) = write_options.metadata.write_fam(write_options.fam_object_path()) { |
1999 |
| - // // Clean up the file |
2000 |
| - // let _ = fs::remove_file(&write_options.fam_object_path); |
2001 |
| - // return Err(e); |
2002 |
| - // } |
2003 |
| - // } |
2004 |
| - |
2005 |
| - // if !write_options.skip_bim() { |
2006 |
| - // if let Err(e) = write_options.metadata.write_bim(write_options.bim_object_path()) { |
2007 |
| - // // Clean up the file |
2008 |
| - // let _ = fs::remove_file(&write_options.bim_object_path); |
2009 |
| - // return Err(e); |
2010 |
| - // } |
2011 |
| - // } |
2012 |
| - |
2013 |
| - // Ok(()) |
2014 |
| - // } |
| 1851 | + // LATER: Support writing to a BedCloud |
2015 | 1852 |
|
2016 | 1853 | async fn unlazy_fam<T: FromStringArray<T>>(
|
2017 | 1854 | &mut self,
|
@@ -2227,9 +2064,9 @@ pub struct ObjectPath<TObjectStore>
|
2227 | 2064 | where
|
2228 | 2065 | TObjectStore: ObjectStore,
|
2229 | 2066 | {
|
2230 |
| - /// cmk doc |
| 2067 | + /// An `Arc`-wrapped [`ObjectStore`], for example, an AWS S3 reader or a local file reader. |
2231 | 2068 | pub object_store: Arc<TObjectStore>,
|
2232 |
| - /// cmk doc |
| 2069 | + /// A [`StorePath`] that points to a file on the [`ObjectStore`]. |
2233 | 2070 | pub path: StorePath,
|
2234 | 2071 | }
|
2235 | 2072 |
|
@@ -2287,7 +2124,9 @@ where
|
2287 | 2124 | /// ```
|
2288 | 2125 | pub async fn size(&self) -> Result<usize, Box<BedErrorPlus>> {
|
2289 | 2126 | let get_result = self.get().await?;
|
2290 |
| - let object_meta = &get_result.meta; // cmk good idea? |
| 2127 | + // LATER: See if https://github.com/apache/arrow-rs/issues/5272 is fixed in |
| 2128 | + // a way so that only one read is needed. |
| 2129 | + let object_meta = &get_result.meta; |
2291 | 2130 | Ok(object_meta.size)
|
2292 | 2131 | }
|
2293 | 2132 |
|
|
0 commit comments