From d33ee119a7c63a75a2b77224ce07854966d0df1e Mon Sep 17 00:00:00 2001 From: Patrick Marks Date: Sat, 16 Oct 2021 09:39:43 -0700 Subject: [PATCH 1/2] switch to const generics for selecting K --- Cargo.toml | 2 +- src/kmer.rs | 240 ++++++++-------------------------------------------- src/test.rs | 8 +- 3 files changed, 39 insertions(+), 211 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5542b17e..08464814 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "debruijn" -version = "0.3.4" +version = "0.4.0" authors = ["Patrick Marks "] license = "MIT" edition = '2018' diff --git a/src/kmer.rs b/src/kmer.rs index e89bd6fc..37d5b605 100644 --- a/src/kmer.rs +++ b/src/kmer.rs @@ -39,7 +39,6 @@ use serde_derive::{Deserialize, Serialize}; use std; use std::fmt; use std::hash::Hash; -use std::marker::PhantomData; use crate::bits_to_base; use crate::Kmer; @@ -51,47 +50,47 @@ use crate::Mer; pub type Kmer64 = IntKmer; /// 48-base kmer, backed by a single u128 -pub type Kmer48 = VarIntKmer; +pub type Kmer48 = VarIntKmer; /// 40-base kmer, backed by a single u128 -pub type Kmer40 = VarIntKmer; +pub type Kmer40 = VarIntKmer; /// 32-base kmer, backed by a single u64 pub type Kmer32 = IntKmer; /// 30-base kmer, backed by a single u64 -pub type Kmer30 = VarIntKmer; +pub type Kmer30 = VarIntKmer; /// 24-base kmer, backed by a single u64 -pub type Kmer24 = VarIntKmer; +pub type Kmer24 = VarIntKmer; /// 20-base kmer, backed by a single u64 -pub type Kmer20 = VarIntKmer; +pub type Kmer20 = VarIntKmer; /// 16-base kmer, backed by a single u32 pub type Kmer16 = IntKmer; /// 15-base kmer, backed by a single u32 -pub type Kmer15 = VarIntKmer; +pub type Kmer15 = VarIntKmer; /// 14-base kmer, backed by a single u32 -pub type Kmer14 = VarIntKmer; +pub type Kmer14 = VarIntKmer; /// 12-base kmer, backed by a single u32 -pub type Kmer12 = VarIntKmer; +pub type Kmer12 = VarIntKmer; /// 10-base kmer, backed by a single u32 -pub type Kmer10 = VarIntKmer; +pub type Kmer10 = VarIntKmer; /// 8-base kmer, backed by a single u16 pub type Kmer8 = IntKmer; -pub type Kmer6 = VarIntKmer; -pub type Kmer5 = VarIntKmer; +pub type Kmer6 = VarIntKmer; +pub type Kmer5 = VarIntKmer; pub type Kmer4 = IntKmer; -pub type Kmer3 = VarIntKmer; -pub type Kmer2 = VarIntKmer; +pub type Kmer3 = VarIntKmer; +pub type Kmer2 = VarIntKmer; /// Trait for specialized integer operations used in DeBruijn Graph pub trait IntHelp: PrimInt + FromPrimitive { @@ -416,6 +415,7 @@ impl fmt::Debug for IntKmer { } } + /// Helper trait for declaring the K value of a Kmer. Will be removed when const generics are available pub trait KmerSize: Ord + Hash + Copy + fmt::Debug { #[allow(non_snake_case)] @@ -432,22 +432,20 @@ pub trait KmerSize: Ord + Hash + Copy + fmt::Debug { /// sorting the integer will give a lexicographic sorting of the corresponding string. /// kmers that don't fill `storage` are always aligned to the least signifcant bits #[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)] -pub struct VarIntKmer { +pub struct VarIntKmer { pub storage: T, - pub phantom: PhantomData, } -impl Kmer for VarIntKmer { +impl Kmer for VarIntKmer { fn empty() -> Self { VarIntKmer { storage: T::zero(), - phantom: PhantomData, } } #[inline] fn k() -> usize { - Self::_k() + KS } fn to_u64(&self) -> u64 { @@ -457,7 +455,6 @@ impl Kmer for VarIntK fn from_u64(v: u64) -> Self { VarIntKmer { storage: Self::t_from_u64(v), - phantom: PhantomData, } } @@ -466,7 +463,6 @@ impl Kmer for VarIntK let new = self.storage >> 2; let mut kmer = VarIntKmer { storage: new, - phantom: PhantomData, }; kmer.set_mut(0, v); kmer @@ -476,7 +472,6 @@ impl Kmer for VarIntK let new = self.storage << 2 & !Self::top_mask(0); let mut kmer = VarIntKmer { storage: new, - phantom: PhantomData, }; kmer.set_mut(Self::k() - 1, v); kmer @@ -489,7 +484,7 @@ impl Kmer for VarIntK } } -impl VarIntKmer { +impl VarIntKmer { #[inline(always)] fn msk() -> T { T::one() << 1 | T::one() @@ -514,16 +509,10 @@ impl VarIntKmer usize { - KS::K() - } - // Bits used by this kmer #[inline(always)] fn _bits() -> usize { - Self::_k() * 2 + KS * 2 } #[inline(always)] @@ -557,10 +546,10 @@ impl VarIntKmer Mer for VarIntKmer { +impl Mer for VarIntKmer { #[inline(always)] fn len(&self) -> usize { - Self::_k() + KS } /// Get the letter at the given position. @@ -622,7 +611,6 @@ impl Mer for VarIntKm VarIntKmer { storage: new, - phantom: PhantomData, } } @@ -642,8 +630,8 @@ impl Mer for VarIntKm mask_lower.count_ones() } } - -impl fmt::Debug for VarIntKmer { + +impl fmt::Debug for VarIntKmer { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut s = String::new(); for pos in 0..Self::k() { @@ -654,167 +642,7 @@ impl fmt::Debug for V } } -/// Marker struct for generating K=48 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K48; - -impl KmerSize for K48 { - #[inline(always)] - fn K() -> usize { - 48 - } -} - -/// Marker trait for generating K=40 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K40; - -impl KmerSize for K40 { - #[inline(always)] - fn K() -> usize { - 40 - } -} - -/// Marker trait for generating K=31 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K31; - -impl KmerSize for K31 { - #[inline(always)] - fn K() -> usize { - 31 - } -} - -/// Marker trait for generating K=30 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K30; - -impl KmerSize for K30 { - #[inline(always)] - fn K() -> usize { - 30 - } -} - -/// Marker trait for generating K=24 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K24; - -impl KmerSize for K24 { - #[inline(always)] - fn K() -> usize { - 24 - } -} - -/// Marker trait for generating K=20 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K20; - -impl KmerSize for K20 { - #[inline(always)] - fn K() -> usize { - 20 - } -} - -/// Marker trait for generating K=14 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K15; - -impl KmerSize for K15 { - #[inline(always)] - fn K() -> usize { - 15 - } -} - -/// Marker trait for generating K=14 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K14; - -impl KmerSize for K14 { - #[inline(always)] - fn K() -> usize { - 14 - } -} - -/// Marker trait for generating K=12 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K12; - -impl KmerSize for K12 { - #[inline] - fn K() -> usize { - 12 - } -} - -/// Marker trait for generating K=12 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K10; - -impl KmerSize for K10 { - #[inline] - fn K() -> usize { - 10 - } -} - -/// Marker trait for generating K=6 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K6; -impl KmerSize for K6 { - #[inline(always)] - fn K() -> usize { - 6 - } -} - -/// Marker trait for generating K=6 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K5; - -impl KmerSize for K5 { - #[inline(always)] - fn K() -> usize { - 5 - } -} -/// Marker trait for generating K=6 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K4; - -impl KmerSize for K4 { - #[inline(always)] - fn K() -> usize { - 4 - } -} -/// Marker trait for generating K=6 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K3; - -impl KmerSize for K3 { - #[inline(always)] - fn K() -> usize { - 3 - } -} -/// Marker trait for generating K=6 Kmers -#[derive(Debug, Hash, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub struct K2; - -impl KmerSize for K2 { - #[inline(always)] - fn K() -> usize { - 2 - } -} #[cfg(test)] mod tests { @@ -1012,7 +840,7 @@ mod tests { #[test] fn test_lmer_3_kmer_48() { for _ in 0..10000 { - check_vmer::, VarIntKmer>(); + check_vmer::, VarIntKmer>(); } } @@ -1033,14 +861,14 @@ mod tests { #[test] fn test_lmer_1_kmer_24() { for _ in 0..10000 { - check_vmer::, VarIntKmer>(); + check_vmer::, VarIntKmer>(); } } #[test] fn test_lmer_1_kmer_20() { for _ in 0..10000 { - check_vmer::, VarIntKmer>(); + check_vmer::, VarIntKmer>(); } } @@ -1061,7 +889,7 @@ mod tests { #[test] fn test_kmer_48() { for _ in 0..10000 { - check_kmer::>(); + check_kmer::>(); } } @@ -1075,21 +903,21 @@ mod tests { #[test] fn test_kmer_31() { for _ in 0..10000 { - check_kmer::>(); + check_kmer::>(); } } #[test] fn test_kmer_24() { for _ in 0..10000 { - check_kmer::>(); + check_kmer::>(); } } #[test] fn test_kmer_20() { for _ in 0..10000 { - check_kmer::>(); + check_kmer::>(); } } @@ -1103,28 +931,28 @@ mod tests { #[test] fn test_kmer_15() { for _ in 0..10000 { - check_kmer::>(); + check_kmer::>(); } } #[test] fn test_kmer_14() { for _ in 0..10000 { - check_kmer::>(); + check_kmer::>(); } } #[test] fn test_kmer_12() { for _ in 0..10000 { - check_kmer::>(); + check_kmer::>(); } } #[test] fn test_kmer_10() { for _ in 0..10000 { - check_kmer::>(); + check_kmer::>(); } } diff --git a/src/test.rs b/src/test.rs index 4a195653..fa78294a 100644 --- a/src/test.rs +++ b/src/test.rs @@ -150,7 +150,7 @@ mod tests { use crate::dna_string::DnaString; use crate::filter; use crate::kmer::Kmer6; - use crate::kmer::{IntKmer, VarIntKmer, K31}; + use crate::kmer::{IntKmer, VarIntKmer}; use crate::msp; use std::ops::Sub; @@ -179,7 +179,7 @@ mod tests { .map(crate::base_to_bits) .collect(); - reassemble_contigs::, DnaString>(vec![seq.clone(), seq], false); + reassemble_contigs::, DnaString>(vec![seq.clone(), seq], false); } #[test] @@ -192,7 +192,7 @@ mod tests { .map(crate::base_to_bits) .collect(); - reassemble_sharded::, DnaString>(vec![seq.clone(), seq], false); + reassemble_sharded::, DnaString>(vec![seq.clone(), seq], false); } #[test] @@ -221,7 +221,7 @@ mod tests { fn complex_path_compress_k31() { for _ in 0..100 { let contigs = random_contigs(); - simplify_from_kmers::>(contigs, false); + simplify_from_kmers::>(contigs, false); } } From 488b8ccf661ed7805dc55c8ba1a5f290ed6c191e Mon Sep 17 00:00:00 2001 From: Patrick Marks Date: Sat, 16 Oct 2021 09:42:43 -0700 Subject: [PATCH 2/2] fmt --- src/kmer.rs | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/kmer.rs b/src/kmer.rs index 37d5b605..3b6a53bf 100644 --- a/src/kmer.rs +++ b/src/kmer.rs @@ -415,7 +415,6 @@ impl fmt::Debug for IntKmer { } } - /// Helper trait for declaring the K value of a Kmer. Will be removed when const generics are available pub trait KmerSize: Ord + Hash + Copy + fmt::Debug { #[allow(non_snake_case)] @@ -438,9 +437,7 @@ pub struct VarIntKmer { impl Kmer for VarIntKmer { fn empty() -> Self { - VarIntKmer { - storage: T::zero(), - } + VarIntKmer { storage: T::zero() } } #[inline] @@ -461,18 +458,14 @@ impl Kmer for VarI /// Shift the base v into the left end of the kmer fn extend_left(&self, v: u8) -> Self { let new = self.storage >> 2; - let mut kmer = VarIntKmer { - storage: new, - }; + let mut kmer = VarIntKmer { storage: new }; kmer.set_mut(0, v); kmer } fn extend_right(&self, v: u8) -> Self { let new = self.storage << 2 & !Self::top_mask(0); - let mut kmer = VarIntKmer { - storage: new, - }; + let mut kmer = VarIntKmer { storage: new }; kmer.set_mut(Self::k() - 1, v); kmer } @@ -609,9 +602,7 @@ impl Mer for VarIn new = new >> up_shift; } - VarIntKmer { - storage: new, - } + VarIntKmer { storage: new } } fn at_count(&self) -> u32 { @@ -630,8 +621,10 @@ impl Mer for VarIn mask_lower.count_ones() } } - -impl fmt::Debug for VarIntKmer { + +impl fmt::Debug + for VarIntKmer +{ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut s = String::new(); for pos in 0..Self::k() { @@ -642,8 +635,6 @@ impl fmt::Debug fo } } - - #[cfg(test)] mod tests { use super::*;