From c86adc94057175cf576e112d9832b434e3b87044 Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Tue, 19 Mar 2024 07:21:31 +0100 Subject: [PATCH 01/12] #21 added merge_overlaps_with method --- src/lib.rs | 86 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 30 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 549b117..411052e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,7 +80,6 @@ use num_traits::{ PrimInt, Unsigned, }; use std::cmp::Ordering::{self}; -use std::collections::VecDeque; #[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize}; @@ -345,45 +344,56 @@ where /// Merge any intervals that overlap with eachother within the Lapper. This is an easy way to /// speed up queries. pub fn merge_overlaps(&mut self) { - let mut stack: VecDeque<&mut Interval> = VecDeque::new(); - let mut ivs = self.intervals.iter_mut(); - if let Some(first) = ivs.next() { - stack.push_back(first); - for interval in ivs { - let mut top = stack.pop_back().unwrap(); - if top.stop < interval.start { - stack.push_back(top); - stack.push_back(interval); - } else if top.stop < interval.stop { - top.stop = interval.stop; - //stack.pop_back(); - stack.push_back(top); - } else { - // they were equal - stack.push_back(top); + let mut merged: Vec> = Vec::new(); + + for interval in &self.intervals { + match merged.last_mut() { + Some(last) if last.stop >= interval.start => { + last.stop = std::cmp::max(last.stop, interval.stop); } + _ => merged.push(interval.clone()), + } + } + + self.intervals = merged; + self.update_auxiliary_structures(); + self.overlaps_merged = true; + } + + /// Merge any intervals that overlap within the Lapper, using a user-provided merge function + /// for the values of overlapping intervals. This is an easy way to speed up queries and + /// customize how interval values are combined. 
+ pub fn merge_overlaps_with(&mut self, merge_fn: F) + where + F: Fn(&T, &T) -> T, + { + let mut merged: Vec> = Vec::new(); + + for interval in &self.intervals { + match merged.last_mut() { + Some(last) if last.stop >= interval.start => { + last.stop = std::cmp::max(last.stop, interval.stop); + last.val = merge_fn(&last.val, &interval.val); + } + _ => merged.push(interval.clone()), } - self.overlaps_merged = true; - self.intervals = stack - .into_iter() - .map(|x| Interval { - start: x.start, - stop: x.stop, - val: x.val.clone(), - }) - .collect(); } - // Fix the starts and stops used by counts + + self.intervals = merged; + self.update_auxiliary_structures(); + self.overlaps_merged = true; + } + + /// Helper method to update starts, stops, and max_len based on the current state of intervals. + fn update_auxiliary_structures(&mut self) { let (mut starts, mut stops): (Vec<_>, Vec<_>) = self.intervals.iter().map(|x| (x.start, x.stop)).unzip(); starts.sort(); stops.sort(); - self.starts = starts; - self.stops = stops; self.max_len = self .intervals .iter() - .map(|x| x.stop.checked_sub(&x.start).unwrap_or_else(zero::)) + .map(|iv| iv.stop.checked_sub(&iv.start).unwrap_or_else(zero::)) .max() .unwrap_or_else(zero::); } @@ -1123,7 +1133,23 @@ mod tests { lapper.merge_overlaps(); assert_eq!(expected, lapper.iter().collect::>()); assert_eq!(lapper.intervals.len(), lapper.starts.len()) + } + #[test] + fn test_merge_overlaps_with() { + let mut lapper = setup_badlapper(); + let merge_fn = |a: &u32, _b: &u32| -> u32 { *a + 1 }; + let expected : Vec<&Iv> = vec![ + &Iv{ start: 10, stop: 16, val: 3 }, // 3 overlaps, initial val = 0, +3 overlaps + &Iv{ start: 40, stop: 45, val: 0 }, // No overlap, val remains 0 + &Iv{ start: 50, stop: 55, val: 0 }, // No overlap, val remains 0 + &Iv{ start: 60, stop: 65, val: 0 }, // No overlap, val remains 0 + &Iv{ start: 68, stop: 120, val: 2 }, // 2 overlaps, initial val = 0, +2 overlaps + ]; + assert_eq!(lapper.intervals.len(), 
lapper.starts.len()); + lapper.merge_overlaps_with(merge_fn); + assert_eq!(expected, lapper.iter().collect::>()); + assert_eq!(lapper.intervals.len(), lapper.starts.len()) } // This test was added because this breakage was found in a library user's code, where after From ac47eb7235e168c4a684580ccbd648eb482e8c15 Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Tue, 19 Mar 2024 07:30:49 +0100 Subject: [PATCH 02/12] #21 fixed typo --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 411052e..9ebc266 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -387,7 +387,7 @@ where /// Helper method to update starts, stops, and max_len based on the current state of intervals. fn update_auxiliary_structures(&mut self) { let (mut starts, mut stops): (Vec<_>, Vec<_>) = - self.intervals.iter().map(|x| (x.start, x.stop)).unzip(); + self.intervals.iter().map(|iv| (iv.start, iv.stop)).unzip(); starts.sort(); stops.sort(); self.max_len = self From 261fb61239f500ab2f884ddcaa35e8218dd3a33b Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Tue, 19 Mar 2024 08:03:11 +0100 Subject: [PATCH 03/12] #21 fixed typo --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9ebc266..0904ceb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -348,7 +348,7 @@ where for interval in &self.intervals { match merged.last_mut() { - Some(last) if last.stop >= interval.start => { + Some(last) if last.stop > interval.start => { last.stop = std::cmp::max(last.stop, interval.stop); } _ => merged.push(interval.clone()), @@ -371,7 +371,7 @@ where for interval in &self.intervals { match merged.last_mut() { - Some(last) if last.stop >= interval.start => { + Some(last) if last.stop > interval.start => { last.stop = std::cmp::max(last.stop, interval.stop); last.val = merge_fn(&last.val, &interval.val); } From 
67655be5ec2a62d4b2fe96061857aa41d92319c9 Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Tue, 19 Mar 2024 10:51:04 +0100 Subject: [PATCH 04/12] #21 wip split_and_merge_overlaps_with --- src/lib.rs | 250 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 221 insertions(+), 29 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0904ceb..fb2408c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -348,9 +348,11 @@ where for interval in &self.intervals { match merged.last_mut() { + // If there is an overlap; extend the last interval to cover the new interval Some(last) if last.stop > interval.start => { last.stop = std::cmp::max(last.stop, interval.stop); } + // No overlap; add the new interval as is _ => merged.push(interval.clone()), } } @@ -360,9 +362,10 @@ where self.overlaps_merged = true; } - /// Merge any intervals that overlap within the Lapper, using a user-provided merge function - /// for the values of overlapping intervals. This is an easy way to speed up queries and - /// customize how interval values are combined. + /// Merges overlapping intervals, combining their values with a user-provided merge function. + /// + /// Adjacent or overlapping intervals are consolidated into single intervals, with their values + /// merged using the provided `merge_fn`. 
pub fn merge_overlaps_with(&mut self, merge_fn: F) where F: Fn(&T, &T) -> T, @@ -371,10 +374,12 @@ where for interval in &self.intervals { match merged.last_mut() { + // If there is an overlap; extend the last interval to cover the new interval Some(last) if last.stop > interval.start => { last.stop = std::cmp::max(last.stop, interval.stop); last.val = merge_fn(&last.val, &interval.val); } + // No overlap; add the new interval as is _ => merged.push(interval.clone()), } } @@ -384,12 +389,87 @@ where self.overlaps_merged = true; } + // TODO: Has issues with exact overlaps + /// Processes overlapping intervals by splitting and merging, using a user-provided merge function. + /// + /// This method handles overlapping intervals by: + /// - Creating new intervals for each overlap, applying `merge_fn` to their values. + /// - Preserving non-overlapping parts as individual intervals. + pub fn split_and_merge_overlaps_with(&mut self, merge_fn: F) + where + F: Fn(&T, &T) -> T, + { + let mut merged: Vec> = Vec::new(); + let mut current_interval_option: Option> = None; + + for interval in &self.intervals { + if let Some(mut current_interval) = current_interval_option.take() { + // No overlap; add the new interval as is + if current_interval.stop <= interval.start { + merged.push(current_interval); + current_interval_option = Some(interval.clone()); + } + // Overlap + else { + let overlap_start = std::cmp::max(current_interval.start, interval.start); + let overlap_end = std::cmp::min(current_interval.stop, interval.stop); + + // Leading non-overlapping part + if current_interval.start < overlap_start { + merged.push(Interval { + start: current_interval.start, + stop: overlap_start, + val: current_interval.val.clone(), + }); + } + + // Overlapping part + merged.push(Interval { + start: overlap_start, + stop: overlap_end, + val: merge_fn(¤t_interval.val, &interval.val), + }); + + // Check for trailing non-overlapping part + if interval.stop > overlap_end { + 
current_interval_option = Some(Interval { + start: overlap_end, + stop: interval.stop, + val: interval.val.clone(), + }); + } + + // If current_interval extends beyond interval, update its start to reflect processed part + if current_interval.stop > overlap_end { + current_interval.start = overlap_end; + current_interval_option = Some(current_interval); + } + } + } + // No ongoing interval, so we start with this one + else { + current_interval_option = Some(interval.clone()); + } + } + + // Check if there's a remaining interval to push into merged + if let Some(current_interval) = current_interval_option { + merged.push(current_interval); + } + + self.intervals = merged; + self.update_auxiliary_structures(); + self.overlaps_merged = true; + } + /// Helper method to update starts, stops, and max_len based on the current state of intervals. fn update_auxiliary_structures(&mut self) { let (mut starts, mut stops): (Vec<_>, Vec<_>) = self.intervals.iter().map(|iv| (iv.start, iv.stop)).unzip(); starts.sort(); stops.sort(); + self.starts = starts; + self.stops = stops; self.max_len = self .intervals .iter() @@ -1138,8 +1218,7 @@ mod tests { #[test] fn test_merge_overlaps_with() { let mut lapper = setup_badlapper(); - let merge_fn = |a: &u32, _b: &u32| -> u32 { *a + 1 }; - let expected : Vec<&Iv> = vec![ + let expected: Vec<&Iv> = vec![ &Iv{ start: 10, stop: 16, val: 3 }, // 3 overlaps, initial val = 0, +3 overlaps &Iv{ start: 40, stop: 45, val: 0 }, // No overlap, val remains 0 &Iv{ start: 50, stop: 55, val: 0 }, // No overlap, val remains 0 @@ -1147,9 +1226,122 @@ mod tests { &Iv{ start: 68, stop: 120, val: 2 }, // 2 overlaps, initial val = 0, +2 overlaps ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.merge_overlaps_with(merge_fn); + lapper.merge_overlaps_with( |a: &u32, _b: &u32| -> u32 { *a + 1 }); assert_eq!(expected, lapper.iter().collect::>()); - assert_eq!(lapper.intervals.len(), lapper.starts.len()) + assert_eq!(lapper.intervals.len(), 
lapper.starts.len()); + } + + #[test] + fn test_split_and_merge_overlaps_badlapper() { + let mut lapper = setup_badlapper(); + let expected: Vec<&Iv> = vec![ + &Iv { start: 10, stop: 12, val: 1 }, + &Iv { start: 12, stop: 14, val: 2 }, + &Iv { start: 14, stop: 15, val: 3 }, + &Iv { start: 15, stop: 16, val: 1 }, + &Iv { start: 40, stop: 45, val: 0 }, + &Iv { start: 50, stop: 55, val: 0 }, + &Iv { start: 60, stop: 65, val: 0 }, + &Iv { start: 68, stop: 70, val: 0 }, + &Iv { start: 70, stop: 71, val: 1 }, + &Iv { start: 71, stop: 75, val: 1 }, + &Iv { start: 75, stop: 120, val: 0 }, + ]; + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + lapper.split_and_merge_overlaps_with(|a: &u32, _b: &u32| -> u32 { *a + 1 }); + println!("{:?}", lapper.iter().collect::>()); + assert_eq!(expected, lapper.iter().collect::>()); + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + } + + #[test] + fn test_split_and_merge_overlaps() { + let mut lapper = Lapper::new(vec![ + Iv { start: 1, stop: 5, val: 10 }, + Iv { start: 3, stop: 7, val: 20 }, + Iv { start: 6, stop: 9, val: 30 }, + ]); + let expected: Vec<&Iv> = vec![ + &Iv { start: 1, stop: 3, val: 10 }, + &Iv { start: 3, stop: 5, val: 30 }, + &Iv { start: 5, stop: 6, val: 20 }, + &Iv { start: 6, stop: 7, val: 50 }, + &Iv { start: 7, stop: 9, val: 30 }, + ]; + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + lapper.split_and_merge_overlaps_with(|a, b| a + b); + assert_eq!(expected, lapper.iter().collect::>()); + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + } + + #[test] + fn test_split_and_merge_overlaps_with_contained_interval() { + let mut lapper = Lapper::new(vec![ + Iv { start: 1, stop: 10, val: 15 }, + Iv { start: 3, stop: 7, val: 5 }, + ]); + let expected: Vec<&Iv> = vec![ + &Iv { start: 1, stop: 3, val: 15 }, + &Iv { start: 3, stop: 7, val: 20 }, + &Iv { start: 7, stop: 10, val: 15 }, + ]; + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + 
lapper.split_and_merge_overlaps_with(|a, b| a + b); + assert_eq!(expected, lapper.iter().collect::>()); + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + } + + #[test] + fn test_split_and_merge_overlaps_with_non_overlapping_intervals() { + let mut lapper = Lapper::new(vec![ + Iv { start: 1, stop: 2, val: 10 }, + Iv { start: 3, stop: 4, val: 20 }, + Iv { start: 5, stop: 6, val: 30 }, + ]); + let expected: Vec<&Iv> = vec![ + &Iv { start: 1, stop: 2, val: 10 }, + &Iv { start: 3, stop: 4, val: 20 }, + &Iv { start: 5, stop: 6, val: 30 }, + ]; + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + lapper.split_and_merge_overlaps_with(|a, b| a + b); + assert_eq!(expected, lapper.iter().collect::>()); + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + } + + #[test] + fn test_split_and_merge_overlaps_with_partial_overlap() { + let mut lapper = Lapper::new(vec![ + Iv { start: 1, stop: 4, val: 10 }, + Iv { start: 3, stop: 6, val: 20 }, + ]); + let expected: Vec<&Iv> = vec![ + &Iv { start: 1, stop: 3, val: 10 }, + &Iv { start: 3, stop: 4, val: 30 }, + &Iv { start: 4, stop: 6, val: 20 }, + ]; + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + lapper.split_and_merge_overlaps_with(|a, b| a + b); + assert_eq!(expected, lapper.iter().collect::>()); + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + } + + #[test] + fn test_split_and_merge_overlaps_with_exact_overlap() { + let mut lapper = Lapper::new(vec![ + Iv { start: 1, stop: 4, val: 10 }, + Iv { start: 1, stop: 4, val: 10 }, + Iv { start: 3, stop: 6, val: 20 }, + ]); + let expected: Vec<&Iv> = vec![ + &Iv { start: 1, stop: 3, val: 20 }, + &Iv { start: 3, stop: 4, val: 40 }, + &Iv { start: 4, stop: 6, val: 20 }, + ]; + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + lapper.split_and_merge_overlaps_with(|a, b| a + b); + assert_eq!(expected, lapper.iter().collect::>()); + assert_eq!(lapper.intervals.len(), lapper.starts.len()); } // This test was added because this breakage 
was found in a library user's code, where after @@ -1399,27 +1591,27 @@ mod tests { assert_eq!(lapper.count(28974798, 33141355), 1); } - #[test] - fn serde_test() { - let data = vec![ - Iv{start:25264912, stop: 25264986, val: 0}, - Iv{start:27273024, stop: 27273065 , val: 0}, - Iv{start:27440273, stop: 27440318 , val: 0}, - Iv{start:27488033, stop: 27488125 , val: 0}, - Iv{start:27938410, stop: 27938470 , val: 0}, - Iv{start:27959118, stop: 27959171 , val: 0}, - Iv{start:28866309, stop: 33141404 , val: 0}, - ]; - let lapper = Lapper::new(data); - - let serialized = bincode::serialize(&lapper).unwrap(); - let deserialzed: Lapper = bincode::deserialize(&serialized).unwrap(); - - let found = deserialzed.find(28974798, 33141355).collect::>(); - assert_eq!(found, vec![ - &Iv{start:28866309, stop: 33141404 , val: 0}, - ]); - assert_eq!(deserialzed.count(28974798, 33141355), 1); - } + // #[test] + // fn serde_test() { + // let data = vec![ + // Iv{start:25264912, stop: 25264986, val: 0}, + // Iv{start:27273024, stop: 27273065 , val: 0}, + // Iv{start:27440273, stop: 27440318 , val: 0}, + // Iv{start:27488033, stop: 27488125 , val: 0}, + // Iv{start:27938410, stop: 27938470 , val: 0}, + // Iv{start:27959118, stop: 27959171 , val: 0}, + // Iv{start:28866309, stop: 33141404 , val: 0}, + // ]; + // let lapper = Lapper::new(data); + + // let serialized = bincode::serialize(&lapper).unwrap(); + // let deserialzed: Lapper = bincode::deserialize(&serialized).unwrap(); + + // let found = deserialzed.find(28974798, 33141355).collect::>(); + // assert_eq!(found, vec![ + // &Iv{start:28866309, stop: 33141404 , val: 0}, + // ]); + // assert_eq!(deserialzed.count(28974798, 33141355), 1); + // } } From e0f533ef9cfdd5faaca6f85ba7b1a13d3d40b6f6 Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:18:53 +0100 Subject: [PATCH 05/12] #21 fixed typo --- src/lib.rs | 282 +++++++++++++++++++++++++++-------------------------- 1 file 
changed, 146 insertions(+), 136 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fb2408c..f510d4c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,9 +77,9 @@ //! ``` use num_traits::{ identities::{one, zero}, - PrimInt, Unsigned, + One, PrimInt, Unsigned, }; -use std::cmp::Ordering::{self}; +use std::cmp::Ordering; #[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize}; @@ -90,7 +90,7 @@ use serde::{Deserialize, Serialize}; #[derive(Eq, Debug, Clone)] pub struct Interval where - I: PrimInt + Unsigned + Ord + Clone + Send + Sync, + I: PrimInt + One + Unsigned + Ord + Clone + Send + Sync, T: Eq + Clone + Send + Sync, { pub start: I, @@ -362,10 +362,7 @@ where self.overlaps_merged = true; } - /// Merges overlapping intervals, combining their values with a user-provided merge function. - /// - /// Adjacent or overlapping intervals are consolidated into single intervals, with their values - /// merged using the provided `merge_fn`. + /// Processes overlapping intervals by splitting and merging them based on a custom merge function. pub fn merge_overlaps_with(&mut self, merge_fn: F) where F: Fn(&T, &T) -> T, @@ -389,75 +386,64 @@ where self.overlaps_merged = true; } - // TODO: Has issues with exact overlaps - /// Processes overlapping intervals by splitting and merging, using a user-provided merge function. - /// - /// This method handles overlapping intervals by: - /// - Creating new intervals for each overlap, applying `merge_fn` to their values. - /// - Preserving non-overlapping parts as individual intervals. - pub fn split_and_merge_overlaps_with(&mut self, merge_fn: F) + /// Divides a set of overlapping intervals into non-overlapping intervals, + /// aggregating associated data for each resulting interval using a custom merge function. 
+ // Based on: https://stackoverflow.com/questions/628837/how-to-divide-a-set-of-overlapping-ranges-into-non-overlapping-ranges + pub fn divide_overlaps_with(&mut self, merge_fn: F) where - F: Fn(&T, &T) -> T, + F: Fn(&Vec) -> T, { - let mut merged: Vec> = Vec::new(); - let mut current_interval_option: Option> = None; - + let mut events: Vec<(I, bool, I, T)> = Vec::new(); for interval in &self.intervals { - if let Some(mut current_interval) = current_interval_option.take() { - // No overlap; add the new interval as is - if current_interval.stop <= interval.start { - merged.push(current_interval); - current_interval_option = Some(interval.clone()); + events.push((interval.start, true, interval.stop, interval.val.clone())); + events.push((interval.stop, false, interval.start, interval.val.clone())); + } + + events.sort_by(|a, b| { + a.0.cmp(&b.0).then_with(|| { + let order_a = if a.1 { 0 } else { 1 }; + let order_b = if b.1 { 0 } else { 1 }; + order_a.cmp(&order_b).then_with(|| a.2.cmp(&b.2)) + }) + }); + + let mut current_values: Vec = Vec::new(); + let mut ranges: Vec> = Vec::new(); + let mut current_start = None; + + for (endpoint, is_start, _, symbol) in events { + match (is_start, current_start) { + (true, Some(start)) if endpoint != start && !current_values.is_empty() => { + ranges.push(Interval { + start, + stop: endpoint - I::one(), + val: merge_fn(¤t_values), + }); + current_start = Some(endpoint); + } + (true, _) => { + current_start = Some(endpoint); } - // Overlap - else { - let overlap_start = std::cmp::max(current_interval.start, interval.start); - let overlap_end = std::cmp::min(current_interval.stop, interval.stop); - - // Leading non-overlapping part - if current_interval.start < overlap_start { - merged.push(Interval { - start: current_interval.start, - stop: overlap_start, - val: current_interval.val.clone(), - }); - } - - // Overlapping part - merged.push(Interval { - start: overlap_start, - stop: overlap_end, - val: merge_fn(¤t_interval.val, 
&interval.val), + (false, Some(start)) if !current_values.is_empty() => { + ranges.push(Interval { + start, + stop: endpoint, + val: merge_fn(¤t_values), }); - - // Check for trailing non-overlapping part - if interval.stop > overlap_end { - current_interval_option = Some(Interval { - start: overlap_end, - stop: interval.stop, - val: interval.val.clone(), - }); - } - - // If current_interval extends beyond interval, update its start to reflect processed part - if current_interval.stop > overlap_end { - current_interval.start = overlap_end; - current_interval_option = Some(current_interval); - } + current_start = Some(endpoint + I::one()); } + _ => {} } - // No ongoing interval, so we start with this one - else { - current_interval_option = Some(interval.clone()); - } - } - // Check if there's a remaining interval to push into merged - if let Some(current_interval) = current_interval_option { - merged.push(current_interval); + if is_start { + current_values.push(symbol); + } else { + current_values.retain(|v| v != &symbol); + current_start = Some(endpoint + I::one()); + } } - self.intervals = merged; + self.intervals = ranges; self.update_auxiliary_structures(); self.overlaps_merged = true; } @@ -1232,115 +1218,139 @@ mod tests { } #[test] - fn test_split_and_merge_overlaps_badlapper() { - let mut lapper = setup_badlapper(); - let expected: Vec<&Iv> = vec![ - &Iv { start: 10, stop: 12, val: 1 }, - &Iv { start: 12, stop: 14, val: 2 }, - &Iv { start: 14, stop: 15, val: 3 }, - &Iv { start: 15, stop: 16, val: 1 }, - &Iv { start: 40, stop: 45, val: 0 }, - &Iv { start: 50, stop: 55, val: 0 }, - &Iv { start: 60, stop: 65, val: 0 }, - &Iv { start: 68, stop: 70, val: 0 }, - &Iv { start: 70, stop: 71, val: 1 }, - &Iv { start: 71, stop: 75, val: 1 }, - &Iv { start: 75, stop: 120, val: 0 }, + fn test_divide_overlaps_with() { + let mut lapper : Lapper = Lapper::new(vec![ + Interval { start: 1, stop: 5, val: String::from("a") }, + Interval { start: 3, stop: 7, val: 
String::from("b") }, + Interval { start: 6, stop: 9, val: String::from("c") }, + ]); + let expected: Vec> = vec![ + Interval { start: 1, stop: 2, val: String::from("a") }, + Interval { start: 3, stop: 5, val: String::from("a, b") }, + Interval { start: 6, stop: 7, val: String::from("b, c") }, + Interval { start: 8, stop: 9, val: String::from("c") }, ]; - assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.split_and_merge_overlaps_with(|a: &u32, _b: &u32| -> u32 { *a + 1 }); - println!("{:?}", lapper.iter().collect::>()); - assert_eq!(expected, lapper.iter().collect::>()); - assert_eq!(lapper.intervals.len(), lapper.starts.len()); + assert_eq!(lapper.intervals.len(), lapper.starts.len()); + lapper.divide_overlaps_with(|overlap| overlap + .iter() + .fold(String::new(), |acc, x| acc + x + ", ") + .trim_end_matches(", ").to_string()); + assert_eq!(expected, lapper.iter().cloned().collect::>()); + assert_eq!(lapper.intervals.len(), lapper.starts.len()); } + // Testcase from: + // https://stackoverflow.com/questions/628837/how-to-divide-a-set-of-overlapping-ranges-into-non-overlapping-ranges #[test] - fn test_split_and_merge_overlaps() { - let mut lapper = Lapper::new(vec![ - Iv { start: 1, stop: 5, val: 10 }, - Iv { start: 3, stop: 7, val: 20 }, - Iv { start: 6, stop: 9, val: 30 }, + fn test_divide_overlaps_with_stackoverflow() { + let mut lapper: Lapper = Lapper::new(vec![ + Interval { start: 0, stop: 100, val: String::from("a") }, + Interval { start: 0, stop: 75, val: String::from("b") }, + Interval { start: 75, stop: 80, val: String::from("d") }, + Interval {start: 95, stop: 150, val: String::from("c")}, + Interval {start: 120, stop: 130, val: String::from("d")}, + Interval {start: 160, stop: 175, val: String::from("e")}, + Interval {start: 165, stop: 180, val: String::from("a")}, ]); - let expected: Vec<&Iv> = vec![ - &Iv { start: 1, stop: 3, val: 10 }, - &Iv { start: 3, stop: 5, val: 30 }, - &Iv { start: 5, stop: 6, val: 20 }, - &Iv { start: 6, stop: 
7, val: 50 }, - &Iv { start: 7, stop: 9, val: 30 }, + let expected: Vec> = vec![ + Interval { start: 0, stop: 74, val: String::from("b, a") }, + Interval { start: 75, stop: 75, val: String::from("b, a, d") }, + Interval { start: 76, stop: 80, val: String::from("a, d") }, + Interval {start: 81, stop: 94, val: String::from("a")}, + Interval {start: 95, stop: 100, val: String::from("a, c")}, + Interval {start: 101, stop: 119, val: String::from("c")}, + Interval {start: 120, stop: 130, val: String::from("c, d")}, + Interval {start: 131, stop: 150, val: String::from("c")}, + Interval {start: 160, stop: 164, val: String::from("e")}, + Interval {start: 165, stop: 175, val: String::from("e, a")}, + Interval {start: 176, stop: 180, val: String::from("a")}, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.split_and_merge_overlaps_with(|a, b| a + b); - assert_eq!(expected, lapper.iter().collect::>()); + lapper.divide_overlaps_with(|overlap| overlap + .iter() + .fold(String::new(), |acc, x| acc + x + ", ") + .trim_end_matches(", ").to_string()); + assert_eq!(expected, lapper.iter().cloned().collect::>()); assert_eq!(lapper.intervals.len(), lapper.starts.len()); } #[test] - fn test_split_and_merge_overlaps_with_contained_interval() { + fn test_divide_overlaps_with_contained_interval() { let mut lapper = Lapper::new(vec![ - Iv { start: 1, stop: 10, val: 15 }, - Iv { start: 3, stop: 7, val: 5 }, + Interval { start: 1, stop: 10, val: String::from("a") }, + Interval { start: 3, stop: 7, val: String::from("b") }, ]); - let expected: Vec<&Iv> = vec![ - &Iv { start: 1, stop: 3, val: 15 }, - &Iv { start: 3, stop: 7, val: 20 }, - &Iv { start: 7, stop: 10, val: 15 }, + let expected: Vec> = vec![ + Interval { start: 1, stop: 2, val: String::from("a") }, + Interval { start: 3, stop: 7, val: String::from("a, b") }, + Interval { start: 8, stop: 10, val: String::from("a") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - 
lapper.split_and_merge_overlaps_with(|a, b| a + b); - assert_eq!(expected, lapper.iter().collect::>()); + lapper.divide_overlaps_with(|overlap| overlap + .iter() + .fold(String::new(), |acc, x| acc + x + ", ") + .trim_end_matches(", ").to_string()); + assert_eq!(expected, lapper.iter().cloned().collect::>()); assert_eq!(lapper.intervals.len(), lapper.starts.len()); } #[test] - fn test_split_and_merge_overlaps_with_non_overlapping_intervals() { + fn test_divide_overlaps_with_non_overlapping_intervals() { let mut lapper = Lapper::new(vec![ - Iv { start: 1, stop: 2, val: 10 }, - Iv { start: 3, stop: 4, val: 20 }, - Iv { start: 5, stop: 6, val: 30 }, + Interval { start: 1, stop: 2, val: String::from("a") }, + Interval { start: 3, stop: 4, val: String::from("b") }, ]); - let expected: Vec<&Iv> = vec![ - &Iv { start: 1, stop: 2, val: 10 }, - &Iv { start: 3, stop: 4, val: 20 }, - &Iv { start: 5, stop: 6, val: 30 }, + let expected: Vec> = vec![ + Interval { start: 1, stop: 2, val: String::from("a") }, + Interval { start: 3, stop: 4, val: String::from("b") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.split_and_merge_overlaps_with(|a, b| a + b); - assert_eq!(expected, lapper.iter().collect::>()); + lapper.divide_overlaps_with(|overlap| overlap + .iter() + .fold(String::new(), |acc, x| acc + x + ", ") + .trim_end_matches(", ").to_string()); + assert_eq!(expected, lapper.iter().cloned().collect::>()); assert_eq!(lapper.intervals.len(), lapper.starts.len()); } #[test] - fn test_split_and_merge_overlaps_with_partial_overlap() { + fn test_divide_overlaps_with_partial_overlap() { let mut lapper = Lapper::new(vec![ - Iv { start: 1, stop: 4, val: 10 }, - Iv { start: 3, stop: 6, val: 20 }, + Interval { start: 1, stop: 4, val: String::from("a") }, + Interval { start: 3, stop: 6, val: String::from("b") }, ]); - let expected: Vec<&Iv> = vec![ - &Iv { start: 1, stop: 3, val: 10 }, - &Iv { start: 3, stop: 4, val: 30 }, - &Iv { start: 4, stop: 6, val: 20 }, + let 
expected: Vec> = vec![ + Interval { start: 1, stop: 2, val: String::from("a") }, + Interval { start: 3, stop: 4, val: String::from("a, b") }, + Interval { start: 5, stop: 6, val: String::from("b") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.split_and_merge_overlaps_with(|a, b| a + b); - assert_eq!(expected, lapper.iter().collect::>()); + lapper.divide_overlaps_with(|overlap| overlap + .iter() + .fold(String::new(), |acc, x| acc + x + ", ") + .trim_end_matches(", ").to_string()); + assert_eq!(expected, lapper.iter().cloned().collect::>()); assert_eq!(lapper.intervals.len(), lapper.starts.len()); } #[test] - fn test_split_and_merge_overlaps_with_exact_overlap() { + fn test_divide_overlaps_with_exact_overlap() { let mut lapper = Lapper::new(vec![ - Iv { start: 1, stop: 4, val: 10 }, - Iv { start: 1, stop: 4, val: 10 }, - Iv { start: 3, stop: 6, val: 20 }, + Interval { start: 1, stop: 4, val: String::from("a") }, + Interval { start: 1, stop: 4, val: String::from("b") }, + Interval { start: 3, stop: 6, val: String::from("c") }, ]); - let expected: Vec<&Iv> = vec![ - &Iv { start: 1, stop: 3, val: 20 }, - &Iv { start: 3, stop: 4, val: 40 }, - &Iv { start: 4, stop: 6, val: 20 }, + let expected: Vec> = vec![ + Interval { start: 1, stop: 2, val: String::from("a, b") }, + Interval { start: 3, stop: 4, val: String::from("a, b, c") }, + Interval { start: 5, stop: 6, val: String::from("c") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.split_and_merge_overlaps_with(|a, b| a + b); - assert_eq!(expected, lapper.iter().collect::>()); + lapper.divide_overlaps_with(|overlap| overlap + .iter() + .fold(String::new(), |acc, x| acc + x + ", ") + .trim_end_matches(", ").to_string()); + assert_eq!(expected, lapper.iter().cloned().collect::>()); assert_eq!(lapper.intervals.len(), lapper.starts.len()); } From f1c838ce15299c5bc51087e8078c49e234ac7606 Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> 
Date: Tue, 19 Mar 2024 16:25:32 +0100 Subject: [PATCH 06/12] #21 reference instead of owned value in merge_fn --- src/lib.rs | 78 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f510d4c..5a7e24d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -391,54 +391,66 @@ where // Based on: https://stackoverflow.com/questions/628837/how-to-divide-a-set-of-overlapping-ranges-into-non-overlapping-ranges pub fn divide_overlaps_with(&mut self, merge_fn: F) where - F: Fn(&Vec) -> T, + F: Fn(&[&T]) -> T, { - let mut events: Vec<(I, bool, I, T)> = Vec::new(); - for interval in &self.intervals { - events.push((interval.start, true, interval.stop, interval.val.clone())); - events.push((interval.stop, false, interval.start, interval.val.clone())); + // Create start and end events for each interval + let mut events: Vec<(I, bool, I, usize)> = Vec::new(); + for (index, interval) in self.intervals.iter().enumerate() { + events.push((interval.start, true, interval.stop, index)); // Start event + events.push((interval.stop, false, interval.start, index)); // End event } events.sort_by(|a, b| { - a.0.cmp(&b.0).then_with(|| { - let order_a = if a.1 { 0 } else { 1 }; - let order_b = if b.1 { 0 } else { 1 }; - order_a.cmp(&order_b).then_with(|| a.2.cmp(&b.2)) - }) + a.0.cmp(&b.0) + .then_with(|| a.1.cmp(&b.1)) + .then_with(|| a.2.cmp(&b.2)) }); - let mut current_values: Vec = Vec::new(); + let mut active_indices: Vec = Vec::new(); let mut ranges: Vec> = Vec::new(); - let mut current_start = None; + let mut current_start: Option = None; - for (endpoint, is_start, _, symbol) in events { - match (is_start, current_start) { - (true, Some(start)) if endpoint != start && !current_values.is_empty() => { - ranges.push(Interval { - start, - stop: endpoint - I::one(), - val: merge_fn(¤t_values), - }); - current_start = Some(endpoint); - } - (true, _) => { - current_start = Some(endpoint); + for (endpoint, 
is_start, _, index) in events { + // Handle the start of an interval + if is_start { + if let Some(start) = current_start { + // Merge and push the interval if it doesn't overlap directly with its predecessor + if endpoint != start && !active_indices.is_empty() { + let values = active_indices + .iter() + .map(|&i| &self.intervals[i].val) + .collect::>(); + ranges.push(Interval { + start, + stop: endpoint - I::one(), + val: merge_fn(&values), + }); + } } - (false, Some(start)) if !current_values.is_empty() => { + + // Update the start for a new or continued interval + current_start = Some(endpoint); + // Add index to active intervals + active_indices.push(index); + } + // Handle the end of an interval + else { + // Always create an interval up to the current endpoint + if let Some(start) = current_start { + let values = active_indices + .iter() + .map(|&i| &self.intervals[i].val) + .collect::>(); ranges.push(Interval { start, stop: endpoint, - val: merge_fn(&current_values), + val: merge_fn(&values), }); - current_start = Some(endpoint + I::one()); } - _ => {} - } - if is_start { - current_values.push(symbol); - } else { - current_values.retain(|v| v != &symbol); + // Remove ended interval + active_indices.retain(|&i| i != index); + // Prepare for the next potential interval start current_start = Some(endpoint + I::one()); } } From 69a4721990420271018d2a3a4e6ecc68217c2b02 Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Tue, 19 Mar 2024 16:51:29 +0100 Subject: [PATCH 07/12] #21 fixed test --- src/lib.rs | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5a7e24d..6cc8a4c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -401,9 +401,9 @@ where } events.sort_by(|a, b| { - a.0.cmp(&b.0) - .then_with(|| a.1.cmp(&b.1)) - .then_with(|| a.2.cmp(&b.2)) + a.0.cmp(&b.0) // First, sort by endpoint + .then_with(|| (!a.1 as u8).cmp(&(!b.1 as u8))) // Then, start events 
before end events + .then_with(|| a.2.cmp(&b.2)) // Finally, sort by the other endpoint if needed }); let mut active_indices: Vec = Vec::new(); @@ -415,7 +415,8 @@ where if is_start { if let Some(start) = current_start { // Merge and push the interval if it doesn't overlap directly with its predecessor - if endpoint != start && !active_indices.is_empty() { + if endpoint != start && endpoint - I::one() >= start && active_indices.len() > 0 + { let values = active_indices .iter() .map(|&i| &self.intervals[i].val) @@ -435,17 +436,19 @@ where } // Handle the end of an interval else { - // Always create an interval up to the current endpoint + // Create an interval up to the current endpoint if let Some(start) = current_start { - let values = active_indices - .iter() - .map(|&i| &self.intervals[i].val) - .collect::>(); - ranges.push(Interval { - start, - stop: endpoint, - val: merge_fn(&values), - }); + if endpoint >= start && active_indices.len() > 0 { + let values = active_indices + .iter() + .map(|&i| &self.intervals[i].val) + .collect::>(); + ranges.push(Interval { + start, + stop: endpoint, + val: merge_fn(&values), + }); + } } // Remove ended interval From f7430b784220f252035c22e332d501208a9c1475 Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Wed, 20 Mar 2024 08:27:10 +0100 Subject: [PATCH 08/12] #21 made devide_overlaps_with work exclusive end --- src/lib.rs | 52 +++++++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6cc8a4c..7c97cf8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,7 +77,7 @@ //! 
``` use num_traits::{ identities::{one, zero}, - One, PrimInt, Unsigned, + PrimInt, Unsigned, }; use std::cmp::Ordering; @@ -85,12 +85,12 @@ use std::cmp::Ordering; use serde::{Deserialize, Serialize}; /// Represent a range from [start, stop) -/// Inclusive start, exclusive of stop +/// Inclusive start, exclusive of stop (start <= x < end) #[cfg_attr(feature = "with_serde", derive(Serialize, Deserialize))] #[derive(Eq, Debug, Clone)] pub struct Interval where - I: PrimInt + One + Unsigned + Ord + Clone + Send + Sync, + I: PrimInt + Unsigned + Ord + Clone + Send + Sync, T: Eq + Clone + Send + Sync, { pub start: I, @@ -415,22 +415,19 @@ where if is_start { if let Some(start) = current_start { // Merge and push the interval if it doesn't overlap directly with its predecessor - if endpoint != start && endpoint - I::one() >= start && active_indices.len() > 0 - { + if endpoint > start && active_indices.len() > 0 { let values = active_indices .iter() .map(|&i| &self.intervals[i].val) .collect::>(); ranges.push(Interval { start, - stop: endpoint - I::one(), + stop: endpoint, val: merge_fn(&values), }); } } - // Update the start for a new or continued interval - current_start = Some(endpoint); // Add index to active intervals active_indices.push(index); } @@ -438,7 +435,7 @@ where else { // Create an interval up to the current endpoint if let Some(start) = current_start { - if endpoint >= start && active_indices.len() > 0 { + if endpoint > start && active_indices.len() > 0 { let values = active_indices .iter() .map(|&i| &self.intervals[i].val) @@ -453,9 +450,10 @@ where // Remove ended interval active_indices.retain(|&i| i != index); - // Prepare for the next potential interval start - current_start = Some(endpoint + I::one()); } + + // Update the start for a new or continued interval + current_start = Some(endpoint); } self.intervals = ranges; @@ -1240,10 +1238,11 @@ mod tests { Interval { start: 6, stop: 9, val: String::from("c") }, ]); let expected: Vec> = vec![ - 
Interval { start: 1, stop: 2, val: String::from("a") }, + Interval { start: 1, stop: 3, val: String::from("a") }, Interval { start: 3, stop: 5, val: String::from("a, b") }, + Interval { start: 5, stop: 6, val: String::from("b") }, Interval { start: 6, stop: 7, val: String::from("b, c") }, - Interval { start: 8, stop: 9, val: String::from("c") }, + Interval { start: 7, stop: 9, val: String::from("c") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); lapper.divide_overlaps_with(|overlap| overlap @@ -1268,17 +1267,16 @@ mod tests { Interval {start: 165, stop: 180, val: String::from("a")}, ]); let expected: Vec> = vec![ - Interval { start: 0, stop: 74, val: String::from("b, a") }, - Interval { start: 75, stop: 75, val: String::from("b, a, d") }, - Interval { start: 76, stop: 80, val: String::from("a, d") }, - Interval {start: 81, stop: 94, val: String::from("a")}, + Interval { start: 0, stop: 75, val: String::from("b, a") }, + Interval { start: 75, stop: 80, val: String::from("a, d") }, + Interval {start: 80, stop: 95, val: String::from("a")}, Interval {start: 95, stop: 100, val: String::from("a, c")}, - Interval {start: 101, stop: 119, val: String::from("c")}, + Interval {start: 100, stop: 120, val: String::from("c")}, Interval {start: 120, stop: 130, val: String::from("c, d")}, - Interval {start: 131, stop: 150, val: String::from("c")}, - Interval {start: 160, stop: 164, val: String::from("e")}, + Interval {start: 130, stop: 150, val: String::from("c")}, + Interval {start: 160, stop: 165, val: String::from("e")}, Interval {start: 165, stop: 175, val: String::from("e, a")}, - Interval {start: 176, stop: 180, val: String::from("a")}, + Interval {start: 175, stop: 180, val: String::from("a")}, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); lapper.divide_overlaps_with(|overlap| overlap @@ -1296,9 +1294,9 @@ mod tests { Interval { start: 3, stop: 7, val: String::from("b") }, ]); let expected: Vec> = vec![ - Interval { start: 1, stop: 2, val: 
String::from("a") }, + Interval { start: 1, stop: 3, val: String::from("a") }, Interval { start: 3, stop: 7, val: String::from("a, b") }, - Interval { start: 8, stop: 10, val: String::from("a") }, + Interval { start: 7, stop: 10, val: String::from("a") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); lapper.divide_overlaps_with(|overlap| overlap @@ -1335,9 +1333,9 @@ mod tests { Interval { start: 3, stop: 6, val: String::from("b") }, ]); let expected: Vec> = vec![ - Interval { start: 1, stop: 2, val: String::from("a") }, + Interval { start: 1, stop: 3, val: String::from("a") }, Interval { start: 3, stop: 4, val: String::from("a, b") }, - Interval { start: 5, stop: 6, val: String::from("b") }, + Interval { start: 4, stop: 6, val: String::from("b") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); lapper.divide_overlaps_with(|overlap| overlap @@ -1356,9 +1354,9 @@ mod tests { Interval { start: 3, stop: 6, val: String::from("c") }, ]); let expected: Vec> = vec![ - Interval { start: 1, stop: 2, val: String::from("a, b") }, + Interval { start: 1, stop: 3, val: String::from("a, b") }, Interval { start: 3, stop: 4, val: String::from("a, b, c") }, - Interval { start: 5, stop: 6, val: String::from("c") }, + Interval { start: 4, stop: 6, val: String::from("c") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); lapper.divide_overlaps_with(|overlap| overlap From d9547b02a2e0212a5715bc20c7aae64479a6c199 Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Thu, 28 Mar 2024 14:14:02 +0100 Subject: [PATCH 09/12] #21 rermoved eq as it doesn't seem necessary? 
--- src/lib.rs | 62 ++++++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7c97cf8..9ced38c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -87,11 +87,11 @@ use serde::{Deserialize, Serialize}; /// Represent a range from [start, stop) /// Inclusive start, exclusive of stop (start <= x < end) #[cfg_attr(feature = "with_serde", derive(Serialize, Deserialize))] -#[derive(Eq, Debug, Clone)] +#[derive(Debug, Clone)] pub struct Interval where I: PrimInt + Unsigned + Ord + Clone + Send + Sync, - T: Eq + Clone + Send + Sync, + T: Clone + Send + Sync, { pub start: I, pub stop: I, @@ -105,7 +105,7 @@ where pub struct Lapper where I: PrimInt + Unsigned + Ord + Clone + Send + Sync, - T: Eq + Clone + Send + Sync, + T: Clone + Send + Sync, { /// List of intervals pub intervals: Vec>, @@ -124,7 +124,7 @@ where impl Interval where I: PrimInt + Unsigned + Ord + Clone + Send + Sync, - T: Eq + Clone + Send + Sync, + T: Clone + Send + Sync, { /// Compute the intsect between two intervals #[inline] @@ -141,47 +141,35 @@ where } } -impl Ord for Interval +impl PartialEq for Interval where I: PrimInt + Unsigned + Ord + Clone + Send + Sync, - T: Eq + Clone + Send + Sync, + T: Clone + Send + Sync, { #[inline] - fn cmp(&self, other: &Interval) -> Ordering { - match self.start.cmp(&other.start) { - Ordering::Less => Ordering::Less, - Ordering::Greater => Ordering::Greater, - Ordering::Equal => self.stop.cmp(&other.stop), - } + fn eq(&self, other: &Interval) -> bool { + self.start == other.start && self.stop == other.stop } } impl PartialOrd for Interval where I: PrimInt + Unsigned + Ord + Clone + Send + Sync, - T: Eq + Clone + Send + Sync, + T: Clone + Send + Sync, { - #[inline] fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl PartialEq for Interval -where - I: PrimInt + Unsigned + Ord + Clone + Send + Sync, - T: Eq + Clone + Send + Sync, -{ - #[inline] - fn eq(&self, 
other: &Interval) -> bool { - self.start == other.start && self.stop == other.stop + Some(match self.start.cmp(&other.start) { + Ordering::Less => Ordering::Less, + Ordering::Greater => Ordering::Greater, + Ordering::Equal => self.stop.cmp(&other.stop), + }) } } impl Lapper where I: PrimInt + Unsigned + Ord + Clone + Send + Sync, - T: Eq + Clone + Send + Sync, + T: Clone + Send + Sync, { /// Create a new instance of Lapper by passing in a vector of Intervals. This vector will /// immediately be sorted by start order. @@ -193,7 +181,7 @@ where /// let lapper = Lapper::new(data); /// ``` pub fn new(mut intervals: Vec>) -> Self { - intervals.sort(); + intervals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal)); let (mut starts, mut stops): (Vec<_>, Vec<_>) = intervals.iter().map(|x| (x.start, x.stop)).unzip(); starts.sort(); @@ -761,7 +749,7 @@ where #[derive(Debug)] pub struct IterFind<'a, I, T> where - T: Eq + Clone + Send + Sync + 'a, + T: Clone + Send + Sync + 'a, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { inner: &'a Lapper, @@ -772,7 +760,7 @@ where impl<'a, I, T> Iterator for IterFind<'a, I, T> where - T: Eq + Clone + Send + Sync + 'a, + T: Clone + Send + Sync + 'a, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { type Item = &'a Interval; @@ -799,7 +787,7 @@ where #[derive(Debug)] pub struct IterDepth<'a, I, T> where - T: Eq + Clone + Send + Sync + 'a, + T: Clone + Send + Sync + 'a, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { inner: &'a Lapper, @@ -812,7 +800,7 @@ where impl<'a, I, T> Iterator for IterDepth<'a, I, T> where - T: Eq + Clone + Send + Sync + 'a, + T: Clone + Send + Sync + 'a, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { type Item = Interval; @@ -863,7 +851,7 @@ where /// Lapper Iterator pub struct IterLapper<'a, I, T> where - T: Eq + Clone + Send + Sync + 'a, + T: Clone + Send + Sync + 'a, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { inner: &'a Lapper, @@ -872,7 +860,7 @@ where impl<'a, I, T> 
Iterator for IterLapper<'a, I, T> where - T: Eq + Clone + Send + Sync + 'a, + T: Clone + Send + Sync + 'a, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { type Item = &'a Interval; @@ -889,7 +877,7 @@ where impl IntoIterator for Lapper where - T: Eq + Clone + Send + Sync, + T: Clone + Send + Sync, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { type Item = Interval; @@ -902,7 +890,7 @@ where impl<'a, I, T> IntoIterator for &'a Lapper where - T: Eq + Clone + Send + Sync + 'a, + T: Clone + Send + Sync + 'a, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { type Item = &'a Interval; @@ -915,7 +903,7 @@ where impl<'a, I, T> IntoIterator for &'a mut Lapper where - T: Eq + Clone + Send + Sync + 'a, + T: Clone + Send + Sync + 'a, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { type Item = &'a mut Interval; From 13a182592307a280c65b1f6198f75a0f33025228 Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Thu, 28 Mar 2024 14:55:41 +0100 Subject: [PATCH 10/12] #21 added range to callback fn --- src/lib.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9ced38c..12e0f9c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -379,7 +379,7 @@ where // Based on: https://stackoverflow.com/questions/628837/how-to-divide-a-set-of-overlapping-ranges-into-non-overlapping-ranges pub fn divide_overlaps_with(&mut self, merge_fn: F) where - F: Fn(&[&T]) -> T, + F: Fn(&[&T], std::ops::Range) -> T, { // Create start and end events for each interval let mut events: Vec<(I, bool, I, usize)> = Vec::new(); @@ -411,7 +411,7 @@ where ranges.push(Interval { start, stop: endpoint, - val: merge_fn(&values), + val: merge_fn(&values, start..endpoint), }); } } @@ -431,7 +431,7 @@ where ranges.push(Interval { start, stop: endpoint, - val: merge_fn(&values), + val: merge_fn(&values, start..endpoint), }); } } @@ -1233,7 +1233,7 @@ mod tests { Interval { start: 7, stop: 9, val: 
String::from("c") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.divide_overlaps_with(|overlap| overlap + lapper.divide_overlaps_with(|overlap, _| overlap .iter() .fold(String::new(), |acc, x| acc + x + ", ") .trim_end_matches(", ").to_string()); @@ -1267,7 +1267,7 @@ mod tests { Interval {start: 175, stop: 180, val: String::from("a")}, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.divide_overlaps_with(|overlap| overlap + lapper.divide_overlaps_with(|overlap, _| overlap .iter() .fold(String::new(), |acc, x| acc + x + ", ") .trim_end_matches(", ").to_string()); @@ -1287,7 +1287,7 @@ mod tests { Interval { start: 7, stop: 10, val: String::from("a") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.divide_overlaps_with(|overlap| overlap + lapper.divide_overlaps_with(|overlap, _| overlap .iter() .fold(String::new(), |acc, x| acc + x + ", ") .trim_end_matches(", ").to_string()); @@ -1306,7 +1306,7 @@ mod tests { Interval { start: 3, stop: 4, val: String::from("b") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.divide_overlaps_with(|overlap| overlap + lapper.divide_overlaps_with(|overlap, _| overlap .iter() .fold(String::new(), |acc, x| acc + x + ", ") .trim_end_matches(", ").to_string()); @@ -1326,7 +1326,7 @@ mod tests { Interval { start: 4, stop: 6, val: String::from("b") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.divide_overlaps_with(|overlap| overlap + lapper.divide_overlaps_with(|overlap, _| overlap .iter() .fold(String::new(), |acc, x| acc + x + ", ") .trim_end_matches(", ").to_string()); @@ -1347,7 +1347,7 @@ mod tests { Interval { start: 4, stop: 6, val: String::from("c") }, ]; assert_eq!(lapper.intervals.len(), lapper.starts.len()); - lapper.divide_overlaps_with(|overlap| overlap + lapper.divide_overlaps_with(|overlap, _| overlap .iter() .fold(String::new(), |acc, x| acc + x + ", ") .trim_end_matches(", ").to_string()); From 
50d940b046b8dd4c14bc4edb1b1f251e38e2d58d Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Fri, 29 Mar 2024 06:42:33 +0100 Subject: [PATCH 11/12] #21 added find_mut --- src/lib.rs | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 12e0f9c..6116d12 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -704,6 +704,28 @@ where } } + /// Find all intervals that overlap start .. stop + /// ``` + /// use rust_lapper::{Lapper, Interval}; + /// let lapper = Lapper::new((0..100).step_by(5) + /// .map(|x| Interval{start: x, stop: x+2 , val: true}) + /// .collect::>>()); + /// assert_eq!(lapper.find_mut(5, 11).count(), 2); + /// ``` + #[inline] + pub fn find_mut(&mut self, start: I, stop: I) -> IterFindMut { + let off = Self::lower_bound( + start.checked_sub(&self.max_len).unwrap_or_else(zero::), + &self.intervals, + ); + IterFindMut { + inner: self, + off, + start, + stop, + } + } + /// Find all intevals that overlap start .. stop. This method will work when queries /// to this lapper are in sorted (start) order. It uses a linear search from the last query /// instead of a binary search. A reference to a cursor must be passed in. 
This reference will @@ -769,8 +791,6 @@ where // interval.start < stop && interval.stop > start fn next(&mut self) -> Option { while self.off < self.inner.intervals.len() { - //let mut generator = self.inner.intervals[self.off..].iter(); - //while let Some(interval) = generator.next() { let interval = &self.inner.intervals[self.off]; self.off += 1; if interval.overlap(self.start, self.stop) { @@ -783,6 +803,48 @@ where } } +/// Mutable Find Iterator +#[derive(Debug)] +pub struct IterFindMut<'a, I, T> +where + T: Clone + Send + Sync + 'a, + I: PrimInt + Unsigned + Ord + Clone + Send + Sync, +{ + inner: &'a mut Lapper, + off: usize, + start: I, + stop: I, +} + +impl<'a, I, T> Iterator for IterFindMut<'a, I, T> +where + T: Clone + Send + Sync + 'a, + I: PrimInt + Unsigned + Ord + Clone + Send + Sync, +{ + type Item = (&'a mut T, I, I); // Item, Start, Stop + + fn next(&mut self) -> Option { + while self.off < self.inner.intervals.len() { + // Safety: We are extending the lifetime of the reference to 'a, which + // is safe as long as we ensure that IterFindMut never yields the same + // element twice, which should not be possible due to `self.off += 1` + // https://smallcultfollowing.com/babysteps/blog/2013/10/24/iterators-yielding-mutable-references/ + unsafe { + let ptr = self.inner.intervals.as_mut_ptr().add(self.off); + self.off += 1; + let interval = &mut *ptr; + + if interval.overlap(self.start, self.stop) { + return Some((&mut interval.val, interval.start, interval.stop)); + } else if interval.start >= self.stop { + break; + } + } + } + None + } +} + /// Depth Iterator #[derive(Debug)] pub struct IterDepth<'a, I, T> From a03107fbb0fa72c59a72c6e9ca5662a97f4b348d Mon Sep 17 00:00:00 2001 From: BennoDev <57860196+bennoinbeta@users.noreply.github.com> Date: Fri, 29 Mar 2024 07:12:09 +0100 Subject: [PATCH 12/12] #21 added iter_mut --- src/lib.rs | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git 
a/src/lib.rs b/src/lib.rs index 6116d12..1e7a35c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -329,6 +329,15 @@ where } } + /// Return a mutable iterator over the intervals in Lapper + #[inline] + pub fn iter_mut(&mut self) -> IterLapperMut { + IterLapperMut { + inner: self, + pos: 0, + } + } + /// Merge any intervals that overlap with eachother within the Lapper. This is an easy way to /// speed up queries. pub fn merge_overlaps(&mut self) { @@ -821,7 +830,7 @@ where T: Clone + Send + Sync + 'a, I: PrimInt + Unsigned + Ord + Clone + Send + Sync, { - type Item = (&'a mut T, I, I); // Item, Start, Stop + type Item = (&'a mut T, I, I); // Value, Start, Stop fn next(&mut self) -> Option { while self.off < self.inner.intervals.len() { @@ -841,7 +850,8 @@ where } } } - None + + return None; } } @@ -910,6 +920,7 @@ where }) } } + /// Lapper Iterator pub struct IterLapper<'a, I, T> where @@ -937,6 +948,41 @@ where } } +/// Mutable Lapper Iterator +pub struct IterLapperMut<'a, I, T> +where + T: Clone + Send + Sync + 'a, + I: PrimInt + Unsigned + Ord + Clone + Send + Sync, +{ + inner: &'a mut Lapper, + pos: usize, +} + +impl<'a, I, T> Iterator for IterLapperMut<'a, I, T> +where + T: Clone + Send + Sync + 'a, + I: PrimInt + Unsigned + Ord + Clone + Send + Sync, +{ + type Item = (&'a mut T, I, I); // Value, Start, Stop + + fn next(&mut self) -> Option { + if self.pos < self.inner.intervals.len() { + // Safety: We are extending the lifetime of the reference to 'a, which + // is safe as long as we ensure that IterLapperMut never yields the same + // element twice, which should not be possible due to `self.pos += 1` + // https://smallcultfollowing.com/babysteps/blog/2013/10/24/iterators-yielding-mutable-references/ + unsafe { + let ptr = self.inner.intervals.as_mut_ptr().add(self.pos); + self.pos += 1; + let interval = &mut *ptr; + return Some((&mut interval.val, interval.start, interval.stop)); + } + } else { + return None; + } + } +} + impl IntoIterator for Lapper where T: 
Clone + Send + Sync,