Skip to content

Commit

Permalink
matcher done
Browse files: browse the repository at this point in the history
only at like 1.5x faster
  • Loading branch information
ACEnglish committed Feb 4, 2024
1 parent 8db1db0 commit 353e687
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 102 deletions.
58 changes: 22 additions & 36 deletions trust/src/comparisons.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,18 @@ use noodles_vcf::{
};
use std::str::FromStr;

pub fn coords_within(qstart: usize, qend: usize, rstart: usize, rend: usize, end_within: bool) -> bool {
let ending = if end_within { qend <= rend } else { qend < rend};
pub fn coords_within(
qstart: usize,
qend: usize,
rstart: usize,
rend: usize,
end_within: bool,
) -> bool {
let ending = if end_within {
qend <= rend
} else {
qend < rend
};
(qstart >= rstart) & ending
}

Expand Down Expand Up @@ -143,8 +153,8 @@ pub fn sizesim(size_a: usize, size_b: usize) -> (f32, isize) {
}

pub fn entry_within(entry: &vcf::Record, rstart: usize, rend: usize) -> bool {
let (qstart, qend) = entry_boundaries(&entry, false);
let end_within = entry_variant_type(&entry) != Svtype::Ins;
let (qstart, qend) = entry_boundaries(entry, false);
let end_within = entry_variant_type(entry) != Svtype::Ins;
coords_within(qstart, qend, rstart, rend, end_within)
}

Expand All @@ -170,38 +180,14 @@ pub fn entry_gt_comp(
sample_a: usize,
sample_b: usize,
) -> bool {
let gt_a = Gt::new(
entry_a
.genotypes()
.get_index(sample_a)
.expect("Bad sample index")
.genotype()
.expect("Unable to parse genotype")
.unwrap(),
);
let gt_b = Gt::new(
entry_b
.genotypes()
.get_index(sample_b)
.expect("Bad sample index")
.genotype()
.expect("Unable to parse genotype")
.unwrap(),
);
let gt_a = Gt::new(entry_a, sample_a);
let gt_b = Gt::new(entry_b, sample_b);
gt_a == gt_b
}

pub fn entry_is_present(entry: &vcf::Record, sample: usize) -> bool {
let gt = Gt::new(
entry
.genotypes()
.get_index(sample)
.expect("Bad sample index")
.genotype()
.expect("Unable to parse genotype")
.unwrap(),
);
return (gt == Gt::Het) || (gt == Gt::Hom);
let gt = Gt::new(entry, sample);
(gt == Gt::Het) || (gt == Gt::Hom)
}

pub fn entry_is_filtered(entry: &vcf::Record) -> bool {
Expand Down Expand Up @@ -240,7 +226,7 @@ pub fn entry_seq_similarity(entry_a: &vcf::Record, entry_b: &vcf::Record) -> f32
.to_string(),
};

let (mut st_dist, mut ed_dist) = entry_distance(&entry_a, &entry_b);
let (mut st_dist, ed_dist) = entry_distance(entry_a, entry_b);
if (st_dist == 0) || (ed_dist == 0) {
return seqsim(&a_seq, &b_seq);
}
Expand All @@ -256,9 +242,9 @@ pub fn entry_seq_similarity(entry_a: &vcf::Record, entry_b: &vcf::Record) -> f32
}

pub fn unroll_compare(a_seq: &String, b_seq: &String, p: usize, up: bool) -> f32 {
let b_len = b_seq.len() as usize;
let b_len = b_seq.len();
let f = p % b_len;
let position = (b_len - f) as usize; // I'm worried about signs here
let position = b_len - f; // I'm worried about signs here
if position >= b_len {
return 0.0; // should never be called unless Symbolic alts are present, in which case we
// can't compare
Expand All @@ -268,7 +254,7 @@ pub fn unroll_compare(a_seq: &String, b_seq: &String, p: usize, up: bool) -> f32
true => format!("{}{}", &b_seq[position..], &b_seq[..position]),
false => format!("{}{}", &b_seq[..position], &b_seq[position..]),
};
seqsim(&a_seq, &rolled)
seqsim(a_seq, &rolled)
}

/* TODO
Expand Down
16 changes: 12 additions & 4 deletions trust/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,18 @@ fn main() {
.build_from_path("sample.vcf.gz")
.expect("Unable to parse vcf");
let header = reader.read_header().expect("Unable to parse header");
let mut mmatch1 = matching::MatchResult{ ..Default::default() };
let mut mmatch1 = matching::MatchResult {
..Default::default()
};
mmatch1.base_gt_count = 4;
mmatch1.score = Some(4.0);
let mut mmatch2 = matching::MatchResult{ ..Default::default() };
let mut mmatch2 = matching::MatchResult {
..Default::default()
};
mmatch2.score = Some(5.0);
let mut mmatch3 = matching::MatchResult{ ..Default::default() };
let mut mmatch3 = matching::MatchResult {
..Default::default()
};
mmatch3.state = true;
let mut parts = vec![mmatch1, mmatch2, mmatch3];
parts.sort();
Expand All @@ -22,7 +28,9 @@ fn main() {
println!("{:?}", i);
}

let m_params = matching::MatchParams{ ..Default::default() };
let m_params = matching::MatchParams {
..Default::default()
};
let mat = matching::Matcher::new(m_params);
let mut up_record = vcf::Record::default();
reader
Expand Down
Loading

0 comments on commit 353e687

Please sign in to comment.