Skip to content

Commit

Permalink
Merge pull request #43 from PacificBiosciences/bugfix/20240808
Browse files Browse the repository at this point in the history
updates for v1.4.3
  • Loading branch information
holtjma authored Aug 8, 2024
2 parents 9b01d7e + 3136fb6 commit 52ab5c0
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 38 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 1.4.3
## Fixed
* Replaced a panic caused by a chromosome appearing in a VCF but not in the BAM file with a more descriptive error message
* Fixed an error caused by a multi-sample VCF with a mixture of haploid and diploid genotypes

# v1.4.2
## Changes
* Removes a 1 basepair shift from tandem repeat region calculation to support anchor base changes in TRGT v1.0.0; internal results are nearly identical before and after this change
Expand Down
48 changes: 33 additions & 15 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "hiphase"
version = "1.4.2"
version = "1.4.3"
authors = ["J. Matthew Holt <mholt@pacificbiosciences.com>"]
description = "A tool for jointly phasing small, structural, and tandem repeat variants for PacBio sequencing data"
edition = "2021"
Expand Down
32 changes: 25 additions & 7 deletions LICENSE-THIRDPARTY.json
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@
},
{
"name": "deranged",
"version": "0.3.7",
"version": "0.3.11",
"authors": "Jacob Pratt <jacob@jhpratt.dev>",
"repository": "https://github.com/jhpratt/deranged",
"license": "Apache-2.0 OR MIT",
Expand Down Expand Up @@ -496,7 +496,7 @@
},
{
"name": "hiphase",
"version": "1.4.2",
"version": "1.4.3",
"authors": "J. Matthew Holt <mholt@pacificbiosciences.com>",
"repository": null,
"license": null,
Expand Down Expand Up @@ -764,6 +764,15 @@
"license_file": null,
"description": "Complex numbers implementation for Rust"
},
{
"name": "num-conv",
"version": "0.1.0",
"authors": "Jacob Pratt <jacob@jhpratt.dev>",
"repository": "https://github.com/jhpratt/num-conv",
"license": "Apache-2.0 OR MIT",
"license_file": null,
"description": "`num_conv` is a crate to convert between integer types without using `as` casts. This provides better certainty when refactoring, makes the exact behavior of code more explicit, and allows using turbofish syntax."
},
{
"name": "num-integer",
"version": "0.1.45",
Expand Down Expand Up @@ -872,6 +881,15 @@
"license_file": null,
"description": "A library to run the pkg-config system tool at build time in order to be used in Cargo build scripts."
},
{
"name": "powerfmt",
"version": "0.2.0",
"authors": "Jacob Pratt <jacob@jhpratt.dev>",
"repository": "https://github.com/jhpratt/powerfmt",
"license": "Apache-2.0 OR MIT",
"license_file": null,
"description": "`powerfmt` is a library that provides utilities for formatting values. This crate makes it significantly easier to support filling to a minimum width with alignment, avoid heap allocation, and avoid repetitive calculations."
},
{
"name": "ppv-lite86",
"version": "0.2.16",
Expand Down Expand Up @@ -1081,7 +1099,7 @@
},
{
"name": "serde",
"version": "1.0.147",
"version": "1.0.205",
"authors": "Erick Tryzelaar <erick.tryzelaar@gmail.com>|David Tolnay <dtolnay@gmail.com>",
"repository": "https://github.com/serde-rs/serde",
"license": "Apache-2.0 OR MIT",
Expand All @@ -1090,7 +1108,7 @@
},
{
"name": "serde_derive",
"version": "1.0.147",
"version": "1.0.205",
"authors": "Erick Tryzelaar <erick.tryzelaar@gmail.com>|David Tolnay <dtolnay@gmail.com>",
"repository": "https://github.com/serde-rs/serde",
"license": "Apache-2.0 OR MIT",
Expand Down Expand Up @@ -1216,7 +1234,7 @@
},
{
"name": "time",
"version": "0.3.25",
"version": "0.3.36",
"authors": "Jacob Pratt <open-source@jhpratt.dev>|Time contributors",
"repository": "https://github.com/time-rs/time",
"license": "Apache-2.0 OR MIT",
Expand All @@ -1225,7 +1243,7 @@
},
{
"name": "time-core",
"version": "0.1.1",
"version": "0.1.2",
"authors": "Jacob Pratt <open-source@jhpratt.dev>|Time contributors",
"repository": "https://github.com/time-rs/time",
"license": "Apache-2.0 OR MIT",
Expand All @@ -1234,7 +1252,7 @@
},
{
"name": "time-macros",
"version": "0.2.11",
"version": "0.2.18",
"authors": "Jacob Pratt <open-source@jhpratt.dev>|Time contributors",
"repository": "https://github.com/time-rs/time",
"license": "Apache-2.0 OR MIT",
Expand Down
25 changes: 18 additions & 7 deletions src/block_gen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -627,13 +627,18 @@ impl PhaseBlockIterator {
/// # Arguments
/// * `chrom` - the chromosome of the locus
/// * `pos` - the position of the locus
fn get_longest_multispan(&self, chrom: &str, pos: u64) -> u64 {
fn get_longest_multispan(&self, chrom: &str, pos: u64) -> Result<u64, Box<dyn std::error::Error>> {
use bio::bio_types::genome::AbstractInterval;
use rust_htslib::bam::Read;
let mut span_list: Vec<u64> = vec![];
for bam_ref in self.bam_readers.iter() {
for (bam_index, bam_ref) in self.bam_readers.iter().enumerate() {
let mut bam = bam_ref.borrow_mut();
bam.fetch((chrom, pos, pos+1)).unwrap();
match bam.fetch((chrom, pos, pos+1)) {
Ok(()) => {},
Err(e) => {
bail!("Error while fetching \"{}:{}\" in aligned file #{}: {}", chrom, pos, bam_index, e);
}
};

// calling .records() is what is triggering the URL warning
for read_entry in bam.records() {
Expand All @@ -656,10 +661,10 @@ impl PhaseBlockIterator {

if span_list.len() < self.min_spanning_reads {
// this is a sentinel indicating that the range is effectively empty
pos
Ok(pos)
} else {
span_list.sort();
span_list[span_list.len() - self.min_spanning_reads]
Ok(span_list[span_list.len() - self.min_spanning_reads])
}
}

Expand Down Expand Up @@ -891,7 +896,10 @@ impl Iterator for PhaseBlockIterator {
phase_block.add_locus_variant(&chrom_name, variant_pos, pop_index);

// go ahead and run the max span calculation
max_span = self.get_longest_multispan(&chrom_name, variant_pos);
max_span = match self.get_longest_multispan(&chrom_name, variant_pos) {
Ok(ms) => ms,
Err(e) => return Some(Err(e))
};
if max_span == variant_pos {
// there are not enough reads overlapping this position, it will be unphased
phase_block.set_unphased_block();
Expand All @@ -916,7 +924,10 @@ impl Iterator for PhaseBlockIterator {
}
} else {
//we check the reads from the most recent locus
max_span = self.get_longest_multispan(&chrom_name, previous_pos);
max_span = match self.get_longest_multispan(&chrom_name, previous_pos) {
Ok(ms) => ms,
Err(e) => return Some(Err(e))
};
assert!(max_span != previous_pos);
if max_span > variant_pos {
//new max span connects
Expand Down
17 changes: 9 additions & 8 deletions src/writers/ordered_vcf_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ impl OrderedVcfWriter {
// we have already written though, so don't write it again
continue;
}

// we now have to iterate over each sample and modify the entries as necessary
let vcf_sample_indices = &self.sample_indices[vcf_index];
let mut changes_made: bool = false;
Expand All @@ -323,16 +323,17 @@ impl OrderedVcfWriter {
1 => {
// TRGT can make single-allele GT calls, just copy it over as normal
// it will not be modified below because it is a homozygous allele
alleles.push(genotype[0]);
alleles.push(i32::from(genotype[0]));
alleles.push(i32::MIN+1); // sentinel value for end
},
2 => {
// this is 99.99999999% path
alleles.push(genotype[0]);
alleles.push(genotype[1]);
alleles.push(i32::from(genotype[0]));
alleles.push(i32::from(genotype[1]));
},
gt_len => {
// we do not have 3+ GT entries implemented
bail!("Encountered GT of length {} at position {}", gt_len, record.pos())
bail!("Encountered GT of length {} at record {}", gt_len, record.desc())
}
}
}
Expand Down Expand Up @@ -373,8 +374,8 @@ impl OrderedVcfWriter {
} else {
// we need to alter the genotypes for this sample to phased
let sample_gt_offset: usize = 2 * sample_index;
alleles[sample_gt_offset] = GenotypeAllele::Unphased(h1 as i32);
alleles[sample_gt_offset + 1] = GenotypeAllele::Phased(h2 as i32);
alleles[sample_gt_offset] = i32::from(GenotypeAllele::Unphased(h1 as i32));
alleles[sample_gt_offset + 1] = i32::from(GenotypeAllele::Phased(h2 as i32));

// the push_format_string expects &[u8] bytes so we have to:
// 1. convert the output to a String
Expand All @@ -391,7 +392,7 @@ impl OrderedVcfWriter {

if changes_made {
// if we altered something, then alter the record and add PS
record.push_genotypes(&alleles)?;
record.push_format_integer(b"GT", &alleles)?;
record.push_format_string("PS".as_bytes(), &ps_blocks).unwrap();
}
if flagged_variants {
Expand Down

0 comments on commit 52ab5c0

Please sign in to comment.