Skip to content

Commit 6c537a0

Browse files
implement various bio-types (interval, locus, strand)
1 parent 3100b8c commit 6c537a0

File tree

7 files changed

+119
-16
lines changed

7 files changed

+119
-16
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ regex = "1.3"
2828
linear-map = "1.2"
2929
serde_base = { version = "^1", optional = true, package = "serde" }
3030
serde_bytes = { version = "0.11", optional = true }
31-
bio-types = ">=0.5.1"
31+
bio-types = ">=0.6"
3232
snafu = ">= 0.5.0, <= 0.6.0"
3333
hts-sys = { version = "^1.10", path = "hts-sys", default-features = false }
3434

src/bam/mod.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use std::ffi;
2020
use std::path::Path;
2121
use std::slice;
2222
use std::str;
23+
2324
use url::Url;
2425

2526
use crate::htslib;
@@ -268,7 +269,11 @@ impl Read for Reader {
268269
-1 => Ok(false),
269270
-2 => Err(Error::TruncatedRecord),
270271
-4 => Err(Error::InvalidRecord),
271-
_ => Ok(true),
272+
_ => {
273+
record.set_header(self.header().clone());
274+
275+
Ok(true)
276+
}
272277
}
273278
}
274279

@@ -458,7 +463,11 @@ impl Read for IndexedReader {
458463
-1 => Ok(false),
459464
-2 => Err(Error::TruncatedRecord),
460465
-4 => Err(Error::InvalidRecord),
461-
_ => Ok(true),
466+
_ => {
467+
record.set_header(self.header().clone());
468+
469+
Ok(true)
470+
}
462471
}
463472
}
464473
None => Ok(false),
@@ -862,6 +871,10 @@ impl HeaderView {
862871
}
863872
}
864873

874+
pub fn tid2name(&self, tid: u32) -> &[u8] {
875+
unsafe { ffi::CStr::from_ptr(htslib::sam_hdr_tid2name(self.inner, tid as i32)).to_bytes() }
876+
}
877+
865878
pub fn target_count(&self) -> u32 {
866879
self.inner().n_targets as u32
867880
}

src/bam/record.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ use crate::htslib;
2222
use crate::utils;
2323

2424
use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
25+
use bio_types::genome;
2526
use bio_types::sequence::SequenceRead;
27+
use bio_types::strand::ReqStrand;
2628

2729
/// A macro creating methods for flag access.
2830
macro_rules! flag {
@@ -46,6 +48,7 @@ pub struct Record {
4648
pub inner: htslib::bam1_t,
4749
own: bool,
4850
cigar: Option<CigarStringView>,
51+
header: Option<HeaderView>,
4952
}
5053

5154
unsafe impl Send for Record {}
@@ -109,6 +112,7 @@ impl Record {
109112
inner: unsafe { MaybeUninit::zeroed().assume_init() },
110113
own: true,
111114
cigar: None,
115+
header: None,
112116
}
113117
}
114118

@@ -128,6 +132,7 @@ impl Record {
128132
},
129133
own: false,
130134
cigar: None,
135+
header: None,
131136
}
132137
}
133138

@@ -162,6 +167,10 @@ impl Record {
162167
}
163168
}
164169

170+
pub fn set_header(&mut self, header: HeaderView) {
171+
self.header = Some(header);
172+
}
173+
165174
pub(super) fn data(&self) -> &[u8] {
166175
unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
167176
}
@@ -224,6 +233,16 @@ impl Record {
224233
self.inner_mut().core.qual = mapq;
225234
}
226235

236+
/// Get strand information from record flags.
237+
pub fn strand(&mut self) -> ReqStrand {
238+
let reverse = self.flags() & 0x10 != 0;
239+
if reverse {
240+
ReqStrand::Reverse
241+
} else {
242+
ReqStrand::Forward
243+
}
244+
}
245+
227246
/// Get raw flags.
228247
pub fn flags(&self) -> u16 {
229248
self.inner().core.flag
@@ -706,6 +725,39 @@ impl SequenceRead for Record {
706725
}
707726
}
708727

728+
impl genome::AbstractInterval for Record {
729+
/// Return contig name. Panics if record does not know its header (which happens if it has not been read from a file).
730+
fn contig(&self) -> &str {
731+
let tid = self.tid();
732+
if tid < 0 {
733+
panic!("invalid tid, must be at least zero");
734+
}
735+
str::from_utf8(
736+
self.header
737+
.as_ref()
738+
.expect(
739+
"header must be set (this is the case if the record has been read from a file)",
740+
)
741+
.tid2name(tid as u32),
742+
)
743+
.expect("unable to interpret contig name as UTF-8")
744+
}
745+
746+
/// Return genomic range covered by alignment. Panics if `Record::cache_cigar()` has not been called first or `Record::pos()` is less than zero.
747+
fn range(&self) -> ops::Range<genome::Position> {
748+
let end_pos = self
749+
.cigar_cached()
750+
.expect("cigar has not been cached yet, call cache_cigar() first")
751+
.end_pos() as u64;
752+
753+
if self.pos() < 0 {
754+
panic!("invalid position, must be positive")
755+
}
756+
757+
self.pos() as u64..end_pos
758+
}
759+
}
760+
709761
/// Auxiliary record data.
710762
#[derive(Debug, Clone, Copy, PartialEq)]
711763
pub enum Aux<'a> {
@@ -1091,6 +1143,28 @@ impl CigarStringView {
10911143
pos
10921144
}
10931145

1146+
/// Get number of bases softclipped at the beginning of the alignment.
1147+
pub fn leading_softclips(&self) -> i64 {
1148+
self.first().map_or(0, |cigar| {
1149+
if let Cigar::SoftClip(s) = cigar {
1150+
*s as i64
1151+
} else {
1152+
0
1153+
}
1154+
})
1155+
}
1156+
1157+
/// Get number of bases softclipped at the end of the alignment.
1158+
pub fn trailing_softclips(&self) -> i64 {
1159+
self.last().map_or(0, |cigar| {
1160+
if let Cigar::SoftClip(s) = cigar {
1161+
*s as i64
1162+
} else {
1163+
0
1164+
}
1165+
})
1166+
}
1167+
10941168
/// For a given position in the reference, get corresponding position within read.
10951169
/// If reference position is outside of the read alignment, return None.
10961170
///

src/bcf/buffer.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@ impl RecordBuffer {
5252
self.ringbuffer2.clear();
5353
}
5454

55-
fn drain_left(&mut self, rid: u32, window_start: u32) -> usize {
55+
fn drain_left(&mut self, rid: u32, window_start: u64) -> usize {
5656
// remove records too far left or from wrong rid
5757
// rec.rid() will always yield Some(), because otherwise we won't put the rec into the
5858
// buffer.
5959
let to_remove = self
6060
.ringbuffer
6161
.iter()
62-
.take_while(|rec| rec.pos() < window_start || rec.rid().unwrap() != rid)
62+
.take_while(|rec| (rec.pos() as u64) < window_start || rec.rid().unwrap() != rid)
6363
.count();
6464
self.ringbuffer.drain(..to_remove);
6565
to_remove
@@ -69,7 +69,7 @@ impl RecordBuffer {
6969
/// the start coordinate of any previous `fill` operation.
7070
/// Coordinates are 0-based, and end is exclusive.
7171
/// Returns tuple with numbers of added and deleted records compared to previous fetch.
72-
pub fn fetch(&mut self, chrom: &[u8], start: u32, end: u32) -> Result<(usize, usize)> {
72+
pub fn fetch(&mut self, chrom: &[u8], start: u64, end: u64) -> Result<(usize, usize)> {
7373
// TODO panic if start is left of previous start or we have moved past the given chrom
7474
// before.
7575
let rid = self.reader.header.name2rid(chrom)?;
@@ -107,7 +107,7 @@ impl RecordBuffer {
107107

108108
// move overflow from last fill into ringbuffer
109109
if self.overflow.is_some() {
110-
let pos = self.overflow.as_ref().unwrap().pos();
110+
let pos = self.overflow.as_ref().unwrap().pos() as u64;
111111
if pos >= start {
112112
if pos <= end {
113113
self.ringbuffer.push_back(self.overflow.take().unwrap());
@@ -129,7 +129,7 @@ impl RecordBuffer {
129129
// EOF
130130
break;
131131
}
132-
let pos = rec.pos();
132+
let pos = rec.pos() as u64;
133133
if let Some(rec_rid) = rec.rid() {
134134
if rec_rid == rid {
135135
if pos >= end {

src/bcf/errors.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub enum Error {
1616
#[snafu(display("error setting threads for writing BCF/VCF file(s)"))]
1717
SetThreads,
1818
#[snafu(display("error seeking to {}:{} in indexed BCF/VCF file", contig, start))]
19-
Seek { contig: String, start: u32 },
19+
Seek { contig: String, start: u64 },
2020
#[snafu(display("error writing record to BCF/VCF file"))]
2121
Write,
2222
#[snafu(display("tag {} undefined in BCF/VCF header", tag))]

src/bcf/mod.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ pub struct IndexedReader {
166166
header: Rc<HeaderView>,
167167

168168
/// The position of the previous fetch, if any.
169-
current_region: Option<(u32, u32, u32)>,
169+
current_region: Option<(u32, u64, u64)>,
170170
}
171171

172172
unsafe impl Send for IndexedReader {}
@@ -225,7 +225,7 @@ impl IndexedReader {
225225
/// contig name to ID.
226226
/// * `start` - `0`-based start coordinate of region on reference.
227227
/// * `end` - `0`-based end coordinate of region on reference.
228-
pub fn fetch(&mut self, rid: u32, start: u32, end: u32) -> Result<()> {
228+
pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
229229
let contig = self.header.rid2name(rid).unwrap();
230230
let contig = ffi::CString::new(contig).unwrap();
231231
if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
@@ -269,7 +269,7 @@ impl Read for IndexedReader {
269269
Some((rid, _start, end)) => {
270270
if record.rid().is_some()
271271
&& rid == record.rid().unwrap()
272-
&& record.pos() <= end
272+
&& record.pos() as u64 <= end
273273
{
274274
Ok(true)
275275
} else {
@@ -349,7 +349,7 @@ pub mod synced {
349349
headers: Vec<Rc<HeaderView>>,
350350

351351
/// The position of the previous fetch, if any.
352-
current_region: Option<(u32, u32, u32)>,
352+
current_region: Option<(u32, u64, u64)>,
353353
}
354354

355355
// TODO: add interface for setting threads, ensure that the pool is freed properly
@@ -500,7 +500,7 @@ pub mod synced {
500500
/// contig name to ID.
501501
/// * `start` - `0`-based start coordinate of region on reference.
502502
/// * `end` - `0`-based end coordinate of region on reference.
503-
pub fn fetch(&mut self, rid: u32, start: u32, end: u32) -> Result<()> {
503+
pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
504504
let contig = {
505505
let contig = self.header(0).rid2name(rid).unwrap(); //.clone();
506506
ffi::CString::new(contig).unwrap()

src/bcf/record.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::rc::Rc;
1313
use std::slice;
1414
use std::str;
1515

16+
use bio_types::genome;
1617
use ieee754::Ieee754;
1718
use itertools::Itertools;
1819

@@ -160,8 +161,8 @@ impl Record {
160161
}
161162

162163
// Return 0-based position.
163-
pub fn pos(&self) -> u32 {
164-
self.inner().pos as u32
164+
pub fn pos(&self) -> i64 {
165+
self.inner().pos
165166
}
166167

167168
/// Set 0-based position.
@@ -622,6 +623,21 @@ impl Record {
622623
}
623624
}
624625

626+
impl genome::AbstractLocus for Record {
627+
fn contig(&self) -> &str {
628+
str::from_utf8(
629+
self.header()
630+
.rid2name(self.rid().expect("rid not set"))
631+
.expect("unable to find rid in header"),
632+
)
633+
.expect("unable to interpret contig name as UTF-8")
634+
}
635+
636+
fn pos(&self) -> u64 {
637+
self.pos() as u64
638+
}
639+
}
640+
625641
/// Phased or unphased alleles, represented as indices.
626642
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
627643
pub enum GenotypeAllele {

0 commit comments

Comments
 (0)