Skip to content

Commit

Permalink
bb (bigwig / bigbed) improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
jrobinso committed Nov 18, 2023
1 parent c933450 commit f4f7f64
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 236 deletions.
139 changes: 139 additions & 0 deletions js/bigwig/bpChromTree.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import {igvxhr} from "../../node_modules/igv-utils/src/index.js"
import {buildOptions} from "../util/igvUtils.js"
import BinaryParser from "../binary.js"

/**
* A UCSC BigBed B+ tree, used to support searching the "extra indexes".
*
* Nodes are loaded on demand during search, avoiding the need to read the entire tree into
* memory. Tree nodes can be scattered across the file, making loading the entire tree unfeasible in reasonable time.
*/
export default class BPChromTree {

    // Magic number identifying a UCSC B+ tree header (0x78CA8C91, read as uint32)
    static magic = 2026540177

    // Byte order of the file. Assumed little-endian; init() flips this if the
    // magic number does not match on the first read.
    littleEndian = true

    // Cache of tree nodes keyed by file offset -- nodes are immutable once read,
    // and interior nodes are revisited on every search.
    nodeCache = new Map()

    /**
     * Create and initialize a BPChromTree, reading its 32-byte header from the file.
     *
     * @param {string} path - URL or file path of the bigwig/bigbed file
     * @param {number} startOffset - byte offset of the B+ tree header within the file
     * @returns {Promise<BPChromTree>} the initialized tree
     */
    static async loadBpTree(path, startOffset) {
        const bpTree = new BPChromTree(path, startOffset)
        return bpTree.init()
    }

    /**
     * @param {string} path - URL or file path of the bigwig/bigbed file
     * @param {number} startOffset - byte offset of the B+ tree header within the file
     */
    constructor(path, startOffset) {
        this.path = path
        this.startOffset = startOffset
        // Endianness is NOT set here -- the class-field default (little-endian)
        // applies until init() detects otherwise from the magic number.
    }

    /**
     * Read and parse the 32-byte B+ tree header, detecting byte order from the
     * magic number.
     *
     * @returns {Promise<BPChromTree>} this, for chaining from loadBpTree
     * @throws {Error} if the magic number matches in neither byte order
     */
    async init() {
        const binaryParser = await this.#getParserFor(this.startOffset, 32)
        let magic = binaryParser.getInt()
        if (magic !== BPChromTree.magic) {
            // Retry with the opposite byte order before giving up.
            binaryParser.setPosition(0)
            this.littleEndian = !this.littleEndian
            binaryParser.littleEndian = this.littleEndian
            magic = binaryParser.getInt()
            if (magic !== BPChromTree.magic) {
                throw Error(`Bad magic number ${magic}`)
            }
        }
        const blockSize = binaryParser.getInt()
        const keySize = binaryParser.getInt()
        const valSize = binaryParser.getInt()
        const itemCount = binaryParser.getLong()
        const reserved = binaryParser.getLong()
        const nodeOffset = this.startOffset + 32   // root node immediately follows the header
        this.header = {magic, blockSize, keySize, valSize, itemCount, reserved, nodeOffset}
        return this
    }

    /**
     * Search the tree for a chromosome name.
     *
     * Nodes are fetched lazily from the file and cached; only the path from the
     * root to one leaf is read per search.
     *
     * @param {string} term - chromosome name to look up
     * @returns {Promise<{key: string, chromId: number, chromSize: number}|undefined>}
     *          the matching item, or undefined if not found
     * @throws {Error} if the header's value size is not the 8 bytes expected for
     *         a chromosome tree (4-byte id + 4-byte size)
     */
    async search(term) {

        if (!this.header) {
            await this.init()
        }

        const {keySize, valSize} = this.header

        if (valSize !== 8) {
            throw Error(`Unexpected valSize ${valSize}`)
        }

        // Fetch a node at the given file offset, consulting the cache first.
        const readTreeNode = async (offset) => {

            if (this.nodeCache.has(offset)) {
                return this.nodeCache.get(offset)
            }

            // 4-byte node header: type (1 = leaf), reserved byte, item count.
            let binaryParser = await this.#getParserFor(offset, 4)
            const type = binaryParser.getByte()
            const reserved = binaryParser.getByte()   // skipped, per spec
            const count = binaryParser.getUShort()
            const items = []

            if (type === 1) {
                // Leaf node -- read all items rather than returning on match,
                // because the whole node is cached for later searches.
                const size = count * (keySize + valSize)
                binaryParser = await this.#getParserFor(offset + 4, size)
                for (let i = 0; i < count; i++) {
                    const key = binaryParser.getFixedLengthString(keySize)
                    const chromId = binaryParser.getUInt()
                    const chromSize = binaryParser.getUInt()
                    items.push({key, chromId, chromSize})
                }
            } else {
                // Non-leaf node -- items are (key, 8-byte child offset) pairs.
                const size = count * (keySize + 8)
                binaryParser = await this.#getParserFor(offset + 4, size)
                for (let i = 0; i < count; i++) {
                    const key = binaryParser.getFixedLengthString(keySize)
                    const childOffset = binaryParser.getLong()
                    items.push({key, offset: childOffset})
                }
            }

            const node = {type, count, items}
            this.nodeCache.set(offset, node)
            return node
        }

        // Recursively descend from a node offset toward the leaf that could
        // contain the search term.
        const walkTreeNode = async (offset) => {

            const node = await readTreeNode(offset)

            if (node.type === 1) {
                // Leaf node -- exact match or nothing (returns undefined).
                for (let item of node.items) {
                    if (term === item.key) {
                        return item
                    }
                }
            } else {
                // Non-leaf node.  Start with the first child (its key is a
                // lower bound and is not compared), then advance while the
                // term sorts at or after each subsequent key.
                let childOffset = node.items[0].offset

                for (let i = 1; i < node.items.length; i++) {
                    const key = node.items[i].key
                    if (term.localeCompare(key) < 0) {
                        break
                    }
                    childOffset = node.items[i].offset
                }

                return walkTreeNode(childOffset)
            }
        }

        // Kick things off at the root.
        return walkTreeNode(this.header.nodeOffset)
    }

    /**
     * Fetch `size` bytes starting at `start` and wrap them in a BinaryParser
     * using the tree's detected byte order.
     */
    async #getParserFor(start, size) {
        const data = await igvxhr.loadArrayBuffer(this.path, {range: {start, size}})
        return new BinaryParser(new DataView(data), this.littleEndian)
    }

}
20 changes: 18 additions & 2 deletions js/bigwig/bpTree.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import BinaryParser from "../binary.js"
*/
export default class BPTree {

static magic = 2026540177
littleEndian = true
nodeCache = new Map()

static async loadBpTree(path, startOffset) {
Expand All @@ -24,7 +26,17 @@ export default class BPTree {

async init() {
const binaryParser = await this.#getParserFor(this.startOffset, 32)
const magic = binaryParser.getInt()
let magic = binaryParser.getInt()
if(magic !== BPTree.magic) {
binaryParser.setPosition(0)
this.littleEndian = !this.littleEndian
binaryParser.littleEndian = this.littleEndian
magic = binaryParser.getInt()
if(magic !== BPTree.magic) {
throw Error(`Bad magic number ${magic}`)
}
}

const blockSize = binaryParser.getInt()
const keySize = binaryParser.getInt()
const valSize = binaryParser.getInt()
Expand All @@ -37,6 +49,10 @@ export default class BPTree {

async search(term) {

if(!this.header) {
await this.init();
}

const {keySize, valSize} = this.header

if (!(valSize === 16 || valSize === 8)) {
Expand Down Expand Up @@ -126,7 +142,7 @@ export default class BPTree {

async #getParserFor(start, size) {
const data = await igvxhr.loadArrayBuffer(this.path, {range: {start, size}})
return new BinaryParser(new DataView(data))
return new BinaryParser(new DataView(data), this.littleEndian)
}

}
40 changes: 18 additions & 22 deletions js/bigwig/bwReader.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,7 @@ class BWReader {
this.config = config
this.bufferSize = BUFFER_SIZE
this.loader = isDataURL(this.path) ?
new DataBuffer(this.path) :
config.wholeFile ? new WholeFileBuffer(this.path) :
igvxhr
new DataBuffer(this.path) : igvxhr

if (config.searchTrix) {
this._trix = new Trix(`${config.searchTrix}x`, config.searchTrix)
Expand Down Expand Up @@ -140,7 +138,7 @@ class BWReader {
} else {
plain = uint8Array
}
decodeFunction.call(this, new DataView(plain.buffer), chrIdx1, bpStart, chrIdx2, bpEnd, features, this.chromTree.idToName, windowFunction)
decodeFunction.call(this, new DataView(plain.buffer), chrIdx1, bpStart, chrIdx2, bpEnd, features, this.chromTree.idToName, windowFunction, this.littleEndian)
}

features.sort(function (a, b) {
Expand Down Expand Up @@ -300,7 +298,7 @@ class BWReader {
// Assume low-to-high unless proven otherwise
this.littleEndian = true

let binaryParser = new BinaryParser(new DataView(data))
let binaryParser = new BinaryParser(new DataView(data), this.littleEndian)
let magic = binaryParser.getUInt()
if (magic === BIGWIG_MAGIC_LTH) {
this.type = "bigwig"
Expand Down Expand Up @@ -345,7 +343,7 @@ class BWReader {
data = await this.loader.loadArrayBuffer(this.path, buildOptions(this.config, {range: range}))

const nZooms = header.nZoomLevels
binaryParser = new BinaryParser(new DataView(data))
binaryParser = new BinaryParser(new DataView(data), this.littleEndian)

this.zoomLevelHeaders = []
this.firstZoomDataOffset = Number.MAX_SAFE_INTEGER
Expand All @@ -372,6 +370,7 @@ class BWReader {
}

// Chrom data index
// TODO -- replace this with BPChromTree
if (header.chromTreeOffset > 0) {
binaryParser.position = header.chromTreeOffset - startOffset
this.chromTree = await ChromTree.parseTree(binaryParser, startOffset, this.genome)
Expand All @@ -382,8 +381,8 @@ class BWReader {
}

//Finally total data count
binaryParser.position = header.fullDataOffset - startOffset
header.dataCount = binaryParser.getInt()
// binaryParser.position = header.fullDataOffset - startOffset
// header.dataCount = binaryParser.getInt()

this.header = header

Expand All @@ -408,7 +407,7 @@ class BWReader {
size: BBFILE_EXTENDED_HEADER_HEADER_SIZE
}
}))
let binaryParser = new BinaryParser(new DataView(data))
let binaryParser = new BinaryParser(new DataView(data), this.littleEndian)
const extensionSize = binaryParser.getUShort()
const extraIndexCount = binaryParser.getUShort()
const extraIndexListOffset = binaryParser.getLong()
Expand All @@ -421,7 +420,7 @@ class BWReader {
size: sz
}
}))
binaryParser = new BinaryParser(new DataView(data))
binaryParser = new BinaryParser(new DataView(data), this.littleEndian)

const type = []
const fieldCount = []
Expand Down Expand Up @@ -456,8 +455,8 @@ class BWReader {
if (rpTree) {
return rpTree
} else {
rpTree = new RPTree(offset, this.config, this.littleEndian, this.loader)
await rpTree.load()
rpTree = new RPTree(this.path, offset)
await rpTree.init()
this.rpTreeCache.set(offset, rpTree)
return rpTree
}
Expand All @@ -482,10 +481,7 @@ class BWReader {
this.visibilityWindow = -1
} else {
this.visibilityWindow = -1
// bigbed -- todo, we can't know genome length.
//let genomeSize = this.genome ? this.genome.getGenomeLength() : 3088286401
// Estimate window size to return ~ 1,000 features, assuming even distribution across the genome
//this.visibilityWindow = header.dataCount < 1000 ? -1 : 1000 * (genomeSize / header.dataCount)
// bigbed -- todo

}
}
Expand Down Expand Up @@ -579,9 +575,9 @@ function zoomLevelForScale(bpPerPixel, zoomLevelHeaders) {
}


function decodeWigData(data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict) {
function decodeWigData(data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict, windowFunction, littleEndian) {

const binaryParser = new BinaryParser(data)
const binaryParser = new BinaryParser(data, littleEndian)
const chromId = binaryParser.getInt()
const blockStart = binaryParser.getInt()
let chromStart = blockStart
Expand Down Expand Up @@ -631,8 +627,8 @@ function getBedDataDecoder() {

const minSize = 3 * 4 + 1 // Minimum # of bytes required for a bed record
const decoder = getDecoder(this.header.definedFieldCount, this.header.fieldCount, this.autoSql, this.format)
return function (data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict) {
const binaryParser = new BinaryParser(data)
return function (data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict, windowFunction, littleEndian) {
const binaryParser = new BinaryParser(data, littleEndian)
while (binaryParser.remLength() >= minSize) {

const chromId = binaryParser.getInt()
Expand All @@ -654,9 +650,9 @@ function getBedDataDecoder() {
}


function decodeZoomData(data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict, windowFunction) {
function decodeZoomData(data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict, windowFunction, littleEndian) {

const binaryParser = new BinaryParser(data)
const binaryParser = new BinaryParser(data, littleEndian)
const minSize = 8 * 4 // Minimum # of bytes required for a zoom record


Expand Down
Loading

0 comments on commit f4f7f64

Please sign in to comment.