diff --git a/js/bigwig/bpChromTree.js b/js/bigwig/bpChromTree.js new file mode 100644 index 000000000..fc8d19d40 --- /dev/null +++ b/js/bigwig/bpChromTree.js @@ -0,0 +1,139 @@ +import {igvxhr} from "../../node_modules/igv-utils/src/index.js" +import {buildOptions} from "../util/igvUtils.js" +import BinaryParser from "../binary.js" + +/** + * A UCSC BigBed B+ tree, used to support searching the "extra indexes". + * + * Nodes are loaded on demand during search, avoiding the need to read the entire tree into + * memory. Tree nodes can be scattered across the file, making loading the entire tree unfeasible in reasonable time. + */ +export default class BPChromTree { + + static magic = 2026540177 + littleEndian = true + nodeCache = new Map() + + static async loadBpTree(path, startOffset) { + const bpTree = new BPChromTree(path, startOffset) + return bpTree.init() + } + + constructor(path, startOffset, littleEndian = true) { + this.path = path + this.startOffset = startOffset + this.littleEndian = littleEndian /* initial byte-order guess; init() flips it when the magic number does not match */ + } + + async init() { + const binaryParser = await this.#getParserFor(this.startOffset, 32) + let magic = binaryParser.getInt() + if(magic !== BPChromTree.magic) { + binaryParser.setPosition(0) + this.littleEndian = !this.littleEndian + binaryParser.littleEndian = this.littleEndian + magic = binaryParser.getInt() + if(magic !== BPChromTree.magic) { + throw Error(`Bad magic number ${magic}`) + } + } + const blockSize = binaryParser.getInt() + const keySize = binaryParser.getInt() + const valSize = binaryParser.getInt() + const itemCount = binaryParser.getLong() + const reserved = binaryParser.getLong() + const nodeOffset = this.startOffset + 32 + this.header = {magic, blockSize, keySize, valSize, itemCount, reserved, nodeOffset} + return this + } + + async search(term) { + + if(!this.header) { + await this.init(); + } + + const {keySize, valSize} = this.header + + if (valSize !== 8) { + throw Error(`Unexpected valSize ${valSize}`) + } + + const readTreeNode = async (offset) => { + + if 
(this.nodeCache.has(offset)) { + return this.nodeCache.get(offset) + } else { + + let binaryParser = await this.#getParserFor(offset, 4) + const type = binaryParser.getByte() + const reserved = binaryParser.getByte() + const count = binaryParser.getUShort() + const items = [] + + if (type === 1) { + // Leaf node -- we walk through all items, rather return on match, because nodes are cached + const size = count * (keySize + valSize) + binaryParser = await this.#getParserFor(offset + 4, size) + for (let i = 0; i < count; i++) { + const key = binaryParser.getFixedLengthString(keySize) + const chromId = binaryParser.getUInt(); + const chromSize = binaryParser.getUInt(); + items.push({key, chromId, chromSize}) + } + } else { + // Non leaf node + const size = count * (keySize + 8) + binaryParser = await this.#getParserFor(offset + 4, size) + + for (let i = 0; i < count; i++) { + const key = binaryParser.getFixedLengthString(keySize) + const offset = binaryParser.getLong() + items.push({key, offset}) + } + } + + const node = {type, count, items} + this.nodeCache.set(offset, node) + return node + } + } + + const walkTreeNode = async (offset) => { + + const node = await readTreeNode(offset) + + if (node.type === 1) { + // Leaf node + for (let item of node.items) { + if (term === item.key) { + return item + } + } + } else { + // Non leaf node + // Read and discard the first key. 
+ let childOffset = node.items[0].offset + + for (let i = 1; i < node.items.length; i++) { + const key = node.items[i].key + if (term.localeCompare(key) < 0) { + break + } + childOffset = node.items[i].offset + } + + return walkTreeNode(childOffset) + } + } + + // Kick things off + return walkTreeNode(this.header.nodeOffset) + } + + async #getParserFor(start, size) { + const data = await igvxhr.loadArrayBuffer(this.path, {range: {start, size}}) + return new BinaryParser(new DataView(data), this.littleEndian) + } + +} diff --git a/js/bigwig/bpTree.js b/js/bigwig/bpTree.js index 0d996568d..d39e47eb0 100644 --- a/js/bigwig/bpTree.js +++ b/js/bigwig/bpTree.js @@ -10,6 +10,8 @@ import BinaryParser from "../binary.js" */ export default class BPTree { + static magic = 2026540177 + littleEndian = true nodeCache = new Map() static async loadBpTree(path, startOffset) { @@ -24,7 +26,17 @@ export default class BPTree { async init() { const binaryParser = await this.#getParserFor(this.startOffset, 32) - const magic = binaryParser.getInt() + let magic = binaryParser.getInt() + if(magic !== BPTree.magic) { + binaryParser.setPosition(0) + this.littleEndian = !this.littleEndian + binaryParser.littleEndian = this.littleEndian + magic = binaryParser.getInt() + if(magic !== BPTree.magic) { + throw Error(`Bad magic number ${magic}`) + } + } + const blockSize = binaryParser.getInt() const keySize = binaryParser.getInt() const valSize = binaryParser.getInt() @@ -37,6 +49,10 @@ export default class BPTree { async search(term) { + if(!this.header) { + await this.init(); + } + const {keySize, valSize} = this.header if (!(valSize === 16 || valSize === 8)) { @@ -126,7 +142,7 @@ export default class BPTree { async #getParserFor(start, size) { const data = await igvxhr.loadArrayBuffer(this.path, {range: {start, size}}) - return new BinaryParser(new DataView(data)) + return new BinaryParser(new DataView(data), this.littleEndian) } } diff --git a/js/bigwig/bwReader.js b/js/bigwig/bwReader.js 
index a17981104..1f2815c7c 100644 --- a/js/bigwig/bwReader.js +++ b/js/bigwig/bwReader.js @@ -54,9 +54,7 @@ class BWReader { this.config = config this.bufferSize = BUFFER_SIZE this.loader = isDataURL(this.path) ? - new DataBuffer(this.path) : - config.wholeFile ? new WholeFileBuffer(this.path) : - igvxhr + new DataBuffer(this.path) : igvxhr if (config.searchTrix) { this._trix = new Trix(`${config.searchTrix}x`, config.searchTrix) @@ -140,7 +138,7 @@ class BWReader { } else { plain = uint8Array } - decodeFunction.call(this, new DataView(plain.buffer), chrIdx1, bpStart, chrIdx2, bpEnd, features, this.chromTree.idToName, windowFunction) + decodeFunction.call(this, new DataView(plain.buffer), chrIdx1, bpStart, chrIdx2, bpEnd, features, this.chromTree.idToName, windowFunction, this.littleEndian) } features.sort(function (a, b) { @@ -300,7 +298,7 @@ class BWReader { // Assume low-to-high unless proven otherwise this.littleEndian = true - let binaryParser = new BinaryParser(new DataView(data)) + let binaryParser = new BinaryParser(new DataView(data), this.littleEndian) let magic = binaryParser.getUInt() if (magic === BIGWIG_MAGIC_LTH) { this.type = "bigwig" @@ -345,7 +343,7 @@ class BWReader { data = await this.loader.loadArrayBuffer(this.path, buildOptions(this.config, {range: range})) const nZooms = header.nZoomLevels - binaryParser = new BinaryParser(new DataView(data)) + binaryParser = new BinaryParser(new DataView(data), this.littleEndian) this.zoomLevelHeaders = [] this.firstZoomDataOffset = Number.MAX_SAFE_INTEGER @@ -372,6 +370,7 @@ class BWReader { } // Chrom data index + // TODO -- replace this with BPChromTree if (header.chromTreeOffset > 0) { binaryParser.position = header.chromTreeOffset - startOffset this.chromTree = await ChromTree.parseTree(binaryParser, startOffset, this.genome) @@ -382,8 +381,8 @@ class BWReader { } //Finally total data count - binaryParser.position = header.fullDataOffset - startOffset - header.dataCount = binaryParser.getInt() + // 
binaryParser.position = header.fullDataOffset - startOffset + // header.dataCount = binaryParser.getInt() this.header = header @@ -408,7 +407,7 @@ class BWReader { size: BBFILE_EXTENDED_HEADER_HEADER_SIZE } })) - let binaryParser = new BinaryParser(new DataView(data)) + let binaryParser = new BinaryParser(new DataView(data), this.littleEndian) const extensionSize = binaryParser.getUShort() const extraIndexCount = binaryParser.getUShort() const extraIndexListOffset = binaryParser.getLong() @@ -421,7 +420,7 @@ class BWReader { size: sz } })) - binaryParser = new BinaryParser(new DataView(data)) + binaryParser = new BinaryParser(new DataView(data), this.littleEndian) const type = [] const fieldCount = [] @@ -456,8 +455,8 @@ class BWReader { if (rpTree) { return rpTree } else { - rpTree = new RPTree(offset, this.config, this.littleEndian, this.loader) - await rpTree.load() + rpTree = new RPTree(this.path, offset) + await rpTree.init() this.rpTreeCache.set(offset, rpTree) return rpTree } @@ -482,10 +481,7 @@ class BWReader { this.visibilityWindow = -1 } else { this.visibilityWindow = -1 - // bigbed -- todo, we can't know genome length. - //let genomeSize = this.genome ? this.genome.getGenomeLength() : 3088286401 - // Estimate window size to return ~ 1,000 features, assuming even distribution across the genome - //this.visibilityWindow = header.dataCount < 1000 ? 
-1 : 1000 * (genomeSize / header.dataCount) + // bigbed -- todo } } @@ -579,9 +575,9 @@ function zoomLevelForScale(bpPerPixel, zoomLevelHeaders) { } -function decodeWigData(data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict) { +function decodeWigData(data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict, windowFunction, littleEndian) { - const binaryParser = new BinaryParser(data) + const binaryParser = new BinaryParser(data, littleEndian) const chromId = binaryParser.getInt() const blockStart = binaryParser.getInt() let chromStart = blockStart @@ -631,8 +627,8 @@ function getBedDataDecoder() { const minSize = 3 * 4 + 1 // Minimum # of bytes required for a bed record const decoder = getDecoder(this.header.definedFieldCount, this.header.fieldCount, this.autoSql, this.format) - return function (data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict) { - const binaryParser = new BinaryParser(data) + return function (data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict, windowFunction, littleEndian) { + const binaryParser = new BinaryParser(data, littleEndian) while (binaryParser.remLength() >= minSize) { const chromId = binaryParser.getInt() @@ -654,9 +650,9 @@ function getBedDataDecoder() { } -function decodeZoomData(data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict, windowFunction) { +function decodeZoomData(data, chrIdx1, bpStart, chrIdx2, bpEnd, featureArray, chrDict, windowFunction, littleEndian) { - const binaryParser = new BinaryParser(data) + const binaryParser = new BinaryParser(data, littleEndian) const minSize = 8 * 4 // Minimum # of bytes required for a zoom record diff --git a/js/bigwig/rpTree.js b/js/bigwig/rpTree.js index da12e6d37..4fb352ebd 100644 --- a/js/bigwig/rpTree.js +++ b/js/bigwig/rpTree.js @@ -1,47 +1,106 @@ -import {isDataURL} from "../util/igvUtils.js" -import BufferedReader from "./bufferedReader.js" +import {igvxhr} from "../../node_modules/igv-utils/src/index.js" import BinaryParser from 
"../binary.js" -let RPTREE_HEADER_SIZE = 48 -let RPTREE_NODE_LEAF_ITEM_SIZE = 32 // leaf item size -let RPTREE_NODE_CHILD_ITEM_SIZE = 24 // child item size -let BUFFER_SIZE = 512000 // buffer +const RPTREE_HEADER_SIZE = 48 +const RPTREE_NODE_LEAF_ITEM_SIZE = 32 // leaf item size +const RPTREE_NODE_CHILD_ITEM_SIZE = 24 // child item size export default class RPTree { - constructor(fileOffset, config, littleEndian, loader) { + static magic = 610839776 + littleEndian = true + nodeCache = new Map() - this.config = config - this.loader = loader - this.fileOffset = fileOffset // File offset to beginning of tree - this.path = config.url - this.littleEndian = littleEndian + constructor(path, startOffset) { + + this.path = path + this.startOffset = startOffset } - async load() { - const rootNodeOffset = this.fileOffset + RPTREE_HEADER_SIZE - const bufferedReader = isDataURL(this.path) || this.config.wholeFile ? - this.loader : - new BufferedReader(this.config, BUFFER_SIZE) - this.rootNode = await this.readNode(rootNodeOffset, bufferedReader) + + async init() { + const binaryParser = await this.#getParserFor(this.startOffset, RPTREE_HEADER_SIZE) + let magic = binaryParser.getInt() + console.log(magic) + if(magic !== RPTree.magic) { + binaryParser.setPosition(0) + this.littleEndian = !this.littleEndian + binaryParser.littleEndian = this.littleEndian + magic = binaryParser.getInt() + if(magic !== RPTree.magic) { + throw Error(`Bad magic number ${magic}`) + } + } + + const blockSize = binaryParser.getUInt() + const itemCount = binaryParser.getLong() + const startChromIx = binaryParser.getUInt() + const startBase = binaryParser.getUInt() + const endChromIx = binaryParser.getUInt() + const endBase = binaryParser.getUInt() + const endFileOffset = binaryParser.getLong() + const itemsPerSlot = binaryParser.getUInt() + const reserved = binaryParser.getUInt() + const rootNodeOffset = this.startOffset + RPTREE_HEADER_SIZE + this.header = { + magic, + blockSize, + itemCount, + 
startChromIx, + startBase, + endChromIx, + endBase, + endFileOffset, + itemsPerSlot, + reserved, + rootNodeOffset + } return this } - async readNode(filePosition, bufferedReader) { + async #getParserFor(start, size) { + const data = await igvxhr.loadArrayBuffer(this.path, {range: {start, size}}) + return new BinaryParser(new DataView(data), this.littleEndian) + } + + + async findLeafItemsOverlapping(chrIdx1, startBase, chrIdx2, endBase) { + + const leafItems = [] + const walkTreeNode = async (offset) => { + const node = await this.readNode(offset) + for (let item of node.items) { + if (overlaps(item, chrIdx1, startBase, chrIdx2, endBase)) { + if (node.type === 1) { // Leaf node + leafItems.push(item) + } else { // Non leaf node + await walkTreeNode(item.childOffset) + } + } + } + } - let dataView = await bufferedReader.dataViewForRange({start: filePosition, size: 4}, false) - let binaryParser = new BinaryParser(dataView, this.littleEndian) + await walkTreeNode(this.header.rootNodeOffset) + return leafItems + } + + + async readNode(offset) { + + const nodeKey = offset + if (this.nodeCache.has(nodeKey)) { + return this.nodeCache.get(nodeKey) /* return the cached node, not the cache Map itself */ + } + + let binaryParser = await this.#getParserFor(offset, 4) const type = binaryParser.getByte() const isLeaf = (type === 1) const reserved = binaryParser.getByte() const count = binaryParser.getUShort() - filePosition += 4 - let bytesRequired = count * (isLeaf ? 
RPTREE_NODE_LEAF_ITEM_SIZE : RPTREE_NODE_CHILD_ITEM_SIZE) - let range2 = {start: filePosition, size: bytesRequired} - dataView = await bufferedReader.dataViewForRange(range2, false) - const items = new Array(count) - binaryParser = new BinaryParser(dataView) + binaryParser = await this.#getParserFor(offset + 4, bytesRequired) + + const items = [] if (isLeaf) { for (let i = 0; i < count; i++) { @@ -54,10 +113,8 @@ export default class RPTree { dataOffset: binaryParser.getLong(), dataSize: binaryParser.getLong() } - items[i] = item - + items.push(item) } - return new RPTreeNode(items) } else { // non-leaf for (let i = 0; i < count; i++) { @@ -71,95 +128,12 @@ export default class RPTree { } items[i] = item } - - return new RPTreeNode(items) } - - } - - async findLeafItemsOverlapping(chrIdx1, startBase, chrIdx2, endBase) { - - let self = this - - return new Promise( (fulfill, reject) => { - - let leafItems = [], - processing = new Set(), - bufferedReader = isDataURL(this.path) || this.config.wholeFile ? 
- this.loader : - new BufferedReader(this.config, BUFFER_SIZE) - - processing.add(0) // Zero represents the root node - findLeafItems(this.rootNode, 0) - - function findLeafItems(node, nodeId) { - - if (overlaps(node, chrIdx1, startBase, chrIdx2, endBase)) { - - let items = node.items - - items.forEach(function (item) { - - if (overlaps(item, chrIdx1, startBase, chrIdx2, endBase)) { - - if (item.isLeaf) { - leafItems.push(item) - } else { - if (item.childNode) { - findLeafItems(item.childNode) - } else { - processing.add(item.childOffset) // Represent node to-be-loaded by its file position - - self.readNode(item.childOffset, bufferedReader) - .then(function (node) { - item.childNode = node - findLeafItems(node, item.childOffset) - }) - .catch(reject) - } - } - } - }) - - } - - if (nodeId !== undefined) processing.delete(nodeId) - - // Wait until all nodes are processed - if (processing.size === 0) { - fulfill(leafItems) - } - } - }) + const node = {type, items} + this.nodeCache.set(nodeKey, node) + return node } -} - -class RPTreeNode { - constructor(items) { - - this.items = items - - let minChromId = Number.MAX_SAFE_INTEGER, - maxChromId = 0, - minStartBase = Number.MAX_SAFE_INTEGER, - maxEndBase = 0, - i, - item - - for (i = 0; i < items.length; i++) { - item = items[i] - minChromId = Math.min(minChromId, item.startChrom) - maxChromId = Math.max(maxChromId, item.endChrom) - minStartBase = Math.min(minStartBase, item.startBase) - maxEndBase = Math.max(maxEndBase, item.endBase) - } - - this.startChrom = minChromId - this.endChrom = maxChromId - this.startBase = minStartBase - this.endBase = maxEndBase - } } /** diff --git a/js/binary.js b/js/binary.js index 1f302b286..9184b87a5 100644 --- a/js/binary.js +++ b/js/binary.js @@ -34,6 +34,10 @@ class BinaryParser { this.length = dataView.byteLength } + setPosition(position) { + this.position = position + } + available() { return this.length - this.position } diff --git a/js/genome/cytoband.js b/js/genome/cytoband.js 
index 9eb008022..2d139107b 100644 --- a/js/genome/cytoband.js +++ b/js/genome/cytoband.js @@ -22,100 +22,4 @@ class Cytoband { } } -async function loadCytobands(cytobandURL, config) { - - let data - if (isDataURL(cytobandURL)) { - const plain = BGZip.decodeDataURI(cytobandURL) - data = "" - const len = plain.length - for (let i = 0; i < len; i++) { - data += String.fromCharCode(plain[i]) - } - } else { - data = await igvxhr.loadString(cytobandURL, buildOptions(config)) - } - - const cytobands = {} - let lastChr - let bands = [] - const lines = StringUtils.splitLines(data) - for (let line of lines) { - - const tokens = line.split("\t") - const chrName = tokens[0] //genome.getChromosomeName(tokens[0]) // Note allowance for alias name, not sure why this is needed here - if (!lastChr) lastChr = chrName - - if (chrName !== lastChr) { - cytobands[lastChr] = bands - bands = [] - lastChr = chrName - } - - if (tokens.length === 5) { - //10 0 3000000 p15.3 gneg - var start = parseInt(tokens[1]) - var end = parseInt(tokens[2]) - var name = tokens[3] - var stain = tokens[4] - bands.push(new Cytoband(start, end, name, stain)) - } - } - - return cytobands -} - -/** - * Load a UCSC bigbed cytoband file. Features are in bed+4 format. 
- * { - * "chr": "chr1", - * "start": 0, - * "end": 1735965, - * "name": "p36.33", - * "gieStain": "gneg" - * } - * @param url - * @param config - * @returns {Promise<*[]>} - */ -async function loadCytobandsBB(url, config) { - - const bbReader = new BWReader({url: url, format: "bigbed", wholeFile: true}) - const features = await bbReader.readWGFeatures() - if (features.length === 0) return - - // Sort features - features.sort((a, b) => { - if (a.chr === b.chr) { - return a.start - b.start - } else { - return a.chr.localeCompare(b.chr) - } - }) - - const cytobands = {} - const chromosomes = new Map() // chromosome metadata object - let order = 0 - let lastChr - let lastEnd - let bands = [] - for (let f of features) { - - const chrName = f.chr - if (!lastChr) lastChr = chrName - - if (chrName !== lastChr) { - cytobands[lastChr] = bands - chromosomes.set(lastChr, new Chromosome(lastChr, order++, lastEnd)) - bands = [] - lastChr = chrName - } - - bands.push(new Cytoband(f.start, f.end, f.name, f.gieStain)) - lastEnd = f.end - } - - return {chromosomes, cytobands} -} - export {Cytoband}