From 5d21b09b7a0d265b11e8c4565a0b3ecb61ca5c95 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Wed, 16 Oct 2024 01:09:18 -0700 Subject: [PATCH] Export cachedAsyncBuffer --- demo/asyncBuffer.ts | 50 -------------------------- demo/workers/parquetWorkerClient.ts | 2 +- src/asyncBuffer.js | 54 +++++++++++++++++++++++++++++ src/hyparquet.d.ts | 5 +++ src/hyparquet.js | 2 ++ 5 files changed, 62 insertions(+), 51 deletions(-) delete mode 100644 demo/asyncBuffer.ts create mode 100644 src/asyncBuffer.js diff --git a/demo/asyncBuffer.ts b/demo/asyncBuffer.ts deleted file mode 100644 index 12ddde3..0000000 --- a/demo/asyncBuffer.ts +++ /dev/null @@ -1,50 +0,0 @@ -import type { AsyncBuffer, Awaitable } from "../src/types.js" - -/** - * Returns a caches layer on top of an AsyncBuffer. - * This is useful for caching slices of a file that are read multiple times, - * possibly over a network. - * - * TODO: require data to be loaded with preload(), reads outside of preload rejected. - * - * @param {AsyncBuffer} file file-like object to cache - * @returns {AsyncBuffer} cached file-like object - */ -export function cachedAsyncBuffer(file: AsyncBuffer): AsyncBuffer { - // indexed by 'start,end' - const cache = new Map>() - return { - byteLength: file.byteLength, - slice(start: number, end?: number): Awaitable { - // ensure both "100-200" and "100-" are both cached the same - const key = cacheKey(start, end, file.byteLength) - const cached = cache.get(key) - if (cached) return cached - // cache miss, read from file - const promise = file.slice(start, end) - cache.set(key, promise) - return promise - }, - } -} - - -/** - * Returns canonical cache key for a byte range. - * Cache key is a string of the form 'start,end'. - * Attempts to normalize int-range and suffix-range requests to the same key. - */ -function cacheKey(start: number, end: number | undefined, fileSize: number | undefined): string { - if (start < 0) { - if (end !== undefined) throw new Error(`invalid suffix range [${start}, ${end}]`) - if (fileSize === undefined) return `${start},` - return `${fileSize + start},${fileSize}` - } else if (end !== undefined) { - if (start > end) throw new Error(`invalid empty range [${start}, ${end}]`) - return `${start},${end}` - } else if (fileSize === undefined) { - return `${start},` - } else { - return `${start},${fileSize}` - } -} diff --git a/demo/workers/parquetWorkerClient.ts b/demo/workers/parquetWorkerClient.ts index dfdd464..bb49daf 100644 --- a/demo/workers/parquetWorkerClient.ts +++ b/demo/workers/parquetWorkerClient.ts @@ -1,6 +1,6 @@ +import { cachedAsyncBuffer } from '../../src/asyncBuffer.js' import type { AsyncBuffer, FileMetaData } from '../../src/hyparquet.js' import { asyncBufferFromUrl } from '../../src/utils.js' -import { cachedAsyncBuffer } from '../asyncBuffer.js' // Serializable constructors for AsyncBuffers interface AsyncBufferFromFile { diff --git a/src/asyncBuffer.js b/src/asyncBuffer.js new file mode 100644 index 0000000..afd739d --- /dev/null +++ b/src/asyncBuffer.js @@ -0,0 +1,54 @@ + +/** + * Returns a cached layer on top of an AsyncBuffer. For caching slices of a file + * that are read multiple times, possibly over a network. + * + * @typedef {import('./types.js').AsyncBuffer} AsyncBuffer + * @param {AsyncBuffer} file file-like object to cache + * @returns {AsyncBuffer} cached file-like object + */ +export function cachedAsyncBuffer({ byteLength, slice }) { + const cache = new Map() + return { + byteLength, + /** + * @param {number} start + * @param {number} [end] + * @returns {import('./types.js').Awaitable} + */ + slice(start, end) { + const key = cacheKey(start, end, byteLength) + const cached = cache.get(key) + if (cached) return cached + // cache miss, read from file + const promise = slice(start, end) + cache.set(key, promise) + return promise + }, + } +} + + +/** + * Returns canonical cache key for a byte range 'start,end'. + * Normalize int-range and suffix-range requests to the same key. + * + * @param {number} start start byte of range + * @param {number} [end] end byte of range, or undefined for suffix range + * @param {number} [size] size of file, or undefined for suffix range + * @returns {string} + */ +function cacheKey(start, end, size) { + if (start < 0) { + if (end !== undefined) throw new Error(`invalid suffix range [${start}, ${end}]`) + if (size === undefined) return `${start},` + return `${size + start},${size}` + } else if (end !== undefined) { + if (start > end) throw new Error(`invalid empty range [${start}, ${end}]`) + return `${start},${end}` + } else if (size === undefined) { + return `${start},` + } else { + return `${start},${size}` + } +} diff --git a/src/hyparquet.d.ts b/src/hyparquet.d.ts index 233a868..c6ec81a 100644 --- a/src/hyparquet.d.ts +++ b/src/hyparquet.d.ts @@ -130,6 +130,11 @@ export function asyncBufferFromFile(filename: string): Promise */ export function byteLengthFromUrl(url: string): Promise +/** + * Returns a cached layer on top of an AsyncBuffer. + */ +export function cachedAsyncBuffer(asyncBuffer: AsyncBuffer): AsyncBuffer + /** * Parquet query options for reading data */ diff --git a/src/hyparquet.js b/src/hyparquet.js index 6f93c1d..407a866 100644 --- a/src/hyparquet.js +++ b/src/hyparquet.js @@ -9,6 +9,8 @@ export { snappyUncompress } from './snappy.js' export { asyncBufferFromFile, asyncBufferFromUrl, byteLengthFromUrl, toJson } from './utils.js' +export { cachedAsyncBuffer } from './asyncBuffer.js' + /** * @param {import('./hyparquet.js').ParquetReadOptions} options * @returns {Promise>>}