From 79ccbe3ff0547a8fcdc8071ebb1a2afb06c8ef95 Mon Sep 17 00:00:00 2001
From: Brian Park
Date: Thu, 19 Dec 2024 20:50:41 -0500
Subject: [PATCH] add test cases for readColumn

---
 test/column.test.js | 60 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/test/column.test.js b/test/column.test.js
index 3c0b2dc..7e5e20b 100644
--- a/test/column.test.js
+++ b/test/column.test.js
@@ -18,8 +18,66 @@ describe('readColumn', () => {
     const columnArrayBuffer = arrayBuffer.slice(columnStartByte, columnEndByte)
     const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
     const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
-    const result = readColumn(reader, undefined, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
+
+    const rowLimit = undefined
+    const result = readColumn(reader, rowLimit, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
     const expected = [null, 1, -2, NaN, 0, -1, -0, 2]
     expect(result).toEqual(expected)
   })
+
+  it('read columns when rowLimit is Infinity', async () => {
+    const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
+    const asyncBuffer = await asyncBufferFromFile(testFile)
+    const arrayBuffer = await asyncBuffer.slice(0)
+    const metadata = parquetMetadata(arrayBuffer)
+
+    const column = metadata.row_groups[0].columns[0]
+    if (!column.meta_data) throw new Error(`No column metadata for ${testFile}`)
+    const [columnStartByte, columnEndByte] = getColumnRange(column.meta_data).map(Number)
+    const columnArrayBuffer = arrayBuffer.slice(columnStartByte, columnEndByte)
+    const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
+    const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
+
+    const rowLimit = Infinity
+    const result = readColumn(reader, rowLimit, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
+    const expected = [null, 1, -2, NaN, 0, -1, -0, 2]
+    expect(result).toEqual(expected)
+  })
+
+  it('read columns when rowLimit is defined', async () => {
+    const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
+    const asyncBuffer = await asyncBufferFromFile(testFile)
+    const arrayBuffer = await asyncBuffer.slice(0)
+    const metadata = parquetMetadata(arrayBuffer)
+
+    const column = metadata.row_groups[0].columns[0]
+    if (!column.meta_data) throw new Error(`No column metadata for ${testFile}`)
+    const [columnStartByte, columnEndByte] = getColumnRange(column.meta_data).map(Number)
+    const columnArrayBuffer = arrayBuffer.slice(columnStartByte, columnEndByte)
+    const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
+    const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
+
+    const rowLimit = 2
+    const result = readColumn(reader, rowLimit, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
+    expect(result.length).toBe(rowLimit)
+  })
+
+  it('read columns when rowLimit is 0', async () => {
+    const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
+    const asyncBuffer = await asyncBufferFromFile(testFile)
+    const arrayBuffer = await asyncBuffer.slice(0)
+    const metadata = parquetMetadata(arrayBuffer)
+
+    const column = metadata.row_groups[0].columns[0]
+    if (!column.meta_data) throw new Error(`No column metadata for ${testFile}`)
+    const [columnStartByte, columnEndByte] = getColumnRange(column.meta_data).map(Number)
+    const columnArrayBuffer = arrayBuffer.slice(columnStartByte, columnEndByte)
+    const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
+    const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
+
+    const rowLimit = 0
+    const result = readColumn(reader, rowLimit, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
+    expect(result.length).toBe(rowLimit)
+  })
+
 })