Skip to content

Commit

Permalink
verify edge case works when rowLimit is undefined
Browse files Browse the repository at this point in the history
  • Loading branch information
park-brian committed Dec 20, 2024
1 parent 481c5d7 commit 3eb6568
Showing 1 changed file with 15 additions and 22 deletions.
37 changes: 15 additions & 22 deletions test/column.test.js
Original file line number Diff line number Diff line change
@@ -1,32 +1,25 @@
import fs from 'fs'
import { compressors } from 'hyparquet-compressors'
import { describe, expect, it } from 'vitest'
import { parquetMetadata } from '../src/hyparquet.js'
import { getSchemaPath } from '../src/schema.js'
import { getColumnRange, readColumn } from '../src/column.js'
import { asyncBufferFromFile, toJson } from '../src/utils.js'
import { fileToJson } from './helpers.js'
import { asyncBufferFromFile } from '../src/utils.js'

describe('readColumn', () => {
const parquetFiles = fs.readdirSync('test/files').filter(f => f.endsWith('.parquet'))
parquetFiles.forEach((file) => {
it(`read columns from ${file} when rowLimit is undefined`, async () => {
const asyncBuffer = await asyncBufferFromFile(`test/files/${file}`)
const arrayBuffer = await asyncBuffer.slice(0)
const metadata = parquetMetadata(arrayBuffer)
it('read columns when rowLimit is undefined', async () => {
const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
const asyncBuffer = await asyncBufferFromFile(testFile)
const arrayBuffer = await asyncBuffer.slice(0)
const metadata = parquetMetadata(arrayBuffer)

const result = metadata.row_groups.map((rowGroup) => rowGroup.columns.map((column) => {
if (!column.meta_data) return []
const [columnStartByte, columnEndByte] = getColumnRange(column.meta_data).map(Number)
const columnArrayBuffer = arrayBuffer.slice(columnStartByte, columnEndByte)
const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
return readColumn(reader, undefined, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
}))

const base = file.replace('.parquet', '')
const expected = fileToJson(`test/files/${base}.columns.json`)
expect(JSON.stringify(toJson(result))).toEqual(JSON.stringify(toJson(expected))) // ensure that we're not comparing NaN, -0, etc.
})
const column = metadata.row_groups[0].columns[0]
if (!column.meta_data) throw new Error(`No column metadata for ${testFile}`)
const [columnStartByte, columnEndByte] = getColumnRange(column.meta_data).map(Number)
const columnArrayBuffer = arrayBuffer.slice(columnStartByte, columnEndByte)
const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
const result = readColumn(reader, undefined, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
const expected = [null, 1, -2, NaN, 0, -1, -0, 2]
expect(result).toEqual(expected)
})
})

0 comments on commit 3eb6568

Please sign in to comment.