1
1
import { compressors } from 'hyparquet-compressors'
2
2
import { describe , expect , it } from 'vitest'
3
+ import { getColumnRange , readColumn } from '../src/column.js'
3
4
import { parquetMetadata } from '../src/hyparquet.js'
4
5
import { getSchemaPath } from '../src/schema.js'
5
- import { getColumnRange , readColumn } from '../src/column.js'
6
6
import { asyncBufferFromFile } from '../src/utils.js'
7
7
8
- describe ( 'readColumn' , ( ) => {
9
- it ( 'read columns when rowLimit is undefined' , async ( ) => {
10
- const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
11
- const asyncBuffer = await asyncBufferFromFile ( testFile )
12
- const arrayBuffer = await asyncBuffer . slice ( 0 )
13
- const metadata = parquetMetadata ( arrayBuffer )
8
+ const values = [ null , 1 , - 2 , NaN , 0 , - 1 , - 0 , 2 ]
14
9
15
- const column = metadata . row_groups [ 0 ] . columns [ 0 ]
16
- if ( ! column . meta_data ) throw new Error ( `No column metadata for ${ testFile } ` )
17
- const [ columnStartByte , columnEndByte ] = getColumnRange ( column . meta_data ) . map ( Number )
18
- const columnArrayBuffer = arrayBuffer . slice ( columnStartByte , columnEndByte )
19
- const schemaPath = getSchemaPath ( metadata . schema , column . meta_data ?. path_in_schema ?? [ ] )
20
- const reader = { view : new DataView ( columnArrayBuffer ) , offset : 0 }
21
-
22
- const rowLimit = undefined
23
- const result = readColumn ( reader , rowLimit , column . meta_data , schemaPath , { file : asyncBuffer , compressors } )
24
- const expected = [ null , 1 , - 2 , NaN , 0 , - 1 , - 0 , 2 ]
25
- expect ( result ) . toEqual ( expected )
26
- } )
27
-
28
- it ( 'read columns when rowLimit is Infinity' , async ( ) => {
10
+ describe ( 'readColumn' , ( ) => {
11
+ it . for ( [
12
+ { rowLimit : undefined , expected : values } ,
13
+ { rowLimit : Infinity , expected : values } ,
14
+ { rowLimit : 2 , expected : values . slice ( 0 , 2 ) } ,
15
+ { rowLimit : 0 , expected : [ ] } ,
16
+ ] ) ( 'readColumn with rowLimit %p' , async ( { rowLimit, expected } ) => {
29
17
const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
30
18
const asyncBuffer = await asyncBufferFromFile ( testFile )
31
19
const arrayBuffer = await asyncBuffer . slice ( 0 )
@@ -38,46 +26,7 @@ describe('readColumn', () => {
38
26
const schemaPath = getSchemaPath ( metadata . schema , column . meta_data ?. path_in_schema ?? [ ] )
39
27
const reader = { view : new DataView ( columnArrayBuffer ) , offset : 0 }
40
28
41
- const rowLimit = Infinity
42
29
const result = readColumn ( reader , rowLimit , column . meta_data , schemaPath , { file : asyncBuffer , compressors } )
43
- const expected = [ null , 1 , - 2 , NaN , 0 , - 1 , - 0 , 2 ]
44
30
expect ( result ) . toEqual ( expected )
45
31
} )
46
-
47
- it ( 'read columns when rowLimit is defined' , async ( ) => {
48
- const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
49
- const asyncBuffer = await asyncBufferFromFile ( testFile )
50
- const arrayBuffer = await asyncBuffer . slice ( 0 )
51
- const metadata = parquetMetadata ( arrayBuffer )
52
-
53
- const column = metadata . row_groups [ 0 ] . columns [ 0 ]
54
- if ( ! column . meta_data ) throw new Error ( `No column metadata for ${ testFile } ` )
55
- const [ columnStartByte , columnEndByte ] = getColumnRange ( column . meta_data ) . map ( Number )
56
- const columnArrayBuffer = arrayBuffer . slice ( columnStartByte , columnEndByte )
57
- const schemaPath = getSchemaPath ( metadata . schema , column . meta_data ?. path_in_schema ?? [ ] )
58
- const reader = { view : new DataView ( columnArrayBuffer ) , offset : 0 }
59
-
60
- const rowLimit = 2
61
- const result = readColumn ( reader , rowLimit , column . meta_data , schemaPath , { file : asyncBuffer , compressors } )
62
- expect ( result . length ) . toBe ( rowLimit )
63
- } )
64
-
65
- it ( 'read columns when rowLimit is 0' , async ( ) => {
66
- const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
67
- const asyncBuffer = await asyncBufferFromFile ( testFile )
68
- const arrayBuffer = await asyncBuffer . slice ( 0 )
69
- const metadata = parquetMetadata ( arrayBuffer )
70
-
71
- const column = metadata . row_groups [ 0 ] . columns [ 0 ]
72
- if ( ! column . meta_data ) throw new Error ( `No column metadata for ${ testFile } ` )
73
- const [ columnStartByte , columnEndByte ] = getColumnRange ( column . meta_data ) . map ( Number )
74
- const columnArrayBuffer = arrayBuffer . slice ( columnStartByte , columnEndByte )
75
- const schemaPath = getSchemaPath ( metadata . schema , column . meta_data ?. path_in_schema ?? [ ] )
76
- const reader = { view : new DataView ( columnArrayBuffer ) , offset : 0 }
77
-
78
- const rowLimit = 0
79
- const result = readColumn ( reader , rowLimit , column . meta_data , schemaPath , { file : asyncBuffer , compressors } )
80
- expect ( result . length ) . toBe ( rowLimit )
81
- } )
82
-
83
32
} )
0 commit comments