21
21
//version compressionType='LZ4'
22
22
//version compressionType='ZSTD'
23
23
24
+ import STD.File AS FileServices;
24
25
import ^ as root;
25
26
compressionType := #IFDEFINED(root.compressionType, 'UNCOMPRESSED' );
26
27
27
28
IMPORT Std;
28
29
IMPORT Parquet;
29
30
30
// Base path for every Parquet file this test writes, reads and deletes.
// The removed definition was a plain directory and call sites appended
// '/<Name>.parquet'. The added definition ends with the workunit id plus a
// separator, and call sites append '<Name>.parquet' directly, so despite its
// name this value is now a file-name prefix rather than a directory.
// NOTE(review): presumably the WORKUNIT prefix keeps concurrent runs (such as
// the LZ4 / ZSTD versions declared at the top of the file) from sharing
// files - confirm.
// NOTE(review): the separator literal appears here as '- '. If the trailing
// space is real rather than a rendering artifact, it becomes part of every
// generated file name - confirm.
- dropzoneDirectory := Std.File .GetDefaultDropZone() + '/regress/parquet' ;
31
+ dropzoneDirectory := Std.File .GetDefaultDropZone() + '/regress/parquet/' + WORKUNIT + '- ' ;
31
32
32
33
// Covers data types supported by ECL and Arrow
33
34
@@ -40,9 +41,7 @@ booleanDatasetOut := DATASET([
40
41
{001 , 'aab' , FALSE }
41
42
], booleanRecord);
42
43
43
- ParquetIO.Write(booleanDatasetOut, dropzoneDirectory + '/BooleanTest.parquet' , TRUE , compressionType);
44
-
45
- booleanDatasetIn := ParquetIO.Read(booleanRecord, dropzoneDirectory + '/BooleanTest.parquet' );
44
+ booleanDatasetIn := ParquetIO.Read(booleanRecord, dropzoneDirectory + 'BooleanTest.parquet' );
46
45
47
46
{UNSIGNED testid, STRING3 testname, BOOLEAN isEqual} booleanJoin (booleanDatasetOut a, booleanDatasetIn b) := TRANSFORM
48
47
SELF .testid := a.testid;
@@ -63,9 +62,7 @@ integerDatasetOut := DATASET([
63
62
{101 , 'maxvalues' , 127 , 32767 , 8388607 , 2147483647 , 549755813887 , 140737488355327 , 36028797018963967 , 9223372036854775807 }
64
63
], integerRecord);
65
64
66
- ParquetIO.Write(integerDatasetOut, dropzoneDirectory + '/IntegerTest.parquet' , TRUE , compressionType);
67
-
68
- integerDatasetIn := ParquetIO.Read(integerRecord, dropzoneDirectory + '/IntegerTest.parquet' );
65
+ integerDatasetIn := ParquetIO.Read(integerRecord, dropzoneDirectory + 'IntegerTest.parquet' );
69
66
70
67
{UNSIGNED testid, STRING testname, BOOLEAN isEqual} integerJoin (integerDatasetOut a, integerDatasetIn b) := TRANSFORM
71
68
SELF .testid := a.testid;
@@ -93,9 +90,7 @@ unsignedDatasetOut := DATASET([
93
90
{201 , 'maxvalues' , 255 , 65535 , 16777215 , 4294967295 , 1099511627775 , 281474976710655 , 72057594037927935 , 18446744073709551615 }
94
91
], unsignedRecord);
95
92
96
- ParquetIO.Write(unsignedDatasetOut, dropzoneDirectory + '/UnsignedTest.parquet' , TRUE , compressionType);
97
-
98
- unsignedDatasetIn := ParquetIO.Read(unsignedRecord, dropzoneDirectory + '/UnsignedTest.parquet' );
93
+ unsignedDatasetIn := ParquetIO.Read(unsignedRecord, dropzoneDirectory + 'UnsignedTest.parquet' );
99
94
100
95
{UNSIGNED testid, STRING testname, BOOLEAN isEqual} unsignedJoin (unsignedDatasetOut a, unsignedDatasetIn b) := TRANSFORM
101
96
SELF .testid := a.testid;
@@ -127,9 +122,7 @@ realDatasetOut := DATASET([
127
122
{305 , 'negative' , - 9 .87D, - 234 .853D}
128
123
], realRecord);
129
124
130
- ParquetIO.Write(realDatasetOut, dropzoneDirectory + '/RealTest.parquet' , TRUE , compressionType);
131
-
132
- realDatasetIn := ParquetIO.Read(realRecord, dropzoneDirectory + '/RealTest.parquet' );
125
+ realDatasetIn := ParquetIO.Read(realRecord, dropzoneDirectory + 'RealTest.parquet' );
133
126
134
127
{UNSIGNED testid, STRING testname, BOOLEAN isEqual} realJoin (realDatasetOut a, realDatasetIn b) := TRANSFORM
135
128
SELF .testid := a.testid;
@@ -152,9 +145,7 @@ decimalDatasetOut := DATASET([
152
145
{402 , 'abb' , 0 .00D}
153
146
], decimalRecord);
154
147
155
- ParquetIO.Write(decimalDatasetOut, dropzoneDirectory + '/DecimalTest.parquet' , TRUE , compressionType);
156
-
157
- decimalDatasetIn := ParquetIO.Read(decimalRecord, dropzoneDirectory + '/DecimalTest.parquet' );
148
+ decimalDatasetIn := ParquetIO.Read(decimalRecord, dropzoneDirectory + 'DecimalTest.parquet' );
158
149
159
150
{UNSIGNED testid, STRING3 testname, BOOLEAN isEqual} decimalJoin (decimalDatasetOut a, decimalDatasetIn b) := TRANSFORM
160
151
SELF .testid := a.testid;
@@ -180,9 +171,7 @@ stringDatasetOut := DATASET([
180
171
{506 , 'data1' , (STRING )X '00FF00FF00FF00FF' }
181
172
], stringRecord);
182
173
183
- ParquetIO.Write(stringDatasetOut, dropzoneDirectory + '/StringTest.parquet' , TRUE , compressionType);
184
-
185
- stringDatasetIn := ParquetIO.Read(stringRecord, dropzoneDirectory + '/StringTest.parquet' );
174
+ stringDatasetIn := ParquetIO.Read(stringRecord, dropzoneDirectory + 'StringTest.parquet' );
186
175
187
176
{STRING5 name, BOOLEAN value} stringJoin (stringDatasetOut a, stringDatasetIn b) := TRANSFORM
188
177
SELF .name := a.name;
@@ -214,9 +203,7 @@ dataDatasetOut := DATASET([
214
203
{604 , 'neg' , X '0000000000000000' , REALToBinary(- 2.71828 ), REALToLargeBinary(- 2.71828 )}
215
204
], dataRecord);
216
205
217
- ParquetIO.Write(dataDatasetOut, dropzoneDirectory + '/DataTest.parquet' , TRUE , compressionType);
218
-
219
- dataDatasetIn := ParquetIO.Read(dataRecord, dropzoneDirectory + '/DataTest.parquet' );
206
+ dataDatasetIn := ParquetIO.Read(dataRecord, dropzoneDirectory + 'DataTest.parquet' );
220
207
221
208
{UNSIGNED testid, STRING5 name, BOOLEAN allEqual} dataJoin(dataDatasetOut a, dataDatasetIn b) := TRANSFORM
222
209
SELF .testid := a.testid;
@@ -243,9 +230,7 @@ varStringDatasetOut := DATASET([
243
230
{706 , 'data1' , (STRING )X '00FF00FF00FF00FF' }
244
231
], varstringRecord);
245
232
246
- ParquetIO.Write(varStringDatasetOut, dropzoneDirectory + '/VarStringTest.parquet' , TRUE , compressionType);
247
-
248
- varStringDatasetIn := ParquetIO.Read(varstringRecord, dropzoneDirectory + '/VarStringTest.parquet' );
233
+ varStringDatasetIn := ParquetIO.Read(varstringRecord, dropzoneDirectory + 'VarStringTest.parquet' );
249
234
250
235
{UNSIGNED testid, STRING testname, BOOLEAN isEqual} varstringJoin (varStringDatasetOut a, varStringDatasetIn b) := TRANSFORM
251
236
SELF .testid := a.testid;
@@ -270,9 +255,7 @@ qStringDatasetOut := DATASET([
270
255
{805 , 'data1' , (STRING )X '00FF00FF00FF00FF' }
271
256
], qstringRecord);
272
257
273
- ParquetIO.Write(qStringDatasetOut, dropzoneDirectory + '/QStringTest.parquet' , TRUE , compressionType);
274
-
275
- qStringDatasetIn := ParquetIO.Read(qstringRecord, dropzoneDirectory + '/QStringTest.parquet' );
258
+ qStringDatasetIn := ParquetIO.Read(qstringRecord, dropzoneDirectory + 'QStringTest.parquet' );
276
259
277
260
{UNSIGNED testid, STRING testname, BOOLEAN isEqual} qstringJoin (qStringDatasetOut a, qStringDatasetIn b) := TRANSFORM
278
261
SELF .testid := a.testid;
@@ -294,9 +277,7 @@ utf8DatasetOut := DATASET([
294
277
{902 , 'extrachars1' , U8 '\316\2 21\316\2 22\316\2 23\316\2 24\316\2 25\316\2 26\316\2 27\316\2 30\316\2 31\316\2 32\316\2 33\316\2 34' }
295
278
], utf8Record);
296
279
297
- ParquetIO.Write(utf8DatasetOut, dropzoneDirectory + '/UTF8Test.parquet' , TRUE , compressionType);
298
-
299
- utf8DatasetIn := ParquetIO.Read(utf8Record, dropzoneDirectory + '/UTF8Test.parquet' );
280
+ utf8DatasetIn := ParquetIO.Read(utf8Record, dropzoneDirectory + 'UTF8Test.parquet' );
300
281
301
282
{UNSIGNED testid, STRING testname, BOOLEAN isEqual} utf8Join (utf8DatasetOut a, utf8DatasetIn b) := TRANSFORM
302
283
SELF .testid := a.testid;
@@ -319,9 +300,7 @@ unicodeDatasetOut := DATASET([
319
300
{1003 , 'adw' , U 'ᄠᄡᄢᄣᄤᄥᄦᄨᄩᄪᄫᄬᄭᄮᄯᆰᆱᆲᆳᆴᆵᆶᆷᆸᆹᆼᆽᇲᇳᇴᇵᇶᇷᇸ㈸㋄㋅㋆㋇㋈㋉㋊㋋㋌' }
320
301
], unicodeRecord);
321
302
322
- ParquetIO.Write(unicodeDatasetOut, dropzoneDirectory + '/UnicodeTest.parquet' , TRUE , compressionType);
323
-
324
- unicodeDatasetIn := ParquetIO.Read(unicodeRecord, dropzoneDirectory + '/UnicodeTest.parquet' );
303
+ unicodeDatasetIn := ParquetIO.Read(unicodeRecord, dropzoneDirectory + 'UnicodeTest.parquet' );
325
304
326
305
{UNSIGNED testid, STRING testname, BOOLEAN isEqual} unicodeJoin(unicodeDatasetOut a, unicodeDatasetIn b) := TRANSFORM
327
306
SELF .testid := a.testid;
@@ -343,22 +322,52 @@ setDatasetOut := DATASET([
343
322
{1102 , 'multiple' , [TRUE , FALSE ], [1 , 2 ], [1 , 2 ], [1.0 , 2.0 ], [1.0 , 2.0 ], ['a' , 'b' ], ['a' , 'b' ], ['a' , 'b' ], [U 'a' , U 'b' ], [U 'a' , U 'b' ], [X '0000' , X 'FFFF' ]}
344
323
], setRecord);
345
324
346
// Set test: the standalone write and the inline OUTPUT of the read are
// removed. The read from SetTest.parquet is instead bound to setResult; the
// matching ParquetIO.Write(setDatasetOut, ...) and OUTPUT(setResult,
// NAMED('SetTest')) both appear in the SEQUENTIAL action in the OUTPUT section.
// No comparison against setDatasetOut is visible here; the records read back
// are what gets emitted.
- ParquetIO.Write(setDatasetOut, dropzoneDirectory + '/SetTest.parquet' , TRUE , compressionType);
347
-
348
- OUTPUT (ParquetIO.Read(setRecord, dropzoneDirectory + '/SetTest.parquet' ), NAMED ('SetTest' ));
325
+ setResult := ParquetIO.Read(setRecord, dropzoneDirectory + 'SetTest.parquet' );
349
326
350
327
// ======================== OUTPUT ========================
351
328
352
- PARALLEL (
353
- OUTPUT (booleanResult, NAMED ('BooleanTest' ), OVERWRITE ),
354
- OUTPUT (integerResult, NAMED ('IntegerTest' ), OVERWRITE ),
355
- OUTPUT (unsignedResult, NAMED ('UnsignedTest' ), OVERWRITE ),
356
- OUTPUT (realResult, NAMED ('RealTest' ), OVERWRITE ),
357
- OUTPUT (decimalResult, NAMED ('DecimalTest' ), OVERWRITE ),
358
- OUTPUT (stringResult, NAMED ('StringTest' ), OVERWRITE ),
359
- OUTPUT (dataResult, NAMED ('DataTest' ), OVERWRITE ),
360
- OUTPUT (varStringResult, NAMED ('VarStringTest' ), OVERWRITE ),
361
- OUTPUT (qStringResult, NAMED ('QStringTest' ), OVERWRITE ),
362
- OUTPUT (utf8Result, NAMED ('UTF8Test' ), OVERWRITE ),
363
- OUTPUT (unicodeResult, NAMED ('UnicodeTest' ), OVERWRITE )
329
// Test driver. SEQUENTIAL runs three phases in the order listed:
//   1. a PARALLEL group of twelve ParquetIO.Write calls, one per dataset,
//      each targeting dropzoneDirectory + '<Name>Test.parquet' and passing
//      TRUE and compressionType (TRUE is presumably the overwrite flag -
//      confirm against the Parquet plugin);
//   2. twelve named OUTPUTs, one per result attribute. setResult is a direct
//      ParquetIO.Read; the other *Result attributes are defined outside this
//      view, presumably from the JOINs of each *DatasetOut with *DatasetIn;
//   3. a PARALLEL group of twelve FileServices.DeleteExternalFile calls that
//      remove the same twelve files ('.' is presumably the local host -
//      confirm).
// NOTE(review): cleanup is the last SEQUENTIAL step, so presumably it is
// skipped if an earlier write or output fails, leaving files in the dropzone -
// confirm whether that matters for the regression suite.
+ SEQUENTIAL (
330
+ // Set up test files: all writes are grouped so they complete before any result is output
331
+ PARALLEL (
332
+ ParquetIO.Write(booleanDatasetOut, dropzoneDirectory + 'BooleanTest.parquet' , TRUE , compressionType),
333
+ ParquetIO.Write(integerDatasetOut, dropzoneDirectory + 'IntegerTest.parquet' , TRUE , compressionType),
334
+ ParquetIO.Write(unsignedDatasetOut, dropzoneDirectory + 'UnsignedTest.parquet' , TRUE , compressionType),
335
+ ParquetIO.Write(realDatasetOut, dropzoneDirectory + 'RealTest.parquet' , TRUE , compressionType),
336
+ ParquetIO.Write(decimalDatasetOut, dropzoneDirectory + 'DecimalTest.parquet' , TRUE , compressionType),
337
+ ParquetIO.Write(stringDatasetOut, dropzoneDirectory + 'StringTest.parquet' , TRUE , compressionType),
338
+ ParquetIO.Write(dataDatasetOut, dropzoneDirectory + 'DataTest.parquet' , TRUE , compressionType),
339
+ ParquetIO.Write(varStringDatasetOut, dropzoneDirectory + 'VarStringTest.parquet' , TRUE , compressionType),
340
+ ParquetIO.Write(qStringDatasetOut, dropzoneDirectory + 'QStringTest.parquet' , TRUE , compressionType),
341
+ ParquetIO.Write(utf8DatasetOut, dropzoneDirectory + 'UTF8Test.parquet' , TRUE , compressionType),
342
+ ParquetIO.Write(unicodeDatasetOut, dropzoneDirectory + 'UnicodeTest.parquet' , TRUE , compressionType),
343
+ ParquetIO.Write(setDatasetOut, dropzoneDirectory + 'SetTest.parquet' , TRUE , compressionType)
344
+ ),
345
+ // Read and compare results: one named result per test, emitted one after another
346
+ OUTPUT (booleanResult, NAMED ('BooleanTest' )),
347
+ OUTPUT (integerResult, NAMED ('IntegerTest' )),
348
+ OUTPUT (unsignedResult, NAMED ('UnsignedTest' )),
349
+ OUTPUT (realResult, NAMED ('RealTest' )),
350
+ OUTPUT (decimalResult, NAMED ('DecimalTest' )),
351
+ OUTPUT (stringResult, NAMED ('StringTest' )),
352
+ OUTPUT (dataResult, NAMED ('DataTest' )),
353
+ OUTPUT (varStringResult, NAMED ('VarStringTest' )),
354
+ OUTPUT (qStringResult, NAMED ('QStringTest' )),
355
+ OUTPUT (utf8Result, NAMED ('UTF8Test' )),
356
+ OUTPUT (unicodeResult, NAMED ('UnicodeTest' )),
357
+ OUTPUT (setResult, NAMED ('SetTest' )),
358
+ // Clean up temporary files: delete exactly the twelve files written in the first phase
359
+ PARALLEL (
360
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'BooleanTest.parquet' ),
361
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'IntegerTest.parquet' ),
362
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'UnsignedTest.parquet' ),
363
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'RealTest.parquet' ),
364
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'DecimalTest.parquet' ),
365
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'StringTest.parquet' ),
366
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'DataTest.parquet' ),
367
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'VarStringTest.parquet' ),
368
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'QStringTest.parquet' ),
369
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'UTF8Test.parquet' ),
370
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'UnicodeTest.parquet' ),
371
+ FileServices.DeleteExternalFile('.' , dropzoneDirectory + 'SetTest.parquet' )
372
+ )
364
373
);
0 commit comments