diff --git a/src/ArrowFunctions.cpp b/src/ArrowFunctions.cpp index e55552709e..a79f1ca95f 100644 --- a/src/ArrowFunctions.cpp +++ b/src/ArrowFunctions.cpp @@ -1418,8 +1418,6 @@ int cpp_writeStrListColumnToParquet(const char* filename, void* chpl_segs, void* int64_t i = 0; int64_t numLeft = numelems; auto segments = (int64_t*)chpl_segs; - auto offsets = (int64_t*)chpl_offsets; - auto chpl_ptr = (uint8_t*)chpl_arr; int64_t segIdx = 0; // index into segarray segments int64_t offIdx = 0; // index into the segstring segments int64_t valIdx = 0; // index into chpl_arr @@ -1432,6 +1430,8 @@ int cpp_writeStrListColumnToParquet(const char* filename, void* chpl_segs, void* while (numLeft > 0 && count < rowGroupSize) { // ensures rowGroupSize maintained int64_t segmentLength = segments[segIdx+1] - segments[segIdx]; if (segmentLength > 0) { + auto offsets = (int64_t*)chpl_offsets; + auto chpl_ptr = (uint8_t*)chpl_arr; for (int64_t x = 0; x < segmentLength; x++){ int16_t rep_lvl = (x == 0) ? 0 : 1; int16_t def_lvl = 3; @@ -1451,7 +1451,6 @@ int cpp_writeStrListColumnToParquet(const char* filename, void* chpl_segs, void* } segIdx++; numLeft--;count++; - } } diff --git a/src/ParquetMsg.chpl b/src/ParquetMsg.chpl index e5fc2a56b6..de26436eac 100644 --- a/src/ParquetMsg.chpl +++ b/src/ParquetMsg.chpl @@ -1092,18 +1092,7 @@ module ParquetMsg { createEmptyListParquetFile(myFilename, dsetName, c_dtype, compression); } else { - var localSegments = segments[locDom]; - var startOffsetIdx = localSegments[locDom.low]; - var endOffsetIdx = if (lastOffset == localSegments[locDom.high]) then lastOffsetIdx else segments[locDom.high + 1] - 1; - var offIdxRange = startOffsetIdx..endOffsetIdx; - - // need to get the local string values - var localOffsets: [offIdxRange] int = oldOff[offIdxRange]; - var startValIdx = oldOff[offIdxRange.low]; - var endValIdx = if (lastOffsetIdx == offIdxRange.high) then lastValIdx else oldOff[offIdxRange.high + 1] - 1; - var valIdxRange = startValIdx..endValIdx; - var localVals: [valIdxRange] uint(8) = oldVal[valIdxRange]; - + var localSegments = segments[locDom]; var locSegments: [0..#locDom.size+1] int; locSegments[0..#locDom.size] = segments[locDom]; if locDom.high == segments.domain.high then @@ -1111,20 +1100,54 @@ module ParquetMsg { else locSegments[locSegments.domain.high] = segments[locDom.high+1]; - var locOffsets: [0..#offIdxRange.size+1] int; - locOffsets[0..#offIdxRange.size] = oldOff[offIdxRange]; - if offIdxRange.high == oldOff.domain.high then - locOffsets[locOffsets.domain.high] = extraOffset; - else - locOffsets[locOffsets.domain.high] = oldOff[offIdxRange.high+1]; - + var startOffsetIdx = localSegments[locDom.low]; + var endOffsetIdx = if (lastOffset == localSegments[locDom.high]) then lastOffsetIdx else segments[locDom.high + 1] - 1; + var offIdxRange = startOffsetIdx..endOffsetIdx; + var pqErr = new parquetErrorMsg(); var dtypeRep = ARROWSTRING; - if c_writeStrListColumnToParquet(myFilename.localize().c_str(), c_ptrTo(locSegments), c_ptrTo(locOffsets), - c_ptrTo(localVals), dsetName.localize().c_str(), locSegments.size-1, + var valPtr: c_void_ptr = nil; + var offPtr: c_void_ptr = nil; + + // need to get the local string values + if offIdxRange.size > 0 { + var localOffsets: [offIdxRange] int = oldOff[offIdxRange]; + var startValIdx = oldOff[offIdxRange.low]; + var endValIdx = if (lastOffsetIdx == offIdxRange.high) then lastValIdx else oldOff[offIdxRange.high + 1] - 1; + var valIdxRange = startValIdx..endValIdx; + var localVals: [valIdxRange] uint(8) = oldVal[valIdxRange]; + + var locOffsets: [0..#offIdxRange.size+1] int; + locOffsets[0..#offIdxRange.size] = oldOff[offIdxRange]; + + if offIdxRange.high == oldOff.domain.high { + locOffsets[locOffsets.domain.high] = extraOffset; + } else { + locOffsets[locOffsets.domain.high] = oldOff[offIdxRange.high+1]; + } + + if localVals.size > 0 { + valPtr = c_ptrTo(localVals); + } + if locOffsets.size > 0 { + offPtr = c_ptrTo(locOffsets); + } + // the call to c must be within the if block so the arrays stay in scope + if c_writeStrListColumnToParquet(myFilename.localize().c_str(), c_ptrTo(locSegments), offPtr, + valPtr, dsetName.localize().c_str(), locSegments.size-1, ROWGROUPS, dtypeRep, compression, c_ptrTo(pqErr.errMsg)) == ARROWERROR { - pqErr.parquetError(getLineNumber(), getRoutineName(), getModuleName()); + pqErr.parquetError(getLineNumber(), getRoutineName(), getModuleName()); + } } + else { + // empty segment case + if c_writeStrListColumnToParquet(myFilename.localize().c_str(), c_ptrTo(locSegments), offPtr, + valPtr, dsetName.localize().c_str(), locSegments.size-1, + ROWGROUPS, dtypeRep, compression, c_ptrTo(pqErr.errMsg)) == ARROWERROR { + pqErr.parquetError(getLineNumber(), getRoutineName(), getModuleName()); + } + } + } } return filesExist; // trigger warning if overwrite occuring