Skip to content

Commit

Permalink
Corrections to multi-locale cases. (Bears-R-Us#2574)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ethan-DeBandi99 committed Jul 14, 2023
1 parent efc4186 commit f1ec971
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 25 deletions.
5 changes: 2 additions & 3 deletions src/ArrowFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1418,8 +1418,6 @@ int cpp_writeStrListColumnToParquet(const char* filename, void* chpl_segs, void*
int64_t i = 0;
int64_t numLeft = numelems;
auto segments = (int64_t*)chpl_segs;
auto offsets = (int64_t*)chpl_offsets;
auto chpl_ptr = (uint8_t*)chpl_arr;
int64_t segIdx = 0; // index into segarray segments
int64_t offIdx = 0; // index into the segstring segments
int64_t valIdx = 0; // index into chpl_arr
Expand All @@ -1432,6 +1430,8 @@ int cpp_writeStrListColumnToParquet(const char* filename, void* chpl_segs, void*
while (numLeft > 0 && count < rowGroupSize) { // ensures rowGroupSize maintained
int64_t segmentLength = segments[segIdx+1] - segments[segIdx];
if (segmentLength > 0) {
auto offsets = (int64_t*)chpl_offsets;
auto chpl_ptr = (uint8_t*)chpl_arr;
for (int64_t x = 0; x < segmentLength; x++){
int16_t rep_lvl = (x == 0) ? 0 : 1;
int16_t def_lvl = 3;
Expand All @@ -1451,7 +1451,6 @@ int cpp_writeStrListColumnToParquet(const char* filename, void* chpl_segs, void*
}
segIdx++;
numLeft--;count++;

}
}

Expand Down
67 changes: 45 additions & 22 deletions src/ParquetMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -1092,39 +1092,62 @@ module ParquetMsg {
createEmptyListParquetFile(myFilename, dsetName, c_dtype, compression);
}
else {
var localSegments = segments[locDom];
var startOffsetIdx = localSegments[locDom.low];
var endOffsetIdx = if (lastOffset == localSegments[locDom.high]) then lastOffsetIdx else segments[locDom.high + 1] - 1;
var offIdxRange = startOffsetIdx..endOffsetIdx;

// need to get the local string values
var localOffsets: [offIdxRange] int = oldOff[offIdxRange];
var startValIdx = oldOff[offIdxRange.low];
var endValIdx = if (lastOffsetIdx == offIdxRange.high) then lastValIdx else oldOff[offIdxRange.high + 1] - 1;
var valIdxRange = startValIdx..endValIdx;
var localVals: [valIdxRange] uint(8) = oldVal[valIdxRange];

var localSegments = segments[locDom];
var locSegments: [0..#locDom.size+1] int;
locSegments[0..#locDom.size] = segments[locDom];
if locDom.high == segments.domain.high then
locSegments[locSegments.domain.high] = extraSegment;
else
locSegments[locSegments.domain.high] = segments[locDom.high+1];

var locOffsets: [0..#offIdxRange.size+1] int;
locOffsets[0..#offIdxRange.size] = oldOff[offIdxRange];
if offIdxRange.high == oldOff.domain.high then
locOffsets[locOffsets.domain.high] = extraOffset;
else
locOffsets[locOffsets.domain.high] = oldOff[offIdxRange.high+1];

var startOffsetIdx = localSegments[locDom.low];
var endOffsetIdx = if (lastOffset == localSegments[locDom.high]) then lastOffsetIdx else segments[locDom.high + 1] - 1;
var offIdxRange = startOffsetIdx..endOffsetIdx;

var pqErr = new parquetErrorMsg();
var dtypeRep = ARROWSTRING;
if c_writeStrListColumnToParquet(myFilename.localize().c_str(), c_ptrTo(locSegments), c_ptrTo(locOffsets),
c_ptrTo(localVals), dsetName.localize().c_str(), locSegments.size-1,
var valPtr: c_void_ptr = nil;
var offPtr: c_void_ptr = nil;

// need to get the local string values
if offIdxRange.size > 0 {
var localOffsets: [offIdxRange] int = oldOff[offIdxRange];
var startValIdx = oldOff[offIdxRange.low];
var endValIdx = if (lastOffsetIdx == offIdxRange.high) then lastValIdx else oldOff[offIdxRange.high + 1] - 1;
var valIdxRange = startValIdx..endValIdx;
var localVals: [valIdxRange] uint(8) = oldVal[valIdxRange];

var locOffsets: [0..#offIdxRange.size+1] int;
locOffsets[0..#offIdxRange.size] = oldOff[offIdxRange];

if offIdxRange.high == oldOff.domain.high {
locOffsets[locOffsets.domain.high] = extraOffset;
} else {
locOffsets[locOffsets.domain.high] = oldOff[offIdxRange.high+1];
}

if localVals.size > 0 {
valPtr = c_ptrTo(localVals);
}
if locOffsets.size > 0 {
offPtr = c_ptrTo(locOffsets);
}
// the call into C must happen inside this if block so that localVals and locOffsets (whose pointers are passed) stay in scope
if c_writeStrListColumnToParquet(myFilename.localize().c_str(), c_ptrTo(locSegments), offPtr,
valPtr, dsetName.localize().c_str(), locSegments.size-1,
ROWGROUPS, dtypeRep, compression, c_ptrTo(pqErr.errMsg)) == ARROWERROR {
pqErr.parquetError(getLineNumber(), getRoutineName(), getModuleName());
pqErr.parquetError(getLineNumber(), getRoutineName(), getModuleName());
}
}
else {
// empty segment case
if c_writeStrListColumnToParquet(myFilename.localize().c_str(), c_ptrTo(locSegments), offPtr,
valPtr, dsetName.localize().c_str(), locSegments.size-1,
ROWGROUPS, dtypeRep, compression, c_ptrTo(pqErr.errMsg)) == ARROWERROR {
pqErr.parquetError(getLineNumber(), getRoutineName(), getModuleName());
}
}

}
}
return filesExist; // trigger warning if overwrite occurring
Expand Down

0 comments on commit f1ec971

Please sign in to comment.