From 48b26d7543533d89d2c60996c448812215a0d242 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Thu, 31 Aug 2023 16:09:59 +0800 Subject: [PATCH] ORC-1315: [C++] Fix byte to integer conversions fail on platforms with unsigned char type ### What changes were proposed in this pull request? This patch adds static_cast for the char type cast in RLE/ColumnReader. ### Why are the changes needed? For the context of [ORC-1315](https://issues.apache.org/jira/browse/ORC-1315), this patch fixes signed char to unsigned char conversions fail where char is by default unsigned. ### How was this patch tested? No new tests were added. Closes #1607 from wgtmac/branch-1.8. Authored-by: Gang Wu Signed-off-by: Gang Wu --- c++/src/ByteRLE.cc | 6 +++--- c++/src/ColumnReader.cc | 2 +- c++/src/RLEv1.cc | 2 +- c++/test/TestByteRle.cc | 31 +++++++++++++++---------------- c++/test/TestColumnReader.cc | 4 ++-- tools/test/TestMatch.cc | 2 +- 6 files changed, 23 insertions(+), 24 deletions(-) diff --git a/c++/src/ByteRLE.cc b/c++/src/ByteRLE.cc index 557fc8fd76..cdaed55861 100644 --- a/c++/src/ByteRLE.cc +++ b/c++/src/ByteRLE.cc @@ -366,7 +366,7 @@ namespace orc { if (bufferStart == bufferEnd) { nextBuffer(); } - return *(bufferStart++); + return static_cast(*(bufferStart++)); } void ByteRleDecoderImpl::readHeader() { @@ -377,7 +377,7 @@ namespace orc { } else { remainingValues = static_cast(ch) + MINIMUM_REPEAT; repeating = true; - value = readByte(); + value = static_cast(readByte()); } } @@ -466,7 +466,7 @@ namespace orc { if (notNull) { for(uint64_t i=0; i < count; ++i) { if (notNull[position + i]) { - data[position + i] = readByte(); + data[position + i] = static_cast(readByte()); consumed += 1; } } diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index f4a4df9248..f34185982c 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -131,7 +131,7 @@ namespace orc { */ void expandBytesToLongs(int64_t* buffer, uint64_t numValues) { for(size_t i=numValues - 1; i < numValues; --i) { - buffer[i] = reinterpret_cast(buffer)[i]; + buffer[i] = reinterpret_cast(buffer)[i]; } } diff --git a/c++/src/RLEv1.cc b/c++/src/RLEv1.cc index d80a81a56d..e82b7b07e8 100644 --- a/c++/src/RLEv1.cc +++ b/c++/src/RLEv1.cc @@ -147,7 +147,7 @@ signed char RleDecoderV1::readByte() { bufferStart = static_cast(bufferPointer); bufferEnd = bufferStart + bufferLength; } - return *(bufferStart++); + return static_cast(*(bufferStart++)); } uint64_t RleDecoderV1::readLong() { diff --git a/c++/test/TestByteRle.cc b/c++/test/TestByteRle.cc index bdcc13c9e9..8191c7a49f 100644 --- a/c++/test/TestByteRle.cc +++ b/c++/test/TestByteRle.cc @@ -50,8 +50,8 @@ TEST(ByteRle, nullTest) { char buffer[258]; char notNull[266]; char result[266]; - buffer[0] = -128; - buffer[129] = -128; + buffer[0] = static_cast(-128); + buffer[129] = static_cast(-128); for(int i=0; i < 128; ++i) { buffer[1 + i] = static_cast(i); buffer[130 + i] = static_cast(128 + i); @@ -186,7 +186,7 @@ TEST(ByteRle, testNulls) { createByteRleDecoder( std::unique_ptr (new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer), 3))); - std::vector data(16, -1); + std::vector data(16, static_cast(-1)); std::vector notNull(data.size()); for (size_t i = 0; i < data.size(); ++i) { notNull[i] = (i + 1) % 2; @@ -198,17 +198,16 @@ TEST(ByteRle, testNulls) { EXPECT_EQ((i*data.size() + j)/2, data[j]) << "Output wrong at " << (i * data.size() + j); } else { - EXPECT_EQ(-1, data[j]) << "Output wrong at " - << (i * data.size() + j); + EXPECT_EQ(static_cast(-1), data[j]) << "Output wrong at " + << (i * data.size() + j); } } } for (size_t i = 0; i < 8; ++i) { rle->next(data.data(), data.size(), notNull.data()); for (size_t j = 0; j < data.size(); ++j) { - EXPECT_EQ(j % 2 == 0 ? -36 : -1, - data[j]) - << "Output wrong at " << (i * data.size() + j + 32); + EXPECT_EQ(j % 2 == 0 ? static_cast(-36) : static_cast(-1), data[j]) + << "Output wrong at " << (i * data.size() + j + 32); } } } @@ -221,26 +220,26 @@ TEST(ByteRle, testAllNulls) { createByteRleDecoder( std::unique_ptr (new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer)))); - std::vector data(16, -1); + std::vector data(16, static_cast(-1)); std::vector allNull(data.size(), 0); std::vector noNull(data.size(), 1); rle->next(data.data(), data.size(), allNull.data()); for (size_t i = 0; i < data.size(); ++i) { - EXPECT_EQ(-1, data[i]) << "Output wrong at " << i; + EXPECT_EQ(static_cast(-1), data[i]) << "Output wrong at " << i; } rle->next(data.data(), data.size(), noNull.data()); for (size_t i = 0; i < data.size(); ++i) { EXPECT_EQ(i, data[i]) << "Output wrong at " << i; - data[i] = -1; + data[i] = static_cast(-1); } rle->next(data.data(), data.size(), allNull.data()); for (size_t i = 0; i < data.size(); ++i) { - EXPECT_EQ(-1, data[i]) << "Output wrong at " << i; + EXPECT_EQ(static_cast(-1), data[i]) << "Output wrong at " << i; } for (size_t i = 0; i < 4; ++i) { rle->next(data.data(), data.size(), noNull.data()); for (size_t j = 0; j < data.size(); ++j) { - EXPECT_EQ(-36, data[j]) << "Output wrong at " << i; + EXPECT_EQ(static_cast(-36), data[j]) << "Output wrong at " << i; } } rle->next(data.data(), data.size(), allNull.data()); @@ -1107,7 +1106,7 @@ TEST(BooleanRle, skipTestWithNulls) { someNull[1] = 1; std::vector allNull(data.size(), 0); for (size_t i = 0; i < 16384; i += 5) { - data.assign(data.size(), -1); + data.assign(data.size(), static_cast(-1)); rle->next(data.data(), data.size(), someNull.data()); EXPECT_EQ(0, data[0]) << "Output wrong at " << i; EXPECT_EQ(0, data[2]) << "Output wrong at " << i; @@ -1118,7 +1117,7 @@ TEST(BooleanRle, skipTestWithNulls) { rle->skip(4); } rle->skip(0); - data.assign(data.size(), -1); + data.assign(data.size(), static_cast(-1)); rle->next(data.data(), data.size(), allNull.data()); for (size_t j = 0; j < data.size(); ++j) { EXPECT_EQ(0, data[j]) << "Output wrong at " << i << ", " << j; @@ -1390,7 +1389,7 @@ TEST(BooleanRle, seekTestWithNulls) { EXPECT_EQ(i < 8192 ? i & 1 : (i / 3) & 1, data[i]) << "Output wrong at " << i; - data[0] = -1; + data[0] = static_cast(-1); rle->next(data.data(), 1, allNull.data()); EXPECT_EQ(0, data[0]) << "Output wrong at " << i; } while (i != 0); diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc index bc0ecb8eb1..187aecde27 100644 --- a/c++/test/TestColumnReader.cc +++ b/c++/test/TestColumnReader.cc @@ -273,7 +273,7 @@ TEST(TestColumnReader, testByteWithNulls) { EXPECT_EQ(0, longBatch->notNull[i]) << "Wrong value at " << i; } else { EXPECT_EQ(1, longBatch->notNull[i]) << "Wrong value at " << i; - EXPECT_EQ(static_cast(next++), longBatch->data[i]) + EXPECT_EQ(static_cast(next++), static_cast(longBatch->data[i])) << "Wrong value at " << i; } } @@ -337,7 +337,7 @@ TEST(TestColumnReader, testByteSkipsWithNulls) { ASSERT_EQ(true, !batch.hasNulls); ASSERT_EQ(5, longBatch->numElements); ASSERT_EQ(true, longBatch->hasNulls); - EXPECT_EQ(static_cast(-1), longBatch->data[0]); + EXPECT_EQ(static_cast(-1), static_cast(longBatch->data[0])); EXPECT_EQ(true, !longBatch->notNull[1]); EXPECT_EQ(true, !longBatch->notNull[2]); EXPECT_EQ(true, !longBatch->notNull[3]); diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc index 9fb28f4948..bc75593837 100644 --- a/tools/test/TestMatch.cc +++ b/tools/test/TestMatch.cc @@ -150,7 +150,7 @@ namespace orc { ASSERT_EQ(true, expected.nextLine(expectedLine)); line.clear(); printer->printRow(i); - EXPECT_EQ(expectedLine, line) + ASSERT_EQ(expectedLine, line) << "wrong output at row " << (rowCount + i); } rowCount += batch->numElements;