Skip to content

Commit

Permalink
backport for field (#6629)
Browse files Browse the repository at this point in the history
* backport part of ClickHouse/ClickHouse#53182

* backport part of ClickHouse/ClickHouse#54999

* backport part of ClickHouse/ClickHouse#62264

* Merge pull request #64024 from rschu1ze/throw-for-invalid-types

Deserialize untrusted binary inputs in a safer way

* Merge pull request #70803 from ClickHouse/make-array-field-serializetion-consistent

Make Array Field serialization consistent.

* fixes
  • Loading branch information
yl-lisen authored and Jasmine-ge committed Dec 9, 2024
1 parent 53443a5 commit 2bae2ba
Show file tree
Hide file tree
Showing 6 changed files with 696 additions and 99 deletions.
141 changes: 56 additions & 85 deletions src/Core/Field.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,27 @@
#include <Common/FieldVisitorWriteBinary.h>


using namespace std::literals;

namespace DB
{

namespace ErrorCodes
{
extern const int CANNOT_RESTORE_FROM_FIELD_DUMP;
extern const int DECIMAL_OVERFLOW;
extern const int INCORRECT_DATA;
}

inline Field getBinaryValue(UInt8 type, ReadBuffer & buf)
template <typename T>
T DecimalField<T>::getScaleMultiplier() const
{
switch (type)
return DecimalUtils::scaleMultiplier<T>(scale);
}

Field getBinaryValue(UInt8 type, ReadBuffer & buf)
{
switch (static_cast<Field::Types::Which>(type))
{
case Field::Types::Null:
{
Expand Down Expand Up @@ -96,7 +106,7 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf)
case Field::Types::Array:
{
Array value;
readBinary(value, buf);
readBinaryArray(value, buf);
return value;
}
case Field::Types::Tuple:
Expand Down Expand Up @@ -131,63 +141,30 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf)
return bool(value);
}
case Field::Types::Decimal32:
{
Decimal32 value;
UInt32 scale;
readBinary(value, buf);
readVarUInt(scale, buf);
return DecimalField<Decimal32>(value, scale);
}
case Field::Types::Decimal64:
{
Decimal64 value;
UInt32 scale;
readBinary(value, buf);
readVarUInt(scale, buf);
return DecimalField<Decimal64>(value, scale);
}
case Field::Types::Decimal128:
{
Decimal128 value;
UInt32 scale;
readBinary(value, buf);
readVarUInt(scale, buf);
return DecimalField<Decimal128>(value, scale);
}
case Field::Types::Decimal256:
{
Decimal256 value;
UInt32 scale;
readBinary(value, buf);
readVarUInt(scale, buf);
return DecimalField<Decimal256>(value, scale);
}
return Field();
}
throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown field type {}", std::to_string(type));
}

void readBinaryArray(Array & x, ReadBuffer & buf)
{
size_t size;
UInt8 type;
readBinary(type, buf);
readBinary(size, buf);

for (size_t index = 0; index < size; ++index)
x.push_back(getBinaryValue(type, buf));
x.push_back(readFieldBinary(buf));
}

void writeBinary(const Array & x, WriteBuffer & buf)
void writeBinaryArray(const Array & x, WriteBuffer & buf)
{
UInt8 type = Field::Types::Null;
size_t size = x.size();
if (size)
type = x.front().getType();
writeBinary(type, buf);
writeBinary(size, buf);

for (const auto & elem : x)
Field::dispatch([&buf] (const auto & value) { FieldVisitorWriteBinary()(value, buf); }, elem);
writeFieldBinary(elem, buf);
}

void writeText(const Array & x, WriteBuffer & buf)
Expand All @@ -202,11 +179,7 @@ void readBinary(Tuple & x, ReadBuffer & buf)
readBinary(size, buf);

for (size_t index = 0; index < size; ++index)
{
UInt8 type;
readBinary(type, buf);
x.push_back(getBinaryValue(type, buf));
}
x.push_back(readFieldBinary(buf));
}

void writeBinary(const Tuple & x, WriteBuffer & buf)
Expand All @@ -215,11 +188,7 @@ void writeBinary(const Tuple & x, WriteBuffer & buf)
writeBinary(size, buf);

for (const auto & elem : x)
{
const UInt8 type = elem.getType();
writeBinary(type, buf);
Field::dispatch([&buf] (const auto & value) { FieldVisitorWriteBinary()(value, buf); }, elem);
}
writeFieldBinary(elem, buf);
}

void writeText(const Tuple & x, WriteBuffer & buf)
Expand All @@ -233,11 +202,7 @@ void readBinary(Map & x, ReadBuffer & buf)
readBinary(size, buf);

for (size_t index = 0; index < size; ++index)
{
UInt8 type;
readBinary(type, buf);
x.push_back(getBinaryValue(type, buf));
}
x.push_back(readFieldBinary(buf));
}

void writeBinary(const Map & x, WriteBuffer & buf)
Expand All @@ -246,11 +211,7 @@ void writeBinary(const Map & x, WriteBuffer & buf)
writeBinary(size, buf);

for (const auto & elem : x)
{
const UInt8 type = elem.getType();
writeBinary(type, buf);
Field::dispatch([&buf] (const auto & value) { FieldVisitorWriteBinary()(value, buf); }, elem);
}
writeFieldBinary(elem, buf);
}

void writeText(const Map & x, WriteBuffer & buf)
Expand Down Expand Up @@ -328,7 +289,6 @@ void writeFieldBinary(const Field & x, WriteBuffer & buf)
{
const UInt8 type = x.getType();
writeBinary(type, buf);

Field::dispatch([&buf] (const auto & value) { FieldVisitorWriteBinary()(value, buf); }, x);
}

Expand Down Expand Up @@ -595,7 +555,7 @@ template bool decimalLessOrEqual<Decimal256>(Decimal256 x, Decimal256 y, UInt32
template bool decimalLessOrEqual<DateTime64>(DateTime64 x, DateTime64 y, UInt32 x_scale, UInt32 y_scale);


inline void writeText(const Null & x, WriteBuffer & buf)
void writeText(const Null & x, WriteBuffer & buf)
{
if (x.isNegativeInfinity())
writeText("-inf", buf);
Expand All @@ -617,33 +577,44 @@ String toString(const Field & x)
x);
}

String fieldTypeToString(Field::Types::Which type)
std::string_view fieldTypeToString(Field::Types::Which type)
{
switch (type)
{
case Field::Types::Which::Null: return "null";
case Field::Types::Which::Array: return "array";
case Field::Types::Which::Tuple: return "tuple";
case Field::Types::Which::Map: return "map";
case Field::Types::Which::Object: return "json";
case Field::Types::Which::AggregateFunctionState: return "aggregate_function_state";
case Field::Types::Which::Bool: return "bool";
case Field::Types::Which::String: return "string";
case Field::Types::Which::Decimal32: return "decimal32";
case Field::Types::Which::Decimal64: return "decimal64";
case Field::Types::Which::Decimal128: return "decimal128";
case Field::Types::Which::Decimal256: return "decimal256";
case Field::Types::Which::Float64: return "float64";
case Field::Types::Which::Int64: return "int64";
case Field::Types::Which::Int128: return "int128";
case Field::Types::Which::Int256: return "int256";
case Field::Types::Which::UInt64: return "uint64";
case Field::Types::Which::UInt128: return "uint128";
case Field::Types::Which::UInt256: return "uint256";
case Field::Types::Which::UUID: return "uuid";
case Field::Types::Which::IPv4: return "ipv4";
case Field::Types::Which::IPv6: return "ipv6";
case Field::Types::Which::Null: return "null"sv;
case Field::Types::Which::Array: return "array"sv;
case Field::Types::Which::Tuple: return "tuple"sv;
case Field::Types::Which::Map: return "map"sv;
case Field::Types::Which::Object: return "json"sv;
case Field::Types::Which::AggregateFunctionState: return "aggregate_function_state"sv;
case Field::Types::Which::Bool: return "bool"sv;
case Field::Types::Which::String: return "string"sv;
case Field::Types::Which::Decimal32: return "decimal32"sv;
case Field::Types::Which::Decimal64: return "decimal64"sv;
case Field::Types::Which::Decimal128: return "decimal128"sv;
case Field::Types::Which::Decimal256: return "decimal256"sv;
case Field::Types::Which::Float64: return "float64"sv;
case Field::Types::Which::Int64: return "int64"sv;
case Field::Types::Which::Int128: return "int128"sv;
case Field::Types::Which::Int256: return "int256"sv;
case Field::Types::Which::UInt64: return "uint64"sv;
case Field::Types::Which::UInt128: return "uint128"sv;
case Field::Types::Which::UInt256: return "uint256"sv;
case Field::Types::Which::UUID: return "uuid"sv;
case Field::Types::Which::IPv4: return "ipv4"sv;
case Field::Types::Which::IPv6: return "ipv6"sv;
}
}

/// Returns the textual name of this field's type tag (`which`) by delegating to fieldTypeToString().
/// Keep in mind that "magic_enum" is very expensive for the compiler; that's why we don't use it here.
std::string_view Field::getTypeName() const
{
return fieldTypeToString(which);
}

template class DecimalField<Decimal32>;
template class DecimalField<Decimal64>;
template class DecimalField<Decimal128>;
template class DecimalField<Decimal256>;
template class DecimalField<DateTime64>;
}
28 changes: 17 additions & 11 deletions src/Core/Field.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include <Common/AllocatorWithMemoryTracking.h>
#include <Core/Types.h>
#include <Core/Defines.h>
#include <Core/DecimalFunctions.h>
#include <Core/UUID.h>
#include <base/IPv4andIPv6.h>
#include <base/DayNum.h>
Expand Down Expand Up @@ -117,7 +116,7 @@ class DecimalField

operator T() const { return dec; } /// NOLINT
T getValue() const { return dec; }
T getScaleMultiplier() const { return DecimalUtils::scaleMultiplier<T>(scale); }
T getScaleMultiplier() const;
UInt32 getScale() const { return scale; }

template <typename U>
Expand Down Expand Up @@ -166,6 +165,12 @@ class DecimalField
UInt32 scale;
};

extern template class DecimalField<Decimal32>;
extern template class DecimalField<Decimal64>;
extern template class DecimalField<Decimal128>;
extern template class DecimalField<Decimal256>;
extern template class DecimalField<DateTime64>;

template <typename T> constexpr bool is_decimal_field = false;
template <> constexpr inline bool is_decimal_field<DecimalField<Decimal32>> = true;
template <> constexpr inline bool is_decimal_field<DecimalField<Decimal64>> = true;
Expand Down Expand Up @@ -414,7 +419,7 @@ class Field

Types::Which getType() const { return which; }

constexpr std::string_view getTypeName() const { return magic_enum::enum_name(which); }
std::string_view getTypeName() const;

bool isNull() const { return which == Types::Null; }
template <typename T>
Expand Down Expand Up @@ -902,14 +907,15 @@ inline Field & Field::operator=(String && str)
class ReadBuffer;
class WriteBuffer;

/// It is assumed that all elements of the array have the same type.
void readBinary(Array & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Array &, ReadBuffer &) { throw Exception("Cannot read array.", ErrorCodes::NOT_IMPLEMENTED); }
[[noreturn]] inline void readQuoted(Array &, ReadBuffer &) { throw Exception("Cannot read array.", ErrorCodes::NOT_IMPLEMENTED); }
/// Binary serialization of generic field.
void writeFieldBinary(const Field & x, WriteBuffer & buf);
Field readFieldBinary(ReadBuffer & buf);

void readBinaryArray(Array & x, ReadBuffer & buf);
[[noreturn]] inline void readText(Array &, ReadBuffer &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot read Array."); }
[[noreturn]] inline void readQuoted(Array &, ReadBuffer &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot read Array."); }

/// It is assumed that all elements of the array have the same type.
/// Also write size and type into buf. UInt64 and Int64 is written in variadic size form
void writeBinary(const Array & x, WriteBuffer & buf);
void writeBinaryArray(const Array & x, WriteBuffer & buf);
void writeText(const Array & x, WriteBuffer & buf);
[[noreturn]] inline void writeQuoted(const Array &, WriteBuffer &) { throw Exception("Cannot write array quoted.", ErrorCodes::NOT_IMPLEMENTED); }

Expand Down Expand Up @@ -957,7 +963,7 @@ void writeFieldText(const Field & x, WriteBuffer & buf);

String toString(const Field & x);

String fieldTypeToString(Field::Types::Which type);
std::string_view fieldTypeToString(Field::Types::Which type);

}

Expand Down
4 changes: 2 additions & 2 deletions src/Interpreters/ConvertStringsToEnumVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ namespace
/// @note We place strings in ascending order here under the assumption it could speed up String to Enum conversion.
String makeStringsEnum(const std::set<String> & values)
{
String enum_string = "Enum8(";
String enum_string = "enum8(";
if (values.size() >= 255)
enum_string = "Enum16(";
enum_string = "enum16(";

size_t number = 1;
for (const auto & item : values)
Expand Down
Loading

0 comments on commit 2bae2ba

Please sign in to comment.