Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions be/src/exec/common/variant_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1846,7 +1846,7 @@ void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length,
auto& [paths, values] = *result;
assert(paths.size() == values.size());
size_t old_num_rows = column_variant.rows();
if (config.enable_flatten_nested) {
if (config.deprecated_enable_flatten_nested) {
// Here we check the paths already in the variant against the paths in the result:
// if two paths that share the same prefix have different structures, we throw an exception.
std::vector<PathInData> check_paths;
Expand Down Expand Up @@ -2155,7 +2155,9 @@ Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& t

std::vector<ParseConfig> configs(variant_column_pos.size());
for (size_t i = 0; i < variant_column_pos.size(); ++i) {
configs[i].enable_flatten_nested = tablet_schema.variant_flatten_nested();
// Deprecated legacy flatten-nested switch. Distinct from variant_enable_nested_group.
configs[i].deprecated_enable_flatten_nested =
tablet_schema.deprecated_variant_flatten_nested();
const auto& column = tablet_schema.column(variant_schema_pos[i]);
if (!column.is_variant_type()) {
return Status::InternalError("column is not variant type, column name: {}",
Expand Down
1 change: 1 addition & 0 deletions be/src/storage/tablet/tablet_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id
schema->set_disable_auto_compaction(tablet_schema.disable_auto_compaction);
}

// Deprecated legacy flatten-nested switch. Distinct from variant_enable_nested_group.
if (tablet_schema.__isset.variant_enable_flatten_nested) {
schema->set_enable_variant_flatten_nested(tablet_schema.variant_enable_flatten_nested);
}
Expand Down
12 changes: 8 additions & 4 deletions be/src/storage/tablet/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1289,7 +1289,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac

_row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
schema.row_store_column_unique_ids().end());
_enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
_deprecated_enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
if (schema.has_is_external_segment_column_meta_used()) {
_is_external_segment_column_meta_used = schema.is_external_segment_column_meta_used();
} else {
Expand Down Expand Up @@ -1370,7 +1370,8 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version
_row_store_page_size = ori_tablet_schema.row_store_page_size();
_storage_page_size = ori_tablet_schema.storage_page_size();
_storage_dict_page_size = ori_tablet_schema.storage_dict_page_size();
_enable_variant_flatten_nested = ori_tablet_schema.variant_flatten_nested();
_deprecated_enable_variant_flatten_nested =
ori_tablet_schema.deprecated_variant_flatten_nested();

// copy from table_schema_param
_schema_version = version;
Expand Down Expand Up @@ -1570,7 +1571,7 @@ void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
_row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end());
tablet_schema_pb->set_enable_variant_flatten_nested(_enable_variant_flatten_nested);
tablet_schema_pb->set_enable_variant_flatten_nested(_deprecated_enable_variant_flatten_nested);
tablet_schema_pb->set_is_external_segment_column_meta_used(
_is_external_segment_column_meta_used);
tablet_schema_pb->set_integer_type_default_use_plain_encoding(
Expand Down Expand Up @@ -1964,7 +1965,10 @@ bool operator==(const TabletSchema& a, const TabletSchema& b) {
if (a._storage_page_size != b._storage_page_size) return false;
if (a._storage_dict_page_size != b._storage_dict_page_size) return false;
if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false;
if (a._enable_variant_flatten_nested != b._enable_variant_flatten_nested) return false;
if (a._deprecated_enable_variant_flatten_nested !=
b._deprecated_enable_variant_flatten_nested) {
return false;
}
if (a._is_external_segment_column_meta_used != b._is_external_segment_column_meta_used)
return false;
if (a._integer_type_default_use_plain_encoding != b._integer_type_default_use_plain_encoding)
Expand Down
12 changes: 8 additions & 4 deletions be/src/storage/tablet/tablet_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,10 +478,14 @@ class TabletSchema : public MetadataAdder<TabletSchema> {
_disable_auto_compaction = disable_auto_compaction;
}
bool disable_auto_compaction() const { return _disable_auto_compaction; }
void set_enable_variant_flatten_nested(bool flatten_nested) {
_enable_variant_flatten_nested = flatten_nested;
// Deprecated legacy switch for flatten-nested variant behavior.
// It is distinct from variant_enable_nested_group.
void set_deprecated_variant_flatten_nested(bool flatten_nested) {
_deprecated_enable_variant_flatten_nested = flatten_nested;
}
bool deprecated_variant_flatten_nested() const {
return _deprecated_enable_variant_flatten_nested;
}
bool variant_flatten_nested() const { return _enable_variant_flatten_nested; }
void set_enable_single_replica_compaction(bool enable_single_replica_compaction) {
_enable_single_replica_compaction = enable_single_replica_compaction;
}
Expand Down Expand Up @@ -821,7 +825,7 @@ class TabletSchema : public MetadataAdder<TabletSchema> {
// Contains column ids of which columns should be encoded into row store.
// ATTN: For compatibility reasons, empty cids means all columns of the tablet schema are encoded into the row column
std::vector<int32_t> _row_store_column_unique_ids;
bool _enable_variant_flatten_nested = false;
bool _deprecated_enable_variant_flatten_nested = false;

std::map<size_t, int32_t> _vir_col_idx_to_unique_id;
std::map<int32_t, DataTypePtr> _pruned_columns_data_type;
Expand Down
10 changes: 5 additions & 5 deletions be/src/util/json/json_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ std::optional<ParseResult> JSONDataParser<ParserImpl>::parse(const char* begin,
return {};
}
ParseContext context;
// enable_flatten_nested controls nested path traversal
// deprecated_enable_flatten_nested controls nested path traversal
// NestedGroup expansion is now handled at storage layer
context.enable_flatten_nested = config.enable_flatten_nested;
context.deprecated_enable_flatten_nested = config.deprecated_enable_flatten_nested;
context.is_top_array = document.isArray();
traverse(document, context);
ParseResult result;
Expand All @@ -68,8 +68,8 @@ void JSONDataParser<ParserImpl>::traverse(const Element& element, ParseContext&
// handled by VariantNestedBuilder with a max-depth guard.
has_nested = false;
check_has_nested_object(element);
ctx.has_nested_in_flatten = has_nested && ctx.enable_flatten_nested;
if (has_nested && !ctx.enable_flatten_nested) {
ctx.has_nested_in_flatten = has_nested && ctx.deprecated_enable_flatten_nested;
if (has_nested && !ctx.deprecated_enable_flatten_nested) {
// Parse nested arrays to JsonbField
JsonbWriter writer;
traverseArrayAsJsonb(element.getArray(), writer);
Expand Down Expand Up @@ -206,7 +206,7 @@ void JSONDataParser<ParserImpl>::traverseArrayElement(const Element& element,
element_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
element_ctx.is_top_array = ctx.is_top_array;
traverse(element, element_ctx);
auto& [_, paths, values, flatten_nested, __, is_top_array] = element_ctx;
auto& [_, paths, values, deprecated_flatten_nested, __, is_top_array] = element_ctx;

if (element_ctx.has_nested_in_flatten && is_top_array) {
checkAmbiguousStructure(ctx, paths);
Expand Down
4 changes: 2 additions & 2 deletions be/src/util/json/json_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void writeValueAsJsonb(const Element& element, JsonbWriter& writer) {
}

struct ParseConfig {
bool enable_flatten_nested = false;
bool deprecated_enable_flatten_nested = false;
enum class ParseTo {
OnlySubcolumns = 0,
OnlyDocValueColumn = 1,
Expand All @@ -127,7 +127,7 @@ class JSONDataParser {
PathInDataBuilder builder;
std::vector<PathInData::Parts> paths;
std::vector<Field> values;
bool enable_flatten_nested = false;
bool deprecated_enable_flatten_nested = false;
bool has_nested_in_flatten = false;
bool is_top_array = false;
};
Expand Down
22 changes: 11 additions & 11 deletions be/test/core/jsonb/json_parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ TEST(JsonParserTest, ParseMultiLevelNestedArray) {
EXPECT_EQ(result->paths.size(), 1);

// Test complex nested structure
config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
std::string json1 = R"({"a":[[1,2],[3],[4,5,6]]})";
// multi level nested array in object
result = parser.parse(json1.c_str(), json1.size(), config);
Expand All @@ -165,7 +165,7 @@ TEST(JsonParserTest, ParseMultiLevelNestedArray) {
EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_JSONB);

// test flatten nested
config.enable_flatten_nested = true;
config.deprecated_enable_flatten_nested = true;
// TODO: checkAmbiguousStructure is only called when has_nested_in_flatten && is_top_array.
// These JSONs are objects (not top-level arrays), so is_top_array=false and the check is skipped.
// EXPECT_ANY_THROW(parser.parse(json.c_str(), json.size(), config));
Expand All @@ -184,14 +184,14 @@ TEST(JsonParserTest, ParseMultiLevelNestedArray) {
TEST(JsonParserTest, ParseNestedAndFlatten) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
config.enable_flatten_nested = true;
config.deprecated_enable_flatten_nested = true;

std::string json = R"({"a":[{"b":1},{"b":2}]})";
auto result = parser.parse(json.c_str(), json.size(), config);
ASSERT_TRUE(result.has_value());
EXPECT_GT(result->values.size(), 0);

config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
std::string json2 = R"({"a":[{"b":1},{"b":2}]})";
result = parser.parse(json2.c_str(), json2.size(), config);
ASSERT_TRUE(result.has_value());
Expand Down Expand Up @@ -249,7 +249,7 @@ TEST(JsonParserTest, TestIsPrefixFunction) {
TEST(JsonParserTest, TestAmbiguousStructureDetection) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
config.enable_flatten_nested = true;
config.deprecated_enable_flatten_nested = true;

// TODO: The following 3 cases no longer throw because checkAmbiguousStructure requires
// has_nested_in_flatten && is_top_array. "b" contains plain arrays (not nested objects),
Expand All @@ -276,7 +276,7 @@ TEST(JsonParserTest, TestAmbiguousStructureDetection) {
TEST(JsonParserTest, TestNestedArrayHandling) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
config.enable_flatten_nested = true;
config.deprecated_enable_flatten_nested = true;

// Test case 1: Simple nested array handling
std::string json1 = R"([{"b": 1}, {"c": 2}])";
Expand All @@ -296,7 +296,7 @@ TEST(JsonParserTest, TestNestedArrayWithDifferentConfigs) {

// Test with flatten_nested = false
ParseConfig config1;
config1.enable_flatten_nested = false;
config1.deprecated_enable_flatten_nested = false;

std::string json1 = R"([{"b": [1, 2]}, {"b": [3, 4]}])";
auto result1 = parser.parse(json1.c_str(), json1.size(), config1);
Expand All @@ -306,7 +306,7 @@ TEST(JsonParserTest, TestNestedArrayWithDifferentConfigs) {

// Test with flatten_nested = true
ParseConfig config2;
config2.enable_flatten_nested = true;
config2.deprecated_enable_flatten_nested = true;

// TODO: "b" contains plain arrays (no nested objects), so has_nested=false,
// has_nested_in_flatten=false, and checkAmbiguousStructure is not called.
Expand Down Expand Up @@ -426,7 +426,7 @@ TEST(JsonParserTest, ParseUInt64) {
EXPECT_EQ(array_field[0].get<doris::PrimitiveType::TYPE_LARGEINT>(), 18446744073709551615ULL);

std::string nested_json = R"({"a": [{"b": 18446744073709551615}]})";
config.enable_flatten_nested = true;
config.deprecated_enable_flatten_nested = true;
result = parser.parse(nested_json.c_str(), nested_json.size(), config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
Expand Down Expand Up @@ -458,7 +458,7 @@ TEST(JsonParserTest, KeyLengthLimitByConfig) {
std::string obj_json = "{\"" + key11 + "\": 1}";
EXPECT_ANY_THROW(parser.parse(obj_json.c_str(), obj_json.size(), config));

config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
std::string jsonb_json = "{\"a\": [{\"" + key11 + "\": 1}]}";
EXPECT_ANY_THROW(parser.parse(jsonb_json.c_str(), jsonb_json.size(), config));
}
Expand All @@ -471,7 +471,7 @@ TEST(JsonParserTest, KeyLengthLimitByConfig) {
auto result = parser.parse(obj_json.c_str(), obj_json.size(), config);
ASSERT_TRUE(result.has_value());

config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
std::string jsonb_json = "{\"a\": [{\"" + key255 + "\": 1}]}";
result = parser.parse(jsonb_json.c_str(), jsonb_json.size(), config);
ASSERT_TRUE(result.has_value());
Expand Down
2 changes: 1 addition & 1 deletion be/test/exec/common/schema_util_rowset_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ static void fill_varaint_column(auto& variant_column, int size, int uid) {
auto column_string = assert_cast<ColumnString*>(column.get());
fill_string_column_with_test_data(column_string, size, uid);
ParseConfig config;
config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
variant_util::parse_json_to_variant(*variant_column, *column_string, config);
}

Expand Down
2 changes: 1 addition & 1 deletion be/test/exec/common/schema_util_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1876,7 +1876,7 @@ TEST_F(SchemaUtilTest, parse_and_materialize_variant_columns_ambiguous_paths) {
// The variant column is at index 0
std::vector<uint32_t> variant_pos = {0};
ParseConfig config;
config.enable_flatten_nested = true;
config.deprecated_enable_flatten_nested = true;

// Should throw due to ambiguous paths
Status st = variant_util::parse_and_materialize_variant_columns(block, variant_pos, {config});
Expand Down
14 changes: 7 additions & 7 deletions be/test/storage/segment/variant_column_writer_reader_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ static void fill_variant_column_with_doc_value_only(
VariantUtil::fill_string_column_with_test_data(column_string, num_rows, inserted);

ParseConfig config;
config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
config.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
variant_util::parse_json_to_variant(*column_object, *column_string, config);
}
Expand Down Expand Up @@ -1340,7 +1340,7 @@ TEST_F(VariantColumnWriterReaderTest, test_write_doc_compact_writer_and_read_doc
}

ParseConfig config;
config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
config.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;

MutableColumnPtr root_variant =
Expand Down Expand Up @@ -1515,7 +1515,7 @@ TEST_F(VariantColumnWriterReaderTest, test_doc_compact_sparse_write_array_gap) {
strings->insert_data(row1.data(), row1.size());

ParseConfig parse_cfg;
parse_cfg.enable_flatten_nested = false;
parse_cfg.deprecated_enable_flatten_nested = false;
parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;

MutableColumnPtr bucket_variant =
Expand Down Expand Up @@ -1616,7 +1616,7 @@ TEST_F(VariantColumnWriterReaderTest, test_write_doc_sparse_write_array_gap_and_
strings->insert_data(inserted_json[1].data(), inserted_json[1].size());

ParseConfig parse_cfg;
parse_cfg.enable_flatten_nested = false;
parse_cfg.deprecated_enable_flatten_nested = false;
parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;

MutableColumnPtr variant_column =
Expand Down Expand Up @@ -2680,7 +2680,7 @@ TEST_F(VariantColumnWriterReaderTest, test_no_sub_in_sparse_column) {
}

ParseConfig config;
config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
variant_util::parse_json_to_variant(*column_object, *column_string, config);
std::cout << "column_object size: "
<< assert_cast<ColumnVariant*>(column_object.get())->debug_string() << std::endl;
Expand Down Expand Up @@ -2825,7 +2825,7 @@ TEST_F(VariantColumnWriterReaderTest, test_prefix_in_sub_and_sparse) {
}

ParseConfig config;
config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
variant_util::parse_json_to_variant(*column_object, *column_string, config);
std::cout << "column_object size: "
<< assert_cast<ColumnVariant*>(column_object.get())->debug_string() << std::endl;
Expand Down Expand Up @@ -3312,7 +3312,7 @@ TEST_F(VariantColumnWriterReaderTest, test_read_with_checksum) {
fill_string_column_with_test_data(column_string, size, inserted_jsonstr,
path_with_size);
ParseConfig config;
config.enable_flatten_nested = false;
config.deprecated_enable_flatten_nested = false;
variant_util::parse_json_to_variant(*column_object, *column_string, config);
};

Expand Down
12 changes: 6 additions & 6 deletions be/test/storage/segment/variant_util_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ TEST(VariantUtilTest, ParseDocValueToSubcolumns_FillsDefaultsAndValues) {
auto json_col = _make_json_column(jsons);

ParseConfig cfg;
cfg.enable_flatten_nested = false;
cfg.deprecated_enable_flatten_nested = false;
cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
parse_json_to_variant(*variant, *json_col, cfg);

Expand Down Expand Up @@ -105,7 +105,7 @@ TEST(VariantUtilTest, ParseOnlyDocValueColumn_SerializesMixedTypes) {
auto json_col = _make_json_column(jsons);

ParseConfig cfg;
cfg.enable_flatten_nested = false;
cfg.deprecated_enable_flatten_nested = false;
cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
parse_json_to_variant(*variant, *json_col, cfg);

Expand Down Expand Up @@ -222,7 +222,7 @@ TEST(VariantUtilTest, ParseVariantColumns_DocModeBinaryToSubcolumns) {
auto variant = ColumnVariant::create(0);
auto json_col = _make_json_column(jsons);
ParseConfig cfg;
cfg.enable_flatten_nested = false;
cfg.deprecated_enable_flatten_nested = false;
cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
parse_json_to_variant(*variant, *json_col, cfg);
ASSERT_TRUE(variant->is_doc_mode());
Expand All @@ -231,7 +231,7 @@ TEST(VariantUtilTest, ParseVariantColumns_DocModeBinaryToSubcolumns) {
block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0), "v"});

ParseConfig parse_cfg;
parse_cfg.enable_flatten_nested = false;
parse_cfg.deprecated_enable_flatten_nested = false;
parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
Status st =
parse_and_materialize_variant_columns(block, std::vector<uint32_t> {0}, {parse_cfg});
Expand Down Expand Up @@ -275,7 +275,7 @@ TEST(VariantUtilTest, ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig) {
auto json_col = _make_json_column(jsons);

ParseConfig cfg;
cfg.enable_flatten_nested = false;
cfg.deprecated_enable_flatten_nested = false;
cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
parse_json_to_variant(*variant, *json_col, cfg);
ASSERT_TRUE(variant->is_doc_mode());
Expand All @@ -284,7 +284,7 @@ TEST(VariantUtilTest, ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig) {
block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0), "v"});

ParseConfig parse_cfg;
parse_cfg.enable_flatten_nested = false;
parse_cfg.deprecated_enable_flatten_nested = false;
parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
Status st =
parse_and_materialize_variant_columns(block, std::vector<uint32_t> {0}, {parse_cfg});
Expand Down
Loading
Loading