Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1111,9 +1111,13 @@ DEFINE_mInt64(workload_group_scan_task_wait_timeout_ms, "10000");
// Whether use schema dict in backend side instead of MetaService side(cloud mode)
DEFINE_mBool(variant_use_cloud_schema_dict_cache, "true");
DEFINE_mInt64(variant_threshold_rows_to_estimate_sparse_column, "2048");
DEFINE_mInt32(variant_max_json_key_length, "255");
DEFINE_mBool(variant_throw_exeception_on_invalid_json, "false");
DEFINE_mBool(enable_vertical_compact_variant_subcolumns, "true");

// Accept only values in the range [1, 65535] for variant_max_json_key_length.
// NOTE(review): traverseObjectAsJsonb passes key.size() through cast_set<uint8_t> when
// writing the key, so limits above 255 may not be usable on the JSONB path -- confirm.
DEFINE_Validator(variant_max_json_key_length,
[](const int config) -> bool { return config > 0 && config <= 65535; });

// block file cache
DEFINE_Bool(enable_file_cache, "false");
// format: [{"path":"/path/to/file_cache","total_size":21474836480,"query_limit":10737418240}]
Expand Down
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1354,6 +1354,8 @@ DECLARE_mBool(variant_use_cloud_schema_dict_cache);
// Threshold to estimate whether a column is sparse
// Notice: TEST ONLY
DECLARE_mInt64(variant_threshold_rows_to_estimate_sparse_column);
// Max json key length in bytes when parsing json into variant subcolumns/jsonb.
DECLARE_mInt32(variant_max_json_key_length);
// If false, treat an invalid json format str as a string instead of throwing an exception
DECLARE_mBool(variant_throw_exeception_on_invalid_json);
// Enable vertical compact subcolumns of variant column
Expand Down
18 changes: 12 additions & 6 deletions be/src/vec/json/json_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,12 @@ void JSONDataParser<ParserImpl>::traverseObject(const JSONObject& object, ParseC
ctx.values.reserve(ctx.values.size() + object.size());
for (auto it = object.begin(); it != object.end(); ++it) {
const auto& [key, value] = *it;
if (key.size() >= std::numeric_limits<uint8_t>::max()) {
throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
"Key length exceeds maximum allowed size of 255 bytes.");
const size_t max_key_length = cast_set<size_t>(config::variant_max_json_key_length);
if (key.size() > max_key_length) {
throw doris::Exception(
doris::ErrorCode::INVALID_ARGUMENT,
fmt::format("Key length exceeds maximum allowed size of {} bytes.",
max_key_length));
}
ctx.builder.append(key, false);
traverse(value, ctx);
Expand Down Expand Up @@ -133,9 +136,12 @@ void JSONDataParser<ParserImpl>::traverseObjectAsJsonb(const JSONObject& object,
writer.writeStartObject();
for (auto it = object.begin(); it != object.end(); ++it) {
const auto& [key, value] = *it;
if (key.size() >= std::numeric_limits<uint8_t>::max()) {
throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
"Key length exceeds maximum allowed size of 255 bytes.");
const size_t max_key_length = cast_set<size_t>(config::variant_max_json_key_length);
if (key.size() > max_key_length) {
throw doris::Exception(
doris::ErrorCode::INVALID_ARGUMENT,
fmt::format("Key length exceeds maximum allowed size of {} bytes.",
max_key_length));
}
writer.writeKey(key.data(), cast_set<uint8_t>(key.size()));
traverseAsJsonb(value, writer);
Expand Down
45 changes: 44 additions & 1 deletion be/test/vec/jsonb/json_parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <vector>

#include "common/config.h"
#include "vec/common/string_ref.h"

using doris::vectorized::JSONDataParser;
Expand Down Expand Up @@ -430,4 +431,46 @@ TEST(JsonParserTest, ParseUInt64) {
EXPECT_EQ(array_field_2.size(), 1);
EXPECT_EQ(array_field_2[0].get_type(), doris::PrimitiveType::TYPE_LARGEINT);
EXPECT_EQ(array_field_2[0].get<doris::PrimitiveType::TYPE_LARGEINT>(), 18446744073709551615ULL);
}
}

// Verifies that config::variant_max_json_key_length bounds the JSON key length accepted by
// JSONDataParser: a key longer than the limit makes parse() throw, while a key exactly at the
// limit is accepted. Each limit is checked for a plain top-level object and for an object
// nested inside an array with enable_flatten_nested disabled (the case whose result is
// expected to be a single TYPE_JSONB value).
TEST(JsonParserTest, KeyLengthLimitByConfig) {
    // Overrides the config value for the lifetime of the guard and restores the previous
    // value on destruction.
    struct ScopedMaxJsonKeyLength {
        int32_t old_value;
        explicit ScopedMaxJsonKeyLength(int32_t new_value)
                : old_value(doris::config::variant_max_json_key_length) {
            doris::config::variant_max_json_key_length = new_value;
        }
        ~ScopedMaxJsonKeyLength() { doris::config::variant_max_json_key_length = old_value; }
        // Non-copyable: a copy would restore the saved value a second time.
        ScopedMaxJsonKeyLength(const ScopedMaxJsonKeyLength&) = delete;
        ScopedMaxJsonKeyLength& operator=(const ScopedMaxJsonKeyLength&) = delete;
    };

    JSONDataParser<SimdJSONParser> parser;

    // Limit 10: an 11-byte key must be rejected.
    {
        ScopedMaxJsonKeyLength guard(10);
        const std::string key11(11, 'a');

        // Fresh ParseConfig per case so flags set for one case do not leak into another.
        ParseConfig default_config;
        const std::string obj_json = "{\"" + key11 + "\": 1}";
        EXPECT_ANY_THROW(parser.parse(obj_json.c_str(), obj_json.size(), default_config));

        ParseConfig no_flatten_config;
        no_flatten_config.enable_flatten_nested = false;
        const std::string jsonb_json = "{\"a\": [{\"" + key11 + "\": 1}]}";
        EXPECT_ANY_THROW(parser.parse(jsonb_json.c_str(), jsonb_json.size(), no_flatten_config));
    }

    // Limit 255: a key of exactly 255 bytes must be accepted.
    {
        ScopedMaxJsonKeyLength guard(255);
        const std::string key255(255, 'b');

        ParseConfig default_config;
        const std::string obj_json = "{\"" + key255 + "\": 1}";
        auto result = parser.parse(obj_json.c_str(), obj_json.size(), default_config);
        ASSERT_TRUE(result.has_value());

        ParseConfig no_flatten_config;
        no_flatten_config.enable_flatten_nested = false;
        const std::string jsonb_json = "{\"a\": [{\"" + key255 + "\": 1}]}";
        result = parser.parse(jsonb_json.c_str(), jsonb_json.size(), no_flatten_config);
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result->values.size(), 1);
        EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_JSONB);
    }
}
Loading