Skip to content

Commit 8b2cc7f

Browse files
authored
feat: bind literals with right type after serde (#562)
1 parent 8fdf346 commit 8b2cc7f

11 files changed

Lines changed: 799 additions & 10 deletions

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ set(ICEBERG_SOURCES
108108
util/murmurhash3_internal.cc
109109
util/property_util.cc
110110
util/snapshot_util.cc
111+
util/string_util.cc
111112
util/temporal_util.cc
112113
util/timepoint.cc
113114
util/transform_util.cc

src/iceberg/expression/json_serde.cc

Lines changed: 145 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
* under the License.
1818
*/
1919

20+
#include <limits>
2021
#include <string>
2122
#include <vector>
2223

@@ -298,10 +299,150 @@ Result<nlohmann::json> ToJson(const Literal& literal) {
298299
}
299300
}
300301

301-
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* /*type*/) {
302-
// TODO(gangwu): implement type-aware literal parsing equivalent to Java's
303-
// SingleValueParser.fromJson(type, node).
304-
return LiteralFromJson(json);
302+
// Parses a JSON single value into a Literal bound to `type`, equivalent to Java's
// SingleValueParser.fromJson(type, node).
//
// - A {"type": "literal", "value": <actual>} wrapper is unwrapped first.
// - When `type` is null, parsing is delegated to the untyped LiteralFromJson(json).
// - Otherwise the JSON kind must match the target type: boolean, integral number,
//   floating-point number, or string (string, date, time, timestamp, timestamptz,
//   uuid, binary, fixed and decimal are all carried as JSON strings).
//
// Returns a JSON parse error when the node has the wrong JSON kind, is out of range
// for int/long, has the wrong length for fixed, or does not fit the decimal
// precision/scale. Errors from the helper parsers are passed on through
// ICEBERG_ASSIGN_OR_RAISE. Any other type id yields NotSupported.
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
  // If {"type": "literal", "value": <actual>} wrapper is present, unwrap it first.
  // The "type" member is only read as a string after confirming it is one:
  // get<std::string>() throws for any other JSON kind, and this function reports
  // failures through Result rather than exceptions.
  if (json.is_object() && json.contains(kType) && json[kType].is_string() &&
      json[kType].get<std::string>() == kLiteral && json.contains(kValue)) {
    return LiteralFromJson(json[kValue], type);
  }
  // If no type context is provided, fall back to untyped parsing.
  if (type == nullptr) return LiteralFromJson(json);

  // is_number_integer() is also true for unsigned JSON numbers, and get<int64_t>()
  // converts those with a plain cast. A value above INT64_MAX would therefore wrap
  // (e.g. 18446744073709551615 becomes -1) and pass the range checks below, so it
  // has to be detected on the unsigned representation.
  const auto exceeds_int64 = [&json]() {
    return json.is_number_unsigned() &&
           json.get<uint64_t>() >
               static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
  };

  // Type-aware parsing equivalent to Java's SingleValueParser.fromJson(type, node).
  switch (type->type_id()) {
    case TypeId::kBoolean:
      if (!json.is_boolean()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a boolean value", SafeDumpJson(json));
      }
      return Literal::Boolean(json.get<bool>());

    case TypeId::kInt: {
      if (!json.is_number_integer()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as an int value", SafeDumpJson(json));
      }
      // Read as 64-bit first so that values outside int32 are rejected, not truncated.
      const bool wraps = exceeds_int64();
      const auto val = json.get<int64_t>();
      if (wraps || val < std::numeric_limits<int32_t>::min() ||
          val > std::numeric_limits<int32_t>::max()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as an int value: out of range",
                              SafeDumpJson(json));
      }
      return Literal::Int(static_cast<int32_t>(val));
    }

    case TypeId::kLong:
      if (!json.is_number_integer()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a long value", SafeDumpJson(json));
      }
      if (exceeds_int64()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a long value: out of range",
                              SafeDumpJson(json));
      }
      return Literal::Long(json.get<int64_t>());

    // Float and double accept only floating-point JSON numbers; an integral JSON
    // number such as 1 is rejected.
    case TypeId::kFloat:
      if (!json.is_number_float()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a float value", SafeDumpJson(json));
      }
      return Literal::Float(json.get<float>());

    case TypeId::kDouble:
      if (!json.is_number_float()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a double value", SafeDumpJson(json));
      }
      return Literal::Double(json.get<double>());

    case TypeId::kString:
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a string value", SafeDumpJson(json));
      }
      return Literal::String(json.get<std::string>());

    case TypeId::kDate: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a date value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto days,
                              TransformUtil::ParseDay(json.get<std::string>()));
      return Literal::Date(days);
    }

    case TypeId::kTime: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a time value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTime(json.get<std::string>()));
      return Literal::Time(micros);
    }

    case TypeId::kTimestamp: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a timestamp value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTimestamp(json.get<std::string>()));
      return Literal::Timestamp(micros);
    }

    case TypeId::kTimestampTz: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a timestamptz value",
                              SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(
          auto micros, TransformUtil::ParseTimestampWithZone(json.get<std::string>()));
      return Literal::TimestampTz(micros);
    }

    case TypeId::kUuid: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a uuid value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(json.get<std::string>()));
      return Literal::UUID(uuid);
    }

    case TypeId::kBinary: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a binary value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto bytes,
                              StringUtils::HexStringToBytes(json.get<std::string>()));
      return Literal::Binary(std::move(bytes));
    }

    case TypeId::kFixed: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a fixed value", SafeDumpJson(json));
      }
      const auto& fixed_type = internal::checked_cast<const FixedType&>(*type);
      const std::string hex = json.get<std::string>();
      // Two hex characters encode one byte, so the text must be exactly 2 * length.
      if (hex.size() != static_cast<size_t>(fixed_type.length()) * 2) [[unlikely]] {
        return JsonParseError("Cannot parse fixed[{}]: expected {} hex chars, got {}",
                              fixed_type.length(), fixed_type.length() * 2, hex.size());
      }
      ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(hex));
      return Literal::Fixed(std::move(bytes));
    }

    case TypeId::kDecimal: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a decimal value", SafeDumpJson(json));
      }
      const auto& dec_type = internal::checked_cast<const DecimalType&>(*type);
      int32_t parsed_precision = 0;
      int32_t parsed_scale = 0;
      ICEBERG_ASSIGN_OR_RAISE(
          auto dec,
          Decimal::FromString(json.get<std::string>(), &parsed_precision, &parsed_scale));
      // The text must carry exactly the declared scale and fit the declared precision.
      if (parsed_precision > dec_type.precision() || parsed_scale != dec_type.scale())
          [[unlikely]] {
        return JsonParseError("Cannot parse {} as a {} value", SafeDumpJson(json),
                              type->ToString());
      }
      return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
    }

    default:
      return NotSupported("Unsupported type for literal JSON parsing: {}",
                          type->ToString());
  }
}
306447

307448
Result<Literal> LiteralFromJson(const nlohmann::json& json) {

src/iceberg/expression/literal.cc

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,16 @@
2323
#include <concepts>
2424
#include <cstdint>
2525
#include <string>
26+
#include <vector>
2627

28+
#include "iceberg/type.h"
2729
#include "iceberg/util/checked_cast.h"
2830
#include "iceberg/util/conversions.h"
31+
#include "iceberg/util/decimal.h"
2932
#include "iceberg/util/macros.h"
33+
#include "iceberg/util/string_util.h"
3034
#include "iceberg/util/temporal_util.h"
35+
#include "iceberg/util/transform_util.h"
3136

3237
namespace iceberg {
3338

@@ -193,12 +198,49 @@ Result<Literal> LiteralCaster::CastFromString(
193198
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(str_val));
194199
return Literal::UUID(uuid);
195200
}
196-
case TypeId::kDate:
197-
case TypeId::kTime:
198-
case TypeId::kTimestamp:
199-
case TypeId::kTimestampTz:
200-
return NotImplemented("Cast from String to {} is not implemented yet",
201-
target_type->ToString());
201+
case TypeId::kDate: {
202+
ICEBERG_ASSIGN_OR_RAISE(auto days, TransformUtil::ParseDay(str_val));
203+
return Literal::Date(days);
204+
}
205+
case TypeId::kTime: {
206+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTime(str_val));
207+
return Literal::Time(micros);
208+
}
209+
case TypeId::kTimestamp: {
210+
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTimestamp(str_val));
211+
return Literal::Timestamp(micros);
212+
}
213+
case TypeId::kTimestampTz: {
214+
ICEBERG_ASSIGN_OR_RAISE(auto micros,
215+
TransformUtil::ParseTimestampWithZone(str_val));
216+
return Literal::TimestampTz(micros);
217+
}
218+
case TypeId::kBinary: {
219+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
220+
return Literal::Binary(std::move(bytes));
221+
}
222+
case TypeId::kFixed: {
223+
const auto& fixed_type = internal::checked_cast<const FixedType&>(*target_type);
224+
if (str_val.size() != static_cast<size_t>(fixed_type.length()) * 2) {
225+
return InvalidArgument("Cannot cast string to {}: expected {} hex chars, got {}",
226+
target_type->ToString(), fixed_type.length() * 2,
227+
str_val.size());
228+
}
229+
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
230+
return Literal::Fixed(std::move(bytes));
231+
}
232+
case TypeId::kDecimal: {
233+
const auto& dec_type = internal::checked_cast<const DecimalType&>(*target_type);
234+
int32_t parsed_precision = 0;
235+
int32_t parsed_scale = 0;
236+
ICEBERG_ASSIGN_OR_RAISE(
237+
auto dec, Decimal::FromString(str_val, &parsed_precision, &parsed_scale));
238+
if (parsed_precision > dec_type.precision() || parsed_scale != dec_type.scale()) {
239+
return InvalidArgument("Cannot cast {} as a {} value", str_val,
240+
target_type->ToString());
241+
}
242+
return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
243+
}
202244
default:
203245
return NotSupported("Cast from String to {} is not supported",
204246
target_type->ToString());

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ iceberg_sources = files(
126126
'util/murmurhash3_internal.cc',
127127
'util/property_util.cc',
128128
'util/snapshot_util.cc',
129+
'util/string_util.cc',
129130
'util/temporal_util.cc',
130131
'util/timepoint.cc',
131132
'util/transform_util.cc',

src/iceberg/test/expression_json_test.cc

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919

2020
#include <memory>
21+
#include <optional>
2122
#include <string>
2223
#include <vector>
2324

@@ -31,6 +32,7 @@
3132
#include "iceberg/expression/literal.h"
3233
#include "iceberg/expression/predicate.h"
3334
#include "iceberg/schema.h"
35+
#include "iceberg/schema_field.h"
3436
#include "iceberg/test/matchers.h"
3537
#include "iceberg/type.h"
3638

@@ -405,4 +407,129 @@ INSTANTIATE_TEST_SUITE_P(
405407
return info.param.name;
406408
});
407409

410+
// --- LiteralFromJson(json, type) type-aware tests ---
411+
412+
// One positive test case for the type-aware LiteralFromJson(json, type) overload.
struct LiteralFromJsonTypedParam {
413+
// Case label.
std::string name;
414+
// JSON value handed to the parser.
nlohmann::json json;
415+
// Target type the value is parsed against.
std::shared_ptr<Type> type;
416+
// Type id the resulting literal is expected to report.
TypeId expected_type_id;
417+
// Expected Literal::ToString() output; std::nullopt means "do not check the text".
std::optional<std::string> expected_str;
418+
};
419+
420+
// Value-parameterized fixture over LiteralFromJsonTypedParam; it adds no state or
// set-up of its own.
class LiteralFromJsonTypedTest
421+
: public ::testing::TestWithParam<LiteralFromJsonTypedParam> {};
422+
423+
// Parses the case's JSON against its target type, then checks the literal's type id
// and, only when an expected rendering was supplied, its ToString() output.
TEST_P(LiteralFromJsonTypedTest, Parses) {
  const LiteralFromJsonTypedParam& param = GetParam();
  const Type* target_type = param.type.get();

  ICEBERG_UNWRAP_OR_FAIL(auto literal, LiteralFromJson(param.json, target_type));
  EXPECT_EQ(literal.type()->type_id(), param.expected_type_id);

  if (!param.expected_str.has_value()) {
    return;
  }
  EXPECT_EQ(literal.ToString(), param.expected_str.value());
}
431+
432+
// Positive cases for typed parsing: boolean, int, long, float, double, string, date,
// uuid, binary, fixed and decimal. Cases whose last field is std::nullopt only pin
// the resulting type id, not the rendered value. The trailing lambda names each
// generated test after the case's `name` field.
INSTANTIATE_TEST_SUITE_P(
433+
LiteralFromJsonTyped, LiteralFromJsonTypedTest,
434+
::testing::Values(
435+
LiteralFromJsonTypedParam{"Boolean", nlohmann::json(true), boolean(),
436+
TypeId::kBoolean, "true"},
437+
LiteralFromJsonTypedParam{"Int", nlohmann::json(123), int32(), TypeId::kInt,
438+
"123"},
439+
LiteralFromJsonTypedParam{"Long", nlohmann::json(9876543210LL), int64(),
440+
TypeId::kLong, "9876543210"},
441+
LiteralFromJsonTypedParam{"Float", nlohmann::json(1.5), float32(), TypeId::kFloat,
442+
std::nullopt},
443+
LiteralFromJsonTypedParam{"Double", nlohmann::json(3.14), float64(),
444+
TypeId::kDouble, std::nullopt},
445+
LiteralFromJsonTypedParam{"String", nlohmann::json("hello"), string(),
446+
TypeId::kString, std::nullopt},
447+
LiteralFromJsonTypedParam{"DateString", nlohmann::json("2024-01-15"), date(),
448+
TypeId::kDate, std::nullopt},
449+
LiteralFromJsonTypedParam{"Uuid",
450+
nlohmann::json("f79c3e09-677c-4bbd-a479-3f349cb785e7"),
451+
uuid(), TypeId::kUuid, std::nullopt},
452+
LiteralFromJsonTypedParam{"Binary", nlohmann::json("deadbeef"), binary(),
453+
TypeId::kBinary, std::nullopt},
454+
LiteralFromJsonTypedParam{"Fixed", nlohmann::json("cafebabe"), fixed(4),
455+
TypeId::kFixed, std::nullopt},
456+
// The decimal text must carry exactly the declared scale (4 and 0 here).
LiteralFromJsonTypedParam{"DecimalMatchingScale", nlohmann::json("123.4500"),
457+
decimal(9, 4), TypeId::kDecimal, "123.4500"},
458+
LiteralFromJsonTypedParam{"DecimalScaleZero", nlohmann::json("2"), decimal(9, 0),
459+
TypeId::kDecimal, "2"}),
460+
[](const ::testing::TestParamInfo<LiteralFromJsonTypedParam>& info) {
461+
return info.param.name;
462+
});
463+
464+
// One negative test case: a JSON value that LiteralFromJson(json, type) must reject
// for the given target type.
struct InvalidLiteralFromJsonTypedParam {
465+
// Case label.
std::string name;
466+
// JSON value handed to the parser.
nlohmann::json json;
467+
// Target type the value is (unsuccessfully) parsed against.
std::shared_ptr<Type> type;
468+
};
469+
470+
// Value-parameterized fixture over InvalidLiteralFromJsonTypedParam; it adds no
// state or set-up of its own.
class InvalidLiteralFromJsonTypedTest
471+
: public ::testing::TestWithParam<InvalidLiteralFromJsonTypedParam> {};
472+
473+
// Typed parsing of the case's JSON must fail; only the absence of a value is
// checked, not the kind of error.
TEST_P(InvalidLiteralFromJsonTypedTest, ReturnsError) {
  const InvalidLiteralFromJsonTypedParam& param = GetParam();
  const auto outcome = LiteralFromJson(param.json, param.type.get());
  EXPECT_FALSE(outcome.has_value());
}
477+
478+
// Inputs that typed parsing must reject: a JSON kind that does not match the target
// type, a malformed hex string, a fixed value of the wrong length, a decimal whose
// scale differs from the declared one, and a decimal supplied as a JSON number
// instead of a string. The trailing lambda names each generated test after the
// case's `name` field.
INSTANTIATE_TEST_SUITE_P(
479+
LiteralFromJsonTyped, InvalidLiteralFromJsonTypedTest,
480+
::testing::Values(
481+
InvalidLiteralFromJsonTypedParam{"BooleanTypeMismatch", nlohmann::json(42),
482+
boolean()},
483+
InvalidLiteralFromJsonTypedParam{"DateTypeMismatch", nlohmann::json(true),
484+
date()},
485+
InvalidLiteralFromJsonTypedParam{"UuidTypeMismatch", nlohmann::json(42), uuid()},
486+
InvalidLiteralFromJsonTypedParam{"BinaryInvalidHex", nlohmann::json("xyz"),
487+
binary()},
488+
// 6 hex characters describe 3 bytes, but fixed(4) needs 8.
InvalidLiteralFromJsonTypedParam{"FixedLengthMismatch", nlohmann::json("cafe12"),
489+
fixed(4)},
490+
// Text has scale 2 while the type declares scale 4.
InvalidLiteralFromJsonTypedParam{"DecimalScaleMismatch", nlohmann::json("123.45"),
491+
decimal(9, 4)},
492+
InvalidLiteralFromJsonTypedParam{"DecimalNotString", nlohmann::json(123.45),
493+
decimal(9, 2)}),
494+
[](const ::testing::TestParamInfo<InvalidLiteralFromJsonTypedParam>& info) {
495+
return info.param.name;
496+
});
497+
498+
// One case for parsing a predicate against a schema: a field name, the field's type,
// and the JSON value used as the predicate's literal.
struct SchemaAwarePredicateParam {
499+
// Case label.
std::string name;
500+
// Name of the schema field the predicate refers to.
std::string field_name;
501+
// Declared type of that field.
std::shared_ptr<Type> field_type;
502+
// JSON value placed in the predicate's "value" member.
nlohmann::json value;
503+
};
504+
505+
// Value-parameterized fixture over SchemaAwarePredicateParam; it adds no state or
// set-up of its own.
class SchemaAwarePredicateRoundTripTest
506+
: public ::testing::TestWithParam<SchemaAwarePredicateParam> {};
507+
508+
// Builds a schema with a single optional field (id 1) of the case's type, parses an
// "eq" predicate on that field with the case's value, and requires a non-null
// expression back.
// NOTE(review): despite the test name, the expression is not serialized back to
// JSON here; only the parse direction is exercised.
TEST_P(SchemaAwarePredicateRoundTripTest, RoundTrip) {
  const SchemaAwarePredicateParam& param = GetParam();

  auto single_field_schema = std::make_shared<Schema>(std::vector<SchemaField>{
      SchemaField::MakeOptional(1, param.field_name, param.field_type)});

  nlohmann::json predicate = nlohmann::json::object();
  predicate["type"] = "eq";
  predicate["term"] = param.field_name;
  predicate["value"] = param.value;

  ICEBERG_UNWRAP_OR_FAIL(auto parsed,
                         ExpressionFromJson(predicate, single_field_schema.get()));
  ASSERT_NE(parsed, nullptr);
}
516+
517+
// Schema-aware predicate cases. Every value is given as a JSON string and the field
// type (date, time, timestamp, timestamptz, uuid, binary, fixed, decimal) supplies
// the context for interpreting it. The trailing lambda names each generated test
// after the case's `name` field.
INSTANTIATE_TEST_SUITE_P(
518+
LiteralFromJsonTyped, SchemaAwarePredicateRoundTripTest,
519+
::testing::Values(
520+
SchemaAwarePredicateParam{"Date", "event_date", date(), "2024-01-15"},
521+
SchemaAwarePredicateParam{"Time", "event_time", time(), "14:30:00"},
522+
SchemaAwarePredicateParam{"Timestamp", "created_at", timestamp(),
523+
"2026-01-01T00:00:01.500"},
524+
SchemaAwarePredicateParam{"TimestampTz", "updated_at", timestamp_tz(),
525+
"2026-01-01T00:00:01.500+00:00"},
526+
SchemaAwarePredicateParam{"Uuid", "trace_id", uuid(),
527+
"f79c3e09-677c-4bbd-a479-3f349cb785e7"},
528+
SchemaAwarePredicateParam{"Binary", "payload", binary(), "deadbeef"},
529+
SchemaAwarePredicateParam{"Fixed", "hash", fixed(4), "cafebabe"},
530+
SchemaAwarePredicateParam{"Decimal", "amount", decimal(9, 2), "123.45"}),
531+
[](const ::testing::TestParamInfo<SchemaAwarePredicateParam>& info) {
532+
return info.param.name;
533+
});
534+
408535
} // namespace iceberg

0 commit comments

Comments
 (0)