Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 129 additions & 17 deletions parquet-variant/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,31 @@ pub(crate) fn int_size(v: usize) -> OffsetSizeBytes {
}
}

const ONE_TOP_LEVEL_VALUE_MSG: &str =
"VariantBuilder already contains a top-level variant value; only one is allowed";
const EMPTY_BUILDER_MSG: &str =
"VariantBuilder is empty; append a top-level value before calling finish()";

// Error/panic construction is kept out-of-line and cold so the per-call guards on the
// builder hot paths stay small enough to inline as a single predictable branch.
#[cold]
#[inline(never)]
fn top_level_value_panic() -> ! {
panic!("{ONE_TOP_LEVEL_VALUE_MSG}");
}

#[cold]
#[inline(never)]
fn top_level_value_error() -> ArrowError {
ArrowError::InvalidArgumentError(ONE_TOP_LEVEL_VALUE_MSG.into())
}

#[cold]
#[inline(never)]
fn empty_builder_error() -> ArrowError {
ArrowError::InvalidArgumentError(EMPTY_BUILDER_MSG.into())
}

/// Wrapper around a `Vec<u8>` that provides methods for appending
/// primitive values, variant types, and metadata.
///
Expand Down Expand Up @@ -792,30 +817,85 @@ impl VariantBuilder {
self.metadata_builder.upsert_field_name(field_name);
}

/// Returns true if a top-level variant value has already been written to this builder.
///
/// A [`VariantBuilder`] holds exactly one top-level variant value. Any committed top-level
/// value leaves bytes in the value buffer; a child builder dropped without `finish()` has
/// its bytes rolled back by [`ParentState`], so the offset is a faithful indicator.
#[inline]
fn has_top_level_value(&self) -> bool {
self.value_builder.offset() != 0
}

#[inline]
fn ensure_no_top_level_value(&self) {
if self.has_top_level_value() {
top_level_value_panic();
}
}

#[inline]
fn check_no_top_level_value(&self) -> Result<(), ArrowError> {
if self.has_top_level_value() {
return Err(top_level_value_error());
}
Ok(())
}

/// Create an [`ListBuilder`] for creating [`Variant::List`] values.
///
/// See the examples on [`VariantBuilder`] for usage.
///
/// # Panics
///
/// Panics if a top-level variant value has already been written to this builder. For a
/// fallible version, use [`VariantBuilder::try_new_list`].
pub fn new_list(&mut self) -> ListBuilder<'_, ()> {
self.try_new_list().unwrap()
}

/// Create an [`ListBuilder`] for creating [`Variant::List`] values.
///
/// Returns an error if a top-level variant value has already been written to this builder.
pub fn try_new_list(&mut self) -> Result<ListBuilder<'_, ()>, ArrowError> {
self.check_no_top_level_value()?;
let parent_state =
ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
ListBuilder::new(parent_state, self.validate_unique_fields)
Ok(ListBuilder::new(parent_state, self.validate_unique_fields))
}

/// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
///
/// See the examples on [`VariantBuilder`] for usage.
///
/// # Panics
///
/// Panics if a top-level variant value has already been written to this builder. For a
/// fallible version, use [`VariantBuilder::try_new_object`].
pub fn new_object(&mut self) -> ObjectBuilder<'_, ()> {
self.try_new_object().unwrap()
}

/// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
///
/// Returns an error if a top-level variant value has already been written to this builder.
pub fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, ()>, ArrowError> {
self.check_no_top_level_value()?;
let parent_state =
ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
ObjectBuilder::new(parent_state, self.validate_unique_fields)
Ok(ObjectBuilder::new(
parent_state,
self.validate_unique_fields,
))
}

/// Append a value to the builder.
///
/// # Panics
///
/// This method will panic if the variant contains duplicate field names in objects
/// when validation is enabled. For a fallible version, use [`VariantBuilder::try_append_value`]
/// Panics if a top-level variant value has already been written to this builder, or if the
/// variant contains duplicate field names in objects when validation is enabled. For a
/// fallible version, use [`VariantBuilder::try_append_value`].
///
/// # Example
/// ```
Expand All @@ -825,15 +905,19 @@ impl VariantBuilder {
/// builder.append_value(42i8);
/// ```
pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
self.ensure_no_top_level_value();
let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
ValueBuilder::append_variant(state, value.into())
}

/// Append a value to the builder.
///
/// Returns an error if a top-level variant value has already been written to this builder.
pub fn try_append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
&mut self,
value: T,
) -> Result<(), ArrowError> {
self.check_no_top_level_value()?;
let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
ValueBuilder::try_append_variant(state, value.into())
}
Expand All @@ -846,18 +930,51 @@ impl VariantBuilder {
///
/// The caller must ensure that the metadata dictionary entries are already built and correct for
/// any objects or lists being appended.
///
/// # Panics
///
/// Panics if a top-level variant value has already been written to this builder. For a
/// fallible version, use [`VariantBuilder::try_append_value_bytes`].
pub fn append_value_bytes<'m, 'd>(&mut self, value: impl Into<Variant<'m, 'd>>) {
self.try_append_value_bytes(value).unwrap()
}

/// Tries to append a variant value to the builder by copying raw bytes when possible.
///
/// This is the fallible version of [`VariantBuilder::append_value_bytes`]. Returns an error
/// if a top-level variant value has already been written to this builder.
pub fn try_append_value_bytes<'m, 'd>(
&mut self,
value: impl Into<Variant<'m, 'd>>,
) -> Result<(), ArrowError> {
self.check_no_top_level_value()?;
let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
ValueBuilder::append_variant_bytes(state, value.into());
Ok(())
}

/// Finish the builder and return the metadata and value buffers.
///
/// # Panics
///
/// Panics if no top-level variant value has been appended. For a fallible version, use
/// [`VariantBuilder::try_finish`].
pub fn finish(self) -> (Vec<u8>, Vec<u8>) {
self.try_finish().expect(EMPTY_BUILDER_MSG)
}

/// Finish the builder and return the metadata and value buffers.
pub fn finish(mut self) -> (Vec<u8>, Vec<u8>) {
///
/// Returns an error if no top-level variant value has been appended.
pub fn try_finish(mut self) -> Result<(Vec<u8>, Vec<u8>), ArrowError> {
if !self.has_top_level_value() {
return Err(empty_builder_error());
}
self.metadata_builder.finish();
(
Ok((
self.metadata_builder.into_inner(),
self.value_builder.into_inner(),
)
))
}
}

Expand Down Expand Up @@ -915,11 +1032,11 @@ impl VariantBuilderExt for VariantBuilder {
}

fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
Ok(self.new_list())
self.try_new_list()
}

fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
Ok(self.new_object())
self.try_new_object()
}
}

Expand Down Expand Up @@ -1044,14 +1161,9 @@ mod tests {
variant2.add_field_name("a");
assert!(!variant2.metadata_builder.is_sorted);

// per the spec, make sure the variant will fail to build if only metadata is provided
let (m, v) = variant2.finish();
let res = Variant::try_new(&m, &v);
assert!(res.is_err());

// since it is not sorted, make sure the metadata says so
let header = VariantMetadata::try_new(&m).unwrap();
assert!(!header.is_sorted());
// per the spec, a variant must have a top-level value; try_finish() rejects empty
let err = variant2.try_finish().unwrap_err();
assert!(err.to_string().contains("empty"), "unexpected error: {err}");
}

// write out variant1 and make sure the sorted flag is properly encoded
Expand Down
20 changes: 11 additions & 9 deletions parquet-variant/src/builder/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ impl<S: AsRef<str>> Extend<S> for WritableMetadataBuilder {
#[cfg(test)]
mod test {
use crate::{
ParentState, ValueBuilder, Variant, VariantBuilder, VariantMetadata,
ParentState, ValueBuilder, Variant, VariantMetadata,
builder::{
metadata::{ReadOnlyMetadataBuilder, WritableMetadataBuilder},
object::ObjectBuilder,
Expand Down Expand Up @@ -359,11 +359,12 @@ mod test {
#[test]
fn test_read_only_metadata_builder() {
// First create some metadata with a few field names
let mut default_builder = VariantBuilder::new();
default_builder.add_field_name("name");
default_builder.add_field_name("age");
default_builder.add_field_name("active");
let (metadata_bytes, _) = default_builder.finish();
let mut default_builder = WritableMetadataBuilder::default();
default_builder.upsert_field_name("name");
default_builder.upsert_field_name("age");
default_builder.upsert_field_name("active");
default_builder.finish();
let metadata_bytes = default_builder.into_inner();

// Use the metadata to build new variant values
let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
Expand Down Expand Up @@ -394,9 +395,10 @@ mod test {
#[test]
fn test_read_only_metadata_builder_fails_on_unknown_field() {
// Create metadata with only one field
let mut default_builder = VariantBuilder::new();
default_builder.add_field_name("known_field");
let (metadata_bytes, _) = default_builder.finish();
let mut default_builder = WritableMetadataBuilder::default();
default_builder.upsert_field_name("known_field");
default_builder.finish();
let metadata_bytes = default_builder.into_inner();

// Use the metadata to build new variant values
let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
Expand Down
15 changes: 9 additions & 6 deletions parquet-variant/src/builder/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ impl<S: BuilderSpecificState> VariantBuilderExt for ObjectFieldBuilder<'_, '_, '
mod tests {
use crate::{
ParentState, ValueBuilder, Variant, VariantBuilder, VariantMetadata,
WritableMetadataBuilder,
builder::{metadata::ReadOnlyMetadataBuilder, object::ObjectBuilder},
decoder::VariantBasicType,
};
Expand Down Expand Up @@ -501,11 +502,12 @@ mod tests {
#[test]
fn test_read_only_metadata_builder() {
// First create some metadata with a few field names
let mut default_builder = VariantBuilder::new();
default_builder.add_field_name("name");
default_builder.add_field_name("age");
default_builder.add_field_name("active");
let (metadata_bytes, _) = default_builder.finish();
let mut default_builder = WritableMetadataBuilder::default();
default_builder.upsert_field_name("name");
default_builder.upsert_field_name("age");
default_builder.upsert_field_name("active");
default_builder.finish();
let metadata_bytes = default_builder.into_inner();

// Use the metadata to build new variant values
let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
Expand Down Expand Up @@ -899,7 +901,8 @@ mod tests {
inner_list.finish();
outer_list.finish();

// Valid object should succeed
// Valid object should succeed (fresh builder — one top-level value per VariantBuilder)
let mut builder = VariantBuilder::new().with_validate_unique_fields(true);
let mut list = builder.new_list();
let mut valid_obj = list.new_object();
valid_obj.insert("m", 1);
Expand Down
11 changes: 6 additions & 5 deletions parquet-variant/tests/variant_interop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,11 +466,12 @@ fn generate_random_value(rng: &mut StdRng, builder: &mut VariantBuilder, max_dep
)
.unwrap();

// timestamp w/o timezone
builder.append_value(data_time.naive_local());

// timestamp with timezone
builder.append_value(data_time.naive_utc().and_utc());
// randomly pick timestamp with or without timezone
if rng.random_bool(0.5) {
builder.append_value(data_time.naive_local());
} else {
builder.append_value(data_time.naive_utc().and_utc());
}
}
17 => {
builder.append_value(Uuid::new_v4());
Expand Down
Loading