Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions parquet-variant-compute/src/shred_variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1189,19 +1189,14 @@ mod tests {

let variant_array = shred_variant(&input, &DataType::FixedSizeBinary(16)).unwrap();

// // inspect the typed_value Field and make sure it contains the canonical Uuid extension type
// let typed_value_field = variant_array
// .inner()
// .fields()
// .into_iter()
// .find(|f| f.name() == "typed_value")
// .unwrap();

// assert!(
// typed_value_field
// .try_extension_type::<extension::Uuid>()
// .is_ok()
// );
let typed_value_field = variant_array
.inner()
.fields()
.into_iter()
.find(|f| f.name() == "typed_value")
.unwrap();

assert!(typed_value_field.has_valid_extension_type::<arrow_schema::extension::Uuid>());

// probe the downcasted typed_value array to make sure uuids are shredded correctly
let uuids = variant_array
Expand Down
30 changes: 27 additions & 3 deletions parquet-variant-compute/src/variant_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use arrow::datatypes::{
TimestampMicrosecondType, TimestampNanosecondType,
};
use arrow::error::Result;
use arrow_schema::extension::ExtensionType;
use arrow_schema::extension::{ExtensionType, Uuid as UuidExtension};
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, TimeUnit};
use chrono::{DateTime, NaiveTime};
use parquet_variant::{
Expand Down Expand Up @@ -327,7 +327,7 @@ impl VariantArray {
builder = builder.with_field("value", value, true);
}
if let Some(typed_value) = typed_value.clone() {
builder = builder.with_field("typed_value", typed_value, true);
builder = builder.with_field_ref(typed_value_field(&typed_value), typed_value);
}
if let Some(nulls) = nulls {
builder = builder.with_nulls(nulls);
Expand Down Expand Up @@ -713,7 +713,7 @@ impl ShreddedVariantFieldArray {
builder = builder.with_field("value", value, true);
}
if let Some(typed_value) = typed_value.clone() {
builder = builder.with_field("typed_value", typed_value, true);
builder = builder.with_field_ref(typed_value_field(&typed_value), typed_value);
}
if let Some(nulls) = nulls {
builder = builder.with_nulls(nulls);
Expand Down Expand Up @@ -868,6 +868,20 @@ impl TryFrom<&StructArray> for ShreddingState {
}
}

/// Build the `typed_value` [`FieldRef`] for a shredded column.
///
/// The Variant spec maps `FixedSizeBinary(16)` exclusively to UUID, so any
/// shredded column of that type must carry the canonical [`UuidExtension`]
/// extension metadata on its field.
fn typed_value_field(array: &ArrayRef) -> FieldRef {
let field = Field::new("typed_value", array.data_type().clone(), true);
let field = match array.data_type() {
DataType::FixedSizeBinary(16) => field.with_extension_type(UuidExtension),
_ => field,
};
Arc::new(field)
}

/// Builds struct arrays from component fields
///
/// TODO: move to arrow crate
Expand All @@ -891,6 +905,16 @@ impl StructArrayBuilder {
self
}

/// Add an array to this struct array using a caller-supplied [`FieldRef`].
///
/// Use this when the field carries metadata (e.g. an extension type) that
/// would be lost if the field were synthesized from the array's data type alone.
pub fn with_field_ref(mut self, field: FieldRef, array: ArrayRef) -> Self {
self.fields.push(field);
self.arrays.push(array);
self
}

/// Set the null buffer for this struct array.
pub fn with_nulls(mut self, nulls: NullBuffer) -> Self {
self.nulls = Some(nulls);
Expand Down
Loading