Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions arrow-schema/src/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ use crate::{
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
};

/// The metadata key for the Parquet field id of a [`Field`].
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My one concern is that this now puts a Parquet-specific implementation detail in the arrow crate 🤔

The dependency until now has been the other way around.

Maybe we could expose this function through a `ParquetFieldExt` extension trait in the parquet crate, so that the Parquet-specific logic stays in that crate rather than in arrow?

pub const PARQUET_FIELD_ID_META_KEY: &str = "PARQUET:field_id";

/// A reference counted [`Field`]
pub type FieldRef = Arc<Field>;

Expand Down Expand Up @@ -504,6 +507,16 @@ impl Field {
.map(String::as_ref)
}

/// Looks up the Parquet field id stored in the metadata of this [`Field`].
///
/// The id is read from the [`PARQUET_FIELD_ID_META_KEY`] metadata entry and
/// parsed as an `i32`. `None` is returned when the entry is absent or when
/// its value does not parse as an `i32`.
pub fn id(&self) -> Option<i32> {
    self.metadata()
        .get(PARQUET_FIELD_ID_META_KEY)
        // A malformed value is treated the same as a missing one.
        .and_then(|raw| raw.parse::<i32>().ok())
}

/// Returns an instance of the given [`ExtensionType`] of this [`Field`],
/// if set in the [`Field::metadata`].
///
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/arrow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ pub const ARROW_SCHEMA_META_KEY: &str = "ARROW:schema";
///
/// [`Field::metadata`]: arrow_schema::Field::metadata
/// [`BasicTypeInfo::id`]: crate::schema::types::BasicTypeInfo::id
pub const PARQUET_FIELD_ID_META_KEY: &str = "PARQUET:field_id";
pub use arrow_schema::PARQUET_FIELD_ID_META_KEY;

/// A [`ProjectionMask`] identifies a set of columns within a potentially nested schema to project
///
Expand Down
7 changes: 1 addition & 6 deletions parquet/src/arrow/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
} else {
Repetition::REQUIRED
};
let id = field_id(field);
let id = field.id();
// create type from field
match field.data_type() {
DataType::Null => Type::primitive_type_builder(name, PhysicalType::INT32)
Expand Down Expand Up @@ -860,11 +860,6 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
}
}

/// Reads the Parquet field id from `field`'s metadata, if one is recorded.
///
/// Returns `None` when the `PARQUET_FIELD_ID_META_KEY` entry is missing or
/// when its value does not parse as an `i32`.
fn field_id(field: &Field) -> Option<i32> {
    field
        .metadata()
        .get(super::PARQUET_FIELD_ID_META_KEY)
        // Fail quietly if not a valid integer
        .and_then(|raw| raw.parse::<i32>().ok())
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading