Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions vortex-array/src/arrays/constant/vtable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,15 @@ impl VTable for ConstantVTable {
bytes: &[u8],
dtype: &DType,
_len: usize,
_session: &VortexSession,
session: &VortexSession,
) -> VortexResult<Self::Metadata> {
// Empty bytes indicates an old writer that didn't produce metadata.
if bytes.is_empty() {
return Ok(None);
}

// Otherwise, deserialize the constant scalar from the metadata.
let scalar_value = ScalarValue::from_proto_bytes(bytes, dtype)?;
let scalar_value = ScalarValue::from_proto_bytes(bytes, dtype, session)?;
Some(Scalar::try_new(dtype.clone(), scalar_value)).transpose()
}

Expand Down
11 changes: 8 additions & 3 deletions vortex-array/src/stats/flatbuffers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use vortex_error::vortex_bail;
use vortex_flatbuffers::WriteFlatBuffer;
use vortex_flatbuffers::array as fba;
use vortex_scalar::ScalarValue;
use vortex_session::VortexSession;

use crate::expr::stats::Precision;
use crate::expr::stats::Stat;
Expand Down Expand Up @@ -113,6 +114,7 @@ impl StatsSet {
pub fn from_flatbuffer<'a>(
fb: &fba::ArrayStats<'a>,
array_dtype: &DType,
session: &VortexSession,
) -> VortexResult<Self> {
let mut stats_set = StatsSet::default();

Expand Down Expand Up @@ -142,7 +144,8 @@ impl StatsSet {
if let Some(max) = fb.max()
&& let Some(stat_dtype) = stat_dtype
{
let value = ScalarValue::from_proto_bytes(max.bytes(), &stat_dtype)?;
let value =
ScalarValue::from_proto_bytes(max.bytes(), &stat_dtype, session)?;
let Some(value) = value else {
continue;
};
Expand All @@ -161,7 +164,8 @@ impl StatsSet {
if let Some(min) = fb.min()
&& let Some(stat_dtype) = stat_dtype
{
let value = ScalarValue::from_proto_bytes(min.bytes(), &stat_dtype)?;
let value =
ScalarValue::from_proto_bytes(min.bytes(), &stat_dtype, session)?;
let Some(value) = value else {
continue;
};
Expand Down Expand Up @@ -193,7 +197,8 @@ impl StatsSet {
if let Some(sum) = fb.sum()
&& let Some(stat_dtype) = stat_dtype
{
let value = ScalarValue::from_proto_bytes(sum.bytes(), &stat_dtype)?;
let value =
ScalarValue::from_proto_bytes(sum.bytes(), &stat_dtype, session)?;
let Some(value) = value else {
continue;
};
Expand Down
7 changes: 7 additions & 0 deletions vortex-dtype/src/datetime/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ impl Timestamp {
Self::new_with_tz(time_unit, None, nullability)
}

// TODO(connor): This should probably be deprecated in favor of `new_with_options`.
/// Creates a new Timestamp extension dtype with the given time unit, timezone, and nullability.
pub fn new_with_tz(
time_unit: TimeUnit,
Expand All @@ -46,6 +47,12 @@ impl Timestamp {
)
.vortex_expect("failed to create timestamp dtype")
}

/// Creates a new Timestamp extension dtype with the given options and nullability.
pub fn new_with_options(options: TimestampOptions, nullability: Nullability) -> ExtDType<Self> {
ExtDType::try_new(options, DType::Primitive(PType::I64, nullability))
.vortex_expect("failed to create timestamp dtype")
}
}

/// Options for the Timestamp DType.
Expand Down
38 changes: 27 additions & 11 deletions vortex-dtype/src/extension/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use crate::Nullability;
pub type ExtID = ArcRef<str>;

/// An extension data type.
#[derive(Clone)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExtDType<V: ExtDTypeVTable>(Arc<ExtDTypeAdapter<V>>);

// Convenience impls for zero-sized VTables
Expand Down Expand Up @@ -60,6 +60,11 @@ impl<V: ExtDTypeVTable> ExtDType<V> {
self.0.id()
}

/// Returns the vtable of the extension type.
pub fn vtable(&self) -> &V {
&self.0.vtable
}

/// Returns the metadata of the extension type.
pub fn metadata(&self) -> &V::Metadata {
&self.0.metadata
Expand All @@ -70,6 +75,18 @@ impl<V: ExtDTypeVTable> ExtDType<V> {
&self.0.storage_dtype
}

/// Returns the nullability of the storage dtype.
#[inline]
pub fn nullability(&self) -> Nullability {
self.storage_dtype().nullability()
}

/// Returns true if the storage dtype is nullable.
#[inline]
pub fn is_nullable(&self) -> bool {
self.nullability().is_nullable()
}

/// Erase the concrete type information, returning a type-erased extension dtype.
pub fn erased(self) -> ExtDTypeRef {
ExtDTypeRef(self.0)
Expand Down Expand Up @@ -135,9 +152,14 @@ impl ExtDTypeRef {
self.0.storage_dtype()
}

/// Returns the nullability of the storage dtype.
pub fn nullability(&self) -> Nullability {
self.storage_dtype().nullability()
}

/// Returns a new ExtDTypeRef with the given nullability.
pub fn with_nullability(&self, nullability: Nullability) -> Self {
if self.storage_dtype().nullability() == nullability {
if self.nullability() == nullability {
self.clone()
} else {
self.0.with_nullability(nullability)
Expand Down Expand Up @@ -211,7 +233,7 @@ impl ExtDTypeRef {

/// Wrapper for type-erased extension dtype metadata.
pub struct ExtDTypeMetadata<'a> {
pub(super) ext_dtype: &'a ExtDTypeRef,
ext_dtype: &'a ExtDTypeRef,
}

impl ExtDTypeMetadata<'_> {
Expand Down Expand Up @@ -249,7 +271,7 @@ impl Hash for ExtDTypeMetadata<'_> {
}

/// An object-safe trait encapsulating the behavior for extension DTypes.
trait ExtDTypeImpl: 'static + Send + Sync + private::Sealed {
trait ExtDTypeImpl: 'static + Send + Sync {
fn as_any(&self) -> &dyn Any;
fn id(&self) -> ExtID;
fn storage_dtype(&self) -> &DType;
Expand All @@ -262,6 +284,7 @@ trait ExtDTypeImpl: 'static + Send + Sync + private::Sealed {
fn with_nullability(&self, nullability: Nullability) -> ExtDTypeRef;
}

#[derive(Debug, Hash, PartialEq, Eq)]
struct ExtDTypeAdapter<V: ExtDTypeVTable> {
vtable: V,
metadata: V::Metadata,
Expand Down Expand Up @@ -314,10 +337,3 @@ impl<V: ExtDTypeVTable> ExtDTypeImpl for ExtDTypeAdapter<V> {
.vortex_expect("Extension DType {} incorrect fails validation with the same storage type but different nullability").erased()
}
}

mod private {
use super::ExtDTypeAdapter;

pub trait Sealed {}
impl<V: super::ExtDTypeVTable> Sealed for ExtDTypeAdapter<V> {}
}
1 change: 1 addition & 0 deletions vortex-scalar/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ arbitrary = { workspace = true, optional = true }
arrow-array = { workspace = true }
bytes = { workspace = true }
itertools = { workspace = true }
jiff = { workspace = true }
num-traits = { workspace = true }
paste = { workspace = true }
prost = { workspace = true }
Expand Down
44 changes: 1 addition & 43 deletions vortex-scalar/src/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,6 @@ mod tests {
use vortex_dtype::datetime::TimeUnit;
use vortex_dtype::datetime::Timestamp;
use vortex_dtype::datetime::TimestampOptions;
use vortex_dtype::extension::ExtDTypeVTable;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;

use crate::DecimalValue;
use crate::Scalar;
Expand Down Expand Up @@ -443,45 +440,6 @@ mod tests {
Arc::<dyn Datum>::try_from(&list_scalar).unwrap();
}

#[test]
#[should_panic(expected = "Cannot convert extension scalar")]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should still panic shouldn't it?

fn test_non_temporal_extension_to_arrow_todo() {
use vortex_dtype::ExtID;

#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
struct SomeExt;
impl ExtDTypeVTable for SomeExt {
type Metadata = String;

fn id(&self) -> ExtID {
ExtID::new_ref("some_ext")
}

fn serialize(&self, _options: &Self::Metadata) -> VortexResult<Vec<u8>> {
vortex_bail!("not implemented")
}

fn deserialize(&self, _data: &[u8]) -> VortexResult<Self::Metadata> {
vortex_bail!("not implemented")
}

fn validate_dtype(
&self,
_options: &Self::Metadata,
_storage_dtype: &DType,
) -> VortexResult<()> {
Ok(())
}
}

let scalar = Scalar::extension::<SomeExt>(
"".into(),
Scalar::primitive(42i32, Nullability::NonNullable),
);

Arc::<dyn Datum>::try_from(&scalar).unwrap();
}

#[rstest]
#[case(TimeUnit::Nanoseconds, PType::I64, 123456789i64)]
#[case(TimeUnit::Microseconds, PType::I64, 123456789i64)]
Expand Down Expand Up @@ -553,7 +511,7 @@ mod tests {
#[rstest]
#[case(TimeUnit::Nanoseconds, "UTC", 1234567890000000000i64)]
#[case(TimeUnit::Microseconds, "EST", 1234567890000000i64)]
#[case(TimeUnit::Milliseconds, "ABC", 1234567890000i64)]
#[case(TimeUnit::Milliseconds, "CET", 1234567890000i64)]
#[case(TimeUnit::Seconds, "UTC", 1234567890i64)]
fn test_temporal_timestamp_tz_to_arrow(
#[case] time_unit: TimeUnit,
Expand Down
13 changes: 13 additions & 0 deletions vortex-scalar/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,19 @@ impl Scalar {
// If the target is an extension type, then we want to cast to its storage type.
if let Some(ext_dtype) = target_dtype.as_extension_opt() {
let cast_storage_scalar_value = self.cast(ext_dtype.storage_dtype())?.into_value();

// NEED A SESSION!
// let ext_scalar_registry = session.scalars();
// let dyn_scalar_vtable = ext_scalar_registry
// .registry()
// .find(&ext_dtype.id())
// .ok_or_else(|| {
// vortex_err!(
// "extension type scalar {} did not exist in the registry",
// ext_dtype.id()
// )
// })?;

return Scalar::try_new(target_dtype.clone(), cast_storage_scalar_value);
}

Expand Down
64 changes: 50 additions & 14 deletions vortex-scalar/src/constructor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@ use vortex_dtype::ExtDTypeRef;
use vortex_dtype::NativePType;
use vortex_dtype::Nullability;
use vortex_dtype::PType;
use vortex_dtype::extension::ExtDTypeVTable;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_ensure_eq;
use vortex_error::vortex_err;
use vortex_error::vortex_panic;
use vortex_session::VortexSession;

use crate::DecimalValue;
use crate::PValue;
use crate::Scalar;
use crate::ScalarValue;
use crate::extension::ExtScalarVTable;
use crate::extension::ExtScalarValue;
use crate::session::ScalarSessionExt;

// TODO(connor): Really, we want `try_` constructors that return errors instead of just panic.
impl Scalar {
Expand Down Expand Up @@ -170,27 +176,57 @@ impl Scalar {
.vortex_expect("unable to construct a list `Scalar`")
}

/// Creates a new extension scalar wrapping the given storage value.
pub fn extension<V: ExtDTypeVTable + Default>(options: V::Metadata, value: Scalar) -> Self {
let ext_dtype = ExtDType::<V>::try_new(options, value.dtype().clone())
.vortex_expect("Failed to create extension dtype");
Self::try_new(DType::Extension(ext_dtype.erased()), value.into_value())
.vortex_expect("unable to construct an extension `Scalar`")
}

// TODO(connor): This needs to return a `VortexResult` instead.
/// Creates a new extension scalar wrapping the given storage value.
///
/// # Panics
///
/// Panics if the storage dtype of `ext_dtype` does not match `value`'s dtype.
pub fn extension_ref(ext_dtype: ExtDTypeRef, value: Scalar) -> Self {
assert_eq!(ext_dtype.storage_dtype(), value.dtype());
Self::try_new(DType::Extension(ext_dtype), value.into_value())
/// Panics if the storage dtype is incompatible with the extension type, or if the storage
/// value fails validation.
pub fn extension<V: ExtScalarVTable + Default>(metadata: V::Metadata, value: Scalar) -> Self {
let ext_dtype = ExtDType::<V>::try_new(metadata, value.dtype().clone())
.vortex_expect("Failed to create extension dtype");
let storage_value = value.into_value();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can value.into_parts() to avoid the dtype clone above.
Also rename value -> storage


let ext_value = storage_value.map(|sv| {
let owned = ExtScalarValue::<V>::try_new(ext_dtype.clone(), sv)
.vortex_expect("unable to construct an extension `Scalar`");
ScalarValue::Extension(owned.erased())
});

Self::try_new(DType::Extension(ext_dtype.erased()), ext_value)
.vortex_expect("unable to construct an extension `Scalar`")
}

/// TODO docs.
pub fn extension_ref(
ext_dtype: ExtDTypeRef,
value: Scalar,
session: &VortexSession,
) -> VortexResult<Self> {
let (storage_dtype, storage_value) = value.into_parts();
Self::extension_ref_from_value(ext_dtype, &storage_dtype, storage_value, session)
}

/// TODO docs.
pub fn extension_ref_from_value(
ext_dtype: ExtDTypeRef,
storage_dtype: &DType,
storage_value: Option<ScalarValue>,
session: &VortexSession,
) -> VortexResult<Self> {
vortex_ensure_eq!(ext_dtype.storage_dtype(), storage_dtype);

let ext_value = Self::extension_value(&ext_dtype, storage_value, session)?;

Ok(
// SAFETY: `create_ext_scalar_value_ref` validates that the scalar value is compatible.
unsafe { Scalar::new_unchecked(DType::Extension(ext_dtype), ext_value) },
)
}
}

/// A helper enum for creating a [`ListScalar`].
/// A helper enum for creating a list scalar.
enum ListKind {
/// Variable-length list.
Variable,
Expand Down
Loading
Loading