Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions encodings/runend/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,21 @@ impl RunEndArray {
return Ok(());
}

debug_assert!({
// Run ends must be strictly sorted for binary search to work correctly.
let pre_validation = ends.statistics().to_owned();

let is_sorted = ends
.statistics()
.compute_is_strict_sorted()
.unwrap_or(false);

// Preserve the original statistics since compute_is_strict_sorted may have mutated them.
// We don't want to run with different stats in debug mode and outside.
ends.statistics().inherit(pre_validation.iter());
is_sorted
});

// Skip host-only validation when ends are not host-resident.
if !ends.is_host() {
return Ok(());
Expand Down
76 changes: 74 additions & 2 deletions encodings/sequence/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ use vortex_array::ProstMetadata;
use vortex_array::SerializeMetadata;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::buffer::BufferHandle;
use vortex_array::expr::stats::Precision as StatPrecision;
use vortex_array::expr::stats::Stat;
use vortex_array::serde::ArrayChildren;
use vortex_array::stats::ArrayStats;
use vortex_array::stats::StatsSet;
use vortex_array::stats::StatsSetRef;
use vortex_array::validity::Validity;
use vortex_array::vtable;
Expand Down Expand Up @@ -127,13 +130,30 @@ impl SequenceArray {
length: usize,
) -> Self {
let dtype = DType::Primitive(ptype, nullability);

// A sequence A[i] = base + i * multiplier is sorted iff multiplier >= 0,
// and strictly sorted iff multiplier > 0.
let m_int = multiplier.cast::<i64>();
let is_sorted = m_int >= 0;
let is_strict_sorted = m_int > 0;

// SAFETY: we don't have duplicate stats
let stats_set = unsafe {
StatsSet::new_unchecked(vec![
(Stat::IsSorted, StatPrecision::Exact(is_sorted.into())),
(
Stat::IsStrictSorted,
StatPrecision::Exact(is_strict_sorted.into()),
),
])
};

Self {
base,
multiplier,
dtype,
len: length,
// TODO(joe): add stats, on construct or on use?
stats_set: Default::default(),
stats_set: ArrayStats::from(stats_set),
}
}

Expand Down Expand Up @@ -393,7 +413,11 @@ impl SequenceVTable {
mod tests {
use vortex_array::arrays::PrimitiveArray;
use vortex_array::assert_arrays_eq;
use vortex_array::expr::stats::Precision as StatPrecision;
use vortex_array::expr::stats::Stat;
use vortex_array::expr::stats::StatsProviderExt;
use vortex_dtype::Nullability;
use vortex_error::VortexResult;
use vortex_scalar::Scalar;
use vortex_scalar::ScalarValue;

Expand Down Expand Up @@ -444,4 +468,52 @@ mod tests {
assert!(SequenceArray::typed_new(127i8, 1i8, Nullability::NonNullable, 2).is_err());
assert!(SequenceArray::typed_new(-128i8, -1i8, Nullability::NonNullable, 2).is_err());
}

#[test]
fn positive_multiplier_is_strict_sorted() -> VortexResult<()> {
let arr = SequenceArray::typed_new(0i64, 3, Nullability::NonNullable, 4)?;

let is_sorted = arr
.statistics()
.with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsSorted));
assert_eq!(is_sorted, Some(StatPrecision::Exact(true)));

let is_strict_sorted = arr
.statistics()
.with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsStrictSorted));
assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(true)));
Ok(())
}

#[test]
fn zero_multiplier_is_sorted_not_strict() -> VortexResult<()> {
let arr = SequenceArray::typed_new(5i64, 0, Nullability::NonNullable, 4)?;

let is_sorted = arr
.statistics()
.with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsSorted));
assert_eq!(is_sorted, Some(StatPrecision::Exact(true)));

let is_strict_sorted = arr
.statistics()
.with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsStrictSorted));
assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(false)));
Ok(())
}

#[test]
fn negative_multiplier_not_sorted() -> VortexResult<()> {
let arr = SequenceArray::typed_new(10i64, -1, Nullability::NonNullable, 4)?;

let is_sorted = arr
.statistics()
.with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsSorted));
assert_eq!(is_sorted, Some(StatPrecision::Exact(false)));

let is_strict_sorted = arr
.statistics()
.with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsStrictSorted));
assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(false)));
Ok(())
}
}
8 changes: 4 additions & 4 deletions vortex-array/src/arrays/validation_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@ mod tests {
}

#[test]
fn test_varbin_array_validation_failure_offsets_not_monotonic() {
// Invalid case: offsets are not monotonically increasing.
fn test_varbin_array_validation_non_monotonic_offsets_accepted() {
// VarBin does not validate monotonicity of offsets at construction time.
// Sortedness is enforced at the builder level instead.
let offsets = buffer![0i32, 3, 2, 5].into_array(); // 3 -> 2 is decreasing.
let bytes = ByteBuffer::from(vec![0u8, 1, 2, 3, 4]);
let result = VarBinArray::try_new(
Expand All @@ -109,8 +110,7 @@ mod tests {
Validity::NonNullable,
);

assert!(matches!(result, Err(VortexError::InvalidArgument(_, _))));
assert!(result.is_err());
assert!(result.is_ok());
}

#[test]
Expand Down
5 changes: 0 additions & 5 deletions vortex-array/src/arrays/varbin/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,6 @@ impl VarBinArray {
"Offsets must have at least one element"
);

// Check offsets are sorted
if let Some(is_sorted) = offsets.statistics().compute_is_sorted() {
vortex_ensure!(is_sorted, "offsets must be sorted");
}

// Skip host-only validation when offsets/bytes are not host-resident.
if offsets.is_host() && bytes.is_on_host() {
let last_offset = offsets
Expand Down
46 changes: 46 additions & 0 deletions vortex-array/src/arrays/varbin/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ use vortex_error::vortex_panic;
use crate::IntoArray;
use crate::arrays::primitive::PrimitiveArray;
use crate::arrays::varbin::VarBinArray;
use crate::expr::stats::Precision;
use crate::expr::stats::Stat;
use crate::validity::Validity;

pub struct VarBinBuilder<O: IntegerPType> {
Expand Down Expand Up @@ -94,6 +96,17 @@ impl<O: IntegerPType> VarBinBuilder<O> {

let validity = Validity::from_bit_buffer(nulls, dtype.nullability());

// The builder guarantees offsets are monotonically increasing, so we can set
// this stat eagerly. This avoids an O(n) recomputation when the array is
// deserialized and VarBinArray::validate checks sortedness.
debug_assert!(
offsets.statistics().compute_is_sorted().unwrap_or(false),
"VarBinBuilder offsets must be sorted"
);
offsets
.statistics()
.set(Stat::IsSorted, Precision::Exact(true.into()));

// SAFETY: The builder maintains all invariants:
// - Offsets are monotonically increasing starting from 0 (guaranteed by builder logic).
// - Bytes buffer contains exactly the data referenced by offsets.
Expand All @@ -109,9 +122,13 @@ impl<O: IntegerPType> VarBinBuilder<O> {
mod tests {
use vortex_dtype::DType;
use vortex_dtype::Nullability::Nullable;
use vortex_error::VortexResult;
use vortex_scalar::Scalar;

use crate::arrays::varbin::builder::VarBinBuilder;
use crate::expr::stats::Precision;
use crate::expr::stats::Stat;
use crate::expr::stats::StatsProviderExt;

#[test]
fn test_builder() {
Expand All @@ -129,4 +146,33 @@ mod tests {
);
assert!(array.scalar_at(1).unwrap().is_null());
}

#[test]
fn offsets_have_is_sorted_stat() -> VortexResult<()> {
let mut builder = VarBinBuilder::<i32>::with_capacity(0);
builder.append_value(b"aaa");
builder.append_null();
builder.append_value(b"bbb");
let array = builder.finish(DType::Utf8(Nullable));

let is_sorted = array
.offsets()
.statistics()
.with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsSorted));
assert_eq!(is_sorted, Some(Precision::Exact(true)));
Ok(())
}

#[test]
fn empty_builder_offsets_have_is_sorted_stat() -> VortexResult<()> {
let builder = VarBinBuilder::<i32>::new();
let array = builder.finish(DType::Utf8(Nullable));

let is_sorted = array
.offsets()
.statistics()
.with_typed_stats_set(|s| s.get_as::<bool>(Stat::IsSorted));
assert_eq!(is_sorted, Some(Precision::Exact(true)));
Ok(())
}
}
6 changes: 4 additions & 2 deletions vortex-array/src/expr/stats/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ pub enum Stat {
/// Whether all values are the same (nulls are not equal to other non-null values,
/// so this is true iff all values are null or all values are the same non-null value)
IsConstant = 0,
/// Whether the non-null values in the array are sorted (i.e., we skip nulls)
/// Whether the non-null values in the array are sorted in ascending order (i.e., we skip nulls)
/// This may later be extended to support descending order, but for now we only support ascending order.
IsSorted = 1,
/// Whether the non-null values in the array are strictly sorted (i.e., sorted with no duplicates)
/// Whether the non-null values in the array are strictly sorted in ascending order (i.e., sorted with no duplicates)
/// This may later be extended to support descending order, but for now we only support ascending order.
IsStrictSorted = 2,
/// The maximum value in the array (ignoring nulls, unless all values are null)
Max = 3,
Expand Down
Loading