Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions arrow/array/dictionary.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ func arrayApproxEqualDict(l, r *Dictionary, opt equalOption) bool {
type IndexBuilder struct {
Builder
Append func(int)
UnsafeAppend func(int)
}

func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) (ret IndexBuilder, err error) {
Expand All @@ -324,34 +325,58 @@ func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) (ret
ret.Append = func(idx int) {
ret.Builder.(*Int8Builder).Append(int8(idx))
}
ret.UnsafeAppend = func(idx int) {
ret.Builder.(*Int8Builder).UnsafeAppend(int8(idx))
}
case arrow.UINT8:
ret.Append = func(idx int) {
ret.Builder.(*Uint8Builder).Append(uint8(idx))
}
ret.UnsafeAppend = func(idx int) {
ret.Builder.(*Uint8Builder).UnsafeAppend(uint8(idx))
}
case arrow.INT16:
ret.Append = func(idx int) {
ret.Builder.(*Int16Builder).Append(int16(idx))
}
ret.UnsafeAppend = func(idx int) {
ret.Builder.(*Int16Builder).UnsafeAppend(int16(idx))
}
case arrow.UINT16:
ret.Append = func(idx int) {
ret.Builder.(*Uint16Builder).Append(uint16(idx))
}
ret.UnsafeAppend = func(idx int) {
ret.Builder.(*Uint16Builder).UnsafeAppend(uint16(idx))
}
case arrow.INT32:
ret.Append = func(idx int) {
ret.Builder.(*Int32Builder).Append(int32(idx))
}
ret.UnsafeAppend = func(idx int) {
ret.Builder.(*Int32Builder).UnsafeAppend(int32(idx))
}
case arrow.UINT32:
ret.Append = func(idx int) {
ret.Builder.(*Uint32Builder).Append(uint32(idx))
}
ret.UnsafeAppend = func(idx int) {
ret.Builder.(*Uint32Builder).UnsafeAppend(uint32(idx))
}
case arrow.INT64:
ret.Append = func(idx int) {
ret.Builder.(*Int64Builder).Append(int64(idx))
}
ret.UnsafeAppend = func(idx int) {
ret.Builder.(*Int64Builder).UnsafeAppend(int64(idx))
}
case arrow.UINT64:
ret.Append = func(idx int) {
ret.Builder.(*Uint64Builder).Append(uint64(idx))
}
ret.UnsafeAppend = func(idx int) {
ret.Builder.(*Uint64Builder).UnsafeAppend(uint64(idx))
}
default:
debug.Assert(false, "dictionary index type must be integral")
err = fmt.Errorf("dictionary index type must be integral, not %s", dt)
Expand Down Expand Up @@ -646,6 +671,14 @@ func (b *dictionaryBuilder) AppendEmptyValues(n int) {
}
}

func (b *dictionaryBuilder) UnsafeAppendBoolToBitmap(v bool) {
if !v {
b.nulls += 1
}
b.length += 1
b.idxBuilder.UnsafeAppendBoolToBitmap(v)
}

func (b *dictionaryBuilder) Reserve(n int) {
b.idxBuilder.Reserve(n)
}
Expand Down Expand Up @@ -781,6 +814,13 @@ func (b *dictionaryBuilder) insertDictBytes(val []byte) error {
return err
}

func (b *dictionaryBuilder) unsafeAppendValue(val interface{}) error {
idx, _, err := b.memoTable.GetOrInsert(val)
b.idxBuilder.UnsafeAppend(idx)
b.length += 1
return err
}

func (b *dictionaryBuilder) appendValue(val interface{}) error {
idx, _, err := b.memoTable.GetOrInsert(val)
b.idxBuilder.Append(idx)
Expand Down Expand Up @@ -990,6 +1030,27 @@ type dictBuilder[T arrow.ValueType] struct {
dictionaryBuilder
}


func (b *dictBuilder[T]) UnsafeAppend(v T) error {
switch val := any(v).(type) {
case arrow.Duration:
return b.unsafeAppendValue(int64(val))
case arrow.Timestamp:
return b.unsafeAppendValue(int64(val))
case arrow.Time32:
return b.unsafeAppendValue(int32(val))
case arrow.Time64:
return b.unsafeAppendValue(int64(val))
case arrow.Date32:
return b.unsafeAppendValue(int32(val))
case arrow.Date64:
return b.unsafeAppendValue(int64(val))
case arrow.MonthInterval:
return b.unsafeAppendValue(int32(val))
}
return b.unsafeAppendValue(v)
}

func (b *dictBuilder[T]) Append(v T) error {
switch val := any(v).(type) {
case arrow.Duration:
Expand Down
41 changes: 41 additions & 0 deletions arrow/array/dictionary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,47 @@ func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderInit() {
p.True(array.Equal(expected, arr))
}

func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderReserveAndAppend() {
expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
bldr := array.NewDictionaryBuilder(p.mem, expectedType)
defer bldr.Release()

builder := reflect.ValueOf(bldr)
appendFn := builder.MethodByName("UnsafeAppend")
validFn := builder.MethodByName("UnsafeAppendBoolToBitmap")

bldr.Reserve(7)
validFn.Call([]reflect.Value{reflect.ValueOf(true)})
validFn.Call([]reflect.Value{reflect.ValueOf(false)})
appendFn.Call([]reflect.Value{reflect.ValueOf(0).Convert(p.reftyp)})
appendFn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})
validFn.Call([]reflect.Value{reflect.ValueOf(false)})
appendFn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})
appendFn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})

p.EqualValues(7, bldr.Len())
p.EqualValues(2, bldr.NullN())

p.EqualValues(3, bldr.DictionarySize())

arr := bldr.NewArray().(*array.Dictionary)
defer arr.Release()

p.True(arrow.TypeEqual(expectedType, arr.DataType()))
expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, strings.NewReader("[0, 1, 2]"))
p.NoError(err)
defer expectedDict.Release()

expectedIndices, _, err := array.FromJSON(p.mem, expectedType.IndexType, strings.NewReader("[0, null, 0, 1, null, 1, 2]"))
p.NoError(err)
defer expectedIndices.Release()

expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict)
defer expected.Release()

p.True(array.Equal(expected, arr))
}

func (p *PrimitiveDictionaryTestSuite) TestDictionaryNewBuilder() {
valueType := p.typ
dictArr, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2]"))
Expand Down
4 changes: 4 additions & 0 deletions arrow/array/encoded.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,10 @@ func (b *RunEndEncodedBuilder) AppendNulls(n int) {
}
}

func (b *RunEndEncodedBuilder) UnsafeAppendBoolToBitmap(v bool) {
panic("Calling UnsafeAppendBoolToBitmap on a run-end encoded array is semantically undefined.")
}

func (b *RunEndEncodedBuilder) NullN() int {
return UnknownNullCount
}
Expand Down
Loading