Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;

import java.util.HashSet;
import java.util.List;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.util.MemoryUtil;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.BaseFixedWidthVector;
Expand Down Expand Up @@ -91,7 +93,6 @@ public ValueVector visit(BaseFixedWidthVector deltaVector, Void value) {
deltaVector.getDataBuffer(),
deltaVector.getValueCount(),
targetVector.getDataBuffer());

} else {
MemoryUtil.copyMemory(
deltaVector.getDataBuffer().memoryAddress(),
Expand Down Expand Up @@ -247,8 +248,66 @@ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) {
}

@Override
public ValueVector visit(BaseVariableWidthViewVector left, Void value) {
throw new UnsupportedOperationException("View vectors are not supported.");
public ValueVector visit(BaseVariableWidthViewVector deltaVector, Void value) {
Preconditions.checkArgument(
typeVisitor.equals(deltaVector),
"The targetVector to append must have the same type as the targetVector being appended");

if (deltaVector.getValueCount() == 0) {
return targetVector; // nothing to append, return
}

int oldTargetValueCount = targetVector.getValueCount();
int newValueCount = oldTargetValueCount + deltaVector.getValueCount();

// make sure there is enough capacity
while (targetVector.getValueCapacity() < newValueCount) {
// Do not call BaseVariableWidthViewVector#reAlloc() here,
// because reallocViewDataBuffer() is always unnecessary
((BaseVariableWidthViewVector) targetVector).reallocValidityBuffer();
((BaseVariableWidthViewVector) targetVector).reallocViewBuffer();
}

// append validity buffer
BitVectorHelper.concatBits(
targetVector.getValidityBuffer(),
oldTargetValueCount,
deltaVector.getValidityBuffer(),
deltaVector.getValueCount(),
targetVector.getValidityBuffer());

// append data buffers
BaseVariableWidthViewVector targetViewVector = (BaseVariableWidthViewVector) targetVector;
List<ArrowBuf> targetDataBuffers = targetViewVector.getDataBuffers();
final int oldTargetDataBufferCount = targetDataBuffers.size();
List<ArrowBuf> deltaVectorDataBuffers = deltaVector.getDataBuffers();
deltaVectorDataBuffers.forEach(buf -> buf.getReferenceManager().retain());
targetDataBuffers.addAll(deltaVectorDataBuffers);

// append view buffer
ArrowBuf targetViewBuffer = targetVector.getDataBuffer();
MemoryUtil.copyMemory(
deltaVector.getDataBuffer().memoryAddress(),
targetViewBuffer.memoryAddress()
+ (long) BaseVariableWidthViewVector.ELEMENT_SIZE * oldTargetValueCount,
(long) BaseVariableWidthViewVector.ELEMENT_SIZE * deltaVector.getValueCount());

// update view buffer
for (int i = oldTargetValueCount; i < newValueCount; i++) {
if (targetViewVector.isSet(i) > 0
&& targetViewVector.getValueLength(i) > BaseVariableWidthViewVector.INLINE_SIZE) {
long start =
(long) i * BaseVariableWidthViewVector.ELEMENT_SIZE
+ BaseVariableWidthViewVector.LENGTH_WIDTH
+ BaseVariableWidthViewVector.PREFIX_WIDTH;
// shift buf id
int bufferId = targetViewBuffer.getInt(start);
targetViewBuffer.setInt(start, bufferId + oldTargetDataBufferCount);
}
}

targetVector.setValueCount(newValueCount);
return targetVector;
}

@Override
Expand Down
10 changes: 10 additions & 0 deletions vector/src/test/java/org/apache/arrow/vector/TestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.arrow.vector;

import java.util.Random;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType;
Expand Down Expand Up @@ -52,4 +53,13 @@ public static <T> T newVector(
Class<T> c, String name, MinorType type, BufferAllocator allocator) {
return c.cast(FieldType.nullable(type.getType()).createNewSingleVector(name, allocator, null));
}

public static String generateRandomString(int length) {
Random random = new Random();
StringBuilder sb = new StringBuilder(length);
for (int i = 0; i < length; i++) {
sb.append(random.nextInt(10)); // 0-9
}
return sb.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public void testDataBufferBasedAllocationInSameBuffer() {
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
viewVarCharVector.allocateNew(48, 4);
final int valueCount = 4;
String str4 = generateRandomString(34);
String str4 = TestUtils.generateRandomString(34);
viewVarCharVector.set(0, STR1);
viewVarCharVector.set(1, STR2);
viewVarCharVector.set(2, STR3);
Expand Down Expand Up @@ -216,7 +216,7 @@ public void testDataBufferBasedAllocationInOtherBuffer() {
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
viewVarCharVector.allocateNew(48, 4);
final int valueCount = 4;
String str4 = generateRandomString(35);
String str4 = TestUtils.generateRandomString(35);
viewVarCharVector.set(0, STR1);
viewVarCharVector.set(1, STR2);
viewVarCharVector.set(2, STR3);
Expand Down Expand Up @@ -271,7 +271,7 @@ public void testDataBufferBasedAllocationInOtherBuffer() {
public void testSetSafe() {
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
viewVarCharVector.allocateNew(1, 1);
byte[] str6 = generateRandomString(40).getBytes();
byte[] str6 = TestUtils.generateRandomString(40).getBytes();
final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5, str6);

// set data to a position out of capacity index
Expand Down Expand Up @@ -305,8 +305,8 @@ public void testMixedAllocation() {
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
viewVarCharVector.allocateNew(128, 6);
final int valueCount = 6;
String str4 = generateRandomString(35);
String str6 = generateRandomString(40);
String str4 = TestUtils.generateRandomString(35);
String str6 = TestUtils.generateRandomString(40);
viewVarCharVector.set(0, STR1);
viewVarCharVector.set(1, STR2);
viewVarCharVector.set(2, STR3);
Expand Down Expand Up @@ -405,7 +405,7 @@ public void testSetNullableViewVarCharHolder() {
setAndCheck(viewVarCharVector, i, strings.get(size - i - 1), stringHolder);
}

String longString = generateRandomString(128);
String longString = TestUtils.generateRandomString(128);
setAndCheck(viewVarCharVector, 6, longString.getBytes(), stringHolder);
}
}
Expand Down Expand Up @@ -441,7 +441,7 @@ public void testSetNullableViewVarBinaryHolder() {
setAndCheck(viewVarBinaryVector, i, strings.get(size - i - 1), holder);
}

String longString = generateRandomString(128);
String longString = TestUtils.generateRandomString(128);
setAndCheck(viewVarBinaryVector, 6, longString.getBytes(), holder);
}
}
Expand Down Expand Up @@ -1169,7 +1169,7 @@ public void testOverwriteShortFromLongString() {
vector.setValueCount(5);

// overwrite index 2 with a long string
String longString = generateRandomString(128);
String longString = TestUtils.generateRandomString(128);
byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);
// since the append-only approach is used and the remaining capacity
// is not enough to store the new string; a new buffer will be allocated.
Expand Down Expand Up @@ -1373,7 +1373,7 @@ public void testOverwriteLongFromALongerLongString() {
// since a new buffer is added to the dataBuffers
final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
String longerString = generateRandomString(35);
String longerString = TestUtils.generateRandomString(35);
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
assertTrue(remainingCapacity < longerStringBytes.length);

Expand Down Expand Up @@ -1406,7 +1406,7 @@ public void testOverwriteLongFromALongerLongString() {
// the remaining capacity is enough to store in the same data buffer
final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
String longerString = generateRandomString(24);
String longerString = TestUtils.generateRandomString(24);
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
assertTrue(remainingCapacity > longerStringBytes.length);

Expand Down Expand Up @@ -1505,7 +1505,7 @@ public void testSafeOverwriteShortFromLongString() {
vector.setValueCount(5);

// overwrite index 2 with a long string
String longString = generateRandomString(128);
String longString = TestUtils.generateRandomString(128);
byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);

vector.setSafe(2, longStringBytes);
Expand Down Expand Up @@ -1671,7 +1671,7 @@ public void testSafeOverwriteLongFromALongerLongString() {
vector.setSafe(2, STR7);
vector.setValueCount(3);

String longerString = generateRandomString(35);
String longerString = TestUtils.generateRandomString(35);
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);

vector.setSafe(1, longerStringBytes);
Expand All @@ -1697,7 +1697,7 @@ public void testSafeOverwriteLongFromALongerLongString() {
vector.setSafe(4, STR6);
vector.setValueCount(5);

String longerString = generateRandomString(24);
String longerString = TestUtils.generateRandomString(24);
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);

vector.setSafe(2, longerStringBytes);
Expand Down Expand Up @@ -1869,7 +1869,7 @@ public void testCopyFromWithNulls(
// to avoid re-allocation. This is to test copyFrom() without re-allocation.
final int numberOfValues = initialCapacity / 2 / ViewVarCharVector.ELEMENT_SIZE;

final String prefixString = generateRandomString(12);
final String prefixString = TestUtils.generateRandomString(12);

for (int i = 0; i < numberOfValues; i++) {
if (i % 3 == 0) {
Expand Down Expand Up @@ -1965,7 +1965,7 @@ public void testCopyFromSafeWithNulls(

final int numberOfValues = initialCapacity / ViewVarCharVector.ELEMENT_SIZE;

final String prefixString = generateRandomString(12);
final String prefixString = TestUtils.generateRandomString(12);

for (int i = 0; i < numberOfValues; i++) {
if (i % 3 == 0) {
Expand Down Expand Up @@ -2746,7 +2746,7 @@ private void testSplitAndTransferWithMultipleDataBuffersHelper(
*/
@Test
public void testSplitAndTransferWithMultipleDataBuffers() {
final String str4 = generateRandomString(35);
final String str4 = TestUtils.generateRandomString(35);
final byte[][] data = new byte[][] {STR1, STR2, STR3, str4.getBytes(StandardCharsets.UTF_8)};
final int startIndex = 1;
final int length = 3;
Expand Down Expand Up @@ -2851,13 +2851,4 @@ public void testVectorLoadUnloadOnMixedTypes() {
}
}
}

private String generateRandomString(int length) {
Random random = new Random();
StringBuilder sb = new StringBuilder(length);
for (int i = 0; i < length; i++) {
sb.append(random.nextInt(10)); // 0-9
}
return sb.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VariableWidthFieldVector;
import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector;
Expand Down Expand Up @@ -606,6 +607,18 @@ public static void setVector(VarCharVector vector, String... values) {
vector.setValueCount(length);
}

/** Populate values for ViewVarCharVector. */
public static void setVector(ViewVarCharVector vector, String... values) {
final int length = values.length;
vector.allocateNewSafe();
for (int i = 0; i < length; i++) {
if (values[i] != null) {
vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8));
}
}
vector.setValueCount(length);
}

/** Populate values for LargeVarCharVector. */
public static void setVector(LargeVarCharVector vector, String... values) {
final int length = values.length;
Expand Down
Loading
Loading