Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions core/src/main/java/org/apache/iceberg/ColumnFileInfo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import java.util.List;
import org.apache.iceberg.types.Types;

/** Information about a column file. */
interface ColumnFileInfo {
Types.NestedField FIELD_IDS =
Types.NestedField.required(
159,
"field_ids",
Types.ListType.ofRequired(160, Types.IntegerType.get()),
"Field IDs this column file contains");
Types.NestedField LOCATION =
Types.NestedField.required(
161, "location", Types.StringType.get(), "Location of the column file");
Types.NestedField FILE_SIZE_IN_BYTES =
Types.NestedField.required(
162, "file_size_in_bytes", Types.LongType.get(), "Total column file size in bytes");
Types.NestedField SEQUENCE_NUMBER =
Types.NestedField.optional(
163, "sequence_number", Types.LongType.get(), "Sequence number of the column file");

static Types.StructType schema() {
return Types.StructType.of(FIELD_IDS, LOCATION, FILE_SIZE_IN_BYTES, SEQUENCE_NUMBER);
}

/** Returns the field IDs contained in this column file. */
List<Integer> fieldIds();

/** Returns the location of the column file. */
String location();

/** Returns the total size of the column file in bytes. */
long fileSizeInBytes();

/** Returns the sequence number of the column file, or null if not set. */
Long sequenceNumber();

/** Copies this column file info. */
ColumnFileInfo copy();
}
189 changes: 189 additions & 0 deletions core/src/main/java/org/apache/iceberg/ColumnFileInfoStruct.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import org.apache.iceberg.avro.SupportsIndexProjection;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.ArrayUtil;

/** Mutable {@link StructLike} implementation of {@link ColumnFileInfo}. */
class ColumnFileInfoStruct extends SupportsIndexProjection implements ColumnFileInfo, Serializable {
private static final Types.StructType BASE_TYPE =
Types.StructType.of(
ColumnFileInfo.FIELD_IDS,
ColumnFileInfo.LOCATION,
ColumnFileInfo.FILE_SIZE_IN_BYTES,
ColumnFileInfo.SEQUENCE_NUMBER);

private int[] fieldIds = null;
private String location = null;
private long fileSizeInBytes = -1L;
private Long sequenceNumber = null;

/** Used by internal readers to instantiate this class with a projection schema. */
ColumnFileInfoStruct(Types.StructType projection) {
super(BASE_TYPE, projection);
}

private ColumnFileInfoStruct(
int[] fieldIds, String location, long fileSizeInBytes, Long sequenceNumber) {
super(BASE_TYPE, BASE_TYPE);
this.fieldIds = fieldIds;
this.location = location;
this.fileSizeInBytes = fileSizeInBytes;
this.sequenceNumber = sequenceNumber;
}

/** Copy constructor. */
private ColumnFileInfoStruct(ColumnFileInfoStruct toCopy) {
super(toCopy);
this.fieldIds =
toCopy.fieldIds != null ? Arrays.copyOf(toCopy.fieldIds, toCopy.fieldIds.length) : null;
this.location = toCopy.location;
this.fileSizeInBytes = toCopy.fileSizeInBytes;
this.sequenceNumber = toCopy.sequenceNumber;
}

@Override
public List<Integer> fieldIds() {
return fieldIds != null ? ArrayUtil.toUnmodifiableIntList(fieldIds) : null;
}

@Override
public String location() {
return location;
}

@Override
public long fileSizeInBytes() {
return fileSizeInBytes;
}

@Override
public Long sequenceNumber() {
return sequenceNumber;
}

@Override
public ColumnFileInfoStruct copy() {
return new ColumnFileInfoStruct(this);
}

@Override
protected <T> T internalGet(int pos, Class<T> javaClass) {
return javaClass.cast(getByPos(pos));
}

private Object getByPos(int pos) {
switch (pos) {
case 0:
return fieldIds();
case 1:
return location;
case 2:
return fileSizeInBytes;
case 3:
return sequenceNumber;
default:
throw new UnsupportedOperationException("Unknown field ordinal: " + pos);
}
}

@Override
@SuppressWarnings("unchecked")
protected <T> void internalSet(int pos, T value) {
switch (pos) {
case 0:
this.fieldIds = ArrayUtil.toIntArray((List<Integer>) value);
break;
case 1:
// always coerce to String for Serializable
this.location = value.toString();
break;
case 2:
this.fileSizeInBytes = (Long) value;
break;
case 3:
this.sequenceNumber = (Long) value;
break;
default:
// ignore the object, it must be from a newer version of the format
}
}

static Builder builder() {
return new Builder();
}

@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("field_ids", fieldIds == null ? "null" : fieldIds())
.add("location", location)
.add("file_size_in_bytes", fileSizeInBytes)
.add("sequence_number", sequenceNumber == null ? "null" : sequenceNumber)
.toString();
}

static class Builder {
private int[] fieldIds = null;
private String location = null;
private long fileSizeInBytes = -1L;
private Long sequenceNumber = null;

Builder fieldIds(List<Integer> ids) {
this.fieldIds = ids != null ? ArrayUtil.toIntArray(ids) : null;
return this;
}

Builder location(String columnFileLocation) {
this.location = columnFileLocation;
return this;
}

Builder fileSizeInBytes(long size) {
this.fileSizeInBytes = size;
return this;
}

Builder sequenceNumber(Long sequence) {
this.sequenceNumber = sequence;
return this;
}

ColumnFileInfoStruct build() {
Preconditions.checkArgument(fieldIds != null, "Invalid field IDs: null");
Preconditions.checkArgument(fieldIds.length > 0, "Invalid field IDs: empty");
Preconditions.checkArgument(location != null, "Invalid location: null");
Preconditions.checkArgument(!location.isEmpty(), "Invalid location: empty");
Preconditions.checkArgument(
fileSizeInBytes >= 0, "Invalid file size in bytes: %s (must be >= 0)", fileSizeInBytes);
Preconditions.checkArgument(
sequenceNumber == null || sequenceNumber >= 0,
"Invalid sequence number: %s (must be >= 0)",
sequenceNumber);
return new ColumnFileInfoStruct(fieldIds, location, fileSizeInBytes, sequenceNumber);
}
}
}
12 changes: 11 additions & 1 deletion core/src/main/java/org/apache/iceberg/TrackedFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ interface TrackedFile {
"equality_ids",
Types.ListType.ofRequired(136, Types.IntegerType.get()),
"Field ids used to determine row equality in equality delete files");
Types.NestedField COLUMN_FILES =
Types.NestedField.optional(
157,
"column_files",
Types.ListType.ofRequired(158, ColumnFileInfo.schema()),
"Column update files");

static Types.StructType schemaWithContentStats(
Types.StructType partitionType, Types.StructType contentStatsType) {
Expand All @@ -110,7 +116,8 @@ static Types.StructType schemaWithContentStats(
MANIFEST_INFO,
KEY_METADATA,
SPLIT_OFFSETS,
EQUALITY_IDS);
EQUALITY_IDS,
COLUMN_FILES);
}

/** Returns the tracking information for this entry. */
Expand Down Expand Up @@ -158,6 +165,9 @@ static Types.StructType schemaWithContentStats(
/** Returns the set of field IDs used for equality comparison in equality delete files. */
List<Integer> equalityIds();

/** Returns the column files for this file. */
List<ColumnFileInfo> columnFiles();

/** Copies this tracked file. */
TrackedFile copy();

Expand Down
23 changes: 22 additions & 1 deletion core/src/main/java/org/apache/iceberg/TrackedFileStruct.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.avro.SupportsIndexProjection;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.types.Type;
Expand Down Expand Up @@ -65,7 +67,8 @@ public PartitionData copy() {
TrackedFile.MANIFEST_INFO,
TrackedFile.KEY_METADATA,
TrackedFile.SPLIT_OFFSETS,
TrackedFile.EQUALITY_IDS);
TrackedFile.EQUALITY_IDS,
TrackedFile.COLUMN_FILES);

private FileContent contentType = null;
private String location = null;
Expand All @@ -81,6 +84,7 @@ public PartitionData copy() {
private Integer sortOrderId = null;
private DeletionVector deletionVector = null;
private ManifestInfo manifestInfo = null;
private List<ColumnFileInfo> columnFiles = null;
private byte[] keyMetadata = null;
private long[] splitOffsets = null;
private int[] equalityIds = null;
Expand Down Expand Up @@ -155,6 +159,10 @@ private TrackedFileStruct(TrackedFileStruct toCopy, boolean withStats, Set<Integ
toCopy.equalityIds != null
? Arrays.copyOf(toCopy.equalityIds, toCopy.equalityIds.length)
: null;
this.columnFiles =
toCopy.columnFiles != null
? toCopy.columnFiles.stream().map(ColumnFileInfo::copy).collect(Collectors.toList())
: null;
}

@Override
Expand Down Expand Up @@ -232,6 +240,11 @@ public List<Integer> equalityIds() {
return equalityIds != null ? ArrayUtil.toUnmodifiableIntList(equalityIds) : null;
}

@Override
public List<ColumnFileInfo> columnFiles() {
return columnFiles != null ? Collections.unmodifiableList(columnFiles) : null;
}

@Override
public TrackedFile copy() {
return new TrackedFileStruct(this, true, null);
Expand Down Expand Up @@ -279,6 +292,8 @@ private Object getByPos(int pos) {
return splitOffsets();
case 14:
return equalityIds();
case 15:
return columnFiles;
default:
throw new UnsupportedOperationException("Unknown field ordinal: " + pos);
}
Expand Down Expand Up @@ -333,6 +348,11 @@ protected <T> void internalSet(int pos, T value) {
case 14:
this.equalityIds = ArrayUtil.toIntArray((List<Integer>) value);
break;
case 15:
this.columnFiles =
((List<ColumnFileInfo>) value)
.stream().map(ColumnFileInfo::copy).collect(Collectors.toList());
break;
default:
// ignore the object, it must be from a newer version of the format
}
Expand All @@ -356,6 +376,7 @@ public String toString() {
.add("key_metadata", keyMetadata == null ? "null" : "(redacted)")
.add("split_offsets", splitOffsets == null ? "null" : splitOffsets())
.add("equality_ids", equalityIds == null ? "null" : equalityIds())
.add("column_files", columnFiles == null ? "null" : columnFiles)
.toString();
}
}
Loading
Loading