Skip to content

Commit 5cd132b

Browse files
committed
address feedback
1 parent 26f0e4f commit 5cd132b

File tree

3 files changed

+18
-33
lines changed

3 files changed

+18
-33
lines changed

src/iceberg/delete_file_index.cc

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "iceberg/delete_file_index.h"
2121

2222
#include <algorithm>
23+
#include <cstdint>
2324
#include <iterator>
2425
#include <ranges>
2526
#include <vector>
@@ -176,20 +177,18 @@ Status PositionDeletes::Add(ManifestEntry&& entry) {
176177
std::vector<std::shared_ptr<DataFile>> PositionDeletes::Filter(int64_t seq) {
177178
IndexIfNeeded();
178179

179-
size_t start = FindStartIndex(seqs_, seq);
180-
if (start >= files_.size()) {
180+
auto iter = std::ranges::lower_bound(seqs_, seq);
181+
if (iter == seqs_.end()) {
181182
return {};
182183
}
183-
184-
return files_ | std::views::drop(start) |
184+
return files_ | std::views::drop(iter - seqs_.begin()) |
185185
std::views::transform(&ManifestEntry::data_file) |
186186
std::ranges::to<std::vector<std::shared_ptr<DataFile>>>();
187187
}
188188

189189
std::vector<std::shared_ptr<DataFile>> PositionDeletes::ReferencedDeleteFiles() {
190190
IndexIfNeeded();
191-
return std::ranges::transform_view(files_,
192-
[](const auto& entry) { return entry.data_file; }) |
191+
return files_ | std::views::transform(&ManifestEntry::data_file) |
193192
std::ranges::to<std::vector<std::shared_ptr<DataFile>>>();
194193
}
195194

@@ -199,13 +198,11 @@ void PositionDeletes::IndexIfNeeded() {
199198
}
200199

201200
// Sort by data sequence number
202-
std::ranges::sort(files_, [](const auto& a, const auto& b) {
203-
return a.sequence_number.value() < b.sequence_number.value();
204-
});
201+
std::ranges::sort(files_, std::ranges::less{}, &ManifestEntry::sequence_number);
205202

206203
// Build sequence number array for binary search
207-
seqs_ = std::ranges::transform_view(
208-
files_, [](const auto& entry) { return entry.sequence_number.value(); }) |
204+
seqs_ = files_ |
205+
std::views::transform([](const auto& e) { return e.sequence_number.value(); }) |
209206
std::ranges::to<std::vector<int64_t>>();
210207

211208
indexed_ = true;
@@ -226,15 +223,13 @@ Result<std::vector<std::shared_ptr<DataFile>>> EqualityDeletes::Filter(
226223
int64_t seq, const DataFile& data_file) {
227224
IndexIfNeeded();
228225

229-
size_t start = FindStartIndex(seqs_, seq);
230-
if (start >= files_.size()) {
226+
auto iter = std::ranges::lower_bound(seqs_, seq);
227+
if (iter == seqs_.end()) {
231228
return {};
232229
}
233-
234230
std::vector<std::shared_ptr<DataFile>> result;
235-
result.reserve(files_.size() - start);
236-
for (size_t i = start; i < files_.size(); ++i) {
237-
const auto& delete_file = files_[i];
231+
result.reserve(seqs_.end() - iter);
232+
for (auto& delete_file : files_ | std::views::drop(iter - seqs_.begin())) {
238233
ICEBERG_ASSIGN_OR_RAISE(bool may_contain,
239234
CanContainEqDeletesForFile(data_file, delete_file));
240235
if (may_contain) {
@@ -247,8 +242,8 @@ Result<std::vector<std::shared_ptr<DataFile>>> EqualityDeletes::Filter(
247242

248243
std::vector<std::shared_ptr<DataFile>> EqualityDeletes::ReferencedDeleteFiles() {
249244
IndexIfNeeded();
250-
return std::ranges::transform_view(
251-
files_, [](const auto& file) { return file.wrapped.data_file; }) |
245+
return files_ |
246+
std::views::transform([](const auto& f) { return f.wrapped.data_file; }) |
252247
std::ranges::to<std::vector<std::shared_ptr<DataFile>>>();
253248
}
254249

@@ -258,13 +253,11 @@ void EqualityDeletes::IndexIfNeeded() {
258253
}
259254

260255
// Sort by apply sequence number
261-
std::ranges::sort(files_, [](const auto& a, const auto& b) {
262-
return a.apply_sequence_number < b.apply_sequence_number;
263-
});
256+
std::ranges::sort(files_, std::ranges::less{},
257+
&EqualityDeleteFile::apply_sequence_number);
264258

265259
// Build sequence number array for binary search
266-
seqs_ = std::ranges::transform_view(
267-
files_, [](const auto& file) { return file.apply_sequence_number; }) |
260+
seqs_ = files_ | std::views::transform(&EqualityDeleteFile::apply_sequence_number) |
268261
std::ranges::to<std::vector<int64_t>>();
269262

270263
indexed_ = true;

src/iceberg/delete_file_index.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
/// \file iceberg/delete_file_index.h
2323
/// An index of delete files by sequence number.
2424

25-
#include <algorithm>
2625
#include <functional>
2726
#include <map>
2827
#include <memory>
@@ -87,13 +86,6 @@ struct ICEBERG_EXPORT EqualityDeleteFile {
8786
Status ConvertBoundsIfNeeded() const;
8887
};
8988

90-
/// \brief Find the start index in a sorted array where all elements from that
91-
/// index onward have sequence numbers >= the given sequence number.
92-
inline size_t FindStartIndex(const std::vector<int64_t>& seqs, int64_t seq) {
93-
auto it = std::ranges::lower_bound(seqs, seq);
94-
return static_cast<size_t>(std::ranges::distance(seqs.cbegin(), it));
95-
}
96-
9789
/// \brief Check if two ranges overlap.
9890
inline bool RangesOverlap(const Literal& data_lower, const Literal& data_upper,
9991
const Literal& delete_lower, const Literal& delete_upper) {

src/iceberg/test/delete_file_index_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,7 @@ TEST_P(DeleteFileIndexTest, TestInvalidDVSequenceNumber) {
950950

951951
// Querying with sequence number > DV sequence number should fail
952952
auto result = index->ForDataFile(2, *file_a_);
953-
EXPECT_THAT(result, IsError(ErrorKind::kInvalid));
953+
EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed));
954954
EXPECT_THAT(result, HasErrorMessage(
955955
"must be greater than or equal to data file sequence number"));
956956
}

0 commit comments

Comments
 (0)