Skip to content

Commit bd0c539

Browse files
committed
add offset cache
1 parent db916c2 commit bd0c539

File tree

2 files changed

+46
-22
lines changed

2 files changed

+46
-22
lines changed

Framework/Core/include/Framework/ArrowTableSlicingCache.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@ using ListVector = std::vector<std::vector<int64_t>>;
2323
// Lightweight view of one slicing-cache entry, as built by
// ArrowTableSlicingCache::getCacheForPos(). All four members refer to data
// held elsewhere; the struct itself stores only spans and raw pointers.
// NOTE: the bare number lines below ("2424", "26+", ...) are diff-gutter
// residue from the rendered commit page, not part of the C++ declaration.
struct SliceInfoPtr {
// Key values of the entry; an empty span when the cached arrays are null.
2424
gsl::span<int const> values;
// counts[i] is the slice size recorded for values[i]; updateCacheEntry()
// accumulates these to derive the per-value offsets.
2525
gsl::span<int64_t const> counts;
// Lookup table read as (*offsets)[value] by getSliceFor(); nullptr when
// getCacheForPos() hands out an empty entry.
26+
std::vector<int64_t> const* offsets;
// Lookup table read as (*sizes)[value] by getSliceFor(); nullptr when
// getCacheForPos() hands out an empty entry.
27+
std::vector<int64_t> const* sizes;
2628

// Returns the {offset, size} pair for `value`. With an empty `values` span the
// visible implementation returns a size of 0 before touching offsets/sizes.
// NOTE(review): the visible implementation indexes offsets/sizes directly by
// `value` with no range check -- confirm callers never pass a negative value
// or one beyond the table size.
2729
std::pair<int64_t, int64_t> getSliceFor(int value) const;
2830
};
@@ -66,6 +68,8 @@ struct ArrowTableSlicingCache {
6668
Cache bindingsKeys;
6769
std::vector<std::shared_ptr<arrow::NumericArray<arrow::Int32Type>>> values;
6870
std::vector<std::shared_ptr<arrow::NumericArray<arrow::Int64Type>>> counts;
71+
std::vector<std::vector<int64_t>> offsets;
72+
std::vector<std::vector<int64_t>> sizes;
6973

7074
Cache bindingsKeysUnsorted;
7175
std::vector<std::vector<int>> valuesUnsorted;

Framework/Core/src/ArrowTableSlicingCache.cxx

Lines changed: 42 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -35,25 +35,8 @@ std::pair<int64_t, int64_t> SliceInfoPtr::getSliceFor(int value) const
3535
if (values.empty()) {
3636
return {offset, 0};
3737
}
38-
int64_t p = static_cast<int64_t>(values.size()) - 1;
39-
while (values[p] < 0) {
40-
--p;
41-
if (p < 0) {
42-
return {offset, 0};
43-
}
44-
}
4538

46-
if (value > values[p]) {
47-
return {offset, 0};
48-
}
49-
50-
for (auto i = 0U; i < values.size(); ++i) {
51-
if (values[i] == value) {
52-
return {offset, counts[i]};
53-
}
54-
offset += counts[i];
55-
}
56-
return {offset, 0};
39+
return {(*offsets)[value], (*sizes)[value]};
5740
}
5841

5942
gsl::span<const int64_t> SliceInfoUnsortedPtr::getSliceFor(int value) const
@@ -84,6 +67,8 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorte
8467
{
8568
values.resize(bindingsKeys.size());
8669
counts.resize(bindingsKeys.size());
70+
offsets.resize(bindingsKeys.size());
71+
sizes.resize(bindingsKeys.size());
8772

8873
valuesUnsorted.resize(bindingsKeysUnsorted.size());
8974
groups.resize(bindingsKeysUnsorted.size());
@@ -97,6 +82,10 @@ void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted)
9782
values.resize(bindingsKeys.size());
9883
counts.clear();
9984
counts.resize(bindingsKeys.size());
85+
offsets.clear();
86+
offsets.resize(bindingsKeys.size());
87+
sizes.clear();
88+
sizes.resize(bindingsKeys.size());
10089
valuesUnsorted.clear();
10190
valuesUnsorted.resize(bindingsKeysUnsorted.size());
10291
groups.clear();
@@ -108,6 +97,8 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr<
10897
if (table->num_rows() == 0) {
10998
values[pos].reset();
11099
counts[pos].reset();
100+
offsets[pos].clear();
101+
sizes[pos].clear();
111102
return arrow::Status::OK();
112103
}
113104
auto& [b, k, e] = bindingsKeys[pos];
@@ -125,6 +116,31 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr<
125116
counts[pos].reset();
126117
values[pos] = std::make_shared<arrow::NumericArray<arrow::Int32Type>>(pair.field(0)->data());
127118
counts[pos] = std::make_shared<arrow::NumericArray<arrow::Int64Type>>(pair.field(1)->data());
119+
120+
int maxValue = 0;
121+
for (auto i = values[pos]->length() - 1; i >= 0; --i) {
122+
if (values[pos]->Value(i) < 0) {
123+
continue;
124+
} else {
125+
maxValue = values[pos]->Value(i);
126+
break;
127+
}
128+
}
129+
130+
offsets[pos].resize(maxValue + 1);
131+
sizes[pos].resize(maxValue);
132+
std::fill(offsets[pos].begin(), offsets[pos].end(), 0);
133+
std::fill(sizes[pos].begin(), sizes[pos].end(), 0);
134+
int64_t offset = 0;
135+
for (auto i = 0U; i < values[pos]->length(); ++i) {
136+
auto value = values[pos]->Value(i);
137+
if (value >= 0) {
138+
offsets[pos][value] = offset;
139+
sizes[pos][value] = counts[pos]->Value(i);
140+
}
141+
offset += counts[pos]->Value(i);
142+
}
143+
offsets[pos][maxValue] = offset;
128144
return arrow::Status::OK();
129145
}
130146

// Rendered diff hunk for ArrowTableSlicingCache::getCacheForPos(int pos) const,
// which builds a SliceInfoPtr view over cache entry `pos`.
// Reading the gutter: a number followed by "-" is a line removed by this
// commit, a number followed by "+" is a line added, and two fused numbers are
// the old/new line numbers of an unchanged context line.
@@ -221,14 +237,18 @@ SliceInfoPtr ArrowTableSlicingCache::getCacheForPos(int pos) const
221237
{
// Both cached arrays are null (updateCacheEntry() resets them for a table with
// zero rows): hand out empty spans and, after this commit, null lookup tables.
222238
if (values[pos] == nullptr && counts[pos] == nullptr) {
223239
return {
224-
{},
225-
{} //
240+
{},//
241+
{},//
242+
nullptr, //
243+
nullptr //
226244
};
227245
}
228246

// Otherwise wrap the cached data without copying: the spans point at the
// arrays' value buffers, and (after this commit) offsets/sizes are the
// addresses of this cache's own per-entry vectors.
// NOTE(review): the returned view presumably must not be used across a later
// update of the same cache entry -- confirm against callers.
229247
return {
230-
{reinterpret_cast<int const*>(values[pos]->values()->data()), static_cast<size_t>(values[pos]->length())},
231-
{reinterpret_cast<int64_t const*>(counts[pos]->values()->data()), static_cast<size_t>(counts[pos]->length())} //
248+
{reinterpret_cast<int const*>(values[pos]->values()->data()), static_cast<size_t>(values[pos]->length())}, //
249+
{reinterpret_cast<int64_t const*>(counts[pos]->values()->data()), static_cast<size_t>(counts[pos]->length())}, //
250+
&(offsets[pos]), //
251+
&(sizes[pos]) //
232252
};
233253
}
234254

0 commit comments

Comments
 (0)