@@ -35,25 +35,8 @@ std::pair<int64_t, int64_t> SliceInfoPtr::getSliceFor(int value) const
3535 if (values.empty ()) {
3636 return {offset, 0 };
3737 }
38- int64_t p = static_cast <int64_t >(values.size ()) - 1 ;
39- while (values[p] < 0 ) {
40- --p;
41- if (p < 0 ) {
42- return {offset, 0 };
43- }
44- }
4538
46- if (value > values[p]) {
47- return {offset, 0 };
48- }
49-
50- for (auto i = 0U ; i < values.size (); ++i) {
51- if (values[i] == value) {
52- return {offset, counts[i]};
53- }
54- offset += counts[i];
55- }
56- return {offset, 0 };
39+ return {(*offsets)[value], (*sizes)[value]};
5740}
5841
5942gsl::span<const int64_t > SliceInfoUnsortedPtr::getSliceFor (int value) const
@@ -84,6 +67,8 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorte
8467{
8568 values.resize (bindingsKeys.size ());
8669 counts.resize (bindingsKeys.size ());
70+ offsets.resize (bindingsKeys.size ());
71+ sizes.resize (bindingsKeys.size ());
8772
8873 valuesUnsorted.resize (bindingsKeysUnsorted.size ());
8974 groups.resize (bindingsKeysUnsorted.size ());
@@ -97,6 +82,10 @@ void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted)
9782 values.resize (bindingsKeys.size ());
9883 counts.clear ();
9984 counts.resize (bindingsKeys.size ());
85+ offsets.clear ();
86+ offsets.resize (bindingsKeys.size ());
87+ sizes.clear ();
88+ sizes.resize (bindingsKeys.size ());
10089 valuesUnsorted.clear ();
10190 valuesUnsorted.resize (bindingsKeysUnsorted.size ());
10291 groups.clear ();
@@ -108,6 +97,8 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr<
10897 if (table->num_rows () == 0 ) {
10998 values[pos].reset ();
11099 counts[pos].reset ();
100+ offsets[pos].clear ();
101+ sizes[pos].clear ();
111102 return arrow::Status::OK ();
112103 }
113104 auto & [b, k, e] = bindingsKeys[pos];
@@ -125,6 +116,31 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr<
125116 counts[pos].reset ();
126117 values[pos] = std::make_shared<arrow::NumericArray<arrow::Int32Type>>(pair.field (0 )->data ());
127118 counts[pos] = std::make_shared<arrow::NumericArray<arrow::Int64Type>>(pair.field (1 )->data ());
119+
// Find the largest non-negative value by scanning from the back and skipping negative
// entries. If every entry is negative, maxValue stays 0 and the tables hold one empty slot.
120+ int maxValue = 0 ;
121+ for (auto i = values[pos]->length () - 1 ; i >= 0 ; --i) {
122+ if (values[pos]->Value (i) < 0 ) {
123+ continue ;
124+ } else {
125+ maxValue = values[pos]->Value (i);
126+ break ;
127+ }
128+ }
129+
// Direct-index tables: slots 0..maxValue hold the start offset and size of each value's
// slice, so `sizes` needs maxValue + 1 entries. `offsets` gets one extra trailing slot
// (index maxValue + 1) for the end offset, so it does not overwrite the start offset
// stored for maxValue itself.
130+ offsets[pos].resize (maxValue + 2 );
131+ sizes[pos].resize (maxValue + 1 );
// resize() keeps previously stored elements, so both tables are explicitly zeroed.
132+ std::fill (offsets[pos].begin (), offsets[pos].end (), 0 );
133+ std::fill (sizes[pos].begin (), sizes[pos].end (), 0 );
// Running row offset: every entry advances it by its count, but only non-negative
// values are recorded in the tables.
134+ int64_t offset = 0 ;
135+ for (int64_t i = 0 ; i < values[pos]->length (); ++i) {
136+ auto value = values[pos]->Value (i);
137+ if (value >= 0 ) {
138+ offsets[pos][value] = offset;
139+ sizes[pos][value] = counts[pos]->Value (i);
140+ }
141+ offset += counts[pos]->Value (i);
142+ }
// Trailing slot: total of all counts, i.e. the offset one past the last row.
143+ offsets[pos][maxValue + 1 ] = offset;
128144 return arrow::Status::OK ();
129145}
130146
@@ -221,14 +237,18 @@ SliceInfoPtr ArrowTableSlicingCache::getCacheForPos(int pos) const
// Builds the SliceInfoPtr describing the cache entry at index `pos`.
// `pos` is used to index values, counts, offsets and sizes without a range check here.
221237{
// Entry not populated (both arrays are null): return empty views and null table pointers.
222238 if (values[pos] == nullptr && counts[pos] == nullptr ) {
223239 return {
224- {},
225- {} //
240+ {},//
241+ {},//
242+ nullptr , //
243+ nullptr //
226244 };
227245 }
228246
// Populated entry: views over the raw buffers of the values/counts arrays, plus the
// addresses of this cache's offsets[pos] and sizes[pos] tables.
// NOTE(review): these are raw, non-owning addresses into `offsets` / `sizes`; presumably they
// are invalidated when those containers are resized or cleared - confirm against callers.
229247 return {
230- {reinterpret_cast <int const *>(values[pos]->values ()->data ()), static_cast <size_t >(values[pos]->length ())},
231- {reinterpret_cast <int64_t const *>(counts[pos]->values ()->data ()), static_cast <size_t >(counts[pos]->length ())} //
248+ {reinterpret_cast <int const *>(values[pos]->values ()->data ()), static_cast <size_t >(values[pos]->length ())}, //
249+ {reinterpret_cast <int64_t const *>(counts[pos]->values ()->data ()), static_cast <size_t >(counts[pos]->length ())}, //
250+ &(offsets[pos]), //
251+ &(sizes[pos]) //
232252 };
233253}
234254
0 commit comments