@@ -32,28 +32,14 @@ void updatePairList(Cache& list, std::string const& binding, std::string const&
3232std::pair<int64_t , int64_t > SliceInfoPtr::getSliceFor (int value) const
3333{
3434 int64_t offset = 0 ;
35- if (values .empty ()) {
35+ if (offsets .empty ()) {
3636 return {offset, 0 };
3737 }
38- int64_t p = static_cast <int64_t >(values.size ()) - 1 ;
39- while (values[p] < 0 ) {
40- --p;
41- if (p < 0 ) {
42- return {offset, 0 };
43- }
44- }
45-
46- if (value > values[p]) {
38+ if ((size_t )value >= offsets.size ()) {
4739 return {offset, 0 };
4840 }
4941
50- for (auto i = 0U ; i < values.size (); ++i) {
51- if (values[i] == value) {
52- return {offset, counts[i]};
53- }
54- offset += counts[i];
55- }
56- return {offset, 0 };
42+ return {offsets[value], sizes[value]};
5743}
5844
5945gsl::span<const int64_t > SliceInfoUnsortedPtr::getSliceFor (int value) const
@@ -84,6 +70,8 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorte
8470{
8571 values.resize (bindingsKeys.size ());
8672 counts.resize (bindingsKeys.size ());
73+ offsets.resize (bindingsKeys.size ());
74+ sizes.resize (bindingsKeys.size ());
8775
8876 valuesUnsorted.resize (bindingsKeysUnsorted.size ());
8977 groups.resize (bindingsKeysUnsorted.size ());
@@ -97,6 +85,10 @@ void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted)
9785 values.resize (bindingsKeys.size ());
9886 counts.clear ();
9987 counts.resize (bindingsKeys.size ());
88+ offsets.clear ();
89+ offsets.resize (bindingsKeys.size ());
90+ sizes.clear ();
91+ sizes.resize (bindingsKeys.size ());
10092 valuesUnsorted.clear ();
10193 valuesUnsorted.resize (bindingsKeysUnsorted.size ());
10294 groups.clear ();
@@ -105,9 +97,11 @@ void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted)
10597
10698arrow::Status ArrowTableSlicingCache::updateCacheEntry (int pos, std::shared_ptr<arrow::Table> const & table)
10799{
100+ values[pos].reset ();
101+ counts[pos].reset ();
102+ offsets[pos].clear ();
103+ sizes[pos].clear ();
108104 if (table->num_rows () == 0 ) {
109- values[pos].reset ();
110- counts[pos].reset ();
111105 return arrow::Status::OK ();
112106 }
113107 auto & [b, k, e] = bindingsKeys[pos];
@@ -125,6 +119,31 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr<
125119 counts[pos].reset ();
126120 values[pos] = std::make_shared<arrow::NumericArray<arrow::Int32Type>>(pair.field (0 )->data ());
127121 counts[pos] = std::make_shared<arrow::NumericArray<arrow::Int64Type>>(pair.field (1 )->data ());
122+
123+ int maxValue = -1 ;
124+ for (auto i = values[pos]->length () - 1 ; i >= 0 ; --i) {
125+ if (values[pos]->Value (i) < 0 ) {
126+ continue ;
127+ } else {
128+ maxValue = values[pos]->Value (i);
129+ break ;
130+ }
131+ }
132+
133+ offsets[pos].resize (maxValue + 1 );
134+ sizes[pos].resize (maxValue + 1 );
135+ std::fill (offsets[pos].begin (), offsets[pos].end (), 0 );
136+ std::fill (sizes[pos].begin (), sizes[pos].end (), 0 );
137+ int64_t offset = 0 ;
138+ for (auto i = 0U ; i < values[pos]->length (); ++i) {
139+ auto value = values[pos]->Value (i);
140+ auto count = counts[pos]->Value (i);
141+ if (value >= 0 ) {
142+ offsets[pos][value] = offset;
143+ sizes[pos][value] = count;
144+ }
145+ offset += count;
146+ }
128147 return arrow::Status::OK ();
129148}
130149
@@ -221,14 +240,14 @@ SliceInfoPtr ArrowTableSlicingCache::getCacheForPos(int pos) const
221240{
222241 if (values[pos] == nullptr && counts[pos] == nullptr ) {
223242 return {
224- {},
225- {} //
243+ {}, //
244+ {} //
226245 };
227246 }
228247
229248 return {
230- { reinterpret_cast < int const *>(values [pos]-> values ()-> data ()), static_cast < size_t >(values [pos]-> length ())},
231- { reinterpret_cast < int64_t const *>(counts [pos]-> values ()-> data ()), static_cast < size_t >(counts [pos]-> length ())} //
249+ gsl::span{offsets [pos]. data (), offsets [pos]. size ()}, //
250+ gsl::span (sizes [pos]. data (), sizes [pos]. size ()) //
232251 };
233252}
234253
0 commit comments