@@ -20,7 +20,9 @@ limitations under the License.
2020
2121#include < omp.h>
2222
23+ #include < algorithm>
2324#include < climits>
25+ #include < deque>
2426#include < list>
2527#include < map>
2628#include < set>
@@ -98,13 +100,13 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
98100 const VertexType n = endNode - startNode;
99101 const unsigned p = instance.NumberOfProcessors ();
100102
101- std::set <VertexType> ready;
103+ std::deque <VertexType> ready;
102104
103105 std::vector<VertexType> futureReady;
104106 std::vector<VertexType> bestFutureReady;
105107
106- std::vector<std::set <VertexType>> procReady (p);
107- std::vector<std::set <VertexType>> bestProcReady (p);
108+ std::vector<std::vector <VertexType>> procReady (p);
109+ std::vector<std::vector <VertexType>> bestProcReady (p);
108110
109111 std::vector<VertexType> predec (n, 0 );
110112
@@ -143,12 +145,15 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
143145 VertexType index = nodePos - startNode;
144146 if (predec[index] == 0 ) {
145147 if constexpr (hasVerticesInTopOrderV<GraphT>) {
146- ready.insert (nodePos);
148+ ready.emplace_back (nodePos);
147149 } else {
148- ready.insert (topOrder[nodePos]);
150+ ready.emplace_back (topOrder[nodePos]);
149151 }
150152 }
151153 }
154+ if constexpr (not hasVerticesInTopOrderV<GraphT>) {
155+ std::sort (ready.begin (), ready.end (), std::less<>{});
156+ }
152157
153158 std::vector<std::vector<VertexType>> newAssignments (p);
154159 std::vector<std::vector<VertexType>> bestNewAssignments (p);
@@ -166,8 +171,8 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
166171 double bestScore = 0 ;
167172 double bestParallelism = 0 ;
168173
169- typename std::set <VertexType>::iterator readyIter;
170- typename std::set <VertexType>::iterator bestReadyIter;
174+ typename std::deque <VertexType>::const_iterator readyIter;
175+ typename std::deque <VertexType>::const_iterator bestReadyIter;
171176
172177 bool continueSuperstepAttempts = true ;
173178
@@ -181,7 +186,7 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
181186 procReady[proc].clear ();
182187 }
183188
184- readyIter = ready.begin ();
189+ readyIter = ready.cbegin ();
185190
186191 VertexType newTotalAssigned = 0 ;
187192 VWorkwT<GraphT> weightLimit = 0 ;
@@ -191,9 +196,10 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
191196 while (newAssignments[0 ].size () < limit) {
192197 VertexType chosenNode = std::numeric_limits<VertexType>::max ();
193198 if (!procReady[0 ].empty ()) {
194- chosenNode = *procReady[0 ].begin ();
195- procReady[0 ].erase (procReady[0 ].begin ());
196- } else if (readyIter != ready.end ()) {
199+ std::pop_heap (procReady[0 ].begin (), procReady[0 ].end (), std::greater<>{});
200+ chosenNode = procReady[0 ].back ();
201+ procReady[0 ].pop_back ();
202+ } else if (readyIter != ready.cend ()) {
197203 chosenNode = *readyIter;
198204 readyIter++;
199205 } else {
@@ -238,7 +244,8 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
238244 --predec[succIndex];
239245 if (predec[succIndex] == 0 ) {
240246 if (schedule.AssignedProcessor (succ) == 0 ) {
241- procReady[0 ].insert (succ);
247+ procReady[0 ].emplace_back (succ);
248+ std::push_heap (procReady[0 ].begin (), procReady[0 ].end (), std::greater<>{});
242249 } else {
243250 futureReady.push_back (succ);
244251 }
@@ -254,9 +261,10 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
254261 while (currentWeightAssigned < weightLimit) {
255262 VertexType chosenNode = std::numeric_limits<VertexType>::max ();
256263 if (!procReady[proc].empty ()) {
257- chosenNode = *procReady[proc].begin ();
258- procReady[proc].erase (procReady[proc].begin ());
259- } else if (readyIter != ready.end ()) {
264+ std::pop_heap (procReady[proc].begin (), procReady[proc].end (), std::greater<>{});
265+ chosenNode = procReady[proc].back ();
266+ procReady[proc].pop_back ();
267+ } else if (readyIter != ready.cend ()) {
260268 chosenNode = *readyIter;
261269 readyIter++;
262270 } else {
@@ -301,7 +309,8 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
301309 --predec[succIndex];
302310 if (predec[succIndex] == 0 ) {
303311 if (schedule.AssignedProcessor (succ) == proc) {
304- procReady[proc].insert (succ);
312+ procReady[proc].emplace_back (succ);
313+ std::push_heap (procReady[proc].begin (), procReady[proc].end (), std::greater<>{});
305314 } else {
306315 futureReady.push_back (succ);
307316 }
@@ -345,6 +354,10 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
345354 }
346355 }
347356
357+ if (readyIter == ready.cend ()) {
358+ continueSuperstepAttempts = false ;
359+ }
360+
348361 if (totalAssigned + newTotalAssigned == n) {
349362 continueSuperstepAttempts = false ;
350363 }
@@ -424,10 +437,14 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
424437
425438 // apply best iteration
426439 ready.erase (ready.begin (), bestReadyIter);
427- ready.insert (bestFutureReady.begin (), bestFutureReady.end ());
440+ const auto lengthLeftoverReady = std::distance (ready.begin (), ready.end ());
441+ ready.insert (ready.end (), bestFutureReady.begin (), bestFutureReady.end ());
428442 for (unsigned proc = 0 ; proc < p; proc++) {
429- ready.merge ( bestProcReady[proc]);
443+ ready.insert (ready. end (), bestProcReady[proc]. begin (), bestProcReady[proc]. end () );
430444 }
445+ const auto middleIt = std::next (ready.begin (), lengthLeftoverReady);
446+ std::sort (middleIt, ready.end (), std::less<>{});
447+ std::inplace_merge (ready.begin (), middleIt, ready.end ());
431448
432449 for (unsigned proc = 0 ; proc < p; ++proc) {
433450 for (const VertexType &node : bestNewAssignments[proc]) {
0 commit comments