Skip to content

Commit b8bb17f

Browse files
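faster growlocal: replace std::set ready queues with a sorted std::deque (ready) and heap-ordered std::vector (procReady)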
1 parent 9e63602 commit b8bb17f

1 file changed

Lines changed: 35 additions & 18 deletions

File tree

include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp

Lines changed: 35 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,9 @@ limitations under the License.
2020

2121
#include <omp.h>
2222

23+
#include <algorithm>
2324
#include <climits>
25+
#include <deque>
2426
#include <list>
2527
#include <map>
2628
#include <set>
@@ -98,13 +100,13 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
98100
const VertexType n = endNode - startNode;
99101
const unsigned p = instance.NumberOfProcessors();
100102

101-
std::set<VertexType> ready;
103+
std::deque<VertexType> ready;
102104

103105
std::vector<VertexType> futureReady;
104106
std::vector<VertexType> bestFutureReady;
105107

106-
std::vector<std::set<VertexType>> procReady(p);
107-
std::vector<std::set<VertexType>> bestProcReady(p);
108+
std::vector<std::vector<VertexType>> procReady(p);
109+
std::vector<std::vector<VertexType>> bestProcReady(p);
108110

109111
std::vector<VertexType> predec(n, 0);
110112

@@ -143,12 +145,15 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
143145
VertexType index = nodePos - startNode;
144146
if (predec[index] == 0) {
145147
if constexpr (hasVerticesInTopOrderV<GraphT>) {
146-
ready.insert(nodePos);
148+
ready.emplace_back(nodePos);
147149
} else {
148-
ready.insert(topOrder[nodePos]);
150+
ready.emplace_back(topOrder[nodePos]);
149151
}
150152
}
151153
}
154+
if constexpr (not hasVerticesInTopOrderV<GraphT>) {
155+
std::sort(ready.begin(), ready.end(), std::less<>{});
156+
}
152157

153158
std::vector<std::vector<VertexType>> newAssignments(p);
154159
std::vector<std::vector<VertexType>> bestNewAssignments(p);
@@ -166,8 +171,8 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
166171
double bestScore = 0;
167172
double bestParallelism = 0;
168173

169-
typename std::set<VertexType>::iterator readyIter;
170-
typename std::set<VertexType>::iterator bestReadyIter;
174+
typename std::deque<VertexType>::const_iterator readyIter;
175+
typename std::deque<VertexType>::const_iterator bestReadyIter;
171176

172177
bool continueSuperstepAttempts = true;
173178

@@ -181,7 +186,7 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
181186
procReady[proc].clear();
182187
}
183188

184-
readyIter = ready.begin();
189+
readyIter = ready.cbegin();
185190

186191
VertexType newTotalAssigned = 0;
187192
VWorkwT<GraphT> weightLimit = 0;
@@ -191,9 +196,10 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
191196
while (newAssignments[0].size() < limit) {
192197
VertexType chosenNode = std::numeric_limits<VertexType>::max();
193198
if (!procReady[0].empty()) {
194-
chosenNode = *procReady[0].begin();
195-
procReady[0].erase(procReady[0].begin());
196-
} else if (readyIter != ready.end()) {
199+
std::pop_heap(procReady[0].begin(), procReady[0].end(), std::greater<>{});
200+
chosenNode = procReady[0].back();
201+
procReady[0].pop_back();
202+
} else if (readyIter != ready.cend()) {
197203
chosenNode = *readyIter;
198204
readyIter++;
199205
} else {
@@ -238,7 +244,8 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
238244
--predec[succIndex];
239245
if (predec[succIndex] == 0) {
240246
if (schedule.AssignedProcessor(succ) == 0) {
241-
procReady[0].insert(succ);
247+
procReady[0].emplace_back(succ);
248+
std::push_heap(procReady[0].begin(), procReady[0].end(), std::greater<>{});
242249
} else {
243250
futureReady.push_back(succ);
244251
}
@@ -254,9 +261,10 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
254261
while (currentWeightAssigned < weightLimit) {
255262
VertexType chosenNode = std::numeric_limits<VertexType>::max();
256263
if (!procReady[proc].empty()) {
257-
chosenNode = *procReady[proc].begin();
258-
procReady[proc].erase(procReady[proc].begin());
259-
} else if (readyIter != ready.end()) {
264+
std::pop_heap(procReady[proc].begin(), procReady[proc].end(), std::greater<>{});
265+
chosenNode = procReady[proc].back();
266+
procReady[proc].pop_back();
267+
} else if (readyIter != ready.cend()) {
260268
chosenNode = *readyIter;
261269
readyIter++;
262270
} else {
@@ -301,7 +309,8 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
301309
--predec[succIndex];
302310
if (predec[succIndex] == 0) {
303311
if (schedule.AssignedProcessor(succ) == proc) {
304-
procReady[proc].insert(succ);
312+
procReady[proc].emplace_back(succ);
313+
std::push_heap(procReady[proc].begin(), procReady[proc].end(), std::greater<>{});
305314
} else {
306315
futureReady.push_back(succ);
307316
}
@@ -345,6 +354,10 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
345354
}
346355
}
347356

357+
if (readyIter == ready.cend()) {
358+
continueSuperstepAttempts = false;
359+
}
360+
348361
if (totalAssigned + newTotalAssigned == n) {
349362
continueSuperstepAttempts = false;
350363
}
@@ -424,10 +437,14 @@ class GrowLocalAutoCoresParallel : public Scheduler<GraphT> {
424437

425438
// apply best iteration
426439
ready.erase(ready.begin(), bestReadyIter);
427-
ready.insert(bestFutureReady.begin(), bestFutureReady.end());
440+
const auto lengthLeftoverReady = std::distance(ready.begin(), ready.end());
441+
ready.insert(ready.end(), bestFutureReady.begin(), bestFutureReady.end());
428442
for (unsigned proc = 0; proc < p; proc++) {
429-
ready.merge(bestProcReady[proc]);
443+
ready.insert(ready.end(), bestProcReady[proc].begin(), bestProcReady[proc].end());
430444
}
445+
const auto middleIt = std::next(ready.begin(), lengthLeftoverReady);
446+
std::sort(middleIt, ready.end(), std::less<>{});
447+
std::inplace_merge(ready.begin(), middleIt, ready.end());
431448

432449
for (unsigned proc = 0; proc < p; ++proc) {
433450
for (const VertexType &node : bestNewAssignments[proc]) {

0 commit comments

Comments
 (0)