Skip to content

Commit 7c46eb0

Browse files
Chance to Finish GrowLocal SSP
1 parent 780fcb9 commit 7c46eb0

1 file changed

Lines changed: 170 additions & 64 deletions

File tree

include/osp/bsp/scheduler/GreedySchedulers/GrowLocalMaxBsp.hpp

Lines changed: 170 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,117 @@ class GrowLocalSSP : public MaxBspScheduler<GraphT> {
5050
static constexpr unsigned staleness{2U};
5151
GrowLocalSSPParams<VertexIdxT<GraphT>, VWorkwT<GraphT>> params_;
5252

53-
inline typename std::deque<VertexType>::difference_type maxAllReadyUsage(const std::deque<VertexType> &currentlyReady,
53+
/*! Vertices ready in current superstep */
54+
std::deque<VertexType> currentlyReady_;
55+
56+
/*! For i = 1,2,..,staleness, the vertices in futureReady_[(superstep + i) % staleness] become ready globally in superstep + i */
57+
std::array<std::deque<VertexType>, staleness> futureReady_;
58+
/*! Vertices to be added to futureReady_[superstep % staleness] which become ready globally in superstep + staleness */
59+
std::deque<VertexType> bestFutureReady_;
60+
61+
/*! Local to processor ready vertices in current superstep in a heap */
62+
std::vector<std::vector<std::pair<VertexType, unsigned>>> currentProcReadyHeaps_;
63+
/*! Leftover local to processor ready vertices in current superstep in a heap */
64+
std::vector<std::vector<std::pair<VertexType, unsigned>>> bestCurrentProcReadyHeaps_;
65+
66+
/*! For i = 0,1,2,..,staleness-1 and p processor, the vertices in procReady_[(superstep + i) % staleness][p] are ready locally
67+
* in superstep + i on processor p */
68+
std::array<std::vector<std::vector<std::pair<VertexType, unsigned>>>, staleness> procReady_;
69+
/*! Additions to procReady_ in current superstep attempt */
70+
std::array<std::vector<std::vector<std::pair<VertexType, unsigned>>>, staleness> procReadyAdditions_;
71+
/*! Additions to procReady_ from best superstep attempt */
72+
std::array<std::vector<std::vector<std::pair<VertexType, unsigned>>>, staleness> bestProcReadyAdditions_;
73+
74+
void Init(const unsigned numProcs);
75+
void ReleaseMemory();
76+
77+
inline typename std::deque<VertexType>::difference_type MaxAllReadyUsage(const std::deque<VertexType> &currentlyReady,
5478
const std::deque<VertexType> &nextSuperstepReady) const;
5579

80+
bool ChanceToFinish(const unsigned superStep) const;
81+
5682
public:
5783
ReturnStatus ComputeSchedule(BspSchedule<GraphT> &schedule) override;
5884
ReturnStatus ComputeSchedule(MaxBspSchedule<GraphT> &schedule) override;
5985

86+
inline GrowLocalSSPParams<VertexIdxT<GraphT>, VWorkwT<GraphT>> &GetParameters();
87+
inline const GrowLocalSSPParams<VertexIdxT<GraphT>, VWorkwT<GraphT>> &GetParameters() const;
88+
6089
std::string GetScheduleName() const override { return "GrowLocalSSP"; }
6190
};
6291

6392
template <typename GraphT>
64-
inline typename std::deque<VertexIdxT<GraphT>>::difference_type GrowLocalSSP<GraphT>::maxAllReadyUsage(
93+
inline GrowLocalSSPParams<VertexIdxT<GraphT>, VWorkwT<GraphT>> &GrowLocalSSP<GraphT>::GetParameters() {
94+
return params_;
95+
}
96+
97+
template <typename GraphT>
98+
inline const GrowLocalSSPParams<VertexIdxT<GraphT>, VWorkwT<GraphT>> &GrowLocalSSP<GraphT>::GetParameters() const {
99+
return params_;
100+
}
101+
102+
template <typename GraphT>
103+
void GrowLocalSSP<GraphT>::Init(const unsigned numProcs) {
104+
currentlyReady_.clear();
105+
106+
for (auto &stepFutureReady : futureReady_) {
107+
stepFutureReady.clear();
108+
}
109+
110+
bestFutureReady_.clear();
111+
112+
currentProcReadyHeaps_ = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
113+
bestCurrentProcReadyHeaps_ = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
114+
115+
for (auto &stepProcReady : procReady_) {
116+
stepProcReady = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
117+
}
118+
119+
for (auto &stepProcReadyAdditions : procReadyAdditions_) {
120+
stepProcReadyAdditions = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
121+
}
122+
123+
for (auto &stepBestProcReadyAdditions : bestProcReadyAdditions_) {
124+
stepBestProcReadyAdditions = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
125+
}
126+
}
127+
128+
template <typename GraphT>
129+
void GrowLocalSSP<GraphT>::ReleaseMemory() {
130+
currentlyReady_.clear();
131+
currentlyReady_.shrink_to_fit();
132+
133+
for (auto &stepFutureReady : futureReady_) {
134+
stepFutureReady.clear();
135+
stepFutureReady.shrink_to_fit();
136+
}
137+
138+
bestFutureReady_.clear();
139+
140+
currentProcReadyHeaps_.clear();
141+
currentProcReadyHeaps_.shrink_to_fit();
142+
143+
bestCurrentProcReadyHeaps_.clear();
144+
bestCurrentProcReadyHeaps_.shrink_to_fit();
145+
146+
for (auto &stepProcReady : procReady_) {
147+
stepProcReady.clear();
148+
stepProcReady.shrink_to_fit();
149+
}
150+
151+
for (auto &stepProcReadyAdditions : procReadyAdditions_) {
152+
stepProcReadyAdditions.clear();
153+
stepProcReadyAdditions.shrink_to_fit();
154+
}
155+
156+
for (auto &stepBestProcReadyAdditions : bestProcReadyAdditions_) {
157+
stepBestProcReadyAdditions.clear();
158+
stepBestProcReadyAdditions.shrink_to_fit();
159+
}
160+
}
161+
162+
template <typename GraphT>
163+
inline typename std::deque<VertexIdxT<GraphT>>::difference_type GrowLocalSSP<GraphT>::MaxAllReadyUsage(
65164
const std::deque<VertexIdxT<GraphT>> &currentlyReady, const std::deque<VertexIdxT<GraphT>> &nextSuperstepReady) const {
66165
if constexpr (staleness == 1U) {
67166
return std::distance(currentlyReady.cbegin(), currentlyReady.cend());
@@ -77,6 +176,34 @@ inline typename std::deque<VertexIdxT<GraphT>>::difference_type GrowLocalSSP<Gra
77176
}
78177
}
79178

179+
template <typename GraphT>
180+
bool GrowLocalSSP<GraphT>::ChanceToFinish(const unsigned superStep) const {
181+
bool ans = std::all_of(futureReady_.cbegin(), futureReady_.cend(), [](const auto &deq) { return deq.empty(); });
182+
183+
if (ans) {
184+
for (unsigned i = 1U; i < staleness; ++i) {
185+
const auto &stepProcReady = procReady_[(i + superStep) % staleness];
186+
ans = std::all_of(stepProcReady.cbegin(), stepProcReady.cend(), [](const auto &vec) { return vec.empty(); });
187+
if (not ans) {
188+
break;
189+
}
190+
}
191+
}
192+
193+
if (ans) {
194+
for (unsigned i = 1U; i < staleness; ++i) {
195+
const auto &stepProcReadyAdditions = procReadyAdditions_[(i + superStep) % staleness];
196+
ans = std::all_of(
197+
stepProcReadyAdditions.cbegin(), stepProcReadyAdditions.cend(), [](const auto &vec) { return vec.empty(); });
198+
if (not ans) {
199+
break;
200+
}
201+
}
202+
}
203+
204+
return ans;
205+
}
206+
80207
template <typename GraphT>
81208
ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(BspSchedule<GraphT> &schedule) {
82209
return MaxBspScheduler<GraphT>::ComputeSchedule(schedule);
@@ -89,41 +216,17 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
89216
const VertexType numVertices = graph.NumVertices();
90217
const unsigned numProcs = instance.NumberOfProcessors();
91218

92-
std::deque<VertexType> currentlyReady; // vertices ready in current superstep
93-
94-
std::array<std::deque<VertexType>, staleness> futureReady;
95-
// For i = 1,2,..,staleness, the vertices in futureReady[(superstep + i) % staleness] becomes ready globally in superstep + i
96-
std::deque<VertexType> bestFutureReady;
97-
// vertices to be added to futureReady[superstep % staleness] which become ready globally in superstep + staleness
98-
99-
std::vector<std::vector<std::pair<VertexType, unsigned>>> currentProcReadyHeaps(numProcs);
100-
std::vector<std::vector<std::pair<VertexType, unsigned>>> bestCurrentProcReadyHeaps(numProcs);
101-
102-
std::array<std::vector<std::vector<std::pair<VertexType, unsigned>>>, staleness> procReady;
103-
// For i = 0,1,2,..,staleness-1 and p processor, the vertices in procReady[(superstep + i) % staleness][p] are ready locally
104-
// in superstep + i on processor p
105-
std::array<std::vector<std::vector<std::pair<VertexType, unsigned>>>, staleness> procReadyAdditions;
106-
std::array<std::vector<std::vector<std::pair<VertexType, unsigned>>>, staleness> bestProcReadyAdditions;
107-
108-
for (auto &arrVal : procReady) {
109-
arrVal = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
110-
}
111-
for (auto &arrVal : procReadyAdditions) {
112-
arrVal = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
113-
}
114-
for (auto &arrVal : bestProcReadyAdditions) {
115-
arrVal = std::vector<std::vector<std::pair<VertexType, unsigned>>>(numProcs);
116-
}
219+
Init(numProcs);
117220

118221
std::vector<VertexType> predec(numVertices);
119222
for (const auto vert : graph.Vertices()) {
120223
predec[vert] = graph.InDegree(vert);
121224
if (predec[vert] == 0U) {
122-
currentlyReady.emplace_back(vert);
225+
currentlyReady_.emplace_back(vert);
123226
}
124227
}
125228
if constexpr (not hasVerticesInTopOrderV<GraphT>) {
126-
std::sort(currentlyReady.begin(), currentlyReady.end(), std::less<>{});
229+
std::sort(currentlyReady_.begin(), currentlyReady_.end(), std::less<>{});
127230
}
128231

129232
std::vector<std::vector<VertexType>> newAssignments(numProcs);
@@ -140,20 +243,22 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
140243
while (totalAssigned < numVertices) {
141244
const unsigned reducedSuperStep = superStep % staleness;
142245

143-
std::deque<VertexType> &stepFutureReady = futureReady[reducedSuperStep];
246+
std::deque<VertexType> &stepFutureReady = futureReady_[reducedSuperStep];
144247
std::sort(stepFutureReady.begin(), stepFutureReady.end(), std::less<>{});
145248
const typename std::deque<VertexType>::difference_type lengthCurrentlyReady
146-
= std::distance(currentlyReady.begin(), currentlyReady.end());
147-
currentlyReady.insert(currentlyReady.end(), stepFutureReady.begin(), stepFutureReady.end());
148-
std::inplace_merge(
149-
currentlyReady.begin(), std::next(currentlyReady.begin(), lengthCurrentlyReady), currentlyReady.end(), std::less<>{});
249+
= std::distance(currentlyReady_.begin(), currentlyReady_.end());
250+
currentlyReady_.insert(currentlyReady_.end(), stepFutureReady.begin(), stepFutureReady.end());
251+
std::inplace_merge(currentlyReady_.begin(),
252+
std::next(currentlyReady_.begin(), lengthCurrentlyReady),
253+
currentlyReady_.end(),
254+
std::less<>{});
150255

151256
const typename std::deque<VertexType>::difference_type maxCurrentlyReadyUsage
152257
= std::max(static_cast<typename std::deque<VertexType>::difference_type>(
153258
static_cast<double>(params_.minSuperstepSize_) * desiredParallelism),
154-
maxAllReadyUsage(currentlyReady, futureReady[(superStep + 1U) % staleness]));
259+
MaxAllReadyUsage(currentlyReady_, futureReady_[(superStep + 1U) % staleness]));
155260

156-
std::vector<std::vector<std::pair<VertexType, unsigned>>> &stepProcReady = procReady[reducedSuperStep];
261+
std::vector<std::vector<std::pair<VertexType, unsigned>>> &stepProcReady = procReady_[reducedSuperStep];
157262
for (auto &procHeap : stepProcReady) {
158263
std::make_heap(procHeap.begin(), procHeap.end(), std::greater<>{}); // min heap
159264
}
@@ -172,11 +277,11 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
172277
procAssignments.clear();
173278
}
174279
stepFutureReady.clear();
175-
currentProcReadyHeaps = stepProcReady;
280+
currentProcReadyHeaps_ = stepProcReady;
176281

177-
currentlyReadyIter = currentlyReady.cbegin();
282+
currentlyReadyIter = currentlyReady_.cbegin();
178283

179-
for (auto &stepProcReadyAdditions : procReadyAdditions) {
284+
for (auto &stepProcReadyAdditions : procReadyAdditions_) {
180285
for (auto &localStepProcReadyAdditions : stepProcReadyAdditions) {
181286
localStepProcReadyAdditions.clear();
182287
}
@@ -189,14 +294,14 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
189294
// Processor 0
190295
constexpr unsigned proc0{0U};
191296
while (newAssignments[proc0].size() < limit) {
192-
std::vector<std::pair<VertexType, unsigned>> &proc0Heap = currentProcReadyHeaps[proc0];
297+
std::vector<std::pair<VertexType, unsigned>> &proc0Heap = currentProcReadyHeaps_[proc0];
193298
VertexType chosenNode = std::numeric_limits<VertexType>::max();
194299
{
195300
if (proc0Heap.size() != 0U) {
196301
std::pop_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>{});
197302
chosenNode = proc0Heap.back().first;
198303
proc0Heap.pop_back();
199-
} else if (currentlyReadyIter != currentlyReady.cend()) {
304+
} else if (currentlyReadyIter != currentlyReady_.cend()) {
200305
chosenNode = *currentlyReadyIter;
201306
++currentlyReadyIter;
202307
} else {
@@ -223,7 +328,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
223328
proc0Heap.emplace_back(succ, superStep + staleness);
224329
std::push_heap(proc0Heap.begin(), proc0Heap.end(), std::greater<>{});
225330
} else if (earliest < superStep + staleness) {
226-
procReadyAdditions[earliest % staleness][proc0].emplace_back(succ, superStep + staleness);
331+
procReadyAdditions_[earliest % staleness][proc0].emplace_back(succ, superStep + staleness);
227332
} else {
228333
stepFutureReady.emplace_back(succ);
229334
}
@@ -237,14 +342,14 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
237342
for (unsigned proc = 1U; proc < numProcs; ++proc) {
238343
VWorkwT<GraphT> currentWeightAssigned = 0;
239344
while (currentWeightAssigned < weightLimit) {
240-
std::vector<std::pair<VertexType, unsigned>> &procHeap = currentProcReadyHeaps[proc];
345+
std::vector<std::pair<VertexType, unsigned>> &procHeap = currentProcReadyHeaps_[proc];
241346
VertexType chosenNode = std::numeric_limits<VertexType>::max();
242347
{
243348
if (procHeap.size() != 0U) {
244349
std::pop_heap(procHeap.begin(), procHeap.end(), std::greater<>{});
245350
chosenNode = procHeap.back().first;
246351
procHeap.pop_back();
247-
} else if (currentlyReadyIter != currentlyReady.cend()) {
352+
} else if (currentlyReadyIter != currentlyReady_.cend()) {
248353
chosenNode = *currentlyReadyIter;
249354
++currentlyReadyIter;
250355
} else {
@@ -271,7 +376,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
271376
procHeap.emplace_back(succ, superStep + staleness);
272377
std::push_heap(procHeap.begin(), procHeap.end(), std::greater<>{});
273378
} else if (earliest < superStep + staleness) {
274-
procReadyAdditions[earliest % staleness][proc].emplace_back(succ, superStep + staleness);
379+
procReadyAdditions_[earliest % staleness][proc].emplace_back(succ, superStep + staleness);
275380
} else {
276381
stepFutureReady.emplace_back(succ);
277382
}
@@ -314,16 +419,16 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
314419
}
315420
}
316421

317-
if (currentlyReadyIter == currentlyReady.cend()) {
318-
continueSuperstepAttemps = false;
319-
}
320-
321-
if (std::distance(currentlyReady.cbegin(), currentlyReadyIter) > maxCurrentlyReadyUsage) {
422+
if (currentlyReadyIter == currentlyReady_.cend()) {
322423
continueSuperstepAttemps = false;
323424
}
324425

325-
if (totalAssigned + newTotalAssigned == numVertices) {
326-
continueSuperstepAttemps = false;
426+
if (continueSuperstepAttemps) {
427+
if (std::distance(currentlyReady_.cbegin(), currentlyReadyIter) > maxCurrentlyReadyUsage) {
428+
if (not((totalAssigned + newTotalAssigned >= (numVertices / 4) * 3) && ChanceToFinish(superStep))) {
429+
continueSuperstepAttemps = false;
430+
}
431+
}
327432
}
328433

329434
// Undo predec decreases
@@ -336,41 +441,41 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
336441
}
337442

338443
if (acceptStep) {
339-
std::swap(bestFutureReady, stepFutureReady);
340-
std::swap(bestProcReadyAdditions, procReadyAdditions);
444+
std::swap(bestFutureReady_, stepFutureReady);
445+
std::swap(bestProcReadyAdditions_, procReadyAdditions_);
341446
std::swap(bestcurrentlyReadyIter, currentlyReadyIter);
342447
std::swap(bestNewAssignments, newAssignments);
343-
std::swap(bestCurrentProcReadyHeaps, currentProcReadyHeaps);
448+
std::swap(bestCurrentProcReadyHeaps_, currentProcReadyHeaps_);
344449
}
345450

346451
limit++;
347452
limit += (limit / 2);
348453
}
349454

350455
// apply best iteration
351-
currentlyReady.erase(currentlyReady.begin(), bestcurrentlyReadyIter);
352-
std::swap(futureReady[reducedSuperStep], bestFutureReady);
456+
currentlyReady_.erase(currentlyReady_.begin(), bestcurrentlyReadyIter);
457+
std::swap(futureReady_[reducedSuperStep], bestFutureReady_);
353458

354-
for (auto &localProcReady : procReady[reducedSuperStep]) {
459+
for (auto &localProcReady : procReady_[reducedSuperStep]) {
355460
localProcReady.clear();
356461
}
357462

358463
const unsigned nextSuperStep = superStep + 1U;
359464
for (unsigned proc = 0U; proc < numProcs; ++proc) {
360-
for (const auto &vertStepPair : bestCurrentProcReadyHeaps[proc]) {
465+
for (const auto &vertStepPair : bestCurrentProcReadyHeaps_[proc]) {
361466
if (vertStepPair.second <= nextSuperStep) {
362-
futureReady[nextSuperStep % staleness].emplace_back(vertStepPair.first);
467+
futureReady_[nextSuperStep % staleness].emplace_back(vertStepPair.first);
363468
} else {
364-
procReady[nextSuperStep % staleness][proc].emplace_back(vertStepPair);
469+
procReady_[nextSuperStep % staleness][proc].emplace_back(vertStepPair);
365470
}
366471
}
367472
}
368473

369474
for (std::size_t stepInd = 0U; stepInd < staleness; ++stepInd) {
370475
for (unsigned proc = 0U; proc < numProcs; ++proc) {
371-
procReady[stepInd][proc].insert(procReady[stepInd][proc].end(),
372-
bestProcReadyAdditions[stepInd][proc].begin(),
373-
bestProcReadyAdditions[stepInd][proc].end());
476+
procReady_[stepInd][proc].insert(procReady_[stepInd][proc].end(),
477+
bestProcReadyAdditions_[stepInd][proc].begin(),
478+
bestProcReadyAdditions_[stepInd][proc].end());
374479
}
375480
}
376481

@@ -391,6 +496,7 @@ ReturnStatus GrowLocalSSP<GraphT>::ComputeSchedule(MaxBspSchedule<GraphT> &sched
391496
}
392497

393498
schedule.SetNumberOfSupersteps(superStep);
499+
ReleaseMemory();
394500

395501
return ReturnStatus::OSP_SUCCESS;
396502
}

0 commit comments

Comments
 (0)