Skip to content

Commit 5226e6e

Browse files
algorithm updates to new cost model (+sync & other fixes)
1 parent 5a5fa6e commit 5226e6e

8 files changed

Lines changed: 187 additions & 100 deletions

File tree

include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ class GreedyBspToMaxBspConverter {
4242
std::vector<std::vector<std::deque<vertex_idx_t<Graph_t>>>> createSuperstepLists(const BspScheduleCS<Graph_t>& schedule, std::vector<double>& priorities) const;
4343

4444
public:
45-
46-
MaxBspSchedule<Graph_t> Convert(const BspSchedule<Graph_t>& schedule) const;
45+
46+
MaxBspSchedule<Graph_t> Convert(const BspSchedule<Graph_t>& schedule) const;
4747
MaxBspScheduleCS<Graph_t> Convert(const BspScheduleCS<Graph_t>& schedule) const;
4848

4949
};
@@ -106,7 +106,7 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
106106
std::vector<std::pair<KeyTriple, unsigned>> newly_freed_comm_steps;
107107
std::vector<cost_type> send_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0),
108108
rec_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0);
109-
109+
110110
std::vector<std::pair<KeyTriple, unsigned>> comm_in_current_step;
111111

112112
std::vector<cost_type> send_on_proc(schedule.getInstance().numberOfProcessors(), 0),
@@ -118,10 +118,10 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
118118
// I. Select the next node (from any proc) with highest priority
119119
unsigned chosen_proc = schedule.getInstance().numberOfProcessors();
120120
double best_prio = std::numeric_limits<double>::max();
121-
121+
122122
for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc)
123123
{
124-
if(!proc_list[proc][step].empty() && (chosen_proc == schedule.getInstance().numberOfProcessors() ||
124+
if(!proc_list[proc][step].empty() && (chosen_proc == schedule.getInstance().numberOfProcessors() ||
125125
priorities[proc_list[proc][step].front()] < best_prio))
126126
{
127127
chosen_proc = proc;
@@ -241,7 +241,7 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
241241

242242
for(const std::pair<KeyTriple, unsigned>& entry : newly_freed_comm_steps)
243243
free_comm_steps_for_superstep[step].insert(entry);
244-
244+
245245
if(free_comm_steps_for_superstep[step].empty())
246246
continue;
247247

@@ -256,8 +256,8 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
256256
send_on_proc.resize(schedule.getInstance().numberOfProcessors(), 0);
257257
rec_on_proc.clear();
258258
rec_on_proc.resize(schedule.getInstance().numberOfProcessors(), 0);
259-
260-
std::set<std::pair<vertex_idx, unsigned>> late_arriving_nodes;
259+
260+
std::set<std::pair<vertex_idx, unsigned>> late_arriving_nodes;
261261
for(const std::pair<KeyTriple, unsigned>& entry : free_comm_steps_for_superstep[step])
262262
{
263263
schedule_max.addCommunicationScheduleEntry(entry.first, current_step - 1);
@@ -293,8 +293,8 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
293293
max_comm_together = std::max(max_comm_together, rec_on_proc[proc]);
294294
}
295295

296-
cost_type work_limit = max_comm_after;
297-
if(max_comm_together + max_work_done <= max_comm_after + std::max(max_work_done, max_comm_current + schedule.getInstance().getArchitecture().synchronisationCosts()))
296+
cost_type work_limit = max_comm_after;
297+
if(max_comm_together + max_work_done <= max_comm_after + std::max(max_work_done, max_comm_current) + schedule.getInstance().getArchitecture().synchronisationCosts())
298298
{
299299
work_limit = max_comm_together;
300300
for(const std::pair<KeyTriple, unsigned>& entry : comm_in_current_step)
@@ -320,13 +320,13 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
320320
continue;
321321

322322
bool has_dependency = false;
323-
323+
324324
for (const vertex_idx &parent : dag.parents(node))
325325
{
326326
if(schedule.assignedProcessor(node) != schedule.assignedProcessor(parent) &&
327327
late_arriving_nodes.find(std::make_pair(parent, proc)) != late_arriving_nodes.end())
328328
has_dependency = true;
329-
329+
330330
if(schedule.assignedProcessor(node) == schedule.assignedProcessor(parent) &&
331331
schedule.assignedSuperstep(parent) == step + 1 &&
332332
brought_forward.find(parent) == brought_forward.end())
@@ -341,7 +341,7 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
341341
schedule_max.setAssignedSuperstep(node, current_step);
342342
work_remaining_proc_superstep[proc][step+1] -= dag.vertex_work_weight(node);
343343
--nodes_remaining_superstep[step+1];
344-
344+
345345
for(const std::pair<KeyTriple, unsigned>& entry : dependent_comm_steps_for_node[node])
346346
free_comm_steps_for_superstep[step+1].insert(entry);
347347
}
@@ -350,7 +350,7 @@ MaxBspScheduleCS<Graph_t> GreedyBspToMaxBspConverter<Graph_t>::Convert(const Bsp
350350
for(vertex_idx node : proc_list[proc][step+1])
351351
if(brought_forward.find(node) == brought_forward.end())
352352
remaining.push_back(node);
353-
353+
354354
proc_list[proc][step+1] = remaining;
355355
}
356356

include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ class CoptCommScheduleOptimizer {
3838

3939
static_assert(is_computational_dag_v<Graph_t>, "CoptFullScheduler can only be used with computational DAGs.");
4040

41-
bool num_supersteps_can_change = true;
41+
bool ignore_latency = false;
4242

4343
unsigned int timeLimitSeconds = 600;
4444

4545
protected:
4646

47-
VarArray superstep_used_var;
47+
VarArray superstep_has_comm;
4848
VarArray max_comm_superstep_var;
4949
std::vector<std::vector<std::vector<VarArray>>> comm_processor_to_processor_superstep_node_var;
5050

@@ -67,7 +67,7 @@ class CoptCommScheduleOptimizer {
6767

6868
virtual void setTimeLimitSeconds(unsigned int limit) { timeLimitSeconds = limit; }
6969
inline unsigned int getTimeLimitSeconds() const { return timeLimitSeconds; }
70-
virtual void setNumSuperstepsCanChange(bool can_change_) { num_supersteps_can_change = can_change_; }
70+
virtual void setIgnoreLatency(bool ignore_latency_) { ignore_latency = ignore_latency_; }
7171
};
7272

7373

@@ -110,7 +110,7 @@ bool CoptCommScheduleOptimizer<Graph_t>::canShrinkResultingSchedule(unsigned num
110110

111111
for (unsigned step = 0; step < number_of_supersteps - 1; step++) {
112112

113-
if (superstep_used_var[static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) <= 0.01)
113+
if (superstep_has_comm[static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) <= 0.01)
114114
return true;
115115
}
116116
return false;
@@ -187,13 +187,13 @@ void CoptCommScheduleOptimizer<Graph_t>::setInitialSolution(BspScheduleCS<Graph_
187187
[static_cast<int>(node)], 0);
188188
}
189189

190-
if(num_supersteps_can_change)
190+
if(!ignore_latency)
191191
{
192192
std::vector<unsigned> comm_phase_used(num_supersteps, 0);
193193
for (auto const &[key, val] : cs)
194194
comm_phase_used[val] = 1;
195195
for (unsigned step = 0; step < num_supersteps; step++)
196-
model.SetMipStart(superstep_used_var[static_cast<int>(step)], comm_phase_used[step]);
196+
model.SetMipStart(superstep_has_comm[static_cast<int>(step)], comm_phase_used[step]);
197197
}
198198

199199
std::vector<std::vector<v_commw_t<Graph_t>>> send(num_supersteps, std::vector<v_commw_t<Graph_t>>(num_processors, 0));
@@ -227,8 +227,8 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
227227
const unsigned num_vertices = static_cast<unsigned>(schedule.getInstance().numberOfVertices());
228228

229229
// variables indicating if a superstep has any communication (only needed when latency is not ignored)
230-
if (num_supersteps_can_change) {
231-
superstep_used_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "superstep_used");
230+
if (!ignore_latency) {
231+
superstep_has_comm = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "superstep_has_comm");
232232
}
233233

234234
max_comm_superstep_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_comm_superstep");
@@ -250,7 +250,7 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
250250
}
251251
}
252252

253-
if (num_supersteps_can_change) {
253+
if (!ignore_latency) {
254254
unsigned M = num_processors * num_processors * num_vertices;
255255
for (unsigned int step = 0; step < schedule.numberOfSupersteps(); step++) {
256256

@@ -269,7 +269,7 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
269269
}
270270
}
271271

272-
model.AddConstr(expr <= M * superstep_used_var[static_cast<int>(step)]);
272+
model.AddConstr(expr <= M * superstep_has_comm[static_cast<int>(step)]);
273273
}
274274
}
275275
// precedence constraint: if task is computed then all of its predecessors must have been present
@@ -356,11 +356,11 @@ void CoptCommScheduleOptimizer<Graph_t>::setupVariablesConstraintsObjective(cons
356356
*/
357357
Expr expr;
358358

359-
if (num_supersteps_can_change) {
359+
if (!ignore_latency) {
360360

361361
for (unsigned int step = 0; step < max_number_supersteps; step++) {
362362
expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast<int>(step)] +
363-
schedule.getInstance().synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
363+
schedule.getInstance().synchronisationCosts() * superstep_has_comm[static_cast<int>(step)];
364364
}
365365
} else {
366366

include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,17 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
6262

6363
private:
6464
bool allow_recomputation;
65-
bool is_max_bsp = false;
6665
bool use_memory_constraint;
66+
bool use_initial_schedule_recomp = false;
6767
bool use_initial_schedule = false;
6868
bool write_solutions_found;
69-
bool use_initial_schedule_recomp = false;
69+
bool is_max_bsp = false;
7070

7171
unsigned timeLimitSeconds = 0;
7272

7373
const BspScheduleCS<Graph_t> *initial_schedule;
7474
const BspScheduleRecomp<Graph_t> *initial_schedule_recomp;
75-
75+
7676
std::string write_solutions_path;
7777
std::string solution_file_prefix;
7878

@@ -172,7 +172,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
172172

173173
return schedule;
174174
}
175-
175+
176176
BspScheduleRecomp<Graph_t> constructBspScheduleRecompFromCallback() {
177177

178178
unsigned number_of_supersteps = 0;
@@ -262,6 +262,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
262262
if(is_max_bsp && number_of_supersteps>0) // can ignore last 2 comm phases in this case
263263
--number_of_supersteps;
264264

265+
schedule.getCommunicationSchedule().clear();
265266
for (const auto &node : instance.vertices()) {
266267

267268
for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
@@ -310,6 +311,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
310311
}
311312
}
312313

314+
schedule.getCommunicationSchedule().clear();
313315
for (unsigned int node = 0; node < schedule.getInstance().numberOfVertices(); node++) {
314316

315317
for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) {
@@ -405,11 +407,11 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
405407
}
406408
else
407409
{
408-
first_at[node][initial_schedule->assignedProcessor(node)] = std::min(first_at[node][initial_schedule->assignedProcessor(node)],
410+
first_at[node][initial_schedule->assignedProcessor(node)] = std::min(first_at[node][initial_schedule->assignedProcessor(node)],
409411
initial_schedule->assignedSuperstep(node) );
410412
}
411413
}
412-
414+
413415
unsigned staleness = is_max_bsp ? 2 : 1;
414416
for (const auto &node : DAG.vertices()) {
415417

@@ -452,10 +454,10 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
452454
[static_cast<int>(node)], 1);
453455
else
454456
model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step]
455-
[static_cast<int>(node)], 0);
457+
[static_cast<int>(node)], 0);
456458
}
457459

458-
for (const auto &node : DAG.vertices()) {
460+
for (const auto &node : DAG.vertices()) {
459461

460462
for (unsigned proc = 0; proc < num_processors; proc++) {
461463

@@ -548,10 +550,19 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
548550
// variables indicating if superstep is used at all
549551
superstep_used_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "superstep_used");
550552

553+
VarArray superstep_has_comm, mergeable_superstep_penalty;
554+
if(is_max_bsp)
555+
{
556+
// variables indicating if there is any communication in superstep
557+
superstep_has_comm = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "superstep_has_comm");
558+
// variables that incentivize the schedule to be continuous - needs to be done differently for maxBsp
559+
mergeable_superstep_penalty = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "mergeable_superstep_penalty");
560+
}
561+
562+
// variables for assignments of nodes to processor and superstep
551563
node_to_processor_superstep_var = std::vector<std::vector<VarArray>>(
552564
instance.numberOfVertices(), std::vector<VarArray>(instance.numberOfProcessors()));
553565

554-
// variables for assignments of nodes to processor and superstep
555566
for (const auto &node : instance.vertices()) {
556567

557568
for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
@@ -598,7 +609,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
598609
}
599610
}
600611
model.AddConstr(expr <= static_cast<double>(instance.numberOfVertices() * instance.numberOfProcessors()) *
601-
superstep_used_var.GetVar(static_cast<int>(step)));
612+
superstep_used_var[static_cast<int>(step)]);
602613
}
603614

604615
// nodes are assigned depending on whether recomputation is allowed or not
@@ -688,6 +699,29 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
688699
}
689700
}
690701

702+
// synchronization cost calculation & forcing continuous schedule in maxBsp
703+
if(is_max_bsp)
704+
{
705+
for (unsigned int step = 0; step < max_number_supersteps; step++) {
706+
Expr expr;
707+
for (const auto &node : instance.vertices()) {
708+
for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
709+
for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) {
710+
if(p_from != p_to)
711+
expr += comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast<int>(node)];
712+
}
713+
}
714+
}
715+
model.AddConstr(static_cast<unsigned>(instance.numberOfProcessors() * instance.numberOfProcessors() * instance.numberOfVertices()) *
716+
superstep_has_comm[static_cast<int>(step)] >= expr);
717+
}
718+
719+
// if steps i and (i+1) have no comm, and step (i+2) has work, then (i+1) and (i+2) are mergeable -> penalize
720+
for (unsigned int step = 0; step < max_number_supersteps - 2; step++)
721+
model.AddConstr(superstep_used_var[static_cast<int>(step + 2)] - superstep_has_comm[static_cast<int>(step)]
722+
- superstep_has_comm[static_cast<int>(step + 1)] <= mergeable_superstep_penalty[static_cast<int>(step)]);
723+
}
724+
691725
max_comm_superstep_var =
692726
model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_comm_superstep");
693727
// coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_comm_superstep");
@@ -770,10 +804,10 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
770804
model.AddConstr(max_superstep_var[static_cast<int>(step)] >= max_work_superstep_var[static_cast<int>(step)]);
771805
if(step > 0)
772806
model.AddConstr(max_superstep_var[static_cast<int>(step)] >= instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step-1)]);
773-
expr += max_superstep_var[static_cast<int>(step)]; +
774-
instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
807+
expr += max_superstep_var[static_cast<int>(step)];
808+
expr += instance.synchronisationCosts() * superstep_has_comm[static_cast<int>(step)];
809+
expr += instance.synchronisationCosts() * mergeable_superstep_penalty[static_cast<int>(step)];
775810
}
776-
777811
}
778812
else
779813
{
@@ -782,9 +816,10 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
782816
instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step)] +
783817
instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
784818
}
819+
expr -= instance.synchronisationCosts();
785820
}
786821

787-
model.SetObjective(expr - instance.synchronisationCosts(), COPT_MINIMIZE);
822+
model.SetObjective(expr, COPT_MINIMIZE);
788823
}
789824

790825
RETURN_STATUS run_scheduler(BspScheduleCS<Graph_t> &schedule) {
@@ -824,7 +859,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
824859

825860
public:
826861
CoptFullScheduler(unsigned steps = 5)
827-
: allow_recomputation(false), use_memory_constraint(false), use_initial_schedule(false),
862+
: allow_recomputation(false), use_memory_constraint(false), use_initial_schedule(false),
828863
write_solutions_found(false), initial_schedule(0), max_number_supersteps(steps) {
829864

830865
// solution_callback.comm_processor_to_processor_superstep_node_var_ptr =
@@ -889,15 +924,15 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
889924
return status;
890925
}
891926
}
892-
927+
893928
virtual RETURN_STATUS computeMaxBspScheduleCS(MaxBspScheduleCS<Graph_t> &schedule) {
894929
allow_recomputation = false;
895930
is_max_bsp = true;
896931
return run_scheduler(schedule);
897932
}
898933

899934

900-
virtual RETURN_STATUS computeScheduleCS(BspScheduleCS<Graph_t> &schedule) override {
935+
virtual RETURN_STATUS computeScheduleCS(BspScheduleCS<Graph_t> &schedule) override {
901936
allow_recomputation = false;
902937
is_max_bsp = false;
903938
return run_scheduler(schedule);
@@ -942,7 +977,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
942977
};
943978

944979
virtual void computeScheduleBase(const BspScheduleRecomp<Graph_t> &schedule, Model &model) {
945-
980+
946981
if (timeLimitSeconds > 0) {
947982
model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds);
948983
}
@@ -1064,6 +1099,13 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
10641099
*/
10651100
inline unsigned getMaxNumberOfSupersteps() const { return max_number_supersteps; }
10661101

1102+
/**
1103+
* @brief Sets the time limit for the ILP solving.
1104+
*
1105+
* @param time_limit_seconds_ The time limit in seconds.
1106+
*/
1107+
inline void setTimeLimitSeconds(unsigned time_limit_seconds_) { timeLimitSeconds = time_limit_seconds_; }
1108+
10671109
/**
10681110
* @brief Get the name of the schedule.
10691111
*

0 commit comments

Comments
 (0)