Skip to content

Commit 90fcb01

Browse files
committed
trimmed scheduler revision
1 parent f5b183e commit 90fcb01

2 files changed

Lines changed: 65 additions & 55 deletions

File tree

include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ namespace osp {
2929
* @class PrecomputedHashComputer
3030
* @brief A class to store precomputed hash values for a set of objects and provide an orbit-based interface.
3131
*
32-
* This class takes a vector of hash values for objects indexed from 0 to n-1. It then computes the orbits
33-
* (i.e., groups of objects with the same hash value) and provides an interface identical to `MerkleHashComputer`.
34-
* This is useful when hashes are generated by external means but the orbit-based analysis is still desired.
35-
*
3632
* @tparam IndexType The type used for indexing the objects
3733
*/
3834
template <typename IndexType>

include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp

Lines changed: 65 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ Copyright 2024 Huawei Technologies Co., Ltd.
33
44
Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.
6-
You may obtain a copy of the License at
6+
You may obtain a copy of the License at
77
88
http://www.apache.org/licenses/LICENSE-2.0
99
@@ -18,8 +18,9 @@ limitations under the License.
1818

1919
#pragma once
2020

21-
#include <iostream>
2221
#include <numeric>
22+
#include <string>
23+
#include <vector>
2324

2425
#include "osp/bsp/scheduler/Scheduler.hpp"
2526
#include "osp/graph_algorithms/computational_dag_util.hpp"
@@ -28,22 +29,26 @@ limitations under the License.
2829
namespace osp {
2930

3031
/**
31-
* @brief A scheduler for a single trimmed group, which consists of multiple isomorphic connected components.
32-
*
3332
* @class TrimmedGroupScheduler
33+
* @brief A scheduler for a single trimmed group consisting of multiple isomorphic connected components.
34+
*
35+
* This scheduler partitions a disconnected subgraph (a trimmed group) into its weakly connected components.
36+
* It assumes these components are isomorphic and distributes them among the available processor groups
37+
* to balance the load.
3438
*
35-
* This scheduler functions similarly to the ConnectedComponentScheduler but is tailored for a single,
36-
* potentially disconnected, subgraph that resulted from merging smaller isomorphic subgraphs. It divides
37-
* the input graph into its weakly connected components and schedules them on proportionally allocated processors.
39+
* @tparam ConstrGraphT The type of the graph.
3840
*/
3941
template <typename ConstrGraphT>
4042
class TrimmedGroupScheduler : public Scheduler<ConstrGraphT> {
4143
Scheduler<ConstrGraphT> *subScheduler_;
4244
unsigned minNonZeroProcs_;
4345

44-
static constexpr bool verbose_ = false;
45-
4646
public:
47+
/**
48+
* @brief Constructs a TrimmedGroupScheduler.
49+
* @param scheduler The sub-scheduler used to schedule each group of components. Only its address is
* stored (no copy is made), so it must remain alive for as long as this scheduler is used.
50+
* @param minNonZeroProcs The number of processor groups the components are split across. It is used
* as the divisor for both the component count and the per-type processor counts, so it must be
* non-zero.
51+
*/
4752
TrimmedGroupScheduler(Scheduler<ConstrGraphT> &scheduler, unsigned minNonZeroProcs)
4853
: subScheduler_(&scheduler), minNonZeroProcs_(minNonZeroProcs) {}
4954

@@ -52,9 +57,7 @@ class TrimmedGroupScheduler : public Scheduler<ConstrGraphT> {
5257
ReturnStatus ComputeSchedule(BspSchedule<ConstrGraphT> &schedule) override {
5358
const auto &instance = schedule.GetInstance();
5459
const ConstrGraphT &dag = instance.GetComputationalDag();
55-
const BspArchitecture<ConstrGraphT> &arch = instance.GetArchitecture();
5660

57-
// Find the weakly connected components. These are assumed to be isomorphic subgraphs.
5861
std::vector<VertexIdxT<ConstrGraphT>> componentMap(dag.NumVertices());
5962
size_t numComponents = ComputeWeaklyConnectedComponents(dag, componentMap);
6063

@@ -63,20 +66,24 @@ class TrimmedGroupScheduler : public Scheduler<ConstrGraphT> {
6366
return ReturnStatus::OSP_SUCCESS;
6467
}
6568

66-
if constexpr (verbose_) {
67-
std::cout << " [TrimmedGroupScheduler] min_non_zero_procs: " << minNonZeroProcs_
68-
<< ", num_components: " << numComponents << std::endl;
69-
}
70-
71-
// Group vertices by component.
7269
std::vector<std::vector<VertexIdxT<ConstrGraphT>>> componentsVertices(numComponents);
7370
for (VertexIdxT<ConstrGraphT> v = 0; v < dag.NumVertices(); ++v) {
7471
componentsVertices[componentMap[v]].push_back(v);
7572
}
7673

77-
// Distribute components among processor types.
78-
// The goal is to assign `base_count` components to each processor type group,
79-
// plus one extra for the first `remainder` groups.
74+
auto componentIndicesPerGroup = DistributeComponents(numComponents);
75+
auto subArch = BuildSubArchitecture(instance.GetArchitecture());
76+
77+
return SolveAndMapSubProblems(schedule, componentIndicesPerGroup, componentsVertices, subArch);
78+
}
79+
80+
private:
81+
/**
82+
* @brief Distributes components among the processor groups.
83+
* @param numComponents Total number of components.
84+
* @return A vector where each element is a list of component indices assigned to a processor group.
85+
*/
86+
std::vector<std::vector<unsigned>> DistributeComponents(size_t numComponents) {
8087
const unsigned baseCount = static_cast<unsigned>(numComponents) / minNonZeroProcs_;
8188
const unsigned remainder = static_cast<unsigned>(numComponents) % minNonZeroProcs_;
8289

@@ -90,43 +97,58 @@ class TrimmedGroupScheduler : public Scheduler<ConstrGraphT> {
9097
}
9198
}
9299
}
100+
return componentIndicesPerGroup;
101+
}
93102

94-
// Determine the processor allocation for a single sub-problem.
95-
// Calculate offsets for processor types within the main 'arch' (passed to TrimmedGroupScheduler)
96-
std::vector<unsigned> archProcTypeOffsets(arch.GetNumberOfProcessorTypes(), 0);
97-
const auto &archProcTypeCounts = arch.GetProcessorTypeCount();
98-
for (unsigned typeIdx = 1; typeIdx < arch.GetNumberOfProcessorTypes(); ++typeIdx) {
99-
archProcTypeOffsets[typeIdx] = archProcTypeOffsets[typeIdx - 1] + archProcTypeCounts[typeIdx - 1];
100-
}
101-
103+
/**
104+
* @brief Builds the architecture for a single sub-problem (one processor group).
*
* For every processor type, the sub-architecture receives the global processor count of that type
* divided by minNonZeroProcs_ (integer division, remainder dropped) and the memory bound reported by
* MaxMemoryBoundProcType for that type. The result starts as a copy of @p arch whose processor layout
* is then replaced via SetProcessorsConsequTypes.
*
* NOTE(review): a processor type with fewer than minNonZeroProcs_ processors ends up with a count of
* zero here, and minNonZeroProcs_ == 0 would divide by zero - confirm callers rule both out.
105+
* @param arch The global architecture.
106+
* @return The sub-architecture.
107+
*/
108+
BspArchitecture<ConstrGraphT> BuildSubArchitecture(const BspArchitecture<ConstrGraphT> &arch) {
102109
std::vector<unsigned> subProcCounts(arch.GetNumberOfProcessorTypes());
103110
std::vector<VMemwT<ConstrGraphT>> memWeights(arch.GetNumberOfProcessorTypes(), 0);
111+
104112
for (unsigned typeIdx = 0; typeIdx < arch.GetNumberOfProcessorTypes(); ++typeIdx) {
// Unsigned integer division: each group gets an equal share; leftover processors are not assigned.
105113
subProcCounts[typeIdx] = arch.GetProcessorTypeCount()[typeIdx] / minNonZeroProcs_;
106114
memWeights[typeIdx] = static_cast<VMemwT<ConstrGraphT>>(arch.MaxMemoryBoundProcType(typeIdx));
107115
}
108116

109-
if constexpr (verbose_) {
110-
std::cout << " [TrimmedGroupScheduler] Sub-problem processor counts per type: ";
111-
for (size_t typeIdx = 0; typeIdx < subProcCounts.size(); ++typeIdx) {
112-
std::cout << "T" << typeIdx << ":" << subProcCounts[typeIdx] << " ";
113-
}
114-
std::cout << std::endl;
115-
}
116-
117-
// Create the sub-architecture for one sub-problem.
// Start from a copy of the global architecture, then overwrite only the processor counts and memory bounds.
118117
BspArchitecture<ConstrGraphT> subArch(arch);
119118
subArch.SetProcessorsConsequTypes(subProcCounts, memWeights);
119+
return subArch;
120+
}
121+
122+
/**
123+
* @brief Solves the sub-schedule for each group and maps the results back to the global schedule.
124+
*/
125+
ReturnStatus SolveAndMapSubProblems(BspSchedule<ConstrGraphT> &schedule,
126+
const std::vector<std::vector<unsigned>> &componentIndicesPerGroup,
127+
const std::vector<std::vector<VertexIdxT<ConstrGraphT>>> &componentsVertices,
128+
const BspArchitecture<ConstrGraphT> &subArch) {
129+
const auto &instance = schedule.GetInstance();
130+
const auto &arch = instance.GetArchitecture();
131+
const auto &dag = instance.GetComputationalDag();
132+
133+
// Calculate offsets for mapping local sub-processor IDs to global processor IDs
134+
std::vector<unsigned> archProcTypeOffsets(arch.GetNumberOfProcessorTypes(), 0);
135+
const auto &archProcTypeCounts = arch.GetProcessorTypeCount();
136+
for (unsigned typeIdx = 1; typeIdx < arch.GetNumberOfProcessorTypes(); ++typeIdx) {
137+
archProcTypeOffsets[typeIdx] = archProcTypeOffsets[typeIdx - 1] + archProcTypeCounts[typeIdx - 1];
138+
}
120139

121-
// Calculate offsets for processor types within the 'sub_arch'
122140
std::vector<unsigned> subArchProcTypeOffsets(subArch.GetNumberOfProcessorTypes(), 0);
123141
const auto &subArchProcTypeCounts = subArch.GetProcessorTypeCount();
124142
for (unsigned typeIdx = 1; typeIdx < subArch.GetNumberOfProcessorTypes(); ++typeIdx) {
125143
subArchProcTypeOffsets[typeIdx] = subArchProcTypeOffsets[typeIdx - 1] + subArchProcTypeCounts[typeIdx - 1];
126144
}
127145

146+
std::vector<unsigned> subProcCounts = subArch.GetProcessorTypeCount();
128147
unsigned maxSupersteps = 0;
148+
129149
for (unsigned i = 0; i < minNonZeroProcs_; ++i) {
150+
if (componentIndicesPerGroup[i].empty()) continue;
151+
130152
std::vector<VertexIdxT<ConstrGraphT>> groupVertices;
131153
for (unsigned compIdx : componentIndicesPerGroup[i]) {
132154
groupVertices.insert(groupVertices.end(), componentsVertices[compIdx].begin(), componentsVertices[compIdx].end());
@@ -135,34 +157,26 @@ class TrimmedGroupScheduler : public Scheduler<ConstrGraphT> {
135157

136158
BspInstance<ConstrGraphT> subInstance;
137159
subInstance.GetArchitecture() = subArch;
138-
subInstance.SetNodeProcessorCompatibility(instance.GetNodeProcessorCompatibilityMatrix()); // Inherit compatibility
139-
auto globalToLocalMap
140-
= CreateInducedSubgraphMap(dag, subInstance.GetComputationalDag(), groupVertices); // Create induced subgraph
160+
subInstance.SetNodeProcessorCompatibility(instance.GetNodeProcessorCompatibilityMatrix());
141161

142-
// Create a schedule object for the sub-problem
143-
BspSchedule<ConstrGraphT> subSchedule(subInstance);
162+
auto globalToLocalMap = CreateInducedSubgraphMap(dag, subInstance.GetComputationalDag(), groupVertices);
144163

145-
// Call the sub-scheduler to compute the schedule for this group of components
164+
BspSchedule<ConstrGraphT> subSchedule(subInstance);
146165
auto status = subScheduler_->ComputeSchedule(subSchedule);
166+
147167
if (status != ReturnStatus::OSP_SUCCESS && status != ReturnStatus::BEST_FOUND) {
148168
return status;
149169
}
150170

151-
// Map the sub-schedule back to the main schedule.
152171
for (const auto &vGlobal : groupVertices) {
153172
const auto vLocal = globalToLocalMap.at(vGlobal);
154173
const unsigned subProc = subSchedule.AssignedProcessor(vLocal);
155174
const unsigned subSuperstep = subSchedule.AssignedSuperstep(vLocal);
156175

157-
// Determine the processor type and its local index within that type in the sub_arch
158176
const unsigned procType = subArch.ProcessorType(subProc);
159177
const unsigned localIdxWithinType = subProc - subArchProcTypeOffsets[procType];
160-
161-
// Calculate the global processor ID by combining:
162-
// The base offset of this processor type in the main 'arch'.
163-
// The offset for the current 'i'-th block of processors of this type.
164-
// The local index within that type block.
165178
const unsigned globalProc = archProcTypeOffsets[procType] + (i * subProcCounts[procType]) + localIdxWithinType;
179+
166180
schedule.SetAssignedProcessor(vGlobal, globalProc);
167181
schedule.SetAssignedSuperstep(vGlobal, subSuperstep);
168182
}

0 commit comments

Comments
 (0)