@@ -3,7 +3,7 @@ Copyright 2024 Huawei Technologies Co., Ltd.
33
44Licensed under the Apache License, Version 2.0 (the "License");
55you may not use this file except in compliance with the License.
6- You may obtain a copy of the License at
6+ you may obtain a copy of the License at
77
88 http://www.apache.org/licenses/LICENSE-2.0
99
@@ -18,8 +18,9 @@ limitations under the License.
1818
1919#pragma once
2020
21- #include < iostream>
2221#include < numeric>
22+ #include < string>
23+ #include < vector>
2324
2425#include " osp/bsp/scheduler/Scheduler.hpp"
2526#include " osp/graph_algorithms/computational_dag_util.hpp"
@@ -28,22 +29,26 @@ limitations under the License.
2829namespace osp {
2930
3031/* *
31- * @brief A scheduler for a single trimmed group, which consists of multiple isomorphic connected components.
32- *
3332 * @class TrimmedGroupScheduler
33+ * @brief A scheduler for a single trimmed group consisting of multiple isomorphic connected components.
34+ *
35+ * This scheduler partitions a disconnected subgraph (a pruned group) into its weakly connected components.
36+ * It assumes these components are isomorphic and distributes them among the available processor groups
37+ * to balance the load.
3438 *
35- * This scheduler functions similarly to the ConnectedComponentScheduler but is tailored for a single,
36- * potentially disconnected, subgraph that resulted from merging smaller isomorphic subgraphs. It divides
37- * the input graph into its weakly connected components and schedules them on proportionally allocated processors.
39+ * @tparam ConstrGraphT The type of the graph.
3840 */
3941template <typename ConstrGraphT>
4042class TrimmedGroupScheduler : public Scheduler <ConstrGraphT> {
4143 Scheduler<ConstrGraphT> *subScheduler_;
4244 unsigned minNonZeroProcs_;
4345
44- static constexpr bool verbose_ = false ;
45-
4646 public:
47+ /* *
48+ * @brief Constructs a TrimmedGroupScheduler.
49+ * @param scheduler The sub-scheduler to use for scheduling individual component groups.
50+ * @param minNonZeroProcs The minimum number of non-zero processors to utilize.
51+ */
4752 TrimmedGroupScheduler (Scheduler<ConstrGraphT> &scheduler, unsigned minNonZeroProcs)
4853 : subScheduler_(&scheduler), minNonZeroProcs_(minNonZeroProcs) {}
4954
@@ -52,9 +57,7 @@ class TrimmedGroupScheduler : public Scheduler<ConstrGraphT> {
5257 ReturnStatus ComputeSchedule (BspSchedule<ConstrGraphT> &schedule) override {
5358 const auto &instance = schedule.GetInstance ();
5459 const ConstrGraphT &dag = instance.GetComputationalDag ();
55- const BspArchitecture<ConstrGraphT> &arch = instance.GetArchitecture ();
5660
57- // Find the weakly connected components. These are assumed to be isomorphic subgraphs.
5861 std::vector<VertexIdxT<ConstrGraphT>> componentMap (dag.NumVertices ());
5962 size_t numComponents = ComputeWeaklyConnectedComponents (dag, componentMap);
6063
@@ -63,20 +66,24 @@ class TrimmedGroupScheduler : public Scheduler<ConstrGraphT> {
6366 return ReturnStatus::OSP_SUCCESS;
6467 }
6568
66- if constexpr (verbose_) {
67- std::cout << " [TrimmedGroupScheduler] min_non_zero_procs: " << minNonZeroProcs_
68- << " , num_components: " << numComponents << std::endl;
69- }
70-
71- // Group vertices by component.
7269 std::vector<std::vector<VertexIdxT<ConstrGraphT>>> componentsVertices (numComponents);
7370 for (VertexIdxT<ConstrGraphT> v = 0 ; v < dag.NumVertices (); ++v) {
7471 componentsVertices[componentMap[v]].push_back (v);
7572 }
7673
77- // Distribute components among processor types.
78- // The goal is to assign `base_count` components to each processor type group,
79- // plus one extra for the first `remainder` groups.
74+ auto componentIndicesPerGroup = DistributeComponents (numComponents);
75+ auto subArch = BuildSubArchitecture (instance.GetArchitecture ());
76+
77+ return SolveAndMapSubProblems (schedule, componentIndicesPerGroup, componentsVertices, subArch);
78+ }
79+
80+ private:
81+ /* *
82+ * @brief Distributes components among the processor groups.
83+ * @param numComponents Total number of components.
84+ * @return A vector where each element is a list of component indices assigned to a processor group.
85+ */
86+ std::vector<std::vector<unsigned >> DistributeComponents (size_t numComponents) {
8087 const unsigned baseCount = static_cast <unsigned >(numComponents) / minNonZeroProcs_;
8188 const unsigned remainder = static_cast <unsigned >(numComponents) % minNonZeroProcs_;
8289
@@ -90,43 +97,58 @@ class TrimmedGroupScheduler : public Scheduler<ConstrGraphT> {
9097 }
9198 }
9299 }
100+ return componentIndicesPerGroup;
101+ }
93102
94- // Determine the processor allocation for a single sub-problem.
95- // Calculate offsets for processor types within the main 'arch' (passed to TrimmedGroupScheduler)
96- std::vector<unsigned > archProcTypeOffsets (arch.GetNumberOfProcessorTypes (), 0 );
97- const auto &archProcTypeCounts = arch.GetProcessorTypeCount ();
98- for (unsigned typeIdx = 1 ; typeIdx < arch.GetNumberOfProcessorTypes (); ++typeIdx) {
99- archProcTypeOffsets[typeIdx] = archProcTypeOffsets[typeIdx - 1 ] + archProcTypeCounts[typeIdx - 1 ];
100- }
101-
103+ /* *
104+ * @brief Builds the architecture for a single sub-problem (one processor group).
105+ * @param arch The global architecture.
106+ * @return The sub-architecture.
107+ */
108+ BspArchitecture<ConstrGraphT> BuildSubArchitecture (const BspArchitecture<ConstrGraphT> &arch) {
102109 std::vector<unsigned > subProcCounts (arch.GetNumberOfProcessorTypes ());
103110 std::vector<VMemwT<ConstrGraphT>> memWeights (arch.GetNumberOfProcessorTypes (), 0 );
111+
104112 for (unsigned typeIdx = 0 ; typeIdx < arch.GetNumberOfProcessorTypes (); ++typeIdx) {
105113 subProcCounts[typeIdx] = arch.GetProcessorTypeCount ()[typeIdx] / minNonZeroProcs_;
106114 memWeights[typeIdx] = static_cast <VMemwT<ConstrGraphT>>(arch.MaxMemoryBoundProcType (typeIdx));
107115 }
108116
109- if constexpr (verbose_) {
110- std::cout << " [TrimmedGroupScheduler] Sub-problem processor counts per type: " ;
111- for (size_t typeIdx = 0 ; typeIdx < subProcCounts.size (); ++typeIdx) {
112- std::cout << " T" << typeIdx << " :" << subProcCounts[typeIdx] << " " ;
113- }
114- std::cout << std::endl;
115- }
116-
117- // Create the sub-architecture for one sub-problem.
118117 BspArchitecture<ConstrGraphT> subArch (arch);
119118 subArch.SetProcessorsConsequTypes (subProcCounts, memWeights);
119+ return subArch;
120+ }
121+
122+ /* *
123+ * @brief Solves the sub-schedule for each group and maps the results back to the global schedule.
124+ */
125+ ReturnStatus SolveAndMapSubProblems (BspSchedule<ConstrGraphT> &schedule,
126+ const std::vector<std::vector<unsigned >> &componentIndicesPerGroup,
127+ const std::vector<std::vector<VertexIdxT<ConstrGraphT>>> &componentsVertices,
128+ const BspArchitecture<ConstrGraphT> &subArch) {
129+ const auto &instance = schedule.GetInstance ();
130+ const auto &arch = instance.GetArchitecture ();
131+ const auto &dag = instance.GetComputationalDag ();
132+
133+ // Calculate offsets for mapping local sub-processor IDs to global processor IDs
134+ std::vector<unsigned > archProcTypeOffsets (arch.GetNumberOfProcessorTypes (), 0 );
135+ const auto &archProcTypeCounts = arch.GetProcessorTypeCount ();
136+ for (unsigned typeIdx = 1 ; typeIdx < arch.GetNumberOfProcessorTypes (); ++typeIdx) {
137+ archProcTypeOffsets[typeIdx] = archProcTypeOffsets[typeIdx - 1 ] + archProcTypeCounts[typeIdx - 1 ];
138+ }
120139
121- // Calculate offsets for processor types within the 'sub_arch'
122140 std::vector<unsigned > subArchProcTypeOffsets (subArch.GetNumberOfProcessorTypes (), 0 );
123141 const auto &subArchProcTypeCounts = subArch.GetProcessorTypeCount ();
124142 for (unsigned typeIdx = 1 ; typeIdx < subArch.GetNumberOfProcessorTypes (); ++typeIdx) {
125143 subArchProcTypeOffsets[typeIdx] = subArchProcTypeOffsets[typeIdx - 1 ] + subArchProcTypeCounts[typeIdx - 1 ];
126144 }
127145
146+ std::vector<unsigned > subProcCounts = subArch.GetProcessorTypeCount ();
128147 unsigned maxSupersteps = 0 ;
148+
129149 for (unsigned i = 0 ; i < minNonZeroProcs_; ++i) {
150+ if (componentIndicesPerGroup[i].empty ()) continue ;
151+
130152 std::vector<VertexIdxT<ConstrGraphT>> groupVertices;
131153 for (unsigned compIdx : componentIndicesPerGroup[i]) {
132154 groupVertices.insert (groupVertices.end (), componentsVertices[compIdx].begin (), componentsVertices[compIdx].end ());
@@ -135,34 +157,26 @@ class TrimmedGroupScheduler : public Scheduler<ConstrGraphT> {
135157
136158 BspInstance<ConstrGraphT> subInstance;
137159 subInstance.GetArchitecture () = subArch;
138- subInstance.SetNodeProcessorCompatibility (instance.GetNodeProcessorCompatibilityMatrix ()); // Inherit compatibility
139- auto globalToLocalMap
140- = CreateInducedSubgraphMap (dag, subInstance.GetComputationalDag (), groupVertices); // Create induced subgraph
160+ subInstance.SetNodeProcessorCompatibility (instance.GetNodeProcessorCompatibilityMatrix ());
141161
142- // Create a schedule object for the sub-problem
143- BspSchedule<ConstrGraphT> subSchedule (subInstance);
162+ auto globalToLocalMap = CreateInducedSubgraphMap (dag, subInstance.GetComputationalDag (), groupVertices);
144163
145- // Call the sub-scheduler to compute the schedule for this group of components
164+ BspSchedule<ConstrGraphT> subSchedule (subInstance);
146165 auto status = subScheduler_->ComputeSchedule (subSchedule);
166+
147167 if (status != ReturnStatus::OSP_SUCCESS && status != ReturnStatus::BEST_FOUND) {
148168 return status;
149169 }
150170
151- // Map the sub-schedule back to the main schedule.
152171 for (const auto &vGlobal : groupVertices) {
153172 const auto vLocal = globalToLocalMap.at (vGlobal);
154173 const unsigned subProc = subSchedule.AssignedProcessor (vLocal);
155174 const unsigned subSuperstep = subSchedule.AssignedSuperstep (vLocal);
156175
157- // Determine the processor type and its local index within that type in the sub_arch
158176 const unsigned procType = subArch.ProcessorType (subProc);
159177 const unsigned localIdxWithinType = subProc - subArchProcTypeOffsets[procType];
160-
161- // Calculate the global processor ID by combining:
162- // The base offset of this processor type in the main 'arch'.
163- // The offset for the current 'i'-th block of processors of this type.
164- // The local index within that type block.
165178 const unsigned globalProc = archProcTypeOffsets[procType] + (i * subProcCounts[procType]) + localIdxWithinType;
179+
166180 schedule.SetAssignedProcessor (vGlobal, globalProc);
167181 schedule.SetAssignedSuperstep (vGlobal, subSuperstep);
168182 }
0 commit comments