Commit bb163ea

Removing GPUConstantMem, adding interOpNumThreads option
1 parent 41d80d2 commit bb163ea

8 files changed: +48 -44 lines changed

Common/ML/include/ML/OrtInterface.h

Lines changed: 2 additions & 2 deletions

@@ -84,8 +84,8 @@ class OrtModel
 
   // Environment settings
   bool mInitialized = false;
-  std::string modelPath, device = "cpu", dtype = "float"; // device options should be cpu, rocm, migraphx, cuda
-  int intraOpNumThreads = 0, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;
+  std::string modelPath, device = "cpu", dtype = "float", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda
+  int intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;
 
   std::string printShape(const std::vector<int64_t>&);
 };

Common/ML/src/OrtInterface.cxx

Lines changed: 4 additions & 2 deletions

@@ -52,6 +52,7 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
   deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0);
   allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
   intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
+  interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0);
   loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0);
   enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0);
   enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0);

@@ -90,13 +91,14 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
 
   if (device == "CPU") {
     (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
-    if (intraOpNumThreads > 1) {
+    (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads);
+    if (intraOpNumThreads > 1 || interOpNumThreads > 1) {
       (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL);
     } else if (intraOpNumThreads == 1) {
       (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
     }
     if (loggingLevel < 2) {
-      LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads";
+      LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " (intraOpNumThreads) and " << interOpNumThreads << " (interOpNumThreads) threads";
     }
   }
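
For context, the ONNX Runtime calls exercised by this change are plain SessionOptions setters. A minimal standalone sketch of the same configuration, assuming ONNX Runtime's C++ API and using a placeholder model path (this is not the O2 wrapper itself):

  #include <onnxruntime_cxx_api.h>

  int main()
  {
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "demo");
    Ort::SessionOptions opts;
    opts.SetIntraOpNumThreads(2); // threads available within a single operator
    opts.SetInterOpNumThreads(2); // threads available across independent operators
    // Parallel execution only pays off when more than one thread is configured,
    // mirroring the intraOpNumThreads/interOpNumThreads check above.
    opts.SetExecutionMode(ExecutionMode::ORT_PARALLEL);
    Ort::Session session(env, "model.onnx", opts); // "model.onnx" is a placeholder
    return 0;
  }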

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 2 additions & 1 deletion

@@ -228,7 +228,8 @@ AddOption(nnInferenceDevice, std::string, "CPU", "", 0, "(std::string) Specify i
 AddOption(nnInferenceDeviceId, unsigned int, 0, "", 0, "(unsigned int) Specify inference device id")
 AddOption(nnInferenceAllocateDevMem, int, 0, "", 0, "(bool, default = 0), if the device memory should be allocated for inference")
 AddOption(nnInferenceDtype, std::string, "fp32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16
-AddOption(nnInferenceThreadsPerNN, int, 0, "", 0, "Number of threads used to evaluate one neural network")
+AddOption(nnInferenceIntraOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetIntraOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.")
+AddOption(nnInferenceInterOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetInterOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.")
 AddOption(nnInferenceEnableOrtOptimization, unsigned int, 99, "", 0, "Enables graph optimizations in ONNX Runtime. Can be [0, 1, 2, 99] -> see https://github.com/microsoft/onnxruntime/blob/3f71d637a83dc3540753a8bb06740f67e926dc13/include/onnxruntime/core/session/onnxruntime_c_api.h#L347")
 AddOption(nnInferenceOrtProfiling, int, 0, "", 0, "Enables profiling of model execution in ONNX Runtime")
 AddOption(nnInferenceOrtProfilingPath, std::string, ".", "", 0, "If nnInferenceOrtProfiling is set, the path to store the profiling data")
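
The [0, 1, 2, 99] values of nnInferenceEnableOrtOptimization correspond to ONNX Runtime's GraphOptimizationLevel enum (ORT_DISABLE_ALL = 0, ORT_ENABLE_BASIC = 1, ORT_ENABLE_EXTENDED = 2, ORT_ENABLE_ALL = 99), per the linked header. A hedged sketch of applying such a value (the helper name is illustrative, not part of the O2 code):

  #include <onnxruntime_cxx_api.h>

  // Forward the integer option straight to ONNX Runtime; the cast is valid
  // because the option is documented to take exactly the enum's numeric values.
  void applyOrtOptimizationLevel(Ort::SessionOptions& opts, unsigned int level)
  {
    opts.SetGraphOptimizationLevel(static_cast<GraphOptimizationLevel>(level));
  }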

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 7 additions & 7 deletions

@@ -927,23 +927,23 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   size_t iSize = CAMath::Min((uint)clustererNN.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
 
   auto start0 = std::chrono::high_resolution_clock::now();
-  runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNN>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, processors(), iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Filling the data
+  runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNN>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Filling the data
 
   auto stop0 = std::chrono::high_resolution_clock::now();
   auto start1 = std::chrono::high_resolution_clock::now();
   nnApplication.networkInference(nnApplication.model_class, clustererNN, iSize, clustererNN.modelProbabilities, clustererNN.nnClusterizerDtype);
   if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) {
-    runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, processors(), iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels
+    runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels
   } else {
-    runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, processors(), iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels
+    runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels
   }
 
   if (!clustererNN.nnClusterizerUseCfRegression) {
     nnApplication.networkInference(nnApplication.model_reg_1, clustererNN, iSize, clustererNN.outputDataReg1, clustererNN.nnClusterizerDtype);
-    runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, processors(), iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 1
+    runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 1
     if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.reg_model_paths.size() > 1) {
       nnApplication.networkInference(nnApplication.model_reg_2, clustererNN, iSize, clustererNN.outputDataReg2, clustererNN.nnClusterizerDtype);
-      runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, processors(), iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 2
+      runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 2
     }
   }
   auto stop1 = std::chrono::high_resolution_clock::now();

@@ -953,7 +953,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   }
   auto start1 = std::chrono::high_resolution_clock::now();
   if (clustererNN.nnClusterizerUseCfRegression) {
-    runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, processors(), iSector, clustererNN.nnClusterizerDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
+    runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
   }
   auto stop1 = std::chrono::high_resolution_clock::now();
   time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;

@@ -970,7 +970,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   } else {
     runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
     DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
-    runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 0);
+    runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0);
   }
 
   if (doGPU && propagateMCLabels) {
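
Aside from dropping the forced CPU device type and the explicit processors() argument from the runKernel calls, the surrounding timing code is unchanged: each step is bracketed by high_resolution_clock timestamps and accumulated in seconds. A self-contained sketch of that pattern, with placeholder work in place of the kernel launches:

  #include <chrono>
  #include <iostream>

  int main()
  {
    double time_clusterizer = 0.; // accumulated seconds, as in the chain code
    for (int batch = 0; batch < 3; ++batch) {
      auto start1 = std::chrono::high_resolution_clock::now();
      // ... kernel launches and network inference would run here ...
      auto stop1 = std::chrono::high_resolution_clock::now();
      time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
    }
    std::cout << "NN clusterizer time: " << time_clusterizer << " s\n";
    return 0;
  }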

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 2 additions & 1 deletion

@@ -27,7 +27,8 @@ GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNcl
   {"device-id", std::to_string(settings.nnInferenceDeviceId)},
   {"allocate-device-memory", std::to_string(settings.nnInferenceAllocateDevMem)},
   {"dtype", settings.nnInferenceDtype},
-  {"intra-op-num-threads", std::to_string(settings.nnInferenceThreadsPerNN)},
+  {"intra-op-num-threads", std::to_string(settings.nnInferenceIntraOpNumThreads)},
+  {"inter-op-num-threads", std::to_string(settings.nnInferenceInterOpNumThreads)},
   {"enable-optimizations", std::to_string(settings.nnInferenceEnableOrtOptimization)},
   {"enable-profiling", std::to_string(settings.nnInferenceOrtProfiling)},
   {"profiling-output-path", settings.nnInferenceOrtProfilingPath},

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 21 additions & 21 deletions

@@ -34,11 +34,11 @@ using namespace o2::gpu::tpccf;
 
 // Defining individual thread functions for data filling, determining the class label and running the CF clusterizer
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::runCfClusterizer>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& notUsed, GPUConstantMem* processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::runCfClusterizer>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
 {
   uint glo_idx = get_global_id(0);
-  auto& clusterer = processors->tpcClusterer[sector];
-  auto& clustererNN = processors->tpcNNClusterer[sector];
+  auto& clusterer = processors.tpcClusterer[sector];
+  auto& clustererNN = processors.tpcNNClusterer[sector];
   if (clustererNN.outputDataClass[glo_idx] == 0) { // default clusterizer should not be called in batched mode due to mess-up with thread indices
     return;
   }

@@ -50,22 +50,22 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::run
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& notUsed, GPUConstantMem* processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
 {
   GPUTPCNNClusterizerKernels::fillInputData(nBlocks, nThreads, iBlock, iThread, processors, sector, dtype, batchStart);
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass1Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& notUsed, GPUConstantMem* processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass1Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
 {
   uint glo_idx = get_global_id(0);
-  processors->tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)(processors->tpcNNClusterer[sector].modelProbabilities[glo_idx] > processors->tpcNNClusterer[sector].nnClassThreshold);
+  processors.tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].modelProbabilities[glo_idx] > processors.tpcNNClusterer[sector].nnClassThreshold);
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& notUsed, GPUConstantMem* processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
 {
-  auto& clusterer = processors->tpcNNClusterer[sector];
+  auto& clusterer = processors.tpcNNClusterer[sector];
   uint glo_idx = get_global_id(0);
   uint elem_iterator = glo_idx * clusterer.nnClusterizerModelClassNumOutputNodes;
   float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty]

@@ -82,20 +82,20 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishClass1Regression>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& notUsed, GPUConstantMem* processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishClass1Regression>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
 {
   uint glo_idx = get_global_id(0);
-  if (glo_idx >= processors->tpcClusterer[sector].mPmemory->counters.nClusters) {
+  if (glo_idx >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) {
     return;
   }
   GPUTPCNNClusterizerKernels::publishClustersReg1(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart);
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishClass2Regression>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& notUsed, GPUConstantMem* processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishClass2Regression>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
 {
   uint glo_idx = get_global_id(0);
-  if (glo_idx >= processors->tpcClusterer[sector].mPmemory->counters.nClusters) {
+  if (glo_idx >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) {
     return;
   }
   GPUTPCNNClusterizerKernels::publishClustersReg2(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart);

@@ -128,11 +128,11 @@ GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_
 }
 
 // Filling the input data for the neural network where there is no boundary
-GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUConstantMem* processors, uint8_t sector, int8_t dtype, uint batchStart)
+GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, processorType& processors, uint8_t sector, int8_t dtype, uint batchStart)
 {
   uint glo_idx = get_global_id(0);
-  auto& clusterer = processors->tpcClusterer[sector];
-  auto& clustererNN = processors->tpcNNClusterer[sector];
+  auto& clusterer = processors.tpcClusterer[sector];
+  auto& clustererNN = processors.tpcNNClusterer[sector];
   Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
   Array2D<uint8_t> isPeakMap(clusterer.mPpeakMap);

@@ -192,10 +192,10 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n
   }
 }
 
-GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSharedMemory& smem, GPUConstantMem* processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
+GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
 {
-  auto& clusterer = processors->tpcClusterer[sector];
-  auto& clustererNN = processors->tpcNNClusterer[sector];
+  auto& clusterer = processors.tpcClusterer[sector];
+  auto& clustererNN = processors.tpcNNClusterer[sector];
   Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
   CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer));
   MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem);

@@ -272,10 +272,10 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha
   }
 }
 
-GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSharedMemory& smem, GPUConstantMem* processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
+GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
 {
-  auto& clusterer = processors->tpcClusterer[sector];
-  auto& clustererNN = processors->tpcNNClusterer[sector];
+  auto& clusterer = processors.tpcClusterer[sector];
+  auto& clustererNN = processors.tpcNNClusterer[sector];
   Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
   CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer));
   MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem);
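
The recurring edit in this file is mechanical: the kernel entry points and helpers now receive the processor state as processorType& instead of GPUConstantMem*, so every processors->field becomes processors.field. A reduced illustration of the pattern with stand-in types (not the real O2 structures):

  #include <array>

  struct Clusterer { int nClusters = 0; };
  struct ProcessorState { std::array<Clusterer, 36> tpcClusterer{}; }; // stand-in per-sector state

  // Before: state passed by pointer, dereferenced with ->
  int nClustersOld(ProcessorState* processors, int sector) { return processors->tpcClusterer[sector].nClusters; }

  // After: state passed by reference, accessed with '.'; a reference cannot be
  // null, which matches the framework always providing a valid processorType.
  int nClustersNew(ProcessorState& processors, int sector) { return processors.tpcClusterer[sector].nClusters; }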
