Skip to content

Commit 5cae1bc

Browse files
authored
Bug-fix for memory allocation (#14554)
1 parent c01e120 commit 5cae1bc

File tree

1 file changed

+18
-16
lines changed

1 file changed

+18
-16
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 18 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -643,26 +643,28 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
643643

644644
// Maximum of 4 lanes supported
645645
HighResTimer* nnTimers[12];
646-
if (GetProcessingSettings().nn.applyNNclusterizer && GetProcessingSettings().debugLevel >= 1) {
647-
nnTimers[0] = &getTimer<GPUTPCNNClusterizer, 0>("GPUTPCNNClusterizer_ONNXClassification_0_", 0);
648-
nnTimers[1] = &getTimer<GPUTPCNNClusterizer, 1>("GPUTPCNNClusterizer_ONNXRegression_1_", 1);
649-
nnTimers[2] = &getTimer<GPUTPCNNClusterizer, 2>("GPUTPCNNClusterizer_ONNXRegression2_2_", 2);
650-
nnTimers[3] = &getTimer<GPUTPCNNClusterizer, 3>("GPUTPCNNClusterizer_ONNXClassification_0_", 3);
651-
nnTimers[4] = &getTimer<GPUTPCNNClusterizer, 4>("GPUTPCNNClusterizer_ONNXRegression_1_", 4);
652-
nnTimers[5] = &getTimer<GPUTPCNNClusterizer, 5>("GPUTPCNNClusterizer_ONNXRegression2_2_", 5);
653-
nnTimers[6] = &getTimer<GPUTPCNNClusterizer, 6>("GPUTPCNNClusterizer_ONNXClassification_0_", 6);
654-
nnTimers[7] = &getTimer<GPUTPCNNClusterizer, 7>("GPUTPCNNClusterizer_ONNXRegression_1_", 7);
655-
nnTimers[8] = &getTimer<GPUTPCNNClusterizer, 8>("GPUTPCNNClusterizer_ONNXRegression2_2_", 8);
656-
nnTimers[9] = &getTimer<GPUTPCNNClusterizer, 9>("GPUTPCNNClusterizer_ONNXClassification_0_", 9);
657-
nnTimers[10] = &getTimer<GPUTPCNNClusterizer, 10>("GPUTPCNNClusterizer_ONNXRegression_1_", 10);
658-
nnTimers[11] = &getTimer<GPUTPCNNClusterizer, 11>("GPUTPCNNClusterizer_ONNXRegression2_2_", 11);
659-
}
660646

661647
if (GetProcessingSettings().nn.applyNNclusterizer) {
662648
int32_t deviceId = -1;
663649
int32_t numLanes = GetProcessingSettings().nTPCClustererLanes;
664650
int32_t maxThreads = mRec->getNKernelHostThreads(true);
665651
// bool recreateMemoryAllocator = false;
652+
653+
if (GetProcessingSettings().debugLevel >= 1) {
654+
nnTimers[0] = &getTimer<GPUTPCNNClusterizer, 0>("GPUTPCNNClusterizer_ONNXClassification_0_", 0);
655+
nnTimers[1] = &getTimer<GPUTPCNNClusterizer, 1>("GPUTPCNNClusterizer_ONNXRegression_1_", 1);
656+
nnTimers[2] = &getTimer<GPUTPCNNClusterizer, 2>("GPUTPCNNClusterizer_ONNXRegression2_2_", 2);
657+
nnTimers[3] = &getTimer<GPUTPCNNClusterizer, 3>("GPUTPCNNClusterizer_ONNXClassification_0_", 3);
658+
nnTimers[4] = &getTimer<GPUTPCNNClusterizer, 4>("GPUTPCNNClusterizer_ONNXRegression_1_", 4);
659+
nnTimers[5] = &getTimer<GPUTPCNNClusterizer, 5>("GPUTPCNNClusterizer_ONNXRegression2_2_", 5);
660+
nnTimers[6] = &getTimer<GPUTPCNNClusterizer, 6>("GPUTPCNNClusterizer_ONNXClassification_0_", 6);
661+
nnTimers[7] = &getTimer<GPUTPCNNClusterizer, 7>("GPUTPCNNClusterizer_ONNXRegression_1_", 7);
662+
nnTimers[8] = &getTimer<GPUTPCNNClusterizer, 8>("GPUTPCNNClusterizer_ONNXRegression2_2_", 8);
663+
nnTimers[9] = &getTimer<GPUTPCNNClusterizer, 9>("GPUTPCNNClusterizer_ONNXClassification_0_", 9);
664+
nnTimers[10] = &getTimer<GPUTPCNNClusterizer, 10>("GPUTPCNNClusterizer_ONNXRegression_1_", 10);
665+
nnTimers[11] = &getTimer<GPUTPCNNClusterizer, 11>("GPUTPCNNClusterizer_ONNXRegression2_2_", 11);
666+
}
667+
666668
mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) {
667669
nnApplications[lane].init(nn_settings, GetProcessingSettings().deterministicGPUReconstruction);
668670
if (nnApplications[lane].mModelsUsed[0]) {
@@ -708,7 +710,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
708710
LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId;
709711
}
710712
});
711-
mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t sector) {
713+
for (int32_t sector = 0; sector < NSECTORS; sector++) {
712714
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector];
713715
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[sector] : clustererNN;
714716
int32_t lane = sector % numLanes;
@@ -725,7 +727,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
725727
AllocateRegisteredMemory(clustererNN.mMemoryId);
726728
// nnApplications[lane].createBoundary(clustererNNShadow);
727729
// nnApplications[lane].createIndexLookup(clustererNNShadow);
728-
});
730+
}
729731
if (doGPU) {
730732
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
731733
}

0 commit comments

Comments
 (0)