Skip to content

Commit 2801c2e

Browse files
committed
Fixing memory assignment issue. Reconstruction runs through with FP32 networks
1 parent 4faaa4a commit 2801c2e

File tree

1 file changed

+22
-15
lines changed

1 file changed

+22
-15
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -617,18 +617,16 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
617617

618618
if (GetProcessingSettings().nn.applyNNclusterizer) {
619619
uint32_t maxClusters = 0;
620-
for (uint32_t lane = 0; lane < GetProcessingSettings().nTPCClustererLanes; lane++) {
620+
int32_t deviceId = -1;
621+
int32_t numLanes = GetProcessingSettings().nTPCClustererLanes;
622+
for (uint32_t lane = 0; lane < NSECTORS; lane++) {
621623
maxClusters = std::max(maxClusters, processors()->tpcClusterer[lane].mNMaxClusters);
622624
}
623-
for (uint32_t lane = 0; lane < GetProcessingSettings().nTPCClustererLanes; lane++) {
625+
mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) {
624626
nnApplications[lane].init(nn_settings);
625627
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane];
626628
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN;
627629

628-
int32_t deviceId = -1;
629-
if (clustererNNShadow.nnClusterizerVerbosity < 3) {
630-
LOG(info) << "Allocating ONNX stream for lane " << lane << " and lane " << lane;
631-
}
632630
if (nnApplications[lane].modelsUsed[0]) {
633631
SetONNXGPUStream((nnApplications[lane].model_class).getSessionOptions(), lane, &deviceId);
634632
(nnApplications[lane].model_class).setDeviceId(deviceId);
@@ -644,21 +642,32 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
644642
(nnApplications[lane].model_reg_2).setDeviceId(deviceId);
645643
(nnApplications[lane].model_reg_2).initEnvironment();
646644
}
647-
645+
if (clustererNNShadow.nnClusterizerVerbosity < 3) {
646+
LOG(info) << "Allocated ONNX stream for lane " << lane << " and device " << deviceId;
647+
}
648+
});
649+
mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t sector) {
650+
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector];
651+
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[sector] : clustererNN;
652+
int32_t lane = sector % numLanes;
648653
if (doGPU){
649654
clustererNNShadow.deviceId = deviceId;
650-
clustererNNShadow.mISector = lane;
655+
clustererNNShadow.mISector = sector;
651656
clustererNNShadow.nnClusterizerTotalClusters = maxClusters;
652657
nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow);
653658
} else {
654659
// TODO: not sure if this part is needed at all
655660
clustererNN.deviceId = deviceId;
656-
clustererNN.mISector = lane;
661+
clustererNN.mISector = sector;
657662
clustererNN.nnClusterizerTotalClusters = maxClusters;
658663
nnApplications[lane].initClusterizer(nn_settings, clustererNN);
659664
}
660665
AllocateRegisteredMemory(clustererNN.mMemoryId);
661-
}
666+
if (doGPU){
667+
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&clustererNN - (char*)processors(), &clustererNNShadow, sizeof(clustererNN), lane);
668+
TransferMemoryResourcesToGPU(RecoStep::TPCClusterFinding, &clustererNNShadow, lane);
669+
}
670+
});
662671
}
663672
#endif
664673

@@ -934,12 +943,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
934943
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN;
935944
GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane];
936945

946+
LOG(info) << "clustererNNShadow.inputData32: " << clustererNNShadow.inputData32;
947+
LOG(info) << "clustererShadow.mPclusterInRow: " << clustererShadow.mPclusterInRow;
948+
937949
int withMC = (doGPU && propagateMCLabels);
938-
if (doGPU){
939-
// SetupGPUProcessor(&clustererNN, true);
940-
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&clustererNN - (char*)processors(), &clustererNNShadow, sizeof(clustererNN), lane);
941-
TransferMemoryResourcesToGPU(RecoStep::TPCClusterFinding, &clustererNNShadow, lane);
942-
}
943950

944951
if (clustererNNShadow.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
945952
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});

0 commit comments

Comments
 (0)