Skip to content

Commit ea4bdbd

Browse files
committed
GPU TPC: Fix synchronization between update of occupancy map and running sector tracker on GPU
1 parent cd3de5c commit ea4bdbd

File tree

3 files changed

+9
-8
lines changed

3 files changed

+9
-8
lines changed

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,7 @@ void GPUReconstructionCPU::ResetDeviceProcessorTypes()
356356
}
357357
}
358358

359-
void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, uint32_t mapSize, int32_t stream)
359+
void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, uint32_t mapSize, int32_t stream, deviceEvent* ev)
360360
{
361361
if (mapHost && mapSize != GPUTPCClusterOccupancyMapBin::getNBins(param())) {
362362
throw std::runtime_error("Updating occupancy map with object of invalid size");
@@ -375,6 +375,6 @@ void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, cons
375375
};
376376
tmpOccuapncyParam tmp = {mapGPU, occupancyTotal, mapSize};
377377
const auto holdContext = GetThreadContext();
378-
WriteToConstantMemory((char*)&processors()->param.occupancyMap - (char*)processors(), &tmp, sizeof(tmp), stream);
378+
WriteToConstantMemory((char*)&processors()->param.occupancyMap - (char*)processors(), &tmp, sizeof(tmp), stream, ev);
379379
}
380380
}

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface
4848

4949
int32_t RunChains() override;
5050

51-
void UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, uint32_t mapSize, int32_t stream = -1);
51+
void UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, uint32_t mapSize, int32_t stream = -1, deviceEvent* ev = nullptr);
5252

5353
protected:
5454
struct GPUProcessorProcessors : public GPUProcessor {

GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
100100
bool streamInit[GPUCA_MAX_STREAMS] = {false};
101101
int32_t streamInitAndOccMap = mRec->NStreams() - 1;
102102

103+
bool initializeOccMap = param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm;
103104
if (doGPU) {
104105
// Copy Tracker Object to GPU Memory
105106
if (GetProcessingSettings().debugLevel >= 3) {
@@ -109,7 +110,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
109110
return 2;
110111
}
111112

112-
WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, streamInitAndOccMap, &mEvents->init);
113+
WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, streamInitAndOccMap, !initializeOccMap ? &mEvents->init : nullptr);
113114

114115
std::fill(streamInit, streamInit + mRec->NStreams(), false);
115116
streamInit[streamInitAndOccMap] = true;
@@ -130,15 +131,15 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
130131
mRec->ReturnVolatileMemory();
131132
mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage;
132133
if (doGPU) {
133-
GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * mInputsHost->mTPCClusterOccupancyMapSize, streamInitAndOccMap, false, &mEvents->init);
134+
GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * mInputsHost->mTPCClusterOccupancyMapSize, streamInitAndOccMap, false);
134135
} else {
135-
TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamInitAndOccMap, &mEvents->init);
136+
TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamInitAndOccMap);
136137
}
137138
}
138-
if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
139+
if (initializeOccMap) {
139140
uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap;
140141
occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128));
141-
mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, doGPU && param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, mInputsHost->mTPCClusterOccupancyMapSize, streamInitAndOccMap);
142+
mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, doGPU && param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, mInputsHost->mTPCClusterOccupancyMapSize, streamInitAndOccMap, &mEvents->init);
142143
}
143144

144145
int32_t streamMap[NSECTORS];

0 commit comments

Comments
 (0)