Skip to content

Commit 56423c7

Browse files
committed
GPU: Simplify and cleanup code
1 parent 3499eb3 commit 56423c7

File tree

1 file changed

+11
-27
lines changed

1 file changed

+11
-27
lines changed

GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx

Lines changed: 11 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,8 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
9393
}
9494

9595
bool streamInit[GPUCA_MAX_STREAMS] = {false};
96+
int32_t streamInitAndOccMap = mRec->NStreams() - 1;
97+
9698
if (doGPU) {
9799
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
98100
processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase();
@@ -113,18 +115,12 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
113115
return 2;
114116
}
115117

116-
WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
118+
WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, streamInitAndOccMap, &mEvents->init);
117119

118-
for (int32_t i = 0; i < mRec->NStreams() - 1; i++) {
119-
streamInit[i] = false;
120-
}
121-
streamInit[mRec->NStreams() - 1] = true;
122-
}
123-
if (GPUDebug("Initialization (1)", 0)) {
124-
return (2);
120+
std::fill(streamInit, streamInit + mRec->NStreams(), false);
121+
streamInit[streamInitAndOccMap] = true;
125122
}
126123

127-
int32_t streamOccMap = mRec->NStreams() - 1;
128124
if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
129125
AllocateRegisteredMemory(mInputsHost->mResourceOccupancyMap, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcOccupancyMap)]);
130126
}
@@ -134,21 +130,21 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
134130
}
135131
uint32_t* ptr = doGPU ? mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap;
136132
auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU);
137-
runKernel<GPUMemClean16>(GetGridAutoStep(streamOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param()));
138-
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamOccMap), ptrTmp);
139-
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamOccMap), ptrTmp, ptr + 2);
133+
runKernel<GPUMemClean16>(GetGridAutoStep(streamInitAndOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param()));
134+
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamInitAndOccMap), ptrTmp);
135+
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamInitAndOccMap), ptrTmp, ptr + 2);
140136
mRec->ReturnVolatileMemory();
141137
mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage;
142138
if (doGPU) {
143-
GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamOccMap, false, &mEvents->init);
139+
GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamInitAndOccMap, false, &mEvents->init);
144140
} else {
145-
TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init);
141+
TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamInitAndOccMap, &mEvents->init);
146142
}
147143
}
148144
if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
149145
uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap;
150146
occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128));
151-
mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamOccMap);
147+
mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap);
152148
}
153149

154150
int32_t streamMap[NSECTORS];
@@ -190,19 +186,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
190186
}
191187
}
192188

193-
// Initialize temporary memory where needed
194-
if (GetProcessingSettings().debugLevel >= 3) {
195-
GPUInfo("Copying Sector Data to GPU and initializing temporary memory");
196-
}
197189
runKernel<GPUMemClean16>(GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights()));
198-
199-
if (!doGPU) {
200-
TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); // Copy Data to GPU Global Memory
201-
}
202-
if (GPUDebug("Initialization (3)", useStream)) {
203-
throw std::runtime_error("memcpy failure");
204-
}
205-
206190
runKernel<GPUTPCNeighboursFinder>({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}});
207191
streamInit[useStream] = true;
208192

0 commit comments

Comments
 (0)