Skip to content

Commit 4c65d47

Browse files
committed
GPU: Store merged track ptrs in correct place in GPU constant memory
1 parent 9446c8d commit 4c65d47

File tree

2 files changed

+19
-11
lines changed

2 files changed

+19
-11
lines changed

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include "GPUTPCCompressionKernels.h"
1515
#include "GPUConstantMem.h"
1616
#include "GPUO2DataTypes.h"
17-
#include "GPUTPCGMMerger.h"
1817
#include "GPUParam.h"
1918
#include "GPUCommonAlgorithm.h"
2019
#include "GPUTPCCompressionTrackModel.h"
@@ -27,16 +26,16 @@ using namespace o2::tpc;
2726
template <>
2827
GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0attached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
2928
{
30-
const GPUTPCGMMerger& GPUrestrict() merger = processors.tpcMerger;
31-
const o2::tpc::ClusterNativeAccess* GPUrestrict() clusters = processors.ioPtrs.clustersNative;
29+
const GPUTrackingInOutPointers& GPUrestrict() ioPtrs = processors.ioPtrs;
30+
const o2::tpc::ClusterNativeAccess* GPUrestrict() clusters = ioPtrs.clustersNative;
3231
GPUTPCCompression& GPUrestrict() compressor = processors.tpcCompressor;
3332
const GPUParam& GPUrestrict() param = processors.param;
3433

3534
char lastLeg = 0;
3635
int myTrack = 0;
37-
for (unsigned int i = get_global_id(0); i < (unsigned int)merger.NOutputTracks(); i += get_global_size(0)) {
36+
for (unsigned int i = get_global_id(0); i < ioPtrs.nMergedTracks; i += get_global_size(0)) {
3837
GPUbarrierWarp();
39-
const GPUTPCGMMergedTrack& GPUrestrict() trk = merger.OutputTracks()[i];
38+
const GPUTPCGMMergedTrack& GPUrestrict() trk = ioPtrs.mergedTracks[i];
4039
if (!trk.OK()) {
4140
continue;
4241
}
@@ -47,13 +46,13 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0at
4746
GPUTPCCompressionTrackModel track;
4847
float zOffset = 0;
4948
for (int k = trk.NClusters() - 1; k >= 0; k--) {
50-
const GPUTPCGMMergedTrackHit& GPUrestrict() hit = merger.Clusters()[trk.FirstClusterRef() + k];
49+
const GPUTPCGMMergedTrackHit& GPUrestrict() hit = ioPtrs.mergedTrackHits[trk.FirstClusterRef() + k];
5150
if (hit.state & GPUTPCGMMergedTrackHit::flagReject) {
5251
continue;
5352
}
5453

5554
int hitId = hit.num;
56-
int attach = merger.ClusterAttachment()[hitId];
55+
int attach = ioPtrs.mergedTrackHitAttachment[hitId];
5756
if ((attach & gputpcgmmergertypes::attachTrackMask) != i) {
5857
continue; // Main attachment to different track
5958
}
@@ -179,8 +178,8 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<3>::opera
179178
template <>
180179
GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1unattached>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& GPUrestrict() smem, processorType& GPUrestrict() processors)
181180
{
182-
const GPUTPCGMMerger& GPUrestrict() merger = processors.tpcMerger;
183-
const o2::tpc::ClusterNativeAccess* GPUrestrict() clusters = processors.ioPtrs.clustersNative;
181+
const GPUTrackingInOutPointers& GPUrestrict() ioPtrs = processors.ioPtrs;
182+
const o2::tpc::ClusterNativeAccess* GPUrestrict() clusters = ioPtrs.clustersNative;
184183
GPUTPCCompression& GPUrestrict() compressor = processors.tpcCompressor;
185184
GPUParam& GPUrestrict() param = processors.param;
186185
unsigned int* sortBuffer = smem.sortBuffer;
@@ -207,7 +206,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
207206
if (compressor.mClusterStatus[idx]) {
208207
break;
209208
}
210-
int attach = merger.ClusterAttachment()[idx];
209+
int attach = ioPtrs.mergedTrackHitAttachment[idx];
211210
bool unattached = attach == 0;
212211

213212
if (unattached) {
@@ -219,7 +218,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
219218
break;
220219
}
221220
int id = attach & gputpcgmmergertypes::attachTrackMask;
222-
auto& trk = merger.OutputTracks()[id];
221+
auto& trk = ioPtrs.mergedTracks[id];
223222
if (CAMath::Abs(trk.GetParam().GetQPt()) > processors.param.rec.tpcRejectQPt || trk.MergedLooper()) {
224223
break;
225224
}

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2084,6 +2084,15 @@ int GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
20842084
mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
20852085
mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment();
20862086
mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt();
2087+
if (doGPU) {
2088+
processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks();
2089+
processorsShadow()->ioPtrs.nMergedTracks = Merger.NOutputTracks();
2090+
processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters();
2091+
processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
2092+
processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment();
2093+
processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt();
2094+
WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
2095+
}
20872096

20882097
if (GetProcessingSettings().debugLevel >= 2) {
20892098
GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters());

0 commit comments

Comments
 (0)