Skip to content

Commit c62aef2

Browse files
f3schmconcas
authored andcommitted
ITS: GPU: fix deltaROF tracking
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 0a6a962 commit c62aef2

File tree

3 files changed

+88
-26
lines changed

3 files changed

+88
-26
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ void countCellsHandler(const Cluster** sortedClusters,
131131
CellSeed* cells,
132132
int** cellsLUTsDeviceArray,
133133
int* cellsLUTsHost,
134+
const int deltaROF,
134135
const float bz,
135136
const float maxChi2ClusterAttachment,
136137
const float cellDeltaTanLambdaSigma,
@@ -148,6 +149,7 @@ void computeCellsHandler(const Cluster** sortedClusters,
148149
CellSeed* cells,
149150
int** cellsLUTsDeviceArray,
150151
int* cellsLUTsHost,
152+
const int deltaROF,
151153
const float bz,
152154
const float maxChi2ClusterAttachment,
153155
const float cellDeltaTanLambdaSigma,
@@ -160,6 +162,8 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
160162
int** cellsLUTs,
161163
gpuPair<int, int>* cellNeighbours,
162164
int* neighboursIndexTable,
165+
const Tracklet** tracklets,
166+
const int deltaROF,
163167
const float maxChi2ClusterAttachment,
164168
const float bz,
165169
const int layerIndex,
@@ -174,6 +178,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
174178
int** cellsLUTs,
175179
gpuPair<int, int>* cellNeighbours,
176180
int* neighboursIndexTable,
181+
const Tracklet** tracklets,
182+
const int deltaROF,
177183
const float maxChi2ClusterAttachment,
178184
const float bz,
179185
const int layerIndex,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ void TrackerTraitsGPU<nLayers>::computeLayerCells(const int iteration)
140140
nullptr,
141141
mTimeFrameGPU->getDeviceArrayCellsLUT(),
142142
mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
143+
this->mTrkParams[iteration].DeltaROF,
143144
this->mBz,
144145
this->mTrkParams[iteration].MaxChi2ClusterAttachment,
145146
this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
@@ -157,6 +158,7 @@ void TrackerTraitsGPU<nLayers>::computeLayerCells(const int iteration)
157158
mTimeFrameGPU->getDeviceCells()[iLayer],
158159
mTimeFrameGPU->getDeviceArrayCellsLUT(),
159160
mTimeFrameGPU->getDeviceCellLUTs()[iLayer],
161+
this->mTrkParams[iteration].DeltaROF,
160162
this->mBz,
161163
this->mTrkParams[iteration].MaxChi2ClusterAttachment,
162164
this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
@@ -185,6 +187,8 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighbours(const int iteration)
185187
mTimeFrameGPU->getDeviceArrayCellsLUT(),
186188
mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
187189
mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
190+
(const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
191+
this->mTrkParams[0].DeltaROF,
188192
this->mTrkParams[0].MaxChi2ClusterAttachment,
189193
this->mBz,
190194
iLayer,
@@ -201,6 +205,8 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighbours(const int iteration)
201205
mTimeFrameGPU->getDeviceArrayCellsLUT(),
202206
mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
203207
mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer),
208+
(const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(),
209+
this->mTrkParams[0].DeltaROF,
204210
this->mTrkParams[0].MaxChi2ClusterAttachment,
205211
this->mBz,
206212
iLayer,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 76 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,13 @@ GPUd() o2::track::TrackParCov buildTrackSeed(const Cluster& cluster1,
201201
}
202202

203203
struct sort_tracklets {
204-
GPUhd() bool operator()(const Tracklet& a, const Tracklet& b) { return a.firstClusterIndex < b.firstClusterIndex || (a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex < b.secondClusterIndex); }
204+
GPUhd() bool operator()(const Tracklet& a, const Tracklet& b)
205+
{
206+
if (a.firstClusterIndex != b.firstClusterIndex) {
207+
return a.firstClusterIndex < b.firstClusterIndex;
208+
}
209+
return a.secondClusterIndex < b.secondClusterIndex;
210+
}
205211
};
206212

207213
struct equal_tracklets {
@@ -263,23 +269,34 @@ struct compare_track_chi2 {
263269
}
264270
};
265271

266-
GPUd() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
267-
const int* roframesPV,
268-
const int nROF,
269-
const uint8_t* mask,
270-
const Vertex* vertices)
272+
GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
273+
const int* roframesPV,
274+
const int nROF,
275+
const uint8_t* mask,
276+
const Vertex* vertices)
271277
{
272278
const int start_pv_id = roframesPV[rof];
273279
const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1;
274-
size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // return empty span if ROF is excluded
280+
const size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // return empty span if ROF is excluded
275281
return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
276282
};
277283

278-
GPUd() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
279-
const int totROFs,
280-
const int layer,
281-
const int** roframesClus,
282-
const Cluster** clusters)
284+
GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int romin,
285+
const int romax,
286+
const int* roframesPV,
287+
const int nROF,
288+
const Vertex* vertices)
289+
{
290+
const int start_pv_id = roframesPV[romin];
291+
const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1;
292+
return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
293+
};
294+
295+
GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
296+
const int totROFs,
297+
const int layer,
298+
const int** roframesClus,
299+
const Cluster** clusters)
283300
{
284301
if (rof < 0 || rof >= totROFs) {
285302
return gpuSpan<const Cluster>();
@@ -360,6 +377,8 @@ GPUg() void computeLayerCellNeighboursKernel(
360377
int* neighboursIndexTable,
361378
int** cellsLUTs,
362379
gpuPair<int, int>* cellNeighbours,
380+
const Tracklet** tracklets,
381+
const int deltaROF,
363382
const float maxChi2ClusterAttachment,
364383
const float bz,
365384
const int layerIndex,
@@ -377,15 +396,29 @@ GPUg() void computeLayerCellNeighboursKernel(
377396
if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex) { // Check if cells share the same tracklet
378397
break;
379398
}
399+
400+
if (deltaROF) {
401+
const auto& trkl00 = tracklets[layerIndex][currentCellSeed.getFirstTrackletIndex()];
402+
const auto& trkl01 = tracklets[layerIndex + 1][currentCellSeed.getSecondTrackletIndex()];
403+
const auto& trkl10 = tracklets[layerIndex + 1][nextCellSeed.getFirstTrackletIndex()];
404+
const auto& trkl11 = tracklets[layerIndex + 2][nextCellSeed.getSecondTrackletIndex()];
405+
if ((o2::gpu::CAMath::Max(trkl00.getMaxRof(), o2::gpu::CAMath::Max(trkl01.getMaxRof(), o2::gpu::CAMath::Max(trkl10.getMaxRof(), trkl11.getMaxRof()))) -
406+
o2::gpu::CAMath::Min(trkl00.getMinRof(), o2::gpu::CAMath::Min(trkl01.getMinRof(), o2::gpu::CAMath::Min(trkl10.getMinRof(), trkl11.getMinRof())))) > deltaROF) {
407+
continue;
408+
}
409+
}
410+
380411
if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) ||
381412
!nextCellSeed.propagateTo(currentCellSeed.getX(), bz)) {
382413
continue;
383414
}
415+
384416
float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed);
385417
if (chi2 > maxChi2ClusterAttachment) /// TODO: switch to the chi2 wrt cluster to avoid correlation
386418
{
387419
continue;
388420
}
421+
389422
if constexpr (initRun) {
390423
atomicAdd(neighboursLUT + iNextCell, 1);
391424
neighboursIndexTable[iCurrentCellIndex]++;
@@ -412,6 +445,7 @@ GPUg() void computeLayerCellsKernel(
412445
const int layer,
413446
CellSeed* cells,
414447
int** cellsLUTs,
448+
const int deltaROF,
415449
const float bz,
416450
const float maxChi2ClusterAttachment,
417451
const float cellDeltaTanLambdaSigma,
@@ -432,6 +466,9 @@ GPUg() void computeLayerCellsKernel(
432466
break;
433467
}
434468
const Tracklet& nextTracklet = tracklets[layer + 1][iNextTrackletIndex];
469+
if (deltaROF && currentTracklet.getSpanRof(nextTracklet) > deltaROF) {
470+
continue;
471+
}
435472
const float deltaTanLambda{o2::gpu::CAMath::Abs(currentTracklet.tanLambda - nextTracklet.tanLambda)};
436473

437474
if (deltaTanLambda / cellDeltaTanLambdaSigma < nSigmaCut) {
@@ -515,9 +552,12 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
515552
{
516553
const int phiBins{utils->getNphiBins()};
517554
const int zBins{utils->getNzBins()};
555+
const int tableSize{phiBins * zBins + 1};
518556
for (unsigned int iROF{blockIdx.x}; iROF < endROF - startROF; iROF += gridDim.x) {
519-
const short rof0 = iROF + startROF;
520-
auto primaryVertices = getPrimaryVertices(rof0, rofPV, totalROFs, multMask, vertices);
557+
const short pivotROF = iROF + startROF;
558+
const short minROF = o2::gpu::CAMath::Max(startROF, static_cast<int>(pivotROF - deltaROF));
559+
const short maxROF = o2::gpu::CAMath::Min(endROF - 1, static_cast<int>(pivotROF + deltaROF));
560+
auto primaryVertices = getPrimaryVertices(minROF, maxROF, rofPV, totalROFs, vertices);
521561
if (primaryVertices.empty()) {
522562
continue;
523563
}
@@ -526,17 +566,17 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
526566
if ((endVtx - startVtx) <= 0) {
527567
continue;
528568
}
529-
const short minROF = o2::gpu::CAMath::Max(startROF, static_cast<int>(rof0 - deltaROF));
530-
const short maxROF = o2::gpu::CAMath::Min(endROF - 1, static_cast<int>(rof0 + deltaROF));
531-
auto clustersCurrentLayer = getClustersOnLayer(rof0, totalROFs, layerIndex, ROFClusters, clusters);
569+
570+
auto clustersCurrentLayer = getClustersOnLayer(pivotROF, totalROFs, layerIndex, ROFClusters, clusters);
532571
if (clustersCurrentLayer.empty()) {
533572
continue;
534573
}
535574

536575
for (int currentClusterIndex = threadIdx.x; currentClusterIndex < clustersCurrentLayer.size(); currentClusterIndex += blockDim.x) {
576+
537577
unsigned int storedTracklets{0};
538578
const auto& currentCluster{clustersCurrentLayer[currentClusterIndex]};
539-
const int currentSortedIndex{ROFClusters[layerIndex][rof0] + currentClusterIndex};
579+
const int currentSortedIndex{ROFClusters[layerIndex][pivotROF] + currentClusterIndex};
540580
if (usedClusters[layerIndex][currentCluster.clusterId]) {
541581
continue;
542582
}
@@ -564,18 +604,17 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
564604
phiBinsNum += phiBins;
565605
}
566606

567-
const int tableSize{phiBins * zBins + 1};
568-
for (short rof1{minROF}; rof1 <= maxROF; ++rof1) {
569-
auto clustersNextLayer = getClustersOnLayer(rof1, totalROFs, layerIndex + 1, ROFClusters, clusters);
607+
for (short targetROF{minROF}; targetROF <= maxROF; ++targetROF) {
608+
auto clustersNextLayer = getClustersOnLayer(targetROF, totalROFs, layerIndex + 1, ROFClusters, clusters);
570609
if (clustersNextLayer.empty()) {
571610
continue;
572611
}
573612
for (int iPhiCount{0}; iPhiCount < phiBinsNum; iPhiCount++) {
574613
int iPhiBin = (selectedBinsRect.y + iPhiCount) % phiBins;
575614
const int firstBinIndex{utils->getBinIndex(selectedBinsRect.x, iPhiBin)};
576615
const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1};
577-
const int firstRowClusterIndex = indexTables[layerIndex + 1][(rof1)*tableSize + firstBinIndex];
578-
const int maxRowClusterIndex = indexTables[layerIndex + 1][(rof1)*tableSize + maxBinIndex];
616+
const int firstRowClusterIndex = indexTables[layerIndex + 1][(targetROF)*tableSize + firstBinIndex];
617+
const int maxRowClusterIndex = indexTables[layerIndex + 1][(targetROF)*tableSize + maxBinIndex];
579618
for (int nextClusterIndex{firstRowClusterIndex}; nextClusterIndex < maxRowClusterIndex; ++nextClusterIndex) {
580619
if (nextClusterIndex >= clustersNextLayer.size()) {
581620
break;
@@ -592,8 +631,8 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
592631
} else {
593632
const float phi{o2::gpu::CAMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, currentCluster.xCoordinate - nextCluster.xCoordinate)};
594633
const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / (currentCluster.radius - nextCluster.radius)};
595-
const int nextSortedIndex{ROFClusters[layerIndex + 1][rof1] + nextClusterIndex};
596-
new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, rof0, rof1};
634+
const int nextSortedIndex{ROFClusters[layerIndex + 1][targetROF] + nextClusterIndex};
635+
new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, pivotROF, targetROF};
597636
}
598637
++storedTracklets;
599638
}
@@ -1018,6 +1057,7 @@ void countCellsHandler(
10181057
CellSeed* cells,
10191058
int** cellsLUTsArrayDevice,
10201059
int* cellsLUTsHost,
1060+
const int deltaROF,
10211061
const float bz,
10221062
const float maxChi2ClusterAttachment,
10231063
const float cellDeltaTanLambdaSigma,
@@ -1035,6 +1075,7 @@ void countCellsHandler(
10351075
layer, // const int
10361076
cells, // CellSeed*
10371077
cellsLUTsArrayDevice, // int**
1078+
deltaROF, // const int
10381079
bz, // const float
10391080
maxChi2ClusterAttachment, // const float
10401081
cellDeltaTanLambdaSigma, // const float
@@ -1053,6 +1094,7 @@ void computeCellsHandler(
10531094
CellSeed* cells,
10541095
int** cellsLUTsArrayDevice,
10551096
int* cellsLUTsHost,
1097+
const int deltaROF,
10561098
const float bz,
10571099
const float maxChi2ClusterAttachment,
10581100
const float cellDeltaTanLambdaSigma,
@@ -1070,6 +1112,7 @@ void computeCellsHandler(
10701112
layer, // const int
10711113
cells, // CellSeed*
10721114
cellsLUTsArrayDevice, // int**
1115+
deltaROF, // const int
10731116
bz, // const float
10741117
maxChi2ClusterAttachment, // const float
10751118
cellDeltaTanLambdaSigma, // const float
@@ -1081,6 +1124,8 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
10811124
int** cellsLUTs,
10821125
gpuPair<int, int>* cellNeighbours,
10831126
int* neighboursIndexTable,
1127+
const Tracklet** tracklets,
1128+
const int deltaROF,
10841129
const float maxChi2ClusterAttachment,
10851130
const float bz,
10861131
const int layerIndex,
@@ -1096,12 +1141,13 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
10961141
neighboursIndexTable,
10971142
cellsLUTs,
10981143
cellNeighbours,
1144+
tracklets,
1145+
deltaROF,
10991146
maxChi2ClusterAttachment,
11001147
bz,
11011148
layerIndex,
11021149
nCells,
11031150
maxCellNeighbours);
1104-
11051151
gpu::cubInclusiveScanInPlace(neighboursLUT, nCellsNext);
11061152
gpu::cubExclusiveScanInPlace(neighboursIndexTable, nCells + 1);
11071153
unsigned int nNeighbours;
@@ -1114,6 +1160,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11141160
int** cellsLUTs,
11151161
gpuPair<int, int>* cellNeighbours,
11161162
int* neighboursIndexTable,
1163+
const Tracklet** tracklets,
1164+
const int deltaROF,
11171165
const float maxChi2ClusterAttachment,
11181166
const float bz,
11191167
const int layerIndex,
@@ -1130,6 +1178,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11301178
neighboursIndexTable,
11311179
cellsLUTs,
11321180
cellNeighbours,
1181+
tracklets,
1182+
deltaROF,
11331183
maxChi2ClusterAttachment,
11341184
bz,
11351185
layerIndex,

0 commit comments

Comments
 (0)