@@ -201,7 +201,13 @@ GPUd() o2::track::TrackParCov buildTrackSeed(const Cluster& cluster1,
201201}
202202
203203struct sort_tracklets {
204- GPUhd () bool operator ()(const Tracklet& a, const Tracklet& b) { return a.firstClusterIndex < b.firstClusterIndex || (a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex < b.secondClusterIndex ); }
204+ GPUhd () bool operator ()(const Tracklet& a, const Tracklet& b)
205+ {
206+ if (a.firstClusterIndex != b.firstClusterIndex ) {
207+ return a.firstClusterIndex < b.firstClusterIndex ;
208+ }
209+ return a.secondClusterIndex < b.secondClusterIndex ;
210+ }
205211};
206212
207213struct equal_tracklets {
@@ -263,23 +269,34 @@ struct compare_track_chi2 {
263269 }
264270};
265271
266- GPUd () gpuSpan<const Vertex> getPrimaryVertices (const int rof,
267- const int * roframesPV,
268- const int nROF,
269- const uint8_t * mask,
270- const Vertex* vertices)
272+ GPUdii () gpuSpan<const Vertex> getPrimaryVertices (const int rof,
273+ const int * roframesPV,
274+ const int nROF,
275+ const uint8_t * mask,
276+ const Vertex* vertices)
271277{
272278 const int start_pv_id = roframesPV[rof];
273279 const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1 ;
274- size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0 ; // return empty span if ROF is excluded
280+ const size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0 ; // return empty span if ROF is excluded
275281 return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
276282};
277283
278- GPUd () gpuSpan<const Cluster> getClustersOnLayer (const int rof,
279- const int totROFs,
280- const int layer,
281- const int ** roframesClus,
282- const Cluster** clusters)
284+ GPUdii () gpuSpan<const Vertex> getPrimaryVertices (const int romin,
285+ const int romax,
286+ const int * roframesPV,
287+ const int nROF,
288+ const Vertex* vertices)
289+ {
290+ const int start_pv_id = roframesPV[romin];
291+ const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1 ;
292+ return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
293+ };
294+
295+ GPUdii () gpuSpan<const Cluster> getClustersOnLayer (const int rof,
296+ const int totROFs,
297+ const int layer,
298+ const int ** roframesClus,
299+ const Cluster** clusters)
283300{
284301 if (rof < 0 || rof >= totROFs) {
285302 return gpuSpan<const Cluster>();
@@ -360,6 +377,8 @@ GPUg() void computeLayerCellNeighboursKernel(
360377 int * neighboursIndexTable,
361378 int ** cellsLUTs,
362379 gpuPair<int , int >* cellNeighbours,
380+ const Tracklet** tracklets,
381+ const int deltaROF,
363382 const float maxChi2ClusterAttachment,
364383 const float bz,
365384 const int layerIndex,
@@ -377,15 +396,29 @@ GPUg() void computeLayerCellNeighboursKernel(
377396 if (nextCellSeed.getFirstTrackletIndex () != nextLayerTrackletIndex) { // Check if cells share the same tracklet
378397 break ;
379398 }
399+
400+ if (deltaROF) {
401+ const auto & trkl00 = tracklets[layerIndex][currentCellSeed.getFirstTrackletIndex ()];
402+ const auto & trkl01 = tracklets[layerIndex + 1 ][currentCellSeed.getSecondTrackletIndex ()];
403+ const auto & trkl10 = tracklets[layerIndex + 1 ][nextCellSeed.getFirstTrackletIndex ()];
404+ const auto & trkl11 = tracklets[layerIndex + 2 ][nextCellSeed.getSecondTrackletIndex ()];
405+ if ((o2::gpu::CAMath::Max (trkl00.getMaxRof (), o2::gpu::CAMath::Max (trkl01.getMaxRof (), o2::gpu::CAMath::Max (trkl10.getMaxRof (), trkl11.getMaxRof ()))) -
406+ o2::gpu::CAMath::Min (trkl00.getMinRof (), o2::gpu::CAMath::Min (trkl01.getMinRof (), o2::gpu::CAMath::Min (trkl10.getMinRof (), trkl11.getMinRof ())))) > deltaROF) {
407+ continue ;
408+ }
409+ }
410+
380411 if (!nextCellSeed.rotate (currentCellSeed.getAlpha ()) ||
381412 !nextCellSeed.propagateTo (currentCellSeed.getX (), bz)) {
382413 continue ;
383414 }
415+
384416 float chi2 = currentCellSeed.getPredictedChi2 (nextCellSeed);
385417 if (chi2 > maxChi2ClusterAttachment) // / TODO: switch to the chi2 wrt cluster to avoid correlation
386418 {
387419 continue ;
388420 }
421+
389422 if constexpr (initRun) {
390423 atomicAdd (neighboursLUT + iNextCell, 1 );
391424 neighboursIndexTable[iCurrentCellIndex]++;
@@ -412,6 +445,7 @@ GPUg() void computeLayerCellsKernel(
412445 const int layer,
413446 CellSeed* cells,
414447 int ** cellsLUTs,
448+ const int deltaROF,
415449 const float bz,
416450 const float maxChi2ClusterAttachment,
417451 const float cellDeltaTanLambdaSigma,
@@ -432,6 +466,9 @@ GPUg() void computeLayerCellsKernel(
432466 break ;
433467 }
434468 const Tracklet& nextTracklet = tracklets[layer + 1 ][iNextTrackletIndex];
469+ if (deltaROF && currentTracklet.getSpanRof (nextTracklet) > deltaROF) {
470+ continue ;
471+ }
435472 const float deltaTanLambda{o2::gpu::CAMath::Abs (currentTracklet.tanLambda - nextTracklet.tanLambda )};
436473
437474 if (deltaTanLambda / cellDeltaTanLambdaSigma < nSigmaCut) {
@@ -515,9 +552,12 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
515552{
516553 const int phiBins{utils->getNphiBins ()};
517554 const int zBins{utils->getNzBins ()};
555+ const int tableSize{phiBins * zBins + 1 };
518556 for (unsigned int iROF{blockIdx .x }; iROF < endROF - startROF; iROF += gridDim .x ) {
519- const short rof0 = iROF + startROF;
520- auto primaryVertices = getPrimaryVertices (rof0, rofPV, totalROFs, multMask, vertices);
557+ const short pivotROF = iROF + startROF;
558+ const short minROF = o2::gpu::CAMath::Max (startROF, static_cast <int >(pivotROF - deltaROF));
559+ const short maxROF = o2::gpu::CAMath::Min (endROF - 1 , static_cast <int >(pivotROF + deltaROF));
560+ auto primaryVertices = getPrimaryVertices (minROF, maxROF, rofPV, totalROFs, vertices);
521561 if (primaryVertices.empty ()) {
522562 continue ;
523563 }
@@ -526,17 +566,17 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
526566 if ((endVtx - startVtx) <= 0 ) {
527567 continue ;
528568 }
529- const short minROF = o2::gpu::CAMath::Max (startROF, static_cast <int >(rof0 - deltaROF));
530- const short maxROF = o2::gpu::CAMath::Min (endROF - 1 , static_cast <int >(rof0 + deltaROF));
531- auto clustersCurrentLayer = getClustersOnLayer (rof0, totalROFs, layerIndex, ROFClusters, clusters);
569+
570+ auto clustersCurrentLayer = getClustersOnLayer (pivotROF, totalROFs, layerIndex, ROFClusters, clusters);
532571 if (clustersCurrentLayer.empty ()) {
533572 continue ;
534573 }
535574
536575 for (int currentClusterIndex = threadIdx .x ; currentClusterIndex < clustersCurrentLayer.size (); currentClusterIndex += blockDim .x ) {
576+
537577 unsigned int storedTracklets{0 };
538578 const auto & currentCluster{clustersCurrentLayer[currentClusterIndex]};
539- const int currentSortedIndex{ROFClusters[layerIndex][rof0 ] + currentClusterIndex};
579+ const int currentSortedIndex{ROFClusters[layerIndex][pivotROF ] + currentClusterIndex};
540580 if (usedClusters[layerIndex][currentCluster.clusterId ]) {
541581 continue ;
542582 }
@@ -564,18 +604,17 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
564604 phiBinsNum += phiBins;
565605 }
566606
567- const int tableSize{phiBins * zBins + 1 };
568- for (short rof1{minROF}; rof1 <= maxROF; ++rof1) {
569- auto clustersNextLayer = getClustersOnLayer (rof1, totalROFs, layerIndex + 1 , ROFClusters, clusters);
607+ for (short targetROF{minROF}; targetROF <= maxROF; ++targetROF) {
608+ auto clustersNextLayer = getClustersOnLayer (targetROF, totalROFs, layerIndex + 1 , ROFClusters, clusters);
570609 if (clustersNextLayer.empty ()) {
571610 continue ;
572611 }
573612 for (int iPhiCount{0 }; iPhiCount < phiBinsNum; iPhiCount++) {
574613 int iPhiBin = (selectedBinsRect.y + iPhiCount) % phiBins;
575614 const int firstBinIndex{utils->getBinIndex (selectedBinsRect.x , iPhiBin)};
576615 const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1 };
577- const int firstRowClusterIndex = indexTables[layerIndex + 1 ][(rof1 )*tableSize + firstBinIndex];
578- const int maxRowClusterIndex = indexTables[layerIndex + 1 ][(rof1 )*tableSize + maxBinIndex];
616+ const int firstRowClusterIndex = indexTables[layerIndex + 1 ][(targetROF )*tableSize + firstBinIndex];
617+ const int maxRowClusterIndex = indexTables[layerIndex + 1 ][(targetROF )*tableSize + maxBinIndex];
579618 for (int nextClusterIndex{firstRowClusterIndex}; nextClusterIndex < maxRowClusterIndex; ++nextClusterIndex) {
580619 if (nextClusterIndex >= clustersNextLayer.size ()) {
581620 break ;
@@ -592,8 +631,8 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
592631 } else {
593632 const float phi{o2::gpu::CAMath::ATan2 (currentCluster.yCoordinate - nextCluster.yCoordinate , currentCluster.xCoordinate - nextCluster.xCoordinate )};
594633 const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate ) / (currentCluster.radius - nextCluster.radius )};
595- const int nextSortedIndex{ROFClusters[layerIndex + 1 ][rof1 ] + nextClusterIndex};
596- new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, rof0, rof1 };
634+ const int nextSortedIndex{ROFClusters[layerIndex + 1 ][targetROF ] + nextClusterIndex};
635+ new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, pivotROF, targetROF };
597636 }
598637 ++storedTracklets;
599638 }
@@ -1018,6 +1057,7 @@ void countCellsHandler(
10181057 CellSeed* cells,
10191058 int ** cellsLUTsArrayDevice,
10201059 int * cellsLUTsHost,
1060+ const int deltaROF,
10211061 const float bz,
10221062 const float maxChi2ClusterAttachment,
10231063 const float cellDeltaTanLambdaSigma,
@@ -1035,6 +1075,7 @@ void countCellsHandler(
10351075 layer, // const int
10361076 cells, // CellSeed*
10371077 cellsLUTsArrayDevice, // int**
1078+ deltaROF, // const int
10381079 bz, // const float
10391080 maxChi2ClusterAttachment, // const float
10401081 cellDeltaTanLambdaSigma, // const float
@@ -1053,6 +1094,7 @@ void computeCellsHandler(
10531094 CellSeed* cells,
10541095 int ** cellsLUTsArrayDevice,
10551096 int * cellsLUTsHost,
1097+ const int deltaROF,
10561098 const float bz,
10571099 const float maxChi2ClusterAttachment,
10581100 const float cellDeltaTanLambdaSigma,
@@ -1070,6 +1112,7 @@ void computeCellsHandler(
10701112 layer, // const int
10711113 cells, // CellSeed*
10721114 cellsLUTsArrayDevice, // int**
1115+ deltaROF, // const int
10731116 bz, // const float
10741117 maxChi2ClusterAttachment, // const float
10751118 cellDeltaTanLambdaSigma, // const float
@@ -1081,6 +1124,8 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
10811124 int ** cellsLUTs,
10821125 gpuPair<int , int >* cellNeighbours,
10831126 int * neighboursIndexTable,
1127+ const Tracklet** tracklets,
1128+ const int deltaROF,
10841129 const float maxChi2ClusterAttachment,
10851130 const float bz,
10861131 const int layerIndex,
@@ -1096,12 +1141,13 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
10961141 neighboursIndexTable,
10971142 cellsLUTs,
10981143 cellNeighbours,
1144+ tracklets,
1145+ deltaROF,
10991146 maxChi2ClusterAttachment,
11001147 bz,
11011148 layerIndex,
11021149 nCells,
11031150 maxCellNeighbours);
1104-
11051151 gpu::cubInclusiveScanInPlace (neighboursLUT, nCellsNext);
11061152 gpu::cubExclusiveScanInPlace (neighboursIndexTable, nCells + 1 );
11071153 unsigned int nNeighbours;
@@ -1114,6 +1160,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11141160 int ** cellsLUTs,
11151161 gpuPair<int , int >* cellNeighbours,
11161162 int * neighboursIndexTable,
1163+ const Tracklet** tracklets,
1164+ const int deltaROF,
11171165 const float maxChi2ClusterAttachment,
11181166 const float bz,
11191167 const int layerIndex,
@@ -1130,6 +1178,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11301178 neighboursIndexTable,
11311179 cellsLUTs,
11321180 cellNeighbours,
1181+ tracklets,
1182+ deltaROF,
11331183 maxChi2ClusterAttachment,
11341184 bz,
11351185 layerIndex,
0 commit comments