@@ -311,7 +311,7 @@ GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
311311}
312312
313313template <int nLayers>
314- GPUg () void fitTrackSeedsKernel (
314+ GPUg () void __launch_bounds__ ( 256 , 1 ) fitTrackSeedsKernel(
315315 CellSeed<nLayers>* trackSeeds,
316316 const TrackingFrameInfo** foundTrackingFrameInfo,
317317 o2::its::TrackITSExt* tracks,
@@ -374,7 +374,7 @@ GPUg() void fitTrackSeedsKernel(
374374}
375375
376376template <bool initRun, int nLayers = 7 >
377- GPUg () void computeLayerCellNeighboursKernel (
377+ GPUg () void __launch_bounds__ ( 256 , 1 ) computeLayerCellNeighboursKernel(
378378 CellSeed<nLayers>** cellSeedArray,
379379 int * neighboursLUT,
380380 int * neighboursIndexTable,
@@ -438,7 +438,7 @@ GPUg() void computeLayerCellNeighboursKernel(
438438}
439439
440440template <bool initRun, int nLayers>
441- GPUg () void computeLayerCellsKernel (
441+ GPUg () void __launch_bounds__ ( 256 , 1 ) computeLayerCellsKernel(
442442 const Cluster** sortedClusters,
443443 const Cluster** unsortedClusters,
444444 const TrackingFrameInfo** tfInfo,
@@ -525,7 +525,7 @@ GPUg() void computeLayerCellsKernel(
525525}
526526
527527template <bool initRun, int nLayers>
528- GPUg () void computeLayerTrackletsMultiROFKernel (
528+ GPUg () void __launch_bounds__ ( 256 , 1 ) computeLayerTrackletsMultiROFKernel(
529529 const IndexTableUtils<nLayers>* utils,
530530 const uint8_t * multMask,
531531 const int layerIndex,
@@ -652,33 +652,35 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
652652 }
653653}
654654
655- GPUg () void compileTrackletsLookupTableKernel (const Tracklet* tracklets,
656- int * trackletsLookUpTable,
657- const int nTracklets)
655+ GPUg () void __launch_bounds__ (256 , 1 ) compileTrackletsLookupTableKernel(
656+ const Tracklet* tracklets,
657+ int * trackletsLookUpTable,
658+ const int nTracklets)
658659{
659660 for (int currentTrackletIndex = blockIdx .x * blockDim .x + threadIdx .x ; currentTrackletIndex < nTracklets; currentTrackletIndex += blockDim .x * gridDim .x ) {
660661 atomicAdd (&trackletsLookUpTable[tracklets[currentTrackletIndex].firstClusterIndex ], 1 );
661662 }
662663}
663664
664665template <bool dryRun, int nLayers = 7 >
665- GPUg () void processNeighboursKernel (const int layer,
666- const int level,
667- CellSeed<nLayers>** allCellSeeds,
668- CellSeed<nLayers>* currentCellSeeds,
669- const int * currentCellIds,
670- const unsigned int nCurrentCells,
671- CellSeed<nLayers>* updatedCellSeeds,
672- int * updatedCellsIds,
673- int * foundSeedsTable, // auxiliary only in GPU code to compute the number of cells per iteration
674- const unsigned char ** usedClusters, // Used clusters
675- int * neighbours,
676- int * neighboursLUT,
677- const TrackingFrameInfo** foundTrackingFrameInfo,
678- const float bz,
679- const float maxChi2ClusterAttachment,
680- const o2::base::Propagator* propagator,
681- const o2::base::PropagatorF::MatCorrType matCorrType)
666+ GPUg () void __launch_bounds__ (256 , 1 ) processNeighboursKernel(
667+ const int layer,
668+ const int level,
669+ CellSeed<nLayers>** allCellSeeds,
670+ CellSeed<nLayers>* currentCellSeeds,
671+ const int * currentCellIds,
672+ const unsigned int nCurrentCells,
673+ CellSeed<nLayers>* updatedCellSeeds,
674+ int * updatedCellsIds,
675+ int * foundSeedsTable, // auxiliary only in GPU code to compute the number of cells per iteration
676+ const unsigned char ** usedClusters, // Used clusters
677+ int * neighbours,
678+ int * neighboursLUT,
679+ const TrackingFrameInfo** foundTrackingFrameInfo,
680+ const float bz,
681+ const float maxChi2ClusterAttachment,
682+ const o2::base::Propagator* propagator,
683+ const o2::base::PropagatorF::MatCorrType matCorrType)
682684{
683685 constexpr float layerxX0[7 ] = {5 .e -3f , 5 .e -3f , 5 .e -3f , 1 .e -2f , 1 .e -2f , 1 .e -2f , 1 .e -2f }; // Hardcoded here for the moment.
684686 for (unsigned int iCurrentCell = blockIdx .x * blockDim .x + threadIdx .x ; iCurrentCell < nCurrentCells; iCurrentCell += blockDim .x * gridDim .x ) {
0 commit comments