Skip to content

Commit 6c326eb

Browse files
committed
ITS: GPU: added launch bounds for ITS kernels, not fully optimised for MI50
1 parent 17ae0d0 commit 6c326eb

File tree

2 files changed

+13
-11
lines changed

2 files changed

+13
-11
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
311311
}
312312

313313
template <int nLayers>
314-
GPUg() void fitTrackSeedsKernel(
314+
GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel(
315315
CellSeed<nLayers>* trackSeeds,
316316
const TrackingFrameInfo** foundTrackingFrameInfo,
317317
o2::its::TrackITSExt* tracks,
@@ -374,7 +374,7 @@ GPUg() void fitTrackSeedsKernel(
374374
}
375375

376376
template <bool initRun, int nLayers = 7>
377-
GPUg() void computeLayerCellNeighboursKernel(
377+
GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel(
378378
CellSeed<nLayers>** cellSeedArray,
379379
int* neighboursLUT,
380380
int* neighboursIndexTable,
@@ -438,7 +438,7 @@ GPUg() void computeLayerCellNeighboursKernel(
438438
}
439439

440440
template <bool initRun, int nLayers>
441-
GPUg() void computeLayerCellsKernel(
441+
GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel(
442442
const Cluster** sortedClusters,
443443
const Cluster** unsortedClusters,
444444
const TrackingFrameInfo** tfInfo,
@@ -525,7 +525,7 @@ GPUg() void computeLayerCellsKernel(
525525
}
526526

527527
template <bool initRun, int nLayers>
528-
GPUg() void computeLayerTrackletsMultiROFKernel(
528+
GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel(
529529
const IndexTableUtils<nLayers>* utils,
530530
const uint8_t* multMask,
531531
const int layerIndex,
@@ -652,7 +652,8 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
652652
}
653653
}
654654

655-
GPUg() void compileTrackletsLookupTableKernel(const Tracklet* tracklets,
655+
GPUg() void __launch_bounds__(256, 1) compileTrackletsLookupTableKernel(
656+
const Tracklet* tracklets,
656657
int* trackletsLookUpTable,
657658
const int nTracklets)
658659
{
@@ -662,7 +663,8 @@ GPUg() void compileTrackletsLookupTableKernel(const Tracklet* tracklets,
662663
}
663664

664665
template <bool dryRun, int nLayers = 7>
665-
GPUg() void processNeighboursKernel(const int layer,
666+
GPUg() void __launch_bounds__(256, 1) processNeighboursKernel(
667+
const int layer,
666668
const int level,
667669
CellSeed<nLayers>** allCellSeeds,
668670
CellSeed<nLayers>* currentCellSeeds,

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,19 +121,19 @@ struct ITSGpuTrackingParamConfig : public o2::conf::ConfigurableParamHelper<ITSG
121121
void maybeOverride() const;
122122

123123
/// Individual kernel launch parameters (number of blocks and threads), one value per iteration
124-
int nBlocksLayerTracklets[MaxIter] = {30, 30, 30, 30};
124+
int nBlocksLayerTracklets[MaxIter] = {60, 60, 60, 60};
125125
int nThreadsLayerTracklets[MaxIter] = {256, 256, 256, 256};
126126

127-
int nBlocksLayerCells[MaxIter] = {30, 30, 30, 30};
127+
int nBlocksLayerCells[MaxIter] = {60, 60, 60, 60};
128128
int nThreadsLayerCells[MaxIter] = {256, 256, 256, 256};
129129

130-
int nBlocksFindNeighbours[MaxIter] = {30, 30, 30, 30};
130+
int nBlocksFindNeighbours[MaxIter] = {60, 60, 60, 60};
131131
int nThreadsFindNeighbours[MaxIter] = {256, 256, 256, 256};
132132

133-
int nBlocksProcessNeighbours[MaxIter] = {30, 30, 30, 30};
133+
int nBlocksProcessNeighbours[MaxIter] = {60, 60, 60, 60};
134134
int nThreadsProcessNeighbours[MaxIter] = {256, 256, 256, 256};
135135

136-
int nBlocksTracksSeeds[MaxIter] = {30, 30, 30, 30};
136+
int nBlocksTracksSeeds[MaxIter] = {60, 60, 60, 60};
137137
int nThreadsTracksSeeds[MaxIter] = {256, 256, 256, 256};
138138

139139
O2ParamDef(ITSGpuTrackingParamConfig, "ITSGpuTrackingParam");

0 commit comments

Comments
 (0)