Skip to content

Commit e4a4f1a

Browse files
f3sch authored and davidrohr committed
ITS: GPU: add skipping of parts where nothing was found
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 5f4f95a commit e4a4f1a

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -399,6 +399,11 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel(
399399
const int maxCellNeighbours = 1e2)
400400
{
401401
for (int iCurrentCellIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentCellIndex < nCells; iCurrentCellIndex += blockDim.x * gridDim.x) {
402+
if constexpr (!initRun) {
403+
if (neighboursIndexTable[iCurrentCellIndex] == neighboursIndexTable[iCurrentCellIndex + 1]) {
404+
continue;
405+
}
406+
}
402407
const auto& currentCellSeed{cellSeedArray[layerIndex][iCurrentCellIndex]};
403408
const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()};
404409
const int nextLayerFirstCellIndex{cellsLUTs[layerIndex + 1][nextLayerTrackletIndex]};
@@ -464,8 +469,13 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel(
464469
const float cellDeltaTanLambdaSigma,
465470
const float nSigmaCut)
466471
{
467-
constexpr float layerxX0[7] = {5.e-3f, 5.e-3f, 5.e-3f, 1.e-2f, 1.e-2f, 1.e-2f, 1.e-2f}; // Hardcoded here for the moment.
472+
constexpr float layerxX0[7] = {5.e-3f, 5.e-3f, 5.e-3f, 1.e-2f, 1.e-2f, 1.e-2f, 1.e-2f}; // FIXME: Hardcoded here for the moment.
468473
for (int iCurrentTrackletIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentTrackletIndex < nTrackletsCurrent; iCurrentTrackletIndex += blockDim.x * gridDim.x) {
474+
if constexpr (!initRun) {
475+
if (cellsLUTs[layer][iCurrentTrackletIndex] == cellsLUTs[layer][iCurrentTrackletIndex + 1]) {
476+
continue;
477+
}
478+
}
469479
const Tracklet& currentTracklet = tracklets[layer][iCurrentTrackletIndex];
470480
const int nextLayerClusterIndex{currentTracklet.secondClusterIndex};
471481
const int nextLayerFirstTrackletIndex{trackletsLUT[layer + 1][nextLayerClusterIndex]};
@@ -526,11 +536,11 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel(
526536
new (cells + cellsLUTs[layer][iCurrentTrackletIndex] + foundCells) CellSeed<nLayers>{layer, clusId[0], clusId[1], clusId[2], iCurrentTrackletIndex, iNextTrackletIndex, track, chi2};
527537
}
528538
++foundCells;
529-
if constexpr (initRun) {
530-
cellsLUTs[layer][iCurrentTrackletIndex] = foundCells;
531-
}
532539
}
533540
}
541+
if constexpr (initRun) {
542+
cellsLUTs[layer][iCurrentTrackletIndex] = foundCells;
543+
}
534544
}
535545
}
536546

@@ -692,8 +702,13 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel(
692702
const o2::base::Propagator* propagator,
693703
const o2::base::PropagatorF::MatCorrType matCorrType)
694704
{
695-
constexpr float layerxX0[7] = {5.e-3f, 5.e-3f, 5.e-3f, 1.e-2f, 1.e-2f, 1.e-2f, 1.e-2f}; // Hardcoded here for the moment.
705+
constexpr float layerxX0[7] = {5.e-3f, 5.e-3f, 5.e-3f, 1.e-2f, 1.e-2f, 1.e-2f, 1.e-2f}; // FIXME: Hardcoded here for the moment.
696706
for (unsigned int iCurrentCell = blockIdx.x * blockDim.x + threadIdx.x; iCurrentCell < nCurrentCells; iCurrentCell += blockDim.x * gridDim.x) {
707+
if constexpr (!dryRun) {
708+
if (foundSeedsTable[iCurrentCell] == foundSeedsTable[iCurrentCell + 1]) {
709+
continue;
710+
}
711+
}
697712
int foundSeeds{0};
698713
const auto& currentCell{currentCellSeeds[iCurrentCell]};
699714
if (currentCell.getLevel() != level) {

Detectors/ITSMFT/ITS/tracking/GPU/hip/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ if(HIP_ENABLED)
1313
message(STATUS "Building ITS HIP tracker")
1414
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -fgpu-rdc")
1515
# set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -O0 -g -ggdb -fno-inline -fno-omit-frame-pointer -D__HIP_ENABLE_DEVICE_ASSERT__")
16+
# add_compile_definitions(ITS_MEASURE_GPU_TIME)
17+
# add_compile_definitions(ITS_GPU_LOG)
1618
o2_add_hipified_library(ITStrackingHIP
1719
SOURCES ../cuda/ClusterLinesGPU.cu
1820
../cuda/TimeFrameGPU.cu

0 commit comments

Comments
 (0)