Skip to content

Commit 250001f

Browse files
committed
ITS: recover single threaded performance in findCellsNeighbours
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 8327107 commit 250001f

File tree

1 file changed

+95
-91
lines changed

1 file changed

+95
-91
lines changed

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 95 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -444,128 +444,132 @@ void TrackerTraits<nLayers>::findCellsNeighbours(const int iteration)
444444
#ifdef OPTIMISATION_OUTPUT
445445
std::ofstream off(std::format("cellneighs{}.txt", iteration));
446446
#endif
447-
for (int iLayer{0}; iLayer < mTrkParams[iteration].CellsPerRoad() - 1; ++iLayer) {
448-
const int nextLayerCellsNum{static_cast<int>(mTimeFrame->getCells()[iLayer + 1].size())};
449-
deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]);
450-
deepVectorClear(mTimeFrame->getCellsNeighboursLUT()[iLayer]);
451-
if (mTimeFrame->getCells()[iLayer + 1].empty() ||
452-
mTimeFrame->getCellsLookupTable()[iLayer].empty()) {
453-
continue;
454-
}
455447

456-
mTaskArena->execute([&] {
457-
int layerCellsNum{static_cast<int>(mTimeFrame->getCells()[iLayer].size())};
448+
struct Neighbor {
449+
int cell{-1}, nextCell{-1}, level{-1};
450+
};
458451

459-
bounded_vector<int> perCellCount(layerCellsNum + 1, 0, mMemoryPool.get());
460-
tbb::parallel_for(
461-
tbb::blocked_range<int>(0, layerCellsNum),
462-
[&](const tbb::blocked_range<int>& Cells) {
463-
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
464-
const auto& currentCellSeed{mTimeFrame->getCells()[iLayer][iCell]};
465-
const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()};
466-
const int nextLayerFirstCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex]};
467-
const int nextLayerLastCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex + 1]};
468-
469-
int foundNextCells{0};
470-
for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) {
471-
auto nextCellSeed{mTimeFrame->getCells()[iLayer + 1][iNextCell]}; /// copy
472-
if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex) {
473-
break;
474-
}
452+
mTaskArena->execute([&] {
453+
for (int iLayer{0}; iLayer < mTrkParams[iteration].CellsPerRoad() - 1; ++iLayer) {
454+
deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]);
455+
deepVectorClear(mTimeFrame->getCellsNeighboursLUT()[iLayer]);
456+
if (mTimeFrame->getCells()[iLayer + 1].empty() ||
457+
mTimeFrame->getCellsLookupTable()[iLayer].empty()) {
458+
continue;
459+
}
475460

476-
if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) ||
477-
!nextCellSeed.propagateTo(currentCellSeed.getX(), getBz())) {
478-
continue;
479-
}
480-
float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed); /// TODO: switch to the chi2 wrt cluster to avoid correlation
461+
int nCells{static_cast<int>(mTimeFrame->getCells()[iLayer].size())};
462+
bounded_vector<Neighbor> cellsNeighbours(mMemoryPool.get());
463+
464+
auto forCellNeighbour = [&](auto Tag, int iCell, int offset = 0) -> int {
465+
const auto& currentCellSeed{mTimeFrame->getCells()[iLayer][iCell]};
466+
const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()};
467+
const int nextLayerFirstCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex]};
468+
const int nextLayerLastCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex + 1]};
469+
int foundNextCells{0};
470+
for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) {
471+
auto nextCellSeed{mTimeFrame->getCells()[iLayer + 1][iNextCell]}; /// copy
472+
if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex) {
473+
break;
474+
}
475+
476+
if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) ||
477+
!nextCellSeed.propagateTo(currentCellSeed.getX(), getBz())) {
478+
continue;
479+
}
480+
float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed); /// TODO: switch to the chi2 wrt cluster to avoid correlation
481481

482482
#ifdef OPTIMISATION_OUTPUT
483-
bool good{mTimeFrame->getCellsLabel(iLayer)[iCell] == mTimeFrame->getCellsLabel(iLayer + 1)[iNextCell]};
484-
off << std::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl;
483+
bool good{mTimeFrame->getCellsLabel(iLayer)[iCell] == mTimeFrame->getCellsLabel(iLayer + 1)[iNextCell]};
484+
off << std::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl;
485485
#endif
486486

487-
if (chi2 > mTrkParams[0].MaxChi2ClusterAttachment) {
488-
continue;
489-
}
490-
++foundNextCells;
491-
}
492-
perCellCount[iCell] = foundNextCells;
487+
if (chi2 > mTrkParams[0].MaxChi2ClusterAttachment) {
488+
continue;
493489
}
494-
});
495-
496-
std::exclusive_scan(perCellCount.begin(), perCellCount.end(), perCellCount.begin(), 0);
497-
int totalCellNeighbours = perCellCount.back();
498-
if (totalCellNeighbours == 0) {
499-
deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]);
500-
return;
501-
}
502490

503-
struct Neighbor {
504-
int cell{-1}, nextCell{-1}, level{-1};
491+
if constexpr (decltype(Tag)::value == PassMode::OnePass::value) {
492+
cellsNeighbours.emplace_back(iCell, iNextCell, currentCellSeed.getLevel() + 1);
493+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) {
494+
++foundNextCells;
495+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) {
496+
cellsNeighbours[offset++] = {iCell, iNextCell, currentCellSeed.getLevel() + 1};
497+
} else {
498+
static_assert(false, "Unknown mode!");
499+
}
500+
}
501+
return foundNextCells;
505502
};
506-
bounded_vector<Neighbor> cellsNeighbours(mMemoryPool.get());
507-
cellsNeighbours.resize(totalCellNeighbours);
508503

509-
tbb::parallel_for(
510-
tbb::blocked_range<int>(0, layerCellsNum),
511-
[&](const tbb::blocked_range<int>& Cells) {
512-
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
513-
if (perCellCount[iCell] == perCellCount[iCell + 1]) {
514-
continue;
504+
if (mTaskArena->max_concurrency() <= 1) {
505+
for (int iCell{0}; iCell < nCells; ++iCell) {
506+
forCellNeighbour(PassMode::OnePass{}, iCell);
507+
}
508+
} else {
509+
bounded_vector<int> perCellCount(nCells + 1, 0, mMemoryPool.get());
510+
tbb::parallel_for(
511+
tbb::blocked_range<int>(0, nCells),
512+
[&](const tbb::blocked_range<int>& Cells) {
513+
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
514+
perCellCount[iCell] = forCellNeighbour(PassMode::TwoPassCount{}, iCell);
515515
}
516-
const auto& currentCellSeed{mTimeFrame->getCells()[iLayer][iCell]};
517-
const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()};
518-
const int nextLayerFirstCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex]};
519-
const int nextLayerLastCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex + 1]};
520-
521-
int position = perCellCount[iCell];
522-
for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) {
523-
auto nextCellSeed{mTimeFrame->getCells()[iLayer + 1][iNextCell]}; /// copy
524-
if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex) {
525-
break;
526-
}
516+
});
527517

528-
if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) ||
529-
!nextCellSeed.propagateTo(currentCellSeed.getX(), getBz())) {
530-
continue;
531-
}
532-
533-
float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed); /// TODO: switch to the chi2 wrt cluster to avoid correlation
534-
if (chi2 > mTrkParams[0].MaxChi2ClusterAttachment) {
518+
std::exclusive_scan(perCellCount.begin(), perCellCount.end(), perCellCount.begin(), 0);
519+
int totalCellNeighbours = perCellCount.back();
520+
if (totalCellNeighbours == 0) {
521+
deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]);
522+
continue;
523+
}
524+
cellsNeighbours.resize(totalCellNeighbours);
525+
526+
tbb::parallel_for(
527+
tbb::blocked_range<int>(0, nCells),
528+
[&](const tbb::blocked_range<int>& Cells) {
529+
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
530+
int offset = perCellCount[iCell];
531+
if (offset == perCellCount[iCell + 1]) {
535532
continue;
536533
}
537-
538-
cellsNeighbours[position++] = {iCell, iNextCell, currentCellSeed.getLevel() + 1};
534+
forCellNeighbour(PassMode::TwoPassInsert{}, iCell, offset);
539535
}
540-
}
541-
});
536+
});
537+
}
538+
539+
if (cellsNeighbours.empty()) {
540+
continue;
541+
}
542542

543543
tbb::parallel_sort(cellsNeighbours.begin(), cellsNeighbours.end(), [](const auto& a, const auto& b) {
544544
return a.nextCell < b.nextCell;
545545
});
546546

547547
auto& cellsNeighbourLUT = mTimeFrame->getCellsNeighboursLUT()[iLayer];
548-
cellsNeighbourLUT.assign(nextLayerCellsNum, 0);
548+
cellsNeighbourLUT.assign(mTimeFrame->getCells()[iLayer + 1].size(), 0);
549549
for (const auto& neigh : cellsNeighbours) {
550550
++cellsNeighbourLUT[neigh.nextCell];
551551
}
552552
std::inclusive_scan(cellsNeighbourLUT.begin(), cellsNeighbourLUT.end(), cellsNeighbourLUT.begin());
553553

554-
mTimeFrame->getCellsNeighbours()[iLayer].reserve(totalCellNeighbours);
554+
mTimeFrame->getCellsNeighbours()[iLayer].reserve(cellsNeighbours.size());
555555
std::ranges::transform(cellsNeighbours, std::back_inserter(mTimeFrame->getCellsNeighbours()[iLayer]), [](const auto& neigh) { return neigh.cell; });
556556

557557
auto it = cellsNeighbours.begin();
558-
while (it != cellsNeighbours.end()) {
559-
const int current_nextCell = it->nextCell;
560-
auto group_end = std::find_if_not(it, cellsNeighbours.end(),
561-
[current_nextCell](const auto& nb) { return nb.nextCell == current_nextCell; });
562-
const auto max_level_it = std::max_element(it, group_end,
563-
[](const auto& a, const auto& b) { return a.level < b.level; });
564-
mTimeFrame->getCells()[iLayer + 1][current_nextCell].setLevel(max_level_it->level);
565-
it = group_end;
558+
int current = it->nextCell;
559+
int maxLvl = it->level;
560+
++it;
561+
for (; it != cellsNeighbours.end(); ++it) {
562+
if (it->nextCell == current) {
563+
maxLvl = std::max(maxLvl, it->level);
564+
} else {
565+
mTimeFrame->getCells()[iLayer + 1][current].setLevel(maxLvl);
566+
current = it->nextCell;
567+
maxLvl = it->level;
568+
}
566569
}
567-
});
568-
}
570+
mTimeFrame->getCells()[iLayer + 1][current].setLevel(maxLvl);
571+
}
572+
});
569573
}
570574

571575
template <int nLayers>

0 commit comments

Comments
 (0)