Skip to content

Commit fe3894b

Browse files
committed
ITS: recover single-threaded performance in processNeighbours
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent b942bba commit fe3894b

File tree

1 file changed

+114
-119
lines changed

1 file changed

+114
-119
lines changed

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 114 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
///
1515

1616
#include <algorithm>
17+
#include <cwctype>
1718
#include <iostream>
1819
#include <iterator>
1920
#include <ranges>
21+
#include <type_traits>
2022

2123
#ifdef OPTIMISATION_OUTPUT
2224
#include <format>
@@ -43,6 +45,12 @@ namespace o2::its
4345

4446
static constexpr int debugLevel{0};
4547

48+
/// Compile-time tags selecting how the per-cell neighbour loop in processNeighbours records accepted seeds.
/// Each alias is a distinct std::integral_constant<int, N> type; the loop reads the mode through
/// `decltype(Tag)::value` inside `if constexpr`, so the branches for the other modes are discarded at compile time.
///  - OnePass (0): accepted seeds and neighbour cell ids are appended with push_back;
///    selected when the task arena reports max_concurrency() <= 1.
///  - TwoPassCount (1): only the number of accepted seeds for the cell is returned;
///    the seed's cluster/level/tracklet-index bookkeeping is skipped.
///  - TwoPassInsert (2): accepted seeds and ids are written starting at a caller-supplied offset;
///    the cell-level level and used-cluster checks are skipped.
struct PassMode {
49+
using OnePass = std::integral_constant<int, 0>;
50+
using TwoPassCount = std::integral_constant<int, 1>;
51+
using TwoPassInsert = std::integral_constant<int, 2>;
52+
};
53+
4654
template <int nLayers>
4755
void TrackerTraits<nLayers>::computeLayerTracklets(const int iteration, int iROFslice, int iVertex)
4856
{
@@ -622,140 +630,127 @@ void TrackerTraits<nLayers>::processNeighbours(int iLayer, int iLevel, const bou
622630
#endif
623631

624632
mTaskArena->execute([&] {
625-
bounded_vector<int> perCellCount(currentCellSeed.size() + 1, 0, mMemoryPool.get());
626-
tbb::parallel_for(
627-
tbb::blocked_range<int>(0, (int)currentCellSeed.size()),
628-
[&](const tbb::blocked_range<int>& Cells) {
629-
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
630-
const CellSeed& currentCell{currentCellSeed[iCell]};
631-
int foundSeeds{0};
632-
if (currentCell.getLevel() != iLevel) {
633-
continue;
634-
}
635-
if (currentCellId.empty() && (mTimeFrame->isClusterUsed(iLayer, currentCell.getFirstClusterIndex()) ||
636-
mTimeFrame->isClusterUsed(iLayer + 1, currentCell.getSecondClusterIndex()) ||
637-
mTimeFrame->isClusterUsed(iLayer + 2, currentCell.getThirdClusterIndex()))) {
638-
continue; /// this we do only on the first iteration, hence the check on currentCellId
639-
}
640-
const int cellId = currentCellId.empty() ? iCell : currentCellId[iCell];
641-
const int startNeighbourId{cellId ? mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId - 1] : 0};
642-
const int endNeighbourId{mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId]};
643-
644-
for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) {
645-
CA_DEBUGGER(attempts++);
646-
const int neighbourCellId = mTimeFrame->getCellsNeighbours()[iLayer - 1][iNeighbourCell];
647-
const CellSeed& neighbourCell = mTimeFrame->getCells()[iLayer - 1][neighbourCellId];
648-
if (neighbourCell.getSecondTrackletIndex() != currentCell.getFirstTrackletIndex()) {
649-
CA_DEBUGGER(failedByMismatch++);
650-
continue;
651-
}
652-
if (mTimeFrame->isClusterUsed(iLayer - 1, neighbourCell.getFirstClusterIndex())) {
653-
continue;
654-
}
655-
if (currentCell.getLevel() - 1 != neighbourCell.getLevel()) {
656-
CA_DEBUGGER(failed[0]++);
657-
continue;
658-
}
659-
/// Let's start the fitting procedure
660-
CellSeed seed{currentCell};
661-
auto& trHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer - 1)[neighbourCell.getFirstClusterIndex()];
633+
auto forCellNeighbours = [&](auto Tag, int iCell, int offset = 0) -> int {
634+
const CellSeed& currentCell{currentCellSeed[iCell]};
662635

663-
if (!seed.rotate(trHit.alphaTrackingFrame)) {
664-
CA_DEBUGGER(failed[1]++);
665-
continue;
666-
}
667-
668-
if (!propagator->propagateToX(seed, trHit.xTrackingFrame, getBz(), o2::base::PropagatorImpl<float>::MAX_SIN_PHI, o2::base::PropagatorImpl<float>::MAX_STEP, mCorrType)) {
669-
CA_DEBUGGER(failed[2]++);
670-
continue;
671-
}
636+
if constexpr (decltype(Tag)::value != PassMode::TwoPassInsert::value) {
637+
if (currentCell.getLevel() != iLevel) {
638+
return 0;
639+
}
640+
if (currentCellId.empty() && (mTimeFrame->isClusterUsed(iLayer, currentCell.getFirstClusterIndex()) ||
641+
mTimeFrame->isClusterUsed(iLayer + 1, currentCell.getSecondClusterIndex()) ||
642+
mTimeFrame->isClusterUsed(iLayer + 2, currentCell.getThirdClusterIndex()))) {
643+
return 0; /// this we do only on the first iteration, hence the check on currentCellId
644+
}
645+
}
672646

673-
if (mCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) {
674-
if (!seed.correctForMaterial(mTrkParams[0].LayerxX0[iLayer - 1], mTrkParams[0].LayerxX0[iLayer - 1] * constants::Radl * constants::Rho, true)) {
675-
continue;
676-
}
677-
}
647+
const int cellId = currentCellId.empty() ? iCell : currentCellId[iCell];
648+
const int startNeighbourId{cellId ? mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId - 1] : 0};
649+
const int endNeighbourId{mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId]};
650+
int foundSeeds{0};
651+
for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) {
652+
CA_DEBUGGER(attempts++);
653+
const int neighbourCellId = mTimeFrame->getCellsNeighbours()[iLayer - 1][iNeighbourCell];
654+
const CellSeed& neighbourCell = mTimeFrame->getCells()[iLayer - 1][neighbourCellId];
655+
if (neighbourCell.getSecondTrackletIndex() != currentCell.getFirstTrackletIndex()) {
656+
CA_DEBUGGER(failedByMismatch++);
657+
continue;
658+
}
659+
if (mTimeFrame->isClusterUsed(iLayer - 1, neighbourCell.getFirstClusterIndex())) {
660+
continue;
661+
}
662+
if (currentCell.getLevel() - 1 != neighbourCell.getLevel()) {
663+
CA_DEBUGGER(failed[0]++);
664+
continue;
665+
}
666+
/// Let's start the fitting procedure
667+
CellSeed seed{currentCell};
668+
const auto& trHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer - 1)[neighbourCell.getFirstClusterIndex()];
678669

679-
auto predChi2{seed.getPredictedChi2Quiet(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)};
680-
if ((predChi2 > mTrkParams[0].MaxChi2ClusterAttachment) || predChi2 < 0.f) {
681-
CA_DEBUGGER(failed[3]++);
682-
continue;
683-
}
684-
seed.setChi2(seed.getChi2() + predChi2);
685-
if (!seed.o2::track::TrackParCov::update(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)) {
686-
CA_DEBUGGER(failed[4]++);
687-
continue;
688-
}
689-
++foundSeeds;
690-
}
691-
perCellCount[iCell] = foundSeeds;
670+
if (!seed.rotate(trHit.alphaTrackingFrame)) {
671+
CA_DEBUGGER(failed[1]++);
672+
continue;
692673
}
693-
});
694674

695-
std::exclusive_scan(perCellCount.begin(), perCellCount.end(), perCellCount.begin(), 0);
696-
auto totalNeighbours{perCellCount.back()};
697-
if (totalNeighbours == 0) {
698-
return;
699-
}
700-
updatedCellSeeds.resize(totalNeighbours);
701-
updatedCellsIds.resize(totalNeighbours);
675+
if (!propagator->propagateToX(seed, trHit.xTrackingFrame, getBz(), o2::base::PropagatorImpl<float>::MAX_SIN_PHI, o2::base::PropagatorImpl<float>::MAX_STEP, mCorrType)) {
676+
CA_DEBUGGER(failed[2]++);
677+
continue;
678+
}
702679

703-
tbb::parallel_for(
704-
tbb::blocked_range<int>(0, (int)currentCellSeed.size()),
705-
[&](const tbb::blocked_range<int>& Cells) {
706-
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
707-
if (perCellCount[iCell] == perCellCount[iCell + 1]) {
680+
if (mCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) {
681+
if (!seed.correctForMaterial(mTrkParams[0].LayerxX0[iLayer - 1], mTrkParams[0].LayerxX0[iLayer - 1] * constants::Radl * constants::Rho, true)) {
708682
continue;
709683
}
710-
// no need for further checks on cell level
711-
712-
const CellSeed& currentCell{currentCellSeed[iCell]};
713-
const int cellId = currentCellId.empty() ? iCell : currentCellId[iCell];
714-
const int startNeighbourId{cellId ? mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId - 1] : 0};
715-
const int endNeighbourId{mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId]};
716-
717-
int offset = perCellCount[iCell];
718-
for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) {
719-
const int neighbourCellId = mTimeFrame->getCellsNeighbours()[iLayer - 1][iNeighbourCell];
720-
const CellSeed& neighbourCell = mTimeFrame->getCells()[iLayer - 1][neighbourCellId];
721-
if (neighbourCell.getSecondTrackletIndex() != currentCell.getFirstTrackletIndex() ||
722-
mTimeFrame->isClusterUsed(iLayer - 1, neighbourCell.getFirstClusterIndex()) ||
723-
currentCell.getLevel() - 1 != neighbourCell.getLevel()) {
724-
continue;
725-
}
684+
}
726685

727-
auto seed = currentCell;
686+
auto predChi2{seed.getPredictedChi2Quiet(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)};
687+
if ((predChi2 > mTrkParams[0].MaxChi2ClusterAttachment) || predChi2 < 0.f) {
688+
CA_DEBUGGER(failed[3]++);
689+
continue;
690+
}
691+
seed.setChi2(seed.getChi2() + predChi2);
692+
if (!seed.o2::track::TrackParCov::update(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)) {
693+
CA_DEBUGGER(failed[4]++);
694+
continue;
695+
}
728696

729-
const auto& trHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer - 1)[neighbourCell.getFirstClusterIndex()];
730-
if (!seed.rotate(trHit.alphaTrackingFrame) || !propagator->propagateToX(seed, trHit.xTrackingFrame, getBz(), o2::base::PropagatorImpl<float>::MAX_SIN_PHI, o2::base::PropagatorImpl<float>::MAX_STEP, mCorrType)) {
731-
continue;
732-
}
697+
if constexpr (decltype(Tag)::value != PassMode::TwoPassCount::value) {
698+
seed.getClusters()[iLayer - 1] = neighbourCell.getFirstClusterIndex();
699+
seed.setLevel(neighbourCell.getLevel());
700+
seed.setFirstTrackletIndex(neighbourCell.getFirstTrackletIndex());
701+
seed.setSecondTrackletIndex(neighbourCell.getSecondTrackletIndex());
702+
}
733703

734-
if (mCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) {
735-
if (!seed.correctForMaterial(mTrkParams[0].LayerxX0[iLayer - 1], mTrkParams[0].LayerxX0[iLayer - 1] * constants::Radl * constants::Rho, true)) {
736-
continue;
737-
}
738-
}
704+
if constexpr (decltype(Tag)::value == PassMode::OnePass::value) {
705+
updatedCellSeeds.push_back(seed);
706+
updatedCellsIds.push_back(neighbourCellId);
707+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) {
708+
++foundSeeds;
709+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) {
710+
updatedCellSeeds[offset] = seed;
711+
updatedCellsIds[offset++] = neighbourCellId;
712+
} else {
713+
static_assert(false, "Unknown mode!");
714+
}
715+
}
716+
return foundSeeds;
717+
};
739718

740-
auto predChi2{seed.getPredictedChi2Quiet(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)};
741-
if ((predChi2 > mTrkParams[0].MaxChi2ClusterAttachment) || predChi2 < 0.f) {
742-
continue;
743-
}
744-
seed.setChi2(seed.getChi2() + predChi2);
745-
if (!seed.o2::track::TrackParCov::update(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)) {
746-
continue;
747-
}
719+
const int nCells = static_cast<int>(currentCellSeed.size());
720+
if (mTaskArena->max_concurrency() <= 1) {
721+
for (int iCell{0}; iCell < nCells; ++iCell) {
722+
forCellNeighbours(PassMode::OnePass{}, iCell);
723+
}
724+
} else {
725+
bounded_vector<int> perCellCount(nCells + 1, 0, mMemoryPool.get());
726+
tbb::parallel_for(
727+
tbb::blocked_range<int>(0, nCells),
728+
[&](const tbb::blocked_range<int>& Cells) {
729+
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
730+
perCellCount[iCell] = forCellNeighbours(PassMode::TwoPassCount{}, iCell);
731+
}
732+
});
748733

749-
seed.getClusters()[iLayer - 1] = neighbourCell.getFirstClusterIndex();
750-
seed.setLevel(neighbourCell.getLevel());
751-
seed.setFirstTrackletIndex(neighbourCell.getFirstTrackletIndex());
752-
seed.setSecondTrackletIndex(neighbourCell.getSecondTrackletIndex());
734+
std::exclusive_scan(perCellCount.begin(), perCellCount.end(), perCellCount.begin(), 0);
735+
auto totalNeighbours{perCellCount.back()};
736+
if (totalNeighbours == 0) {
737+
return;
738+
}
739+
updatedCellSeeds.resize(totalNeighbours);
740+
updatedCellsIds.resize(totalNeighbours);
753741

754-
updatedCellSeeds[offset] = seed;
755-
updatedCellsIds[offset++] = neighbourCellId;
742+
tbb::parallel_for(
743+
tbb::blocked_range<int>(0, nCells),
744+
[&](const tbb::blocked_range<int>& Cells) {
745+
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
746+
int offset = perCellCount[iCell];
747+
if (offset == perCellCount[iCell + 1]) {
748+
continue;
749+
}
750+
forCellNeighbours(PassMode::TwoPassInsert{}, iCell, offset);
756751
}
757-
}
758-
});
752+
});
753+
}
759754
});
760755

761756
#ifdef CA_DEBUG

0 commit comments

Comments
 (0)