Skip to content

Commit ef91595

Browse files
committed
ITS: recover single-threaded performance in findRoads
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 250001f commit ef91595

File tree

1 file changed

+62
-54
lines changed

1 file changed

+62
-54
lines changed

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 62 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -761,65 +761,73 @@ void TrackerTraits<nLayers>::findRoads(const int iteration)
761761

762762
bounded_vector<TrackITSExt> tracks(mMemoryPool.get());
763763
mTaskArena->execute([&] {
764-
bounded_vector<int> perSeedCount(trackSeeds.size() + 1, 0, mMemoryPool.get());
765-
tbb::parallel_for(
766-
tbb::blocked_range<int>(0, (int)trackSeeds.size()),
767-
[&](const tbb::blocked_range<int>& Seeds) {
768-
for (int iSeed = Seeds.begin(); iSeed < Seeds.end(); ++iSeed) {
769-
const CellSeed& seed{trackSeeds[iSeed]};
770-
TrackITSExt temporaryTrack{seed};
771-
temporaryTrack.resetCovariance();
772-
temporaryTrack.setChi2(0);
773-
for (int iL{0}; iL < 7; ++iL) {
774-
temporaryTrack.setExternalClusterIndex(iL, seed.getCluster(iL), seed.getCluster(iL) != constants::UnusedIndex);
775-
}
764+
auto forSeed = [&](auto Tag, int iSeed, int offset = 0) {
765+
const CellSeed& seed{trackSeeds[iSeed]};
766+
TrackITSExt temporaryTrack{seed};
767+
temporaryTrack.resetCovariance();
768+
temporaryTrack.setChi2(0);
769+
for (int iL{0}; iL < 7; ++iL) {
770+
temporaryTrack.setExternalClusterIndex(iL, seed.getCluster(iL), seed.getCluster(iL) != constants::UnusedIndex);
771+
}
776772

777-
bool fitSuccess = fitTrack(temporaryTrack, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF);
778-
if (!fitSuccess) {
779-
continue;
780-
}
781-
temporaryTrack.getParamOut() = temporaryTrack.getParamIn();
782-
temporaryTrack.resetCovariance();
783-
temporaryTrack.setChi2(0);
784-
fitSuccess = fitTrack(temporaryTrack, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f);
785-
if (!fitSuccess || temporaryTrack.getPt() < mTrkParams[iteration].MinPt[mTrkParams[iteration].NLayers - temporaryTrack.getNClusters()]) {
786-
continue;
787-
}
788-
++perSeedCount[iSeed];
789-
}
790-
});
791-
std::exclusive_scan(perSeedCount.begin(), perSeedCount.end(), perSeedCount.begin(), 0);
792-
auto totalTracks{perSeedCount.back()};
793-
if (totalTracks == 0) {
794-
return;
795-
}
796-
tracks.resize(totalTracks);
773+
bool fitSuccess = fitTrack(temporaryTrack, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF);
774+
if (!fitSuccess) {
775+
return 0;
776+
}
797777

798-
tbb::parallel_for(
799-
tbb::blocked_range<int>(0, (int)trackSeeds.size()),
800-
[&](const tbb::blocked_range<int>& Seeds) {
801-
for (int iSeed = Seeds.begin(); iSeed < Seeds.end(); ++iSeed) {
802-
if (perSeedCount[iSeed] == perSeedCount[iSeed + 1]) {
803-
continue;
804-
}
805-
const CellSeed& seed{trackSeeds[iSeed]};
806-
auto& trk = tracks[perSeedCount[iSeed]] = TrackITSExt(seed);
807-
trk.resetCovariance();
808-
trk.setChi2(0);
809-
for (int iL{0}; iL < 7; ++iL) {
810-
trk.setExternalClusterIndex(iL, seed.getCluster(iL), seed.getCluster(iL) != constants::UnusedIndex);
778+
temporaryTrack.getParamOut() = temporaryTrack.getParamIn();
779+
temporaryTrack.resetCovariance();
780+
temporaryTrack.setChi2(0);
781+
fitSuccess = fitTrack(temporaryTrack, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f);
782+
if (!fitSuccess || temporaryTrack.getPt() < mTrkParams[iteration].MinPt[mTrkParams[iteration].NLayers - temporaryTrack.getNClusters()]) {
783+
return 0;
784+
}
785+
786+
if constexpr (decltype(Tag)::value == PassMode::OnePass::value) {
787+
tracks.push_back(temporaryTrack);
788+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) {
789+
// count-only pass: no track is stored; the `return 1` below reports the accepted seed
790+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) {
791+
tracks[offset] = temporaryTrack;
792+
} else {
793+
static_assert(false, "Unknown mode!");
794+
}
795+
return 1;
796+
};
797+
798+
const int nSeeds = static_cast<int>(trackSeeds.size());
799+
if (mTaskArena->max_concurrency() <= 1) {
800+
for (int iSeed{0}; iSeed < nSeeds; ++iSeed) {
801+
forSeed(PassMode::OnePass{}, iSeed);
802+
}
803+
} else {
804+
bounded_vector<int> perSeedCount(nSeeds + 1, 0, mMemoryPool.get());
805+
tbb::parallel_for(
806+
tbb::blocked_range<int>(0, nSeeds),
807+
[&](const tbb::blocked_range<int>& Seeds) {
808+
for (int iSeed = Seeds.begin(); iSeed < Seeds.end(); ++iSeed) {
809+
perSeedCount[iSeed] = forSeed(PassMode::TwoPassCount{}, iSeed);
811810
}
811+
});
812812

813-
bool fitSuccess = fitTrack(trk, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF);
814-
if (!fitSuccess) {
815-
continue;
813+
std::exclusive_scan(perSeedCount.begin(), perSeedCount.end(), perSeedCount.begin(), 0);
814+
auto totalTracks{perSeedCount.back()};
815+
if (totalTracks == 0) {
816+
return;
817+
}
818+
tracks.resize(totalTracks);
819+
820+
tbb::parallel_for(
821+
tbb::blocked_range<int>(0, nSeeds),
822+
[&](const tbb::blocked_range<int>& Seeds) {
823+
for (int iSeed = Seeds.begin(); iSeed < Seeds.end(); ++iSeed) {
824+
if (perSeedCount[iSeed] == perSeedCount[iSeed + 1]) {
825+
continue;
826+
}
827+
forSeed(PassMode::TwoPassInsert{}, iSeed, perSeedCount[iSeed]);
816828
}
817-
trk.getParamOut() = trk.getParamIn();
818-
trk.resetCovariance();
819-
trk.setChi2(0);
820-
fitTrack(trk, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f);
821-
}
822-
});
829+
});
830+
}
823831

824832
deepVectorClear(trackSeeds);
825833
tbb::parallel_sort(tracks.begin(), tracks.end(), [](const auto& a, const auto& b) {

0 commit comments

Comments
 (0)