@@ -471,26 +471,20 @@ template <int nLayers>
471471void TrackerTraits<nLayers>::processNeighbours(int iLayer, int iLevel, const bounded_vector<CellSeed>& currentCellSeed, const bounded_vector<int >& currentCellId, bounded_vector<CellSeed>& updatedCellSeeds, bounded_vector<int >& updatedCellsIds)
472472{
473473 CA_DEBUGGER (std::cout << " Processing neighbours layer " << iLayer << " level " << iLevel << " , size of the cell seeds: " << currentCellSeed.size () << std::endl);
474- updatedCellSeeds.reserve (mTimeFrame ->getCellsNeighboursLUT ()[iLayer - 1 ].size ()); // / This is not the correct value, we could do a loop to count the number of neighbours
475- updatedCellsIds.reserve (updatedCellSeeds.size ());
476474 auto propagator = o2::base::Propagator::Instance ();
475+
477476#ifdef CA_DEBUG
478477 int failed[5 ]{0 , 0 , 0 , 0 , 0 }, attempts{0 }, failedByMismatch{0 };
479478#endif
480479
481480 mTaskArena .execute ([&] {
482- // TODO better to use concurrent vector?
483- tbb::combinable<std::pair<bounded_vector<int >, bounded_vector<CellSeed>>> locUpdatedData ([&] {
484- return std::make_pair (bounded_vector<int >(mMemoryPool .get ()), bounded_vector<CellSeed>(mMemoryPool .get ()));
485- });
486-
481+ bounded_vector<int > perCellCount (currentCellSeed.size () + 1 , 0 , mMemoryPool .get ());
487482 tbb::parallel_for (
488483 tbb::blocked_range<int >(0 , (int )currentCellSeed.size ()),
489484 [&](const tbb::blocked_range<int >& Cells) {
490- auto & [locUpdatedCellsIds, locUpdatedCellSeeds] = locUpdatedData.local ();
491-
492485 for (int iCell = Cells.begin (); iCell < Cells.end (); ++iCell) {
493486 const CellSeed& currentCell{currentCellSeed[iCell]};
487+ int foundSeeds{0 };
494488 if (currentCell.getLevel () != iLevel) {
495489 continue ;
496490 }
@@ -550,25 +544,79 @@ void TrackerTraits<nLayers>::processNeighbours(int iLayer, int iLevel, const bou
550544 CA_DEBUGGER (failed[4 ]++);
551545 continue ;
552546 }
547+ ++foundSeeds;
548+ }
549+ perCellCount[iCell] = foundSeeds;
550+ }
551+ });
552+
553+ std::exclusive_scan (perCellCount.begin (), perCellCount.end (), perCellCount.begin (), 0 );
554+ auto totalNeighbours{perCellCount.back ()};
555+ if (totalNeighbours == 0 ) {
556+ return ;
557+ }
558+ updatedCellSeeds.resize (totalNeighbours);
559+ updatedCellsIds.resize (totalNeighbours);
560+
561+ tbb::parallel_for (
562+ tbb::blocked_range<int >(0 , (int )currentCellSeed.size ()),
563+ [&](const tbb::blocked_range<int >& Cells) {
564+ for (int iCell = Cells.begin (); iCell < Cells.end (); ++iCell) {
565+ if (perCellCount[iCell] == perCellCount[iCell + 1 ]) {
566+ continue ;
567+ }
568+ // no need for further checks on cell level
569+
570+ const CellSeed& currentCell{currentCellSeed[iCell]};
571+ const int cellId = currentCellId.empty () ? iCell : currentCellId[iCell];
572+ const int startNeighbourId{cellId ? mTimeFrame ->getCellsNeighboursLUT ()[iLayer - 1 ][cellId - 1 ] : 0 };
573+ const int endNeighbourId{mTimeFrame ->getCellsNeighboursLUT ()[iLayer - 1 ][cellId]};
574+
575+ int offset = perCellCount[iCell];
576+ for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) {
577+ const int neighbourCellId = mTimeFrame ->getCellsNeighbours ()[iLayer - 1 ][iNeighbourCell];
578+ const CellSeed& neighbourCell = mTimeFrame ->getCells ()[iLayer - 1 ][neighbourCellId];
579+ if (neighbourCell.getSecondTrackletIndex () != currentCell.getFirstTrackletIndex () ||
580+ mTimeFrame ->isClusterUsed (iLayer - 1 , neighbourCell.getFirstClusterIndex ()) ||
581+ currentCell.getLevel () - 1 != neighbourCell.getLevel ()) {
582+ continue ;
583+ }
584+
585+ auto seed = currentCell;
586+
587+ const auto & trHit = mTimeFrame ->getTrackingFrameInfoOnLayer (iLayer - 1 ).at (neighbourCell.getFirstClusterIndex ());
588+ if (!seed.rotate (trHit.alphaTrackingFrame ) || !propagator->propagateToX (seed, trHit.xTrackingFrame , getBz (), o2::base::PropagatorImpl<float >::MAX_SIN_PHI, o2::base::PropagatorImpl<float >::MAX_STEP, mCorrType )) {
589+ continue ;
590+ }
591+
592+ if (mCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) {
593+ float radl = 9 .36f ; // Radiation length of Si [cm]
594+ float rho = 2 .33f ; // Density of Si [g/cm^3]
595+ if (!seed.correctForMaterial (mTrkParams [0 ].LayerxX0 [iLayer - 1 ], mTrkParams [0 ].LayerxX0 [iLayer - 1 ] * radl * rho, true )) {
596+ continue ;
597+ }
598+ }
599+
600+ auto predChi2{seed.getPredictedChi2Quiet (trHit.positionTrackingFrame , trHit.covarianceTrackingFrame )};
601+ if ((predChi2 > mTrkParams [0 ].MaxChi2ClusterAttachment ) || predChi2 < 0 .f ) {
602+ continue ;
603+ }
604+ seed.setChi2 (seed.getChi2 () + predChi2);
605+ if (!seed.o2 ::track::TrackParCov::update (trHit.positionTrackingFrame , trHit.covarianceTrackingFrame )) {
606+ continue ;
607+ }
608+
553609 seed.getClusters ()[iLayer - 1 ] = neighbourCell.getFirstClusterIndex ();
554610 seed.setLevel (neighbourCell.getLevel ());
555611 seed.setFirstTrackletIndex (neighbourCell.getFirstTrackletIndex ());
556612 seed.setSecondTrackletIndex (neighbourCell.getSecondTrackletIndex ());
557613
558- locUpdatedCellSeeds. push_back ( seed) ;
559- locUpdatedCellsIds. push_back ( neighbourCellId) ;
614+ updatedCellSeeds[offset] = seed;
615+ updatedCellsIds[offset++] = neighbourCellId;
560616 }
561617 }
562618 });
563-
564- locUpdatedData.combine_each ([&](const auto & localData) {
565- const auto & [ids, seeds] = localData;
566- updatedCellsIds.insert (updatedCellsIds.begin (), ids.begin (), ids.end ());
567- updatedCellSeeds.insert (updatedCellSeeds.begin (), seeds.begin (), seeds.end ());
568- });
569619 });
570- updatedCellSeeds.shrink_to_fit ();
571- updatedCellsIds.shrink_to_fit ();
572620
573621#ifdef CA_DEBUG
574622 std::cout << " \t\t - Found " << updatedCellSeeds.size () << " cell seeds out of " << attempts << " attempts" << std::endl;
0 commit comments