 #include "utils/VcShim.h"
 #include "utils/strtag.h"
 #include <fstream>
+#include <numeric>
+#include <vector>
 
 using namespace o2::gpu;
 using namespace o2::tpc;
@@ -748,14 +750,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   ClusterNative* tmpNativeClusters = nullptr;
   std::unique_ptr<ClusterNative[]> tmpNativeClusterBuffer;
 
-  // setup MC Labels
-  bool propagateMCLabels = GetProcessingSettings().runMC && processors()->ioPtrs.tpcPackedDigits && processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC;
+  const bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
+  const bool buildNativeHost = (mRec->GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings().deterministicGPUReconstruction; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
+  const bool propagateMCLabels = buildNativeHost && GetProcessingSettings().runMC && processors()->ioPtrs.tpcPackedDigits && processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC;
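+  // Host-side cluster sorting is only needed for reproducible output (deterministic mode) or at high debug levels.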
+  const bool sortClusters = buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4);
 
   auto* digitsMC = propagateMCLabels ? processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC : nullptr;
 
-  bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
-  bool buildNativeHost = (mRec->GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings().deterministicGPUReconstruction; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
-
   mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mRec->MemoryScalers()->nTPCHits * tpcHitLowOccupancyScalingFactor;
   if (buildNativeGPU) {
     AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
@@ -1261,21 +1262,20 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   }
 
   ClusterNativeAccess::ConstMCLabelContainerView* mcLabelsConstView = nullptr;
-  if (propagateMCLabels) {
-    // TODO: write to buffer directly
+  if (propagateMCLabels) { // TODO: write to buffer directly
     o2::dataformats::MCTruthContainer<o2::MCCompLabel> mcLabels;
     std::pair<ConstMCLabelContainer*, ConstMCLabelContainerView*> buffer;
-    if (!GetProcessingSettings().tpcWriteClustersAfterRejection && mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)] && mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->useExternal()) {
-      if (!mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->allocator) {
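+    // The external label buffer cannot be filled here when clusters will still be re-sorted; in that case SortClusters() writes it after reordering.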
+    auto& labelOutputControl = mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)];
+    if (!GetProcessingSettings().tpcWriteClustersAfterRejection && !sortClusters && labelOutputControl && labelOutputControl->useExternal()) {
+      if (!labelOutputControl->allocator) {
        throw std::runtime_error("Cluster MC Label buffer missing");
      }
-      ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->allocator(0));
+      ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(labelOutputControl->allocator(0));
       buffer = {&container->first, &container->second};
     } else {
       mIOMem.clusterNativeMCView = std::make_unique<ConstMCLabelContainerView>();
       mIOMem.clusterNativeMCBuffer = std::make_unique<ConstMCLabelContainer>();
-      buffer.first = mIOMem.clusterNativeMCBuffer.get();
-      buffer.second = mIOMem.clusterNativeMCView.get();
+      buffer = {mIOMem.clusterNativeMCBuffer.get(), mIOMem.clusterNativeMCView.get()};
     }
 
     assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
@@ -1330,15 +1330,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   if (doGPU && synchronizeCalibUpdate) {
     SynchronizeStream(0);
   }
-  if (buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4)) {
-    for (uint32_t i = 0; i < NSECTORS; i++) {
-      for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
-        std::sort(&tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j]], &tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j] + tmpNativeAccess->nClusters[i][j]]);
-      }
-    }
-    if (buildNativeGPU) {
-      GPUMemCpy(RecoStep::TPCClusterFinding, (void*)mInputsShadow->mPclusterNativeBuffer, (const void*)tmpNativeClusters, nClsTotal * sizeof(tmpNativeClusters[0]), -1, true);
-    }
+  if (sortClusters) {
+    SortClusters(buildNativeGPU, propagateMCLabels, tmpNativeAccess, tmpNativeClusters);
   }
   mRec->MemoryScalers()->nTPCHits = nClsTotal;
   mRec->PopNonPersistentMemory(RecoStep::TPCClusterFinding, qStr2Tag("TPCCLUST"));
@@ -1354,3 +1347,60 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
 #endif
   return 0;
 }
+
+void GPUChainTracking::SortClusters(bool buildNativeGPU, bool propagateMCLabels, ClusterNativeAccess* clusterAccess, ClusterNative* clusters)
+{
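+  // Sorts the clusters of each padrow into a canonical order, for deterministic reconstruction and debugging comparisons.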
+  if (propagateMCLabels) {
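+    // With MC labels attached, sort a permutation of cluster indices instead of the clusters directly, so the label container can be reordered to match.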
+    std::vector<uint32_t> clsOrder(clusterAccess->nClustersTotal);
+    std::iota(clsOrder.begin(), clsOrder.end(), 0);
+    std::vector<ClusterNative> tmpClusters;
+    for (uint32_t i = 0; i < NSECTORS; i++) {
+      for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
+        const uint32_t offset = clusterAccess->clusterOffset[i][j];
+        std::sort(&clsOrder[offset], &clsOrder[offset + clusterAccess->nClusters[i][j]], [&clusters](const uint32_t a, const uint32_t b) {
+          return clusters[a] < clusters[b];
+        });
+        tmpClusters.resize(clusterAccess->nClusters[i][j]);
+        memcpy(tmpClusters.data(), &clusters[offset], clusterAccess->nClusters[i][j] * sizeof(tmpClusters[0]));
+        for (uint32_t k = 0; k < tmpClusters.size(); k++) {
+          clusters[offset + k] = tmpClusters[clsOrder[offset + k] - offset];
+        }
+      }
+    }
+    tmpClusters.clear();
+
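+    // Rebuild the MC truth container in the new cluster order, writing to the external label output if an allocator is provided;
+    // otherwise the previous containers are kept alive in tmpUniqueContainer* while their labels are copied.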
+    std::pair<o2::dataformats::ConstMCLabelContainer*, o2::dataformats::ConstMCLabelContainerView*> labelBuffer;
+    GPUOutputControl* labelOutput = mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)];
+    std::unique_ptr<ConstMCLabelContainerView> tmpUniqueContainerView;
+    std::unique_ptr<ConstMCLabelContainer> tmpUniqueContainerBuffer;
+    if (labelOutput && labelOutput->allocator) {
+      ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* labelContainer = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(labelOutput->allocator(0));
+      labelBuffer = {&labelContainer->first, &labelContainer->second};
+    } else {
+      tmpUniqueContainerView = std::move(mIOMem.clusterNativeMCView);
+      tmpUniqueContainerBuffer = std::move(mIOMem.clusterNativeMCBuffer);
+      mIOMem.clusterNativeMCView = std::make_unique<ConstMCLabelContainerView>();
+      mIOMem.clusterNativeMCBuffer = std::make_unique<ConstMCLabelContainer>();
+      labelBuffer = {mIOMem.clusterNativeMCBuffer.get(), mIOMem.clusterNativeMCView.get()};
+    }
+
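+    // Copy each cluster's labels following the sort permutation, then flatten into the output buffer.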
+    o2::dataformats::MCLabelContainer tmpContainer;
+    for (uint32_t i = 0; i < clusterAccess->nClustersTotal; i++) {
+      for (const auto& element : clusterAccess->clustersMCTruth->getLabels(clsOrder[i])) {
+        tmpContainer.addElement(i, element);
+      }
+    }
+    tmpContainer.flatten_to(*labelBuffer.first);
+    *labelBuffer.second = *labelBuffer.first;
+    clusterAccess->clustersMCTruth = labelBuffer.second;
+  } else {
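+    // Without MC labels, the clusters of each row can simply be sorted in place.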
+    for (uint32_t i = 0; i < NSECTORS; i++) {
+      for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
+        std::sort(&clusters[clusterAccess->clusterOffset[i][j]], &clusters[clusterAccess->clusterOffset[i][j] + clusterAccess->nClusters[i][j]]);
+      }
+    }
+  }
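+  // Mirror the sorted clusters back to the GPU buffer so the device-side copy stays consistent with the host.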
+  if (buildNativeGPU) {
+    GPUMemCpy(RecoStep::TPCClusterFinding, (void*)mInputsShadow->mPclusterNativeBuffer, (const void*)clusters, clusterAccess->nClustersTotal * sizeof(clusters[0]), -1, true);
+  }
+}