Skip to content

Commit 633d014

Browse files
committed
GPU TPC: Fix deterministic mode in combination with propagation of MC labels
1 parent 53cd7b0 commit 633d014

File tree

2 files changed

+72
-21
lines changed

2 files changed

+72
-21
lines changed

GPU/GPUTracking/Global/GPUChainTracking.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ class GPUChainTracking : public GPUChain
306306
void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function<o2::tpc::ClusterNative*(size_t)> allocator, bool applyClusterCuts);
307307
bool NeedTPCClustersOnGPU();
308308
void WriteReducedClusters();
309+
void SortClusters(bool buildNativeGPU, bool propagateMCLabels, o2::tpc::ClusterNativeAccess* clusterAccess, o2::tpc::ClusterNative* clusters);
309310
template <int32_t I>
310311
int32_t RunTRDTrackingInternal();
311312
uint32_t StreamForSector(uint32_t sector) const;

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 71 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
#include "utils/VcShim.h"
5757
#include "utils/strtag.h"
5858
#include <fstream>
59+
#include <numeric>
60+
#include <vector>
5961

6062
using namespace o2::gpu;
6163
using namespace o2::tpc;
@@ -748,14 +750,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
748750
ClusterNative* tmpNativeClusters = nullptr;
749751
std::unique_ptr<ClusterNative[]> tmpNativeClusterBuffer;
750752

751-
// setup MC Labels
752-
bool propagateMCLabels = GetProcessingSettings().runMC && processors()->ioPtrs.tpcPackedDigits && processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC;
753+
const bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
754+
const bool buildNativeHost = (mRec->GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings().deterministicGPUReconstruction; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
755+
const bool propagateMCLabels = buildNativeHost && GetProcessingSettings().runMC && processors()->ioPtrs.tpcPackedDigits && processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC;
756+
const bool sortClusters = buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4);
753757

754758
auto* digitsMC = propagateMCLabels ? processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC : nullptr;
755759

756-
bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
757-
bool buildNativeHost = (mRec->GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings().deterministicGPUReconstruction; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
758-
759760
mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mRec->MemoryScalers()->nTPCHits * tpcHitLowOccupancyScalingFactor;
760761
if (buildNativeGPU) {
761762
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
@@ -1261,21 +1262,20 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
12611262
}
12621263

12631264
ClusterNativeAccess::ConstMCLabelContainerView* mcLabelsConstView = nullptr;
1264-
if (propagateMCLabels) {
1265-
// TODO: write to buffer directly
1265+
if (propagateMCLabels) { // TODO: write to buffer directly
12661266
o2::dataformats::MCTruthContainer<o2::MCCompLabel> mcLabels;
12671267
std::pair<ConstMCLabelContainer*, ConstMCLabelContainerView*> buffer;
1268-
if (!GetProcessingSettings().tpcWriteClustersAfterRejection && mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)] && mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->useExternal()) {
1269-
if (!mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->allocator) {
1268+
auto& labelOutputControl = mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)];
1269+
if (!GetProcessingSettings().tpcWriteClustersAfterRejection && !sortClusters && labelOutputControl && labelOutputControl->useExternal()) {
1270+
if (!labelOutputControl->allocator) {
12701271
throw std::runtime_error("Cluster MC Label buffer missing");
12711272
}
1272-
ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->allocator(0));
1273+
ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(labelOutputControl->allocator(0));
12731274
buffer = {&container->first, &container->second};
12741275
} else {
12751276
mIOMem.clusterNativeMCView = std::make_unique<ConstMCLabelContainerView>();
12761277
mIOMem.clusterNativeMCBuffer = std::make_unique<ConstMCLabelContainer>();
1277-
buffer.first = mIOMem.clusterNativeMCBuffer.get();
1278-
buffer.second = mIOMem.clusterNativeMCView.get();
1278+
buffer = {mIOMem.clusterNativeMCBuffer.get(), mIOMem.clusterNativeMCView.get()};
12791279
}
12801280

12811281
assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
@@ -1330,15 +1330,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
13301330
if (doGPU && synchronizeCalibUpdate) {
13311331
SynchronizeStream(0);
13321332
}
1333-
if (buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4)) {
1334-
for (uint32_t i = 0; i < NSECTORS; i++) {
1335-
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
1336-
std::sort(&tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j]], &tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j] + tmpNativeAccess->nClusters[i][j]]);
1337-
}
1338-
}
1339-
if (buildNativeGPU) {
1340-
GPUMemCpy(RecoStep::TPCClusterFinding, (void*)mInputsShadow->mPclusterNativeBuffer, (const void*)tmpNativeClusters, nClsTotal * sizeof(tmpNativeClusters[0]), -1, true);
1341-
}
1333+
if (sortClusters) {
1334+
SortClusters(buildNativeGPU, propagateMCLabels, tmpNativeAccess, tmpNativeClusters);
13421335
}
13431336
mRec->MemoryScalers()->nTPCHits = nClsTotal;
13441337
mRec->PopNonPersistentMemory(RecoStep::TPCClusterFinding, qStr2Tag("TPCCLUST"));
@@ -1354,3 +1347,60 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
13541347
#endif
13551348
return 0;
13561349
}
1350+
1351+
void GPUChainTracking::SortClusters(bool buildNativeGPU, bool propagateMCLabels, ClusterNativeAccess* clusterAccess, ClusterNative* clusters)
1352+
{
1353+
if (propagateMCLabels) {
1354+
std::vector<uint32_t> clsOrder(clusterAccess->nClustersTotal);
1355+
std::iota(clsOrder.begin(), clsOrder.end(), 0);
1356+
std::vector<ClusterNative> tmpClusters;
1357+
for (uint32_t i = 0; i < NSECTORS; i++) {
1358+
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
1359+
const uint32_t offset = clusterAccess->clusterOffset[i][j];
1360+
std::sort(&clsOrder[offset], &clsOrder[offset + clusterAccess->nClusters[i][j]], [&clusters](const uint32_t a, const uint32_t b) {
1361+
return clusters[a] < clusters[b];
1362+
});
1363+
tmpClusters.resize(clusterAccess->nClusters[i][j]);
1364+
memcpy(tmpClusters.data(), &clusters[offset], clusterAccess->nClusters[i][j] * sizeof(tmpClusters[0]));
1365+
for (uint32_t k = 0; k < tmpClusters.size(); k++) {
1366+
clusters[offset + k] = tmpClusters[clsOrder[offset + k] - offset];
1367+
}
1368+
}
1369+
}
1370+
tmpClusters.clear();
1371+
1372+
std::pair<o2::dataformats::ConstMCLabelContainer*, o2::dataformats::ConstMCLabelContainerView*> labelBuffer;
1373+
GPUOutputControl* labelOutput = mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)];
1374+
std::unique_ptr<ConstMCLabelContainerView> tmpUniqueContainerView;
1375+
std::unique_ptr<ConstMCLabelContainer> tmpUniqueContainerBuffer;
1376+
if (labelOutput && labelOutput->allocator) {
1377+
ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* labelContainer = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(labelOutput->allocator(0));
1378+
labelBuffer = {&labelContainer->first, &labelContainer->second};
1379+
} else {
1380+
tmpUniqueContainerView = std::move(mIOMem.clusterNativeMCView);
1381+
tmpUniqueContainerBuffer = std::move(mIOMem.clusterNativeMCBuffer);
1382+
mIOMem.clusterNativeMCView = std::make_unique<ConstMCLabelContainerView>();
1383+
mIOMem.clusterNativeMCBuffer = std::make_unique<ConstMCLabelContainer>();
1384+
labelBuffer = {mIOMem.clusterNativeMCBuffer.get(), mIOMem.clusterNativeMCView.get()};
1385+
}
1386+
1387+
o2::dataformats::MCLabelContainer tmpContainer;
1388+
for (uint32_t i = 0; i < clusterAccess->nClustersTotal; i++) {
1389+
for (const auto& element : clusterAccess->clustersMCTruth->getLabels(clsOrder[i])) {
1390+
tmpContainer.addElement(i, element);
1391+
}
1392+
}
1393+
tmpContainer.flatten_to(*labelBuffer.first);
1394+
*labelBuffer.second = *labelBuffer.first;
1395+
clusterAccess->clustersMCTruth = labelBuffer.second;
1396+
} else {
1397+
for (uint32_t i = 0; i < NSECTORS; i++) {
1398+
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
1399+
std::sort(&clusters[clusterAccess->clusterOffset[i][j]], &clusters[clusterAccess->clusterOffset[i][j] + clusterAccess->nClusters[i][j]]);
1400+
}
1401+
}
1402+
}
1403+
if (buildNativeGPU) {
1404+
GPUMemCpy(RecoStep::TPCClusterFinding, (void*)mInputsShadow->mPclusterNativeBuffer, (const void*)clusters, clusterAccess->nClustersTotal * sizeof(clusters[0]), -1, true);
1405+
}
1406+
}

0 commit comments

Comments
 (0)