Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ set(HDRS_INSTALL
Definitions/GPULogging.h
Definitions/GPUSettingsList.h
Global/GPUChainTrackingDefs.h
Global/GPUChainTrackingDebug.h
Global/GPUChainTrackingGetters.inc
Global/GPUErrorCodes.h
Merger/GPUTPCGMBorderTrack.h
Expand Down
56 changes: 56 additions & 0 deletions GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,59 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io)
mMaxClusters += 16 - (mMaxClusters % 16);
}
}

// Writes a human-readable dump of the compressed TPC cluster output (mOutputFlat) to `out`.
//
// Sections are written in this order:
//   1. number of tracks,
//   2. cluster counts per sector and row (nSliceRowClusters),
//   3. cluster counts per track (nTrackClusters),
//   4. unattached clusters, grouped by sector and row,
//   5. attached clusters, grouped by track.
void GPUTPCCompression::DumpCompressedClusters(std::ostream& out)
{
  // All fields below are read through a CompressedClusters object initialized from the flat output.
  const o2::tpc::CompressedClusters O = *mOutputFlat;
  // Print packed values as numbers rather than as characters.
  const auto asU = [](auto v) { return static_cast<uint32_t>(v); };
  const auto asI = [](auto v) { return static_cast<int32_t>(v); };

  out << "\n\nCompressed Clusters:\n";
  out << O.nTracks << " Tracks\n";
  out << "Slice Row Clusters:\n";
  for (uint32_t i = 0; i < NSECTORS; i++) {
    out << "Sector " << i << ": ";
    for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
      out << O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] << ", ";
    }
    out << "\n";
  }
  out << "\nTrack Clusters:\n";
  for (uint32_t i = 0; i < O.nTracks; i++) {
    if (i && i % 100 == 0) { // line break every 100 entries
      out << "\n";
    }
    out << O.nTrackClusters[i] << ", ";
  }
  out << "\n\nUnattached Clusters\n";
  // The unattached arrays are indexed by a running offset over all (sector, row) pairs.
  uint32_t offset = 0;
  for (uint32_t i = 0; i < NSECTORS; i++) {
    for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
      const auto nRowClusters = O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j];
      out << "Sector " << i << " Row " << j << ": ";
      for (uint32_t k = 0; k < nRowClusters; k++) {
        if (k && k % 10 == 0) { // line break every 10 clusters
          out << "\n ";
        }
        const uint32_t l = k + offset;
        out << "[" << asU(O.qTotU[l]) << ", " << asU(O.qMaxU[l]) << ", " << asU(O.flagsU[l]) << ", " << asI(O.padDiffU[l]) << ", " << asI(O.timeDiffU[l]) << ", " << asU(O.sigmaPadU[l]) << ", " << asU(O.sigmaTimeU[l]) << "] ";
      }
      offset += nRowClusters;
      out << "\n";
    }
  }
  out << "\n\nAttached Clusters\n";
  // From here on, offset counts the attached clusters of all preceding tracks.
  offset = 0;
  for (uint32_t i = 0; i < O.nTracks; i++) {
    out << "Track " << i << ": {" << asU(O.qPtA[i]) << ", " << asU(O.rowA[i]) << ", " << asU(O.sliceA[i]) << ", " << asU(O.timeA[i]) << ", " << asU(O.padA[i]) << "} - ";
    for (uint32_t k = 0; k < O.nTrackClusters[i]; k++) {
      if (k && k % 10 == 0) { // line break every 10 clusters
        out << "\n ";
      }
      const uint32_t l1 = k + offset;
      out << "[";
      if (k) {
        // The diff/residual arrays are only read for k > 0, i.e. they hold no entry for the first
        // cluster of a track. With one entry fewer per track, cluster k of track i sits at
        // offset - i + (k - 1). The previous index (k + offset - i) was shifted by one and ran one
        // element past the end for the last cluster of the last track.
        // NOTE(review): assumes these arrays are packed without gaps, as in the TPC cluster decompressor - confirm.
        const uint32_t l2 = l1 - i - 1;
        out << asI(O.rowDiffA[l2]) << ", " << asI(O.sliceLegDiffA[l2]) << ", " << asU(O.padResA[l2]) << ", " << asU(O.timeResA[l2]) << ", ";
      }
      out << asU(O.qTotA[l1]) << ", " << asU(O.qMaxA[l1]) << ", " << asU(O.flagsA[l1]) << ", " << asU(O.sigmaPadA[l1]) << ", " << asU(O.sigmaTimeA[l1]) << "] ";
    }
    offset += O.nTrackClusters[i];
    out << "\n";
  }
}
4 changes: 4 additions & 0 deletions GPU/GPUTracking/DataCompression/GPUTPCCompression.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ class GPUTPCCompression : public GPUProcessor
// Forwards `charge` (by reference) to truncateSignificantBits, using param.rec.tpc.sigBitsCharge and P_MAX_QMAX.
GPUd() static void truncateSignificantBitsChargeMax(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QMAX); }
// Forwards `width` (by reference) to truncateSignificantBits, using param.rec.tpc.sigBitsWidth and P_MAX_SIGMA.
GPUd() static void truncateSignificantBitsWidth(uint8_t& width, const GPUParam& param) { truncateSignificantBits(width, param.rec.tpc.sigBitsWidth, P_MAX_SIGMA); }

#ifndef GPUCA_GPUCODE
// Writes a human-readable dump of the compressed cluster output to `out`.
// Only declared when GPUCA_GPUCODE is not defined.
void DumpCompressedClusters(std::ostream& out);
#endif

protected:
struct memory {
uint32_t nStoredTracks = 0;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Definitions/GPUSettingsList.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ AddOption(trdNCandidates, int32_t, 3, "", 0, "Number of branching track candidat
AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GPU track model for TRD tracking")
AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))")
AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)")
AddOption(debugMask, int32_t, 262143, "", 0, "Mask for debug output dumps to file")
AddOption(debugMask, uint32_t, 262143, "", 0, "Mask for debug output dumps to file")
AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures")
AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks")
AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6", def(1))
Expand Down
31 changes: 11 additions & 20 deletions GPU/GPUTracking/Global/GPUChain.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "GPUKernelClassesFwd.h"

#include <ctime>
#include <functional>

namespace o2::gpu
{
Expand Down Expand Up @@ -226,12 +227,19 @@ class GPUChain
virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; }

template <class T, class S, typename... Args>
bool DoDebugAndDump(RecoStep step, int32_t mask, T& processor, S T::*func, Args&&... args)
bool DoDebugAndDump(RecoStep step, uint32_t mask, T& processor, S T::*func, Args&&... args)
{
return DoDebugAndDump(step, mask, true, processor, func, args...);
}
template <class T, class S, typename... Args>
bool DoDebugAndDump(RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args);
bool DoDebugAndDump(RecoStep step, uint32_t mask, bool transfer, T& processor, S T::*func, Args&&... args);
// Calls func(args...) when the debug level is at least 6 and `mask` is 0 or overlaps the debugMask setting.
// Returns true if func was called, false otherwise.
template <typename... Args>
bool DoDebugDump(uint32_t mask, std::function<void(Args&...)> func, Args&... args);
// Overload for a callable passed by pointer (e.g. a static or free function).
// Wraps *func in a std::function and hands it, together with args, to the std::function overload of DoDebugDump.
template <class S, typename... Args>
bool DoDebugDump(uint32_t mask, S* func, Args&&... args)
{
  std::function<void(Args&&...)> wrapped = [&func](Args&&... fwd) { (*func)(fwd...); };
  return DoDebugDump(mask, wrapped, args...);
}

template <class T, class S, typename... Args>
int32_t runRecoStep(RecoStep step, S T::*func, Args... args);
Expand Down Expand Up @@ -278,24 +286,7 @@ inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args...
}

// Optionally transfers the processor's memory to the host and, if debug dumping is enabled for `mask`,
// invokes the member function `func` on `processor` with `args`.
// Returns true when the dump condition was met (even if `func` is null), false otherwise.
// Nothing happens unless the keepAllMemory setting is on.
template <class T, class S, typename... Args>
bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args)
{
  if (!GetProcessingSettings().keepAllMemory) {
    return false;
  }
  if (transfer) {
    TransferMemoryResourcesToHost(step, &processor, -1, true);
  }
  const bool dumpEnabled = GetProcessingSettings().debugLevel >= 6 && (mask == 0 || (GetProcessingSettings().debugMask & mask));
  if (!dumpEnabled) {
    return false;
  }
  if (func) {
    (processor.*func)(args...);
  }
  return true;
}

template <class T, class S, typename... Args>
int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args)
inline int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args)
{
if (GetRecoSteps().isSet(step)) {
auto* timer = GetProcessingSettings().recoTaskTiming ? &mRec->getRecoStepTimer(step) : nullptr;
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Global/GPUChainTracking.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ class GPUChainTracking : public GPUChain
void PrepareDebugOutput();
void PrintDebugOutput();
void PrintOutputStat();
// Writes the TPC clusters referenced by `clusters` to `out` as text, sector by sector and row by row.
static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters);

bool ValidateSteps();
bool ValidateSettings();
Expand Down
19 changes: 10 additions & 9 deletions GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "GPUChainTracking.h"
#include "GPUChainTrackingDefs.h"
#include "GPUChainTrackingDebug.h"
#include "GPULogging.h"
#include "GPUO2DataTypes.h"
#include "GPUMemorySizeScalers.h"
Expand Down Expand Up @@ -813,7 +814,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
if (fragment.index == 0) {
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy));
}
DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges");
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererZeroedCharges, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges");

if (doGPU) {
if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) {
Expand Down Expand Up @@ -900,7 +901,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
if (!mIOPtrs.tpcZS) {
runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::fillFromDigits>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
}
if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) {
if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererDigits, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) {
clusterer.DumpChargeMap(*mDebugFile, "Charges");
}

Expand All @@ -919,13 +920,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
}

runKernel<GPUTPCCFPeakFinder>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) {
if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) {
clusterer.DumpPeakMap(*mDebugFile, "Peaks");
}

RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane);
TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off
});
mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) {
uint32_t iSector = iSectorBase + lane;
Expand All @@ -939,13 +940,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
}
runKernel<GPUTPCCFNoiseSuppression, GPUTPCCFNoiseSuppression::noiseSuppression>({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}});
runKernel<GPUTPCCFNoiseSuppression, GPUTPCCFNoiseSuppression::updatePeaks>({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}});
if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) {
if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererSuppressedPeaks, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) {
clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks");
}

RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane);
TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererSuppressedPeaks, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off
});
mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) {
uint32_t iSector = iSectorBase + lane;
Expand Down Expand Up @@ -979,7 +980,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)

if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
}

// float time_clusterizer = 0, time_fill = 0, time_networks = 0;
Expand Down Expand Up @@ -1092,7 +1093,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
#endif
} else {
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0);
}

Expand All @@ -1111,7 +1112,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane);
laneHasData[lane] = true;
// Include clusters in default debug mask, exclude other debug output by default
DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererClusters, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off
});
mRec->SetNActiveThreadsOuterLoop(1);
}
Expand Down
3 changes: 3 additions & 0 deletions GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
/// \author David Rohr

#include "GPUChainTracking.h"
#include "GPUChainTrackingDebug.h"
#include "GPULogging.h"
#include "GPUO2DataTypes.h"
#include "GPUTrackingInputProvider.h"
Expand Down Expand Up @@ -202,6 +203,7 @@ int32_t GPUChainTracking::RunTPCCompression()
((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec);
}
mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR"));
DoDebugAndDump(RecoStep::TPCCompression, GPUChainTrackingDebugFlags::TPCCompressedClusters, Compressor, &GPUTPCCompression::DumpCompressedClusters, *mDebugFile);
return 0;
}

Expand Down Expand Up @@ -425,5 +427,6 @@ int32_t GPUChainTracking::RunTPCDecompression()
}
mRec->PopNonPersistentMemory(RecoStep::TPCDecompression, qStr2Tag("TPCDCMPR"));
}
DoDebugDump(GPUChainTrackingDebugFlags::TPCDecompressedClusters, &GPUChainTracking::DumpClusters, *mDebugFile, mIOPtrs.clustersNative);
return 0;
}
79 changes: 79 additions & 0 deletions GPU/GPUTracking/Global/GPUChainTrackingDebug.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUChainTrackingDebug.h
/// \author David Rohr

#ifndef GPUCHAINTRACKINGDEBUG_H
#define GPUCHAINTRACKINGDEBUG_H

#include <cstdint>
#include <functional>
#include <fstream>

namespace o2::gpu
{
// Bit flags selecting individual debug dumps; they are tested against the debugMask setting.
// NOTE: The default debugMask in GPUSettingsList.h enables every flag below (1 << 18) = 262144 with --debug 6;
//       flags from (1 << 18) upwards have to be enabled explicitly.
enum GPUChainTrackingDebugFlags : uint32_t {
  TPCSectorTrackingData = 1u << 0,
  TPCPreLinks = 1u << 1,
  TPCLinks = 1u << 2,
  TPCStartHits = 1u << 3,
  TPCTracklets = 1u << 4,
  TPCSectorTracks = 1u << 5,
  // bits 6 and 7 are not assigned
  TPCHitWeights = 1u << 8,
  TPCCompressedClusters = 1u << 9,
  TPCDecompressedClusters = 1u << 10,
  TPCMergingRanges = 1u << 11,
  TPCMergingSectorTracks = 1u << 12,
  TPCMergingMergedTracks = 1u << 13,
  TPCMergingCollectedTracks = 1u << 14,
  TPCMergingCE = 1u << 15,
  TPCMergingRefit = 1u << 16,
  TPCClustererClusters = 1u << 17,
  // not part of the default mask from here on
  TPCClusterer = 1u << 18,
  TPCClustererDigits = 1u << 19,
  TPCClustererPeaks = 1u << 20,
  TPCClustererSuppressedPeaks = 1u << 21,
  TPCClustererChargeMap = 1u << 22,
  TPCClustererZeroedCharges = 1u << 23
};

// Optionally transfers the processor's memory to the host, then delegates to DoDebugDump with a
// callable that invokes the member function `func` on `processor` (skipped if `func` is null).
// Returns the result of DoDebugDump, or false without transferring anything when keepAllMemory is off.
template <class T, class S, typename... Args>
inline bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, uint32_t mask, bool transfer, T& processor, S T::*func, Args&&... args)
{
  if (!GetProcessingSettings().keepAllMemory) {
    return false;
  }
  if (transfer) {
    TransferMemoryResourcesToHost(step, &processor, -1, true);
  }
  // Bind processor and member pointer so the generic DoDebugDump only has to pass the arguments.
  std::function<void(Args&&...)> dumpCall = [&processor, &func](Args&... dumpArgs) {
    if (func) {
      (processor.*func)(dumpArgs...);
    }
  };
  return DoDebugDump(mask, dumpCall, args...);
}

// Calls func(args...) if the debug level is at least 6 and `mask` is either 0 or shares a bit with debugMask.
// Returns true when func was called, false otherwise.
template <typename... Args>
inline bool GPUChain::DoDebugDump(uint32_t mask, std::function<void(Args&...)> func, Args&... args)
{
  const auto& settings = GetProcessingSettings();
  const bool dumpEnabled = settings.debugLevel >= 6 && (mask == 0 || (settings.debugMask & mask));
  if (!dumpEnabled) {
    return false;
  }
  func(args...);
  return true;
}

} // namespace o2::gpu

#endif
15 changes: 15 additions & 0 deletions GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,18 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster
}
}
}

// Writes the TPC clusters referenced by `clusters` to `out` as text.
//
// For every sector and every row, the cluster count is printed, followed by one line per cluster with
// timeFlagsPacked (hex), padPacked, sigmaTimePacked, sigmaPadPacked, qMax and qTot.
// If `clusters` is null, only the section header is written.
void GPUChainTracking::DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters)
{
  out << "\nTPC Clusters:\n";
  if (clusters == nullptr) {
    // Nothing to dump; avoid dereferencing a null pointer when no native clusters are available.
    return;
  }
  for (uint32_t iSec = 0; iSec < GPUCA_NSECTORS; iSec++) {
    out << "TPCClusters - Sector " << iSec << "\n";
    for (uint32_t i = 0; i < GPUCA_ROW_COUNT; i++) {
      const auto nRowClusters = clusters->nClusters[iSec][i];
      out << " Row: " << i << ": " << nRowClusters << " clusters:\n";
      for (uint32_t j = 0; j < nRowClusters; j++) {
        const auto& cl = clusters->clusters[iSec][i][j];
        // The 8-bit sigma fields are widened to int32_t so they are printed as numbers, not characters.
        out << " " << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTot << "\n";
      }
    }
  }
}
Loading