GPU/GPUTracking/Base/GPUReconstruction.cxx (27 additions, 9 deletions)
@@ -40,6 +40,7 @@
 
 #include "GPULogging.h"
 #include "utils/strtag.h"
+#include "utils/stdspinlock.h"
 
 #ifdef GPUCA_O2_LIB
 #include "GPUO2InterfaceConfiguration.h"
@@ -589,6 +590,7 @@ size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res,
     throw std::bad_alloc();
   }
   size_t retVal;
+  stdspinlock spinlock(mMemoryMutex);
   if ((res->mType & GPUMemoryResource::MEMORY_STACK) && memorypoolend) {
     retVal = ptrDiff((res->*setPtr)((char*)1), (char*)(1));
     memorypoolend = (void*)((char*)memorypoolend - GPUProcessor::getAlignmentMod<GPUCA_MEMALIGN>(memorypoolend));
@@ -639,9 +641,10 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
     res->mPtr = GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(res->mPtrDevice);
     res->SetPointers(res->mPtr);
     if (GetProcessingSettings().allocDebugLevel >= 2) {
-      std::cout << (res->mReuse >= 0 ? "Reused " : "Allocated ") << res->mName << ": " << res->mSize << "\n";
+      std::cout << (res->mReuse >= 0 ? "Reused " : "Allocated ") << res->mName << ": " << res->mSize << " (individual" << ((res->mType & GPUMemoryResource::MEMORY_STACK) ? " stack" : "") << ")\n";
     }
     if (res->mType & GPUMemoryResource::MEMORY_STACK) {
+      stdspinlock spinlock(mMemoryMutex);
       mNonPersistentIndividualAllocations.emplace_back(res);
     }
     if ((size_t)res->mPtr % GPUCA_BUFFER_ALIGNMENT) {
@@ -722,6 +725,7 @@ size_t GPUReconstruction::AllocateRegisteredMemory(int16_t ires, GPUOutputContro
 
 void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type)
 {
+  stdspinlock spinlock(mMemoryMutex);
   if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
     char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size];
     if ((type & GPUMemoryResource::MEMORY_STACK)) {
@@ -763,6 +767,7 @@ void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type)
 
 void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size)
 {
+  stdspinlock spinlock(mMemoryMutex);
   if (mVolatileMemoryStart == nullptr) {
     mVolatileMemoryStart = mDeviceMemoryPool;
   }
@@ -788,6 +793,7 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device)
     return AllocateVolatileDeviceMemory(size);
   }
   char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size];
+  stdspinlock spinlock(mMemoryMutex);
   mVolatileChunks.emplace_back(retVal, alignedDeleter());
   return retVal;
 }
@@ -877,8 +883,11 @@ void GPUReconstruction::PushNonPersistentMemory(uint64_t tag)
   mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), mNonPersistentIndividualDirectAllocations.size(), tag);
 }
 
-void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
+void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag, const GPUProcessor* proc)
 {
+  if (proc && GetProcessingSettings().memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
+    GPUFatal("Processor-depending memory-free works only with allocation strategy ALLOCATION_INDIVIDUAL");
+  }
   if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().disableMemoryReuse) {
     return;
   }
@@ -888,25 +897,34 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
   if (tag != 0 && std::get<4>(mNonPersistentMemoryStack.back()) != tag) {
     GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str());
   }
-  if ((GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) && (IsGPU() || GetProcessingSettings().forceHostMemoryPoolSize)) {
+  if (!proc && (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) && (IsGPU() || GetProcessingSettings().forceHostMemoryPoolSize)) {
     printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size());
     PrintMemoryOverview();
     printf("%76s", "");
     PrintMemoryMax();
   }
-  mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back());
-  mDeviceMemoryPoolEnd = std::get<1>(mNonPersistentMemoryStack.back());
   for (uint32_t i = std::get<2>(mNonPersistentMemoryStack.back()); i < mNonPersistentIndividualAllocations.size(); i++) {
     GPUMemoryResource* res = mNonPersistentIndividualAllocations[i];
+    if (proc && res->mProcessor != proc) {
+      continue;
+    }
+    if (GetProcessingSettings().allocDebugLevel >= 2 && (res->mPtr || res->mPtrDevice)) {
+      std::cout << "Freeing NonPersistent " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n";
+    }
     if (res->mReuse < 0) {
       operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
     }
     res->mPtr = nullptr;
     res->mPtrDevice = nullptr;
   }
-  mNonPersistentIndividualAllocations.resize(std::get<2>(mNonPersistentMemoryStack.back()));
-  mNonPersistentIndividualDirectAllocations.resize(std::get<3>(mNonPersistentMemoryStack.back()));
-  mNonPersistentMemoryStack.pop_back();
+  if (!proc) {
+    stdspinlock spinlock(mMemoryMutex);
+    mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back());
+    mDeviceMemoryPoolEnd = std::get<1>(mNonPersistentMemoryStack.back());
+    mNonPersistentIndividualAllocations.resize(std::get<2>(mNonPersistentMemoryStack.back()));
+    mNonPersistentIndividualDirectAllocations.resize(std::get<3>(mNonPersistentMemoryStack.back()));
+    mNonPersistentMemoryStack.pop_back();
+  }
 }
 
 void GPUReconstruction::BlockStackedMemory(GPUReconstruction* rec)
@@ -999,7 +1017,7 @@ void GPUReconstruction::PrintMemoryStatistics()
   }
   printf("%59s CPU / %9s GPU\n", "", "");
   for (auto it = sizes.begin(); it != sizes.end(); it++) {
-    printf("Allocation %30s %s: Size %'14zu / %'14zu\n", it->first.c_str(), it->second[2] ? "P" : " ", it->second[0], it->second[1]);
+    printf("Allocation %50s %s: Size %'14zu / %'14zu\n", it->first.c_str(), it->second[2] ? "P" : " ", it->second[0], it->second[1]);
   }
   PrintMemoryOverview();
   for (uint32_t i = 0; i < mChains.size(); i++) {
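
Note: the locking added above uses the new utils/stdspinlock.h header, which this diff does not show. Judging from its use as "stdspinlock spinlock(mMemoryMutex);" over the std::atomic_flag member added in GPUReconstruction.h below, it is presumably a scoped spinlock guard. A minimal sketch of what such a guard could look like, with all details assumed rather than taken from the actual header:

#include <atomic>

// Hypothetical sketch of a scoped spinlock guard over std::atomic_flag.
// The real implementation lives in utils/stdspinlock.h and may differ.
class stdspinlock
{
 public:
  stdspinlock(std::atomic_flag& flag) : mFlag(flag)
  {
    // Spin until the flag is acquired; the acquire ordering pairs with the
    // release in the destructor.
    while (mFlag.test_and_set(std::memory_order_acquire)) {
    }
  }
  ~stdspinlock() { mFlag.clear(std::memory_order_release); }
  stdspinlock(const stdspinlock&) = delete;
  stdspinlock& operator=(const stdspinlock&) = delete;

 private:
  std::atomic_flag& mFlag;
};
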
GPU/GPUTracking/Base/GPUReconstruction.h (3 additions, 1 deletion)
@@ -25,6 +25,7 @@
 #include <functional>
 #include <unordered_map>
 #include <unordered_set>
+#include <atomic>
 
 #include "GPUDataTypes.h"
 #include "GPUMemoryResource.h"
@@ -179,7 +180,7 @@ class GPUReconstruction
   void ReturnVolatileMemory();
   ThrustVolatileAllocator getThrustVolatileDeviceAllocator();
   void PushNonPersistentMemory(uint64_t tag);
-  void PopNonPersistentMemory(RecoStep step, uint64_t tag);
+  void PopNonPersistentMemory(RecoStep step, uint64_t tag, const GPUProcessor* proc = nullptr);
   void BlockStackedMemory(GPUReconstruction* rec);
   void UnblockStackedMemory();
   void ResetRegisteredMemoryPointers(GPUProcessor* proc);
@@ -390,6 +391,7 @@ class GPUReconstruction
   std::vector<std::unique_ptr<char[], alignedDeleter>> mNonPersistentIndividualDirectAllocations;
   std::vector<std::unique_ptr<char[], alignedDeleter>> mDirectMemoryChunks;
   std::vector<std::unique_ptr<char[], alignedDeleter>> mVolatileChunks;
+  std::atomic_flag mMemoryMutex = ATOMIC_FLAG_INIT;
 
   std::unique_ptr<GPUReconstructionPipelineContext> mPipelineContext;
 
GPU/GPUTracking/Base/GPUReconstructionCPU.cxx (16 additions, 12 deletions)
@@ -231,26 +231,24 @@ int32_t GPUReconstructionCPU::RunChains()
   }
   mTimerTotal.Start();
   const std::clock_t cpuTimerStart = std::clock();
+  int32_t retVal = 0;
   if (GetProcessingSettings().doublePipeline) {
-    int32_t retVal = EnqueuePipeline();
-    if (retVal) {
-      return retVal;
-    }
+    retVal = EnqueuePipeline();
   } else {
     if (mSlaves.size() || mMaster) {
       WriteConstantParams(); // Reinitialize // TODO: Get this in sync with GPUChainTracking::DoQueuedUpdates, and consider the doublePipeline
     }
     for (uint32_t i = 0; i < mChains.size(); i++) {
-      int32_t retVal = mChains[i]->RunChain();
-      if (retVal) {
-        return retVal;
-      }
-    }
-    if (GetProcessingSettings().tpcFreeAllocatedMemoryAfterProcessing) {
-      ClearAllocatedMemory();
+      retVal = mChains[i]->RunChain();
     }
   }
+  if (retVal != 0 && retVal != 2) {
+    return retVal;
+  }
   mTimerTotal.Stop();
+  if (GetProcessingSettings().tpcFreeAllocatedMemoryAfterProcessing) {
+    ClearAllocatedMemory();
+  }
   mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC;
   if (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) {
     GPUInfo("Allocated memory when ending processing %36s", "");
@@ -339,7 +337,13 @@ int32_t GPUReconstructionCPU::RunChains()
     mTimerTotal.Reset();
   }
 
-  return 0;
+  if (GetProcessingSettings().memoryStat) {
+    PrintMemoryStatistics();
+  } else if (GetProcessingSettings().debugLevel >= 2) {
+    PrintMemoryOverview();
+  }
+
+  return retVal;
 }
 
 void GPUReconstructionCPU::ResetDeviceProcessorTypes()
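
Note: the restructuring above hoists retVal out of both branches so that the timer stop, the optional ClearAllocatedMemory(), and the new statistics printout run before returning; only a status other than 0 or 2 aborts early (2 apparently being a non-fatal status). A condensed view of the new control flow, reconstructed from the hunks above rather than quoted verbatim:

int32_t retVal = 0;
if (GetProcessingSettings().doublePipeline) {
  retVal = EnqueuePipeline();
} else {
  for (uint32_t i = 0; i < mChains.size(); i++) {
    retVal = mChains[i]->RunChain(); // retVal keeps the status of the last chain run
  }
}
if (retVal != 0 && retVal != 2) {
  return retVal; // hard failure: skip timer stop, memory cleanup, and statistics
}
// ... mTimerTotal.Stop(), optional ClearAllocatedMemory(), statistics printout ...
return retVal;
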
GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx (6 additions, 6 deletions)
@@ -106,12 +106,12 @@ void GPUTPCDecompression::RegisterMemoryAllocation()
 {
   AllocateAndInitializeLate();
   mMemoryResInputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersInputGPU, GPUMemoryResource::MEMORY_INPUT_FLAG | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_EXTERNAL | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionInput");
-  mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersGPU, GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpBuffersGPU");
-  mResourceTmpIndexes = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersOutput, GPUMemoryResource::MEMORY_OUTPUT | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpBuffersOutput");
-  mResourceTmpClustersOffsets = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersInput, GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpBuffersInput");
-  mResourceTmpBufferBeforeFiltering = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpClusterNativeAccessForFiltering, GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpBufferForFiltering");
-  mResourceClusterNativeAccess = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersInputClusterNativeAccess, GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpClusterAccessForFiltering");
-  mResourceNClusterPerSectorRow = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersNClusterPerSectorRow, GPUMemoryResource::MEMORY_OUTPUT | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_SCRATCH, "TPCDecompressionTmpClusterCountForFiltering");
+  mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersGPU, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCDecompressionTmpBuffersGPU");
+  mResourceTmpIndexes = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersOutput, GPUMemoryResource::MEMORY_OUTPUT | GPUMemoryResource::MEMORY_STACK, "TPCDecompressionTmpBuffersOutput");
+  mResourceTmpClustersOffsets = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpNativeBuffersInput, GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_STACK, "TPCDecompressionTmpBuffersInput");
+  mResourceTmpBufferBeforeFiltering = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersTmpClusterNativeAccessForFiltering, GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCDecompressionTmpBufferForFiltering");
+  mResourceClusterNativeAccess = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersInputClusterNativeAccess, GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_STACK, "TPCDecompressionTmpClusterAccessForFiltering");
+  mResourceNClusterPerSectorRow = mRec->RegisterMemoryAllocation(this, &GPUTPCDecompression::SetPointersNClusterPerSectorRow, GPUMemoryResource::MEMORY_OUTPUT | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_STACK, "TPCDecompressionTmpClusterCountForFiltering");
 }
 
 void GPUTPCDecompression::SetMaxData(const GPUTrackingInOutPointers& io)
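
Note: adding GPUMemoryResource::MEMORY_STACK to these registrations moves the temporary decompression buffers from permanent scratch into the non-persistent stack handled by PushNonPersistentMemory()/PopNonPersistentMemory() in GPUReconstruction.cxx above, so they are released when the corresponding stack level is popped instead of living for the whole run.
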
GPU/GPUTracking/Definitions/GPUSettingsList.h (1 addition, 1 deletion)
@@ -377,6 +377,7 @@ AddOption(debugOnFailureMaxFiles, uint32_t, 0, "", 0, "Max number of files to ha
 AddOption(debugOnFailureMaxSize, uint32_t, 0, "", 0, "Max size of existing dumps in the target folder in GB")
 AddOption(debugOnFailureDirectory, std::string, ".", "", 0, "Target folder for debug / dump")
 AddOption(amdMI100SerializationWorkaround, bool, false, "", 0, "Enable workaround that mitigates MI100 serialization bug")
+AddOption(memoryStat, bool, false, "", 0, "Print memory statistics")
 AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr)
 AddSubConfig(GPUSettingsProcessingRTC, rtc)
 AddSubConfig(GPUSettingsProcessingRTCtechnical, rtctech)
@@ -587,7 +588,6 @@ AddOption(zsVersion, int32_t, 2, "", 0, "ZS Version: 1 = 10-bit ADC row based, 2
 AddOption(dumpEvents, bool, false, "", 0, "Dump events (after transformation such as encodeZS")
 AddOption(stripDumpedEvents, bool, false, "", 0, "Remove redundant inputs (e.g. digits and ZS) before dumping")
 AddOption(printSettings, int32_t, 0, "", 0, "Print all settings", def(1))
-AddOption(memoryStat, bool, false, "", 0, "Print memory statistics")
 AddOption(testSyncAsync, bool, false, "syncAsync", 0, "Test first synchronous and then asynchronous processing")
 AddOption(testSync, bool, false, "sync", 0, "Test settings for synchronous phase")
 AddOption(timeFrameTime, bool, false, "tfTime", 0, "Print some debug information about time frame processing time")
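
Note: memoryStat is moved rather than removed: the option now sits among the processing settings (first hunk), matching the new GetProcessingSettings().memoryStat query at the end of GPUReconstructionCPU::RunChains() above.
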
GPU/GPUTracking/Global/GPUChainTracking.cxx (4 additions, 0 deletions)
@@ -278,6 +278,10 @@ bool GPUChainTracking::ValidateSettings()
     return false;
   }
   if (GetProcessingSettings().doublePipeline) {
+    if (GetProcessingSettings().tpcFreeAllocatedMemoryAfterProcessing) {
+      GPUError("Cannot use double pipeline with tpcFreeAllocatedMemoryAfterProcessing");
+      return false;
+    }
     if (!GetRecoStepsOutputs().isOnlySet(GPUDataTypes::InOutType::TPCMergedTracks, GPUDataTypes::InOutType::TPCCompressedClusters, GPUDataTypes::InOutType::TPCClusters)) {
       GPUError("Invalid outputs for double pipeline mode 0x%x", (uint32_t)GetRecoStepsOutputs());
       return false;
GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx (17 additions, 9 deletions)
@@ -142,8 +142,10 @@ void GPUChainTracking::PrintMemoryStatistics()
   std::map<std::string, GPUChainTrackingMemUsage> usageMap;
   for (int32_t i = 0; i < NSECTORS; i++) {
 #ifdef GPUCA_TPC_GEOMETRY_O2
-    addToMap("TPC Clusterer Sector Peaks", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nPeaks, processors()->tpcClusterer[i].mNMaxPeaks);
-    addToMap("TPC Clusterer Sector Clusters", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nClusters, processors()->tpcClusterer[i].mNMaxClusters);
+    if (processors()->tpcClusterer[i].mPmemory) {
+      addToMap("TPC Clusterer Sector Peaks", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nPeaks, processors()->tpcClusterer[i].mNMaxPeaks);
+      addToMap("TPC Clusterer Sector Clusters", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nClusters, processors()->tpcClusterer[i].mNMaxClusters);
+    }
 #endif
     addToMap("TPC Sector Start Hits", usageMap, *processors()->tpcTrackers[i].NStartHits(), processors()->tpcTrackers[i].NMaxStartHits());
     addToMap("TPC Sector Tracklets", usageMap, *processors()->tpcTrackers[i].NTracklets(), processors()->tpcTrackers[i].NMaxTracklets());
@@ -152,18 +154,22 @@ void GPUChainTracking::PrintMemoryStatistics()
     addToMap("TPC Sector TrackHits", usageMap, *processors()->tpcTrackers[i].NTrackHits(), processors()->tpcTrackers[i].NMaxTrackHits());
   }
   addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits));
-  addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks());
-  addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NMergedTrackClusters(), processors()->tpcMerger.NMaxMergedTrackClusters());
+  if (processors()->tpcMerger.Memory()) {
+    addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks());
+    addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NMergedTrackClusters(), processors()->tpcMerger.NMaxMergedTrackClusters());
+  }
 
   if (mRec->GetProcessingSettings().createO2Output) {
     addToMap("TPC O2 Tracks", usageMap, processors()->tpcMerger.NOutputTracksTPCO2(), processors()->tpcMerger.NOutputTracksTPCO2());
     addToMap("TPC O2 ClusRefs", usageMap, processors()->tpcMerger.NOutputClusRefsTPCO2(), processors()->tpcMerger.NOutputClusRefsTPCO2());
   }
 
 #ifdef GPUCA_TPC_GEOMETRY_O2
-  addToMap("TPC ComprCache HitsAttached", usageMap, processors()->tpcCompressor.mOutput->nAttachedClusters, processors()->tpcCompressor.mMaxTrackClusters);
-  addToMap("TPC ComprCache HitsUnattached", usageMap, processors()->tpcCompressor.mOutput->nUnattachedClusters, processors()->tpcCompressor.mMaxClustersInCache);
-  addToMap("TPC ComprCache Tracks", usageMap, processors()->tpcCompressor.mOutput->nTracks, processors()->tpcCompressor.mMaxTracks);
+  if (processors()->tpcCompressor.mOutput) {
+    addToMap("TPC ComprCache HitsAttached", usageMap, processors()->tpcCompressor.mOutput->nAttachedClusters, processors()->tpcCompressor.mMaxTrackClusters);
+    addToMap("TPC ComprCache HitsUnattached", usageMap, processors()->tpcCompressor.mOutput->nUnattachedClusters, processors()->tpcCompressor.mMaxClustersInCache);
+    addToMap("TPC ComprCache Tracks", usageMap, processors()->tpcCompressor.mOutput->nTracks, processors()->tpcCompressor.mMaxTracks);
+  }
 #endif
 
   for (auto& elem : usageMap) {
@@ -180,8 +186,10 @@ void GPUChainTracking::PrintMemoryRelations()
     GPUInfo("MEMREL SectorTracks NCl %d NTrk %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTracks());
     GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits());
   }
-  GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks());
-  GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTrackClusters());
+  if (processors()->tpcMerger.Memory()) {
+    GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks());
+    GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTrackClusters());
+  }
 }
 
 void GPUChainTracking::PrepareKernelDebugOutput()
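
Note: the added null checks guard the statistics printers against processors whose buffers were never allocated or have already been freed, a situation that can now arise because RunChains() may print statistics after tpcFreeAllocatedMemoryAfterProcessing has cleared the allocations (an inference from the hunks above, not stated in the PR).
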
GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx (3 additions, 0 deletions)
@@ -224,6 +224,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
       GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks());
     }
     DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCSectorTracks, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile);
+    if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && !trk.MemoryReuseAllowed()) {
+      mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLTRK"), &trk);
+    }
   });
   mRec->SetNActiveThreadsOuterLoop(1);
   if (error) {
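
Note: with the new proc argument, PopNonPersistentMemory() frees only that processor's individual stack allocations and leaves the stack level itself in place (the pool-pointer reset, resizes, and pop_back are skipped when proc is set). A hedged sketch of the assumed pairing, with the surrounding push and the final full pop not shown in this diff:

// Assumed usage; requires memoryAllocationStrategy == ALLOCATION_INDIVIDUAL,
// as enforced by the GPUFatal check in GPUReconstruction.cxx above.
mRec->PushNonPersistentMemory(qStr2Tag("TPCSLTRK"));
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
  GPUTPCTracker& trk = processors()->tpcTrackers[iSector];
  // ... run sector tracking for this sector ...
  // Free only this tracker's non-persistent buffers as soon as it is done:
  mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLTRK"), &trk);
}
// Later, unwind the whole stack level with a regular pop (no proc argument):
mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLTRK"));
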