Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUReconstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ class GPUReconstruction
std::vector<uint16_t> res;
};
// Deleter functor: releases `ptr` through the global aligned deallocation function,
// passing std::align_val_t(GPUCA_BUFFER_ALIGNMENT) as the alignment argument.
// NOTE(review): the two operator() lines below differ only in scalar vs. array form
// (`::operator delete` vs `::operator delete[]`). In this diff view they appear to be the
// removed and the added version of the same line; only one of them can exist in the header.
// NOTE(review): the deallocation form has to match the allocation form (aligned
// `operator new` vs aligned `operator new[]`, same alignment) — the allocation site is
// not visible here, TODO confirm.
struct alignedDeleter {
void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); };
void operator()(void* ptr) { ::operator delete[](ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); };
};
std::unordered_map<GPUMemoryReuse::ID, MemoryReuseMeta> mMemoryReuse1to1;
std::vector<std::tuple<void*, void*, size_t, size_t, uint64_t>> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCount, tag
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io)
mMaxClusters = io.clustersNative->nClustersTotal;
mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024;
mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024;
mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include
mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NMergedTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include
mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks();
if (mMaxClusters % 16) {
mMaxClusters += 16 - (mMaxClusters % 16);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ void GPUChainTracking::PrintMemoryStatistics()
}
addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits));
addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks());
addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NOutputTrackClusters(), processors()->tpcMerger.NMaxOutputTrackClusters());
addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NMergedTrackClusters(), processors()->tpcMerger.NMaxMergedTrackClusters());

if (mRec->GetProcessingSettings().createO2Output) {
addToMap("TPC O2 Tracks", usageMap, processors()->tpcMerger.NOutputTracksTPCO2(), processors()->tpcMerger.NOutputTracksTPCO2());
Expand Down Expand Up @@ -182,7 +182,7 @@ void GPUChainTracking::PrintMemoryRelations()
GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits());
}
GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks());
GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters());
GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTrackClusters());
}

void GPUChainTracking::PrepareKernelDebugOutput()
Expand Down
10 changes: 5 additions & 5 deletions GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
if (param().dodEdxEnabled) {
GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0);
}
GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0);
if (param().par.earlyTpcTransform) {
GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0);
GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NMergedTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0);
}
GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0);
}
Expand Down Expand Up @@ -330,7 +330,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
mIOPtrs.nMergedTracks = Merger.NMergedTracks();
mIOPtrs.mergedTrackHits = Merger.Clusters();
mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ();
mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
mIOPtrs.nMergedTrackHits = Merger.NMergedTrackClusters();
mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment();
mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt();
mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2();
Expand All @@ -344,7 +344,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks();
processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters();
processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ();
processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters();
processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NMergedTrackClusters();
processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment();
processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt();
processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2();
Expand All @@ -355,7 +355,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
}

if (GetProcessingSettings().debugLevel >= 2) {
GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters());
GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NMergedTrackClusters(), Merger.NClusters());
}
return 0;
}
41 changes: 20 additions & 21 deletions GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -372,9 +372,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem)
computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks);
}
}
computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters);
computePointerWithAlignment(mem, mClusters, mNMaxMergedTrackClusters);
if (mRec->GetParam().par.earlyTpcTransform) {
computePointerWithAlignment(mem, mClustersXYZ, mNMaxOutputTrackClusters);
computePointerWithAlignment(mem, mClustersXYZ, mNMaxMergedTrackClusters);
}
computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters);
return mem;
Expand Down Expand Up @@ -446,7 +446,7 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io)
mNMaxSingleSectorTracks = ntrk;
}
}
mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters);
mNMaxMergedTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters);
if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) {
mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field
} else {
Expand Down Expand Up @@ -1354,14 +1354,14 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i
continue;
}

uint32_t newRef = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, trk[0]->NClusters() + trk[1]->NClusters());
if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxOutputTrackClusters) {
raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxOutputTrackClusters);
for (uint32_t k = newRef; k < mNMaxOutputTrackClusters; k++) {
uint32_t newRef = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, trk[0]->NClusters() + trk[1]->NClusters());
if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxMergedTrackClusters) {
raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxMergedTrackClusters);
for (uint32_t k = newRef; k < mNMaxMergedTrackClusters; k++) {
mClusters[k].num = 0;
mClusters[k].state = 0;
}
CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters);
CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters);
return;
}

Expand Down Expand Up @@ -1513,7 +1513,6 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
GPUTPCGMSectorTrack* trackParts[kMaxParts];

for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {

GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];

if (track.PrevSegmentNeighbour() >= 0) {
Expand Down Expand Up @@ -1711,20 +1710,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
nHits = nFilteredHits;
}

const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits);
if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) {
raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters);
CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters);
const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits);
if (iMergedTrackFirstCluster + nHits > mNMaxMergedTrackClusters) {
raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters);
CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters);
continue;
}

GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster;
GPUTPCGMMergedTrackHit* const cl = mClusters + iMergedTrackFirstCluster;

for (int32_t i = 0; i < nHits; i++) {
uint8_t state;
if (Param().par.earlyTpcTransform) {
const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()];
GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster;
GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster;
clXYZ[i].x = c.x;
clXYZ[i].y = c.y;
clXYZ[i].z = c.z;
Expand Down Expand Up @@ -1759,13 +1758,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
mergedTrack.SetLooper(leg > 0);
mergedTrack.SetLegs(leg);
mergedTrack.SetNClusters(nHits);
mergedTrack.SetFirstClusterRef(iOutTrackFirstCluster);
mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster);
GPUTPCGMTrackParam& p1 = mergedTrack.Param();
const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex];
mergedTrack.SetCSide(p2.CSide());

GPUTPCGMBorderTrack b;
const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row);
const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row);
if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) {
p1.X() = toX;
p1.Y() = b.Par()[0];
Expand Down Expand Up @@ -1796,13 +1795,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread
if (Param().rec.tpc.mergeCE) {
bool CEside;
if (Param().par.earlyTpcTransform) {
const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster;
const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster;
CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z);
} else {
auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear;
CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime();
}
MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack);
MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iMergedTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack);
}
} // itr
}
Expand Down Expand Up @@ -1855,7 +1854,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThr

GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
{
for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nBlocks * nThreads) {
for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nBlocks * nThreads) {
if (mSharedCount[mClusters[i].num] > 1) {
mClusters[i].state |= GPUTPCGMMergedTrackHit::flagShared;
}
Expand All @@ -1876,7 +1875,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t
for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) {
mTrackSort[mTrackOrderAttach[i]] = i;
}
for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) {
for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nThreads * nBlocks) {
mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below
}
}
Expand Down
8 changes: 4 additions & 4 deletions GPU/GPUTracking/Merger/GPUTPCGMMerger.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class GPUTPCGMMerger : public GPUProcessor
GPUAtomic(uint32_t) nLoopData;
GPUAtomic(uint32_t) nUnpackedTracks;
GPUAtomic(uint32_t) nMergedTracks;
GPUAtomic(uint32_t) nOutputTrackClusters;
GPUAtomic(uint32_t) nMergedTrackClusters;
GPUAtomic(uint32_t) nO2Tracks;
GPUAtomic(uint32_t) nO2ClusRefs;
const GPUTPCTrack* firstExtrapolatedTracks[NSECTORS];
Expand Down Expand Up @@ -113,8 +113,8 @@ class GPUTPCGMMerger : public GPUProcessor
// Getters returning the value of the correspondingly named member unchanged.
GPUhdi() uint32_t NClusters() const { return mNClusters; }
GPUhdi() uint32_t NMaxClusters() const { return mNMaxClusters; }
GPUhdi() uint32_t NMaxTracks() const { return mNMaxTracks; }
// NOTE(review): the next four lines look like the removed/added pair of a rename
// (…OutputTrackClusters -> …MergedTrackClusters) as rendered by this diff view.
GPUhdi() uint32_t NMaxOutputTrackClusters() const { return mNMaxOutputTrackClusters; }
// Unlike the getters above, this one dereferences mMemory to read the counter.
GPUhdi() uint32_t NOutputTrackClusters() const { return mMemory->nOutputTrackClusters; }
GPUhdi() uint32_t NMaxMergedTrackClusters() const { return mNMaxMergedTrackClusters; }
// Dereferences mMemory to read the counter.
GPUhdi() uint32_t NMergedTrackClusters() const { return mMemory->nMergedTrackClusters; }
// Const and non-const access to the mClusters pointer, then const access to mClustersXYZ.
GPUhdi() const GPUTPCGMMergedTrackHit* Clusters() const { return mClusters; }
GPUhdi() GPUTPCGMMergedTrackHit* Clusters() { return (mClusters); }
GPUhdi() const GPUTPCGMMergedTrackHitXYZ* ClustersXYZ() const { return mClustersXYZ; }
Expand Down Expand Up @@ -249,7 +249,7 @@ class GPUTPCGMMerger : public GPUProcessor
uint32_t mNTotalSectorTracks = 0; // maximum number of incoming sector tracks
uint32_t mNMaxTracks = 0; // maximum number of output tracks
uint32_t mNMaxSingleSectorTracks = 0; // max N tracks in one sector
uint32_t mNMaxOutputTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters)
uint32_t mNMaxMergedTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters)
uint32_t mNMaxClusters = 0; // max total unique clusters (in event)
uint32_t mNMaxLooperMatches = 0; // Maximum number of candidate pairs for looper matching

Expand Down
3 changes: 0 additions & 3 deletions GPU/GPUTracking/SectorTracker/GPUTPCTrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ class GPUTPCTrack
// Byte size of one track record: sizeof(GPUTPCTrack) plus nClust entries of
// sizeof(GPUTPCSectorOutCluster) each. The sum is returned as int32_t.
GPUhd() static int32_t GetSize(int32_t nClust)
{
  const auto clusterBytes = nClust * sizeof(GPUTPCSectorOutCluster);
  return sizeof(GPUTPCTrack) + clusterBytes;
}
// Address GetSize(mNHits) bytes past this object, viewed as a const GPUTPCTrack.
GPUhd() const GPUTPCTrack* GetNextTrack() const
{
  const char* const self = (const char*)this;
  return (const GPUTPCTrack*)(self + GetSize(mNHits));
}
// Address GetSize(mNHits) bytes past this object, viewed as a mutable GPUTPCTrack.
GPUhd() GPUTPCTrack* NextTrack()
{
  char* const self = (char*)this;
  return (GPUTPCTrack*)(self + GetSize(mNHits));
}
// Stores v into slot i of the GPUTPCSectorOutCluster array that begins
// sizeof(*this) bytes after the start of this object. No bounds check on i.
GPUhd() void SetOutTrackCluster(int32_t i, const GPUTPCSectorOutCluster& v)
{
  GPUTPCSectorOutCluster* const trailing = (GPUTPCSectorOutCluster*)((char*)this + sizeof(*this));
  trailing[i] = v;
}
// Read-only pointer to the GPUTPCSectorOutCluster array that begins
// sizeof(*this) bytes after the start of this object.
GPUhd() const GPUTPCSectorOutCluster* OutTrackClusters() const
{
  const char* const payload = (const char*)this + sizeof(*this);
  return (const GPUTPCSectorOutCluster*)payload;
}
// Read-only reference to element i of the array returned by OutTrackClusters().
// No bounds check on i.
GPUhd() const GPUTPCSectorOutCluster& OutTrackCluster(int32_t i) const
{
  const GPUTPCSectorOutCluster* const trailing = OutTrackClusters();
  return trailing[i];
}

private:
int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array
Expand Down