Skip to content

Commit 408bae4

Browse files
committed
GPU: Add sorting of tracks of attached compressed clusters in deterministic mode
1 parent edea164 commit 408bae4

File tree

4 files changed

+61
-12
lines changed

4 files changed

+61
-12
lines changed

GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ void GPUTPCCompression::DumpCompressedClusters(std::ostream& out)
140140
for (uint32_t i = 0; i < NSECTORS; i++) {
141141
out << "Sector " << i << ": ";
142142
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
143-
out << O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] << ", ";
143+
out << (O.nSliceRowClusters ? O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] : 0) << ", ";
144144
}
145145
out << "\n";
146146
}
@@ -153,18 +153,20 @@ void GPUTPCCompression::DumpCompressedClusters(std::ostream& out)
153153
}
154154
out << "\n\nUnattached Clusters\n";
155155
uint32_t offset = 0;
156-
for (uint32_t i = 0; i < NSECTORS; i++) {
157-
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
158-
out << "Sector " << i << " Row " << j << ": ";
159-
for (uint32_t k = 0; k < O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; k++) {
160-
if (k && k % 10 == 0) {
161-
out << "\n ";
156+
if (O.nSliceRowClusters) {
157+
for (uint32_t i = 0; i < NSECTORS; i++) {
158+
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
159+
out << "Sector " << i << " Row " << j << ": ";
160+
for (uint32_t k = 0; k < O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; k++) {
161+
if (k && k % 10 == 0) {
162+
out << "\n ";
163+
}
164+
const uint32_t l = k + offset;
165+
out << "[" << (uint32_t)O.qTotU[l] << ", " << (uint32_t)O.qMaxU[l] << ", " << (uint32_t)O.flagsU[l] << ", " << (int32_t)O.padDiffU[l] << ", " << (int32_t)O.timeDiffU[l] << ", " << (uint32_t)O.sigmaPadU[l] << ", " << (uint32_t)O.sigmaTimeU[l] << "] ";
162166
}
163-
const uint32_t l = k + offset;
164-
out << "[" << (uint32_t)O.qTotU[l] << ", " << (uint32_t)O.qMaxU[l] << ", " << (uint32_t)O.flagsU[l] << ", " << (int32_t)O.padDiffU[l] << ", " << (int32_t)O.timeDiffU[l] << ", " << (uint32_t)O.sigmaPadU[l] << ", " << (uint32_t)O.sigmaTimeU[l] << "] ";
167+
offset += O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j];
168+
out << "\n";
165169
}
166-
offset += O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j];
167-
out << "\n";
168170
}
169171
}
170172
out << "\n\nAttached Clusters\n";
@@ -175,7 +177,7 @@ void GPUTPCCompression::DumpCompressedClusters(std::ostream& out)
175177
if (k && k % 10 == 0) {
176178
out << "\n ";
177179
}
178-
const uint32_t l1 = k + offset, l2 = k + offset - i;
180+
const uint32_t l1 = offset + k, l2 = offset - i + k - 1;
179181
out << "[";
180182
if (k) {
181183
out << (int32_t)O.rowDiffA[l2] << ", " << (int32_t)O.sliceLegDiffA[l2] << ", " << (uint32_t)O.padResA[l2] << ", " << (uint32_t)O.timeResA[l2] << ", ";

GPU/GPUTracking/Global/GPUChainTracking.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ class GPUChainTracking : public GPUChain
235235
void PrintDebugOutput();
236236
void PrintOutputStat();
237237
static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters);
238+
static void DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat* cls);
238239

239240
bool ValidateSteps();
240241
bool ValidateSettings();

GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,10 @@ int32_t GPUChainTracking::RunTPCCompression()
203203
((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec);
204204
}
205205
mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR"));
206+
if (GetProcessingSettings().deterministicGPUReconstruction) {
207+
SynchronizeGPU();
208+
DebugSortCompressedClusters(Compressor.mOutputFlat);
209+
}
206210
DoDebugAndDump(RecoStep::TPCCompression, GPUChainTrackingDebugFlags::TPCCompressedClusters, Compressor, &GPUTPCCompression::DumpCompressedClusters, *mDebugFile);
207211
return 0;
208212
}

GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <map>
2121
#include <memory>
2222
#include <string>
23+
#include <numeric>
2324

2425
#ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE
2526
#include "bitmapfile.h"
@@ -348,3 +349,44 @@ void GPUChainTracking::DumpClusters(std::ostream& out, const o2::tpc::ClusterNat
348349
}
349350
}
350351
}
352+
353+
/// Reorders, in place, the per-track and attached-cluster arrays of a compressed
/// cluster container so that tracks appear in ascending order of the key
/// (sliceA, rowA, timeA, padA, qPtA).
///
/// Called from RunTPCCompression when deterministicGPUReconstruction is enabled,
/// so that the subsequent dump does not depend on the order in which tracks were
/// written.
///
/// Three groups of arrays are permuted consistently:
///  - per-cluster arrays (qTotA, qMaxA, flagsA, sigmaPadA, sigmaTimeA):
///    track j owns nTrackClusters[j] consecutive entries;
///  - "reduced" per-cluster arrays (rowDiffA, sliceLegDiffA, padResA, timeResA):
///    track j owns nTrackClusters[j] - 1 consecutive entries;
///  - per-track arrays (qPtA, rowA, sliceA, timeA, padA, nTrackClusters):
///    exactly one entry per track.
///
/// @param cls Flat compressed-cluster container whose arrays are rewritten in
///            place. A null pointer or a container without tracks is a no-op.
///
/// NOTE(review): tracks whose five key fields are all equal compare equivalent,
/// and std::sort is not stable, so their relative order is unspecified.
/// NOTE(review): assumes every track has at least one cluster; with
/// nTrackClusters[j] == 0 the reduced size (n - 1) would be negative — confirm.
void GPUChainTracking::DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat* cls)
{
  if (cls == nullptr) {
    return;
  }
  o2::tpc::CompressedClusters c = *cls;
  if (c.nTracks == 0) {
    // Nothing to reorder. Returning here also avoids handing possibly-null array
    // pointers to memcpy below, which is undefined behaviour even for zero bytes.
    // NOTE(review): presumably the array pointers may be null when empty — confirm.
    return;
  }

  // sorted[i] = original index of the track that ends up at position i.
  std::vector<uint32_t> sorted(c.nTracks), offsets(c.nTracks);
  std::iota(sorted.begin(), sorted.end(), 0);
  auto sorter = [&c](const auto a, const auto b) {
    return std::tie(c.sliceA[a], c.rowA[a], c.timeA[a], c.padA[a], c.qPtA[a]) <
           std::tie(c.sliceA[b], c.rowA[b], c.timeA[b], c.padA[b], c.qPtA[b]);
  };
  std::sort(sorted.begin(), sorted.end(), sorter);

  // offsets[i] = index of the first cluster of track i in the per-cluster arrays
  // (exclusive prefix sum of nTrackClusters, taken in the ORIGINAL track order).
  uint32_t offset = 0;
  for (uint32_t i = 0; i < c.nTracks; i++) {
    offsets[i] = offset;
    offset += c.nTrackClusters[i];
  }

  // Permutes one array in place: takes a snapshot of the whole array, then copies
  // each track's chunk back in sorted order.
  //   getOffset(clusterOffset, trackIndex) -> start of the track's chunk in the snapshot
  //   getSize(trackIndex)                  -> number of elements in the track's chunk
  auto sortArray = [&c, &sorted, &offsets](auto* src, size_t totalSize, auto getOffset, auto getSize) {
    auto buf = std::make_unique<std::remove_reference_t<decltype(src[0])>[]>(totalSize);
    memcpy(buf.get(), src, totalSize * sizeof(*src));
    uint32_t targetOffset = 0;
    for (uint32_t i = 0; i < c.nTracks; i++) {
      const uint32_t j = sorted[i];
      memcpy(src + targetOffset, buf.get() + getOffset(offsets[j], j), getSize(j) * sizeof(*src));
      targetOffset += getSize(j);
    }
  };
  // Applies sortArray with identical layout parameters to every array passed.
  auto sortMultiple = [&sortArray](size_t totalSize, auto getOffset, auto getSize, auto&&... arrays) {
    (..., sortArray(std::forward<decltype(arrays)>(arrays), totalSize, getOffset, getSize));
  };

  // Chunk start for arrays with one entry per cluster.
  auto getFullOffset = [](uint32_t off, uint32_t /*ind*/) { return off; };
  // Chunk start for arrays with (n - 1) entries per track: one entry less for
  // each of the `ind` preceding tracks.
  auto getReducedOffset = [](uint32_t off, uint32_t ind) { return off - ind; };
  // Chunk start for arrays with one entry per track.
  auto getIndex = [](uint32_t /*off*/, uint32_t ind) { return ind; };
  auto getN = [&c](uint32_t j) { return c.nTrackClusters[j]; };
  auto getN1 = [&c](uint32_t j) { return c.nTrackClusters[j] - 1; };
  auto get1 = [](uint32_t /*j*/) { return 1; };

  sortMultiple(c.nAttachedClusters, getFullOffset, getN, c.qTotA, c.qMaxA, c.flagsA, c.sigmaPadA, c.sigmaTimeA);
  sortMultiple(c.nAttachedClustersReduced, getReducedOffset, getN1, c.rowDiffA, c.sliceLegDiffA, c.padResA, c.timeResA);
  sortMultiple(c.nTracks, getIndex, get1, c.qPtA, c.rowA, c.sliceA, c.timeA, c.padA, c.nTrackClusters); // NOTE: This must be last, since nTrackClusters is used for handling the arrays above!
}

0 commit comments

Comments
 (0)