Skip to content

Commit 01aa0c5

Browse files
committed
GPU Display: Speed up drawing clusters with many collisions
1 parent a027d11 commit 01aa0c5

File tree

2 files changed

+78
-60
lines changed

2 files changed

+78
-60
lines changed

GPU/GPUTracking/display/GPUDisplay.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -193,7 +193,7 @@ class GPUDisplay : public GPUDisplayInterface
193193
void SetCollisionColor(int32_t col);
194194
void updateConfig();
195195
void drawPointLinestrip(int32_t iSector, int32_t cid, int32_t id, int32_t id_limit = TRACK_TYPE_ID_LIMIT);
196-
vboList DrawClusters(int32_t iSector, int32_t select, uint32_t iCol);
196+
void DrawClusters(int32_t iSector);
197197
vboList DrawSpacePointsTRD(int32_t iSector, int32_t select, int32_t iCol);
198198
vboList DrawSpacePointsTOF(int32_t iSector, int32_t select, int32_t iCol);
199199
vboList DrawSpacePointsITS(int32_t iSector, int32_t select, int32_t iCol);
@@ -256,6 +256,7 @@ class GPUDisplay : public GPUDisplayInterface
256256
vecpod<vtx> mVertexBuffer[NSECTORS];
257257
vecpod<int32_t> mVertexBufferStart[NSECTORS];
258258
vecpod<uint32_t> mVertexBufferCount[NSECTORS];
259+
std::vector<std::array<uint32_t, N_POINTS_TYPE_TPC>> mClusterBufferSizeCache[NSECTORS];
259260

260261
std::unique_ptr<float4[]> mGlobalPosPtr;
261262
std::unique_ptr<float4[]> mGlobalPosPtrTRD;

GPU/GPUTracking/display/render/GPUDisplayDraw.cxx

Lines changed: 76 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -122,70 +122,91 @@ GPUDisplay::vboList GPUDisplay::DrawSpacePointsITS(int32_t iSector, int32_t sele
122122
return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector));
123123
}
124124

125-
GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSector, int32_t select, uint32_t iCol)
125+
void GPUDisplay::DrawClusters(int32_t iSector)
126126
{
127-
size_t startCount = mVertexBufferStart[iSector].size();
128-
size_t startCountInner = mVertexBuffer[iSector].size();
129-
if (mOverlayTFClusters.size() > 0 || iCol == 0 || mNCollissions) {
130-
const int32_t firstCluster = (mOverlayTFClusters.size() > 1 && iCol > 0) ? mOverlayTFClusters[iCol - 1][iSector] : 0;
131-
const int32_t lastCluster = (mOverlayTFClusters.size() > 1 && iCol + 1 < mOverlayTFClusters.size()) ? mOverlayTFClusters[iCol][iSector] : (mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSector] : 0);
132-
[[maybe_unused]] const bool checkClusterCollision = mQA && mNCollissions && mOverlayTFClusters.size() == 0 && mIOPtrs->clustersNative && mIOPtrs->clustersNative->clustersMCTruth;
133-
for (int32_t cidInSector = firstCluster; cidInSector < lastCluster; cidInSector++) {
134-
const int32_t cid = GET_CID(iSector, cidInSector);
127+
std::vector<std::array<vecpod<vtx>, N_POINTS_TYPE_TPC>> vertexCache(mNCollissions);
128+
if (mClusterBufferSizeCache[iSector].size() < (uint32_t)mNCollissions) {
129+
mClusterBufferSizeCache[iSector].resize(mNCollissions);
130+
}
131+
for (int32_t iCol = 0; iCol < mNCollissions; iCol++) {
132+
for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) {
133+
vertexCache[iCol][i].reserve(mClusterBufferSizeCache[iSector][iCol][i]);
134+
}
135+
}
136+
137+
uint32_t col = 0;
138+
const int32_t nClustersInSector = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : (mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSector] : 0);
139+
[[maybe_unused]] const bool checkClusterCollision = mQA && mNCollissions && mOverlayTFClusters.size() == 0 && mIOPtrs->clustersNative && mIOPtrs->clustersNative->clustersMCTruth;
140+
for (int32_t cidInSector = 0; cidInSector < nClustersInSector; cidInSector++) {
141+
const int32_t cid = GET_CID(iSector, cidInSector);
135142
#ifdef GPUCA_TPC_GEOMETRY_O2
136-
if (checkClusterCollision) {
137-
const auto& labels = mIOPtrs->clustersNative->clustersMCTruth->getLabels(cid);
138-
if (labels.size() ? (iCol != mQA->GetMCLabelCol(labels[0])) : (iCol != 0)) {
139-
continue;
140-
}
141-
}
143+
if (checkClusterCollision) {
144+
const auto& labels = mIOPtrs->clustersNative->clustersMCTruth->getLabels(cid);
145+
col = labels.size() ? mQA->GetMCLabelCol(labels[0]) : 0;
146+
} else
142147
#endif
143-
if (mCfgH.hideUnmatchedClusters && mQA && mQA->SuppressHit(cid)) {
144-
continue;
148+
if (mOverlayTFClusters.size()) {
149+
while (col < mOverlayTFClusters.size() && cidInSector >= mOverlayTFClusters[col][iSector]) {
150+
col++;
145151
}
146-
bool draw = mGlobalPos[cid].w == select;
147-
148-
if (mCfgH.markAdjacentClusters) {
149-
const int32_t attach = mIOPtrs->mergedTrackHitAttachment[cid];
150-
if (attach) {
151-
if (mCfgH.markAdjacentClusters >= 32) {
152-
if (mQA && mQA->clusterRemovable(attach, mCfgH.markAdjacentClusters == 33)) {
153-
draw = select == tMARKED;
154-
}
155-
} else if ((mCfgH.markAdjacentClusters & 2) && (attach & gputpcgmmergertypes::attachTube)) {
156-
draw = select == tMARKED;
157-
} else if ((mCfgH.markAdjacentClusters & 1) && (attach & (gputpcgmmergertypes::attachGood | gputpcgmmergertypes::attachTube)) == 0) {
158-
draw = select == tMARKED;
159-
} else if ((mCfgH.markAdjacentClusters & 4) && (attach & gputpcgmmergertypes::attachGoodLeg) == 0) {
160-
draw = select == tMARKED;
161-
} else if ((mCfgH.markAdjacentClusters & 16) && (attach & gputpcgmmergertypes::attachHighIncl)) {
162-
draw = select == tMARKED;
163-
} else if (mCfgH.markAdjacentClusters & 8) {
164-
if (fabsf(mIOPtrs->mergedTracks[attach & gputpcgmmergertypes::attachTrackMask].GetParam().GetQPt()) > 20.f) {
165-
draw = select == tMARKED;
166-
}
152+
}
153+
if (mCfgH.hideUnmatchedClusters && mQA && mQA->SuppressHit(cid)) {
154+
continue;
155+
}
156+
int32_t select = mGlobalPos[cid].w;
157+
158+
if (mCfgH.markAdjacentClusters) {
159+
const int32_t attach = mIOPtrs->mergedTrackHitAttachment[cid];
160+
if (attach) {
161+
if (mCfgH.markAdjacentClusters >= 32) {
162+
if (mQA && mQA->clusterRemovable(attach, mCfgH.markAdjacentClusters == 33)) {
163+
select = tMARKED;
164+
}
165+
} else if ((mCfgH.markAdjacentClusters & 2) && (attach & gputpcgmmergertypes::attachTube)) {
166+
select = tMARKED;
167+
} else if ((mCfgH.markAdjacentClusters & 1) && (attach & (gputpcgmmergertypes::attachGood | gputpcgmmergertypes::attachTube)) == 0) {
168+
select = tMARKED;
169+
} else if ((mCfgH.markAdjacentClusters & 4) && (attach & gputpcgmmergertypes::attachGoodLeg) == 0) {
170+
select = tMARKED;
171+
} else if ((mCfgH.markAdjacentClusters & 16) && (attach & gputpcgmmergertypes::attachHighIncl)) {
172+
select = tMARKED;
173+
} else if (mCfgH.markAdjacentClusters & 8) {
174+
if (fabsf(mIOPtrs->mergedTracks[attach & gputpcgmmergertypes::attachTrackMask].GetParam().GetQPt()) > 20.f) {
175+
select = tMARKED;
167176
}
168177
}
169-
} else if (mCfgH.markClusters) {
170-
int16_t flags;
171-
if (mParam->par.earlyTpcTransform) {
172-
flags = mIOPtrs->clusterData[iSector][cidInSector].flags;
173-
} else {
174-
flags = mIOPtrs->clustersNative->clustersLinear[cid].getFlags();
175-
}
176-
const bool match = flags & mCfgH.markClusters;
177-
draw = (select == tMARKED) ? (match) : (draw && !match);
178-
} else if (mCfgH.markFakeClusters) {
179-
const bool fake = (mQA->HitAttachStatus(cid));
180-
draw = (select == tMARKED) ? (fake) : (draw && !fake);
181178
}
182-
if (draw) {
183-
mVertexBuffer[iSector].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPos[cid].z);
179+
} else if (mCfgH.markClusters) {
180+
int16_t flags;
181+
if (mParam->par.earlyTpcTransform) {
182+
flags = mIOPtrs->clusterData[iSector][cidInSector].flags;
183+
} else {
184+
flags = mIOPtrs->clustersNative->clustersLinear[cid].getFlags();
185+
}
186+
if (flags & mCfgH.markClusters) {
187+
select = tMARKED;
188+
}
189+
} else if (mCfgH.markFakeClusters) {
190+
if (mQA->HitAttachStatus(cid)) {
191+
select = tMARKED;
184192
}
185193
}
194+
vertexCache[col][select].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPos[cid].z);
195+
}
196+
197+
size_t startCountInner = mVertexBuffer[iSector].size();
198+
mVertexBuffer[iSector].resize(mVertexBuffer[iSector].size() + nClustersInSector);
199+
for (int32_t iCol = 0; iCol < mNCollissions; iCol++) {
200+
for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) {
201+
uint32_t count = vertexCache[iCol][i].size();
202+
mClusterBufferSizeCache[iSector][iCol][i] = std::max(mClusterBufferSizeCache[iSector][iCol][i], count);
203+
memcpy((void*)&mVertexBuffer[iSector][startCountInner], (const void*)vertexCache[iCol][i].data(), count * sizeof(vertexCache[iCol][i][0]));
204+
size_t startCount = mVertexBufferStart[iSector].size();
205+
insertVertexList(iSector, startCountInner, startCountInner + count);
206+
startCountInner += count;
207+
mGlDLPoints[iSector][i][iCol] = vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector);
208+
}
186209
}
187-
insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size());
188-
return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector));
189210
}
190211

191212
GPUDisplay::vboList GPUDisplay::DrawLinks(const GPUTPCTracker& tracker, int32_t id, bool dodown)
@@ -922,11 +943,7 @@ size_t GPUDisplay::DrawGLScene_updateVertexList()
922943
}
923944

924945
tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) {
925-
for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) {
926-
for (int32_t iCol = 0; iCol < mNCollissions; iCol++) {
927-
mGlDLPoints[iSector][i][iCol] = DrawClusters(iSector, i, iCol);
928-
}
929-
} // clang-format off
946+
DrawClusters(iSector); // clang-format off
930947
}, tbb::simple_partitioner()); // clang-format on
931948
if (timer.IsRunning()) {
932949
GPUInfo("Display Time: Vertex Clusters:\t\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6);

0 commit comments

Comments
 (0)