Skip to content

Commit 19fb1fd

Browse files
mconcaschiarazampolli
authored andcommitted
ITS-Tracking: introduce multi-ROF seeding vertexer (#13323)
* Add multi rof vertexer idea * More vertices is better than less * Improve tracklet validation and manage late vertices * Fix GPU compilation * Fix leak and bugs * Add vertices in both rofs * Fix rebasing * Fix non-deltaRof behaviour * Fix multiple iterations * Refactor tracklets and fix second iteration with multirof (cherry picked from commit ba9e426)
1 parent c99385e commit 19fb1fd

File tree

22 files changed

+524
-343
lines changed

22 files changed

+524
-343
lines changed

DataFormats/Detectors/Common/src/CTFHeader.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ using DetID = o2::detectors::DetID;
1818
/// describe itsel as a string
1919
std::string CTFHeader::describe() const
2020
{
21-
return fmt::format("Run:{:07d} TF:{} Orbit:{:08d} CteationTime:{} Detectors: {}", run, tfCounter, firstTForbit, creationTime, DetID::getNames(detectors));
21+
return fmt::format("Run:{:07d} TF:{} Orbit:{:08d} CreationTime:{} Detectors: {}", run, tfCounter, firstTForbit, creationTime, DetID::getNames(detectors));
2222
}
2323

2424
void CTFHeader::print() const

DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@ class Vertex : public VertexBase
146146
GPUd() void setChi2(float v) { mChi2 = v; }
147147
GPUd() float getChi2() const { return mChi2; }
148148

149-
GPUd() const Stamp& getTimeStamp() const { return mTimeStamp; }
150-
GPUd() Stamp& getTimeStamp() { return mTimeStamp; }
149+
GPUhd() const Stamp& getTimeStamp() const { return mTimeStamp; }
150+
GPUhd() Stamp& getTimeStamp() { return mTimeStamp; }
151151
GPUd() void setTimeStamp(const Stamp& v) { mTimeStamp = v; }
152152

153153
protected:

DataFormats/common/include/CommonDataFormat/TimeStamp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class TimeStamp
2828
GPUhdDefault() TimeStamp() CON_DEFAULT;
2929
GPUhdDefault() ~TimeStamp() CON_DEFAULT;
3030
GPUdi() TimeStamp(T time) { mTimeStamp = time; }
31-
GPUdi() T getTimeStamp() const { return mTimeStamp; }
31+
GPUhdi() T getTimeStamp() const { return mTimeStamp; }
3232
GPUdi() void setTimeStamp(T t) { mTimeStamp = t; }
3333
GPUdi() bool operator==(const TimeStamp<T>& t) const { return mTimeStamp == t.mTimeStamp; }
3434

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -210,15 +210,15 @@ class TimeFrameGPU : public TimeFrame
210210
/// interface
211211
int getNClustersInRofSpan(const int, const int, const int) const;
212212
IndexTableUtils* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
213-
int* getDeviceROframesClusters(const int layer) { return mROframesClustersDevice[layer]; }
213+
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
214214
std::vector<std::vector<Vertex>>& getVerticesInChunks() { return mVerticesInChunks; }
215215
std::vector<std::vector<int>>& getNVerticesInChunks() { return mNVerticesInChunks; }
216216
std::vector<o2::its::TrackITSExt>& getTrackITSExt() { return mTrackITSExt; }
217217
std::vector<std::vector<o2::MCCompLabel>>& getLabelsInChunks() { return mLabelsInChunks; }
218218
int getNAllocatedROFs() const { return mNrof; } // Allocated means maximum nROF for each chunk while populated is the number of loaded ones.
219219
StaticTrackingParameters<nLayers>* getDeviceTrackingParameters() { return mTrackingParamsDevice; }
220220
Vertex* getDeviceVertices() { return mVerticesDevice; }
221-
int* getDeviceROframesPV() { return mROframesPVDevice; }
221+
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
222222
unsigned char* getDeviceUsedClusters(const int);
223223
const o2::base::Propagator* getChainPropagator();
224224

@@ -251,10 +251,10 @@ class TimeFrameGPU : public TimeFrame
251251
// Device pointers
252252
StaticTrackingParameters<nLayers>* mTrackingParamsDevice;
253253
IndexTableUtils* mIndexTableUtilsDevice;
254-
std::array<int*, nLayers> mROframesClustersDevice;
254+
std::array<int*, nLayers> mROFramesClustersDevice;
255255
std::array<unsigned char*, nLayers> mUsedClustersDevice;
256256
Vertex* mVerticesDevice;
257-
int* mROframesPVDevice;
257+
int* mROFramesPVDevice;
258258

259259
// Hybrid pref
260260
std::array<Cluster*, nLayers> mClustersDevice;
@@ -314,7 +314,7 @@ size_t TimeFrameGPU<nLayers>::loadChunkData(const size_t chunk, const size_t off
314314
template <int nLayers>
315315
inline int TimeFrameGPU<nLayers>::getNClustersInRofSpan(const int rofIdstart, const int rofSpanSize, const int layerId) const
316316
{
317-
return static_cast<int>(mROframesClusters[layerId][(rofIdstart + rofSpanSize) < mROframesClusters.size() ? rofIdstart + rofSpanSize : mROframesClusters.size() - 1] - mROframesClusters[layerId][rofIdstart]);
317+
return static_cast<int>(mROFramesClusters[layerId][(rofIdstart + rofSpanSize) < mROFramesClusters.size() ? rofIdstart + rofSpanSize : mROFramesClusters.size() - 1] - mROFramesClusters[layerId][rofIdstart]);
318318
}
319319
} // namespace gpu
320320
} // namespace its

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -443,21 +443,21 @@ void TimeFrameGPU<nLayers>::initDevice(const int chunks,
443443
mMemChunks[iChunk].allocate(GpuTimeFrameChunk<nLayers>::computeRofPerChunk(mGpuParams, mAvailMemGB), mGpuStreams[iChunk]);
444444
}
445445
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
446-
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mROframesClustersDevice[iLayer]), mROframesClusters[iLayer].size() * sizeof(int)));
446+
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mROFramesClustersDevice[iLayer]), mROFramesClusters[iLayer].size() * sizeof(int)));
447447
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&(mUsedClustersDevice[iLayer])), sizeof(unsigned char) * mGpuParams.clustersPerROfCapacity * mNrof));
448448
}
449449
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mVerticesDevice), sizeof(Vertex) * mGpuParams.maxVerticesCapacity));
450-
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mROframesPVDevice), sizeof(int) * (mNrof + 1)));
450+
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mROFramesPVDevice), sizeof(int) * (mNrof + 1)));
451451

452452
mFirstInit = false;
453453
}
454454
if (maxLayers < nLayers) { // Vertexer
455455
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
456-
checkGPUError(cudaMemcpy(mROframesClustersDevice[iLayer], mROframesClusters[iLayer].data(), mROframesClusters[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice));
456+
checkGPUError(cudaMemcpy(mROFramesClustersDevice[iLayer], mROFramesClusters[iLayer].data(), mROFramesClusters[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice));
457457
}
458458
} else { // Tracker
459459
checkGPUError(cudaMemcpy(mVerticesDevice, mPrimaryVertices.data(), sizeof(Vertex) * mPrimaryVertices.size(), cudaMemcpyHostToDevice));
460-
checkGPUError(cudaMemcpy(mROframesPVDevice, mROframesPV.data(), sizeof(int) * mROframesPV.size(), cudaMemcpyHostToDevice));
460+
checkGPUError(cudaMemcpy(mROFramesPVDevice, mROFramesPV.data(), sizeof(int) * mROFramesPV.size(), cudaMemcpyHostToDevice));
461461
if (!iteration) {
462462
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
463463
checkGPUError(cudaMemset(mUsedClustersDevice[iLayer], 0, sizeof(unsigned char) * mGpuParams.clustersPerROfCapacity * mNrof));

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -353,8 +353,8 @@ GPUg() void printTrackletsNotStrided(const Tracklet* t,
353353
// Compute the tracklets for a given layer
354354
template <int nLayers = 7>
355355
GPUg() void computeLayerTrackletsKernelSingleRof(
356-
const int rof0,
357-
const int maxRofs,
356+
const short rof0,
357+
const short maxRofs,
358358
const int layerIndex,
359359
const Cluster* clustersCurrentLayer, // input data rof0
360360
const Cluster* clustersNextLayer, // input data rof0-delta <rof0< rof0+delta (up to 3 rofs)
@@ -385,8 +385,8 @@ GPUg() void computeLayerTrackletsKernelSingleRof(
385385
if (usedClustersLayer[currentSortedIndex]) {
386386
continue;
387387
}
388-
int minRof = (rof0 >= trkPars->DeltaROF) ? rof0 - trkPars->DeltaROF : 0;
389-
int maxRof = (rof0 == maxRofs - trkPars->DeltaROF) ? rof0 : rof0 + trkPars->DeltaROF;
388+
short minRof = (rof0 >= trkPars->DeltaROF) ? rof0 - trkPars->DeltaROF : 0;
389+
short maxRof = (rof0 == static_cast<short>(maxRofs - trkPars->DeltaROF)) ? rof0 : rof0 + trkPars->DeltaROF;
390390
const float inverseR0{1.f / currentCluster.radius};
391391
for (int iPrimaryVertex{0}; iPrimaryVertex < nVertices; iPrimaryVertex++) {
392392
const auto& primaryVertex{vertices[iPrimaryVertex]};
@@ -410,7 +410,7 @@ GPUg() void computeLayerTrackletsKernelSingleRof(
410410
}
411411
constexpr int tableSize{256 * 128 + 1}; // hardcoded for the time being
412412

413-
for (int rof1{minRof}; rof1 <= maxRof; ++rof1) {
413+
for (short rof1{minRof}; rof1 <= maxRof; ++rof1) {
414414
if (!(roFrameClustersNext[rof1 + 1] - roFrameClustersNext[rof1])) { // number of clusters on next layer > 0
415415
continue;
416416
}
@@ -561,7 +561,7 @@ GPUg() void computeLayerTrackletsKernelMultipleRof(
561561
const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / (currentCluster.radius - nextCluster.radius)};
562562
const size_t stride{currentClusterIndex * maxTrackletsPerCluster};
563563
if (storedTracklets < maxTrackletsPerCluster) {
564-
new (trackletsRof0 + stride + storedTracklets) Tracklet{currentSortedIndexChunk, nextClusterIndex, tanL, phi, static_cast<ushort>(rof0), static_cast<ushort>(rof1)};
564+
new (trackletsRof0 + stride + storedTracklets) Tracklet{currentSortedIndexChunk, nextClusterIndex, tanL, phi, static_cast<short>(rof0), static_cast<short>(rof1)};
565565
}
566566
// else {
567567
// printf("its-gpu-tracklet-finder: on rof %d layer: %d: found more tracklets (%d) than maximum allowed per cluster. This is lossy!\n", rof0, layerIndex, storedTracklets);

Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexerTraitsGPU.cu

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ GPUg() void trackleterKernelSingleRof(
178178
Tracklet* Tracklets,
179179
int* foundTracklets,
180180
const IndexTableUtils* utils,
181-
const int rofId,
181+
const short rofId,
182182
const size_t maxTrackletsPerCluster = 1e2)
183183
{
184184
const int phiBins{utils->getNphiBins()};
@@ -234,15 +234,15 @@ GPUg() void trackleterKernelMultipleRof(
234234
Tracklet* Tracklets,
235235
int* foundTracklets,
236236
const IndexTableUtils* utils,
237-
const unsigned int startRofId,
238-
const unsigned int rofSize,
237+
const short startRofId,
238+
const short rofSize,
239239
const float phiCut,
240240
const size_t maxTrackletsPerCluster = 1e2)
241241
{
242242
const int phiBins{utils->getNphiBins()};
243243
const int zBins{utils->getNzBins()};
244-
for (unsigned int iRof{blockIdx.x}; iRof < rofSize; iRof += gridDim.x) {
245-
auto rof = iRof + startRofId;
244+
for (auto iRof{blockIdx.x}; iRof < rofSize; iRof += gridDim.x) {
245+
short rof = static_cast<short>(iRof) + startRofId;
246246
auto* clustersNextLayerRof = clustersNextLayer + (sizeNextLClusters[rof] - sizeNextLClusters[startRofId]);
247247
auto* clustersCurrentLayerRof = clustersCurrentLayer + (sizeCurrentLClusters[rof] - sizeCurrentLClusters[startRofId]);
248248
auto nClustersNextLayerRof = sizeNextLClusters[rof + 1] - sizeNextLClusters[rof];
@@ -273,9 +273,9 @@ GPUg() void trackleterKernelMultipleRof(
273273
if (o2::gpu::GPUCommonMath::Abs(smallestAngleDifference(currentCluster.phi, nextCluster.phi)) < phiCut) {
274274
if (storedTracklets < maxTrackletsPerCluster) {
275275
if constexpr (Mode == TrackletMode::Layer0Layer1) {
276-
new (TrackletsRof + stride + storedTracklets) Tracklet{iNextLayerClusterIndex, iCurrentLayerClusterIndex, nextCluster, currentCluster, static_cast<int>(rof), static_cast<int>(rof)};
276+
new (TrackletsRof + stride + storedTracklets) Tracklet{iNextLayerClusterIndex, iCurrentLayerClusterIndex, nextCluster, currentCluster, rof, rof};
277277
} else {
278-
new (TrackletsRof + stride + storedTracklets) Tracklet{iCurrentLayerClusterIndex, iNextLayerClusterIndex, currentCluster, nextCluster, static_cast<int>(rof), static_cast<int>(rof)};
278+
new (TrackletsRof + stride + storedTracklets) Tracklet{iCurrentLayerClusterIndex, iNextLayerClusterIndex, currentCluster, nextCluster, rof, rof};
279279
}
280280
++storedTracklets;
281281
}
@@ -625,8 +625,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
625625
gpu::trackleterKernelMultipleRof<TrackletMode::Layer0Layer1><<<rofs, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(
626626
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(0), // const Cluster* clustersNextLayer, // 0 2
627627
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clustersCurrentLayer, // 1 1
628-
mTimeFrameGPU->getDeviceROframesClusters(0), // const int* sizeNextLClusters,
629-
mTimeFrameGPU->getDeviceROframesClusters(1), // const int* sizeCurrentLClusters,
628+
mTimeFrameGPU->getDeviceROFramesClusters(0), // const int* sizeNextLClusters,
629+
mTimeFrameGPU->getDeviceROFramesClusters(1), // const int* sizeCurrentLClusters,
630630
mTimeFrameGPU->getChunk(chunkId).getDeviceIndexTables(0), // const int* nextIndexTables,
631631
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(0), // Tracklet* Tracklets,
632632
mTimeFrameGPU->getChunk(chunkId).getDeviceNTrackletCluster(0), // int* foundTracklets,
@@ -639,8 +639,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
639639
gpu::trackleterKernelMultipleRof<TrackletMode::Layer1Layer2><<<rofs, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(
640640
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(2), // const Cluster* clustersNextLayer, // 0 2
641641
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clustersCurrentLayer, // 1 1
642-
mTimeFrameGPU->getDeviceROframesClusters(2), // const int* sizeNextLClusters,
643-
mTimeFrameGPU->getDeviceROframesClusters(1), // const int* sizeCurrentLClusters,
642+
mTimeFrameGPU->getDeviceROFramesClusters(2), // const int* sizeNextLClusters,
643+
mTimeFrameGPU->getDeviceROFramesClusters(1), // const int* sizeCurrentLClusters,
644644
mTimeFrameGPU->getChunk(chunkId).getDeviceIndexTables(2), // const int* nextIndexTables,
645645
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(1), // Tracklet* Tracklets,
646646
mTimeFrameGPU->getChunk(chunkId).getDeviceNTrackletCluster(1), // int* foundTracklets,
@@ -653,8 +653,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
653653
gpu::trackletSelectionKernelMultipleRof<true><<<rofs, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(
654654
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(0), // const Cluster* clusters0, // Clusters on layer 0
655655
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clusters1, // Clusters on layer 1
656-
mTimeFrameGPU->getDeviceROframesClusters(0), // const int* sizeClustersL0, // Number of clusters on layer 0 per ROF
657-
mTimeFrameGPU->getDeviceROframesClusters(1), // const int* sizeClustersL1, // Number of clusters on layer 1 per ROF
656+
mTimeFrameGPU->getDeviceROFramesClusters(0), // const int* sizeClustersL0, // Number of clusters on layer 0 per ROF
657+
mTimeFrameGPU->getDeviceROFramesClusters(1), // const int* sizeClustersL1, // Number of clusters on layer 1 per ROF
658658
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(0), // Tracklet* tracklets01, // Tracklets on layer 0-1
659659
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(1), // Tracklet* tracklets12, // Tracklets on layer 1-2
660660
mTimeFrameGPU->getChunk(chunkId).getDeviceNTrackletCluster(0), // const int* nFoundTracklets01, // Number of tracklets found on layers 0-1
@@ -686,8 +686,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
686686
gpu::trackletSelectionKernelMultipleRof<false><<<rofs, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(
687687
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(0), // const Cluster* clusters0, // Clusters on layer 0
688688
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clusters1, // Clusters on layer 1
689-
mTimeFrameGPU->getDeviceROframesClusters(0), // const int* sizeClustersL0, // Number of clusters on layer 0 per ROF
690-
mTimeFrameGPU->getDeviceROframesClusters(1), // const int* sizeClustersL1, // Number of clusters on layer 1 per ROF
689+
mTimeFrameGPU->getDeviceROFramesClusters(0), // const int* sizeClustersL0, // Number of clusters on layer 0 per ROF
690+
mTimeFrameGPU->getDeviceROFramesClusters(1), // const int* sizeClustersL1, // Number of clusters on layer 1 per ROF
691691
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(0), // Tracklet* tracklets01, // Tracklets on layer 0-1
692692
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(1), // Tracklet* tracklets12, // Tracklets on layer 1-2
693693
mTimeFrameGPU->getChunk(chunkId).getDeviceNTrackletCluster(0), // const int* nFoundTracklets01, // Number of tracklets found on layers 0-1
@@ -721,8 +721,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
721721
std::vector<bool> usedLines;
722722
for (int rofId{0}; rofId < rofs; ++rofId) {
723723
auto rof = offset + rofId;
724-
auto clustersL1offsetRof = mTimeFrameGPU->getROframeClusters(1)[rof] - mTimeFrameGPU->getROframeClusters(1)[offset]; // starting cluster offset for this ROF
725-
auto nClustersL1Rof = mTimeFrameGPU->getROframeClusters(1)[rof + 1] - mTimeFrameGPU->getROframeClusters(1)[rof]; // number of clusters for this ROF
724+
auto clustersL1offsetRof = mTimeFrameGPU->getROFrameClusters(1)[rof] - mTimeFrameGPU->getROFrameClusters(1)[offset]; // starting cluster offset for this ROF
725+
auto nClustersL1Rof = mTimeFrameGPU->getROFrameClusters(1)[rof + 1] - mTimeFrameGPU->getROFrameClusters(1)[rof]; // number of clusters for this ROF
726726
auto linesOffsetRof = exclusiveFoundLinesHost[clustersL1offsetRof]; // starting line offset for this ROF
727727
auto nLinesRof = exclusiveFoundLinesHost[clustersL1offsetRof + nClustersL1Rof] - linesOffsetRof;
728728
gsl::span<const o2::its::Line> linesInRof(lines.data() + linesOffsetRof, static_cast<gsl::span<o2::its::Line>::size_type>(nLinesRof));
@@ -754,7 +754,7 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
754754
int start{0};
755755
for (int rofId{0}; rofId < mTimeFrameGPU->getNVerticesInChunks()[chunkId].size(); ++rofId) {
756756
gsl::span<const Vertex> rofVerts{mTimeFrameGPU->getVerticesInChunks()[chunkId].data() + start, static_cast<gsl::span<Vertex>::size_type>(mTimeFrameGPU->getNVerticesInChunks()[chunkId][rofId])};
757-
mTimeFrameGPU->addPrimaryVertices(rofVerts);
757+
mTimeFrameGPU->addPrimaryVertices(rofVerts, rofId, 0);
758758
if (mTimeFrameGPU->hasMCinformation()) {
759759
// mTimeFrameGPU->getVerticesLabels().emplace_back();
760760
// TODO: add MC labels

0 commit comments

Comments
 (0)