Skip to content

Commit 208ea84

Browse files
committed
Add tracklet writing on the buffer
1 parent 1bd7e9f commit 208ea84

File tree

6 files changed

+62
-36
lines changed

6 files changed

+62
-36
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ class TimeFrameGPU : public TimeFrame
122122
std::vector<unsigned int> getClusterSizes();
123123
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
124124
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
125-
const Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
125+
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
126126
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
127127
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
128128
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
@@ -142,6 +142,7 @@ class TimeFrameGPU : public TimeFrame
142142
// Host-available device getters
143143
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
144144
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
145+
gsl::span<Tracklet*> getDeviceTracklet() { return mTrackletsDevice; }
145146
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
146147
gsl::span<int, nLayers - 2> getNCellsDevice() { return mNCells; }
147148

@@ -175,7 +176,7 @@ class TimeFrameGPU : public TimeFrame
175176
const unsigned char** mUsedClustersDeviceArray;
176177
const int** mROFrameClustersDeviceArray;
177178
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
178-
const Tracklet** mTrackletsDeviceArray;
179+
Tracklet** mTrackletsDeviceArray;
179180
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
180181
std::array<int*, nLayers - 2> mCellsLUTDevice;
181182
std::array<int*, nLayers - 3> mNeighboursLUTDevice;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
9696
const int** ROFClusters,
9797
const unsigned char** usedClusters,
9898
const int** clustersIndexTables,
99-
Tracklet* tracklets,
99+
Tracklet** tracklets,
100100
int** trackletsLUTs,
101101
const int iteration,
102102
const float NSigmaCut,
@@ -113,7 +113,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
113113
void countCellsHandler(const Cluster** sortedClusters,
114114
const Cluster** unsortedClusters,
115115
const TrackingFrameInfo** tfInfo,
116-
const Tracklet** tracklets,
116+
Tracklet** tracklets,
117117
int** trackletsLUT,
118118
const int nTracklets,
119119
const int layer,
@@ -130,7 +130,7 @@ void countCellsHandler(const Cluster** sortedClusters,
130130
void computeCellsHandler(const Cluster** sortedClusters,
131131
const Cluster** unsortedClusters,
132132
const TrackingFrameInfo** tfInfo,
133-
const Tracklet** tracklets,
133+
Tracklet** tracklets,
134134
int** trackletsLUT,
135135
const int nTracklets,
136136
const int layer,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,9 +270,12 @@ void TimeFrameGPU<nLayers>::createTrackletsBuffers()
270270
for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
271271
mNTracklets[iLayer] = 0;
272272
checkGPUError(cudaMemcpyAsync(&mNTracklets[iLayer], mTrackletsLUTDevice[iLayer] + mClusters[iLayer].size(), sizeof(int), cudaMemcpyDeviceToHost));
273-
LOGP(info, "gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {} MB.", mNTracklets[iLayer], iLayer, mNTracklets[iLayer] * sizeof(Tracklet) / MB);
273+
LOGP(debug, "gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {} MB.", mNTracklets[iLayer], iLayer, mNTracklets[iLayer] * sizeof(Tracklet) / MB);
274274
allocMemAsync(reinterpret_cast<void**>(&mTrackletsDevice[iLayer]), mNTracklets[iLayer] * sizeof(Tracklet), nullptr, getExtAllocator());
275275
}
276+
allocMemAsync(reinterpret_cast<void**>(&mTrackletsDeviceArray), (nLayers - 1) * sizeof(Tracklet*), nullptr, getExtAllocator());
277+
checkGPUError(cudaHostRegister(mTrackletsDevice.data(), (nLayers - 1) * sizeof(Tracklet*), cudaHostRegisterPortable));
278+
checkGPUError(cudaMemcpyAsync(mTrackletsDeviceArray, mTrackletsDevice.data(), (nLayers - 1) * sizeof(Tracklet*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
276279
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
277280
}
278281

@@ -282,13 +285,9 @@ void TimeFrameGPU<nLayers>::loadTrackletsDevice()
282285
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading tracklets");
283286
for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
284287
LOGP(debug, "gpu-transfer: loading {} tracklets on layer {}, for {} MB.", mTracklets[iLayer].size(), iLayer, mTracklets[iLayer].size() * sizeof(Tracklet) / MB);
285-
allocMemAsync(reinterpret_cast<void**>(&mTrackletsDevice[iLayer]), mTracklets[iLayer].size() * sizeof(Tracklet), nullptr, getExtAllocator());
286288
checkGPUError(cudaHostRegister(mTracklets[iLayer].data(), mTracklets[iLayer].size() * sizeof(Tracklet), cudaHostRegisterPortable));
287289
checkGPUError(cudaMemcpyAsync(mTrackletsDevice[iLayer], mTracklets[iLayer].data(), mTracklets[iLayer].size() * sizeof(Tracklet), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
288290
}
289-
allocMemAsync(reinterpret_cast<void**>(&mTrackletsDeviceArray), (nLayers - 1) * sizeof(Tracklet*), nullptr, getExtAllocator());
290-
checkGPUError(cudaHostRegister(mTrackletsDevice.data(), (nLayers - 1) * sizeof(Tracklet*), cudaHostRegisterPortable));
291-
checkGPUError(cudaMemcpyAsync(mTrackletsDeviceArray, mTrackletsDevice.data(), (nLayers - 1) * sizeof(Tracklet*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
292291
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
293292
}
294293

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int
115115
mTimeFrameGPU->getDeviceArrayUsedClusters(),
116116
mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
117117
mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
118-
mTimeFrameGPU->getDeviceTrackletsLUTs(),
118+
mTimeFrameGPU->getDeviceTrackletsLUTs(), // Required for the exclusive sums
119119
iteration,
120120
mTrkParams[iteration].NSigmaCut,
121121
mTimeFrameGPU->getPhiCuts(),
@@ -128,6 +128,34 @@ void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int
128128
conf.nBlocks,
129129
conf.nThreads);
130130
mTimeFrameGPU->createTrackletsBuffers();
131+
computeTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
132+
mTimeFrameGPU->getDeviceMultCutMask(),
133+
startROF,
134+
endROF,
135+
mTimeFrameGPU->getNrof(),
136+
mTrkParams[iteration].DeltaROF,
137+
iVertex,
138+
mTimeFrameGPU->getDeviceVertices(),
139+
mTimeFrameGPU->getDeviceROFramesPV(),
140+
mTimeFrameGPU->getPrimaryVerticesNum(),
141+
mTimeFrameGPU->getDeviceArrayClusters(),
142+
mTimeFrameGPU->getClusterSizes(),
143+
mTimeFrameGPU->getDeviceROframeClusters(),
144+
mTimeFrameGPU->getDeviceArrayUsedClusters(),
145+
mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
146+
mTimeFrameGPU->getDeviceArrayTracklets(),
147+
mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
148+
iteration,
149+
mTrkParams[iteration].NSigmaCut,
150+
mTimeFrameGPU->getPhiCuts(),
151+
mTrkParams[iteration].PVres,
152+
mTimeFrameGPU->getMinRs(),
153+
mTimeFrameGPU->getMaxRs(),
154+
mTimeFrameGPU->getPositionResolutions(),
155+
mTrkParams[iteration].LayerRadii,
156+
mTimeFrameGPU->getMSangles(),
157+
conf.nBlocks,
158+
conf.nThreads);
131159
}
132160

133161
template <int nLayers>

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ GPUg() void computeLayerCellsKernel(
369369
const Cluster** sortedClusters,
370370
const Cluster** unsortedClusters,
371371
const TrackingFrameInfo** tfInfo,
372-
const Tracklet** tracklets,
372+
Tracklet** tracklets,
373373
int** trackletsLUT,
374374
const int nTrackletsCurrent,
375375
const int layer,
@@ -462,11 +462,11 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
462462
const int* rofPV,
463463
const int nVertices,
464464
const int vertexId,
465-
const Cluster** clusters, // input data rof0
465+
const Cluster** clusters, // Input data rof0
466466
const int** ROFClusters, // Number of clusters on layers per ROF
467467
const unsigned char** usedClusters, // Used clusters
468-
const int** indexTables, // input data rof0-delta <rof0< rof0+delta (up to 3 rofs)
469-
Tracklet* tracklets, // output data
468+
const int** indexTables, // Input data rof0-delta <rof0< rof0+delta (up to 3 rofs)
469+
Tracklet** tracklets, // Output data
470470
int** trackletsLUT,
471471
const int iteration,
472472
const float NSigmaCut,
@@ -475,18 +475,18 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
475475
const float minR,
476476
const float maxR,
477477
const float positionResolution,
478-
const float meanDeltaR = -666.f,
479-
const float MSAngle = -666.f)
478+
const float meanDeltaR = -42.f,
479+
const float MSAngle = -42.f)
480480
{
481481
const int phiBins{utils->getNphiBins()};
482482
const int zBins{utils->getNzBins()};
483483
for (unsigned int iROF{blockIdx.x}; iROF < endROF - startROF; iROF += gridDim.x) {
484-
const int rof0 = iROF + startROF;
484+
const short rof0 = iROF + startROF;
485485
auto primaryVertices = getPrimaryVertices(rof0, rofPV, totalROFs, multMask, vertices);
486486
const auto startVtx{vertexId >= 0 ? vertexId : 0};
487487
const auto endVtx{vertexId >= 0 ? o2::gpu::CAMath::Min(vertexId + 1, static_cast<int>(primaryVertices.size())) : static_cast<int>(primaryVertices.size())};
488-
auto minROF = o2::gpu::CAMath::Max(startROF, static_cast<int>(rof0 - deltaROF));
489-
auto maxROF = o2::gpu::CAMath::Min(endROF - 1, static_cast<int>(rof0 + deltaROF));
488+
const short minROF = o2::gpu::CAMath::Max(startROF, static_cast<int>(rof0 - deltaROF));
489+
const short maxROF = o2::gpu::CAMath::Min(endROF - 1, static_cast<int>(rof0 + deltaROF));
490490
auto clustersCurrentLayer = getClustersOnLayer(rof0, totalROFs, layerIndex, ROFClusters, clusters);
491491
if (clustersCurrentLayer.empty()) {
492492
continue;
@@ -523,7 +523,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
523523
}
524524

525525
const int tableSize{phiBins * zBins + 1};
526-
for (int rof1{minROF}; rof1 <= maxROF; ++rof1) {
526+
for (short rof1{minROF}; rof1 <= maxROF; ++rof1) {
527527
auto clustersNextLayer = getClustersOnLayer(rof1, totalROFs, layerIndex + 1, ROFClusters, clusters);
528528
if (clustersNextLayer.empty()) {
529529
continue;
@@ -534,26 +534,24 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
534534
const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1};
535535
const int firstRowClusterIndex = indexTables[layerIndex + 1][(rof1 - startROF) * tableSize + firstBinIndex];
536536
const int maxRowClusterIndex = indexTables[layerIndex + 1][(rof1 - startROF) * tableSize + maxBinIndex];
537-
for (int iNextCluster{firstRowClusterIndex}; iNextCluster < maxRowClusterIndex; ++iNextCluster) {
538-
if (iNextCluster >= clustersNextLayer.size()) {
537+
for (int nextClusterIndex{firstRowClusterIndex}; nextClusterIndex < maxRowClusterIndex; ++nextClusterIndex) {
538+
if (nextClusterIndex >= clustersNextLayer.size()) {
539539
break;
540540
}
541-
const Cluster& nextCluster{clustersNextLayer[iNextCluster]};
541+
const Cluster& nextCluster{clustersNextLayer[nextClusterIndex]};
542542
if (usedClusters[layerIndex + 1][nextCluster.clusterId]) {
543543
continue;
544544
}
545545
const float deltaPhi{o2::gpu::CAMath::Abs(currentCluster.phi - nextCluster.phi)};
546-
const float deltaZ{o2::gpu::CAMath::Abs(tanLambda * (nextCluster.radius - currentCluster.radius) +
547-
currentCluster.zCoordinate - nextCluster.zCoordinate)};
546+
const float deltaZ{o2::gpu::CAMath::Abs(tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate)};
547+
const int nextSortedIndex{ROFClusters[layerIndex + 1][rof1] + nextClusterIndex};
548548
if (deltaZ / sigmaZ < NSigmaCut && (deltaPhi < phiCut || o2::gpu::CAMath::Abs(deltaPhi - constants::math::TwoPi) < phiCut)) {
549-
// if (layerIndex > 0) {
550549
if constexpr (initRun) {
551550
trackletsLUT[layerIndex][currentSortedIndex]++; // we need l0 as well for usual exclusive sums.
552551
} else {
553-
// }
554552
const float phi{o2::gpu::CAMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, currentCluster.xCoordinate - nextCluster.xCoordinate)};
555553
const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / (currentCluster.radius - nextCluster.radius)};
556-
// tf->getTracklets()[layerIndex].emplace_back(currentSortedIndex, tf->getSortedIndex(rof1, layerIndex + 1, iNextCluster), tanL, phi, rof0, rof1);
554+
new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, rof0, rof1};
557555
}
558556
++storedTracklets;
559557
}
@@ -809,7 +807,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
809807
const int** ROFClusters,
810808
const unsigned char** usedClusters,
811809
const int** clustersIndexTables,
812-
Tracklet* tracklets,
810+
Tracklet** tracklets,
813811
int** trackletsLUTs,
814812
const int iteration,
815813
const float NSigmaCut,
@@ -859,7 +857,7 @@ void countCellsHandler(
859857
const Cluster** sortedClusters,
860858
const Cluster** unsortedClusters,
861859
const TrackingFrameInfo** tfInfo,
862-
const Tracklet** tracklets,
860+
Tracklet** tracklets,
863861
int** trackletsLUT,
864862
const int nTracklets,
865863
const int layer,
@@ -909,7 +907,7 @@ void computeCellsHandler(
909907
const Cluster** sortedClusters,
910908
const Cluster** unsortedClusters,
911909
const TrackingFrameInfo** tfInfo,
912-
const Tracklet** tracklets,
910+
Tracklet** tracklets,
913911
int** trackletsLUT,
914912
const int nTracklets,
915913
const int layer,
@@ -1128,7 +1126,7 @@ template void computeTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
11281126
const int** ROFClusters,
11291127
const unsigned char** usedClusters,
11301128
const int** clustersIndexTables,
1131-
Tracklet* tracklets,
1129+
Tracklet** tracklets,
11321130
int** trackletsLUTs,
11331131
const int iteration,
11341132
const float NSigmaCut,

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,9 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in
211211
return;
212212
}
213213

214-
for (auto iLayer{0}; iLayer < tf->getTracklets().size(); ++iLayer) {
215-
std::cout << "tracklets layer " << iLayer << ": " << tf->getTracklets()[iLayer].size() << std::endl;
216-
}
214+
// for (auto iLayer{0}; iLayer < tf->getTracklets().size(); ++iLayer) {
215+
// std::cout << "tracklets layer " << iLayer << ": " << tf->getTracklets()[iLayer].size() << std::endl;
216+
// }
217217

218218
// for (auto iLayer{0}; iLayer < tf->getTrackletsLookupTable().size(); ++iLayer) {
219219
// auto lut = tf->getTrackletsLookupTable()[iLayer];

0 commit comments

Comments
 (0)