Skip to content

Commit f9a0a3e

Browse files
committed
Tracklet finder on GPU
1 parent ff486d4 commit f9a0a3e

File tree

5 files changed

+9
-15
lines changed

5 files changed

+9
-15
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,6 @@ class TimeFrameGPU : public TimeFrame
136136
void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;
137137

138138
// Host-specific getters
139-
gsl::span<int> getHostNTracklets(const int chunkId);
140-
gsl::span<int> getHostNCells(const int chunkId);
141139
gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
142140
gsl::span<int, nLayers - 2> getNCells() { return mNCells; }
143141

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,9 @@ void TimeFrameGPU<nLayers>::createCellsLUTDevice()
341341
{
342342
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating cells LUTs");
343343
for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) {
344-
LOGP(debug, "gpu-transfer: creating cell LUT for {} elements on layer {}, for {} MB.", mTracklets[iLayer].size() + 1, iLayer, (mTracklets[iLayer].size() + 1) * sizeof(int) / MB);
345-
allocMemAsync(reinterpret_cast<void**>(&mCellsLUTDevice[iLayer]), (mTracklets[iLayer].size() + 1) * sizeof(int), nullptr, getExtAllocator());
346-
checkGPUError(cudaMemsetAsync(mCellsLUTDevice[iLayer], 0, (mTracklets[iLayer].size() + 1) * sizeof(int), mGpuStreams[0].get()));
344+
LOGP(debug, "gpu-transfer: creating cell LUT for {} elements on layer {}, for {} MB.", mNTracklets[iLayer] + 1, iLayer, (mNTracklets[iLayer] + 1) * sizeof(int) / MB);
345+
allocMemAsync(reinterpret_cast<void**>(&mCellsLUTDevice[iLayer]), (mNTracklets[iLayer] + 1) * sizeof(int), nullptr, getExtAllocator());
346+
checkGPUError(cudaMemsetAsync(mCellsLUTDevice[iLayer], 0, (mNTracklets[iLayer] + 1) * sizeof(int), mGpuStreams[0].get()));
347347
}
348348
allocMemAsync(reinterpret_cast<void**>(&mCellsLUTDeviceArray), (nLayers - 2) * sizeof(int*), nullptr, getExtAllocator());
349349
checkGPUError(cudaMemcpyAsync(mCellsLUTDeviceArray, mCellsLUTDevice.data(), mCellsLUTDevice.size() * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
@@ -355,7 +355,7 @@ void TimeFrameGPU<nLayers>::createCellsBuffers(const int layer)
355355
{
356356
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating cells buffers");
357357
mNCells[layer] = 0;
358-
checkGPUError(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mTracklets[layer].size(), sizeof(int), cudaMemcpyDeviceToHost));
358+
checkGPUError(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost));
359359
LOGP(debug, "gpu-transfer: creating cell buffer for {} elements on layer {}, for {} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / MB);
360360
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), nullptr, getExtAllocator());
361361

@@ -446,9 +446,9 @@ void TimeFrameGPU<nLayers>::downloadCellsLUTDevice()
446446
{
447447
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "downloading cell luts");
448448
for (auto iLayer{0}; iLayer < nLayers - 3; ++iLayer) {
449-
LOGP(debug, "gpu-transfer: downloading cells lut on layer {} for {} elements", iLayer, (mTracklets[iLayer + 1].size() + 1));
450-
mCellsLookupTable[iLayer].resize(mTracklets[iLayer + 1].size() + 1);
451-
checkGPUError(cudaMemcpyAsync(mCellsLookupTable[iLayer].data(), mCellsLUTDevice[iLayer + 1], (mTracklets[iLayer + 1].size() + 1) * sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[0].get()));
449+
LOGP(debug, "gpu-transfer: downloading cells lut on layer {} for {} elements", iLayer, (mNTracklets[iLayer + 1] + 1));
450+
mCellsLookupTable[iLayer].resize(mNTracklets[iLayer + 1] + 1);
451+
checkGPUError(cudaMemcpyAsync(mCellsLookupTable[iLayer].data(), mCellsLUTDevice[iLayer + 1], (mNTracklets[iLayer + 1] + 1) * sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[0].get()));
452452
}
453453
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
454454
}

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ int TrackerTraitsGPU<nLayers>::getTFNumberOfClusters() const
7676
template <int nLayers>
7777
int TrackerTraitsGPU<nLayers>::getTFNumberOfTracklets() const
7878
{
79-
return mTimeFrameGPU->getNumberOfTracklets();
79+
return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0);
8080
}
8181

8282
template <int nLayers>
@@ -91,7 +91,7 @@ template <int nLayers>
9191
void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int iROFslice, int iVertex)
9292
{
9393
auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance();
94-
TrackerTraits::computeLayerTracklets(iteration, iROFslice, iVertex);
94+
// TrackerTraits::computeLayerTracklets(iteration, iROFslice, iVertex);
9595
mTimeFrameGPU->createTrackletsLUTDevice(iteration);
9696

9797
const Vertex diamondVert({mTrkParams[iteration].Diamond[0], mTrkParams[iteration].Diamond[1], mTrkParams[iteration].Diamond[2]}, {25.e-6f, 0.f, 0.f, 25.e-6f, 0.f, 36.f}, 1, 1.f);
@@ -169,10 +169,8 @@ void TrackerTraitsGPU<nLayers>::computeCellsHybrid(const int iteration)
169169

170170
for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) {
171171
if (!mTimeFrameGPU->getNTracklets()[iLayer + 1] || !mTimeFrameGPU->getNTracklets()[iLayer]) {
172-
LOGP(info, "continuing here");
173172
continue;
174173
}
175-
LOGP(info, "+> {}", mTimeFrameGPU->getNTracklets()[iLayer]);
176174
const int currentLayerTrackletsNum{static_cast<int>(mTimeFrameGPU->getNTracklets()[iLayer])};
177175
countCellsHandler(mTimeFrameGPU->getDeviceArrayClusters(),
178176
mTimeFrameGPU->getDeviceArrayUnsortedClusters(),

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -863,7 +863,6 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
863863
thrust::sort(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::sort_tracklets());
864864
auto unique_end = thrust::unique(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::equal_tracklets());
865865
nTracklets[iLayer] = unique_end - tracklets_ptr;
866-
LOGP(info, "=> {} {}", nTracklets[iLayer], unique_end - tracklets_ptr);
867866
if (iLayer > 0) {
868867
gpuCheckError(cudaMemset(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int)));
869868
gpu::compileTrackletsLookupTableKernel<<<nBlocks, nThreads>>>(spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]);

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in
226226
/// Compute LUT
227227
std::exclusive_scan(lut.begin(), lut.end(), lut.begin(), 0);
228228
lut.push_back(trkl.size());
229-
LOGP(info, "CPU layer {} -> old size: {} - new size: {}", iLayer, oldsize, trkl.size());
230229
}
231230
/// Layer 0 is done outside the loop
232231
std::sort(tf->getTracklets()[0].begin(), tf->getTracklets()[0].end(), [](const Tracklet& a, const Tracklet& b) {

0 commit comments

Comments
 (0)