Skip to content

Commit fb7b17c

Browse files
authored
ITS::gpu: Update track selection logics to the state of the art (#13816) (#13899)
Add processNeighbours GPU kernel and handler Update Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt Fix second iteration Move the whole processNeighbours on GPU
1 parent d399bee commit fb7b17c

File tree

6 files changed

+157
-177
lines changed

6 files changed

+157
-177
lines changed

DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ class TrackParametrization
160160
GPUd() value_t getZ() const;
161161
GPUd() value_t getSnp() const;
162162
GPUd() value_t getTgl() const;
163-
GPUd() value_t getQ2Pt() const;
163+
GPUhd() value_t getQ2Pt() const;
164164
GPUd() value_t getCharge2Pt() const;
165165
GPUd() int getAbsCharge() const;
166166
GPUd() PID getPID() const;
@@ -357,7 +357,7 @@ GPUdi() auto TrackParametrization<value_T>::getTgl() const -> value_t
357357

358358
//____________________________________________________________
359359
template <typename value_T>
360-
GPUdi() auto TrackParametrization<value_T>::getQ2Pt() const -> value_t
360+
GPUhdi() auto TrackParametrization<value_T>::getQ2Pt() const -> value_t
361361
{
362362
return mP[kQ2Pt];
363363
}

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ class TimeFrameGPU : public TimeFrame
116116
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
117117
gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
118118
gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
119+
std::array<int*, nLayers - 2>& getDeviceNeighboursAll() { return mNeighboursDevice; }
119120
int* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
120121
int** getDeviceNeighboursArray() { return mNeighboursDeviceArray; }
121122
TrackingFrameInfo* getDeviceTrackingFrameInfo(const int);
@@ -142,6 +143,7 @@ class TimeFrameGPU : public TimeFrame
142143
// Host-specific getters
143144
gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
144145
gsl::span<int, nLayers - 2> getNCells() { return mNCells; }
146+
std::array<int, nLayers - 2>& getArrayNCells() { return mNCells; }
145147

146148
// Host-available device getters
147149
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,19 +186,17 @@ void processNeighboursHandler(const int startLayer,
186186
const int startLevel,
187187
CellSeed** allCellSeeds,
188188
CellSeed* currentCellSeeds,
189-
const unsigned int nCurrentCells,
189+
std::array<int, nLayers - 2>& nCells,
190190
const unsigned char** usedClusters,
191-
int* neighbours,
191+
std::array<int*, nLayers - 2>& neighbours,
192192
gsl::span<int*> neighboursDeviceLUTs,
193193
const TrackingFrameInfo** foundTrackingFrameInfo,
194+
std::vector<CellSeed>& seedsHost,
194195
const float bz,
195196
const float MaxChi2ClusterAttachment,
197+
const float maxChi2NDF,
196198
const o2::base::Propagator* propagator,
197199
const o2::base::PropagatorF::MatCorrType matCorrType,
198-
const std::vector<int>& lastCellIdHost, // temporary host vector
199-
const std::vector<CellSeed>& lastCellSeedHost, // temporary host vector
200-
std::vector<int>& updatedCellIdHost, // temporary host vector
201-
std::vector<CellSeed>& updatedCellSeedHost, // temporary host vector
202200
const int nBlocks,
203201
const int nThreads);
204202

Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
if(CUDA_ENABLED)
1414
find_package(CUDAToolkit)
1515
message(STATUS "Building ITS CUDA tracker")
16-
# add_compile_options(-O0 -g -lineinfo -fPIC)
16+
add_compile_options(-O0 -g -lineinfo -fPIC)
1717
# add_compile_definitions(ITS_MEASURE_GPU_TIME)
1818
o2_add_library(ITStrackingCUDA
1919
SOURCES ClusterLinesGPU.cu

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 11 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,6 @@ void TrackerTraitsGPU<nLayers>::computeCellsHybrid(const int iteration)
205205
conf.nBlocks,
206206
conf.nThreads);
207207
}
208-
// Needed for processNeighbours() which is still on CPU.
209-
mTimeFrameGPU->downloadCellsDevice();
210-
mTimeFrameGPU->downloadCellsLUTDevice();
211208
}
212209

213210
template <int nLayers>
@@ -221,11 +218,11 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighboursHybrid(const int iteration)
221218
mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].clear();
222219
mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].resize(nextLayerCellsNum, 0);
223220

224-
if (mTimeFrameGPU->getCells()[iLayer + 1].empty() ||
225-
mTimeFrameGPU->getCellsLookupTable()[iLayer].empty()) {
226-
mTimeFrameGPU->getCellsNeighbours()[iLayer].clear();
227-
continue;
228-
}
221+
// if (mTimeFrameGPU->getCells()[iLayer + 1].empty() ||
222+
// mTimeFrameGPU->getCellsLookupTable()[iLayer].empty()) {
223+
// mTimeFrameGPU->getCellsNeighbours()[iLayer].clear();
224+
// continue;
225+
// }
229226

230227
mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum);
231228
countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(),
@@ -267,7 +264,6 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighboursHybrid(const int iteration)
267264
cellsNeighboursLayer[iLayer].size());
268265
}
269266
mTimeFrameGPU->createNeighboursDeviceArray();
270-
mTimeFrameGPU->downloadCellsDevice();
271267
mTimeFrameGPU->unregisterRest();
272268
};
273269

@@ -289,55 +285,21 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
289285
startLevel,
290286
mTimeFrameGPU->getDeviceArrayCells(),
291287
mTimeFrameGPU->getDeviceCells()[startLayer],
292-
mTimeFrameGPU->getNCells()[startLayer],
288+
mTimeFrameGPU->getArrayNCells(),
293289
mTimeFrameGPU->getDeviceArrayUsedClusters(),
294-
mTimeFrameGPU->getDeviceNeighbours(startLayer - 1),
290+
mTimeFrameGPU->getDeviceNeighboursAll(),
295291
mTimeFrameGPU->getDeviceNeighboursLUTs(),
296292
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
293+
trackSeeds,
297294
mBz,
298-
mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
295+
mTrkParams[0].MaxChi2ClusterAttachment,
296+
mTrkParams[0].MaxChi2NDF,
299297
mTimeFrameGPU->getDevicePropagator(),
300298
mCorrType,
301-
lastCellId, // temporary host vector
302-
lastCellSeed, // temporary host vector
303-
updatedCellId, // temporary host vectors
304-
updatedCellSeed, // temporary host vectors
305299
conf.nBlocks,
306300
conf.nThreads);
307-
308-
int level = startLevel;
309-
for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) {
310-
lastCellSeed.swap(updatedCellSeed);
311-
lastCellId.swap(updatedCellId);
312-
std::vector<CellSeed>().swap(updatedCellSeed); /// tame the memory peaks
313-
updatedCellId.clear();
314-
processNeighboursHandler<nLayers>(iLayer,
315-
--level,
316-
mTimeFrameGPU->getDeviceArrayCells(),
317-
mTimeFrameGPU->getDeviceCells()[iLayer],
318-
mTimeFrameGPU->getNCells()[iLayer],
319-
mTimeFrameGPU->getDeviceArrayUsedClusters(),
320-
mTimeFrameGPU->getDeviceNeighbours(iLayer - 1),
321-
mTimeFrameGPU->getDeviceNeighboursLUTs(),
322-
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
323-
mBz,
324-
mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment
325-
mTimeFrameGPU->getDevicePropagator(),
326-
mCorrType,
327-
lastCellId, // temporary host vector
328-
lastCellSeed, // temporary host vector
329-
updatedCellId, // temporary host vectors
330-
updatedCellSeed, // temporary host vectors
331-
conf.nBlocks,
332-
conf.nThreads);
333-
}
334-
for (auto& seed : updatedCellSeed) {
335-
if (seed.getQ2Pt() > 1.e3 || seed.getChi2() > mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5)) {
336-
continue;
337-
}
338-
trackSeeds.push_back(seed);
339-
}
340301
}
302+
// fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory.
341303
if (!trackSeeds.size()) {
342304
LOGP(info, "No track seeds found, skipping track finding");
343305
continue;
@@ -362,9 +324,6 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
362324
mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds);
363325

364326
auto& tracks = mTimeFrameGPU->getTrackITSExt();
365-
std::sort(tracks.begin(), tracks.end(), [](const TrackITSExt& a, const TrackITSExt& b) {
366-
return a.getChi2() < b.getChi2();
367-
});
368327

369328
for (auto& track : tracks) {
370329
if (!track.getChi2()) {

0 commit comments

Comments
 (0)