Skip to content

Commit fb4df11

Browse files
authored
ITS: template Tracker, Cell and Road (#14597)
1 parent d4e16e1 commit fb4df11

File tree

17 files changed

+353
-231
lines changed

17 files changed

+353
-231
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ namespace o2::its::gpu
2727
template <int nLayers = 7>
2828
class TimeFrameGPU : public TimeFrame<nLayers>
2929
{
30+
using typename TimeFrame<nLayers>::CellSeedN;
31+
3032
public:
3133
TimeFrameGPU();
3234
~TimeFrameGPU() = default;
@@ -64,7 +66,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
6466
void loadTrackSeedsDevice();
6567
void loadTrackSeedsChi2Device();
6668
void loadRoadsDevice();
67-
void loadTrackSeedsDevice(bounded_vector<CellSeed>&);
69+
void loadTrackSeedsDevice(bounded_vector<CellSeedN>&);
6870
void createTrackletsBuffers(const int);
6971
void createTrackletsBuffersArray(const int);
7072
void createCellsBuffers(const int);
@@ -75,8 +77,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
7577
void createNeighboursIndexTablesDevice(const int);
7678
void createNeighboursDevice(const unsigned int layer);
7779
void createNeighboursLUTDevice(const int, const unsigned int);
78-
void createTrackITSExtDevice(bounded_vector<CellSeed>&);
79-
void downloadTrackITSExtDevice(bounded_vector<CellSeed>&);
80+
void createTrackITSExtDevice(bounded_vector<CellSeedN>&);
81+
void downloadTrackITSExtDevice(bounded_vector<CellSeedN>&);
8082
void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
8183
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
8284
void downloadCellsDevice();
@@ -125,8 +127,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
125127
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
126128
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
127129
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
128-
CellSeed** getDeviceArrayCells() { return mCellsDeviceArray; }
129-
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
130+
CellSeedN** getDeviceArrayCells() { return mCellsDeviceArray; }
131+
CellSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
130132
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
131133
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
132134
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
@@ -145,7 +147,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
145147
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
146148
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
147149
gsl::span<Tracklet*> getDeviceTracklets() { return mTrackletsDevice; }
148-
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
150+
gsl::span<CellSeedN*> getDeviceCells() { return mCellsDevice; }
149151

150152
// Overridden getters
151153
int getNumberOfTracklets() const final;
@@ -189,10 +191,10 @@ class TimeFrameGPU : public TimeFrame<nLayers>
189191
int** mNeighboursCellDeviceArray{nullptr};
190192
int** mNeighboursCellLUTDeviceArray{nullptr};
191193
int** mTrackletsLUTDeviceArray{nullptr};
192-
std::array<CellSeed*, nLayers - 2> mCellsDevice;
193-
CellSeed** mCellsDeviceArray;
194+
std::array<CellSeedN*, nLayers - 2> mCellsDevice;
195+
CellSeedN** mCellsDeviceArray;
194196
std::array<int*, nLayers - 3> mNeighboursIndexTablesDevice;
195-
CellSeed* mTrackSeedsDevice{nullptr};
197+
CellSeedN* mTrackSeedsDevice{nullptr};
196198
std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;
197199
o2::track::TrackParCovF** mCellSeedsDeviceArray;
198200
std::array<float*, nLayers - 2> mCellSeedsChi2Device;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class TrackerTraitsGPU final : public TrackerTraits<nLayers>
4949

5050
private:
5151
IndexTableUtils* mDeviceIndexTableUtils;
52-
gpu::TimeFrameGPU<7>* mTimeFrameGPU;
52+
gpu::TimeFrameGPU<nLayers>* mTimeFrameGPU;
5353
};
5454

5555
} // namespace o2::its

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 20 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -16,51 +16,21 @@
1616
#include <gsl/gsl>
1717

1818
#include "ITStracking/BoundedAllocator.h"
19+
#include "ITStracking/Definitions.h"
1920
#include "ITStrackingGPU/Utils.h"
2021
#include "DetectorsBase/Propagator.h"
2122
#include "GPUCommonDef.h"
2223

2324
namespace o2::its
2425
{
26+
template <int>
2527
class CellSeed;
28+
class TrackingFrameInfo;
29+
class Tracklet;
30+
class IndexTableUtils;
31+
class Cluster;
32+
class TrackITSExt;
2633
class ExternalAllocator;
27-
namespace gpu
28-
{
29-
30-
#ifdef GPUCA_GPUCODE // GPUg() global kernels must only when compiled by GPU compiler
31-
32-
GPUdii() int4 getEmptyBinsRect()
33-
{
34-
return int4{0, 0, 0, 0};
35-
}
36-
37-
GPUdii() bool fitTrack(TrackITSExt& track,
38-
int start,
39-
int end,
40-
int step,
41-
float chi2clcut,
42-
float chi2ndfcut,
43-
float maxQoverPt,
44-
int nCl,
45-
float Bz,
46-
TrackingFrameInfo** tfInfos,
47-
const o2::base::Propagator* prop,
48-
o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorImpl<float>::MatCorrType::USEMatCorrNONE);
49-
50-
template <int nLayers = 7>
51-
GPUg() void fitTrackSeedsKernel(CellSeed* trackSeeds,
52-
const TrackingFrameInfo** foundTrackingFrameInfo,
53-
o2::its::TrackITSExt* tracks,
54-
const float* minPts,
55-
const unsigned int nSeeds,
56-
const float Bz,
57-
const int startLevel,
58-
float maxChi2ClusterAttachment,
59-
float maxChi2NDF,
60-
const o2::base::Propagator* propagator,
61-
const o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorF::MatCorrType::USEMatCorrLUT);
62-
#endif
63-
} // namespace gpu
6434

6535
template <int nLayers = 7>
6636
void countTrackletsInROFsHandler(const IndexTableUtils* utils,
@@ -131,14 +101,15 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
131101
const int nThreads,
132102
gpu::Streams& streams);
133103

104+
template <int nLayers>
134105
void countCellsHandler(const Cluster** sortedClusters,
135106
const Cluster** unsortedClusters,
136107
const TrackingFrameInfo** tfInfo,
137108
Tracklet** tracklets,
138109
int** trackletsLUT,
139110
const int nTracklets,
140111
const int layer,
141-
CellSeed* cells,
112+
CellSeed<nLayers>* cells,
142113
int** cellsLUTsDeviceArray,
143114
int* cellsLUTsHost,
144115
const int deltaROF,
@@ -151,14 +122,15 @@ void countCellsHandler(const Cluster** sortedClusters,
151122
const int nThreads,
152123
gpu::Streams& streams);
153124

125+
template <int nLayers>
154126
void computeCellsHandler(const Cluster** sortedClusters,
155127
const Cluster** unsortedClusters,
156128
const TrackingFrameInfo** tfInfo,
157129
Tracklet** tracklets,
158130
int** trackletsLUT,
159131
const int nTracklets,
160132
const int layer,
161-
CellSeed* cells,
133+
CellSeed<nLayers>* cells,
162134
int** cellsLUTsDeviceArray,
163135
int* cellsLUTsHost,
164136
const int deltaROF,
@@ -170,7 +142,8 @@ void computeCellsHandler(const Cluster** sortedClusters,
170142
const int nThreads,
171143
gpu::Streams& streams);
172144

173-
void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
145+
template <int nLayers>
146+
void countCellNeighboursHandler(CellSeed<nLayers>** cellsLayersDevice,
174147
int* neighboursLUTs,
175148
int** cellsLUTs,
176149
gpuPair<int, int>* cellNeighbours,
@@ -188,7 +161,8 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
188161
const int nThreads,
189162
gpu::Stream& stream);
190163

191-
void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
164+
template <int nLayers>
165+
void computeCellNeighboursHandler(CellSeed<nLayers>** cellsLayersDevice,
192166
int* neighboursLUTs,
193167
int** cellsLUTs,
194168
gpuPair<int, int>* cellNeighbours,
@@ -214,14 +188,14 @@ int filterCellNeighboursHandler(gpuPair<int, int>*,
214188
template <int nLayers = 7>
215189
void processNeighboursHandler(const int startLayer,
216190
const int startLevel,
217-
CellSeed** allCellSeeds,
218-
CellSeed* currentCellSeeds,
191+
CellSeed<nLayers>** allCellSeeds,
192+
CellSeed<nLayers>* currentCellSeeds,
219193
std::array<int, nLayers - 2>& nCells,
220194
const unsigned char** usedClusters,
221195
std::array<int*, nLayers - 2>& neighbours,
222196
gsl::span<int*> neighboursDeviceLUTs,
223197
const TrackingFrameInfo** foundTrackingFrameInfo,
224-
bounded_vector<CellSeed>& seedsHost,
198+
bounded_vector<CellSeed<nLayers>>& seedsHost,
225199
const float bz,
226200
const float MaxChi2ClusterAttachment,
227201
const float maxChi2NDF,
@@ -231,7 +205,8 @@ void processNeighboursHandler(const int startLayer,
231205
const int nBlocks,
232206
const int nThreads);
233207

234-
void trackSeedHandler(CellSeed* trackSeeds,
208+
template <int nLayers = 7>
209+
void trackSeedHandler(CellSeed<nLayers>* trackSeeds,
235210
const TrackingFrameInfo** foundTrackingFrameInfo,
236211
o2::its::TrackITSExt* tracks,
237212
std::vector<float>& minPtsHost,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -355,11 +355,11 @@ void TimeFrameGPU<nLayers>::loadCellsDevice()
355355
{
356356
GPUTimer timer(mGpuStreams, "loading cell seeds", nLayers - 2);
357357
for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) {
358-
GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeed) / constants::MB);
359-
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeed), mGpuStreams[iLayer], this->getExtAllocator());
358+
GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeedN) / constants::MB);
359+
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeedN), mGpuStreams[iLayer], this->getExtAllocator());
360360
allocMemAsync(reinterpret_cast<void**>(&mNeighboursIndexTablesDevice[iLayer]), (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->getExtAllocator()); // accessory for the neigh. finding.
361361
GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[iLayer], 0, (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer].get()));
362-
GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeed), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get()));
362+
GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeedN), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get()));
363363
}
364364
}
365365

@@ -387,8 +387,8 @@ void TimeFrameGPU<nLayers>::createCellsBuffersArray(const int iteration)
387387
{
388388
if (!iteration) {
389389
GPUTimer timer("creating cells buffers array");
390-
allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (nLayers - 2) * sizeof(CellSeed*), this->getExtAllocator());
391-
GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice));
390+
allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (nLayers - 2) * sizeof(CellSeedN*), this->getExtAllocator());
391+
GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeedN*), cudaMemcpyHostToDevice));
392392
}
393393
}
394394

@@ -399,9 +399,9 @@ void TimeFrameGPU<nLayers>::createCellsBuffers(const int layer)
399399
mNCells[layer] = 0;
400400
GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
401401
mGpuStreams[layer].sync(); // ensure number of cells is correct
402-
GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB);
403-
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer], this->getExtAllocator());
404-
GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeed*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
402+
GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeedN) / constants::MB);
403+
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer], this->getExtAllocator());
404+
GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeedN*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
405405
}
406406

407407
template <int nLayers>
@@ -426,13 +426,13 @@ void TimeFrameGPU<nLayers>::loadRoadsDevice()
426426
}
427427

428428
template <int nLayers>
429-
void TimeFrameGPU<nLayers>::loadTrackSeedsDevice(bounded_vector<CellSeed>& seeds)
429+
void TimeFrameGPU<nLayers>::loadTrackSeedsDevice(bounded_vector<CellSeedN>& seeds)
430430
{
431431
GPUTimer timer("loading track seeds");
432-
GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeed) / constants::MB);
433-
allocMem(reinterpret_cast<void**>(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeed), this->getExtAllocator());
434-
GPUChkErrS(cudaHostRegister(seeds.data(), seeds.size() * sizeof(CellSeed), cudaHostRegisterPortable));
435-
GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeed), cudaMemcpyHostToDevice));
432+
GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeedN) / constants::MB);
433+
allocMem(reinterpret_cast<void**>(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeedN), this->getExtAllocator());
434+
GPUChkErrS(cudaHostRegister(seeds.data(), seeds.size() * sizeof(CellSeedN), cudaHostRegisterPortable));
435+
GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeedN), cudaMemcpyHostToDevice));
436436
}
437437

438438
template <int nLayers>
@@ -450,7 +450,7 @@ void TimeFrameGPU<nLayers>::createNeighboursDevice(const unsigned int layer)
450450
}
451451

452452
template <int nLayers>
453-
void TimeFrameGPU<nLayers>::createTrackITSExtDevice(bounded_vector<CellSeed>& seeds)
453+
void TimeFrameGPU<nLayers>::createTrackITSExtDevice(bounded_vector<CellSeedN>& seeds)
454454
{
455455
GPUTimer timer("reserving tracks");
456456
mTrackITSExt = bounded_vector<TrackITSExt>(seeds.size(), {}, this->getMemoryPool().get());
@@ -465,9 +465,9 @@ void TimeFrameGPU<nLayers>::downloadCellsDevice()
465465
{
466466
GPUTimer timer(mGpuStreams, "downloading cells", nLayers - 2);
467467
for (int iLayer{0}; iLayer < nLayers - 2; ++iLayer) {
468-
GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / constants::MB);
468+
GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeedN) / constants::MB);
469469
this->mCells[iLayer].resize(mNCells[iLayer]);
470-
GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeed), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get()));
470+
GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeedN), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get()));
471471
}
472472
}
473473

@@ -499,7 +499,7 @@ void TimeFrameGPU<nLayers>::downloadNeighboursLUTDevice(bounded_vector<int>& lut
499499
}
500500

501501
template <int nLayers>
502-
void TimeFrameGPU<nLayers>::downloadTrackITSExtDevice(bounded_vector<CellSeed>& seeds)
502+
void TimeFrameGPU<nLayers>::downloadTrackITSExtDevice(bounded_vector<CellSeedN>& seeds)
503503
{
504504
GPUTimer timer("downloading tracks");
505505
GPULog("gpu-transfer: downloading {} tracks, for {:.2f} MB.", mTrackITSExt.size(), mTrackITSExt.size() * sizeof(o2::its::TrackITSExt) / constants::MB);

0 commit comments

Comments
 (0)