Skip to content

Commit 94be8d9

Browse files
committed
Move multiplicity mask to a vector<uint8_t>
1 parent 948620f commit 94be8d9

File tree

10 files changed

+163
-131
lines changed

10 files changed

+163
-131
lines changed

Detectors/ITSMFT/ITS/reconstruction/include/ITSReconstruction/FastMultEst.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ struct FastMultEst {
4545

4646
static uint32_t getCurrentRandomSeed();
4747
int selectROFs(const gsl::span<const o2::itsmft::ROFRecord> rofs, const gsl::span<const o2::itsmft::CompClusterExt> clus,
48-
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<bool>& sel);
48+
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<uint8_t>& sel);
4949

5050
void fillNClPerLayer(const gsl::span<const o2::itsmft::CompClusterExt>& clusters);
5151
float process(const std::array<int, NLayers> ncl)

Detectors/ITSMFT/ITS/reconstruction/src/FastMultEst.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ float FastMultEst::processNoiseImposed(const std::array<int, NLayers> ncl)
125125
}
126126

127127
int FastMultEst::selectROFs(const gsl::span<const o2::itsmft::ROFRecord> rofs, const gsl::span<const o2::itsmft::CompClusterExt> clus,
128-
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<bool>& sel)
128+
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<uint8_t>& sel)
129129
{
130130
int nrof = rofs.size(), nsel = 0;
131131
const auto& multEstConf = FastMultEstConfig::Instance(); // parameters for mult estimation and cuts

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ class TimeFrameGPU : public TimeFrame
5454
void loadTrackingFrameInfoDevice(const int);
5555
void loadUnsortedClustersDevice(const int);
5656
void loadClustersDevice(const int);
57+
void loadMultiplicityCutMask(const int);
58+
///
5759
void loadTrackletsDevice();
5860
void loadTrackletsLUTDevice();
5961
void loadCellsDevice();
@@ -116,6 +118,7 @@ class TimeFrameGPU : public TimeFrame
116118
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
117119
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
118120
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
121+
bool* getDevicemMultMask() { return mMultMaskDevice; }
119122

120123
void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;
121124

@@ -147,6 +150,7 @@ class TimeFrameGPU : public TimeFrame
147150
int* mROFramesPVDevice;
148151

149152
// Hybrid pref
153+
bool* mMultMaskDevice;
150154
std::array<Cluster*, nLayers> mClustersDevice;
151155
std::array<Cluster*, nLayers> mUnsortedClustersDevice;
152156
const Cluster** mClustersDeviceArray;
@@ -186,10 +190,6 @@ class TimeFrameGPU : public TimeFrame
186190
std::vector<std::vector<int>> mNVerticesInChunks;
187191
std::vector<std::vector<o2::MCCompLabel>> mLabelsInChunks;
188192

189-
// Host memory used only in GPU tracking
190-
std::vector<int> mHostNTracklets;
191-
std::vector<int> mHostNCells;
192-
193193
// Temporary buffer for storing output tracks from GPU tracking
194194
std::vector<TrackITSExt> mTrackITSExt;
195195
};

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,10 @@ GPUg() void fitTrackSeedsKernel(
5252

5353
template <int nLayers = 7>
5454
void computeTrackletsInRofsHandler(const int startROF,
55-
const int endROF);
55+
const int endROF,
56+
const int vertexId,
57+
const int nBlocks,
58+
const int nThreads);
5659

5760
void countCellsHandler(const Cluster** sortedClusters,
5861
const Cluster** unsortedClusters,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,17 @@ void TimeFrameGPU<nLayers>::loadTrackingFrameInfoDevice(const int iteration)
146146
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
147147
}
148148

149+
template <int nLayers>
150+
void TimeFrameGPU<nLayers>::loadMultiplicityCutMask(const int iteration)
151+
{
152+
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading multiplicity cut mask");
153+
if (!iteration) {
154+
LOGP(info, "gpu-transfer: loading multiplicity cut mask with {} elements, for {} MB.", mMultiplicityCutMask.size(), mMultiplicityCutMask.size() * sizeof(bool) / MB);
155+
allocMemAsync(reinterpret_cast<void**>(&mMultMaskDevice), mMultiplicityCutMask.size() * sizeof(bool), nullptr, getExtAllocator());
156+
checkGPUError(cudaMemcpyAsync(mMultMaskDevice, mMultiplicityCutMask.data(), mMultiplicityCutMask.size() * sizeof(uint8_t), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
157+
}
158+
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
159+
}
149160
template <int nLayers>
150161
void TimeFrameGPU<nLayers>::loadTrackletsDevice()
151162
{

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ void TrackerTraitsGPU<nLayers>::initialiseTimeFrame(const int iteration)
3232
mTimeFrameGPU->loadClustersDevice(iteration);
3333
mTimeFrameGPU->loadUnsortedClustersDevice(iteration);
3434
mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration);
35+
mTimeFrameGPU->loadMultiplicityCutMask(iteration);
3536
}
3637

3738
template <int nLayers>
@@ -84,13 +85,18 @@ int TrackerTraitsGPU<nLayers>::getTFNumberOfCells() const
8485
// Hybrid tracklet finding for one ROF slice and one vertex index.
// Steps visible in this block:
//  1. run the base-class TrackerTraits::computeLayerTracklets for the same arguments;
//  2. build a one-element vertex span from the configured Diamond position;
//  3. derive the [startROF, endROF] range of the slice;
//  4. hand that range to computeTrackletsInRofsHandler.
// NOTE(review): this is a diff rendering. The line marked "93-" is the call removed
// by the commit; the lines marked "95+".."99+" are the call that replaces it.
template <int nLayers>
8586
void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int iROFslice, int iVertex)
8687
{
88+
// GPU tracking parameters; only nBlocks and nThreads are read below
// (presumably the kernel launch configuration -- confirm in the handler).
auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance();
8789
TrackerTraits::computeLayerTracklets(iteration, iROFslice, iVertex);
8890

8991
// Single vertex placed at the Diamond coordinates of this iteration's parameters.
const Vertex diamondVert({mTrkParams[iteration].Diamond[0], mTrkParams[iteration].Diamond[1], mTrkParams[iteration].Diamond[2]}, {25.e-6f, 0.f, 0.f, 25.e-6f, 0.f, 36.f}, 1, 1.f);
9092
// NOTE(review): diamondSpan is not used by any line shown in this function.
gsl::span<const Vertex> diamondSpan(&diamondVert, 1);
9193
// With nROFsPerIterations > 0 the slice starts at iROFslice * nROFsPerIterations; otherwise at 0.
int startROF{mTrkParams[iteration].nROFsPerIterations > 0 ? iROFslice * mTrkParams[iteration].nROFsPerIterations : 0};
9294
// With nROFsPerIterations > 0 the end is the next slice start plus DeltaROF; otherwise the total ROF count.
int endROF{mTrkParams[iteration].nROFsPerIterations > 0 ? (iROFslice + 1) * mTrkParams[iteration].nROFsPerIterations + mTrkParams[iteration].DeltaROF : mTimeFrameGPU->getNrof()};
93-
computeTrackletsInRofsHandler<nLayers>(startROF, endROF);
95+
computeTrackletsInRofsHandler<nLayers>(startROF,
96+
endROF,
97+
iVertex,
98+
conf.nBlocks,
99+
conf.nThreads);
94100
}
95101

96102
template <int nLayers>

0 commit comments

Comments
 (0)