Skip to content

Commit 33f3b8a

Browse files
committed
Fix access in tracklet finding
1 parent 81e1e5e commit 33f3b8a

File tree

6 files changed

+76
-41
lines changed

6 files changed

+76
-41
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class TimeFrameGPU : public TimeFrame
6363
void loadVertices(const int);
6464

6565
///
66+
void createTrackletsLUTDevice();
6667
void loadTrackletsDevice();
6768
void loadTrackletsLUTDevice();
6869
void loadCellsDevice();
@@ -120,7 +121,7 @@ class TimeFrameGPU : public TimeFrame
120121
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
121122
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
122123
const Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
123-
const int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
124+
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
124125
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
125126
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
126127
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
@@ -171,14 +172,14 @@ class TimeFrameGPU : public TimeFrame
171172
const int** mROFrameClustersDeviceArray;
172173
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
173174
const Tracklet** mTrackletsDeviceArray;
174-
const int** mTrackletsLUTDeviceArray;
175-
std::array<int*, nLayers - 2> mTrackletsLUTDevice;
175+
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
176176
std::array<int*, nLayers - 2> mCellsLUTDevice;
177177
std::array<int*, nLayers - 3> mNeighboursLUTDevice;
178178

179179
int** mCellsLUTDeviceArray;
180180
int** mNeighboursCellDeviceArray;
181181
int** mNeighboursCellLUTDeviceArray;
182+
int** mTrackletsLUTDeviceArray;
182183
std::array<CellSeed*, nLayers - 2> mCellsDevice;
183184
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
184185
CellSeed* mTrackSeedsDevice;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
6565
const int** ROFClusters,
6666
const unsigned char** usedClusters,
6767
const int** clustersIndexTables,
68+
int** trackletsLUTs,
6869
const int iteration,
6970
const float NSigmaCut,
7071
std::vector<float>& phiCuts,
@@ -81,7 +82,7 @@ void countCellsHandler(const Cluster** sortedClusters,
8182
const Cluster** unsortedClusters,
8283
const TrackingFrameInfo** tfInfo,
8384
const Tracklet** tracklets,
84-
const int** trackletsLUT,
85+
int** trackletsLUT,
8586
const int nTracklets,
8687
const int layer,
8788
CellSeed* cells,
@@ -98,7 +99,7 @@ void computeCellsHandler(const Cluster** sortedClusters,
9899
const Cluster** unsortedClusters,
99100
const TrackingFrameInfo** tfInfo,
100101
const Tracklet** tracklets,
101-
const int** trackletsLUT,
102+
int** trackletsLUT,
102103
const int nTracklets,
103104
const int layer,
104105
CellSeed* cells,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
if(CUDA_ENABLED)
1414
find_package(CUDAToolkit)
1515
message(STATUS "Building ITS CUDA tracker")
16-
# add_compile_options(-O0 -g -lineinfo -fPIC)
16+
add_compile_options(-O0 -g -lineinfo -fPIC)
1717
# add_compile_definitions(ITS_MEASURE_GPU_TIME)
1818
o2_add_library(ITStrackingCUDA
1919
SOURCES ClusterLinesGPU.cu

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,12 @@ void TimeFrameGPU<nLayers>::loadClustersIndexTables(const int iteration)
147147
if (!iteration) {
148148
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading sorted clusters");
149149
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
150-
LOGP(info, "gpu-transfer: loading clusters indextable for layer {} with {} elements, for {} MB.", iLayer, mIndexTables[iLayer].size(), mIndexTables[iLayer].size() * sizeof(int) / MB);
150+
LOGP(debug, "gpu-transfer: loading clusters indextable for layer {} with {} elements, for {} MB.", iLayer, mIndexTables[iLayer].size(), mIndexTables[iLayer].size() * sizeof(int) / MB);
151151
allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDevice[iLayer]), mIndexTables[iLayer].size() * sizeof(int), nullptr, getExtAllocator());
152152
checkGPUError(cudaMemcpyAsync(mClustersIndexTablesDevice[iLayer], mIndexTables[iLayer].data(), mIndexTables[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
153153
}
154154
allocMemAsync(reinterpret_cast<void**>(&mClustersIndexTablesDeviceArray), nLayers * sizeof(int), nullptr, getExtAllocator());
155-
checkGPUError(cudaMemcpyAsync(mClustersIndexTablesDeviceArray, mClustersIndexTablesDevice.data(), nLayers * sizeof(int), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
155+
checkGPUError(cudaMemcpyAsync(mClustersIndexTablesDeviceArray, mClustersIndexTablesDevice.data(), nLayers * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
156156
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
157157
}
158158
}
@@ -245,6 +245,32 @@ void TimeFrameGPU<nLayers>::loadVertices(const int iteration)
245245
}
246246
}
247247

248+
template <int nLayers>
249+
void TimeFrameGPU<nLayers>::createTrackletsLUTDevice()
250+
{
251+
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating cells LUTs");
252+
for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
253+
LOGP(debug, "gpu-transfer: creating tracklets LUT for {} elements on layer {}, for {} MB.", mClusters[iLayer].size() + 1, iLayer, (mClusters[iLayer].size() + 1) * sizeof(int) / MB);
254+
allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[iLayer]), (mClusters[iLayer].size() + 1) * sizeof(int), nullptr, getExtAllocator());
255+
checkGPUError(cudaMemsetAsync(mTrackletsLUTDevice[iLayer], 0, (mClusters[iLayer].size() + 1) * sizeof(int), mGpuStreams[0].get()));
256+
}
257+
allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (nLayers - 2) * sizeof(int*), nullptr, getExtAllocator());
258+
checkGPUError(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), mTrackletsLUTDevice.size() * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
259+
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
260+
}
261+
262+
// template<int nLayers> void TimeFrameGPU<nLayers>::createTrackletsBuffers()
263+
// {
264+
// START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating cells buffers");
265+
// for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
266+
// mNTracklets[iLayer] = 0;
267+
// checkGPUError(cudaMemcpyAsync(&mNTracklets[iLayer], mTrackletsLUTDevice[iLayer] + mClusters[iLayer].size(), sizeof(int), cudaMemcpyDeviceToHost));
268+
// LOGP(debug, "gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {} MB.", mNTracklets[layer], iLayer, mNTracklets[iLayer] * sizeof(CellSeed) / MB);
269+
// allocMemAsync(reinterpret_cast<void**>(&mTrackletsDevice[iLayer]), mNTracklets[iLayer] * sizeof(Tracklet), nullptr, getExtAllocator());
270+
// }
271+
// STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
272+
// }
273+
248274
template <int nLayers>
249275
void TimeFrameGPU<nLayers>::loadTrackletsDevice()
250276
{
@@ -267,11 +293,11 @@ void TimeFrameGPU<nLayers>::loadTrackletsLUTDevice()
267293
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading tracklets");
268294
for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) {
269295
LOGP(debug, "gpu-transfer: loading tracklets LUT for {} elements on layer {}, for {} MB", mTrackletsLookupTable[iLayer].size(), iLayer, mTrackletsLookupTable[iLayer].size() * sizeof(int) / MB);
270-
allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[iLayer]), mTrackletsLookupTable[iLayer].size() * sizeof(int), nullptr, getExtAllocator());
296+
// allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[iLayer]), mTrackletsLookupTable[iLayer].size() * sizeof(int), nullptr, getExtAllocator());
271297
checkGPUError(cudaHostRegister(mTrackletsLookupTable[iLayer].data(), mTrackletsLookupTable[iLayer].size() * sizeof(int), cudaHostRegisterPortable));
272298
checkGPUError(cudaMemcpyAsync(mTrackletsLUTDevice[iLayer], mTrackletsLookupTable[iLayer].data(), mTrackletsLookupTable[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice));
273299
}
274-
allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (nLayers - 2) * sizeof(int*), nullptr, getExtAllocator());
300+
// allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (nLayers - 2) * sizeof(int*), nullptr, getExtAllocator());
275301
checkGPUError(cudaHostRegister(mTrackletsLUTDevice.data(), (nLayers - 2) * sizeof(int*), cudaHostRegisterPortable));
276302
checkGPUError(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), (nLayers - 2) * sizeof(int*), cudaMemcpyHostToDevice));
277303
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int
9292
{
9393
auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance();
9494
TrackerTraits::computeLayerTracklets(iteration, iROFslice, iVertex);
95+
mTimeFrameGPU->createTrackletsLUTDevice();
9596

9697
const Vertex diamondVert({mTrkParams[iteration].Diamond[0], mTrkParams[iteration].Diamond[1], mTrkParams[iteration].Diamond[2]}, {25.e-6f, 0.f, 0.f, 25.e-6f, 0.f, 36.f}, 1, 1.f);
9798
gsl::span<const Vertex> diamondSpan(&diamondVert, 1);
@@ -112,6 +113,7 @@ void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int
112113
mTimeFrameGPU->getDeviceROframeClusters(),
113114
mTimeFrameGPU->getDeviceArrayUsedClusters(),
114115
mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
116+
mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
115117
iteration,
116118
mTrkParams[iteration].NSigmaCut,
117119
mTimeFrameGPU->getPhiCuts(),

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ GPUg() void computeLayerCellsKernel(
370370
const Cluster** unsortedClusters,
371371
const TrackingFrameInfo** tfInfo,
372372
const Tracklet** tracklets,
373-
const int** trackletsLUT,
373+
int** trackletsLUT,
374374
const int nTrackletsCurrent,
375375
const int layer,
376376
CellSeed* cells,
@@ -467,7 +467,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
467467
const unsigned char** usedClusters, // Used clusters
468468
const int** indexTables, // input data rof0-delta <rof0< rof0+delta (up to 3 rofs)
469469
// Tracklet* tracklets, // output data
470-
// int* trackletsLUT,
470+
int** trackletsLUT,
471471
const int iteration,
472472
const float NSigmaCut,
473473
const float phiCut,
@@ -548,7 +548,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
548548
if (deltaZ / sigmaZ < NSigmaCut && (deltaPhi < phiCut || o2::gpu::CAMath::Abs(deltaPhi - constants::math::TwoPi) < phiCut)) {
549549
// if (layerIndex > 0) {
550550
if constexpr (initRun) {
551-
// trackletsLUT[currentSortedIndex]++; // we need l0 as well for usual exclusive sums.
551+
trackletsLUT[layerIndex][currentSortedIndex]++; // we need l0 as well for usual exclusive sums.
552552
} else {
553553
// }
554554
const float phi{o2::gpu::CAMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, currentCluster.xCoordinate - nextCluster.xCoordinate)};
@@ -612,7 +612,7 @@ GPUg() void printBufferLayerOnThread(const int layer, const int* v, unsigned int
612612
}
613613
}
614614

615-
GPUg() void printMatrixRow(const int row, const int** mat, const unsigned int rowLength, const int len = 150, const unsigned int tId = 0)
615+
GPUg() void printMatrixRow(const int row, const int** mat, const unsigned int rowLength, const int len = 256 * 128 + 1, const unsigned int tId = 0)
616616
{
617617
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
618618
for (int i{0}; i < rowLength; ++i) {
@@ -709,6 +709,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
709709
const int** ROFClusters,
710710
const unsigned char** usedClusters,
711711
const int** clustersIndexTables,
712+
int** trackletsLUTs,
712713
const int iteration,
713714
const float NSigmaCut,
714715
std::vector<float>& phiCuts,
@@ -722,31 +723,34 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
722723
const int nThreads)
723724
{
724725
for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) {
725-
gpu::computeLayerTrackletsMultiROFKernel<<<nBlocks, nThreads>>>(
726-
utils,
727-
multMask,
728-
iLayer,
729-
startROF,
730-
endROF,
731-
maxROF,
732-
deltaROF,
733-
vertices,
734-
rofPV,
735-
nVertices,
736-
vertexId,
737-
clusters,
738-
ROFClusters,
739-
usedClusters,
740-
clustersIndexTables,
741-
iteration,
742-
NSigmaCut,
743-
phiCuts[iLayer],
744-
resolutionPV,
745-
minRs[iLayer + 1],
746-
maxRs[iLayer + 1],
747-
resolutions[iLayer],
748-
radii[iLayer + 1] - radii[iLayer],
749-
mulScatAng[iLayer]);
726+
// gpu::computeLayerTrackletsMultiROFKernel<<<1, 1>>>(
727+
// utils,
728+
// multMask,
729+
// iLayer,
730+
// startROF,
731+
// endROF,
732+
// maxROF,
733+
// deltaROF,
734+
// vertices,
735+
// rofPV,
736+
// nVertices,
737+
// vertexId,
738+
// clusters,
739+
// ROFClusters,
740+
// usedClusters,
741+
// clustersIndexTables,
742+
// trackletsLUTs,
743+
// iteration,
744+
// NSigmaCut,
745+
// phiCuts[iLayer],
746+
// resolutionPV,
747+
// minRs[iLayer + 1],
748+
// maxRs[iLayer + 1],
749+
// resolutions[iLayer],
750+
// radii[iLayer + 1] - radii[iLayer],
751+
// mulScatAng[iLayer]);
752+
gpuCheckError(cudaPeekAtLastError());
753+
gpuCheckError(cudaDeviceSynchronize());
750754
}
751755
}
752756

@@ -755,7 +759,7 @@ void countCellsHandler(
755759
const Cluster** unsortedClusters,
756760
const TrackingFrameInfo** tfInfo,
757761
const Tracklet** tracklets,
758-
const int** trackletsLUT,
762+
int** trackletsLUT,
759763
const int nTracklets,
760764
const int layer,
761765
CellSeed* cells,
@@ -806,7 +810,7 @@ void computeCellsHandler(
806810
const Cluster** unsortedClusters,
807811
const TrackingFrameInfo** tfInfo,
808812
const Tracklet** tracklets,
809-
const int** trackletsLUT,
813+
int** trackletsLUT,
810814
const int nTracklets,
811815
const int layer,
812816
CellSeed* cells,
@@ -994,6 +998,7 @@ template void computeTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
994998
const int** ROFClusters,
995999
const unsigned char** usedClusters,
9961000
const int** clustersIndexTables,
1001+
int** trackletsLUTs,
9971002
const int iteration,
9981003
const float NSigmaCut,
9991004
std::vector<float>& phiCuts,

0 commit comments

Comments
 (0)