Skip to content

Commit 1bd7e9f

Browse files
committed
Fix indices for used clusters
1 parent 4613f73 commit 1bd7e9f

File tree

5 files changed

+29
-14
lines changed

5 files changed

+29
-14
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ class TimeFrameGPU : public TimeFrame
6363
void loadVertices(const int);
6464

6565
///
66-
void createTrackletsLUTDevice();
66+
void createTrackletsLUTDevice(const int);
6767
void loadTrackletsDevice();
6868
void loadTrackletsLUTDevice();
6969
void loadCellsDevice();

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,7 @@ void TimeFrameGPU<nLayers>::createUsedClustersDevice(const int iteration)
176176
template <int nLayers>
177177
void TimeFrameGPU<nLayers>::loadUsedClustersDevice()
178178
{
179-
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating used clusters flags");
179+
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading used clusters flags");
180180
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
181181
LOGP(debug, "gpu-transfer: loading {} used clusters flags on layer {}, for {} MB.", mUsedClusters[iLayer].size(), iLayer, mClusters[iLayer].size() * sizeof(unsigned char) / MB);
182182
checkGPUError(cudaMemcpyAsync(mUsedClustersDevice[iLayer], mUsedClusters[iLayer].data(), mUsedClusters[iLayer].size() * sizeof(unsigned char), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
@@ -246,20 +246,25 @@ void TimeFrameGPU<nLayers>::loadVertices(const int iteration)
246246
}
247247

248248
template <int nLayers>
249-
void TimeFrameGPU<nLayers>::createTrackletsLUTDevice()
249+
void TimeFrameGPU<nLayers>::createTrackletsLUTDevice(const int iteration)
250250
{
251-
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating cells LUTs");
251+
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating tracklets LUTs");
252252
for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
253-
LOGP(debug, "gpu-transfer: creating tracklets LUT for {} elements on layer {}, for {} MB.", mClusters[iLayer].size() + 1, iLayer, (mClusters[iLayer].size() + 1) * sizeof(int) / MB);
254-
allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[iLayer]), (mClusters[iLayer].size() + 1) * sizeof(int), nullptr, getExtAllocator());
253+
if (!iteration) {
254+
LOGP(debug, "gpu-transfer: creating tracklets LUT for {} elements on layer {}, for {} MB.", mClusters[iLayer].size() + 1, iLayer, (mClusters[iLayer].size() + 1) * sizeof(int) / MB);
255+
allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[iLayer]), (mClusters[iLayer].size() + 1) * sizeof(int), nullptr, getExtAllocator());
256+
}
255257
checkGPUError(cudaMemsetAsync(mTrackletsLUTDevice[iLayer], 0, (mClusters[iLayer].size() + 1) * sizeof(int), mGpuStreams[0].get()));
256258
}
257-
allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (nLayers - 2) * sizeof(int*), nullptr, getExtAllocator());
258-
checkGPUError(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), mTrackletsLUTDevice.size() * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
259+
if (!iteration) {
260+
allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (nLayers - 1) * sizeof(int*), nullptr, getExtAllocator());
261+
checkGPUError(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), mTrackletsLUTDevice.size() * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
262+
}
259263
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
260264
}
261265

262-
template<int nLayers> void TimeFrameGPU<nLayers>::createTrackletsBuffers()
266+
template <int nLayers>
267+
void TimeFrameGPU<nLayers>::createTrackletsBuffers()
263268
{
264269
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating cells buffers");
265270
for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int
9292
{
9393
auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance();
9494
TrackerTraits::computeLayerTracklets(iteration, iROFslice, iVertex);
95-
mTimeFrameGPU->createTrackletsLUTDevice();
95+
mTimeFrameGPU->createTrackletsLUTDevice(iteration);
9696

9797
const Vertex diamondVert({mTrkParams[iteration].Diamond[0], mTrkParams[iteration].Diamond[1], mTrkParams[iteration].Diamond[2]}, {25.e-6f, 0.f, 0.f, 25.e-6f, 0.f, 36.f}, 1, 1.f);
9898
gsl::span<const Vertex> diamondSpan(&diamondVert, 1);

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -472,8 +472,8 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
472472
const float NSigmaCut,
473473
const float phiCut,
474474
const float resolutionPV,
475-
const float maxR,
476475
const float minR,
476+
const float maxR,
477477
const float positionResolution,
478478
const float meanDeltaR = -666.f,
479479
const float MSAngle = -666.f)
@@ -496,7 +496,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
496496
unsigned int storedTracklets{0};
497497
auto currentCluster{clustersCurrentLayer[currentClusterIndex]};
498498
const int currentSortedIndex{ROFClusters[layerIndex][rof0] + currentClusterIndex};
499-
if (usedClusters[layerIndex][currentSortedIndex]) {
499+
if (usedClusters[layerIndex][currentCluster.clusterId]) {
500500
continue;
501501
}
502502

@@ -774,8 +774,6 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
774774
resolutions[iLayer],
775775
radii[iLayer + 1] - radii[iLayer],
776776
mulScatAng[iLayer]);
777-
// gpuCheckError(cudaPeekAtLastError());
778-
// gpuCheckError(cudaDeviceSynchronize());
779777
void* d_temp_storage = nullptr;
780778
size_t temp_storage_bytes = 0;
781779
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -193,6 +193,18 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in
193193
}
194194
}
195195
}
196+
// if (rof0 == 81) {
197+
// printf("CPU layer: %d -> %f %f %f %f %f %f %f %f\n",
198+
// iLayer,
199+
// mTrkParams[iteration].NSigmaCut,
200+
// tf->getPhiCut(iLayer),
201+
// mTrkParams[iteration].PVres,
202+
// tf->getMinR(iLayer + 1),
203+
// tf->getMaxR(iLayer + 1),
204+
// tf->getPositionResolution(iLayer),
205+
// meanDeltaR,
206+
// tf->getMSangle(iLayer));
207+
// }
196208
}
197209
}
198210
if (!tf->checkMemory(mTrkParams[iteration].MaxMemory)) {

0 commit comments

Comments
 (0)