Skip to content

Commit a6bc104

Browse files
committed
Fix tracklet LUTs issue
1 parent 33f3b8a commit a6bc104

File tree

3 files changed

+39
-36
lines changed

3 files changed

+39
-36
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,14 +292,12 @@ void TimeFrameGPU<nLayers>::loadTrackletsLUTDevice()
292292
{
293293
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading tracklets");
294294
for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) {
295-
LOGP(debug, "gpu-transfer: loading tracklets LUT for {} elements on layer {}, for {} MB", mTrackletsLookupTable[iLayer].size(), iLayer, mTrackletsLookupTable[iLayer].size() * sizeof(int) / MB);
296-
// allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDevice[iLayer]), mTrackletsLookupTable[iLayer].size() * sizeof(int), nullptr, getExtAllocator());
295+
LOGP(debug, "gpu-transfer: loading tracklets LUT for {} elements on layer {}, for {} MB", mTrackletsLookupTable[iLayer].size(), iLayer + 1, mTrackletsLookupTable[iLayer].size() * sizeof(int) / MB);
297296
checkGPUError(cudaHostRegister(mTrackletsLookupTable[iLayer].data(), mTrackletsLookupTable[iLayer].size() * sizeof(int), cudaHostRegisterPortable));
298-
checkGPUError(cudaMemcpyAsync(mTrackletsLUTDevice[iLayer], mTrackletsLookupTable[iLayer].data(), mTrackletsLookupTable[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice));
297+
checkGPUError(cudaMemcpyAsync(mTrackletsLUTDevice[iLayer + 1], mTrackletsLookupTable[iLayer].data(), mTrackletsLookupTable[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice));
299298
}
300-
// allocMemAsync(reinterpret_cast<void**>(&mTrackletsLUTDeviceArray), (nLayers - 2) * sizeof(int*), nullptr, getExtAllocator());
301-
checkGPUError(cudaHostRegister(mTrackletsLUTDevice.data(), (nLayers - 2) * sizeof(int*), cudaHostRegisterPortable));
302-
checkGPUError(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), (nLayers - 2) * sizeof(int*), cudaMemcpyHostToDevice));
299+
checkGPUError(cudaHostRegister(mTrackletsLUTDevice.data(), (nLayers - 1) * sizeof(int*), cudaHostRegisterPortable));
300+
checkGPUError(cudaMemcpyAsync(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), (nLayers - 1) * sizeof(int*), cudaMemcpyHostToDevice));
303301
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
304302
}
305303

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -386,8 +386,8 @@ GPUg() void computeLayerCellsKernel(
386386
for (int iCurrentTrackletIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentTrackletIndex < nTrackletsCurrent; iCurrentTrackletIndex += blockDim.x * gridDim.x) {
387387
const Tracklet& currentTracklet = tracklets[layer][iCurrentTrackletIndex];
388388
const int nextLayerClusterIndex{currentTracklet.secondClusterIndex};
389-
const int nextLayerFirstTrackletIndex{trackletsLUT[layer][nextLayerClusterIndex]};
390-
const int nextLayerLastTrackletIndex{trackletsLUT[layer][nextLayerClusterIndex + 1]};
389+
const int nextLayerFirstTrackletIndex{trackletsLUT[layer + 1][nextLayerClusterIndex]};
390+
const int nextLayerLastTrackletIndex{trackletsLUT[layer + 1][nextLayerClusterIndex + 1]};
391391
if (nextLayerFirstTrackletIndex == nextLayerLastTrackletIndex) {
392392
continue;
393393
}
@@ -612,7 +612,7 @@ GPUg() void printBufferLayerOnThread(const int layer, const int* v, unsigned int
612612
}
613613
}
614614

615-
GPUg() void printMatrixRow(const int row, const int** mat, const unsigned int rowLength, const int len = 256 * 128 + 1, const unsigned int tId = 0)
615+
GPUg() void printMatrixRow(const int row, int** mat, const unsigned int rowLength, const int len = 256 * 128 + 1, const unsigned int tId = 0)
616616
{
617617
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
618618
for (int i{0}; i < rowLength; ++i) {
@@ -723,34 +723,35 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
723723
const int nThreads)
724724
{
725725
for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) {
726-
// gpu::computeLayerTrackletsMultiROFKernel<<<1, 1>>>(
727-
// utils,
728-
// multMask,
729-
// iLayer,
730-
// startROF,
731-
// endROF,
732-
// maxROF,
733-
// deltaROF,
734-
// vertices,
735-
// rofPV,
736-
// nVertices,
737-
// vertexId,
738-
// clusters,
739-
// ROFClusters,
740-
// usedClusters,
741-
// clustersIndexTables,
742-
// trackletsLUTs,
743-
// iteration,
744-
// NSigmaCut,
745-
// phiCuts[iLayer],
746-
// resolutionPV,
747-
// minRs[iLayer + 1],
748-
// maxRs[iLayer + 1],
749-
// resolutions[iLayer],
750-
// radii[iLayer + 1] - radii[iLayer],
751-
// mulScatAng[iLayer]);
726+
gpu::computeLayerTrackletsMultiROFKernel<<<1, 1>>>(
727+
utils,
728+
multMask,
729+
iLayer,
730+
startROF,
731+
endROF,
732+
maxROF,
733+
deltaROF,
734+
vertices,
735+
rofPV,
736+
nVertices,
737+
vertexId,
738+
clusters,
739+
ROFClusters,
740+
usedClusters,
741+
clustersIndexTables,
742+
trackletsLUTs,
743+
iteration,
744+
NSigmaCut,
745+
phiCuts[iLayer],
746+
resolutionPV,
747+
minRs[iLayer + 1],
748+
maxRs[iLayer + 1],
749+
resolutions[iLayer],
750+
radii[iLayer + 1] - radii[iLayer],
751+
mulScatAng[iLayer]);
752752
gpuCheckError(cudaPeekAtLastError());
753753
gpuCheckError(cudaDeviceSynchronize());
754+
gpu::printMatrixRow<<<1, 1>>>(iLayer, trackletsLUTs, 3000);
754755
}
755756
}
756757

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,11 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in
201201
if (!tf->checkMemory(mTrkParams[iteration].MaxMemory)) {
202202
return;
203203
}
204-
204+
for (auto& l : tf->getTrackletsLookupTable()) {
205+
for (auto& t : l) {
206+
std::cout << t << "\t";
207+
}
208+
}
205209
#pragma omp parallel for num_threads(mNThreads)
206210
for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) {
207211
/// Sort tracklets

0 commit comments

Comments
 (0)