Skip to content

Commit 3bc9c9e

Browse files
committed
Debugging small discrepancies
1 parent a03b5be commit 3bc9c9e

File tree

5 files changed

+77
-9
lines changed

5 files changed

+77
-9
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,7 @@ class TimeFrameGPU : public TimeFrame
118118
const Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
119119
const Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
120120
const int** getDeviceArrayClustersIndexTables() const { return mClustersIndexTablesDeviceArray; }
121+
std::vector<unsigned int> getClusterSizes();
121122
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
122123
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
123124
const Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
@@ -230,6 +231,16 @@ inline int TimeFrameGPU<nLayers>::getNClustersInRofSpan(const int rofIdstart, co
230231
{
231232
return static_cast<int>(mROFramesClusters[layerId][(rofIdstart + rofSpanSize) < mROFramesClusters.size() ? rofIdstart + rofSpanSize : mROFramesClusters.size() - 1] - mROFramesClusters[layerId][rofIdstart]);
232233
}
234+
235+
// Returns a std::vector<unsigned int> with one entry per element of
// mUnsortedClusters: result[i] is mUnsortedClusters[i].size(), converted to
// unsigned int with static_cast. The result is returned by value.
// NOTE(review): presumably each element of mUnsortedClusters is the cluster
// container of one detector layer, making this a per-layer cluster count;
// confirm against the declaration of mUnsortedClusters.
template <int nLayers>
236+
inline std::vector<unsigned int> TimeFrameGPU<nLayers>::getClusterSizes()
237+
{
238+
// Pre-size the output so std::transform can write through sizes.begin().
std::vector<unsigned int> sizes(mUnsortedClusters.size());
239+
std::transform(mUnsortedClusters.begin(), mUnsortedClusters.end(), sizes.begin(),
240+
[](const auto& v) { return static_cast<unsigned int>(v.size()); });
241+
return sizes;
242+
}
243+
233244
} // namespace gpu
234245
} // namespace its
235246
} // namespace o2

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
6262
const int* rofPV,
6363
const int nVertices,
6464
const Cluster** clusters,
65+
std::vector<unsigned int> nClusters,
6566
const int** ROFClusters,
6667
const unsigned char** usedClusters,
6768
const int** clustersIndexTables,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int
110110
mTimeFrameGPU->getDeviceROFramesPV(),
111111
mTimeFrameGPU->getPrimaryVerticesNum(),
112112
mTimeFrameGPU->getDeviceArrayClusters(),
113+
mTimeFrameGPU->getClusterSizes(),
113114
mTimeFrameGPU->getDeviceROframeClusters(),
114115
mTimeFrameGPU->getDeviceArrayUsedClusters(),
115116
mTimeFrameGPU->getDeviceArrayClustersIndexTables(),

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,13 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
534534
const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1};
535535
const int firstRowClusterIndex = indexTables[layerIndex + 1][(rof1 - startROF) * tableSize + firstBinIndex];
536536
const int maxRowClusterIndex = indexTables[layerIndex + 1][(rof1 - startROF) * tableSize + maxBinIndex];
537+
if (currentClusterIndex == 0 && layerIndex == 1 && rof0 == 81 && threadIdx.x == 0) {
538+
printf("GPU: rof0: %d rof1: %d nclus0: %d nclus1: %d vertId: %d fbi: %d, mbi: %d, frci: %d, mrci: %d \n", rof0, rof1, clustersCurrentLayer.size(), clustersNextLayer.size(), iV, firstBinIndex, maxBinIndex, firstRowClusterIndex, maxRowClusterIndex);
539+
}
537540
for (int iNextCluster{firstRowClusterIndex}; iNextCluster < maxRowClusterIndex; ++iNextCluster) {
541+
if (currentClusterIndex == 0 && layerIndex == 1 && rof0 == 81 && threadIdx.x == 0) {
542+
printf("\ttesting clId: %d ...\n", iNextCluster);
543+
}
538544
if (iNextCluster >= clustersNextLayer.size()) {
539545
break;
540546
}
@@ -612,7 +618,7 @@ GPUg() void printBufferLayerOnThread(const int layer, const int* v, unsigned int
612618
}
613619
}
614620

615-
GPUg() void printMatrixRow(const int row, int** mat, const unsigned int rowLength, const int len = 256 * 128 + 1, const unsigned int tId = 0)
621+
GPUg() void printMatrixRow(const int row, int** mat, const unsigned int rowLength, const int len = 150, const unsigned int tId = 0)
616622
{
617623
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
618624
for (int i{0}; i < rowLength; ++i) {
@@ -660,6 +666,28 @@ GPUg() void printNeighbours(const gpuPair<int, int>* neighbours,
660666
}
661667
}
662668

669+
// Debug helper that prints, ROF by ROF, the look-up-table values of one layer.
// Only the thread whose global index (blockIdx.x * blockDim.x + threadIdx.x)
// equals tId does any work; every other thread returns without printing.
//
// layerId     - row selected in both ROFClusters and luts.
// ROFClusters - ROFClusters[layerId][rofId] is used as the offset of the first
//               cluster of rofId; the difference between consecutive entries is
//               taken as the number of clusters in that ROF.
// luts        - luts[layerId][offset + iC] is the value printed for cluster iC.
// tId         - global thread index allowed to print (default 0).
//
// NOTE(review): GPUg() is presumably the project's kernel (__global__)
// qualifier macro; confirm against its definition.
GPUg() void printTrackletsLUTPerROF(const int layerId,
670+
const int** ROFClusters,
671+
int** luts,
672+
const int tId = 0)
673+
{
674+
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
675+
// NOTE(review): 2304 is a hard-coded ROF count; the loop reads
// ROFClusters[layerId][rofId + 1], i.e. up to index 2304, so the row must
// hold at least 2305 entries. Confirm this matches the processed data.
for (auto rofId{0}; rofId < 2304; ++rofId) {
676+
int nClus = ROFClusters[layerId][rofId + 1] - ROFClusters[layerId][rofId];
677+
// ROFs without clusters produce no output line.
if (!nClus) {
678+
continue;
679+
}
680+
printf("rof: %d (%d) ==> ", rofId, nClus);
681+
682+
for (int iC{0}; iC < nClus; ++iC) {
683+
int nT = luts[layerId][ROFClusters[layerId][rofId] + iC];
684+
printf("%d\t", nT);
685+
}
686+
printf("\n");
687+
}
688+
}
689+
}
690+
663691
template <int nLayers = 7>
664692
GPUg() void compileTrackletsLookupTableKernel(const Tracklet* tracklets,
665693
int* trackletsLookUpTable,
@@ -706,6 +734,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
706734
const int* rofPV,
707735
const int nVertices,
708736
const Cluster** clusters,
737+
std::vector<unsigned int> nClusters,
709738
const int** ROFClusters,
710739
const unsigned char** usedClusters,
711740
const int** clustersIndexTables,
@@ -751,8 +780,9 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
751780
mulScatAng[iLayer]);
752781
gpuCheckError(cudaPeekAtLastError());
753782
gpuCheckError(cudaDeviceSynchronize());
754-
gpu::printMatrixRow<<<1, 1>>>(iLayer, trackletsLUTs, 3000);
783+
// gpu::printMatrixRow<<<1, 1>>>(iLayer, trackletsLUTs, nClusters[iLayer]);
755784
}
785+
// gpu::printTrackletsLUTPerROF<<<1, 1>>>(1, ROFClusters, trackletsLUTs);
756786
}
757787

758788
void countCellsHandler(
@@ -996,6 +1026,7 @@ template void computeTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
9961026
const int* rofPV,
9971027
const int nVertices,
9981028
const Cluster** clusters,
1029+
std::vector<unsigned int> nClusters,
9991030
const int** ROFClusters,
10001031
const unsigned char** usedClusters,
10011032
const int** clustersIndexTables,

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,13 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in
145145
}
146146
const int firstRowClusterIndex = tf->getIndexTable(rof1, iLayer + 1)[firstBinIndex];
147147
const int maxRowClusterIndex = tf->getIndexTable(rof1, iLayer + 1)[maxBinIndex];
148-
148+
if (iCluster == 0 && iLayer == 1 && rof0 == 81) {
149+
printf("CPU: rof0: %d rof1: %d nclus0: %d nclus1: %d vertId: %d fbi: %d, mbi: %d, frci: %d, mrci: %d \n", rof0, rof1, layer0.size(), layer1.size(), iV, firstBinIndex, maxBinIndex, firstRowClusterIndex, maxRowClusterIndex);
150+
}
149151
for (int iNextCluster{firstRowClusterIndex}; iNextCluster < maxRowClusterIndex; ++iNextCluster) {
150-
152+
if (iCluster == 0 && iLayer == 1 && rof0 == 81) {
153+
printf("\ttesting clId: %d ...\n", iNextCluster);
154+
}
151155
if (iNextCluster >= (int)layer1.size()) {
152156
break;
153157
}
@@ -201,11 +205,31 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in
201205
if (!tf->checkMemory(mTrkParams[iteration].MaxMemory)) {
202206
return;
203207
}
204-
for (auto& l : tf->getTrackletsLookupTable()) {
205-
for (auto& t : l) {
206-
std::cout << t << "\t";
207-
}
208-
}
208+
209+
// for (auto iLayer{0}; iLayer < tf->getTrackletsLookupTable().size(); ++iLayer) {
210+
// auto lut = tf->getTrackletsLookupTable()[iLayer];
211+
// for (unsigned int iC{0}; iC < lut.size(); ++iC) {
212+
// if (!(iC % 150)) {
213+
// printf("\n row %d: ===> %d/%d\t", iLayer, iC, (int)lut.size());
214+
// }
215+
// printf("%d\t", lut[iC]);
216+
// }
217+
// }
218+
219+
// for (auto rofId{0}; rofId < 2304; ++rofId) {
220+
// int nClus = tf->getClustersOnLayer(rofId, 1).size();
221+
// if (!nClus) {
222+
// continue;
223+
// }
224+
// printf("rof: %d (%d) ==> ", rofId, nClus);
225+
226+
// for (int iC{0}; iC < nClus; ++iC) {
227+
// int nT = tf->getTrackletsLookupTable()[0][tf->getSortedIndex(rofId, 1, iC)];
228+
// printf("%d\t", nT);
229+
// }
230+
// printf("\n");
231+
// }
232+
209233
#pragma omp parallel for num_threads(mNThreads)
210234
for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) {
211235
/// Sort tracklets

0 commit comments

Comments
 (0)