@@ -534,7 +534,13 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
534534 const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1 };
535535 const int firstRowClusterIndex = indexTables[layerIndex + 1 ][(rof1 - startROF) * tableSize + firstBinIndex];
536536 const int maxRowClusterIndex = indexTables[layerIndex + 1 ][(rof1 - startROF) * tableSize + maxBinIndex];
537+ if (currentClusterIndex == 0 && layerIndex == 1 && rof0 == 81 && threadIdx .x == 0 ) {
538+ printf (" GPU: rof0: %d rof1: %d nclus0: %d nclus1: %d vertId: %d fbi: %d, mbi: %d, frci: %d, mrci: %d \n " , rof0, rof1, clustersCurrentLayer.size (), clustersNextLayer.size (), iV, firstBinIndex, maxBinIndex, firstRowClusterIndex, maxRowClusterIndex);
539+ }
537540 for (int iNextCluster{firstRowClusterIndex}; iNextCluster < maxRowClusterIndex; ++iNextCluster) {
541+ if (currentClusterIndex == 0 && layerIndex == 1 && rof0 == 81 && threadIdx .x == 0 ) {
542+ printf (" \t testing clId: %d ...\n " , iNextCluster);
543+ }
538544 if (iNextCluster >= clustersNextLayer.size ()) {
539545 break ;
540546 }
@@ -612,7 +618,7 @@ GPUg() void printBufferLayerOnThread(const int layer, const int* v, unsigned int
612618 }
613619}
614620
615- GPUg () void printMatrixRow (const int row, int ** mat, const unsigned int rowLength, const int len = 256 * 128 + 1 , const unsigned int tId = 0 )
621+ GPUg () void printMatrixRow (const int row, int ** mat, const unsigned int rowLength, const int len = 150 , const unsigned int tId = 0 )
616622{
617623 if (blockIdx .x * blockDim .x + threadIdx .x == tId) {
618624 for (int i{0 }; i < rowLength; ++i) {
@@ -660,6 +666,28 @@ GPUg() void printNeighbours(const gpuPair<int, int>* neighbours,
660666 }
661667}
662668
/// Debug kernel: for a single layer, prints the look-up-table entries of every
/// non-empty readout frame (ROF), one output line per ROF.
///
/// Only the thread whose flat global index (blockIdx.x * blockDim.x + threadIdx.x)
/// equals `tId` does any work; every other thread returns immediately, so the
/// kernel can be launched with any configuration (typically <<<1, 1>>>).
///
/// \param layerId      Row selected in both `ROFClusters` and `luts`.
/// \param ROFClusters  Per-layer arrays of offsets; the difference between entries
///                     `rofId + 1` and `rofId` is used as the number of clusters of
///                     that ROF, and entry `rofId` as the first index into the LUT row.
///                     Must hold at least `nROFs + 1` entries for `layerId`.
/// \param luts         Per-layer arrays indexed by (ROF offset + cluster index); each
///                     value is printed as-is (presumably a tracklet count or offset
///                     per cluster -- confirm against the code that fills it).
/// \param tId          Flat global thread index allowed to print.
/// \param nROFs        Number of ROFs to scan. Defaults to 2304, the value that used
///                     to be hard-coded; pass the real ROF count to avoid reading
///                     past the end of a shorter offset table.
GPUg() void printTrackletsLUTPerROF(const int layerId,
                                    const int** ROFClusters,
                                    int** luts,
                                    const int tId = 0,
                                    const int nROFs = 2304)
{
  // Single-thread filter: device printf output is serialized anyway, and one
  // printer keeps the dump readable.
  if (blockIdx.x * blockDim.x + threadIdx.x != tId) {
    return;
  }

  // Offsets row for the requested layer, loaded once instead of on every access.
  const int* rofOffsets = ROFClusters[layerId];

  for (int rofId{0}; rofId < nROFs; ++rofId) {
    const int firstCluster = rofOffsets[rofId];
    const int nClus = rofOffsets[rofId + 1] - firstCluster;
    if (!nClus) {
      continue; // nothing stored for this ROF: skip it to keep the output compact
    }
    printf(" rof: %d (%d) ==> ", rofId, nClus);

    // The LUT row is only dereferenced once a non-empty ROF has been found.
    const int* layerLUT = luts[layerId];
    for (int iC{0}; iC < nClus; ++iC) {
      printf(" %d\t ", layerLUT[firstCluster + iC]);
    }
    printf(" \n ");
  }
}
690+
663691template <int nLayers = 7 >
664692GPUg () void compileTrackletsLookupTableKernel (const Tracklet* tracklets,
665693 int * trackletsLookUpTable,
@@ -706,6 +734,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
706734 const int * rofPV,
707735 const int nVertices,
708736 const Cluster** clusters,
737+ std::vector<unsigned int > nClusters,
709738 const int ** ROFClusters,
710739 const unsigned char ** usedClusters,
711740 const int ** clustersIndexTables,
@@ -751,8 +780,9 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
751780 mulScatAng[iLayer]);
752781 gpuCheckError (cudaPeekAtLastError ());
753782 gpuCheckError (cudaDeviceSynchronize ());
754- gpu::printMatrixRow<<<1 , 1 >>> (iLayer, trackletsLUTs, 3000 );
783+ // gpu::printMatrixRow<<<1, 1>>>(iLayer, trackletsLUTs, nClusters[iLayer] );
755784 }
785+ // gpu::printTrackletsLUTPerROF<<<1, 1>>>(1, ROFClusters, trackletsLUTs);
756786}
757787
758788void countCellsHandler (
@@ -996,6 +1026,7 @@ template void computeTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
9961026 const int * rofPV,
9971027 const int nVertices,
9981028 const Cluster** clusters,
1029+ std::vector<unsigned int > nClusters,
9991030 const int ** ROFClusters,
10001031 const unsigned char ** usedClusters,
10011032 const int ** clustersIndexTables,
0 commit comments