Skip to content

Commit 227d192

Browse files
committed
Adding publishing logic for deconvolution flags
1 parent 82f2153 commit 227d192

File tree

3 files changed

+32
-0
lines changed

3 files changed

+32
-0
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10031003
// auto start0 = std::chrono::high_resolution_clock::now();
10041004
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNSingleElement>({GetGrid(iSize * clustererNNShadow.mNnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, batchStart); // Filling the data
10051005

1006+
if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) {
1007+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, batchStart); // Publishing the deconvolution flags
1008+
}
1009+
10061010
// auto stop0 = std::chrono::high_resolution_clock::now();
10071011
// auto start1 = std::chrono::high_resolution_clock::now();
10081012

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,33 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
459459
}
460460
}
461461

462+
// ---------------------------------
463+
// Kernel specialization for publishDeconvolutionFlags.
// Each invocation handles one peak: it reads the peak position at index
// (idx + batchStart) from the sector's filtered peak positions, inspects the
// 8 inner and 16 outer neighbours of that peak in the charge map, and
// accumulates two counters into clustererNN.mClusterFlags at [2 * idx] and
// [2 * idx + 1].
//
// Parameters used by this body:
//   processors  - provides tpcClusterer[sector] and tpcNNClusterer[sector]
//   sector      - index into both processor arrays
//   batchStart  - offset added to idx when reading the peak position
//                 (the flag output is indexed by idx alone)
// nBlocks, nThreads, iBlock, iThread, smem, dtype and onlyMC are not
// referenced in this body.
//
// NOTE(review): the flags are updated with += and are not reset here;
// presumably the buffer is zeroed elsewhere before this kernel runs - confirm.
// NOTE(review): there is no check in this body that idx (or idx + batchStart)
// is within the valid range; presumably guaranteed by the launch - confirm.
template <>
464+
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart)
465+
{
466+
// Implements identical publishing logic as the heuristic clusterizer and deconvolution kernel
467+
// Index of the peak handled by this invocation, relative to the current batch.
uint32_t idx = get_global_id(0);
468+
auto& clusterer = processors.tpcClusterer[sector];
469+
auto& clustererNN = processors.tpcNNClusterer[sector];
470+
// View of the clusterer's charge map buffer as a 2D array of PackedCharge.
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
471+
// batchStart shifts idx to the position of this peak in the full peak list.
CfChargePos peak = clusterer.mPfilteredPeakPositions[idx + batchStart];
472+
473+
// Inner neighbours: every split charge counts towards the flags.
for(int i = 0; i < 8; i++) {
474+
Delta2 d = cfconsts::InnerNeighbors[i];
475+
CfChargePos tmp_pos = peak.delta(d);
476+
PackedCharge charge = chargeMap[tmp_pos];
477+
// Counts split neighbours with a non-zero y offset
// (presumably the time direction - TODO confirm against Delta2).
clustererNN.mClusterFlags[2 * idx] += (d.y != 0 && charge.isSplit());
478+
// Counts split neighbours with a non-zero x offset
// (presumably the pad direction - TODO confirm against Delta2).
clustererNN.mClusterFlags[2 * idx + 1] += (d.x != 0 && charge.isSplit());
479+
}
480+
// Outer neighbours: same counting, but only for split charges for which
// has3x3Peak() is false.
for(int i = 0; i < 16; i++) {
481+
Delta2 d = cfconsts::OuterNeighbors[i];
482+
CfChargePos tmp_pos = peak.delta(d);
483+
PackedCharge charge = chargeMap[tmp_pos];
484+
clustererNN.mClusterFlags[2 * idx] += (d.y != 0 && charge.isSplit() && !charge.has3x3Peak());
485+
clustererNN.mClusterFlags[2 * idx + 1] += (d.x != 0 && charge.isSplit() && !charge.has3x3Peak());
486+
}
487+
}
488+
462489
// The following arithmetic is done because the network is trained with a split between IROC and OROC boundary
463490
GPUd() int32_t GPUTPCNNClusterizerKernels::padOffset(int32_t row_ref, int32_t row_current)
464491
{

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate
6565
determineClass2Labels = 4,
6666
publishClass1Regression = 5,
6767
publishClass2Regression = 6,
68+
publishDeconvolutionFlags = 7
6869
};
6970

7071
template <int32_t iKernel = defaultKernel, typename... Args>

0 commit comments

Comments
 (0)