Skip to content

Commit 82f2153

Browse files
committed
First bug-fixes and optimizations for deconvolution flags
1 parent 8e06932 commit 82f2153

File tree

7 files changed

+32
-19
lines changed

7 files changed

+32
-19
lines changed

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ AddOption(nnClusterizerBatchedMode, unsigned int, 1, "", 0, "(int, default = 1)
269269
AddOption(nnClusterizerVerbosity, int, -1, "", 0, "(int, default = -1) If >0, logging messages of the clusterizer will be displayed")
270270
AddOption(nnClusterizerBoundaryFillValue, int, -1, "", 0, "Fill value for the boundary of the input to the NN")
271271
AddOption(nnClusterizerApplyNoiseSuppression, int, 1, "", 0, "Applies the NoiseSuppression kernel before the digits to the network are filled")
272+
AddOption(nnClusterizerSetDeconvolutionFlags, int, 1, "", 0, "Runs the deconvolution kernel without overwriting the charge in order to make cluster-to-track attachment identical to heuristic CF")
272273
AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The classification network path")
273274
AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.")
274275
AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path")

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -989,8 +989,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
989989
int withMC = (doGPU && propagateMCLabels);
990990

991991
if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
992-
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
992+
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
993993
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
994+
} else if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) {
995+
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, false);
994996
}
995997

996998
// float time_clusterizer = 0, time_fill = 0, time_networks = 0;
@@ -1102,7 +1104,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11021104
GPUFatal("Project not compiled with neural network clusterization. Aborting.");
11031105
#endif
11041106
} else {
1105-
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
1107+
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
11061108
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
11071109
runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0);
11081110
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,19 @@ using namespace o2::gpu;
2222
using namespace o2::gpu::tpccf;
2323

2424
template <>
25-
GPUdii() void GPUTPCCFDeconvolution::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
25+
GPUdii() void GPUTPCCFDeconvolution::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, bool overwriteCharge)
2626
{
2727
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
2828
CfArray2D<uint8_t> isPeakMap(clusterer.mPpeakMap);
29-
GPUTPCCFDeconvolution::deconvolutionImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, isPeakMap, chargeMap, clusterer.mPpositions, clusterer.mPmemory->counters.nPositions);
29+
GPUTPCCFDeconvolution::deconvolutionImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, isPeakMap, chargeMap, clusterer.mPpositions, clusterer.mPmemory->counters.nPositions, overwriteCharge);
3030
}
3131

3232
GPUdii() void GPUTPCCFDeconvolution::deconvolutionImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem,
3333
const CfArray2D<uint8_t>& peakMap,
3434
CfArray2D<PackedCharge>& chargeMap,
3535
const CfChargePos* positions,
36-
const uint32_t digitnum)
36+
const uint32_t digitnum,
37+
bool overwriteCharge)
3738
{
3839
SizeT idx = get_global_id(0);
3940

@@ -111,9 +112,14 @@ GPUdii() void GPUTPCCFDeconvolution::deconvolutionImpl(int32_t nBlocks, int32_t
111112
peakCount = (peakCount == 0) ? 1 : peakCount;
112113

113114
PackedCharge charge = chargeMap[pos];
114-
PackedCharge p(charge.unpack() / peakCount, has3x3, split);
115115

116-
chargeMap[pos] = p;
116+
if(overwriteCharge) {
117+
PackedCharge p(charge.unpack() / peakCount, has3x3, split);
118+
chargeMap[pos] = p;
119+
} else {
120+
PackedCharge p(charge.unpack(), has3x3, split);
121+
chargeMap[pos] = p;
122+
}
117123
}
118124

119125
GPUdi() uint8_t GPUTPCCFDeconvolution::countPeaksInner(

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class GPUTPCCFDeconvolution : public GPUKernelTemplate
5151
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... args);
5252

5353
private:
54-
static GPUd() void deconvolutionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const CfArray2D<uint8_t>&, CfArray2D<PackedCharge>&, const CfChargePos*, const uint32_t);
54+
static GPUd() void deconvolutionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const CfArray2D<uint8_t>&, CfArray2D<PackedCharge>&, const CfChargePos*, const uint32_t, bool);
5555

5656
static GPUdi() uint8_t countPeaksInner(uint16_t, const uint8_t*, uint8_t*);
5757
static GPUdi() uint8_t countPeaksOuter(uint16_t, uint8_t, const uint8_t*);

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class GPUTPCNNClusterizer : public GPUProcessor
4444
bool mNnClusterizerAddIndexData = true;
4545
float mNnClassThreshold = 0.01;
4646
bool mNnSigmoidTrafoClassThreshold = 1;
47+
bool mNnClusterizerSetDeconvolutionFlags = true;
4748
int mNnClusterizerUseCfRegression = 0;
4849
int mNnClusterizerBatchedMode = 1;
4950
int mNnClusterizerTotalClusters = 1;

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
9292
clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode;
9393
clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue;
9494
clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold;
95+
clustererNN.mNnClusterizerSetDeconvolutionFlags = (bool)settings.nnClusterizerSetDeconvolutionFlags;
9596
if (clustererNN.mNnSigmoidTrafoClassThreshold) {
9697
clustererNN.mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));
9798
} else {

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,13 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
126126

127127
if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) {
128128
uint32_t top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize;
129-
for (uint16_t i = 0; i < 8; i++) {
130-
Delta2 d = cfconsts::InnerNeighbors[i];
131-
CfChargePos tmp_pos = peak.delta(d);
132-
clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
133-
clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
129+
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags){ // Only if deconvolution flags are not set
130+
for (uint16_t i = 0; i < 8; i++) { // This solution needs testing. It is not the same as the deconvolution flags
131+
Delta2 d = cfconsts::InnerNeighbors[i];
132+
CfChargePos tmp_pos = peak.delta(d);
133+
clustererNN.mClusterFlags[2 * base_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
134+
}
135+
clustererNN.mClusterFlags[2 * base_idx + 1] = clustererNN.mClusterFlags[2 * base_idx];
134136
}
135137
if (dtype == 0) {
136138
clustererNN.mInputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f);
@@ -147,9 +149,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
147149
bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
148150
if (is_row_boundary) {
149151
if (dtype == 0) {
150-
clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
152+
clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
151153
} else {
152-
clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
154+
clustererNN.mInputData_32[glo_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
153155
}
154156
} else {
155157
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
@@ -164,15 +166,15 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
164166
float central_charge = static_cast<float>(chargeMap[peak].unpack());
165167
CfChargePos tmp_pos(row + r, pad + p, time + t);
166168
if (dtype == 0) {
167-
clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
169+
clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
168170
} else if (dtype == 1) {
169-
clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
171+
clustererNN.mInputData_32[glo_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
170172
}
171173
} else {
172174
if (dtype == 0) {
173-
clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
175+
clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
174176
} else {
175-
clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
177+
clustererNN.mInputData_32[glo_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
176178
}
177179
}
178180
}

0 commit comments

Comments
 (0)