
Commit f4dcbaa

Adding force method to fill input like it is done on GPU

1 parent 5801e3a

2 files changed (+2, -1 lines)

GPU/GPUTracking/Definitions/GPUSettingsList.h (1 addition, 0 deletions)

@@ -276,6 +276,7 @@ AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regress
 AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).")
 AddOption(nnEvalMode, std::string, "c1:r1", "", 0, "Concatention of modes, e.g. c1:r1 (classification class 1, regression class 1)")
 AddOption(nnClusterizerUseClassification, int, 1, "", 0, "If 1, the classification output of the network is used to select clusters, else only the regression output is used and no clusters are rejected by classification")
+AddOption(nnClusterizerForceGpuInputFill, int, 0, "", 0, "Forces to use the fillInputNNGPU function")
 // CCDB
 AddOption(nnLoadFromCCDB, int, 0, "", 0, "If 1 networks are fetched from ccdb, else locally")
 AddOption(nnLocalFolder, std::string, ".", "", 0, "Local folder in which the networks will be fetched")
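
For context, a minimal self-contained C++ sketch of what the new integer option does at the level of control flow: it defaults to 0 and, when set to 1, forces the GPU-style input-filling path even on a CPU run. The names NNClusterizerSettings and useGpuInputFill are hypothetical and only mirror the condition added in GPUChainTrackingClusterizer.cxx below; this is not the O2 implementation.

// Minimal, self-contained sketch (not the O2 code) of how an integer option
// that defaults to 0 can force the GPU-style input fill on a CPU run.
// NNClusterizerSettings and useGpuInputFill are hypothetical names.
#include <iostream>

struct NNClusterizerSettings {
  int nnClusterizerForceGpuInputFill = 0; // 0 = default (off), 1 = force the fillInputNNGPU path
};

bool useGpuInputFill(bool isGpuRun, const NNClusterizerSettings& s)
{
  // Same shape as the changed condition: GPU run OR explicitly forced.
  return isGpuRun || s.nnClusterizerForceGpuInputFill != 0;
}

int main()
{
  NNClusterizerSettings settings;
  std::cout << useGpuInputFill(false, settings) << '\n'; // 0: CPU run, not forced
  settings.nnClusterizerForceGpuInputFill = 1;
  std::cout << useGpuInputFill(false, settings) << '\n'; // 1: CPU run, but forced
}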

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx (1 addition, 1 deletion)

@@ -1008,7 +1008,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   size_t iSize = CAMath::Min((uint)clustererNNShadow.mNnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));

   // Filling the data
-  if (mRec->IsGPU()) {
+  if (mRec->IsGPU() || GetProcessingSettings().nn.nnClusterizerForceGpuInputFill) {
     // Fills element by element of each input matrix -> better parallelizability, but worse on CPU due to unnecessary computations
     runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNGPU>({GetGrid(iSize * clustererNNShadow.mNnClusterizerRowTimeSizeFull, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart);
   } else {
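
The changed line sits inside a batched loop over clusters; iSize is the per-batch work size, clipped on the last batch. Below is a small self-contained sketch of that size computation, with CAMath::Min modeled by std::min and the concrete numbers (nClusters, batchedMode) chosen arbitrarily for illustration; it is not taken from the O2 sources.

// Self-contained sketch of the per-batch size computation around the changed
// line: each batch handles at most mNnClusterizerBatchedMode clusters and the
// last batch is clipped to what remains. CAMath::Min is modeled with std::min;
// nClusters and batchedMode are example values only.
#include <algorithm>
#include <cstdio>

int main()
{
  const unsigned int nClusters = 1000;  // stand-in for clusterer.mPmemory->counters.nClusters
  const unsigned int batchedMode = 256; // stand-in for clustererNNShadow.mNnClusterizerBatchedMode

  for (unsigned int batchStart = 0; batchStart < nClusters; batchStart += batchedMode) {
    // Same pattern as CAMath::Min((uint)batchedMode, (uint)(nClusters - batchStart)):
    const unsigned int iSize = std::min(batchedMode, nClusters - batchStart);
    std::printf("batchStart=%u iSize=%u\n", batchStart, iSize);
  }
  return 0;
}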
