Skip to content

Commit 6a7b17c

Browse files
committed
Adding necessary if-statement to avoid automatic model loading
1 parent 78c342d commit 6a7b17c

File tree

2 files changed

+28
-26
lines changed

2 files changed

+28
-26
lines changed

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ AddOption(nnInferenceDeviceId, unsigned int, 0, "", 0, "(unsigned int) Specify i
229229
AddOption(nnInferenceAllocateDevMem, int, 0, "", 0, "(bool, default = 0), if the device memory should be allocated for inference")
230230
AddOption(nnInferenceDtype, std::string, "fp32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16
231231
AddOption(nnInferenceThreadsPerNN, int, 0, "", 0, "Number of threads used to evaluate one neural network")
232-
AddOption(nnInferenceEnableOrtOptimization, unsigned int, 1, "", 0, "Enables graph optimizations in ONNX Runtime. Can be greater than 1!")
232+
AddOption(nnInferenceEnableOrtOptimization, unsigned int, 99, "", 0, "Enables graph optimizations in ONNX Runtime. Can be [0, 1, 2, 99] -> see https://github.com/microsoft/onnxruntime/blob/3f71d637a83dc3540753a8bb06740f67e926dc13/include/onnxruntime/core/session/onnxruntime_c_api.h#L347")
233233
AddOption(nnInferenceOrtProfiling, int, 0, "", 0, "Enables profiling of model execution in ONNX Runtime")
234234
AddOption(nnInferenceOrtProfilingPath, std::string, ".", "", 0, "If nnInferenceOrtProfiling is set, the path to store the profiling data")
235235
AddOption(nnInferenceVerbosity, int, 1, "", 0, "0: No messages; 1: Warnings; 2: Warnings + major debugs; >3: All debugs")

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -613,32 +613,34 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
613613
}
614614

615615
#ifdef GPUCA_HAS_ONNX
616-
uint32_t maxClusters = -1;
617-
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
618-
maxClusters = std::max(maxClusters, processors()->tpcClusterer[iSector].mNMaxClusters);
619-
}
620-
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
621-
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector];
622-
const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
623-
clustererNN.nnClusterizerUseCfRegression = nn_settings.nnClusterizerUseCfRegression;
624-
clustererNN.nnClusterizerSizeInputRow = nn_settings.nnClusterizerSizeInputRow;
625-
clustererNN.nnClusterizerSizeInputPad = nn_settings.nnClusterizerSizeInputPad;
626-
clustererNN.nnClusterizerSizeInputTime = nn_settings.nnClusterizerSizeInputTime;
627-
clustererNN.nnClusterizerAddIndexData = nn_settings.nnClusterizerAddIndexData;
628-
clustererNN.nnClusterizerElementSize = ((2 * nn_settings.nnClusterizerSizeInputRow + 1) * (2 * nn_settings.nnClusterizerSizeInputPad + 1) * (2 * nn_settings.nnClusterizerSizeInputTime + 1)) + (nn_settings.nnClusterizerAddIndexData ? 3 : 0);
629-
clustererNN.nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode;
630-
clustererNN.nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue;
631-
clustererNN.nnClusterizerTotalClusters = maxClusters;
632-
clustererNN.nnClassThreshold = nn_settings.nnClassThreshold;
633-
clustererNN.nnSigmoidTrafoClassThreshold = nn_settings.nnSigmoidTrafoClassThreshold;
634-
if (nn_settings.nnClusterizerVerbosity < 0) {
635-
clustererNN.nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity;
636-
} else {
637-
clustererNN.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity;
616+
if (GetProcessingSettings().nn.applyNNclusterizer) {
617+
uint32_t maxClusters = -1;
618+
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
619+
maxClusters = std::max(maxClusters, processors()->tpcClusterer[iSector].mNMaxClusters);
620+
}
621+
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
622+
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector];
623+
const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
624+
clustererNN.nnClusterizerUseCfRegression = nn_settings.nnClusterizerUseCfRegression;
625+
clustererNN.nnClusterizerSizeInputRow = nn_settings.nnClusterizerSizeInputRow;
626+
clustererNN.nnClusterizerSizeInputPad = nn_settings.nnClusterizerSizeInputPad;
627+
clustererNN.nnClusterizerSizeInputTime = nn_settings.nnClusterizerSizeInputTime;
628+
clustererNN.nnClusterizerAddIndexData = nn_settings.nnClusterizerAddIndexData;
629+
clustererNN.nnClusterizerElementSize = ((2 * nn_settings.nnClusterizerSizeInputRow + 1) * (2 * nn_settings.nnClusterizerSizeInputPad + 1) * (2 * nn_settings.nnClusterizerSizeInputTime + 1)) + (nn_settings.nnClusterizerAddIndexData ? 3 : 0);
630+
clustererNN.nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode;
631+
clustererNN.nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue;
632+
clustererNN.nnClusterizerTotalClusters = maxClusters;
633+
clustererNN.nnClassThreshold = nn_settings.nnClassThreshold;
634+
clustererNN.nnSigmoidTrafoClassThreshold = nn_settings.nnSigmoidTrafoClassThreshold;
635+
if (nn_settings.nnClusterizerVerbosity < 0) {
636+
clustererNN.nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity;
637+
} else {
638+
clustererNN.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity;
639+
}
640+
clustererNN.nnClusterizerDtype = nn_settings.nnInferenceDtype.find("32") != std::string::npos;
641+
GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN);
642+
AllocateRegisteredMemory(clustererNN.mMemoryId);
638643
}
639-
clustererNN.nnClusterizerDtype = nn_settings.nnInferenceDtype.find("32") != std::string::npos;
640-
GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN);
641-
AllocateRegisteredMemory(clustererNN.mMemoryId);
642644
}
643645
#endif
644646

0 commit comments

Comments (0)