Skip to content

Commit 381955a

Browse files
committed
Bug-fixes
1 parent 7da3793 commit 381955a

File tree

5 files changed

+49
-57
lines changed

5 files changed

+49
-57
lines changed

Common/ML/src/OrtInterface.cxx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -246,9 +246,9 @@ void OrtModel::inference(I* input, size_t input_size, O* output)
246246
std::vector<int64_t> outputShape{input_size, mOutputShapes[0][1]};
247247
Ort::Value outputTensor = Ort::Value(nullptr);
248248
if constexpr (std::is_same_v<O, OrtDataType::Float16_t>) {
249-
Ort::Value outputTensor = Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(output), input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size());
249+
outputTensor = Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(output), input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size());
250250
} else {
251-
Ort::Value outputTensor = Ort::Value::CreateTensor<O>(pImplOrt->memoryInfo, output, input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size());
251+
outputTensor = Ort::Value::CreateTensor<O>(pImplOrt->memoryInfo, output, input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size());
252252
}
253253

254254
(pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, outputNamesChar.size());

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 17 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -611,6 +611,14 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
611611
RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline)
612612
}
613613

614+
if (doGPU && mIOPtrs.tpcZS) {
615+
processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta;
616+
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1);
617+
}
618+
if (doGPU) {
619+
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
620+
}
621+
614622
#ifdef GPUCA_HAS_ONNX
615623
const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
616624
GPUTPCNNClusterizerHost nnApplications[GetProcessingSettings().nTPCClustererLanes];
@@ -624,9 +632,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
624632
}
625633
mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) {
626634
nnApplications[lane].init(nn_settings);
627-
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane];
628-
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN;
629-
630635
if (nnApplications[lane].modelsUsed[0]) {
631636
SetONNXGPUStream((nnApplications[lane].model_class).getSessionOptions(), lane, &deviceId);
632637
(nnApplications[lane].model_class).setDeviceId(deviceId);
@@ -642,43 +647,32 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
642647
(nnApplications[lane].model_reg_2).setDeviceId(deviceId);
643648
(nnApplications[lane].model_reg_2).initEnvironment();
644649
}
645-
if (clustererNNShadow.nnClusterizerVerbosity < 3) {
650+
if (nn_settings.nnClusterizerVerbosity < 3) {
646651
LOG(info) << "Allocated ONNX stream for lane " << lane << " and device " << deviceId;
647652
}
648653
});
649654
mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t sector) {
650655
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector];
651656
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[sector] : clustererNN;
652657
int32_t lane = sector % numLanes;
658+
clustererNN.deviceId = deviceId;
659+
clustererNN.mISector = sector;
660+
clustererNN.nnClusterizerTotalClusters = maxClusters;
661+
nnApplications[lane].initClusterizer(nn_settings, clustererNN);
653662
if (doGPU){
654663
clustererNNShadow.deviceId = deviceId;
655664
clustererNNShadow.mISector = sector;
656665
clustererNNShadow.nnClusterizerTotalClusters = maxClusters;
657666
nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow);
658-
} else {
659-
// TODO: not sure if this part is needed at all
660-
clustererNN.deviceId = deviceId;
661-
clustererNN.mISector = sector;
662-
clustererNN.nnClusterizerTotalClusters = maxClusters;
663-
nnApplications[lane].initClusterizer(nn_settings, clustererNN);
664667
}
665668
AllocateRegisteredMemory(clustererNN.mMemoryId);
666-
if (doGPU){
667-
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&clustererNN - (char*)processors(), &clustererNNShadow, sizeof(clustererNN), lane);
668-
TransferMemoryResourcesToGPU(RecoStep::TPCClusterFinding, &clustererNNShadow, lane);
669-
}
670669
});
670+
if (doGPU){
671+
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer)*NSECTORS, mRec->NStreams() - 1, &mEvents->init);
672+
}
671673
}
672674
#endif
673675

674-
if (doGPU && mIOPtrs.tpcZS) {
675-
processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta;
676-
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1);
677-
}
678-
if (doGPU) {
679-
WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
680-
}
681-
682676
size_t nClsTotal = 0;
683677
ClusterNativeAccess* tmpNativeAccess = mClusterNativeAccess.get();
684678
ClusterNative* tmpNativeClusters = nullptr;
@@ -961,7 +955,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
961955
auto stop0 = std::chrono::high_resolution_clock::now();
962956
auto start1 = std::chrono::high_resolution_clock::now();
963957

964-
// nnApplication.networkInference(nnApplication.model_class, clustererNNShadow, iSize, clustererNNShadow.modelProbabilities, clustererNNShadow.nnInferenceInputDType);
965958
if (clustererNNShadow.nnInferenceInputDType == 0) {
966959
if (clustererNNShadow.nnInferenceOutputDType == 0) {
967960
(nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_16);
@@ -975,6 +968,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
975968
(nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_32);
976969
}
977970
}
971+
978972
if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) {
979973
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels
980974
} else {

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 26 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -64,32 +64,32 @@ void* GPUTPCNNClusterizer::setIOPointers(void* mem)
6464
return mem;
6565
}
6666

67-
std::vector<int32_t> GPUTPCNNClusterizer::pointerSizes() {
68-
std::vector<int32_t> sizes(7, -1);
69-
if (nnClusterizerBatchedMode > 0) {
70-
if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) {
71-
sizes[0] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData16
72-
} else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) {
73-
sizes[1] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData32
74-
}
75-
sizes[2] = 2 * nnClusterizerBatchedMode; // clusterFlags
76-
if (nnClusterizerModelClassNumOutputNodes > 0) {
77-
sizes[3] = nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes; // modelProbabilities
78-
}
79-
if (!nnClusterizerUseCfRegression) {
80-
if (nnClusterizerModelReg1NumOutputNodes > 0) {
81-
sizes[4] = nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes; // outputDataReg1
82-
}
83-
if (nnClusterizerModelReg2NumOutputNodes > 0) {
84-
sizes[5] = nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes; // outputDataReg2
85-
}
86-
}
87-
}
88-
if (nnClusterizerTotalClusters > 0) {
89-
sizes[6] = nnClusterizerTotalClusters; // outputDataClass
90-
}
91-
return sizes;
92-
}
67+
// std::vector<int32_t> GPUTPCNNClusterizer::pointerSizes() {
68+
// std::vector<int32_t> sizes(7, -1);
69+
// if (nnClusterizerBatchedMode > 0) {
70+
// if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) {
71+
// sizes[0] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData16
72+
// } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) {
73+
// sizes[1] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData32
74+
// }
75+
// sizes[2] = 2 * nnClusterizerBatchedMode; // clusterFlags
76+
// if (nnClusterizerModelClassNumOutputNodes > 0) {
77+
// sizes[3] = nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes; // modelProbabilities
78+
// }
79+
// if (!nnClusterizerUseCfRegression) {
80+
// if (nnClusterizerModelReg1NumOutputNodes > 0) {
81+
// sizes[4] = nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes; // outputDataReg1
82+
// }
83+
// if (nnClusterizerModelReg2NumOutputNodes > 0) {
84+
// sizes[5] = nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes; // outputDataReg2
85+
// }
86+
// }
87+
// }
88+
// if (nnClusterizerTotalClusters > 0) {
89+
// sizes[6] = nnClusterizerTotalClusters; // outputDataClass
90+
// }
91+
// return sizes;
92+
// }
9393

9494
void GPUTPCNNClusterizer::RegisterMemoryAllocation()
9595
{

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@ class GPUTPCNNClusterizer : public GPUProcessor
3434
void RegisterMemoryAllocation();
3535
void InitializeProcessor();
3636
void SetMaxData(const GPUTrackingInOutPointers&);
37-
std::vector<int32_t> pointerSizes();
3837

3938
// Neural network clusterization
4039

@@ -50,8 +49,6 @@ class GPUTPCNNClusterizer : public GPUProcessor
5049
int nnClusterizerTotalClusters = 1;
5150
int nnClusterizerVerbosity = 0;
5251
int nnClusterizerBoundaryFillValue = -1;
53-
int nnClusterizerDumpDigits = 0;
54-
int nnClusterizerApplyCfDeconvolution = 0;
5552
int nnClusterizerModelClassNumOutputNodes = -1;
5653
int nnClusterizerModelReg1NumOutputNodes = -1;
5754
int nnClusterizerModelReg2NumOutputNodes = -1;

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -138,10 +138,11 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
138138
clustererNN.nnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 3 : 0);
139139
clustererNN.nnClusterizerBatchedMode = settings.nnClusterizerBatchedMode;
140140
clustererNN.nnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue;
141-
clustererNN.nnClassThreshold = settings.nnClassThreshold;
142141
clustererNN.nnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold;
143142
if (clustererNN.nnSigmoidTrafoClassThreshold) {
144-
clustererNN.nnClassThreshold = (float)std::log(clustererNN.nnClassThreshold / (1.f - clustererNN.nnClassThreshold));
143+
clustererNN.nnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));
144+
} else {
145+
clustererNN.nnClassThreshold = settings.nnClassThreshold;
145146
}
146147
if (settings.nnClusterizerVerbosity < 0) {
147148
clustererNN.nnClusterizerVerbosity = settings.nnInferenceVerbosity;
@@ -152,7 +153,7 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
152153
clustererNN.nnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos;
153154
clustererNN.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1];
154155
if (!settings.nnClusterizerUseCfRegression) {
155-
if (model_class.getNumOutputNodes()[0][1] == 1 || model_reg_2.isInitialized()) {
156+
if (model_class.getNumOutputNodes()[0][1] == 1 || !model_reg_2.isInitialized()) {
156157
clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1];
157158
} else {
158159
clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1];

0 commit comments

Comments
 (0)