Skip to content

Commit 9d9267f

Browse files
committed
Adding shadow instance. Not sure if this correctly allocates GPU memory using AllocateRegisteredMemory
1 parent 3174e39 commit 9d9267f

File tree

10 files changed

+100
-49
lines changed

10 files changed

+100
-49
lines changed

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
116116
virtual size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst);
117117

118118
// ONNX runtime
119-
virtual void SetONNXGPUStream(Ort::SessionOptions*, int32_t) {}
119+
virtual void SetONNXGPUStream(Ort::SessionOptions*, int32_t, int32_t*) {}
120120

121121
int32_t InitDevice() override;
122122
int32_t ExitDevice() override;

GPU/GPUTracking/Base/GPUReconstructionProcessing.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class GPUReconstructionProcessing : public GPUReconstruction
9090
void AddGPUEvents(T*& events);
9191

9292
virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() override;
93-
virtual void SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream) {}
93+
virtual void SetONNXGPUStream(OrtSessionOptions*, int32_t, int32_t*) {}
9494

9595
struct RecoStepTimerMeta {
9696
HighResTimer timerToGPU;

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -662,8 +662,9 @@ void GPUReconstructionCUDA::endGPUProfiling()
662662
}
663663

664664
#if defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1
665-
void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions* session_options, int32_t stream)
665+
void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions* session_options, int32_t stream, int32_t* deviceId)
666666
{
667+
cudaGetDevice(deviceId);
667668
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
668669
CreateCUDAProviderOptions(&cuda_options);
669670
OrtSessionOptions* raw_options = session_options->operator OrtSessionOptions*();
@@ -690,9 +691,10 @@ void* GPUReconstructionHIP::getGPUPointer(void* ptr)
690691
}
691692

692693
#if defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1
693-
void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions* session_options, int32_t stream)
694+
void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions* session_options, int32_t stream, int32_t* deviceId)
694695
{
695696
// Create ROCm provider options
697+
cudaGetDevice(deviceId);
696698
const auto& api = Ort::GetApi();
697699
OrtROCMProviderOptions rocm_options{};
698700
rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC
8383
size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
8484
void ReleaseEvent(deviceEvent ev) override;
8585
void RecordMarker(deviceEvent* ev, int32_t stream) override;
86-
void SetONNXGPUStream(Ort::SessionOptions* session_options, int32_t stream) override;
86+
void SetONNXGPUStream(Ort::SessionOptions* session_options, int32_t stream, int32_t* deviceId) override;
8787

8888
void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame) override;
8989

GPU/GPUTracking/Global/GPUChain.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class GPUChain
8383
inline GPUParam& param() { return mRec->param(); }
8484
inline const GPUConstantMem* processors() const { return mRec->processors(); }
8585
inline void SynchronizeStream(int32_t stream) { mRec->SynchronizeStream(stream); }
86-
inline void SetONNXGPUStream(Ort::SessionOptions* opt, int32_t stream) { mRec->SetONNXGPUStream(opt, stream); }
86+
inline void SetONNXGPUStream(Ort::SessionOptions* opt, int32_t stream, int32_t* deviceId) { mRec->SetONNXGPUStream(opt, stream, deviceId); }
8787
inline void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) { mRec->SynchronizeEvents(evList, nEvents); }
8888
inline void SynchronizeEventAndRelease(deviceEvent& ev, bool doGPU = true)
8989
{

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 58 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -622,28 +622,45 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
622622
}
623623
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
624624
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector];
625-
clustererNN.nnClusterizerUseCfRegression = nn_settings.nnClusterizerUseCfRegression;
626-
clustererNN.nnClusterizerSizeInputRow = nn_settings.nnClusterizerSizeInputRow;
627-
clustererNN.nnClusterizerSizeInputPad = nn_settings.nnClusterizerSizeInputPad;
628-
clustererNN.nnClusterizerSizeInputTime = nn_settings.nnClusterizerSizeInputTime;
629-
clustererNN.nnClusterizerAddIndexData = nn_settings.nnClusterizerAddIndexData;
630-
clustererNN.nnClusterizerElementSize = ((2 * nn_settings.nnClusterizerSizeInputRow + 1) * (2 * nn_settings.nnClusterizerSizeInputPad + 1) * (2 * nn_settings.nnClusterizerSizeInputTime + 1)) + (nn_settings.nnClusterizerAddIndexData ? 3 : 0);
631-
clustererNN.nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode;
632-
clustererNN.nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue;
633-
clustererNN.nnClusterizerTotalClusters = maxClusters;
634-
clustererNN.nnClassThreshold = nn_settings.nnClassThreshold;
635-
clustererNN.nnSigmoidTrafoClassThreshold = nn_settings.nnSigmoidTrafoClassThreshold;
636-
if (clustererNN.nnSigmoidTrafoClassThreshold) {
637-
clustererNN.nnClassThreshold = (float)std::log(clustererNN.nnClassThreshold / (1.f - clustererNN.nnClassThreshold));
625+
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[iSector] : clustererNN;
626+
clustererNNShadow.nnClusterizerUseCfRegression = nn_settings.nnClusterizerUseCfRegression;
627+
clustererNNShadow.nnClusterizerSizeInputRow = nn_settings.nnClusterizerSizeInputRow;
628+
clustererNNShadow.nnClusterizerSizeInputPad = nn_settings.nnClusterizerSizeInputPad;
629+
clustererNNShadow.nnClusterizerSizeInputTime = nn_settings.nnClusterizerSizeInputTime;
630+
clustererNNShadow.nnClusterizerAddIndexData = nn_settings.nnClusterizerAddIndexData;
631+
clustererNNShadow.nnClusterizerElementSize = ((2 * nn_settings.nnClusterizerSizeInputRow + 1) * (2 * nn_settings.nnClusterizerSizeInputPad + 1) * (2 * nn_settings.nnClusterizerSizeInputTime + 1)) + (nn_settings.nnClusterizerAddIndexData ? 3 : 0);
632+
clustererNNShadow.nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode;
633+
clustererNNShadow.nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue;
634+
clustererNNShadow.nnClusterizerTotalClusters = maxClusters;
635+
clustererNNShadow.nnClassThreshold = nn_settings.nnClassThreshold;
636+
clustererNNShadow.nnSigmoidTrafoClassThreshold = nn_settings.nnSigmoidTrafoClassThreshold;
637+
if (clustererNNShadow.nnSigmoidTrafoClassThreshold) {
638+
clustererNNShadow.nnClassThreshold = (float)std::log(clustererNNShadow.nnClassThreshold / (1.f - clustererNNShadow.nnClassThreshold));
638639
}
639640
if (nn_settings.nnClusterizerVerbosity < 0) {
640-
clustererNN.nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity;
641+
clustererNNShadow.nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity;
641642
} else {
642-
clustererNN.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity;
643+
clustererNNShadow.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity;
643644
}
644-
clustererNN.nnInferenceInputDType = nn_settings.nnInferenceInputDType.find("32") != std::string::npos;
645-
nnApplication.initClusterizer(nn_settings, clustererNN);
646-
AllocateRegisteredMemory(clustererNN.mMemoryId);
645+
clustererNNShadow.nnInferenceInputDType = nn_settings.nnInferenceInputDType.find("32") != std::string::npos;
646+
nnApplication.initClusterizer(nn_settings, clustererNNShadow);
647+
// if (doGPU) {
648+
// std::vector<int32_t> pointerSizes = clustererNNShadow.pointerSizes();
649+
// // FIXME: These are for sure not needed. The arrays are empty at this point, only the space needs to be reserved. Is this already handled by computePointerWithAlignment?
650+
// // Once a GPU is available, everything should be done on the GPU for now.
651+
// GPUMemCpy(RecoStep::TPCClusterFinding, clustererNNShadow.inputData32, clustererNN.inputData32, pointerSizes[0], lane, true);
652+
// GPUMemCpy(RecoStep::TPCClusterFinding, clustererNNShadow.inputData16, clustererNN.inputData16, pointerSizes[1], lane, true);
653+
// GPUMemCpy(RecoStep::TPCClusterFinding, clustererNNShadow.outputDataClass, clustererNN.outputDataClass, pointerSizes[2], lane, true);
654+
// GPUMemCpy(RecoStep::TPCClusterFinding, clustererNNShadow.modelProbabilities, clustererNN.modelProbabilities, pointerSizes[3], lane, true);
655+
// GPUMemCpy(RecoStep::TPCClusterFinding, clustererNNShadow.outputDataReg1, clustererNN.outputDataReg1, pointerSizes[4], lane, true);
656+
// GPUMemCpy(RecoStep::TPCClusterFinding, clustererNNShadow.outputDataReg2, clustererNN.outputDataReg2, pointerSizes[5], lane, true);
657+
// GPUMemCpy(RecoStep::TPCClusterFinding, clustererNNShadow.peakPositions, clustererNN.peakPositions, pointerSizes[6], lane, true);
658+
// GPUMemCpy(RecoStep::TPCClusterFinding, clustererNNShadow.clusterFlags, clustererNN.clusterFlags, pointerSizes[7], lane, true);
659+
// GPUMemCpy(RecoStep::TPCClusterFinding, clustererNNShadow.centralCharges, clustererNN.centralCharges, pointerSizes[8], lane, true);
660+
// } else {
661+
// AllocateRegisteredMemory(clustererNNShadow.mMemoryId);
662+
// }
663+
AllocateRegisteredMemory(clustererNNShadow.mMemoryId);
647664
}
648665
}
649666
#endif
@@ -917,41 +934,43 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
917934
if (GetProcessingSettings().nn.applyNNclusterizer) {
918935
#ifdef GPUCA_HAS_ONNX
919936
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector];
937+
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[iSector] : clustererNN;
920938
const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
921-
GPUTPCNNClusterizerHost nnApplication(nn_settings, lane);
922-
SetONNXGPUStream(nnApplication.model_class.updateSessionOptions(), lane);
923-
SetONNXGPUStream(nnApplication.model_reg_1.updateSessionOptions(), lane);
924-
SetONNXGPUStream(nnApplication.model_reg_2.updateSessionOptions(), lane);
939+
GPUTPCNNClusterizerHost nnApplication(nn_settings, lane); // FIXME: This needs to be the deviceID. If that is the lane, then this line is correct
940+
int32_t deviceId = -1;
941+
SetONNXGPUStream(nnApplication.model_class.updateSessionOptions(), lane, &deviceId);
942+
SetONNXGPUStream(nnApplication.model_reg_1.updateSessionOptions(), lane, &deviceId);
943+
SetONNXGPUStream(nnApplication.model_reg_2.updateSessionOptions(), lane, &deviceId);
925944
int withMC = (doGPU && propagateMCLabels);
926945

927-
if (clustererNN.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
946+
if (clustererNNShadow.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
928947
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
929948
DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
930949
}
931950

932951
float time_clusterizer = 0, time_fill = 0;
933-
for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNN.nnClusterizerBatchedMode); batch++) {
934-
uint batchStart = batch * clustererNN.nnClusterizerBatchedMode;
935-
size_t iSize = CAMath::Min((uint)clustererNN.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
952+
for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.nnClusterizerBatchedMode); batch++) {
953+
uint batchStart = batch * clustererNNShadow.nnClusterizerBatchedMode;
954+
size_t iSize = CAMath::Min((uint)clustererNNShadow.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
936955

937956
auto start0 = std::chrono::high_resolution_clock::now();
938-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNN>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnInferenceInputDType, withMC, batchStart); // Filling the data
957+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNN>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Filling the data
939958

940959
auto stop0 = std::chrono::high_resolution_clock::now();
941960
auto start1 = std::chrono::high_resolution_clock::now();
942-
nnApplication.networkInference(nnApplication.model_class, clustererNN, iSize, clustererNN.modelProbabilities, clustererNN.nnInferenceInputDType);
961+
nnApplication.networkInference(nnApplication.model_class, clustererNN, iSize, clustererNNShadow.modelProbabilities, clustererNNShadow.nnInferenceInputDType, deviceId);
943962
if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) {
944-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnInferenceInputDType, withMC, batchStart); // Assigning class labels
963+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Assigning class labels
945964
} else {
946-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnInferenceInputDType, withMC, batchStart); // Assigning class labels
965+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Assigning class labels
947966
}
948967

949-
if (!clustererNN.nnClusterizerUseCfRegression) {
950-
nnApplication.networkInference(nnApplication.model_reg_1, clustererNN, iSize, clustererNN.outputDataReg1, clustererNN.nnInferenceInputDType);
951-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnInferenceInputDType, withMC, batchStart); // Running the NN for regression class 1
968+
if (!clustererNNShadow.nnClusterizerUseCfRegression) {
969+
nnApplication.networkInference(nnApplication.model_reg_1, clustererNN, iSize, clustererNNShadow.outputDataReg1, clustererNNShadow.nnInferenceInputDType, deviceId);
970+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Running the NN for regression class 1
952971
if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) {
953-
nnApplication.networkInference(nnApplication.model_reg_2, clustererNN, iSize, clustererNN.outputDataReg2, clustererNN.nnInferenceInputDType);
954-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnInferenceInputDType, withMC, batchStart); // Running the NN for regression class 2
972+
nnApplication.networkInference(nnApplication.model_reg_2, clustererNN, iSize, clustererNNShadow.outputDataReg2, clustererNNShadow.nnInferenceInputDType, deviceId);
973+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Running the NN for regression class 2
955974
}
956975
}
957976
auto stop1 = std::chrono::high_resolution_clock::now();
@@ -960,15 +979,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
960979
time_fill += std::chrono::duration_cast<std::chrono::nanoseconds>(stop0 - start0).count() / 1e9;
961980
}
962981
auto start1 = std::chrono::high_resolution_clock::now();
963-
if (clustererNN.nnClusterizerUseCfRegression) {
964-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNN.nnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
982+
if (clustererNNShadow.nnClusterizerUseCfRegression) {
983+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
965984
}
966985
auto stop1 = std::chrono::high_resolution_clock::now();
967986
time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
968-
if (clustererNN.nnClusterizerVerbosity < 3) {
987+
if (clustererNNShadow.nnClusterizerVerbosity < 3) {
969988
int acceptedClusters = 0;
970989
for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) {
971-
acceptedClusters += clustererNN.outputDataClass[i];
990+
acceptedClusters += clustererNNShadow.outputDataClass[i];
972991
}
973992
LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s";
974993
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,35 @@ void* GPUTPCNNClusterizer::setIOPointers(void* mem)
5151
return mem;
5252
}
5353

54+
std::vector<int32_t> GPUTPCNNClusterizer::pointerSizes() {
55+
std::vector<int32_t> sizes(9, -1);
56+
if (nnClusterizerBatchedMode > 0) {
57+
if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) {
58+
sizes[0] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData16
59+
} else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) {
60+
sizes[1] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData32
61+
}
62+
sizes[2] = nnClusterizerBatchedMode; // peakPositions
63+
sizes[3] = 2 * nnClusterizerBatchedMode; // clusterFlags
64+
sizes[4] = nnClusterizerBatchedMode; // centralCharges
65+
if (nnClusterizerModelClassNumOutputNodes > 0) {
66+
sizes[5] = nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes; // modelProbabilities
67+
}
68+
if (!nnClusterizerUseCfRegression) {
69+
if (nnClusterizerModelReg1NumOutputNodes > 0) {
70+
sizes[6] = nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes; // outputDataReg1
71+
}
72+
if (nnClusterizerModelReg2NumOutputNodes > 0) {
73+
sizes[7] = nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes; // outputDataReg2
74+
}
75+
}
76+
}
77+
if (nnClusterizerTotalClusters > 0) {
78+
sizes[8] = nnClusterizerTotalClusters; // outputDataClass
79+
}
80+
return sizes;
81+
}
82+
5483
void GPUTPCNNClusterizer::RegisterMemoryAllocation()
5584
{
5685
AllocateAndInitializeLate();

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class GPUTPCNNClusterizer : public GPUProcessor
3434
void RegisterMemoryAllocation();
3535
void InitializeProcessor();
3636
void SetMaxData(const GPUTrackingInOutPointers&);
37+
std::vector<int32_t> pointerSizes();
3738

3839
// Neural network clusterization
3940

0 commit comments

Comments (0)