1919#include " GPUSettings.h"
2020#include " ML/3rdparty/GPUORTFloat16.h"
2121#include " GPUReconstruction.h"
22+ #include " GPUTPCGeometry.h"
23+ #include " DataFormatsTPC/Constants.h"
2224
2325#ifdef GPUCA_HAS_ONNX
2426#include < onnxruntime_cxx_api.h>
@@ -87,8 +89,11 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
8789 clustererNN.mNnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow ;
8890 clustererNN.mNnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad ;
8991 clustererNN.mNnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime ;
92+ clustererNN.mNnClusterizerChargeArraySize = ((2 * settings.nnClusterizerSizeInputRow + 1 ) * (2 * settings.nnClusterizerSizeInputPad + 1 ) * (2 * settings.nnClusterizerSizeInputTime + 1 ));
93+ clustererNN.mNnClusterizerElementSize = clustererNN.mNnClusterizerChargeArraySize + (settings.nnClusterizerAddIndexData ? 3 : 0 );
94+ clustererNN.mBoundaryMapSize = (3 *clustererNN.mNnClusterizerSizeInputRow + o2::tpc::constants::MAXGLOBALPADROW)*(GPUTPCGeometry::NPads (o2::tpc::constants::MAXGLOBALPADROW) + 2 *clustererNN.mNnClusterizerSizeInputPad );
95+ clustererNN.mIndexLookupSize = 3 *clustererNN.mNnClusterizerElementSize ; // local row, pad, time coordinate from flat index
9096 clustererNN.mNnClusterizerAddIndexData = settings.nnClusterizerAddIndexData ;
91- clustererNN.mNnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1 ) * (2 * settings.nnClusterizerSizeInputPad + 1 ) * (2 * settings.nnClusterizerSizeInputTime + 1 )) + (settings.nnClusterizerAddIndexData ? 3 : 0 );
9297 clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode ;
9398 clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue ;
9499 clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold ;
@@ -114,6 +119,41 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
114119 clustererNN.mNnClusterizerModelReg2NumOutputNodes = mModelReg2 .getNumOutputNodes ()[0 ][1 ];
115120 }
116121 }
122+ createBoundary (clustererNN);
123+ createIndexLookup (clustererNN);
124+ }
125+
126+ void GPUTPCNNClusterizerHost::createBoundary (GPUTPCNNClusterizer& clustererNN) {
127+ // Call after init of the clustererNN elements
128+ clustererNN.mBoundaryMapSizeRow = 3 * clustererNN.mNnClusterizerSizeInputRow + o2::tpc::constants::MAXGLOBALPADROW;
129+ clustererNN.mBoundaryMapSizePerRow = GPUTPCGeometry::NPads (o2::tpc::constants::MAXGLOBALPADROW) + 2 * clustererNN.mNnClusterizerSizeInputPad ;
130+ for (int r = 0 ; r < clustererNN.mBoundaryMapSizeRow ; r++) {
131+ for (int p = 0 ; p < clustererNN.mBoundaryMapSizePerRow ; p++) {
132+ int32_t i = r * clustererNN.mBoundaryMapSizePerRow + p;
133+ clustererNN.mIsBoundary [i] = 1 ;
134+ if (p >= clustererNN.mNnClusterizerSizeInputPad || r >= clustererNN.mNnClusterizerSizeInputRow ) {
135+ if ((r < (GPUTPCGeometry::EndIROC () + clustererNN.mNnClusterizerSizeInputRow )) ||
136+ (r >= (GPUTPCGeometry::EndIROC () + 2 *clustererNN.mNnClusterizerSizeInputRow ) && r < (o2::tpc::constants::MAXGLOBALPADROW + 2 *clustererNN.mNnClusterizerSizeInputRow ))) {
137+ clustererNN.mIsBoundary [i] = (int32_t )((p - clustererNN.mNnClusterizerSizeInputPad ) >= static_cast <int >(GPUTPCGeometry::NPads (r - clustererNN.mNnClusterizerSizeInputRow )));
138+ }
139+ if (clustererNN.mIsBoundary [i] == 1 ) {
140+ break ; // No need to check further pads in this row
141+ }
142+ }
143+ }
144+ }
145+ }
146+
147+ void GPUTPCNNClusterizerHost::createIndexLookup (GPUTPCNNClusterizer& clustererNN) {
148+ for (int32_t i = 0 ; i < clustererNN.mNnClusterizerChargeArraySize ; i++){
149+ int32_t r = CAMath::Floor (i / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1 ) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1 ))) - clustererNN.mNnClusterizerSizeInputRow ;
150+ int32_t rest_1 = i % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1 ) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1 ));
151+ int32_t p = CAMath::Floor (rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1 )) - clustererNN.mNnClusterizerSizeInputPad ;
152+ int32_t t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1 )) - clustererNN.mNnClusterizerSizeInputTime ;
153+ clustererNN.mIndexLookup [3 *i] = r;
154+ clustererNN.mIndexLookup [3 *i + 1 ] = p;
155+ clustererNN.mIndexLookup [3 *i + 2 ] = t;
156+ }
117157}
118158
119159// MockedOrtAllocator implementation to be able to use volatile assignment
0 commit comments