|
19 | 19 | #include "GPUSettings.h" |
20 | 20 | #include "ML/3rdparty/GPUORTFloat16.h" |
21 | 21 | #include "GPUReconstruction.h" |
| 22 | +#include "GPUTPCGeometry.h" |
| 23 | +#include "DataFormatsTPC/Constants.h" |
22 | 24 |
|
23 | 25 | #ifdef GPUCA_HAS_ONNX |
24 | 26 | #include <onnxruntime_cxx_api.h> |
@@ -87,8 +89,20 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust |
87 | 89 | clustererNN.mNnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow; |
88 | 90 | clustererNN.mNnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad; |
89 | 91 | clustererNN.mNnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime; |
| 92 | + clustererNN.mNnClusterizerFullRowSize = 2 * settings.nnClusterizerSizeInputRow + 1; |
| 93 | + clustererNN.mNnClusterizerFullPadSize = 2 * settings.nnClusterizerSizeInputPad + 1; |
| 94 | + clustererNN.mNnClusterizerFullTimeSize = 2 * settings.nnClusterizerSizeInputTime + 1; |
| 95 | + clustererNN.mNnClusterizerChargeArraySize = clustererNN.mNnClusterizerFullRowSize * clustererNN.mNnClusterizerFullPadSize * clustererNN.mNnClusterizerFullTimeSize; |
| 96 | + clustererNN.mNnClusterizerPadTimeSize = clustererNN.mNnClusterizerFullPadSize * clustererNN.mNnClusterizerFullTimeSize; |
| 97 | + clustererNN.mNnClusterizerRowTimeSize = clustererNN.mNnClusterizerFullRowSize * clustererNN.mNnClusterizerFullTimeSize; |
| 98 | + clustererNN.mNnClusterizerRowTimeSizeFull = clustererNN.mNnClusterizerRowTimeSize + (settings.nnClusterizerAddIndexData ? 3 : 0); |
| 99 | + clustererNN.mNnClusterizerElementSize = clustererNN.mNnClusterizerChargeArraySize + (settings.nnClusterizerAddIndexData ? 3 : 0); |
| 100 | + // clustererNN.mBoundaryMapSizeRow = 3 * clustererNN.mNnClusterizerSizeInputRow + o2::tpc::constants::MAXGLOBALPADROW; |
| 101 | + // clustererNN.mBoundaryPadding = 11; // Padding on each side to account for pad_offset. N = 11 is chosen because (a) the size of mIsBoundary is then 24320, just below 1.5 x 2^14 = 24576, and (b) N must be larger than (NPads[row(end_iroc + 1)] - NPads[row(end_iroc)]) / 2 (= 6) for pad_offset to work |
| 102 | + // clustererNN.mBoundaryMapSizePadsPerRow = GPUTPCGeometry::NPads(o2::tpc::constants::MAXGLOBALPADROW - 1) + 2 * clustererNN.mBoundaryPadding; |
| 103 | + // clustererNN.mBoundaryMapSize = clustererNN.mBoundaryMapSizeRow * clustererNN.mBoundaryMapSizePadsPerRow; |
| 104 | + // clustererNN.mIndexLookupSize = 3 * clustererNN.mNnClusterizerChargeArraySize; // three entries per flat index: local row, pad and time shift |
90 | 105 | clustererNN.mNnClusterizerAddIndexData = settings.nnClusterizerAddIndexData; |
91 | | - clustererNN.mNnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 3 : 0); |
92 | 106 | clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode; |
93 | 107 | clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue; |
94 | 108 | clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold; |
@@ -116,6 +130,39 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust |
116 | 130 | } |
117 | 131 | } |
118 | 132 |
|
| 133 | +// void GPUTPCNNClusterizerHost::createBoundary(GPUTPCNNClusterizer& clustererNN) |
| 134 | +// { |
| 135 | +// // Must be called after the clustererNN members (boundary map sizes, padding, input sizes) have been initialized |
| 136 | +// for (int r = 0; r < clustererNN.mBoundaryMapSizeRow; r++) { |
| 137 | +// int8_t skipCheckInRow = 0; |
| 138 | +// for (int p = 0; p < clustererNN.mBoundaryMapSizePadsPerRow; p++) { |
| 139 | +// int32_t i = r * clustererNN.mBoundaryMapSizePadsPerRow + p; |
| 140 | +// clustererNN.mIsBoundary[i] = 1; |
| 141 | +// if (!skipCheckInRow && (p >= clustererNN.mBoundaryPadding || r >= clustererNN.mNnClusterizerSizeInputRow)) { |
| 142 | +// if (r < (GPUTPCGeometry::EndIROC() + clustererNN.mNnClusterizerSizeInputRow)) { |
| 143 | +// clustererNN.mIsBoundary[i] = (int32_t)((p - clustererNN.mBoundaryPadding) >= static_cast<int>(GPUTPCGeometry::NPads(r - clustererNN.mNnClusterizerSizeInputRow))); |
| 144 | +// } else if (r >= (GPUTPCGeometry::EndIROC() + 2 * clustererNN.mNnClusterizerSizeInputRow) && r < (o2::tpc::constants::MAXGLOBALPADROW + 2 * clustererNN.mNnClusterizerSizeInputRow)) { |
| 145 | +// clustererNN.mIsBoundary[i] = (int32_t)((p - clustererNN.mBoundaryPadding) >= static_cast<int>(GPUTPCGeometry::NPads(r - 2 * clustererNN.mNnClusterizerSizeInputRow))); |
| 146 | +// } |
| 147 | +// skipCheckInRow = (clustererNN.mIsBoundary[i] == 1); // No need to check further pads in this row |
| 148 | +// } |
| 149 | +// } |
| 150 | +// } |
| 151 | +// } |
| 152 | + |
| 153 | +// void GPUTPCNNClusterizerHost::createIndexLookup(GPUTPCNNClusterizer& clustererNN) |
| 154 | +// { |
| 155 | +// for (int32_t i = 0; i < clustererNN.mNnClusterizerChargeArraySize; i++) { |
| 156 | +// int32_t r = CAMath::Floor(i / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow; |
| 157 | +// int32_t rest_1 = i % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1)); |
| 158 | +// int32_t p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad; |
| 159 | +// int32_t t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime; |
| 160 | +// clustererNN.mIndexLookup[3 * i] = r; |
| 161 | +// clustererNN.mIndexLookup[3 * i + 1] = p; |
| 162 | +// clustererNN.mIndexLookup[3 * i + 2] = t; |
| 163 | +// } |
| 164 | +// } |
| 165 | + |
119 | 166 | // MockedOrtAllocator implementation to be able to use volatile assignment |
120 | 167 | struct MockedOrtAllocator : OrtAllocator { |
121 | 168 | MockedOrtAllocator(GPUReconstruction* = nullptr, OrtMemoryInfo* = nullptr); |
|
0 commit comments