Skip to content

Commit e830697

Browse files
committed
Adding some documentation
1 parent 408787d commit e830697

File tree

2 files changed

+19
-10
lines changed

2 files changed

+19
-10
lines changed

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
using namespace o2::gpu;
2727
using namespace o2::gpu::tpccf;
2828

29+
// Defining individual thread functions for data filling, determining the class label and running the CF clusterizer
2930
template <>
3031
GPUdii() void GPUTPCNNClusterizer::Thread<GPUTPCNNClusterizer::runCfClusterizer>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t dtype, int8_t onlyMC, uint batchStart)
3132
{
@@ -58,7 +59,7 @@ GPUdii() void GPUTPCNNClusterizer::Thread<GPUTPCNNClusterizer::determineClass2La
5859
{
5960
uint glo_idx = get_global_id(0);
6061
auto elem_iterator = clusterer.modelProbabilities.begin() + (unsigned int)(glo_idx * clusterer.model_class.getNumOutputNodes()[0][1]);
61-
uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clusterer.model_class.getNumOutputNodes()[0][1]));
62+
uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clusterer.model_class.getNumOutputNodes()[0][1])); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins"
6263
clusterer.outputDataClass[glo_idx + batchStart] = class_label;
6364
}
6465

@@ -107,6 +108,7 @@ void GPUTPCNNClusterizer::applyNetworkReg2(processorType& clusterer, int8_t dtyp
107108
}
108109
}
109110

111+
// The following arithmetic is done because the network is trained with a split at the IROC / OROC boundary
110112
int GPUTPCNNClusterizer::padOffset(int row_ref, int row_current, const GPUTPCGeometry& geo)
111113
{
112114
return (int)((geo.NPads(row_current) - geo.NPads(row_ref)) / 2);
@@ -117,7 +119,6 @@ int GPUTPCNNClusterizer::rowOffset(int row, int global_shift)
117119
return (row > 62 ? global_shift : 0);
118120
}
119121

120-
// ---------------------------------
121122
bool GPUTPCNNClusterizer::isBoundary(int row, int pad, int global_shift, const GPUTPCGeometry& geo)
122123
{
123124
if (pad < 0 || row < 0) { // Faster short-circuit
@@ -133,24 +134,25 @@ bool GPUTPCNNClusterizer::isBoundary(int row, int pad, int global_shift, const G
133134
}
134135
}
135136

136-
// ---------------------------------
137+
// Filling the input data for the neural network where there is no boundary
137138
GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, processorType& clusterer, int8_t dtype, uint batchStart)
138139
{
139140

140141
Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
141142

142143
uint glo_idx = get_global_id(0);
143144

144-
uint write_idx = glo_idx * clusterer.nnClusterizerElementSize; // For optimization: Either choose nnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId
145+
uint write_idx = glo_idx * clusterer.nnClusterizerElementSize; // Potential optimization: Either choose nnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId
145146

146147
ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart];
147-
int row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad()), time = static_cast<int>(peak.time());
148+
int row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad()), time = static_cast<int>(peak.time()); // Explicit casting to avoid conversion errors
148149
float central_charge = static_cast<float>(chargeMap[peak].unpack());
149150

150151
clusterer.peakPositions[glo_idx] = peak;
151152
clusterer.centralCharges[glo_idx] = central_charge;
152153

153154
int row_offset = GPUTPCNNClusterizer::rowOffset(row, clusterer.nnClusterizerSizeInputRow);
155+
GPUCA_UNROLL(U(), U());
154156
for (int r = -clusterer.nnClusterizerSizeInputRow; r <= clusterer.nnClusterizerSizeInputRow; r++) {
155157
bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
156158
int pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizer::padOffset(row, row + r, clusterer.Param().tpcGeometry);
@@ -165,6 +167,7 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
165167
clusterer.inputData32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
166168
}
167169
} else {
170+
// Filling the boundary region so that no input values are unintentionally left unset
168171
if(dtype == 0){
169172
clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(clusterer.nnClusterizerBoundaryFillValue));
170173
} else {
@@ -188,7 +191,6 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
188191
}
189192
}
190193

191-
// ---------------------------------
192194
GPUd() void GPUTPCNNClusterizer::publishClustersReg1(uint glo_idx, GPUSharedMemory& smem, processorType& clusterer, int8_t dtype, int8_t onlyMC, uint batchStart)
193195
{
194196
Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
@@ -204,6 +206,7 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg1(uint glo_idx, GPUSharedMemo
204206

205207
ClusterAccumulator pc;
206208

209+
// Publishing logic is taken from default clusterizer
207210
if (onlyMC) {
208211
ClusterAccumulator dummy_pc;
209212
CPU_ONLY(labelAcc->collect(clusterer.peakPositions[glo_idx], chargeMap[clusterer.peakPositions[glo_idx]].unpack()));
@@ -252,10 +255,14 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg1(uint glo_idx, GPUSharedMemo
252255
rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
253256
}
254257
CPU_ONLY(labelAcc->commit(clusterer.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow));
258+
} else {
259+
if (clusterer.mPclusterPosInRow) {
260+
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
261+
}
262+
return;
255263
}
256264
}
257265

258-
// ---------------------------------
259266
GPUd() void GPUTPCNNClusterizer::publishClustersReg2(uint glo_idx, GPUSharedMemory& smem, processorType& clusterer, int8_t dtype, int8_t onlyMC, uint batchStart)
260267
{
261268
Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
@@ -353,5 +360,10 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg2(uint glo_idx, GPUSharedMemo
353360
rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
354361
}
355362
// CPU_ONLY(labelAcc->commit(clusterer.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); // -> Is this needed? How to handle MC labels for split clusters?
363+
} else {
364+
if (clusterer.mPclusterPosInRow) {
365+
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
366+
}
367+
return;
356368
}
357369
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,11 @@ class GPUTPCNNClusterizer : public GPUKernelTemplate
6868
GPUd() static void Thread(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, processorType&, int8_t = 0, int8_t = 0, uint = 0, Args...);
6969

7070
static GPUd() void fillInputData(int32_t, int32_t, int32_t, int32_t, processorType&, int8_t, uint);
71-
7271
static GPUd() void publishClustersReg1(uint, GPUSharedMemory&, processorType&, int8_t, int8_t, uint);
7372
static GPUd() void publishClustersReg2(uint, GPUSharedMemory&, processorType&, int8_t, int8_t, uint);
7473

7574
static void applyNetworkClass(processorType&, int8_t = 0, uint = 0);
76-
7775
static void applyNetworkReg1(processorType&, int8_t = 0);
78-
7976
static void applyNetworkReg2(processorType&, int8_t = 0);
8077

8178

0 commit comments

Comments
 (0)