Commit 0125c2a

Explicit casting solves regression issues. To be done: Correct publishing for class2 regression
1 parent c55cfc2 commit 0125c2a
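The message above attributes the regression fix to explicit casting. As a minimal, standalone sketch (hypothetical names and values, not code from this repository), the example below illustrates the two implicit-conversion pitfalls that the added static_casts in this diff guard against: comparing a signed pad index against an unsigned pad count, and performing an integer division where a floating-point ratio is intended.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for a geometry query such as geo.NPads(row); in the
// real clusterizer the pad count is an unsigned quantity.
uint32_t NPads(int /*row*/) { return 138u; }

int main()
{
  int pad = -3; // a pad index that can go negative after applying an offset

  // Implicit conversion: the signed 'pad' is converted to unsigned for the
  // comparison, so -3 wraps to a huge value and the check evaluates to true.
  bool withoutCast = (pad >= NPads(0));

  // An explicit cast keeps the comparison in signed arithmetic, mirroring the
  // diff's `pad >= static_cast<int>(geo.NPads(row))`.
  bool withCast = (pad >= static_cast<int>(NPads(0)));

  // Integer arithmetic pitfall: without a cast, `pad / 152` truncates to 0
  // before the assignment, while casting first yields the intended fraction.
  float truncated = pad / 152;
  float intended = static_cast<float>(pad) / 152.f;

  std::printf("withoutCast=%d withCast=%d truncated=%g intended=%g\n",
              withoutCast, withCast, truncated, intended);
  return 0;
}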

File tree: 1 file changed (+16, -29 lines)


GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 16 additions & 29 deletions
@@ -123,19 +123,11 @@ bool GPUTPCNNClusterizer::isBoundary(int row, int pad, int global_shift, const G
   if (pad < 0 || row < 0) { // Faster short-circuit
     return true;
   } else if (row < 63) {
-    if (pad >= geo.NPads(row)) {
-      return true;
-    } else {
-      return false;
-    }
+    return (pad >= static_cast<int>(geo.NPads(row)));
   } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. Charge will be set to -1 in order to signal boundary to the neural network
     return true;
   } else if (row <= o2::tpc::constants::MAXGLOBALPADROW - 1 + global_shift) {
-    if (pad >= geo.NPads(row - global_shift)) {
-      return true;
-    } else {
-      return false;
-    }
+    return (pad >= static_cast<int>(geo.NPads(row - global_shift)));
   } else {
     return true;
   }
@@ -148,40 +140,35 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
   Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
 
   uint glo_idx = get_global_id(0);
-  // SHouldn't be needed
-  // if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters)
-  // {
-  //   return;
-  // }
 
   uint write_idx = glo_idx * clusterer.nnClusterizerElementSize; // For optimization: Either choose nnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId
 
   ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart];
-  int row = peak.row(), pad = peak.pad(), time = peak.time();
-  float central_charge = chargeMap[peak].unpack();
+  int row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad()), time = static_cast<int>(peak.time());
+  float central_charge = static_cast<float>(chargeMap[peak].unpack());
 
   clusterer.peakPositions[glo_idx] = peak;
   clusterer.centralCharges[glo_idx] = central_charge;
 
   int row_offset = GPUTPCNNClusterizer::rowOffset(row, clusterer.nnClusterizerSizeInputRow);
   for (int r = -clusterer.nnClusterizerSizeInputRow; r <= clusterer.nnClusterizerSizeInputRow; r++) {
-    bool is_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
-    int pad_offset = is_boundary ? 0 : GPUTPCNNClusterizer::padOffset(row, row + r, clusterer.Param().tpcGeometry);
+    bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
+    int pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizer::padOffset(row, row + r, clusterer.Param().tpcGeometry);
     for (int p = -clusterer.nnClusterizerSizeInputPad + pad_offset; p <= clusterer.nnClusterizerSizeInputPad + pad_offset; p++) {
-      is_boundary = is_boundary || GPUTPCNNClusterizer::isBoundary(row + r + row_offset, pad + p, clusterer.nnClusterizerSizeInputRow, clusterer.Param().tpcGeometry);
+      bool is_boundary = is_row_boundary || GPUTPCNNClusterizer::isBoundary(row + r + row_offset, pad + p, clusterer.nnClusterizerSizeInputRow, clusterer.Param().tpcGeometry);
       for (int t = -clusterer.nnClusterizerSizeInputTime; t <= clusterer.nnClusterizerSizeInputTime; t++) {
-        if (is_boundary) {
+        if (!is_boundary) {
+          ChargePos tmp_pos(row + r, pad + p, time + t);
           if(dtype == 0){
-            clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)((float)clusterer.nnClusterizerBoundaryFillValue);
+            clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
           } else {
-            clusterer.inputData32[write_idx] = (float)clusterer.nnClusterizerBoundaryFillValue;
+            clusterer.inputData32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
           }
         } else {
-          ChargePos tmp_pos(row + r, pad + p, time + t);
           if(dtype == 0){
-            clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)((float)chargeMap[tmp_pos].unpack() / central_charge);
+            clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(clusterer.nnClusterizerBoundaryFillValue));
          } else {
-            clusterer.inputData32[write_idx] = (float)chargeMap[tmp_pos].unpack() / central_charge;
+            clusterer.inputData32[write_idx] = static_cast<float>(clusterer.nnClusterizerBoundaryFillValue);
          }
        }
        write_idx++;
@@ -192,11 +179,11 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
     if(dtype == 0){
       clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)(clusterer.mISlice / 36.f);
       clusterer.inputData16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f);
-      clusterer.inputData16[write_idx + 2] = (OrtDataType::Float16_t)((float)pad / clusterer.Param().tpcGeometry.NPads(row));
+      clusterer.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / clusterer.Param().tpcGeometry.NPads(row));
     } else {
       clusterer.inputData32[write_idx] = clusterer.mISlice / 36.f;
       clusterer.inputData32[write_idx + 1] = row / 152.f;
-      clusterer.inputData32[write_idx + 2] = (float)pad / clusterer.Param().tpcGeometry.NPads(row);
+      clusterer.inputData32[write_idx + 2] = static_cast<float>(pad) / clusterer.Param().tpcGeometry.NPads(row);
     }
   }
 }
@@ -238,7 +225,7 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg1(uint glo_idx, GPUSharedMemo
     return;
   }
 
-  pc.setFull(clusterer.centralCharges[glo_idx] * clusterer.outputDataReg1[model_output_index + 4], clusterer.peakPositions[glo_idx].pad() + clusterer.outputDataReg1[model_output_index], clusterer.outputDataReg1[model_output_index + 2], (clusterer.mPmemory->fragment).start + clusterer.peakPositions[glo_idx].time() + clusterer.outputDataReg1[model_output_index + 1], clusterer.outputDataReg1[model_output_index + 3], 0, 0);
+  pc.setFull(clusterer.centralCharges[glo_idx] * clusterer.outputDataReg1[model_output_index + 4], static_cast<float>(clusterer.peakPositions[glo_idx].pad()) + clusterer.outputDataReg1[model_output_index], clusterer.outputDataReg1[model_output_index + 2], static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clusterer.peakPositions[glo_idx].time()) + clusterer.outputDataReg1[model_output_index + 1], clusterer.outputDataReg1[model_output_index + 3], 0, 0);
 
   tpc::ClusterNative myCluster;
   bool rejectCluster = !pc.toNative(clusterer.peakPositions[glo_idx], clusterer.centralCharges[glo_idx], myCluster, clusterer.Param());
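For orientation, the sketch below is a minimal, self-contained rendering of the filling pattern that the reworked loop in fillInputData follows, using hypothetical window sizes, pad count, and a dummy charge source (none of these names or values come from the O2 code): positions inside the detector are written as charge normalised by the central charge, while boundary positions receive the boundary fill value.

#include <cstdio>
#include <vector>

// Hypothetical window half-sizes, pad count and fill value, for illustration
// only; the real values come from the clusterer configuration
// (nnClusterizerSizeInputPad/Time, nnClusterizerBoundaryFillValue).
constexpr int kHalfPad = 2;
constexpr int kHalfTime = 2;
constexpr int kNPads = 138;
constexpr float kBoundaryFill = -1.f;

// Stand-in for chargeMap[pos].unpack(): a flat dummy charge.
float unpackCharge(int /*pad*/, int /*time*/) { return 10.f; }

// Stand-in for the geometry boundary check used by the kernel.
bool isBoundary(int pad) { return pad < 0 || pad >= kNPads; }

// Mirrors the branch order of the updated loop: positions inside the detector
// are normalised by the central charge, boundary positions get the fill value.
std::vector<float> fillWindow(int pad, int time, float centralCharge)
{
  std::vector<float> out;
  for (int p = -kHalfPad; p <= kHalfPad; p++) {
    bool boundary = isBoundary(pad + p);
    for (int t = -kHalfTime; t <= kHalfTime; t++) {
      out.push_back(!boundary ? unpackCharge(pad + p, time + t) / centralCharge
                              : kBoundaryFill);
    }
  }
  return out;
}

int main()
{
  auto window = fillWindow(/*pad=*/1, /*time=*/100, /*centralCharge=*/20.f);
  for (float v : window) {
    std::printf("%.2f ", v);
  }
  std::printf("\n");
  return 0;
}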

0 commit comments
