Commit 3377435

Adjusting CMakeLists and fixing other bugs
1 parent ad9696e commit 3377435

4 files changed: +3 additions, -236 deletions


GPU/GPUTracking/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -324,7 +324,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
       ${targetName}
       PRIVATE $<TARGET_PROPERTY:O2::Framework,INTERFACE_INCLUDE_DIRECTORIES>)
 
-    target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2)
+    target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAS_ONNX=1)
 
   o2_target_root_dictionary(${MODULE}
                             HEADERS ${HDRS_CINT_O2} ${HDRS_CINT_O2_ADDITIONAL}
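For context, this hunk only adds GPUCA_HAS_ONNX=1 to the compile definitions; how the flag is consumed is not part of this commit. A minimal sketch, assuming the definition is used as an ordinary preprocessor guard (the function name and messages below are illustrative, not taken from the O2 sources):

#include <cstdio>

// Sketch only: a translation unit compiled with -DGPUCA_HAS_ONNX=1 can select
// the ONNX-dependent code path at compile time.
#if defined(GPUCA_HAS_ONNX) && GPUCA_HAS_ONNX == 1
static void runClusterization() { std::printf("ONNX-based clusterizer path\n"); }
#else
static void runClusterization() { std::printf("default clusterizer path\n"); }
#endif

int main()
{
  runClusterization();
  return 0;
}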

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx

Lines changed: 0 additions & 233 deletions
@@ -36,237 +36,4 @@ GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
   tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow;
 
   GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow);
-<<<<<<< HEAD
 }
-=======
-}
-
-GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
-                                                       processorType& clusterer,
-                                                       const CfFragment& fragment,
-                                                       GPUSharedMemory& smem,
-                                                       const Array2D<PackedCharge>& chargeMap,
-                                                       const ChargePos* filteredPeakPositions,
-                                                       const GPUSettingsRec& calib,
-                                                       MCLabelAccumulator* labelAcc,
-                                                       uint32_t clusternum,
-                                                       uint32_t maxClusterPerRow,
-                                                       uint32_t* clusterInRow,
-                                                       tpc::ClusterNative* clusterByRow,
-                                                       uint32_t* clusterPosInRow)
-{
-  uint32_t idx = get_global_id(0);
-
-  // For certain configurations dummy work items are added, so the total
-  // number of work items is dividable by 64.
-  // These dummy items also compute the last cluster but discard the result.
-  ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)];
-  Charge charge = chargeMap[pos].unpack();
-
-  ClusterAccumulator pc;
-  CPU_ONLY(labelAcc->collect(pos, charge));
-
-  buildCluster(
-    calib,
-    chargeMap,
-    pos,
-    smem.posBcast,
-    smem.buf,
-    smem.innerAboveThreshold,
-    &pc,
-    labelAcc);
-
-  if (idx >= clusternum) {
-    return;
-  }
-  if (fragment.isOverlap(pos.time())) {
-    if (clusterPosInRow) {
-      clusterPosInRow[idx] = maxClusterPerRow;
-    }
-    return;
-  }
-  tpc::ClusterNative myCluster;
-  bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap);
-
-  if (rejectCluster) {
-    if (clusterPosInRow) {
-      clusterPosInRow[idx] = maxClusterPerRow;
-    }
-    return;
-  }
-
-  uint32_t rowIndex = 0;
-  if (clusterByRow != nullptr) {
-    rowIndex = sortIntoBuckets(
-      clusterer,
-      myCluster,
-      pos.row(),
-      maxClusterPerRow,
-      clusterInRow,
-      clusterByRow);
-    if (clusterPosInRow != nullptr) {
-      clusterPosInRow[idx] = rowIndex;
-    }
-  } else if (clusterPosInRow) {
-    rowIndex = clusterPosInRow[idx];
-  }
-
-  CPU_ONLY(labelAcc->commit(pos.row(), rowIndex, maxClusterPerRow));
-}
-
-GPUdii() void GPUTPCCFClusterizer::updateClusterInner(
-  const GPUSettingsRec& calib,
-  uint16_t lid,
-  uint16_t N,
-  const PackedCharge* buf,
-  const ChargePos& pos,
-  ClusterAccumulator* cluster,
-  MCLabelAccumulator* labelAcc,
-  uint8_t* innerAboveThreshold)
-{
-  uint8_t aboveThreshold = 0;
-
-  GPUCA_UNROLL(U(), U())
-  for (uint16_t i = 0; i < N; i++) {
-    Delta2 d = cfconsts::InnerNeighbors[i];
-
-    PackedCharge p = buf[N * lid + i];
-
-    Charge q = cluster->updateInner(p, d);
-
-    CPU_ONLY(labelAcc->collect(pos.delta(d), q));
-
-    aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i);
-  }
-
-  innerAboveThreshold[lid] = aboveThreshold;
-
-  GPUbarrier();
-}
-
-GPUdii() void GPUTPCCFClusterizer::updateClusterOuter(
-  uint16_t lid,
-  uint16_t N,
-  uint16_t M,
-  uint16_t offset,
-  const PackedCharge* buf,
-  const ChargePos& pos,
-  ClusterAccumulator* cluster,
-  MCLabelAccumulator* labelAcc)
-{
-  GPUCA_UNROLL(U(), U())
-  for (uint16_t i = offset; i < M + offset; i++) {
-    PackedCharge p = buf[N * lid + i];
-
-    Delta2 d = cfconsts::OuterNeighbors[i];
-
-    Charge q = cluster->updateOuter(p, d);
-    static_cast<void>(q); // Avoid unused varible warning on GPU.
-
-    CPU_ONLY(labelAcc->collect(pos.delta(d), q));
-  }
-}
-
-GPUdii() void GPUTPCCFClusterizer::buildCluster(
-  const GPUSettingsRec& calib,
-  const Array2D<PackedCharge>& chargeMap,
-  ChargePos pos,
-  ChargePos* posBcast,
-  PackedCharge* buf,
-  uint8_t* innerAboveThreshold,
-  ClusterAccumulator* myCluster,
-  MCLabelAccumulator* labelAcc)
-{
-  uint16_t ll = get_local_id(0);
-
-  posBcast[ll] = pos;
-  GPUbarrier();
-
-  CfUtils::blockLoad<PackedCharge>(
-    chargeMap,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    ll,
-    0,
-    8,
-    cfconsts::InnerNeighbors,
-    posBcast,
-    buf);
-  updateClusterInner(
-    calib,
-    ll,
-    8,
-    buf,
-    pos,
-    myCluster,
-    labelAcc,
-    innerAboveThreshold);
-
-  uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2;
-
-  bool inGroup1 = ll < wgSizeHalf;
-
-  uint16_t llhalf = (inGroup1) ? ll : (ll - wgSizeHalf);
-
-  CfUtils::condBlockLoad(
-    chargeMap,
-    wgSizeHalf,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    ll,
-    0,
-    16,
-    cfconsts::OuterNeighbors,
-    posBcast,
-    innerAboveThreshold,
-    buf);
-
-  if (inGroup1) {
-    updateClusterOuter(
-      llhalf,
-      16,
-      16,
-      0,
-      buf,
-      pos,
-      myCluster,
-      labelAcc);
-  }
-
-#if defined(GPUCA_GPUCODE)
-  CfUtils::condBlockLoad(
-    chargeMap,
-    wgSizeHalf,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    ll,
-    0,
-    16,
-    cfconsts::OuterNeighbors,
-    posBcast + wgSizeHalf,
-    innerAboveThreshold + wgSizeHalf,
-    buf);
-  if (!inGroup1) {
-    updateClusterOuter(
-      llhalf,
-      16,
-      16,
-      0,
-      buf,
-      pos,
-      myCluster,
-      labelAcc);
-  }
-#endif
-}
-
-GPUd() uint32_t GPUTPCCFClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets)
-{
-  uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u);
-  if (index < maxElemsPerBucket) {
-    buckets[maxElemsPerBucket * row + index] = cluster;
-  } else {
-    clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISector * 1000 + row, index, maxElemsPerBucket);
-    CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket);
-  }
-  return index;
-}
->>>>>>> dev
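The removed block above is a leftover merge-conflict duplicate, and its comment documents the padding trick used for the launch size: dummy work items clamp their index to the last cluster and discard their result, so the total work-item count can be rounded up to a multiple of 64. A minimal standalone sketch of that clamping pattern, with hypothetical names and plain C++ in place of the removed GPU code:

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch only: every work item, including padded (dummy) ones, clamps its index
// to the last valid element so it never reads out of bounds; dummy items then
// drop their result instead of writing it. Assumes output.size() >= input.size().
void processPadded(const std::vector<float>& input, std::vector<float>& output, uint32_t paddedSize)
{
  if (input.empty()) {
    return;
  }
  const uint32_t n = static_cast<uint32_t>(input.size());
  for (uint32_t idx = 0; idx < paddedSize; ++idx) {
    const uint32_t clamped = std::min(idx, n - 1); // dummy items recompute the last element
    const float result = input[clamped] * 2.0f;    // stand-in for the real per-cluster work
    if (idx >= n) {
      continue; // dummy item: discard the result
    }
    output[idx] = result;
  }
}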

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc

Lines changed: 0 additions & 1 deletion
@@ -59,7 +59,6 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t
     }
     return;
   }
-
   tpc::ClusterNative myCluster;
   bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap);

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 2 additions & 1 deletion
@@ -38,7 +38,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::run
 {
   uint glo_idx = get_global_id(0);
   auto& clusterer = processors.tpcClusterer[sector];
-  if (processors.tpcNNClusterer[sector].outputDataClass[glo_idx] == 0) { // default clusterizer should not be called in batched mode due to mess-up with thread indices
+  auto& clustererNN = processors.tpcNNClusterer[sector];
+  if (clustererNN.outputDataClass[glo_idx] == 0) { // default clusterizer should not be called in batched mode due to mess-up with thread indices
     return;
   }
   Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
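Functionally, the hunk above only binds processors.tpcNNClusterer[sector] to a local reference before the early-exit check, so the rest of the kernel can use the short alias. A minimal sketch of that pattern with hypothetical stand-in types, not the actual O2 structures:

#include <cstdint>

struct NNClusterer {
  const uint8_t* outputDataClass; // per-item class label; 0 means "leave to the default clusterizer"
};

struct Processors {
  NNClusterer tpcNNClusterer[36]; // one instance per TPC sector (illustrative size)
};

// Sketch only: cache the long member-access chain in a reference once, then reuse it.
inline bool skipDefaultPath(const Processors& processors, int sector, uint32_t glo_idx)
{
  const NNClusterer& clustererNN = processors.tpcNNClusterer[sector];
  return clustererNN.outputDataClass[glo_idx] == 0;
}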
