@@ -36,237 +36,4 @@ GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
   tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow;
 
   GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow);
-<<<<<<< HEAD
 }
-=======
-}
-
-GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
-                                                       processorType& clusterer,
-                                                       const CfFragment& fragment,
-                                                       GPUSharedMemory& smem,
-                                                       const Array2D<PackedCharge>& chargeMap,
-                                                       const ChargePos* filteredPeakPositions,
-                                                       const GPUSettingsRec& calib,
-                                                       MCLabelAccumulator* labelAcc,
-                                                       uint32_t clusternum,
-                                                       uint32_t maxClusterPerRow,
-                                                       uint32_t* clusterInRow,
-                                                       tpc::ClusterNative* clusterByRow,
-                                                       uint32_t* clusterPosInRow)
-{
-  uint32_t idx = get_global_id(0);
-
-  // For certain configurations dummy work items are added, so the total
-  // number of work items is divisible by 64.
-  // These dummy items also compute the last cluster but discard the result.
-  ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)];
-  Charge charge = chargeMap[pos].unpack();
-
-  ClusterAccumulator pc;
-  CPU_ONLY(labelAcc->collect(pos, charge));
-
-  buildCluster(
-    calib,
-    chargeMap,
-    pos,
-    smem.posBcast,
-    smem.buf,
-    smem.innerAboveThreshold,
-    &pc,
-    labelAcc);
-
-  if (idx >= clusternum) {
-    return;
-  }
-  if (fragment.isOverlap(pos.time())) {
-    if (clusterPosInRow) {
-      clusterPosInRow[idx] = maxClusterPerRow;
-    }
-    return;
-  }
-  tpc::ClusterNative myCluster;
-  bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap);
-
-  if (rejectCluster) {
-    if (clusterPosInRow) {
-      clusterPosInRow[idx] = maxClusterPerRow;
-    }
-    return;
-  }
-
-  uint32_t rowIndex = 0;
-  if (clusterByRow != nullptr) {
-    rowIndex = sortIntoBuckets(
-      clusterer,
-      myCluster,
-      pos.row(),
-      maxClusterPerRow,
-      clusterInRow,
-      clusterByRow);
-    if (clusterPosInRow != nullptr) {
-      clusterPosInRow[idx] = rowIndex;
-    }
-  } else if (clusterPosInRow) {
-    rowIndex = clusterPosInRow[idx];
-  }
-
-  CPU_ONLY(labelAcc->commit(pos.row(), rowIndex, maxClusterPerRow));
-}
-
-GPUdii() void GPUTPCCFClusterizer::updateClusterInner(
-  const GPUSettingsRec& calib,
-  uint16_t lid,
-  uint16_t N,
-  const PackedCharge* buf,
-  const ChargePos& pos,
-  ClusterAccumulator* cluster,
-  MCLabelAccumulator* labelAcc,
-  uint8_t* innerAboveThreshold)
-{
-  uint8_t aboveThreshold = 0;
-
-  GPUCA_UNROLL(U(), U())
-  for (uint16_t i = 0; i < N; i++) {
-    Delta2 d = cfconsts::InnerNeighbors[i];
-
-    PackedCharge p = buf[N * lid + i];
-
-    Charge q = cluster->updateInner(p, d);
-
-    CPU_ONLY(labelAcc->collect(pos.delta(d), q));
-
-    aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i);
-  }
-
-  innerAboveThreshold[lid] = aboveThreshold;
-
-  GPUbarrier();
-}
-
-GPUdii() void GPUTPCCFClusterizer::updateClusterOuter(
-  uint16_t lid,
-  uint16_t N,
-  uint16_t M,
-  uint16_t offset,
-  const PackedCharge* buf,
-  const ChargePos& pos,
-  ClusterAccumulator* cluster,
-  MCLabelAccumulator* labelAcc)
-{
-  GPUCA_UNROLL(U(), U())
-  for (uint16_t i = offset; i < M + offset; i++) {
-    PackedCharge p = buf[N * lid + i];
-
-    Delta2 d = cfconsts::OuterNeighbors[i];
-
-    Charge q = cluster->updateOuter(p, d);
-    static_cast<void>(q); // Avoid unused variable warning on GPU.
-
-    CPU_ONLY(labelAcc->collect(pos.delta(d), q));
-  }
-}
-
-GPUdii() void GPUTPCCFClusterizer::buildCluster(
-  const GPUSettingsRec& calib,
-  const Array2D<PackedCharge>& chargeMap,
-  ChargePos pos,
-  ChargePos* posBcast,
-  PackedCharge* buf,
-  uint8_t* innerAboveThreshold,
-  ClusterAccumulator* myCluster,
-  MCLabelAccumulator* labelAcc)
-{
-  uint16_t ll = get_local_id(0);
-
-  posBcast[ll] = pos;
-  GPUbarrier();
-
-  CfUtils::blockLoad<PackedCharge>(
-    chargeMap,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    ll,
-    0,
-    8,
-    cfconsts::InnerNeighbors,
-    posBcast,
-    buf);
-  updateClusterInner(
-    calib,
-    ll,
-    8,
-    buf,
-    pos,
-    myCluster,
-    labelAcc,
-    innerAboveThreshold);
-
-  uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2;
-
-  bool inGroup1 = ll < wgSizeHalf;
-
-  uint16_t llhalf = (inGroup1) ? ll : (ll - wgSizeHalf);
-
-  CfUtils::condBlockLoad(
-    chargeMap,
-    wgSizeHalf,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    ll,
-    0,
-    16,
-    cfconsts::OuterNeighbors,
-    posBcast,
-    innerAboveThreshold,
-    buf);
-
-  if (inGroup1) {
-    updateClusterOuter(
-      llhalf,
-      16,
-      16,
-      0,
-      buf,
-      pos,
-      myCluster,
-      labelAcc);
-  }
-
-#if defined(GPUCA_GPUCODE)
-  CfUtils::condBlockLoad(
-    chargeMap,
-    wgSizeHalf,
-    SCRATCH_PAD_WORK_GROUP_SIZE,
-    ll,
-    0,
-    16,
-    cfconsts::OuterNeighbors,
-    posBcast + wgSizeHalf,
-    innerAboveThreshold + wgSizeHalf,
-    buf);
-  if (!inGroup1) {
-    updateClusterOuter(
-      llhalf,
-      16,
-      16,
-      0,
-      buf,
-      pos,
-      myCluster,
-      labelAcc);
-  }
-#endif
-}
-
-GPUd() uint32_t GPUTPCCFClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets)
-{
-  uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u);
-  if (index < maxElemsPerBucket) {
-    buckets[maxElemsPerBucket * row + index] = cluster;
-  } else {
-    clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISector * 1000 + row, index, maxElemsPerBucket);
-    CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket);
-  }
-  return index;
-}
->>>>>>> dev
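
Note: the block removed above is the stale dev-side copy of a leftover merge conflict; the surviving implementations of these functions are unchanged elsewhere in the file. The least obvious piece is the bucketing in sortIntoBuckets: each TPC row owns a fixed-capacity bucket, an atomic counter reserves a slot, and on overflow the cluster is dropped, an error is raised, and the counter is clamped to the capacity. A minimal host-side sketch of that pattern (standalone C++ with hypothetical names such as sortIntoBucket and Cluster, not the O2 API; error reporting omitted) could look like this:

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical standalone types/names, for illustration only.
struct Cluster {
  float qTot = 0.f;
};

// Reserve a slot in the row's fixed-capacity bucket with an atomic counter.
// On overflow the cluster is dropped and the counter is clamped so it never
// grows past the capacity.
uint32_t sortIntoBucket(const Cluster& c, uint32_t row, uint32_t maxPerRow,
                        std::atomic<uint32_t>* countPerRow, std::vector<Cluster>& buckets)
{
  uint32_t index = countPerRow[row].fetch_add(1u); // slot reservation
  if (index < maxPerRow) {
    buckets[maxPerRow * row + index] = c; // flat row-major bucket storage
  } else {
    countPerRow[row].store(maxPerRow); // clamp; cluster is discarded
  }
  return index;
}

int main()
{
  constexpr uint32_t nRows = 4, maxPerRow = 8;
  std::atomic<uint32_t> countPerRow[nRows] = {};
  std::vector<Cluster> buckets(nRows * maxPerRow);
  uint32_t slot = sortIntoBucket(Cluster{42.f}, 2, maxPerRow, countPerRow, buckets);
  std::printf("cluster stored at slot %u of row 2\n", slot);
  return 0;
}
```

On the GPU the same reservation is done with CAMath::AtomicAdd and the clamp with CAMath::AtomicExch, as in the removed lines above.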