Skip to content

Commit 41d80d2

Browse files
committed
Merge dev and fixes
2 parents 6a7b17c + 705ebfb commit 41d80d2

File tree

8 files changed

+50
-158
lines changed

8 files changed

+50
-158
lines changed

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ class GPUReconstruction
200200
void SetOutputControl(void* ptr, size_t size);
201201
void SetInputControl(void* ptr, size_t size);
202202
GPUOutputControl& OutputControl() { return mOutputControl; }
203-
int32_t NStreams() const { return mNStreams; }
203+
uint32_t NStreams() const { return mNStreams; }
204204
const void* DeviceMemoryBase() const { return mDeviceMemoryBase; }
205205

206206
RecoStepField GetRecoSteps() const { return mRecoSteps.steps; }

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,7 @@ bool GPUChainTracking::ValidateSettings()
266266
GPUError("configured max time bin exceeds 256 orbits");
267267
return false;
268268
}
269-
if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 1 : 0) > mRec->NStreams()) {
269+
if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 1 : 0) > (int32_t)mRec->NStreams()) {
270270
GPUError("NStreams (%d) must be > nTPCClustererLanes (%d)", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes);
271271
return false;
272272
}

GPU/GPUTracking/Global/GPUChainTracking.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@ class GPUChainTracking : public GPUChain
220220

221221
GPUChainTracking(GPUReconstruction* rec, uint32_t maxTPCHits = GPUCA_MAX_CLUSTERS, uint32_t maxTRDTracklets = GPUCA_MAX_TRD_TRACKLETS);
222222

223-
int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput = true);
223+
int32_t ExtrapolationTracking(uint32_t iSector, bool blocking);
224224

225225
int32_t PrepareProfile();
226226
int32_t DoProfile();
@@ -278,7 +278,6 @@ class GPUChainTracking : public GPUChain
278278

279279
// Synchronization and Locks
280280
eventStruct* mEvents = nullptr;
281-
volatile int32_t mSectorSelectorReady = 0;
282281
std::array<int8_t, NSECTORS> mExtrapolationTrackingDone;
283282

284283
std::vector<outputQueueEntry> mOutputQueue;
@@ -299,6 +298,7 @@ class GPUChainTracking : public GPUChain
299298
void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType);
300299
void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function<o2::tpc::ClusterNative*(size_t)> allocator, bool applyClusterCuts);
301300
bool NeedTPCClustersOnGPU();
301+
uint32_t StreamForSector(uint32_t sector) const;
302302

303303
std::mutex mMutexUpdateCalib;
304304
std::unique_ptr<GPUChainTrackingFinalContext> mPipelineFinalizationCtx;

GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx

Lines changed: 29 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,18 @@
2424

2525
using namespace o2::gpu;
2626

27-
int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput)
27+
uint32_t GPUChainTracking::StreamForSector(uint32_t sector) const
2828
{
29-
runKernel<GPUTPCExtrapolationTracking>({GetGridBlk(256, iSector % mRec->NStreams()), {iSector}});
30-
TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), iSector % mRec->NStreams());
31-
if (synchronizeOutput) {
32-
SynchronizeStream(iSector % mRec->NStreams());
29+
return sector % mRec->NStreams();
30+
}
31+
32+
int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, bool blocking)
33+
{
34+
const uint32_t stream = StreamForSector(iSector);
35+
runKernel<GPUTPCExtrapolationTracking>({GetGridBlk(256, stream), {iSector}});
36+
TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), stream);
37+
if (blocking) {
38+
SynchronizeStream(stream);
3339
}
3440
return (0);
3541
}
@@ -153,7 +159,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
153159
mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) {
154160
GPUTPCTracker& trk = processors()->tpcTrackers[iSector];
155161
GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSector] : trk;
156-
int32_t useStream = (iSector % mRec->NStreams());
162+
int32_t useStream = StreamForSector(iSector);
157163

158164
if (GetProcessingSettings().debugLevel >= 3) {
159165
GPUInfo("Creating Sector Data (Sector %d)", iSector);
@@ -234,102 +240,38 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
234240
}
235241

236242
if (doGPU || GetProcessingSettings().debugLevel >= 1) {
237-
if (doGPU) {
238-
ReleaseEvent(mEvents->init);
239-
}
240-
241-
mSectorSelectorReady = 0;
242-
243-
std::array<bool, NSECTORS> transferRunning;
244-
transferRunning.fill(true);
245-
if (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)) { // TODO: This seems pretty obsolete code path, can probably be removed.
246-
if (param().rec.tpc.extrapolationTracking) {
247-
mExtrapolationTrackingDone.fill(0);
248-
}
249-
250-
uint32_t tmpSector = 0;
251-
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
252-
if (GetProcessingSettings().debugLevel >= 3) {
253-
GPUInfo("Transfering Tracks from GPU to Host");
254-
}
255-
256-
if (tmpSector == iSector) {
257-
SynchronizeEvents(&mEvents->sector[iSector]);
258-
}
259-
while (tmpSector < NSECTORS && (tmpSector == iSector || IsEventDone(&mEvents->sector[tmpSector]))) {
260-
ReleaseEvent(mEvents->sector[tmpSector]);
261-
if (*processors()->tpcTrackers[tmpSector].NTracks() > 0) {
262-
TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[tmpSector].MemoryResOutput(), streamMap[tmpSector], &mEvents->sector[tmpSector]);
263-
} else {
264-
transferRunning[tmpSector] = false;
265-
}
266-
tmpSector++;
267-
}
268-
269-
if (GetProcessingSettings().keepAllMemory) {
270-
TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[iSector], -1, true);
271-
}
272-
273-
if (transferRunning[iSector]) {
274-
SynchronizeEvents(&mEvents->sector[iSector]);
275-
}
276-
if (GetProcessingSettings().debugLevel >= 3) {
277-
GPUInfo("Tracks Transfered: %d / %d", *processors()->tpcTrackers[iSector].NTracks(), *processors()->tpcTrackers[iSector].NTrackHits());
278-
}
279-
280-
if (GetProcessingSettings().debugLevel >= 3) {
281-
GPUInfo("Data ready for sector %d", iSector);
282-
}
283-
mSectorSelectorReady = iSector;
284-
285-
if (param().rec.tpc.extrapolationTracking) {
286-
for (uint32_t tmpSector2a = 0; tmpSector2a <= iSector; tmpSector2a++) {
287-
uint32_t tmpSector2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(tmpSector2a);
288-
uint32_t sectorLeft, sectorRight;
289-
GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight);
290-
291-
if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mExtrapolationTrackingDone[tmpSector2] == 0) {
292-
ExtrapolationTracking(tmpSector2, 0);
293-
mExtrapolationTrackingDone[tmpSector2] = 1;
294-
}
295-
}
296-
}
297-
}
298-
}
299243
if (param().rec.tpc.extrapolationTracking) {
300244
std::vector<bool> blocking(NSECTORS * mRec->NStreams());
301-
for (int32_t i = 0; i < NSECTORS; i++) {
302-
for (int32_t j = 0; j < mRec->NStreams(); j++) {
303-
blocking[i * mRec->NStreams() + j] = i % mRec->NStreams() == j;
245+
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
246+
for (uint32_t iStream = 0; iStream < mRec->NStreams(); iStream++) {
247+
blocking[iSector * mRec->NStreams() + iStream] = StreamForSector(iSector) == iStream;
304248
}
305249
}
306250
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
307251
uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector);
308-
if (!(doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) {
309-
uint32_t sectorLeft, sectorRight;
310-
GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight);
311-
if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()]) {
312-
StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorLeft]);
313-
blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()] = true;
314-
}
315-
if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()]) {
316-
StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorRight]);
317-
blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()] = true;
318-
}
252+
uint32_t sectorLeft, sectorRight;
253+
GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight);
254+
if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)]) {
255+
StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorLeft]);
256+
blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)] = true;
257+
}
258+
if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)]) {
259+
StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorRight]);
260+
blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)] = true;
319261
}
320-
ExtrapolationTracking(tmpSector, 0, false);
262+
ExtrapolationTracking(tmpSector, false);
321263
}
322264
}
323-
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
324-
if (doGPU && transferRunning[iSector]) {
265+
if (doGPU) {
266+
ReleaseEvent(mEvents->init);
267+
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
325268
ReleaseEvent(mEvents->sector[iSector]);
326269
}
327270
}
328271
} else {
329-
mSectorSelectorReady = NSECTORS;
330272
mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) {
331273
if (param().rec.tpc.extrapolationTracking) {
332-
ExtrapolationTracking(iSector, 0);
274+
ExtrapolationTracking(iSector, true);
333275
}
334276
});
335277
mRec->SetNActiveThreadsOuterLoop(1);

GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx

Lines changed: 7 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ GPUd() Charge ClusterAccumulator::updateOuter(PackedCharge charge, Delta2 d)
5858
return q;
5959
}
6060

61-
GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param, TPCTime timeOffset, const Array2D<PackedCharge>& chargeMap)
61+
GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, const Charge q, TPCTime timeOffset)
6262
{
6363
mQtot += q;
6464

@@ -73,8 +73,13 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::Cl
7373
Pad pad = pos.pad();
7474
mPadMean += pad;
7575
mTimeMean += timeOffset + pos.time();
76+
}
77+
78+
GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, tpc::ClusterNative& cn, const GPUParam& param, const Array2D<PackedCharge>& chargeMap)
79+
{
80+
Pad pad = pos.pad();
7681

77-
bool isEdgeCluster = pos.pad() < 2 || pos.pad() >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge
82+
bool isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge
7883
if (isEdgeCluster) {
7984
bool leftEdge = (pad < 2);
8085
if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) {
@@ -119,52 +124,3 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::Cl
119124

120125
return true;
121126
}
122-
123-
GPUd() bool ClusterAccumulator::toNativeSimple(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param, const Array2D<PackedCharge>& chargeMap)
124-
{
125-
cn.qTot = CAMath::Float2UIntRn(mQtot);
126-
if (cn.qTot <= param.rec.tpc.cfQTotCutoff) {
127-
return false;
128-
}
129-
cn.qMax = q;
130-
if (cn.qMax <= param.rec.tpc.cfQMaxCutoff) {
131-
return false;
132-
}
133-
if (mTimeMean < param.rec.tpc.clustersShiftTimebinsClusterizer) {
134-
return false;
135-
}
136-
if (q <= param.rec.tpc.cfQMaxCutoffSingleTime && mTimeSigma == 0) {
137-
return false;
138-
}
139-
if (q <= param.rec.tpc.cfQMaxCutoffSinglePad && mPadSigma == 0) {
140-
return false;
141-
}
142-
143-
bool wasSplitInTime = mSplitInTime >= param.rec.tpc.cfMinSplitNum;
144-
bool wasSplitInPad = mSplitInPad >= param.rec.tpc.cfMinSplitNum;
145-
bool isSingleCluster = (mPadSigma == 0) || (mTimeSigma == 0);
146-
147-
uint8_t flags = 0;
148-
uint8_t pad = pos.pad();
149-
bool isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge
150-
if (isEdgeCluster) {
151-
bool leftEdge = (pad < 2);
152-
if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) {
153-
isEdgeCluster = false; // No edge cluster if peak is close to edge but no charge at the edge.
154-
} else if (leftEdge ? (pad < mPadMean) : (pad > mPadMean)) {
155-
mPadMean = pad; // Correct to peak position if COG is close to middle of pad than peak
156-
}
157-
}
158-
159-
flags |= (isEdgeCluster) ? tpc::ClusterNative::flagEdge : 0;
160-
flags |= (wasSplitInTime) ? tpc::ClusterNative::flagSplitTime : 0;
161-
flags |= (wasSplitInPad) ? tpc::ClusterNative::flagSplitPad : 0;
162-
flags |= (isSingleCluster) ? tpc::ClusterNative::flagSingle : 0;
163-
164-
cn.setTimeFlags(mTimeMean - param.rec.tpc.clustersShiftTimebinsClusterizer, flags);
165-
cn.setPad(mPadMean);
166-
cn.setSigmaTime(mTimeSigma);
167-
cn.setSigmaPad(mPadSigma);
168-
169-
return true;
170-
}

GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -41,26 +41,19 @@ class ClusterAccumulator
4141
GPUd() tpccf::Charge updateInner(PackedCharge, tpccf::Delta2);
4242
GPUd() tpccf::Charge updateOuter(PackedCharge, tpccf::Delta2);
4343

44-
GPUd() bool toNative(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&, tpccf::TPCTime, const Array2D<PackedCharge>&);
45-
GPUd() bool toNativeSimple(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const Array2D<PackedCharge>&);
46-
47-
GPUd() void setFull(float qtot, float padMean, float padSigma, float timeMean, float timeSigma, uint8_t splitInTime, uint8_t splitInPad)
44+
GPUd() void setFull(float qtot, float padMean, float padSigma, float timeMean, float timeSigma, uint8_t splitInPad, uint8_t splitInTime)
4845
{
4946
mQtot = qtot;
5047
mPadMean = padMean;
5148
mPadSigma = padSigma;
5249
mTimeMean = timeMean;
5350
mTimeSigma = timeSigma;
54-
mSplitInTime = splitInTime;
5551
mSplitInPad = splitInPad;
52+
mSplitInTime = splitInTime;
5653
}
57-
GPUd() void setQtot(float qtot) { mQtot = qtot; }
58-
GPUd() void setPadMean(float padMean) { mPadMean = padMean; }
59-
GPUd() void setPadSigma(float padSigma) { mPadSigma = padSigma; }
60-
GPUd() void setTimeMean(float timeMean) { mTimeMean = timeMean; }
61-
GPUd() void setTimeSigma(float timeSigma) { mTimeSigma = timeSigma; }
62-
GPUd() void setSplitInTime(uint8_t splitInTime) { mSplitInTime = splitInTime; }
63-
GPUd() void setSplitInPad(uint8_t splitInPad) { mSplitInPad = splitInPad; }
54+
55+
GPUd() void finalize(const ChargePos&, const tpccf::Charge, tpccf::TPCTime);
56+
GPUd() bool toNative(const ChargePos&, const tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const Array2D<PackedCharge>&);
6457

6558
private:
6659
float mQtot = 0;

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t
6060
return;
6161
}
6262
tpc::ClusterNative myCluster;
63-
bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap);
63+
pc.finalize(pos, charge, fragment.start);
64+
bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), chargeMap);
6465

6566
if (rejectCluster) {
6667
if (clusterPosInRow) {

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha
240240
clustererNN.clusterFlags[2 * glo_idx + 1]);
241241

242242
tpc::ClusterNative myCluster;
243-
bool rejectCluster = !pc.toNativeSimple(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap);
243+
bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap);
244244
if (rejectCluster) {
245245
if (clusterer.mPclusterPosInRow) {
246246
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
@@ -320,7 +320,7 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha
320320
clustererNN.clusterFlags[2 * glo_idx + 1]);
321321

322322
tpc::ClusterNative myCluster;
323-
bool rejectCluster = !pc.toNativeSimple(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap);
323+
bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap);
324324
if (rejectCluster) {
325325
if (clusterer.mPclusterPosInRow) {
326326
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
@@ -354,7 +354,7 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha
354354
clustererNN.clusterFlags[2 * glo_idx],
355355
clustererNN.clusterFlags[2 * glo_idx + 1]);
356356

357-
rejectCluster = !pc.toNativeSimple(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap);
357+
rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap);
358358
if (rejectCluster) {
359359
if (clusterer.mPclusterPosInRow) {
360360
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;

0 commit comments

Comments
 (0)