Skip to content

Commit 0d00fa6

Browse files
committed
GPU TPC: Add possibility to run last way of TPC fit in separate kernel to rebuild track in between
1 parent eebb9e5 commit 0d00fa6

File tree

8 files changed

+42
-25
lines changed

8 files changed

+42
-25
lines changed

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ AddOptionRTC(cfNoiseSuppressionEpsilon, uint8_t, 10, "", 0, "Cluster Finder: Dif
138138
AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression, relative as fraction of 255")
139139
AddOptionRTC(cfEdgeTwoPads, uint8_t, 0, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge as edge cluster")
140140
AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger (must be odd to end with inward fit)")
141+
AddOptionRTC(rebuildTrackInFit, uint8_t, 1, "", 0, "Rebuild track completely during fit based on clusters closed to interpolated track positions")
141142
AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits")
142143
AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 1, "", 0, "Reject clusters that get the IFC mask error during refit")
143144
AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fraction of 128")

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,10 @@ bool GPUChainTracking::ValidateSettings()
257257
GPUError("Cannot do error interpolation with NWays < 3!");
258258
return false;
259259
}
260+
if (param().rec.tpc.rebuildTrackInFit && !param().rec.tpc.mergerInterpolateErrors) {
261+
GPUError("Need error interpolation to rebuild tracks during fit");
262+
return false;
263+
}
260264
if (param().continuousMaxTimeBin > (int32_t)GPUSettings::TPC_MAX_TF_TIME_BIN) {
261265
GPUError("configured max time bin exceeds 256 orbits");
262266
return false;

GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
226226
mOutputQueue.clear();
227227
}
228228

229-
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0);
229+
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 0);
230+
if (param().rec.tpc.rebuildTrackInFit) {
231+
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 1);
232+
}
230233
runKernel<GPUTPCGMMergerFollowLoopers>(GetGridAuto(0));
231234

232235
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile);

GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
using namespace o2::gpu;
2020

2121
template <>
22-
GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode)
22+
GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode, int32_t rebuilt)
2323
{
2424
GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, merger.NMergedTracks(), {
2525
const int32_t i = mode ? merger.TrackOrderProcess()[ii] : ii;
26-
GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger);
26+
GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger, rebuilt);
2727
});
2828
}
2929

GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class GPUTPCGMMergerTrackFit : public GPUTPCGMMergerGeneral
3636
{
3737
public:
3838
template <int32_t iKernel = defaultKernel>
39-
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t mode);
39+
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t mode, int32_t rebuilt);
4040
};
4141

4242
class GPUTPCGMMergerFollowLoopers : public GPUTPCGMMergerGeneral

GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
using namespace o2::gpu;
4949
using namespace o2::tpc;
5050

51-
GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, float maxSinPhi, GPUTPCGMMergedTrack& GPUrestrict() track)
51+
GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, float maxSinPhi, GPUTPCGMMergedTrack& GPUrestrict() track, bool rebuilt)
5252
{
5353
static constexpr float kDeg2Rad = M_PI / 180.f;
5454
CADEBUG(static constexpr float kSectAngle = 2 * M_PI / 18.f);
@@ -61,7 +61,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
6161
prop.SetMaterialTPC();
6262
prop.SetPolynomialField(&param.polynomialField);
6363
prop.SetMaxSinPhi(maxSinPhi);
64-
if (param.rec.tpc.mergerInterpolateErrors) {
64+
if (param.rec.tpc.mergerInterpolateErrors && !rebuilt) {
6565
for (int32_t i = 0; i < N; i++) {
6666
interpolation.hit[i].errorY = -1;
6767
}
@@ -71,12 +71,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
7171
const int32_t maxN = N;
7272
int32_t ihitStart = 0;
7373
float covYYUpd = 0.f;
74-
float lastUpdateX = -1.f;
75-
uint8_t lastRow = 255;
76-
uint8_t lastSector = 255;
7774
float deltaZ = 0.f;
7875

79-
for (int32_t iWay = 0; iWay < nWays; iWay++) {
76+
for (int32_t iWay = rebuilt ? nWays - 1 : 0; iWay < nWays; iWay++) { // DR: Unrolling has no performance improvement on GPU, why?
8077
int32_t nMissed = 0, nMissed2 = 0;
8178
float sumInvSqrtCharge = 0.f;
8279
int32_t nAvgCharge = 0;
@@ -96,12 +93,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
9693
prop.SetFitInProjections(true); // param.rec.fitInProjections == -1 ? (iWay == 0) : param.rec.fitInProjections); // TODO: Reenable once fixed
9794
prop.SetPropagateBzOnly(param.rec.fitPropagateBzOnly == -1 ? !finalFit : param.rec.fitPropagateBzOnly);
9895
prop.SetMatLUT((param.rec.useMatLUT && finalFit) ? merger->GetConstantMem()->calibObjects.matLUT : nullptr);
99-
prop.SetTrack(this, iWay ? prop.GetAlpha() : Alpha);
96+
prop.SetTrack(this, iWay && !rebuilt ? prop.GetAlpha() : Alpha);
10097
ConstrainSinPhi(iWay == 0 ? 0.95f : GPUCA_MAX_SIN_PHI_LOW);
10198
CADEBUG(printf("Fitting track %d way %d (sector %d, alpha %f) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", iTrk, iWay, CAMath::Float2IntRn(prop.GetAlpha() / kSectAngle) + (mP[1] < 0 ? 18 : 0), prop.GetAlpha()));
10299

103100
N = 0;
104-
lastUpdateX = -1;
101+
uint8_t lastRow = 255;
102+
uint8_t lastSector = 255;
103+
float lastUpdateX = -1;
105104
const bool inFlyDirection = iWay & 1;
106105
const int32_t wayDirection = (iWay & 1) ? -1 : 1;
107106

@@ -111,9 +110,12 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
111110
lastSector = clusters[ihit].sector;
112111
}
113112

114-
if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) {
113+
if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || (clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) || (rebuilt && (clusters[ihit].state & GPUTPCGMMergedTrackHit::flagHighIncl))) {
115114
CADEBUG(printf("\tSkipping hit %d, %d hits rejected, flag %X\n", ihit, nMissed, (int32_t)clusters[ihit].state));
116-
if (finalOutInFit && !(clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject)) {
115+
if (rebuilt && (clusters[ihit].state & GPUTPCGMMergedTrackHit::flagHighIncl)) {
116+
NTolerated++;
117+
}
118+
if (finalOutInFit && !(clusters[ihit].state & (GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagHighIncl))) {
117119
clusters[ihit].state |= GPUTPCGMMergedTrackHit::flagRejectErr;
118120
}
119121
continue;
@@ -328,6 +330,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
328330
} else {
329331
deltaZ = 0.f;
330332
}
333+
334+
if (param.rec.tpc.rebuildTrackInFit && iWay == nWays - 2) {
335+
Alpha = prop.GetAlpha();
336+
if (ihitStart != 0) {
337+
MarkClusters(clusters, 0, ihitStart - 1, 1, GPUTPCGMMergedTrackHit::flagHighIncl);
338+
}
339+
return true;
340+
}
331341
}
332342
ConstrainSinPhi();
333343

@@ -895,7 +905,7 @@ GPUd() bool GPUTPCGMTrackParam::CheckNumericalQuality(float overrideCovYY) const
895905
return ok;
896906
}
897907

898-
GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() track, int32_t iTrk, GPUTPCGMMerger* GPUrestrict() merger) // VS: GPUd changed to GPUdii. No change in output and no performance penalty.
908+
GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() track, int32_t iTrk, GPUTPCGMMerger* GPUrestrict() merger, bool rebuilt) // VS: GPUd changed to GPUdii. No change in output and no performance penalty.
899909
{
900910
if (!track.OK()) {
901911
return;
@@ -909,21 +919,21 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict()
909919
int32_t NTolerated = 0; // Clusters not fit but tollerated for track length cut
910920
GPUTPCGMTrackParam t = track.Param();
911921
float Alpha = track.Alpha();
912-
CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt());
913-
bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, GPUCA_MAX_SIN_PHI, track);
914-
CADEBUG(printf("Finished Fit Track %d\n", iTrk));
915-
CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, OK %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits)));
922+
bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, GPUCA_MAX_SIN_PHI, track, rebuilt);
923+
CADEBUG(if (!merger->Param().rec.tpc.rebuildTrackInFit || rebuilt) printf("Finished Fit Track %7d --- OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, OK %d chi2 %f chi2ndf %f\n", iTrk, track.NClusters(), nTrackHits, NTolerated, nTrackHits + NTolerated, track.GetParam().GetQPt(), t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits)));
916924

917925
if (CAMath::Abs(t.QPt()) < 1.e-4f) {
918-
t.QPt() = 1.e-4f;
926+
t.QPt() = CAMath::Copysign(1.e-4f, t.QPt());
919927
}
920928

921929
CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->MergedTracks()[iTrk].Looper()); });
922930

923931
track.SetOK(ok);
924-
track.SetNClustersFitted(nTrackHits);
925932
track.Param() = t;
926933
track.Alpha() = Alpha;
934+
if (!merger->Param().rec.tpc.rebuildTrackInFit || rebuilt) {
935+
track.SetNClustersFitted(nTrackHits);
936+
}
927937

928938
// if (track.OK()) merger->DebugRefitMergedTrack(track);
929939
}

GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ class GPUTPCGMTrackParam
141141
GPUd() bool CheckNumericalQuality(float overrideCovYY = -1.f) const;
142142
GPUd() bool CheckCov() const;
143143

144-
GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, int32_t& N, int32_t& NTolerated, float& Alpha, float maxSinPhi, GPUTPCGMMergedTrack& track);
144+
GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, int32_t& N, int32_t& NTolerated, float& Alpha, float maxSinPhi, GPUTPCGMMergedTrack& track, bool rebuilt);
145+
GPUd() static void RefitTrack(GPUTPCGMMergedTrack& track, int32_t iTrk, GPUTPCGMMerger* merger, bool rebuilt);
145146
GPUd() void MoveToReference(GPUTPCGMPropagator& prop, const GPUParam& param, float& alpha);
146147
GPUd() void MirrorTo(GPUTPCGMPropagator& prop, float toY, float toZ, bool inFlyDirection, const GPUParam& param, uint8_t row, uint8_t clusterState, bool mirrorParameters, int8_t sector);
147148
GPUd() int32_t MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, GPUTPCGMPropagator& prop, float& xx, float& yy, float& zz, int32_t maxN, float clAlpha, uint8_t& clusterState, bool rejectChi2);
@@ -200,8 +201,6 @@ class GPUTPCGMTrackParam
200201
}
201202
}
202203

203-
GPUd() static void RefitTrack(GPUTPCGMMergedTrack& track, int32_t iTrk, GPUTPCGMMerger* merger);
204-
205204
GPUdi() void ConstrainSinPhi(float limit = GPUCA_MAX_SIN_PHI)
206205
{
207206
if (mP[2] > limit) {

GPU/GPUTracking/kernels.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, mergedTracks2" "= TPC
5151
o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter)
5252
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map)
5353
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map uint32_t* output)
54-
o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode)
54+
o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode int32_t rebuilt)
5555
o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB)
5656
o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector)
5757
o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB int32_t iSector)

0 commit comments

Comments
 (0)