Skip to content

Commit 178f5a2

Browse files
committed
GPU TPC: Add possibility to run last way of TPC fit in separate kernel to rebuild track in between
1 parent 2c9494f commit 178f5a2

File tree

8 files changed

+42
-25
lines changed

8 files changed

+42
-25
lines changed

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ AddOptionRTC(cfNoiseSuppressionEpsilon, uint8_t, 10, "", 0, "Cluster Finder: Dif
134134
AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression, relative as fraction of 255")
135135
AddOptionRTC(cfEdgeTwoPads, uint8_t, 0, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge as edge cluster")
136136
AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger (must be odd to end with inward fit)")
137+
AddOptionRTC(rebuildTrackInFit, uint8_t, 1, "", 0, "Rebuild track completely during fit based on clusters closed to interpolated track positions")
137138
AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits")
138139
AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 1, "", 0, "Reject clusters that get the IFC mask error during refit")
139140
AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fraction of 128")

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,10 @@ bool GPUChainTracking::ValidateSettings()
257257
GPUError("Cannot do error interpolation with NWays < 3!");
258258
return false;
259259
}
260+
if (param().rec.tpc.rebuildTrackInFit && !param().rec.tpc.mergerInterpolateErrors) {
261+
GPUError("Need error interpolation to rebuild tracks during fit");
262+
return false;
263+
}
260264
if (param().continuousMaxTimeBin > (int32_t)GPUSettings::TPC_MAX_TF_TIME_BIN) {
261265
GPUError("configured max time bin exceeds 256 orbits");
262266
return false;

GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
226226
mOutputQueue.clear();
227227
}
228228

229-
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0);
229+
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 0);
230+
if (param().rec.tpc.rebuildTrackInFit) {
231+
runKernel<GPUTPCGMMergerTrackFit>(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0, 1);
232+
}
230233
runKernel<GPUTPCGMMergerFollowLoopers>(GetGridAuto(0));
231234

232235
DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile);

GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
using namespace o2::gpu;
2020

2121
template <>
22-
GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode)
22+
GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode, int32_t rebuilt)
2323
{
2424
GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, merger.NMergedTracks(), {
2525
const int32_t i = mode ? merger.TrackOrderProcess()[ii] : ii;
26-
GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger);
26+
GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger, rebuilt);
2727
});
2828
}
2929

GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class GPUTPCGMMergerTrackFit : public GPUTPCGMMergerGeneral
3636
{
3737
public:
3838
template <int32_t iKernel = defaultKernel>
39-
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t mode);
39+
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t mode, int32_t rebuilt);
4040
};
4141

4242
class GPUTPCGMMergerFollowLoopers : public GPUTPCGMMergerGeneral

GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
using namespace o2::gpu;
5454
using namespace o2::tpc;
5555

56-
GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, float maxSinPhi, GPUTPCGMMergedTrack& GPUrestrict() track)
56+
GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, float maxSinPhi, GPUTPCGMMergedTrack& GPUrestrict() track, bool rebuilt)
5757
{
5858
static constexpr float kDeg2Rad = M_PI / 180.f;
5959
CADEBUG(static constexpr float kSectAngle = 2 * M_PI / 18.f);
@@ -66,7 +66,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
6666
prop.SetMaterialTPC();
6767
prop.SetPolynomialField(&param.polynomialField);
6868
prop.SetMaxSinPhi(maxSinPhi);
69-
if (param.rec.tpc.mergerInterpolateErrors) {
69+
if (param.rec.tpc.mergerInterpolateErrors && !rebuilt) {
7070
for (int32_t i = 0; i < N; i++) {
7171
interpolation.hit[i].errorY = -1;
7272
}
@@ -76,12 +76,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
7676
const int32_t maxN = N;
7777
int32_t ihitStart = 0;
7878
float covYYUpd = 0.f;
79-
float lastUpdateX = -1.f;
80-
uint8_t lastRow = 255;
81-
uint8_t lastSector = 255;
8279
float deltaZ = 0.f;
8380

84-
for (int32_t iWay = 0; iWay < nWays; iWay++) {
81+
for (int32_t iWay = rebuilt ? nWays - 1 : 0; iWay < nWays; iWay++) { // DR: Unrolling has no performance improvement on GPU, why?
8582
int32_t nMissed = 0, nMissed2 = 0;
8683
float sumInvSqrtCharge = 0.f;
8784
int32_t nAvgCharge = 0;
@@ -101,12 +98,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
10198
prop.SetFitInProjections(true); // param.rec.fitInProjections == -1 ? (iWay == 0) : param.rec.fitInProjections); // TODO: Reenable once fixed
10299
prop.SetPropagateBzOnly(param.rec.fitPropagateBzOnly == -1 ? !finalFit : param.rec.fitPropagateBzOnly);
103100
prop.SetMatLUT((param.rec.useMatLUT && finalFit) ? merger->GetConstantMem()->calibObjects.matLUT : nullptr);
104-
prop.SetTrack(this, iWay ? prop.GetAlpha() : Alpha);
101+
prop.SetTrack(this, iWay && !rebuilt ? prop.GetAlpha() : Alpha);
105102
ConstrainSinPhi(iWay == 0 ? 0.95f : GPUCA_MAX_SIN_PHI_LOW);
106103
CADEBUG(printf("Fitting track %d way %d (sector %d, alpha %f) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", iTrk, iWay, CAMath::Float2IntRn(prop.GetAlpha() / kSectAngle) + (mP[1] < 0 ? 18 : 0), prop.GetAlpha()));
107104

108105
N = 0;
109-
lastUpdateX = -1;
106+
uint8_t lastRow = 255;
107+
uint8_t lastSector = 255;
108+
float lastUpdateX = -1;
110109
const bool inFlyDirection = iWay & 1;
111110
const int32_t wayDirection = (iWay & 1) ? -1 : 1;
112111

@@ -116,9 +115,12 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
116115
lastSector = clusters[ihit].sector;
117116
}
118117

119-
if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) {
118+
if ((param.rec.tpc.trackFitRejectMode > 0 && nMissed >= param.rec.tpc.trackFitRejectMode) || nMissed2 >= param.rec.tpc.trackFitMaxRowMissedHard || (clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject) || (rebuilt && (clusters[ihit].state & GPUTPCGMMergedTrackHit::flagHighIncl))) {
120119
CADEBUG(printf("\tSkipping hit %d, %d hits rejected, flag %X\n", ihit, nMissed, (int32_t)clusters[ihit].state));
121-
if (finalOutInFit && !(clusters[ihit].state & GPUTPCGMMergedTrackHit::flagReject)) {
120+
if (rebuilt && (clusters[ihit].state & GPUTPCGMMergedTrackHit::flagHighIncl)) {
121+
NTolerated++;
122+
}
123+
if (finalOutInFit && !(clusters[ihit].state & (GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagHighIncl))) {
122124
clusters[ihit].state |= GPUTPCGMMergedTrackHit::flagRejectErr;
123125
}
124126
continue;
@@ -334,6 +336,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_
334336
} else {
335337
deltaZ = 0.f;
336338
}
339+
340+
if (param.rec.tpc.rebuildTrackInFit && iWay == nWays - 2) {
341+
Alpha = prop.GetAlpha();
342+
if (ihitStart != 0) {
343+
MarkClusters(clusters, 0, ihitStart - 1, 1, GPUTPCGMMergedTrackHit::flagHighIncl);
344+
}
345+
return true;
346+
}
337347
}
338348
ConstrainSinPhi();
339349

@@ -885,7 +895,7 @@ GPUd() bool GPUTPCGMTrackParam::CheckNumericalQuality(float overrideCovYY) const
885895
return ok;
886896
}
887897

888-
GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() track, int32_t iTrk, GPUTPCGMMerger* GPUrestrict() merger) // VS: GPUd changed to GPUdii. No change in output and no performance penalty.
898+
GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() track, int32_t iTrk, GPUTPCGMMerger* GPUrestrict() merger, bool rebuilt) // VS: GPUd changed to GPUdii. No change in output and no performance penalty.
889899
{
890900
if (!track.OK()) {
891901
return;
@@ -899,21 +909,21 @@ GPUdii() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict()
899909
int32_t NTolerated = 0; // Clusters not fit but tollerated for track length cut
900910
GPUTPCGMTrackParam t = track.Param();
901911
float Alpha = track.Alpha();
902-
CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt());
903-
bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, GPUCA_MAX_SIN_PHI, track);
904-
CADEBUG(printf("Finished Fit Track %d\n", iTrk));
905-
CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, OK %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits)));
912+
bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, GPUCA_MAX_SIN_PHI, track, rebuilt);
913+
CADEBUG(if (!merger->Param().rec.tpc.rebuildTrackInFit || rebuilt) printf("Finished Fit Track %7d --- OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, OK %d chi2 %f chi2ndf %f\n", iTrk, track.NClusters(), nTrackHits, NTolerated, nTrackHits + NTolerated, track.GetParam().GetQPt(), t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits)));
906914

907915
if (CAMath::Abs(t.QPt()) < 1.e-4f) {
908-
t.QPt() = 1.e-4f;
916+
t.QPt() = CAMath::Copysign(1.e-4f, t.QPt());
909917
}
910918

911919
CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->MergedTracks()[iTrk].Looper()); });
912920

913921
track.SetOK(ok);
914-
track.SetNClustersFitted(nTrackHits);
915922
track.Param() = t;
916923
track.Alpha() = Alpha;
924+
if (!merger->Param().rec.tpc.rebuildTrackInFit || rebuilt) {
925+
track.SetNClustersFitted(nTrackHits);
926+
}
917927

918928
// if (track.OK()) merger->DebugRefitMergedTrack(track);
919929
}

GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ class GPUTPCGMTrackParam
141141
GPUd() bool CheckNumericalQuality(float overrideCovYY = -1.f) const;
142142
GPUd() bool CheckCov() const;
143143

144-
GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, int32_t& N, int32_t& NTolerated, float& Alpha, float maxSinPhi, GPUTPCGMMergedTrack& track);
144+
GPUd() bool Fit(GPUTPCGMMerger* merger, int32_t iTrk, GPUTPCGMMergedTrackHit* clusters, int32_t& N, int32_t& NTolerated, float& Alpha, float maxSinPhi, GPUTPCGMMergedTrack& track, bool rebuilt);
145+
GPUd() static void RefitTrack(GPUTPCGMMergedTrack& track, int32_t iTrk, GPUTPCGMMerger* merger, bool rebuilt);
145146
GPUd() void MoveToReference(GPUTPCGMPropagator& prop, const GPUParam& param, float& alpha);
146147
GPUd() void MirrorTo(GPUTPCGMPropagator& prop, float toY, float toZ, bool inFlyDirection, const GPUParam& param, uint8_t row, uint8_t clusterState, bool mirrorParameters, int8_t sector);
147148
GPUd() int32_t MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* clusters, const GPUTPCGMMerger* merger, GPUTPCGMPropagator& prop, float& xx, float& yy, float& zz, int32_t maxN, float clAlpha, uint8_t& clusterState, bool rejectChi2);
@@ -200,8 +201,6 @@ class GPUTPCGMTrackParam
200201
}
201202
}
202203

203-
GPUd() static void RefitTrack(GPUTPCGMMergedTrack& track, int32_t iTrk, GPUTPCGMMerger* merger);
204-
205204
GPUdi() void ConstrainSinPhi(float limit = GPUCA_MAX_SIN_PHI)
206205
{
207206
if (mP[2] > limit) {

GPU/GPUTracking/kernels.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, mergedTracks2" "= TPC
5151
o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter)
5252
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map)
5353
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map uint32_t* output)
54-
o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode)
54+
o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode int32_t rebuilt)
5555
o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB)
5656
o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector)
5757
o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB int32_t iSector)

0 commit comments

Comments
 (0)