Skip to content

Commit 030e396

Browse files
authored
MatLUT: faster phi-sector determination; reduction of divisions (#12030)
Following ticket https://alice.its.cern.ch/jira/browse/O2-4170, Profile analysis has shown that material budget calculations used in tracking, spent most time in (a) determination of a cylindrical layer (b) determination of a phi sector in a layer This commit proposes an improvement for (b): - Instead of using system function phi = atan2(y, x), we use a faster - approximate version. We can do this, since we never need to use the precise result of phi in a calculation. Instead, phi is merely used to lookup a slice/sector. The error of the approximate version is ~1e-5, which is enough to get the correct phi sector in 99.999 of the cases. (Rare cases, where we end up in a wrong neighbouring phi sector are probably ok ... since the material budget lookup is anyways an approximation.) In addition, we propose a smaller code mofication for ray.CrossZ to reduce the number of divisions and if statements. A previous check for ray orthogonal to z can not happen in the code since there is a previous condition `if (zID != zIDLast)`. ITS reco is shown to improve by ~15% in speed for larger PbPb MC timeframes (on Linux - tested on CC7 and Ubuntu22.04)
1 parent 8fe191e commit 030e396

File tree

2 files changed

+58
-7
lines changed

2 files changed

+58
-7
lines changed

Detectors/Base/include/DetectorsBase/Ray.h

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class Ray
4646
static constexpr float InvalidT = -1e9;
4747
static constexpr float Tiny = 1e-9;
4848

49-
GPUd() Ray() : mP{0.f}, mD{0.f}, mDistXY2(0.f), mDistXY2i(0.f), mDistXYZ(0.f), mXDxPlusYDy(0.f), mXDxPlusYDyRed(0.f), mXDxPlusYDy2(0.f), mR02(0.f), mR12(0.f)
49+
GPUd() Ray() : mP{0.f}, mD{0.f}, mDistXY2(0.f), mDistXY2i(0.f), mDistXYZ(0.f), mXDxPlusYDy(0.f), mXDxPlusYDyRed(0.f), mXDxPlusYDy2(0.f), mR02(0.f), mR12(0.f), mInvDz(Tiny)
5050
{
5151
}
5252
GPUdDefault() ~Ray() CON_DEFAULT;
@@ -61,6 +61,7 @@ class Ray
6161
GPUd() float crossRadial(const MatLayerCyl& lr, int sliceID) const;
6262
GPUd() float crossRadial(float cs, float sn) const;
6363
GPUd() float crossZ(float z) const;
64+
GPUd() float crossZ_fast(float z) const; // without check
6465

6566
GPUd() void getCrossParams(int i, float& par1, float& par2) const
6667
{
@@ -84,6 +85,49 @@ class Ray
8485
return p;
8586
}
8687

88+
// fast, approximate arctan calculation - sufficient for phi sector determination in material budget layers
89+
// source of inspirations:
90+
// - https://mazzo.li/posts/vectorized-atan2.html
91+
// - https://www.coranac.com/documents/arctangent/
92+
GPUd() float atan_approx_5terms(float x) const
93+
{
94+
// fast atan implementation
95+
// function has a max error of ~1.16825e-05f
96+
float a1 = 0.9998660f;
97+
float a3 = -0.3302995f;
98+
float a5 = 0.1801410f;
99+
float a7 = -0.0851330f;
100+
float a9 = 0.0208351f;
101+
102+
float x_sq = x * x;
103+
// todo: force fuse-multiply add
104+
return x * (a1 + x_sq * (a3 + x_sq * (a5 + x_sq * (a7 + x_sq * a9))));
105+
}
106+
107+
GPUd() float atan2_approx(float y, float x) const
108+
{
109+
const bool swap = fabs(x) < fabs(y);
110+
const float atan_input = (swap ? x : y) / (swap ? y : x);
111+
const float pi2 = M_PI_2;
112+
const float pi = M_PI;
113+
float res = atan_approx_5terms(atan_input);
114+
115+
// if swapped, adjust atan output
116+
res = swap ? (atan_input >= 0.0f ? pi2 : -pi2) - res : res;
117+
// adjust quadrants
118+
if (x < 0.0f) {
119+
res = ((y < 0.0f) ? -pi : pi) + res;
120+
}
121+
return res;
122+
}
123+
124+
GPUd() float getPhi_approx(float t) const
125+
{
126+
float p = atan2_approx(mP[1] + t * mD[1], mP[0] + t * mD[0]);
127+
o2::math_utils::bringTo02Pi(p);
128+
return p;
129+
}
130+
87131
GPUd() float getZ(float t) const { return mP[2] + t * mD[2]; }
88132

89133
GPUd() bool validateZRange(float& cpar1, float& cpar2, const MatLayerCyl& lr) const;
@@ -101,6 +145,7 @@ class Ray
101145
float mR12; ///< radius^2 of mP1
102146
float mCrossParams1[2]; ///< parameters of crossing the layer (first parameter)
103147
float mCrossParams2[2]; ///< parameters of crossing the layer (second parameter)
148+
float mInvDz;
104149

105150
ClassDefNV(Ray, 1);
106151
};
@@ -119,6 +164,7 @@ inline Ray::Ray(const math_utils::Point3D<float> point0, const math_utils::Point
119164
mXDxPlusYDy2 = mXDxPlusYDy * mXDxPlusYDy;
120165
mR02 = point0.Perp2();
121166
mR12 = point1.Perp2();
167+
mInvDz = 1.f / mD[2];
122168
}
123169
#endif // !GPUCA_ALIGPUCODE
124170

@@ -134,6 +180,7 @@ GPUdi() Ray::Ray(float x0, float y0, float z0, float x1, float y1, float z1)
134180
mXDxPlusYDy2 = mXDxPlusYDy * mXDxPlusYDy;
135181
mR02 = x0 * x0 + y0 * y0;
136182
mR12 = x1 * x1 + y1 * y1;
183+
mInvDz = 1.f / mD[2];
137184
}
138185

139186
//______________________________________________________
@@ -175,6 +222,13 @@ GPUdi() float Ray::crossZ(float z) const
175222
return mD[2] != 0. ? (z - mP[2]) / mD[2] : InvalidT;
176223
}
177224

225+
//______________________________________________________
226+
GPUdi() float Ray::crossZ_fast(float z) const
227+
{
228+
// calculate t of crossing XY plane at Z
229+
return (z - mP[2]) * mInvDz;
230+
}
231+
178232
//______________________________________________________
179233
GPUdi() bool Ray::validateZRange(float& cpar1, float& cpar2, const MatLayerCyl& lr) const
180234
{

Detectors/Base/src/MatLayerCylSet.cxx

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,8 @@ GPUd() MatBudget MatLayerCylSet::getMatBudget(float x0, float y0, float z0, floa
294294
for (int ic = nc; ic--;) {
295295
float cross1, cross2;
296296
ray.getCrossParams(ic, cross1, cross2); // tmax,tmin of crossing the layer
297-
298-
auto phi0 = ray.getPhi(cross1), phi1 = ray.getPhi(cross2), dPhi = phi0 - phi1;
297+
auto phi0 = ray.getPhi_approx(cross1), phi1 = ray.getPhi_approx(cross2), dPhi = phi0 - phi1;
298+
// auto phi0 = ray.getPhi(cross1), phi1 = ray.getPhi(cross2), dPhi = phi0 - phi1;
299299
auto phiID = lr.getPhiSliceID(phi0), phiIDLast = lr.getPhiSliceID(phi1);
300300
// account for eventual wrapping around 0
301301
if (dPhi > 0.f) {
@@ -339,10 +339,7 @@ GPUd() MatBudget MatLayerCylSet::getMatBudget(float x0, float y0, float z0, floa
339339
tEndZ = tEndPhi;
340340
checkMoreZ = false;
341341
} else {
342-
tEndZ = ray.crossZ(lr.getZBinMin(stepZID > 0 ? zID + 1 : zID));
343-
if (tEndZ == Ray::InvalidT) { // track normal to Z axis, abandon Zbin change test
344-
break;
345-
}
342+
tEndZ = ray.crossZ_fast(lr.getZBinMin(stepZID > 0 ? zID + 1 : zID));
346343
}
347344
// account materials of this step
348345
float step = tEndZ > tStartZ ? tEndZ - tStartZ : tStartZ - tEndZ; // the real step is ray.getDist(tEnd-tStart), will rescale all later

0 commit comments

Comments
 (0)