Skip to content

Commit 4a2eae2

Browse files
committed
POD version of TPCFastSpaceChargeCorrection
Created from the TPCFastTransform using: std::vector<char> buff; // can also be a pmr::vector from DPL make(..) const o2::gpu::TPCFastTransformPOD& pod = o2::gpu::TPCFastTransformPOD::create(buff, tpc_fast_transform); The buff vector will be expanded during creation and can be sent over DPL. On the receiving side, it should be cast as: const auto& podTransform = o2::gpu::TPCFastTransformPOD::get(pc.inputs().get<gsl::span<char>>(ref)); No initialization is needed: the transform methods (at the moment all methods of TPCFastSpaceChargeCorrection are implemented) can be queried directly from the object received over shared memory. Add performance test for the POD map
1 parent 1511172 commit 4a2eae2

File tree

6 files changed

+1057
-0
lines changed

6 files changed

+1057
-0
lines changed

GPU/TPCFastTransformation/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ set(SRCS
2525
TPCFastSpaceChargeCorrection.cxx
2626
TPCFastSpaceChargeCorrectionMap.cxx
2727
TPCFastTransform.cxx
28+
TPCFastTransformPOD.cxx
2829
CorrectionMapsHelper.cxx
2930
)
3031

GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ namespace gpu
3737
///
3838
class TPCFastSpaceChargeCorrection : public FlatObject
3939
{
40+
friend class TPCFastTransformPOD;
41+
4042
public:
4143
///
4244
/// \brief The struct contains necessary info for TPC padrow

GPU/TPCFastTransformation/TPCFastTransform.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,9 @@ class TPCFastTransform : public FlatObject
250250
/// Return TOF correction (vdrift / C)
251251
GPUd() float getTOFCorr() const { return mLdriftCorr; }
252252

253+
/// Return nominal PV Z position
254+
GPUd() float getPrimVtxZ() const { return mPrimVtxZ; }
255+
253256
/// Return map lumi
254257
GPUd() float getLumi() const { return mLumi; }
255258

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
/// \file TPCFastTransformPOD.cxx
13+
/// \brief Implementation of POD correction map
14+
///
15+
/// \author ruben.shahoayn@cern.ch
16+
17+
#include "TPCFastTransformPOD.h"
18+
#include "GPUDebugStreamer.h"
19+
#if !defined(GPUCA_GPUCODE)
20+
#include <TRandom.h>
21+
#endif
22+
23+
namespace GPUCA_NAMESPACE
24+
{
25+
namespace gpu
26+
{
27+
28+
#if !defined(GPUCA_GPUCODE)
29+
30+
size_t TPCFastTransformPOD::estimateSize(const TPCFastSpaceChargeCorrection& origCorr)
31+
{
32+
// estimate size of own buffer
33+
const size_t selfSizeFix = sizeof(TPCFastTransformPOD);
34+
size_t nextDynOffs = alignOffset(selfSizeFix);
35+
nextDynOffs = alignOffset(nextDynOffs + origCorr.mNumberOfScenarios * sizeof(size_t)); // spline scenarios start here
36+
// space for splines
37+
for (int isc = 0; isc < origCorr.mNumberOfScenarios; isc++) {
38+
const auto& spline = origCorr.mScenarioPtr[isc];
39+
nextDynOffs = alignOffset(nextDynOffs + sizeof(spline));
40+
}
41+
// space for splines data
42+
for (int is = 0; is < 3; is++) {
43+
for (int slice = 0; slice < origCorr.mGeo.getNumberOfSlices(); slice++) {
44+
for (int row = 0; row < NROWS; row++) {
45+
const auto& spline = origCorr.getSpline(slice, row);
46+
int nPar = spline.getNumberOfParameters();
47+
if (is == 1) {
48+
nPar = nPar / 3;
49+
}
50+
if (is == 2) {
51+
nPar = nPar * 2 / 3;
52+
}
53+
nextDynOffs += nPar * sizeof(float);
54+
}
55+
}
56+
}
57+
nextDynOffs = alignOffset(nextDynOffs);
58+
return nextDynOffs;
59+
}
60+
61+
TPCFastTransformPOD& TPCFastTransformPOD::create(char* buff, size_t buffSize, const TPCFastSpaceChargeCorrection& origCorr)
{
  // Build the POD map in place inside the caller-provided buffer and return a reference to it.
  //
  // buff     : start of the destination buffer; it is reinterpreted as a TPCFastTransformPOD
  //            (NOTE(review): presumably it must be suitably aligned for that type - confirm against getNonConst)
  // buffSize : size of the buffer in bytes; must be exactly the size consumed by the filled layout
  // origCorr : source space-charge correction whose fixed-size data, splines and spline data are copied
  //
  // Throws std::runtime_error if the buffer is too small for any part of the layout or if its size
  // differs from the size actually filled.

  // The fixed-size part is written unconditionally below, so make sure it fits before touching the buffer.
  // This is a runtime check rather than an assert so that it also protects builds with NDEBUG.
  if (buffSize <= sizeof(TPCFastTransformPOD)) {
    throw std::runtime_error(fmt::format("buffer of size {} is too small to hold the fixed-size part of TPCFastTransformPOD ({} bytes)", buffSize, sizeof(TPCFastTransformPOD)));
  }
  auto& podMap = getNonConst(buff);
  podMap.mApplyCorrections = true; // by default always apply corrections

  // copy fixed size data --- start
  podMap.mNumberOfScenarios = origCorr.mNumberOfScenarios;
  std::memcpy(&podMap.mGeo, &origCorr.mGeo, sizeof(TPCFastTransformGeo)); // copy geometry (fixed size)
  for (int row = 0; row < NROWS; row++) {
    podMap.mRowInfo[row] = origCorr.getRowInfo(row); // dataOffsetBytes will be modified later
  }
  for (int slice = 0; slice < TPCFastTransformGeo::getNumberOfSlices(); slice++) {
    podMap.mSliceInfo[slice] = origCorr.getSliceInfo(slice);
    for (int row = 0; row < NROWS; row++) {
      podMap.mSliceRowInfo[NROWS * slice + row] = origCorr.getSliceRowInfo(slice, row);
    }
  }
  podMap.mInterpolationSafetyMargin = origCorr.fInterpolationSafetyMargin;
  podMap.mTimeStamp = origCorr.mTimeStamp;
  //
  // init data members coming from the TPCFastTransform
  podMap.mVdrift = 0.;
  podMap.mT0 = 0.;
  podMap.mVdriftCorrY = 0.;
  podMap.mLdriftCorr = 0.;
  podMap.mTOFcorr = 0.;
  podMap.mPrimVtxZ = 0.;
  // copy fixed size data --- end

  size_t nextDynOffs = alignOffset(sizeof(TPCFastTransformPOD));
  // copy slice scenarios
  podMap.mOffsScenariosOffsets = nextDynOffs; // spline scenarios offsets start here
  LOGP(debug, "Set mOffsScenariosOffsets = {}", podMap.mOffsScenariosOffsets);

  // the table of scenario offsets is written entry by entry below: check that it fits as a whole
  const size_t scenTableEnd = nextDynOffs + podMap.mNumberOfScenarios * sizeof(size_t);
  if (buffSize < scenTableEnd) {
    throw std::runtime_error(fmt::format("table of {} scenario offsets ends at {}, overflowing the buffer of size {}", podMap.mNumberOfScenarios, scenTableEnd, buffSize));
  }
  nextDynOffs = alignOffset(scenTableEnd); // spline scenarios start here

  // copy spline objects
  size_t* scenOffs = reinterpret_cast<size_t*>(buff + podMap.mOffsScenariosOffsets);
  for (int isc = 0; isc < origCorr.mNumberOfScenarios; isc++) {
    const auto& spline = origCorr.mScenarioPtr[isc];
    if (buffSize < nextDynOffs + sizeof(spline)) {
      throw std::runtime_error(fmt::format("attempt to copy {} bytes for spline for scenario {} to {}, overflowing the buffer of size {}", sizeof(spline), isc, nextDynOffs + sizeof(spline), buffSize));
    }
    scenOffs[isc] = nextDynOffs;
    std::memcpy(buff + scenOffs[isc], &spline, sizeof(spline));
    nextDynOffs = alignOffset(nextDynOffs + sizeof(spline));
    LOGP(debug, "Copy {} bytes for spline scenario {} (ptr:{}) to offsset {}", sizeof(spline), isc, static_cast<const void*>(&spline), scenOffs[isc]);
  }

  // copy splines data: for each of the 3 spline sets the data of all slices and rows are packed back to back
  for (int is = 0; is < 3; is++) {
    float* data = reinterpret_cast<float*>(buff + nextDynOffs);
    LOGP(debug, "splinID={} start offset {} -> {}", is, nextDynOffs, static_cast<void*>(data));
    for (int slice = 0; slice < origCorr.mGeo.getNumberOfSlices(); slice++) {
      podMap.mSplineDataOffsets[slice][is] = nextDynOffs;
      size_t rowDataOffs = 0; // offset of the row data relative to the start of the slice data
      for (int row = 0; row < NROWS; row++) {
        const auto& spline = origCorr.getSpline(slice, row);
        const float* dataOr = origCorr.getSplineData(slice, row, is);
        int nPar = spline.getNumberOfParameters();
        if (is == 1) {
          nPar = nPar / 3;
        }
        if (is == 2) {
          nPar = nPar * 2 / 3;
        }
        LOGP(debug, "Copying {} floats for spline{} of slice:{} row:{} to offset {}", nPar, is, slice, row, nextDynOffs);
        size_t nbcopy = nPar * sizeof(float);
        if (buffSize < nextDynOffs + nbcopy) {
          throw std::runtime_error(fmt::format("attempt to copy {} bytes of data for spline{} of slice{}/row{} to {}, overflowing the buffer of size {}", nbcopy, is, slice, row, nextDynOffs, buffSize));
        }
        std::memcpy(data, dataOr, nbcopy);
        // the per-row offset is stored once per row and overwritten for every slice
        podMap.getRowInfo(row).dataOffsetBytes[is] = rowDataOffs;
        rowDataOffs += nbcopy;
        data += nPar;
        nextDynOffs += nbcopy;
      }
    }
  }
  podMap.mTotalSize = alignOffset(nextDynOffs);
  // the buffer is expected to have exactly the size of the filled layout
  if (buffSize != podMap.mTotalSize) {
    throw std::runtime_error(fmt::format("Estimated buffer size {} differs from filled one {}", buffSize, podMap.mTotalSize));
  }
  return podMap;
}
147+
148+
TPCFastTransformPOD& TPCFastTransformPOD::create(char* buff, size_t buffSize, const TPCFastTransform& src)
{
  // Build the POD map in the already allocated buffer from the space-charge correction of src,
  // then replace the transform-level scalars (zeroed by the correction-only overload) with the values of src.
  TPCFastTransformPOD& pod = create(buff, buffSize, src.getCorrection());

  // drift parameters
  pod.mT0 = src.getT0();
  pod.mVdrift = src.getVDrift();
  pod.mVdriftCorrY = src.getVdriftCorrY();
  pod.mLdriftCorr = src.getLdriftCorr();

  // NOTE(review): the getTOFCorr() getter visible in TPCFastTransform.h returns mLdriftCorr;
  // confirm that mTOFcorr receives the intended value here
  pod.mTOFcorr = src.getTOFCorr();

  // nominal primary vertex Z
  pod.mPrimVtxZ = src.getPrimVtxZ();

  return pod;
}
162+
163+
bool TPCFastTransformPOD::test(const TPCFastSpaceChargeCorrection& origCorr, int npoints) const
164+
{
165+
if (npoints < 1) {
166+
return false;
167+
}
168+
std::vector<unsigned char> slice, row;
169+
std::vector<float> u, v, dxO, duO, dvO, dxP, duP, dvP, corrXO, corrXP, nomUO, nomVO, nomUP, nomVP;
170+
slice.reserve(npoints);
171+
row.reserve(npoints);
172+
u.reserve(npoints);
173+
v.reserve(npoints);
174+
dxO.resize(npoints);
175+
duO.resize(npoints);
176+
dvO.resize(npoints);
177+
corrXO.resize(npoints);
178+
nomUO.resize(npoints);
179+
nomVO.resize(npoints);
180+
dxP.resize(npoints);
181+
duP.resize(npoints);
182+
dvP.resize(npoints);
183+
corrXP.resize(npoints);
184+
nomUP.resize(npoints);
185+
nomVP.resize(npoints);
186+
187+
for (int i = 0; i < npoints; i++) {
188+
slice.push_back(gRandom->Integer(NSLICES));
189+
row.push_back(gRandom->Integer(NROWS));
190+
u.push_back(gRandom->Rndm() * 15);
191+
v.push_back(gRandom->Rndm() * 200);
192+
}
193+
long origStart[3], origEnd[3], thisStart[3], thisEnd[3];
194+
origStart[0] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
195+
for (int i = 0; i < npoints; i++) {
196+
origCorr.getCorrection(slice[i], row[i], u[i], v[i], dxO[i], duO[i], dvO[i]);
197+
}
198+
origEnd[0] = origStart[1] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
199+
for (int i = 0; i < npoints; i++) {
200+
origCorr.getCorrectionInvCorrectedX(slice[i], row[i], u[i], v[i], corrXO[i]);
201+
}
202+
origEnd[1] = origStart[2] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
203+
for (int i = 0; i < npoints; i++) {
204+
origCorr.getCorrectionInvUV(slice[i], row[i], u[i], v[i], nomUO[i], nomVO[i]);
205+
}
206+
origEnd[2] = thisStart[0] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
207+
for (int i = 0; i < npoints; i++) {
208+
this->getCorrection(slice[i], row[i], u[i], v[i], dxP[i], duP[i], dvP[i]);
209+
}
210+
thisEnd[0] = thisStart[1] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
211+
for (int i = 0; i < npoints; i++) {
212+
this->getCorrectionInvCorrectedX(slice[i], row[i], u[i], v[i], corrXP[i]);
213+
}
214+
thisEnd[1] = thisStart[2] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
215+
for (int i = 0; i < npoints; i++) {
216+
this->getCorrectionInvUV(slice[i], row[i], u[i], v[i], nomUP[i], nomVP[i]);
217+
}
218+
thisEnd[2] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
219+
//
220+
size_t ndiff[3] = {};
221+
for (int i = 0; i < npoints; i++) {
222+
if (dxO[i] != dxP[i] || duO[i] != duP[i] || dvO[i] != dvP[i]) {
223+
ndiff[0]++;
224+
}
225+
if (corrXO[i] != corrXP[i]) {
226+
ndiff[1]++;
227+
}
228+
if (nomUO[i] != nomUP[i] || nomVO[i] != nomVP[i]) {
229+
ndiff[2]++;
230+
}
231+
}
232+
//
233+
LOGP(info, " (ns per call) original this Nmissmatch");
234+
LOGP(info, "getCorrection {:.3e} {:.3e} {}", double(origEnd[0] - origStart[0]) / npoints * 1000., double(thisEnd[0] - thisStart[0]) / npoints * 1000., ndiff[0]);
235+
LOGP(info, "getCorrectionInvCorrectedX {:.3e} {:.3e} {}", double(origEnd[1] - origStart[1]) / npoints * 1000., double(thisEnd[1] - thisStart[1]) / npoints * 1000., ndiff[1]);
236+
LOGP(info, "getCorrectionInvUV {:.3e} {:.3e} {}", double(origEnd[2] - origStart[2]) / npoints * 1000., double(thisEnd[2] - thisStart[2]) / npoints * 1000., ndiff[2]);
237+
return ndiff[0] == 0 && ndiff[1] == 0 && ndiff[2] == 0;
238+
}
239+
240+
#endif
241+
242+
} // namespace gpu
243+
} // namespace GPUCA_NAMESPACE

0 commit comments

Comments
 (0)