Skip to content

Commit 3233a5b

Browse files
committed
GPU: Better hiding of internal structures
1 parent 348dada commit 3233a5b

File tree

13 files changed

+113
-54
lines changed

13 files changed

+113
-54
lines changed

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646

4747
namespace o2::gpu
4848
{
49+
namespace // anonymous
50+
{
4951
struct GPUReconstructionPipelineQueue {
5052
uint32_t op = 0; // For now, 0 = process, 1 = terminate
5153
GPUChain* chain = nullptr;
@@ -54,6 +56,7 @@ struct GPUReconstructionPipelineQueue {
5456
bool done = false;
5557
int32_t retVal = 0;
5658
};
59+
} // namespace
5760

5861
struct GPUReconstructionPipelineContext {
5962
std::queue<GPUReconstructionPipelineQueue*> queue;

GPU/GPUTracking/Base/GPUReconstructionConvert.cxx

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutZS& zspage
174174
// ------------------------------------------------- TPC ZS -------------------------------------------------
175175

176176
#ifdef GPUCA_TPC_GEOMETRY_O2
177+
namespace o2::gpu
178+
{
177179
namespace // anonymous
178180
{
179181

@@ -1306,19 +1308,9 @@ size_t zsEncoderRun<T>::compare(std::vector<zsPage>* buffer, std::vector<o2::tpc
13061308
}
13071309

13081310
} // anonymous namespace
1311+
} // namespace o2::gpu
13091312
#endif // GPUCA_TPC_GEOMETRY_O2
13101313

1311-
namespace o2::gpu::internal
1312-
{
1313-
struct tmpReductionResult {
1314-
uint32_t totalPages = 0;
1315-
size_t totalSize = 0;
1316-
size_t nErrors = 0;
1317-
size_t digitsInput = 0;
1318-
size_t digitsEncoded = 0;
1319-
};
1320-
} // namespace o2::gpu::internal
1321-
13221314
template <class S>
13231315
void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr<uint64_t[]>* outBuffer, uint32_t* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, int32_t version, bool verify, float threshold, bool padding, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter)
13241316
{
@@ -1329,7 +1321,14 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr<uint64_
13291321
}
13301322
#ifdef GPUCA_TPC_GEOMETRY_O2
13311323
std::vector<zsPage> buffer[NSECTORS][GPUTrackingInOutZS::NENDPOINTS];
1332-
auto reduced = tbb::parallel_reduce(tbb::blocked_range<uint32_t>(0, NSECTORS), o2::gpu::internal::tmpReductionResult(), [&](const auto range, auto red) {
1324+
struct tmpReductionResult {
1325+
uint32_t totalPages = 0;
1326+
size_t totalSize = 0;
1327+
size_t nErrors = 0;
1328+
size_t digitsInput = 0;
1329+
size_t digitsEncoded = 0;
1330+
};
1331+
auto reduced = tbb::parallel_reduce(tbb::blocked_range<uint32_t>(0, NSECTORS), tmpReductionResult(), [&](const auto range, auto red) {
13331332
for (uint32_t i = range.begin(); i < range.end(); i++) {
13341333
std::vector<o2::tpc::Digit> tmpBuffer;
13351334
red.digitsInput += ZSEncoderGetNDigits(in, i);
@@ -1455,6 +1454,8 @@ void GPUReconstructionConvert::RunZSFilter(std::unique_ptr<o2::tpc::Digit[]>* bu
14551454
}
14561455

14571456
#ifdef GPUCA_O2_LIB
1457+
namespace o2::gpu::internal
1458+
{
14581459
template <class T>
14591460
static inline auto GetDecoder_internal(const GPUParam* param, int32_t version)
14601461
{
@@ -1480,15 +1481,16 @@ static inline auto GetDecoder_internal(const GPUParam* param, int32_t version)
14801481
enc->decodePage(outBuffer, (const zsPage*)page, endpoint, firstTfOrbit, triggerBC);
14811482
};
14821483
}
1484+
} // namespace o2::gpu::internal
14831485

14841486
std::function<void(std::vector<o2::tpc::Digit>&, const void*, uint32_t, uint32_t)> GPUReconstructionConvert::GetDecoder(int32_t version, const GPUParam* param)
14851487
{
14861488
if (version >= o2::tpc::ZSVersion::ZSVersionRowBased10BitADC && version <= o2::tpc::ZSVersion::ZSVersionRowBased12BitADC) {
1487-
return GetDecoder_internal<zsEncoderRow>(param, version);
1489+
return o2::gpu::internal::GetDecoder_internal<zsEncoderRow>(param, version);
14881490
} else if (version == o2::tpc::ZSVersion::ZSVersionLinkBasedWithMeta) {
1489-
return GetDecoder_internal<zsEncoderImprovedLinkBased>(param, version);
1491+
return o2::gpu::internal::GetDecoder_internal<zsEncoderImprovedLinkBased>(param, version);
14901492
} else if (version >= o2::tpc::ZSVersion::ZSVersionDenseLinkBased && version <= o2::tpc::ZSVersion::ZSVersionDenseLinkBasedV2) {
1491-
return GetDecoder_internal<zsEncoderDenseLinkBased>(param, version);
1493+
return o2::gpu::internal::GetDecoder_internal<zsEncoderDenseLinkBased>(param, version);
14921494
} else {
14931495
throw std::runtime_error("Invalid ZS version "s + std::to_string(version) + ", cannot create decoder"s);
14941496
}

GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,13 @@ void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThr
5050
});
5151
}
5252

53+
namespace o2::gpu
54+
{
55+
namespace // anonymous
56+
{
5357
static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation
58+
} // anonymous namespace
59+
} // namespace o2::gpu
5460

5561
GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step)
5662
{

GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
using namespace o2::gpu;
2424

2525
// Small helper to compute Huffman probabilities
26-
namespace
26+
namespace o2::gpu
27+
{
28+
namespace // anonymous
2729
{
2830
typedef std::vector<bool> HuffCode;
2931
typedef std::map<uint32_t, HuffCode> HuffCodeMap;
@@ -101,7 +103,8 @@ void GenerateCodes(const INode* node, const HuffCode& prefix, HuffCodeMap& outCo
101103
GenerateCodes(in->right, rightPrefix, outCodes);
102104
}
103105
}
104-
} // namespace
106+
} // anonymous namespace
107+
} // namespace o2::gpu
105108

106109
void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* clustersNative, const o2::tpc::CompressedClusters* clustersCompressed, const GPUParam& param)
107110
{

GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,22 +58,26 @@
5858
#include "SimulationDataFormat/MCCompLabel.h"
5959
#endif
6060

61+
namespace o2::gpu::internal
62+
{
63+
}
6164
using namespace o2::gpu;
65+
using namespace o2::gpu::internal;
6266
using namespace o2::tpc;
6367
using namespace gputpcgmmergertypes;
6468

6569
static constexpr int32_t kMaxParts = 400;
6670
static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS;
6771

68-
namespace o2::gpu
72+
namespace o2::gpu::internal
6973
{
7074
struct MergeLooperParam {
7175
float refz;
7276
float x;
7377
float y;
7478
uint32_t id;
7579
};
76-
} // namespace o2::gpu
80+
} // namespace o2::gpu::internal
7781

7882
#ifndef GPUCA_GPUCODE
7983

@@ -741,6 +745,10 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea
741745
}
742746

743747
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize MergeBorderTracks<3>
748+
namespace o2::gpu::internal
749+
{
750+
namespace // anonymous
751+
{
744752
struct MergeBorderTracks_compMax {
745753
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
746754
{
@@ -761,6 +769,8 @@ struct MergeBorderTracks_compMin {
761769
#endif
762770
}
763771
};
772+
} // anonymous namespace
773+
} // namespace o2::gpu::internal
764774

765775
template <>
766776
inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerMergeBorders, 3>(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax)
@@ -1436,6 +1446,10 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i
14361446
// for (int32_t i = 0;i < mMemory->nOutputTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks
14371447
}
14381448

1449+
namespace o2::gpu::internal
1450+
{
1451+
namespace // anonymous
1452+
{
14391453
struct GPUTPCGMMerger_CompareClusterIdsLooper {
14401454
struct clcomparestruct {
14411455
uint8_t leg;
@@ -1489,6 +1503,8 @@ struct GPUTPCGMMerger_CompareClusterIds {
14891503
#endif
14901504
}
14911505
};
1506+
} // anonymous namespace
1507+
} // namespace o2::gpu::internal
14921508

14931509
GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread)
14941510
{
@@ -1803,6 +1819,10 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr
18031819
}
18041820

18051821
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
1822+
namespace o2::gpu::internal
1823+
{
1824+
namespace // anonymous
1825+
{
18061826
struct GPUTPCGMMergerSortTracks_comp {
18071827
const GPUTPCGMMergedTrack* const mCmp;
18081828
GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
@@ -1833,14 +1853,6 @@ struct GPUTPCGMMergerSortTracks_comp {
18331853
}
18341854
};
18351855

1836-
template <>
1837-
inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerSortTracks, 0>(const krnlSetupTime& _xyz)
1838-
{
1839-
thrust::device_ptr<uint32_t> trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess());
1840-
ThrustVolatileAsyncAllocator alloc(this);
1841-
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
1842-
}
1843-
18441856
struct GPUTPCGMMergerSortTracksQPt_comp {
18451857
const GPUTPCGMMergedTrack* const mCmp;
18461858
GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
@@ -1861,6 +1873,16 @@ struct GPUTPCGMMergerSortTracksQPt_comp {
18611873
#endif
18621874
}
18631875
};
1876+
} // anonymous namespace
1877+
} // namespace o2::gpu::internal
1878+
1879+
template <>
1880+
inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerSortTracks, 0>(const krnlSetupTime& _xyz)
1881+
{
1882+
thrust::device_ptr<uint32_t> trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess());
1883+
ThrustVolatileAsyncAllocator alloc(this);
1884+
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
1885+
}
18641886

18651887
template <>
18661888
inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerSortTracksQPt, 0>(const krnlSetupTime& _xyz)
@@ -2074,12 +2096,18 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads,
20742096
}
20752097

20762098
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt
2099+
namespace o2::gpu::internal
2100+
{
2101+
namespace // anonymous
2102+
{
20772103
struct GPUTPCGMMergerMergeLoopers_comp {
20782104
GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b)
20792105
{
20802106
return CAMath::Abs(a.refz) < CAMath::Abs(b.refz);
20812107
}
20822108
};
2109+
} // anonymous namespace
2110+
} // namespace o2::gpu::internal
20832111

20842112
template <>
20852113
inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal<GPUTPCGMMergerMergeLoopers, 1>(const krnlSetupTime& _xyz)

GPU/GPUTracking/Merger/GPUTPCGMMerger.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,10 @@ class GPUTPCTracker;
4848
class GPUChainTracking;
4949
class GPUTPCGMPolynomialField;
5050
struct GPUTPCGMLoopData;
51+
namespace internal
52+
{
5153
struct MergeLooperParam;
54+
} // namespace internal
5255

5356
/**
5457
* @class GPUTPCGMMerger
@@ -126,7 +129,7 @@ class GPUTPCGMMerger : public GPUProcessor
126129
GPUhdi() uint2* ClusRefTmp() { return mClusRefTmp; }
127130
GPUhdi() uint32_t* TrackSort() { return mTrackSort; }
128131
GPUhdi() tmpSort* TrackSortO2() { return mTrackSortO2; }
129-
GPUhdi() MergeLooperParam* LooperCandidates() { return mLooperCandidates; }
132+
GPUhdi() internal::MergeLooperParam* LooperCandidates() { return mLooperCandidates; }
130133
GPUhdi() GPUAtomic(uint32_t) * SharedCount() { return mSharedCount; }
131134
GPUhdi() gputpcgmmergertypes::GPUTPCGMBorderRange* BorderRange(int32_t i) { return mBorderRange[i]; }
132135
GPUhdi() const gputpcgmmergertypes::GPUTPCGMBorderRange* BorderRange(int32_t i) const { return mBorderRange[i]; }
@@ -267,7 +270,7 @@ class GPUTPCGMMerger : public GPUProcessor
267270
o2::tpc::TrackTPC* mOutputTracksTPCO2;
268271
uint32_t* mOutputClusRefsTPCO2;
269272
o2::MCCompLabel* mOutputTracksTPCO2MC;
270-
MergeLooperParam* mLooperCandidates;
273+
internal::MergeLooperParam* mLooperCandidates;
271274

272275
uint32_t* mTrackOrderAttach;
273276
uint32_t* mTrackOrderProcess;

GPU/GPUTracking/Refit/GPUTrackingRefit.cxx

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ void GPUTrackingRefitProcessor::SetMaxData(const GPUTrackingInOutPointers& io)
6363
}
6464
#endif
6565

66-
namespace
66+
namespace o2::gpu::internal
67+
{
68+
namespace // anonymous
6769
{
6870
template <class T>
6971
struct refitTrackTypes;
@@ -76,6 +78,7 @@ struct refitTrackTypes<TrackParCov> {
7678
using propagator = const Propagator*;
7779
};
7880
} // anonymous namespace
81+
} // namespace o2::gpu::internal
7982

8083
template <>
8184
GPUd() void GPUTrackingRefit::initProp<GPUgeneric() GPUTPCGMPropagator>(GPUTPCGMPropagator& prop) // FIXME: GPUgeneric() needed to make the clang spirv output link correctly
@@ -210,10 +213,10 @@ template <class T, class S>
210213
GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov)
211214
{
212215
CADEBUG(int32_t ii; printf("\nRefitting track\n"));
213-
typename refitTrackTypes<S>::propagator prop;
216+
typename internal::refitTrackTypes<S>::propagator prop;
214217
S trk;
215218
float TrackParCovChi2 = 0.f;
216-
convertTrack<S, T, typename refitTrackTypes<S>::propagator>(trk, trkX, prop, &TrackParCovChi2);
219+
convertTrack<S, T, typename internal::refitTrackTypes<S>::propagator>(trk, trkX, prop, &TrackParCovChi2);
217220
int32_t begin = 0, count;
218221
float tOffset;
219222
if constexpr (std::is_same_v<T, GPUTPCGMMergedTrack>) {
@@ -417,7 +420,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov
417420
static_assert("Invalid template");
418421
}
419422

420-
convertTrack<T, S, typename refitTrackTypes<S>::propagator>(trkX, trk, prop, &TrackParCovChi2);
423+
convertTrack<T, S, typename internal::refitTrackTypes<S>::propagator>(trkX, trk, prop, &TrackParCovChi2);
421424
return nFitted;
422425
}
423426

GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,8 @@
3939

4040
using namespace o2::gpu;
4141

42-
GPUDisplayFrontendWayland::GPUDisplayFrontendWayland()
43-
{
44-
mFrontendType = TYPE_WAYLAND;
45-
mFrontendName = "Wayland";
46-
}
47-
48-
void GPUDisplayFrontendWayland::OpenGLPrint(const char* s, float x, float y, float r, float g, float b, float a, bool fromBotton)
42+
namespace o2::gpu::internal
4943
{
50-
}
51-
5244
template <class T, class... Args>
5345
struct CCallWrapper {
5446
std::function<T(Args...)> func;
@@ -58,6 +50,17 @@ struct CCallWrapper {
5850
return funcwrap->func(std::forward<Args>(args)...);
5951
}
6052
};
53+
} // namespace o2::gpu::internal
54+
55+
GPUDisplayFrontendWayland::GPUDisplayFrontendWayland()
56+
{
57+
mFrontendType = TYPE_WAYLAND;
58+
mFrontendName = "Wayland";
59+
}
60+
61+
void GPUDisplayFrontendWayland::OpenGLPrint(const char* s, float x, float y, float r, float g, float b, float a, bool fromBotton)
62+
{
63+
}
6164

6265
int32_t GPUDisplayFrontendWayland::GetKey(uint32_t key, uint32_t state)
6366
{
@@ -283,7 +286,7 @@ int32_t GPUDisplayFrontendWayland::FrontendMain()
283286
wl_keyboard_add_listener(mKeyboard, &keyboard_listener, this);
284287
}
285288
};
286-
auto seat_capabilities_c = CCallWrapper<void, wl_seat*, uint32_t>{[seat_capabilities](wl_seat* seat, uint32_t capabilities) { seat_capabilities(seat, capabilities); }};
289+
auto seat_capabilities_c = internal::CCallWrapper<void, wl_seat*, uint32_t>{[seat_capabilities](wl_seat* seat, uint32_t capabilities) { seat_capabilities(seat, capabilities); }};
287290

288291
auto seat_name = [](void* data, struct wl_seat* seat, const char* name) {
289292
if (((GPUDisplayFrontendWayland*)data)->mDisplay->param()->par.debugLevel >= 2) {
@@ -317,7 +320,7 @@ int32_t GPUDisplayFrontendWayland::FrontendMain()
317320
}
318321
};
319322

320-
auto registry_global_c = CCallWrapper<void, wl_registry*, uint32_t, const char*, uint32_t>{[registry_global](wl_registry* registry, uint32_t name, const char* interface, uint32_t version) { registry_global(registry, name, interface, version); }};
323+
auto registry_global_c = internal::CCallWrapper<void, wl_registry*, uint32_t, const char*, uint32_t>{[registry_global](wl_registry* registry, uint32_t name, const char* interface, uint32_t version) { registry_global(registry, name, interface, version); }};
321324
auto registry_global_remove = [](void* a, wl_registry* b, uint32_t c) {};
322325
const wl_registry_listener registry_listener = {.global = &registry_global_c.callback, .global_remove = registry_global_remove};
323326

0 commit comments

Comments
 (0)