Skip to content

Commit dc7e8e9

Browse files
davidrohr authored and ktf committed
GPU: Remove support for host helper threads (no longer used)
1 parent 15a7e2f commit dc7e8e9

File tree

11 files changed

+8
-300
lines changed

11 files changed

+8
-300
lines changed

Common/Topologies/o2prototype_topology.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ The following parameters need adjustment when extending the FLP-EPN configuratio
7474
</decltask>
7575

7676
<decltask id="tracker">
77-
<exe reachable="true">$ALICEO2_INSTALL_DIR/bin/aliceHLTWrapper Tracker_%collectionIndex%_%taskIndex% 1 --dds --poll-period 100 --input type=pull,size=5000,method=connect,property=EPNReceiverOutputAddress,count=1 --output type=push,size=500,method=bind,property=TrackingOutputAddress,min-port=48000 --library libAliHLTTPC.so --component TPCCATracker --run 167808 --parameter '-GlobalTracking -allowGPU -GPUHelperThreads 4 -loglevel=0x7c'</exe>
77+
<exe reachable="true">$ALICEO2_INSTALL_DIR/bin/aliceHLTWrapper Tracker_%collectionIndex%_%taskIndex% 1 --dds --poll-period 100 --input type=pull,size=5000,method=connect,property=EPNReceiverOutputAddress,count=1 --output type=push,size=500,method=bind,property=TrackingOutputAddress,min-port=48000 --library libAliHLTTPC.so --component TPCCATracker --run 167808 --parameter '-GlobalTracking -allowGPU -loglevel=0x7c'</exe>
7878
<!-- <requirement></requirement> -->
7979
<properties>
8080
<id access="read">EPNReceiverOutputAddress</id>

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,9 +278,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice()
278278
if (!(mRecoSteps.stepsGPUMask & GPUDataTypes::RecoStep::TPCMerging)) {
279279
mProcessingSettings.mergerSortTracks = false;
280280
}
281-
if (!IsGPU()) {
282-
mProcessingSettings.nDeviceHelperThreads = 0;
283-
}
284281

285282
if (mProcessingSettings.debugLevel > 3 || !IsGPU() || mProcessingSettings.deterministicGPUReconstruction) {
286283
mProcessingSettings.delayedOutput = false;

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#define GPURECONSTRUCTIONICPU_H
1717

1818
#include "GPUReconstruction.h"
19-
#include "GPUReconstructionHelpers.h"
2019
#include "GPUConstantMem.h"
2120
#include <stdexcept>
2221
#include "utils/timer.h"
@@ -117,13 +116,6 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
117116
virtual void RecordMarker(deviceEvent* ev, int32_t stream) {}
118117
virtual void SynchronizeGPU() {}
119118
virtual void ReleaseEvent(deviceEvent ev) {}
120-
virtual int32_t StartHelperThreads() { return 0; }
121-
virtual int32_t StopHelperThreads() { return 0; }
122-
virtual void RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) {}
123-
virtual void WaitForHelperThreads() {}
124-
virtual int32_t HelperError(int32_t iThread) const { return 0; }
125-
virtual int32_t HelperDone(int32_t iThread) const { return 0; }
126-
virtual void ResetHelperThreads(int32_t helpers) {}
127119

128120
size_t TransferMemoryResourceToGPU(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, true, res->Ptr(), res->PtrDevice()); }
129121
size_t TransferMemoryResourceToHost(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, false, res->PtrDevice(), res->Ptr()); }
@@ -294,7 +286,7 @@ HighResTimer& GPUReconstructionCPU::getTimer(const char* name, int32_t num)
294286
static int32_t id = getNextTimerId();
295287
timerMeta* timer = getTimerById(id);
296288
if (timer == nullptr) {
297-
int32_t max = std::max<int32_t>({getOMPMaxThreads(), mProcessingSettings.nDeviceHelperThreads + 1, mProcessingSettings.nStreams});
289+
int32_t max = std::max<int32_t>({getOMPMaxThreads(), mProcessingSettings.nStreams});
298290
timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep);
299291
}
300292
if (num == -1) {

GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx

Lines changed: 0 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -41,57 +41,6 @@ GPUReconstructionDeviceBase::GPUReconstructionDeviceBase(const GPUSettingsDevice
4141

4242
GPUReconstructionDeviceBase::~GPUReconstructionDeviceBase() = default;
4343

44-
void* GPUReconstructionDeviceBase::helperWrapper_static(void* arg)
45-
{
46-
GPUReconstructionHelpers::helperParam* par = (GPUReconstructionHelpers::helperParam*)arg;
47-
GPUReconstructionDeviceBase* cls = par->cls;
48-
return cls->helperWrapper(par);
49-
}
50-
51-
void* GPUReconstructionDeviceBase::helperWrapper(GPUReconstructionHelpers::helperParam* par)
52-
{
53-
if (mProcessingSettings.debugLevel >= 3) {
54-
GPUInfo("\tHelper thread %d starting", par->num);
55-
}
56-
57-
// cpu_set_t mask; //TODO add option
58-
// CPU_ZERO(&mask);
59-
// CPU_SET(par->num * 2 + 2, &mask);
60-
// sched_setaffinity(0, sizeof(mask), &mask);
61-
62-
par->mutex[0].lock();
63-
while (par->terminate == false) {
64-
for (int32_t i = par->num + 1; i < par->count; i += mProcessingSettings.nDeviceHelperThreads + 1) {
65-
// if (mProcessingSettings.debugLevel >= 3) GPUInfo("\tHelper Thread %d Running, Slice %d+%d, Phase %d", par->num, i, par->phase);
66-
if ((par->functionCls->*par->function)(i, par->num + 1, par)) {
67-
par->error = 1;
68-
}
69-
if (par->reset) {
70-
break;
71-
}
72-
par->done = i + 1;
73-
// if (mProcessingSettings.debugLevel >= 3) GPUInfo("\tHelper Thread %d Finished, Slice %d+%d, Phase %d", par->num, i, par->phase);
74-
}
75-
ResetThisHelperThread(par);
76-
par->mutex[0].lock();
77-
}
78-
if (mProcessingSettings.debugLevel >= 3) {
79-
GPUInfo("\tHelper thread %d terminating", par->num);
80-
}
81-
par->mutex[1].unlock();
82-
pthread_exit(nullptr);
83-
return (nullptr);
84-
}
85-
86-
void GPUReconstructionDeviceBase::ResetThisHelperThread(GPUReconstructionHelpers::helperParam* par)
87-
{
88-
if (par->reset) {
89-
GPUImportant("GPU Helper Thread %d reseting", par->num);
90-
}
91-
par->reset = false;
92-
par->mutex[1].unlock();
93-
}
94-
9544
int32_t GPUReconstructionDeviceBase::GetGlobalLock(void*& pLock)
9645
{
9746
#ifdef _WIN32
@@ -138,86 +87,6 @@ void GPUReconstructionDeviceBase::ReleaseGlobalLock(void* sem)
13887
#endif
13988
}
14089

141-
void GPUReconstructionDeviceBase::ResetHelperThreads(int32_t helpers)
142-
{
143-
GPUImportant("Error occurred, GPU tracker helper threads will be reset (Number of threads %d (%d))", mProcessingSettings.nDeviceHelperThreads, mNSlaveThreads);
144-
SynchronizeGPU();
145-
for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) {
146-
mHelperParams[i].reset = true;
147-
if (helpers || i >= mProcessingSettings.nDeviceHelperThreads) {
148-
pthread_mutex_lock(&((pthread_mutex_t*)mHelperParams[i].mutex)[1]);
149-
}
150-
}
151-
GPUImportant("GPU Tracker helper threads have ben reset");
152-
}
153-
154-
int32_t GPUReconstructionDeviceBase::StartHelperThreads()
155-
{
156-
int32_t nThreads = mProcessingSettings.nDeviceHelperThreads;
157-
if (nThreads) {
158-
mHelperParams = new GPUReconstructionHelpers::helperParam[nThreads];
159-
if (mHelperParams == nullptr) {
160-
GPUError("Memory allocation error");
161-
ExitDevice();
162-
return (1);
163-
}
164-
for (int32_t i = 0; i < nThreads; i++) {
165-
mHelperParams[i].cls = this;
166-
mHelperParams[i].terminate = false;
167-
mHelperParams[i].reset = false;
168-
mHelperParams[i].num = i;
169-
for (int32_t j = 0; j < 2; j++) {
170-
mHelperParams[i].mutex[j].lock();
171-
}
172-
173-
if (pthread_create(&mHelperParams[i].threadId, nullptr, helperWrapper_static, &mHelperParams[i])) {
174-
GPUError("Error starting slave thread");
175-
ExitDevice();
176-
return (1);
177-
}
178-
}
179-
}
180-
mNSlaveThreads = nThreads;
181-
return (0);
182-
}
183-
184-
int32_t GPUReconstructionDeviceBase::StopHelperThreads()
185-
{
186-
if (mNSlaveThreads) {
187-
for (int32_t i = 0; i < mNSlaveThreads; i++) {
188-
mHelperParams[i].terminate = true;
189-
mHelperParams[i].mutex[0].unlock();
190-
mHelperParams[i].mutex[1].lock();
191-
if (pthread_join(mHelperParams[i].threadId, nullptr)) {
192-
GPUError("Error waiting for thread to terminate");
193-
return (1);
194-
}
195-
}
196-
delete[] mHelperParams;
197-
}
198-
mNSlaveThreads = 0;
199-
return (0);
200-
}
201-
202-
void GPUReconstructionDeviceBase::WaitForHelperThreads()
203-
{
204-
for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) {
205-
pthread_mutex_lock(&((pthread_mutex_t*)mHelperParams[i].mutex)[1]);
206-
}
207-
}
208-
209-
void GPUReconstructionDeviceBase::RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t i, int32_t t, GPUReconstructionHelpers::helperParam* p), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count)
210-
{
211-
for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) {
212-
mHelperParams[i].done = 0;
213-
mHelperParams[i].error = 0;
214-
mHelperParams[i].function = function;
215-
mHelperParams[i].functionCls = functionCls;
216-
mHelperParams[i].count = count;
217-
pthread_mutex_unlock(&((pthread_mutex_t*)mHelperParams[i].mutex)[0]);
218-
}
219-
}
220-
22190
int32_t GPUReconstructionDeviceBase::InitDevice()
22291
{
22392
// cpu_set_t mask;
@@ -262,10 +131,6 @@ int32_t GPUReconstructionDeviceBase::InitDevice()
262131
mProcShadow.mMemoryResProcessors = RegisterMemoryAllocation(&mProcShadow, &GPUProcessorProcessors::SetPointersDeviceProcessor, GPUMemoryResource::MEMORY_PERMANENT | GPUMemoryResource::MEMORY_HOST, "Processors");
263132
AllocateRegisteredMemory(mProcShadow.mMemoryResProcessors);
264133

265-
if (StartHelperThreads()) {
266-
return (1);
267-
}
268-
269134
if (mMaster == nullptr || mProcessingSettings.debugLevel >= 2) {
270135
GPUInfo("GPU Tracker initialization successfull"); // Verbosity reduced because GPU backend will print GPUImportant message!
271136
}
@@ -282,10 +147,6 @@ void* GPUReconstructionDeviceBase::GPUProcessorProcessors::SetPointersDeviceProc
282147

283148
int32_t GPUReconstructionDeviceBase::ExitDevice()
284149
{
285-
if (StopHelperThreads()) {
286-
return (1);
287-
}
288-
289150
int32_t retVal = ExitDevice_Runtime();
290151
mProcessorsShadow = nullptr;
291152
mHostMemoryPool = mHostMemoryBase = mDeviceMemoryPool = mDeviceMemoryBase = mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = mHostMemoryPermanent = mDeviceMemoryPermanent = nullptr;

GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
#include "GPUReconstructionCPU.h"
1919
#include <pthread.h>
20-
#include "GPUReconstructionHelpers.h"
2120
#include "GPUChain.h"
2221
#include <vector>
2322

@@ -61,24 +60,10 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU
6160
size_t GPUMemCpyAlways(bool onGpu, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
6261
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override = 0;
6362

64-
int32_t StartHelperThreads() override;
65-
int32_t StopHelperThreads() override;
66-
void RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) override;
67-
int32_t HelperError(int32_t iThread) const override { return mHelperParams[iThread].error; }
68-
int32_t HelperDone(int32_t iThread) const override { return mHelperParams[iThread].done; }
69-
void WaitForHelperThreads() override;
70-
void ResetHelperThreads(int32_t helpers) override;
71-
void ResetThisHelperThread(GPUReconstructionHelpers::helperParam* par);
72-
7363
int32_t GetGlobalLock(void*& pLock);
7464
void ReleaseGlobalLock(void* sem);
7565

76-
static void* helperWrapper_static(void* arg);
77-
void* helperWrapper(GPUReconstructionHelpers::helperParam* par);
78-
79-
int32_t mDeviceId = -1; // Device ID used by backend
80-
GPUReconstructionHelpers::helperParam* mHelperParams = nullptr; // Control Struct for helper threads
81-
int32_t mNSlaveThreads = 0; // Number of slave threads currently active
66+
int32_t mDeviceId = -1; // Device ID used by backend
8267

8368
struct DebugEvents {
8469
deviceEvent DebugStart, DebugStop; // Debug timer events

GPU/GPUTracking/Base/GPUReconstructionHelpers.h

Lines changed: 0 additions & 50 deletions
This file was deleted.

GPU/GPUTracking/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ set(HDRS_INSTALL
104104
Base/GPUConstantMem.h
105105
Base/GPUParam.inc
106106
Base/GPUParamRTC.h
107-
Base/GPUReconstructionHelpers.h
108107
Base/GPUReconstructionIncludes.h
109108
Base/GPUReconstructionIncludesITS.h
110109
Base/GPUReconstructionKernelMacros.h

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,6 @@ AddOption(registerStandaloneInputMemory, bool, false, "registerInputMemory", 0,
252252
AddOption(ompThreads, int32_t, -1, "omp", 't', "Number of OMP threads to run (-1: all)", min(-1), message("Using %s OMP threads"))
253253
AddOption(ompKernels, uint8_t, 2, "", 0, "Parallelize with OMP inside kernels instead of over slices, 2 for nested parallelization over TPC sectors and inside kernels")
254254
AddOption(ompAutoNThreads, bool, true, "", 0, "Auto-adjust number of OMP threads, decreasing the number for small input data")
255-
AddOption(nDeviceHelperThreads, int32_t, 1, "", 0, "Number of CPU helper threads for CPU processing")
256255
AddOption(nStreams, int8_t, 8, "", 0, "Number of GPU streams / command queues")
257256
AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that can run in parallel (-1 = autoset)")
258257
AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)")

GPU/GPUTracking/Global/GPUChain.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#define GPUCHAIN_H
1717

1818
#include "GPUReconstructionCPU.h"
19-
#include "GPUReconstructionHelpers.h"
2019

2120
namespace o2
2221
{
@@ -111,12 +110,6 @@ class GPUChain
111110
}
112111
}
113112
inline void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) { mRec->StreamWaitForEvents(stream, evList, nEvents); }
114-
template <class T>
115-
void RunHelperThreads(T function, GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count);
116-
inline void WaitForHelperThreads() { mRec->WaitForHelperThreads(); }
117-
inline int32_t HelperError(int32_t iThread) const { return mRec->HelperError(iThread); }
118-
inline int32_t HelperDone(int32_t iThread) const { return mRec->HelperDone(iThread); }
119-
inline void ResetHelperThreads(int32_t helpers) { mRec->ResetHelperThreads(helpers); }
120113
inline int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1) { return mRec->GPUDebug(state, stream); }
121114
// nEvents is forced to 0 if evList == nullptr
122115
inline void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceToGPU, res, stream, ev, evList, nEvents); }
@@ -242,12 +235,6 @@ class GPUChain
242235
void timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args);
243236
};
244237

245-
template <class T>
246-
inline void GPUChain::RunHelperThreads(T function, GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count)
247-
{
248-
mRec->RunHelperThreads((int32_t(GPUReconstructionHelpers::helperDelegateBase::*)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*))function, functionCls, count);
249-
}
250-
251238
template <bool Always, class T, class S, typename... Args>
252239
inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args)
253240
{

GPU/GPUTracking/Global/GPUChainTracking.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#define GPUCHAINTRACKING_H
1717

1818
#include "GPUChain.h"
19-
#include "GPUReconstructionHelpers.h"
2019
#include "GPUDataTypes.h"
2120
#include <atomic>
2221
#include <mutex>
@@ -68,7 +67,7 @@ struct GPUTPCCFChainContext;
6867
struct GPUNewCalibValues;
6968
struct GPUTriggerOutputs;
7069

71-
class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelegateBase
70+
class GPUChainTracking : public GPUChain
7271
{
7372
friend class GPUReconstruction;
7473

@@ -314,15 +313,11 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega
314313
void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function<o2::tpc::ClusterNative*(size_t)> allocator, bool applyClusterCuts);
315314
bool NeedTPCClustersOnGPU();
316315

317-
std::atomic_flag mLockAtomicOutputBuffer = ATOMIC_FLAG_INIT;
318316
std::mutex mMutexUpdateCalib;
319317
std::unique_ptr<GPUChainTrackingFinalContext> mPipelineFinalizationCtx;
320318
GPUChainTrackingFinalContext* mPipelineNotifyCtx = nullptr;
321319
std::function<void()> mWaitForFinalInputs;
322320

323-
int32_t HelperReadEvent(int32_t iSlice, int32_t threadId, GPUReconstructionHelpers::helperParam* par);
324-
int32_t HelperOutput(int32_t iSlice, int32_t threadId, GPUReconstructionHelpers::helperParam* par);
325-
326321
int32_t OutputStream() const { return mRec->NStreams() - 2; }
327322
};
328323
} // namespace gpu

0 commit comments

Comments
 (0)