Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Common/Topologies/o2prototype_topology.xml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ The following parameters need adjustment when extending the FLP-EPN configuratio
</decltask>

<decltask id="tracker">
<exe reachable="true">$ALICEO2_INSTALL_DIR/bin/aliceHLTWrapper Tracker_%collectionIndex%_%taskIndex% 1 --dds --poll-period 100 --input type=pull,size=5000,method=connect,property=EPNReceiverOutputAddress,count=1 --output type=push,size=500,method=bind,property=TrackingOutputAddress,min-port=48000 --library libAliHLTTPC.so --component TPCCATracker --run 167808 --parameter '-GlobalTracking -allowGPU -GPUHelperThreads 4 -loglevel=0x7c'</exe>
<exe reachable="true">$ALICEO2_INSTALL_DIR/bin/aliceHLTWrapper Tracker_%collectionIndex%_%taskIndex% 1 --dds --poll-period 100 --input type=pull,size=5000,method=connect,property=EPNReceiverOutputAddress,count=1 --output type=push,size=500,method=bind,property=TrackingOutputAddress,min-port=48000 --library libAliHLTTPC.so --component TPCCATracker --run 167808 --parameter '-GlobalTracking -allowGPU -loglevel=0x7c'</exe>
<!-- <requirement></requirement> -->
<properties>
<id access="read">EPNReceiverOutputAddress</id>
Expand Down
17 changes: 1 addition & 16 deletions GPU/GPUTracking/Base/GPUReconstruction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -278,25 +278,10 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice()
if (!(mRecoSteps.stepsGPUMask & GPUDataTypes::RecoStep::TPCMerging)) {
mProcessingSettings.mergerSortTracks = false;
}
if (!IsGPU()) {
mProcessingSettings.nDeviceHelperThreads = 0;
}

if (param().rec.nonConsecutiveIDs) {
param().rec.tpc.disableRefitAttachment = 0xFF;
}
if (!(mRecoSteps.stepsGPUMask & RecoStep::TPCMerging) || !param().rec.tpc.mergerReadFromTrackerDirectly) {
mProcessingSettings.fullMergerOnGPU = false;
}
if (mProcessingSettings.debugLevel > 3 || !IsGPU() || !mProcessingSettings.fullMergerOnGPU || mProcessingSettings.deterministicGPUReconstruction) {
if (mProcessingSettings.debugLevel > 3 || !IsGPU() || mProcessingSettings.deterministicGPUReconstruction) {
mProcessingSettings.delayedOutput = false;
}
if (!mProcessingSettings.fullMergerOnGPU && (GetRecoStepsGPU() & RecoStep::TPCMerging)) {
param().rec.tpc.looperInterpolationInExtraPass = 0;
if (param().rec.tpc.retryRefit == 1) {
param().rec.tpc.retryRefit = 2;
}
}

UpdateAutomaticProcessingSettings();
GPUCA_GPUReconstructionUpdateDefaults();
Expand Down
10 changes: 1 addition & 9 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#define GPURECONSTRUCTIONICPU_H

#include "GPUReconstruction.h"
#include "GPUReconstructionHelpers.h"
#include "GPUConstantMem.h"
#include <stdexcept>
#include "utils/timer.h"
Expand Down Expand Up @@ -117,13 +116,6 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
// Default implementations of the event / synchronization hooks: all three have empty bodies.
virtual void RecordMarker(deviceEvent* ev, int32_t stream) {}
virtual void SynchronizeGPU() {}
virtual void ReleaseEvent(deviceEvent ev) {}
// Default helper-thread interface: every function is a no-op and every query returns 0.
// GPUReconstructionDeviceBase overrides all of these with its pthread-based helper threads.
virtual int32_t StartHelperThreads() { return 0; }
virtual int32_t StopHelperThreads() { return 0; }
// `function` is a member function of `functionCls` taking (int32_t, int32_t, helperParam*); `count` is forwarded to the implementation.
virtual void RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) {}
virtual void WaitForHelperThreads() {}
virtual int32_t HelperError(int32_t iThread) const { return 0; }
virtual int32_t HelperDone(int32_t iThread) const { return 0; }
virtual void ResetHelperThreads(int32_t helpers) {}

// Forwards to TransferMemoryInternal with the flag set to `true` and the pointer pair (res->Ptr(), res->PtrDevice()).
// NOTE(review): presumably Ptr() is the host-side source and PtrDevice() the device-side destination — confirm against TransferMemoryInternal.
size_t TransferMemoryResourceToGPU(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, true, res->Ptr(), res->PtrDevice()); }
// Mirror of the function above: flag set to `false` and the two pointers swapped to (res->PtrDevice(), res->Ptr()).
size_t TransferMemoryResourceToHost(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, false, res->PtrDevice(), res->Ptr()); }
Expand Down Expand Up @@ -294,7 +286,7 @@ HighResTimer& GPUReconstructionCPU::getTimer(const char* name, int32_t num)
static int32_t id = getNextTimerId();
timerMeta* timer = getTimerById(id);
if (timer == nullptr) {
int32_t max = std::max<int32_t>({getOMPMaxThreads(), mProcessingSettings.nDeviceHelperThreads + 1, mProcessingSettings.nStreams});
int32_t max = std::max<int32_t>({getOMPMaxThreads(), mProcessingSettings.nStreams});
timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep);
}
if (num == -1) {
Expand Down
139 changes: 0 additions & 139 deletions GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -41,57 +41,6 @@ GPUReconstructionDeviceBase::GPUReconstructionDeviceBase(const GPUSettingsDevice

GPUReconstructionDeviceBase::~GPUReconstructionDeviceBase() = default;

void* GPUReconstructionDeviceBase::helperWrapper_static(void* arg)
{
GPUReconstructionHelpers::helperParam* par = (GPUReconstructionHelpers::helperParam*)arg;
GPUReconstructionDeviceBase* cls = par->cls;
return cls->helperWrapper(par);
}

// Body of one helper thread.
// The thread sleeps on par->mutex[0] until RunHelperThreads() (new job) or StopHelperThreads()
// (terminate) unlocks it. For a job it calls the delegate member function for the indices
// par->num + 1, par->num + 1 + (nDeviceHelperThreads + 1), ... below par->count, then
// signals completion by unlocking par->mutex[1] (inside ResetThisHelperThread) and goes back to sleep.
// NOTE(review): the mutexes are used as signals, i.e. they are locked and unlocked from different threads.
void* GPUReconstructionDeviceBase::helperWrapper(GPUReconstructionHelpers::helperParam* par)
{
if (mProcessingSettings.debugLevel >= 3) {
GPUInfo("\tHelper thread %d starting", par->num);
}

// cpu_set_t mask; //TODO add option
// CPU_ZERO(&mask);
// CPU_SET(par->num * 2 + 2, &mask);
// sched_setaffinity(0, sizeof(mask), &mask);

// Blocks here: StartHelperThreads() locks both mutexes before it creates the thread.
par->mutex[0].lock();
while (par->terminate == false) {
// Strided index loop; the stride is the number of helper threads plus one.
// NOTE(review): presumably the extra slot (index 0, stride, ...) is handled by the calling thread — confirm with the caller.
for (int32_t i = par->num + 1; i < par->count; i += mProcessingSettings.nDeviceHelperThreads + 1) {
// if (mProcessingSettings.debugLevel >= 3) GPUInfo("\tHelper Thread %d Running, Slice %d+%d, Phase %d", par->num, i, par->phase);
// A non-zero return value of the delegate is recorded as an error; the loop keeps going.
if ((par->functionCls->*par->function)(i, par->num + 1, par)) {
par->error = 1;
}
// ResetHelperThreads() sets this flag to abort the current job; `done` is not advanced for the aborted index.
if (par->reset) {
break;
}
par->done = i + 1;
// if (mProcessingSettings.debugLevel >= 3) GPUInfo("\tHelper Thread %d Finished, Slice %d+%d, Phase %d", par->num, i, par->phase);
}
// Clears the reset flag and unlocks mutex[1], which WaitForHelperThreads() is waiting on.
ResetThisHelperThread(par);
// Sleep until the next job or the terminate request.
par->mutex[0].lock();
}
if (mProcessingSettings.debugLevel >= 3) {
GPUInfo("\tHelper thread %d terminating", par->num);
}
// Lets StopHelperThreads() proceed to pthread_join.
par->mutex[1].unlock();
// pthread_exit does not return; the return statement below only satisfies the signature.
pthread_exit(nullptr);
return (nullptr);
}

// Called by a helper thread at the end of a job: reports and clears a pending reset
// request, then unlocks par->mutex[1] to signal that the thread is finished.
void GPUReconstructionDeviceBase::ResetThisHelperThread(GPUReconstructionHelpers::helperParam* par)
{
  const bool resetRequested = par->reset;
  if (resetRequested) {
    GPUImportant("GPU Helper Thread %d reseting", par->num);
  }
  par->reset = false;
  par->mutex[1].unlock();
}

int32_t GPUReconstructionDeviceBase::GetGlobalLock(void*& pLock)
{
#ifdef _WIN32
Expand Down Expand Up @@ -138,86 +87,6 @@ void GPUReconstructionDeviceBase::ReleaseGlobalLock(void* sem)
#endif
}

// Aborts the job currently running on the helper threads after an error.
// Sets the reset flag of every helper thread (checked once per index in helperWrapper) and,
// if `helpers` is non-zero, additionally waits for each thread to signal completion by
// acquiring its mutex[1].
void GPUReconstructionDeviceBase::ResetHelperThreads(int32_t helpers)
{
  GPUImportant("Error occurred, GPU tracker helper threads will be reset (Number of threads %d (%d))", mProcessingSettings.nDeviceHelperThreads, mNSlaveThreads);
  SynchronizeGPU();
  const bool waitForHelpers = helpers != 0;
  for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) {
    mHelperParams[i].reset = true;
    // The former extra condition `i >= nDeviceHelperThreads` could never be true inside this loop.
    if (waitForHelpers) {
      // Use the mutex member API like the rest of this class instead of reinterpreting the object as a pthread_mutex_t.
      mHelperParams[i].mutex[1].lock();
    }
  }
  GPUImportant("GPU Tracker helper threads have ben reset");
}

int32_t GPUReconstructionDeviceBase::StartHelperThreads()
{
int32_t nThreads = mProcessingSettings.nDeviceHelperThreads;
if (nThreads) {
mHelperParams = new GPUReconstructionHelpers::helperParam[nThreads];
if (mHelperParams == nullptr) {
GPUError("Memory allocation error");
ExitDevice();
return (1);
}
for (int32_t i = 0; i < nThreads; i++) {
mHelperParams[i].cls = this;
mHelperParams[i].terminate = false;
mHelperParams[i].reset = false;
mHelperParams[i].num = i;
for (int32_t j = 0; j < 2; j++) {
mHelperParams[i].mutex[j].lock();
}

if (pthread_create(&mHelperParams[i].threadId, nullptr, helperWrapper_static, &mHelperParams[i])) {
GPUError("Error starting slave thread");
ExitDevice();
return (1);
}
}
}
mNSlaveThreads = nThreads;
return (0);
}

int32_t GPUReconstructionDeviceBase::StopHelperThreads()
{
if (mNSlaveThreads) {
for (int32_t i = 0; i < mNSlaveThreads; i++) {
mHelperParams[i].terminate = true;
mHelperParams[i].mutex[0].unlock();
mHelperParams[i].mutex[1].lock();
if (pthread_join(mHelperParams[i].threadId, nullptr)) {
GPUError("Error waiting for thread to terminate");
return (1);
}
}
delete[] mHelperParams;
}
mNSlaveThreads = 0;
return (0);
}

void GPUReconstructionDeviceBase::WaitForHelperThreads()
{
for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) {
pthread_mutex_lock(&((pthread_mutex_t*)mHelperParams[i].mutex)[1]);
}
}

// Hands a new job to all helper threads and wakes them up.
// function:    member function of `functionCls` that each helper thread calls once per index
// functionCls: object on which `function` is invoked
// count:       exclusive upper bound of the indices distributed over the threads
// The progress (`done`) and error state of each thread are cleared before the thread is released.
void GPUReconstructionDeviceBase::RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t i, int32_t t, GPUReconstructionHelpers::helperParam* p), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count)
{
  for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) {
    GPUReconstructionHelpers::helperParam& par = mHelperParams[i];
    par.done = 0;
    par.error = 0;
    par.function = function;
    par.functionCls = functionCls;
    par.count = count;
    // Releasing mutex[0] wakes the thread sleeping in helperWrapper. Use the mutex member API
    // like the rest of this class instead of reinterpreting the object as a pthread_mutex_t.
    par.mutex[0].unlock();
  }
}

int32_t GPUReconstructionDeviceBase::InitDevice()
{
// cpu_set_t mask;
Expand Down Expand Up @@ -262,10 +131,6 @@ int32_t GPUReconstructionDeviceBase::InitDevice()
mProcShadow.mMemoryResProcessors = RegisterMemoryAllocation(&mProcShadow, &GPUProcessorProcessors::SetPointersDeviceProcessor, GPUMemoryResource::MEMORY_PERMANENT | GPUMemoryResource::MEMORY_HOST, "Processors");
AllocateRegisteredMemory(mProcShadow.mMemoryResProcessors);

if (StartHelperThreads()) {
return (1);
}

if (mMaster == nullptr || mProcessingSettings.debugLevel >= 2) {
GPUInfo("GPU Tracker initialization successfull"); // Verbosity reduced because GPU backend will print GPUImportant message!
}
Expand All @@ -282,10 +147,6 @@ void* GPUReconstructionDeviceBase::GPUProcessorProcessors::SetPointersDeviceProc

int32_t GPUReconstructionDeviceBase::ExitDevice()
{
if (StopHelperThreads()) {
return (1);
}

int32_t retVal = ExitDevice_Runtime();
mProcessorsShadow = nullptr;
mHostMemoryPool = mHostMemoryBase = mDeviceMemoryPool = mDeviceMemoryBase = mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = mHostMemoryPermanent = mDeviceMemoryPermanent = nullptr;
Expand Down
17 changes: 1 addition & 16 deletions GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#include "GPUReconstructionCPU.h"
#include <pthread.h>
#include "GPUReconstructionHelpers.h"
#include "GPUChain.h"
#include <vector>

Expand Down Expand Up @@ -61,24 +60,10 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU
size_t GPUMemCpyAlways(bool onGpu, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override = 0;

// Helper-thread interface of the base class, overridden here; defined out of line unless a body is given.
int32_t StartHelperThreads() override;
int32_t StopHelperThreads() override;
void RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) override;
// Return the `error` / `done` field of helper thread iThread straight from mHelperParams.
// NOTE(review): iThread is not range-checked and mHelperParams is nullptr by default — callers must only use these while helper threads exist.
int32_t HelperError(int32_t iThread) const override { return mHelperParams[iThread].error; }
int32_t HelperDone(int32_t iThread) const override { return mHelperParams[iThread].done; }
void WaitForHelperThreads() override;
void ResetHelperThreads(int32_t helpers) override;
// Not part of the base-class interface (no `override`); takes the control struct of the helper thread to reset.
void ResetThisHelperThread(GPUReconstructionHelpers::helperParam* par);

int32_t GetGlobalLock(void*& pLock);
void ReleaseGlobalLock(void* sem);

static void* helperWrapper_static(void* arg);
void* helperWrapper(GPUReconstructionHelpers::helperParam* par);

int32_t mDeviceId = -1; // Device ID used by backend
GPUReconstructionHelpers::helperParam* mHelperParams = nullptr; // Control Struct for helper threads
int32_t mNSlaveThreads = 0; // Number of slave threads currently active
int32_t mDeviceId = -1; // Device ID used by backend

struct DebugEvents {
deviceEvent DebugStart, DebugStop; // Debug timer events
Expand Down
50 changes: 0 additions & 50 deletions GPU/GPUTracking/Base/GPUReconstructionHelpers.h

This file was deleted.

1 change: 0 additions & 1 deletion GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ set(HDRS_INSTALL
Base/GPUConstantMem.h
Base/GPUParam.inc
Base/GPUParamRTC.h
Base/GPUReconstructionHelpers.h
Base/GPUReconstructionIncludes.h
Base/GPUReconstructionIncludesITS.h
Base/GPUReconstructionKernelMacros.h
Expand Down
4 changes: 0 additions & 4 deletions GPU/GPUTracking/Definitions/GPUSettingsList.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation inst
AddOptionRTC(mergeCE, uint8_t, 1, "", 0, "Merge tracks accross the central electrode")
AddOptionRTC(retryRefit, int8_t, 1, "", 0, "Retry refit with seeding errors and without cluster rejection when fit fails (=2 means retry in same kernel, =1 for separate kernel")
AddOptionRTC(looperInterpolationInExtraPass, int8_t, -1, "", 0, "Perform looper interpolation in an extra pass")
AddOptionRTC(mergerReadFromTrackerDirectly, int8_t, 1, "", 0, "Forward data directly from tracker to merger on GPU")
AddOptionRTC(dropSecondaryLegsInOutput, int8_t, 1, "", 0, "Do not store secondary legs of looping track in TrackTPC")
AddOptionRTC(enablePID, int8_t, 1, "", 0, "Enable PID response")
AddOptionRTC(PID_useNsigma, int8_t, 1, "", 0, "Use nSigma instead of absolute distance in PID response")
Expand Down Expand Up @@ -188,7 +187,6 @@ EndConfig()

BeginSubConfig(GPUSettingsRec, rec, configStandalone, "REC", 0, "Reconstruction settings", rec)
AddOptionRTC(maxTrackQPtB5, float, 1.f / GPUCA_MIN_TRACK_PTB5_DEFAULT, "", 0, "required max Q/Pt (==min Pt) of tracks")
AddOptionRTC(nonConsecutiveIDs, int8_t, false, "", 0, "Non-consecutive cluster IDs as in HLT, disables features that need access to slice data in TPC merger")
AddOptionRTC(fwdTPCDigitsAsClusters, uint8_t, 0, "", 0, "Forward TPC digits as clusters (if they pass the ZS threshold)")
AddOptionRTC(bz0Pt10MeV, uint8_t, 60, "", 0, "Nominal Pt to set when bz = 0 (in 10 MeV)")
AddOptionRTC(fitInProjections, int8_t, -1, "", 0, "Fit in projection, -1 to enable full fit for all but passes but the first one")
Expand Down Expand Up @@ -254,14 +252,12 @@ AddOption(registerStandaloneInputMemory, bool, false, "registerInputMemory", 0,
AddOption(ompThreads, int32_t, -1, "omp", 't', "Number of OMP threads to run (-1: all)", min(-1), message("Using %s OMP threads"))
AddOption(ompKernels, uint8_t, 2, "", 0, "Parallelize with OMP inside kernels instead of over slices, 2 for nested parallelization over TPC sectors and inside kernels")
AddOption(ompAutoNThreads, bool, true, "", 0, "Auto-adjust number of OMP threads, decreasing the number for small input data")
AddOption(nDeviceHelperThreads, int32_t, 1, "", 0, "Number of CPU helper threads for CPU processing")
AddOption(nStreams, int8_t, 8, "", 0, "Number of GPU streams / command queues")
AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that can run in parallel (-1 = autoset)")
AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)")
AddOption(trackletSelectorSlices, int8_t, -1, "", 0, "Number of slices to processes in parallel at max")
AddOption(trackletConstructorInPipeline, int8_t, -1, "", 0, "Run tracklet constructor in the pipeline")
AddOption(trackletSelectorInPipeline, int8_t, -1, "", 0, "Run tracklet selector in the pipeline")
AddOption(fullMergerOnGPU, bool, true, "", 0, "Perform full TPC track merging on GPU instead of only refit")
AddOption(delayedOutput, bool, true, "", 0, "Delay output to be parallel to track fit")
AddOption(mergerSortTracks, int8_t, -1, "", 0, "Sort track indizes for GPU track fit")
AddOption(alternateBorderSort, int8_t, -1, "", 0, "Alternative implementation for sorting of border tracks")
Expand Down
13 changes: 0 additions & 13 deletions GPU/GPUTracking/Global/GPUChain.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#define GPUCHAIN_H

#include "GPUReconstructionCPU.h"
#include "GPUReconstructionHelpers.h"

namespace o2
{
Expand Down Expand Up @@ -111,12 +110,6 @@ class GPUChain
}
}
// Thin forwarders to the reconstruction object referenced by mRec.
inline void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) { mRec->StreamWaitForEvents(stream, evList, nEvents); }
// Accepts any callable-pointer type T; the definition further down in this header casts it to the helperDelegateBase member-function type.
template <class T>
void RunHelperThreads(T function, GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count);
inline void WaitForHelperThreads() { mRec->WaitForHelperThreads(); }
inline int32_t HelperError(int32_t iThread) const { return mRec->HelperError(iThread); }
inline int32_t HelperDone(int32_t iThread) const { return mRec->HelperDone(iThread); }
inline void ResetHelperThreads(int32_t helpers) { mRec->ResetHelperThreads(helpers); }
inline int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1) { return mRec->GPUDebug(state, stream); }
// nEvents is forced to 0 if evList == nullptr
// Routes GPUReconstructionCPU::TransferMemoryResourceToGPU through timeCpy together with the reco step `step`.
inline void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceToGPU, res, stream, ev, evList, nEvents); }
Expand Down Expand Up @@ -242,12 +235,6 @@ class GPUChain
void timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args);
};

template <class T>
inline void GPUChain::RunHelperThreads(T function, GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count)
{
mRec->RunHelperThreads((int32_t(GPUReconstructionHelpers::helperDelegateBase::*)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*))function, functionCls, count);
}

template <bool Always, class T, class S, typename... Args>
inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args)
{
Expand Down
Loading