Skip to content

Commit 7b8f406

Browse files
committed
GPU Workflow: Add dumpFirst and dumpLast options
1 parent a8aa013 commit 7b8f406

File tree

8 files changed

+50
-20
lines changed

8 files changed

+50
-20
lines changed

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -625,6 +625,9 @@ AddOption(deviceType, std::string, "CPU", "", 0, "Device type, CPU | CUDA | HIP
625625
AddOption(forceDeviceType, bool, true, "", 0, "force device type, otherwise allows fall-back to CPU")
626626
AddOption(synchronousProcessing, bool, false, "", 0, "Apply performance shortcuts for synchronous processing, disable unneeded steps")
627627
AddOption(dump, int32_t, 0, "", 0, "Dump events for standalone benchmark: 1 = dump events, 2 = dump events and skip processing in workflow")
628+
AddOption(dumpFirst, int32_t, 0, "", 0, "First event to dump (referring to tfCounter)")
629+
AddOption(dumpLast, int32_t, -1, "", 0, "Last event to dump (-1 = all)")
630+
AddOption(dumpFolder, std::string, "", "", 0, "Folder to which to write dump files, [P] is replaced by process id")
628631
AddOption(display, bool, false, "", 0, "Enable standalone gpu tracking visualizaion")
629632
AddOption(rundEdx, int32_t, -1, "", 0, "Enable/disable dEdx processing (-1 for autoselect)")
630633
AddOption(dEdxSplineTopologyCorrFile, std::string, "", "", 0, "File name of the dE/dx spline track topology correction file")

GPU/GPUTracking/Interface/GPUO2Interface.cxx

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -137,29 +137,30 @@ void GPUO2Interface::Deinitialize()
137137
mNContexts = 0;
138138
}
139139

140-
void GPUO2Interface::DumpEvent(int32_t nEvent, GPUTrackingInOutPointers* data)
140+
void GPUO2Interface::DumpEvent(int32_t nEvent, GPUTrackingInOutPointers* data, uint32_t iThread, const char* dir)
141141
{
142-
mCtx[0].mChain->ClearIOPointers();
143-
mCtx[0].mChain->mIOPtrs = *data;
142+
const auto oldPtrs = mCtx[iThread].mChain->mIOPtrs;
143+
mCtx[iThread].mChain->mIOPtrs = *data;
144144
char fname[1024];
145-
snprintf(fname, 1024, "event.%d.dump", nEvent);
146-
mCtx[0].mChain->DumpData(fname);
145+
snprintf(fname, 1024, "%sevent.%d.dump", dir, nEvent);
146+
mCtx[iThread].mChain->DumpData(fname);
147147
if (nEvent == 0) {
148148
#ifdef GPUCA_BUILD_QA
149149
if (mConfig->configProcessing.runMC) {
150-
mCtx[0].mChain->ForceInitQA();
150+
mCtx[iThread].mChain->ForceInitQA();
151151
snprintf(fname, 1024, "mc.%d.dump", nEvent);
152-
mCtx[0].mChain->GetQA()->UpdateChain(mCtx[0].mChain);
153-
mCtx[0].mChain->GetQA()->DumpO2MCData(fname);
152+
mCtx[iThread].mChain->GetQA()->UpdateChain(mCtx[iThread].mChain);
153+
mCtx[iThread].mChain->GetQA()->DumpO2MCData(fname);
154154
}
155155
#endif
156156
}
157+
mCtx[iThread].mChain->mIOPtrs = oldPtrs;
157158
}
158159

159-
void GPUO2Interface::DumpSettings()
160+
void GPUO2Interface::DumpSettings(uint32_t iThread, const char* dir)
160161
{
161-
mCtx[0].mChain->DoQueuedUpdates(-1);
162-
mCtx[0].mRec->DumpSettings();
162+
mCtx[iThread].mChain->DoQueuedUpdates(-1);
163+
mCtx[iThread].mRec->DumpSettings(dir);
163164
}
164165

165166
int32_t GPUO2Interface::RunTracking(GPUTrackingInOutPointers* data, GPUInterfaceOutputs* outputs, uint32_t iThread, GPUInterfaceInputUpdate* inputUpdateCallback)

GPU/GPUTracking/Interface/GPUO2Interface.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -77,8 +77,8 @@ class GPUO2Interface
7777

7878
int32_t RunTracking(GPUTrackingInOutPointers* data, GPUInterfaceOutputs* outputs = nullptr, uint32_t iThread = 0, GPUInterfaceInputUpdate* inputUpdateCallback = nullptr);
7979
void Clear(bool clearOutputs, uint32_t iThread = 0);
80-
void DumpEvent(int32_t nEvent, GPUTrackingInOutPointers* data);
81-
void DumpSettings();
80+
void DumpEvent(int32_t nEvent, GPUTrackingInOutPointers* data, uint32_t iThread, const char* dir = "");
81+
void DumpSettings(uint32_t iThread, const char* dir = "");
8282

8383
void GetITSTraits(o2::its::TrackerTraits<7>*& trackerTraits, o2::its::VertexerTraits<7>*& vertexerTraits, o2::its::TimeFrame<7>*& timeFrame);
8484
const o2::base::Propagator* GetDeviceO2Propagator(int32_t iThread = 0) const;

GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -225,6 +225,7 @@ class GPURecoWorkflowSpec : public o2::framework::Task
225225
int64_t mCreationForCalib = -1; ///< creation time for calib manipulation
226226
int32_t mVerbosity = 0;
227227
uint32_t mNTFs = 0;
228+
uint32_t mNTFDumps = 0;
228229
uint32_t mNDebugDumps = 0;
229230
uint32_t mNextThreadIndex = 0;
230231
bool mUpdateGainMapCCDB = true;

GPU/Workflow/src/GPUWorkflowInternal.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ struct GPURecoWorkflow_QueueObject {
4747
bool jobSubmitted = false;
4848
bool jobFinished = false;
4949
int32_t jobReturnValue = 0;
50+
volatile int32_t jobThreadIndex = -1;
5051
std::mutex jobFinishedMutex;
5152
std::condition_variable jobFinishedNotify;
5253
bool jobInputFinal = false;

GPU/Workflow/src/GPUWorkflowPipeline.cxx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,7 @@ void GPURecoWorkflowSpec::RunWorkerThread(int32_t id)
9090
context = workerContext.inputQueue.front();
9191
workerContext.inputQueue.pop();
9292
}
93+
context->jobThreadIndex = id;
9394
context->jobReturnValue = runMain(nullptr, context->jobPtrs, context->jobOutputRegions, id, context->jobInputUpdateCallback.get());
9495
{
9596
std::lock_guard lk(context->jobFinishedMutex);
@@ -179,8 +180,7 @@ int32_t GPURecoWorkflowSpec::handlePipeline(ProcessingContext& pc, GPUTrackingIn
179180
}
180181
mPipeline->completionPolicyQueue.pop();
181182
}
182-
}
183-
if (mSpecConfig.enableDoublePipeline == 2) {
183+
} else if (mSpecConfig.enableDoublePipeline == 2) {
184184
auto prepareDummyMessage = pc.outputs().make<DataAllocator::UninitializedVector<char>>(Output{gDataOriginGPU, "PIPELINEPREPARE", 0}, 0u);
185185

186186
size_t ptrsTotal = 0;

GPU/Workflow/src/GPUWorkflowSpec.cxx

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -825,11 +825,31 @@ void GPURecoWorkflowSpec::run(ProcessingContext& pc)
825825

826826
lockDecodeInput.reset();
827827

828+
uint32_t threadIndex;
828829
if (mConfParam->dump) {
829-
if (mNTFs == 1) {
830-
mGPUReco->DumpSettings();
830+
if (mSpecConfig.enableDoublePipeline && pipelineContext->jobSubmitted) {
831+
while (pipelineContext->jobThreadIndex == -1) {
832+
}
833+
threadIndex = pipelineContext->jobThreadIndex;
834+
} else {
835+
threadIndex = 0; // TODO: Not sure if this is safe, but it is not yet known which threadIndex will pick up the enqueued job
836+
}
837+
838+
std::string dir = "";
839+
if (mConfParam->dumpFolder != "") {
840+
dir = std::regex_replace(mConfParam->dumpFolder, std::regex("\\[P\\]"), std::to_string(getpid()));
841+
if (mNTFs == 1) {
842+
mkdir(dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
843+
}
844+
dir += "/";
845+
}
846+
if (mNTFs == 1) { // Must dump with first TF, since will enforce enqueued calib updates
847+
mGPUReco->DumpSettings(threadIndex, dir.c_str());
848+
}
849+
if (tinfo.tfCounter >= mConfParam->dumpFirst && (mConfParam->dumpLast == -1 || tinfo.tfCounter <= mConfParam->dumpLast)) {
850+
mGPUReco->DumpEvent(mNTFDumps, &ptrs, threadIndex, dir.c_str());
851+
mNTFDumps++;
831852
}
832-
mGPUReco->DumpEvent(mNTFs - 1, &ptrs);
833853
}
834854
std::unique_ptr<GPUTrackingInOutPointers> ptrsDump;
835855
if (mConfParam->dumpBadTFMode == 2) {
@@ -847,9 +867,10 @@ void GPURecoWorkflowSpec::run(ProcessingContext& pc)
847867
std::unique_lock lk(pipelineContext->jobFinishedMutex);
848868
pipelineContext->jobFinishedNotify.wait(lk, [context = pipelineContext.get()]() { return context->jobFinished; });
849869
retVal = pipelineContext->jobReturnValue;
870+
threadIndex = pipelineContext->jobThreadIndex;
850871
} else {
851872
// uint32_t threadIndex = pc.services().get<ThreadPool>().threadIndex;
852-
uint32_t threadIndex = mNextThreadIndex;
873+
threadIndex = mNextThreadIndex;
853874
if (mConfig->configProcessing.doublePipeline) {
854875
mNextThreadIndex = (mNextThreadIndex + 1) % 2;
855876
}
@@ -879,7 +900,7 @@ void GPURecoWorkflowSpec::run(ProcessingContext& pc)
879900
}
880901
fclose(fp);
881902
} else if (mConfParam->dumpBadTFMode == 2) {
882-
mGPUReco->DumpEvent(mNDebugDumps - 1, ptrsDump.get());
903+
mGPUReco->DumpEvent(mNDebugDumps - 1, ptrsDump.get(), threadIndex);
883904
}
884905
}
885906

prodtests/full-system-test/dpl-workflow.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -235,6 +235,9 @@ if [[ $EPNSYNCMODE == 1 ]]; then
235235
fi
236236
fi
237237
fi
238+
if [[ $GPUTYPE != "CPU" && $NGPUS > 1 ]]; then
239+
GPU_CONFIG_KEY+="GPU_global.dumpFolder=gpu_dump_[P];"
240+
fi
238241
if [[ $SYNCRAWMODE == 1 ]]; then
239242
GPU_CONFIG_KEY+="GPU_proc.tpcIncreasedMinClustersPerRow=500000;GPU_proc.ignoreNonFatalGPUErrors=1;GPU_proc.throttleAlarms=1;"
240243
if [[ $RUNTYPE == "PHYSICS" || $RUNTYPE == "COSMICS" || $RUNTYPE == "TECHNICAL" ]]; then

0 commit comments

Comments
 (0)