Skip to content

Commit 93e99c9

Browse files
committed
GPU: Fix handling of non-critical errors in double-pipeline mode
1 parent abe259d commit 93e99c9

File tree

2 files changed

+12
-14
lines changed

2 files changed

+12
-14
lines changed

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 9 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ struct GPUReconstructionPipelineQueue {
6363
} // namespace
6464

6565
struct GPUReconstructionPipelineContext {
66-
std::queue<GPUReconstructionPipelineQueue*> queue;
66+
std::queue<GPUReconstructionPipelineQueue*> pipelineQueue;
6767
std::mutex mutex;
6868
std::condition_variable cond;
6969
bool terminate = false;
@@ -1089,13 +1089,13 @@ void GPUReconstruction::RunPipelineWorker()
10891089
while (!terminate) {
10901090
{
10911091
std::unique_lock<std::mutex> lk(mPipelineContext->mutex);
1092-
mPipelineContext->cond.wait(lk, [this] { return this->mPipelineContext->queue.size() > 0; });
1092+
mPipelineContext->cond.wait(lk, [this] { return this->mPipelineContext->pipelineQueue.size() > 0; });
10931093
}
10941094
GPUReconstructionPipelineQueue* q;
10951095
{
10961096
std::lock_guard<std::mutex> lk(mPipelineContext->mutex);
1097-
q = mPipelineContext->queue.front();
1098-
mPipelineContext->queue.pop();
1097+
q = mPipelineContext->pipelineQueue.front();
1098+
mPipelineContext->pipelineQueue.pop();
10991099
}
11001100
if (q->op == 1) {
11011101
terminate = 1;
@@ -1132,26 +1132,23 @@ int32_t GPUReconstruction::EnqueuePipeline(bool terminate)
11321132
if (rec->mPipelineContext->terminate) {
11331133
throw std::runtime_error("Must not enqueue work after termination request");
11341134
}
1135-
rec->mPipelineContext->queue.push(q);
1135+
rec->mPipelineContext->pipelineQueue.push(q);
11361136
rec->mPipelineContext->terminate = terminate;
11371137
rec->mPipelineContext->cond.notify_one();
11381138
}
11391139
q->c.wait(lkdone, [&q]() { return q->done; });
1140-
if (q->retVal) {
1140+
if (terminate || (q->retVal && (q->retVal != 3 || !GetProcessingSettings().ignoreNonFatalGPUErrors))) {
11411141
return q->retVal;
11421142
}
1143-
if (terminate) {
1144-
return 0;
1145-
} else {
1146-
return mChains[0]->FinalizePipelinedProcessing();
1147-
}
1143+
int32_t retVal2 = mChains[0]->FinalizePipelinedProcessing();
1144+
return retVal2 ? retVal2 : q->retVal;
11481145
}
11491146

11501147
GPUChain* GPUReconstruction::GetNextChainInQueue()
11511148
{
11521149
GPUReconstruction* rec = mMaster ? mMaster : this;
11531150
std::lock_guard<std::mutex> lk(rec->mPipelineContext->mutex);
1154-
return rec->mPipelineContext->queue.size() && rec->mPipelineContext->queue.front()->op == 0 ? rec->mPipelineContext->queue.front()->chain : nullptr;
1151+
return rec->mPipelineContext->pipelineQueue.size() && rec->mPipelineContext->pipelineQueue.front()->op == 0 ? rec->mPipelineContext->pipelineQueue.front()->chain : nullptr;
11551152
}
11561153

11571154
void GPUReconstruction::PrepareEvent() // TODO: Clean this up, this should not be called from chainTracking but before

GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,8 +62,9 @@ int32_t GPUChainTracking::RunTPCCompression()
6262
#ifdef GPUCA_TPC_GEOMETRY_O2
6363
if (mPipelineFinalizationCtx && GetProcessingSettings().doublePipelineClusterizer) {
6464
SynchronizeEventAndRelease(mEvents->single);
65-
((GPUChainTracking*)GetNextChainInQueue())->RunTPCClusterizer_prepare(false);
66-
((GPUChainTracking*)GetNextChainInQueue())->mCFContext->ptrClusterNativeSave = processorsShadow()->ioPtrs.clustersNative;
65+
auto* foreignChain = (GPUChainTracking*)GetNextChainInQueue();
66+
foreignChain->RunTPCClusterizer_prepare(false);
67+
foreignChain->mCFContext->ptrClusterNativeSave = processorsShadow()->ioPtrs.clustersNative;
6768
}
6869
#endif
6970
SynchronizeStream(0);

0 commit comments

Comments
 (0)