Skip to content

Commit 8d21174

Browse files
authored
DPL: improve exitTransitionTimeout handling (#13331)
1 parent 5ba7ba4 commit 8d21174

File tree

2 files changed: +25 -10 lines changed

2 files changed: +25 -10 lines changed

Framework/Core/src/DataProcessingContext.cxx

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,13 @@
1111

1212
#include "Framework/DataProcessingContext.h"
1313
#include "Framework/DataProcessorSpec.h"
14+
#include "Framework/EndOfStreamContext.h"
15+
#include "Framework/TimingInfo.h"
1416
#include "Framework/Signpost.h"
1517

1618
O2_DECLARE_DYNAMIC_LOG(data_processor_context);
19+
O2_DECLARE_DYNAMIC_LOG(calibration);
20+
1721
namespace o2::framework
1822
{
1923

Framework/Core/src/DataProcessingDevice.cxx

Lines changed: 21 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,8 @@ struct formatter<o2::framework::CompletionPolicy::CompletionOp> : ostream_format
9292
O2_DECLARE_DYNAMIC_LOG(device);
9393
// Special log to keep track of the lifetime of the parts
9494
O2_DECLARE_DYNAMIC_LOG(parts);
95+
// Stream which keeps track of the calibration lifetime logic
96+
O2_DECLARE_DYNAMIC_LOG(calibration);
9597
// Special log to track the async queue behavior
9698
O2_DECLARE_DYNAMIC_LOG(async_queue);
9799
// Special log to track the forwarding requests
@@ -131,11 +133,18 @@ bool hasOnlyGenerated(DeviceSpec const& spec)
131133

132134
void on_transition_requested_expired(uv_timer_t* handle)
133135
{
134-
auto* state = (DeviceState*)handle->data;
135-
state->loopReason |= DeviceState::TIMER_EXPIRED;
136+
auto* ref = (ServiceRegistryRef*)handle->data;
137+
auto& state = ref->get<DeviceState>();
138+
state.loopReason |= DeviceState::TIMER_EXPIRED;
139+
// Check if this is a source device
136140
O2_SIGNPOST_ID_FROM_POINTER(cid, device, handle);
137-
O2_SIGNPOST_EVENT_EMIT_WARN(device, cid, "callback", "Exit transition timer expired. Exiting.");
138-
state->transitionHandling = TransitionHandlingState::Expired;
141+
auto& spec = ref->get<DeviceSpec const>();
142+
if (hasOnlyGenerated(spec)) {
143+
O2_SIGNPOST_EVENT_EMIT_INFO(calibration, cid, "callback", "Grace period for source expired. Exiting.");
144+
} else {
145+
O2_SIGNPOST_EVENT_EMIT_ERROR(calibration, cid, "callback", "Grace period for data / calibration expired. Exiting.");
146+
}
147+
state.transitionHandling = TransitionHandlingState::Expired;
139148
}
140149

141150
void on_communication_requested(uv_async_t* s)
@@ -928,7 +937,7 @@ void DataProcessingDevice::startPollers()
928937
}
929938

930939
deviceContext.gracePeriodTimer = (uv_timer_t*)malloc(sizeof(uv_timer_t));
931-
deviceContext.gracePeriodTimer->data = &state;
940+
deviceContext.gracePeriodTimer->data = new ServiceRegistryRef(mServiceRegistry);
932941
uv_timer_init(state.loop, deviceContext.gracePeriodTimer);
933942
}
934943

@@ -958,6 +967,7 @@ void DataProcessingDevice::stopPollers()
958967
}
959968

960969
uv_timer_stop(deviceContext.gracePeriodTimer);
970+
delete (ServiceRegistryRef*)deviceContext.gracePeriodTimer->data;
961971
free(deviceContext.gracePeriodTimer);
962972
deviceContext.gracePeriodTimer = nullptr;
963973
}
@@ -1306,17 +1316,18 @@ void DataProcessingDevice::Run()
13061316
if (state.transitionHandling == TransitionHandlingState::NoTransition && NewStatePending()) {
13071317
state.transitionHandling = TransitionHandlingState::Requested;
13081318
auto& deviceContext = ref.get<DeviceContext>();
1309-
auto timeout = deviceContext.exitTransitionTimeout;
13101319
// Check if we only have timers
13111320
auto& spec = ref.get<DeviceSpec const>();
13121321
if (hasOnlyTimers(spec)) {
13131322
state.streaming = StreamingState::EndOfStreaming;
13141323
}
1315-
if (timeout != 0 && state.streaming != StreamingState::Idle) {
1324+
1325+
if (deviceContext.exitTransitionTimeout != 0 && state.streaming != StreamingState::Idle) {
13161326
state.transitionHandling = TransitionHandlingState::Requested;
13171327
ref.get<CallbackService>().call<CallbackService::Id::ExitRequested>(ServiceRegistryRef{ref});
13181328
uv_update_time(state.loop);
1319-
uv_timer_start(deviceContext.gracePeriodTimer, on_transition_requested_expired, timeout * 1000, 0);
1329+
O2_SIGNPOST_EVENT_EMIT(calibration, lid, "timer_setup", "Starting %d s timer for exitTransitionTimeout.", deviceContext.exitTransitionTimeout);
1330+
uv_timer_start(deviceContext.gracePeriodTimer, on_transition_requested_expired, deviceContext.exitTransitionTimeout * 1000, 0);
13201331
if (mProcessingPolicies.termination == TerminationPolicy::QUIT) {
13211332
O2_SIGNPOST_EVENT_EMIT_INFO(device, lid, "run_loop", "New state requested. Waiting for %d seconds before quitting.", (int)deviceContext.exitTransitionTimeout);
13221333
} else {
@@ -1331,7 +1342,7 @@ void DataProcessingDevice::Run()
13311342
} else if (mProcessingPolicies.termination == TerminationPolicy::QUIT) {
13321343
O2_SIGNPOST_EVENT_EMIT_INFO(device, lid, "run_loop", "New state pending and we are already idle, quitting immediately as per --completion-policy");
13331344
} else {
1334-
O2_SIGNPOST_EVENT_EMIT_INFO(device, lid, "runb_loop", "New state pending and we are already idle, switching to READY immediately.");
1345+
O2_SIGNPOST_EVENT_EMIT_INFO(device, lid, "run_loop", "New state pending and we are already idle, switching to READY immediately.");
13351346
}
13361347
}
13371348
}
@@ -1721,6 +1732,7 @@ void DataProcessingDevice::doRun(ServiceRegistryRef ref)
17211732
// We should keep the data generated at end of stream only for those
17221733
// which are not sources.
17231734
timingInfo.keepAtEndOfStream = shouldProcess;
1735+
O2_SIGNPOST_EVENT_EMIT(calibration, dpid, "calibration", "TimingInfo.keepAtEndOfStream %d", timingInfo.keepAtEndOfStream);
17241736

17251737
EndOfStreamContext eosContext{*context.registry, ref.get<DataAllocator>()};
17261738

@@ -2348,7 +2360,6 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v
23482360
*context.registry};
23492361
ProcessingContext processContext{record, ref, ref.get<DataAllocator>()};
23502362
{
2351-
O2_SIGNPOST_EVENT_EMIT(device, aid, "device", "Invoking preProcessingCallbacks");
23522363
// Notice this should be thread safe and reentrant
23532364
// as it is called from many threads.
23542365
streamContext.preProcessingCallbacks(processContext);

0 commit comments

Comments (0)