Skip to content

Commit 48f5bcc

Browse files
committed
Ensure that ODC Stop is called at FLP/QC-initiated GO_ERROR
This allows us to stop a run on EPNs at the GO_ERROR transition by adding a corresponding ODC.EnsureStop hook. As GO_ERROR can occur from any source state, we make the actual STOP call only if the ODC partition is in the RUNNING state. At the same time, ODC partitions require us to call ODC.Stop if they voluntarily transition to ERROR. In such a case, ODC.Stop allows the remaining healthy devices to finish processing. By keeping the original ODC.Stop behaviour, we preserve this functionality. Additionally, the commit includes minor corrections to a few related logs. Fixes OCTRL-1036.
1 parent 07ef0bb commit 48f5bcc

File tree

1 file changed

+71
-2
lines changed

1 file changed

+71
-2
lines changed

core/integration/odc/plugin.go

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,20 +1437,21 @@ func (p *Plugin) CallStack(data interface{}) (stack map[string]interface{}) {
14371437
rn, ok := varStack["run_number"]
14381438
if !ok {
14391439
log.WithField("partition", envId).
1440-
WithField("call", "Start").
1440+
WithField("call", "Stop").
14411441
Warn("cannot acquire run number for ODC Stop")
14421442
}
14431443
runNumberu64, err = strconv.ParseUint(rn, 10, 32)
14441444
if err != nil {
14451445
log.WithField("partition", envId).
1446+
WithField("call", "Stop").
14461447
WithError(err).
14471448
Error("cannot acquire run number for ODC EOR")
14481449
runNumberu64 = 0
14491450
}
14501451
runEndTimeMs, ok := varStack["run_end_time_ms"]
14511452
if !ok {
14521453
log.WithField("partition", envId).
1453-
WithField("call", "Start").
1454+
WithField("call", "Stop").
14541455
Warn("cannot acquire run_end_time_ms")
14551456
}
14561457

@@ -1473,6 +1474,74 @@ func (p *Plugin) CallStack(data interface{}) (stack map[string]interface{}) {
14731474
}
14741475
return
14751476
}
1477+
// EnsureStop sends an ODC Stop request only when the ODC partition reports
// the "RUNNING" state. It is intended for transitions such as GO_ERROR, which
// can be triggered from any source state, so the partition state is queried
// first and the Stop request is skipped for every other state.
//
// Failures of the state query or of the Stop request are logged and reported
// through the "__call_error" and "__call_error_reason" entries of
// call.VarStack. The named result out is never assigned, so the returned
// string is always empty.
stack["EnsureStop"] = func() (out string) {
	callFailedStr := "EPN EnsureStop call failed"

	timeout := callable.AcquireTimeout(ODC_STOP_TIMEOUT, varStack, "EnsureStop", envId)

	// One deadline covers both the state query and the Stop request below.
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	state, err := handleGetState(ctx, p.odcClient, envId)
	if err != nil {
		log.WithError(err).
			WithField("level", infologger.IL_Support).
			WithField("partition", envId).
			WithField("call", "EnsureStop").
			Error("ODC error")
		call.VarStack["__call_error_reason"] = err.Error()
		call.VarStack["__call_error"] = callFailedStr
		return
	}
	if state != "RUNNING" {
		// Nothing to stop: not an error, the hook is simply a no-op here.
		log.WithField("level", infologger.IL_Devel).
			WithField("partition", envId).
			WithField("call", "EnsureStop").
			Infof("ODC EnsureStop attempted, while ODC partition is not in 'RUNNING' but '%s', skipping", state)
		return
	}

	// A missing or malformed run number is not fatal: Stop is still sent,
	// with run number 0. Parsing is attempted only when the variable exists,
	// so that a missing value is reported once instead of twice.
	var runNumberu64 uint64
	rn, ok := varStack["run_number"]
	if !ok {
		log.WithField("partition", envId).
			WithField("call", "EnsureStop").
			Warn("cannot acquire run number for ODC EnsureStop")
	} else if parsed, parseErr := strconv.ParseUint(rn, 10, 32); parseErr != nil {
		log.WithField("partition", envId).
			WithField("call", "EnsureStop").
			WithError(parseErr).
			Error("cannot parse run number for ODC EnsureStop")
	} else {
		runNumberu64 = parsed
	}

	// A missing end-of-run timestamp is only warned about; the argument is
	// then forwarded as an empty string.
	runEndTimeMs, ok := varStack["run_end_time_ms"]
	if !ok {
		log.WithField("partition", envId).
			WithField("call", "EnsureStop").
			Warn("cannot acquire run_end_time_ms")
	}

	arguments := make(map[string]string)
	arguments["run_end_time_ms"] = runEndTimeMs

	err = handleStop(ctx, p.odcClient, arguments, paddingTimeout, envId, runNumberu64, call)
	if err != nil {
		log.WithError(err).
			WithField("level", infologger.IL_Support).
			WithField("partition", envId).
			WithField("call", "EnsureStop").
			Error("ODC error")
		call.VarStack["__call_error_reason"] = err.Error()
		call.VarStack["__call_error"] = callFailedStr
	}
	return
}
14761545
stack["EnsureCleanup"] = func() (out string) {
14771546
// ODC Shutdown for current env + all orphans
14781547

0 commit comments

Comments
 (0)