Skip to content

Commit 5ce1357

Browse files
authored
Call ODC STOP only when the partition is in RUNNING (#732)
Ensure that ODC Stop is called at FLP/QC-initiated GO_ERROR. This allows us to stop a run on EPNs on the GO_ERROR transition by adding a corresponding ODC.EnsureStop hook. As GO_ERROR can occur from any source state, we make the actual STOP call only if the ODC partition is in RUNNING. At the same time, ODC partitions require us to call ODC.Stop if they voluntarily transition to ERROR. In such a case, ODC.Stop allows the remaining healthy devices to finish processing. By keeping the original ODC.Stop behaviour, we preserve this functionality. Additionally, the commit includes minor corrections to a few related logs. Fixes OCTRL-1036.
1 parent 5b31372 commit 5ce1357

File tree

1 file changed

+71
-2
lines changed

1 file changed

+71
-2
lines changed

core/integration/odc/plugin.go

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,20 +1439,21 @@ func (p *Plugin) CallStack(data interface{}) (stack map[string]interface{}) {
14391439
rn, ok := varStack["run_number"]
14401440
if !ok {
14411441
log.WithField("partition", envId).
1442-
WithField("call", "Start").
1442+
WithField("call", "Stop").
14431443
Warn("cannot acquire run number for ODC Stop")
14441444
}
14451445
runNumberu64, err = strconv.ParseUint(rn, 10, 32)
14461446
if err != nil {
14471447
log.WithField("partition", envId).
1448+
WithField("call", "Stop").
14481449
WithError(err).
14491450
Error("cannot acquire run number for ODC EOR")
14501451
runNumberu64 = 0
14511452
}
14521453
runEndTimeMs, ok := varStack["run_end_time_ms"]
14531454
if !ok {
14541455
log.WithField("partition", envId).
1455-
WithField("call", "Start").
1456+
WithField("call", "Stop").
14561457
Warn("cannot acquire run_end_time_ms")
14571458
}
14581459

@@ -1475,6 +1476,74 @@ func (p *Plugin) CallStack(data interface{}) (stack map[string]interface{}) {
14751476
}
14761477
return
14771478
}
1479+
stack["EnsureStop"] = func() (out string) {
	// EnsureStop queries the current ODC partition state and forwards an ODC
	// Stop only when that state is "RUNNING". In any other state the call is
	// skipped with an informational log and no error is recorded.
	//
	// Failures of the state query or of the Stop call itself are logged and
	// recorded in call.VarStack under "__call_error_reason" and "__call_error".
	// The returned string is always empty.
	const callName = "EnsureStop"
	callFailedStr := "EPN EnsureStop call failed"

	// reportFailure logs an ODC error and records it in the call's variable
	// stack; it is shared by the state query and the Stop call.
	reportFailure := func(err error) {
		log.WithError(err).
			WithField("level", infologger.IL_Support).
			WithField("partition", envId).
			WithField("call", callName).
			Error("ODC error")
		call.VarStack["__call_error_reason"] = err.Error()
		call.VarStack["__call_error"] = callFailedStr
	}

	timeout := callable.AcquireTimeout(ODC_STOP_TIMEOUT, varStack, callName, envId)

	// A single deadline covers both the state query and the Stop call.
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	state, err := handleGetState(ctx, p.odcClient, envId)
	if err != nil {
		reportFailure(err)
		return
	}
	if state != "RUNNING" {
		log.WithField("level", infologger.IL_Devel).
			WithField("partition", envId).
			WithField("call", callName).
			Infof("ODC EnsureStop attempted, while ODC partition is not in 'RUNNING' but '%s', skipping", state)
		return
	}

	// The run number is best-effort: when it is missing or malformed we log
	// the problem once and proceed with 0 rather than aborting the Stop.
	var runNumberu64 uint64
	if rn, ok := varStack["run_number"]; !ok {
		log.WithField("partition", envId).
			WithField("call", callName).
			Warn("cannot acquire run number for ODC EnsureStop")
	} else if parsed, parseErr := strconv.ParseUint(rn, 10, 32); parseErr != nil {
		log.WithField("partition", envId).
			WithField("call", callName).
			WithError(parseErr).
			Error("cannot parse run number for ODC EnsureStop")
	} else {
		runNumberu64 = parsed
	}

	// A missing run_end_time_ms is only warned about; the empty string is
	// still forwarded as the argument value.
	runEndTimeMs, ok := varStack["run_end_time_ms"]
	if !ok {
		log.WithField("partition", envId).
			WithField("call", callName).
			Warn("cannot acquire run_end_time_ms")
	}

	arguments := map[string]string{
		"run_end_time_ms": runEndTimeMs,
	}

	if err := handleStop(ctx, p.odcClient, arguments, paddingTimeout, envId, runNumberu64, call); err != nil {
		reportFailure(err)
	}
	return
}
14781547
stack["EnsureCleanup"] = func() (out string) {
14791548
// ODC Shutdown for current env + all orphans
14801549

0 commit comments

Comments
 (0)