Skip to content

Commit e0ee685

Browse files
committed
[core] add metrics to the transition
1 parent 37a4f2e commit e0ee685

9 files changed

Lines changed: 59 additions & 4 deletions

File tree

common/monitoring/metric.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,17 @@ func (metric *Metric) AddTag(tagName string, value string) {
7171
metric.tags = append(metric.tags, Tag{name: tagName, value: value})
7272
}
7373

74+
const (
75+
ERROR = "error"
76+
SUCCESS = "success"
77+
CANCELLED = "cancelled"
78+
TIMEOUT = "timeout"
79+
)
80+
81+
func (metric *Metric) AddResult(result string) {
82+
metric.AddTag("result", result)
83+
}
84+
7485
func (metric *Metric) setField(fieldName string, field any) {
7586
if metric.fields == nil {
7687
metric.fields = make(FieldsType)

core/environment/transition_configure.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/AliceO2Group/Control/core/workflow"
3131

3232
"github.com/AliceO2Group/Control/common/event"
33+
"github.com/AliceO2Group/Control/common/monitoring"
3334
"github.com/AliceO2Group/Control/core/task"
3435
"github.com/AliceO2Group/Control/core/task/taskop"
3536
)
@@ -52,6 +53,9 @@ func (t ConfigureTransition) do(env *Environment) (err error) {
5253
return errors.New("cannot transition in NIL environment")
5354
}
5455

56+
metric := transitionMetric("configure", env)
57+
defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)()
58+
5559
wf := env.Workflow()
5660

5761
activeTasks := workflow.GetActiveTasks(wf)
@@ -64,9 +68,11 @@ func (t ConfigureTransition) do(env *Environment) (err error) {
6468
incomingEv := <-env.stateChangedCh
6569
// If some tasks failed to transition
6670
if tasksStateErrors := incomingEv.GetTasksStateChangedError(); tasksStateErrors != nil {
71+
metric.AddResult(monitoring.ERROR)
6772
return tasksStateErrors
6873
}
6974

7075
env.sendEnvironmentEvent(&event.EnvironmentEvent{EnvironmentID: env.Id().String(), State: "CONFIGURED"})
76+
metric.AddResult(monitoring.SUCCESS)
7177
return
7278
}

core/environment/transition_deploy.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636

3737
"github.com/AliceO2Group/Control/common/event"
3838
"github.com/AliceO2Group/Control/common/logger/infologger"
39+
"github.com/AliceO2Group/Control/common/monitoring"
3940
"github.com/AliceO2Group/Control/core/task"
4041
"github.com/AliceO2Group/Control/core/task/sm"
4142
"github.com/AliceO2Group/Control/core/task/taskop"
@@ -66,6 +67,9 @@ func (t DeployTransition) do(env *Environment) (err error) {
6667
return errors.New("cannot transition in NIL environment")
6768
}
6869

70+
metric := transitionMetric("deploy", env)
71+
defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)()
72+
6973
wf := env.Workflow()
7074

7175
// Skip cleanup for anything other than readout-dataflow
@@ -347,10 +351,14 @@ func (t DeployTransition) do(env *Environment) (err error) {
347351
log.WithField("level", infologger.IL_Ops).
348352
WithField("partition", env.Id().String()).
349353
Error(err)
354+
metric.AddResult(monitoring.ERROR)
350355
return
351356
}
352357

353358
env.sendEnvironmentEvent(&event.EnvironmentEvent{EnvironmentID: env.Id().String(), State: "DEPLOYED"})
359+
360+
metric.AddResult(monitoring.SUCCESS)
361+
354362
return
355363
}
356364

core/environment/transition_goerror.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
package environment
2626

2727
import (
28+
"github.com/AliceO2Group/Control/common/monitoring"
2829
"github.com/AliceO2Group/Control/core/controlcommands"
2930
"github.com/AliceO2Group/Control/core/task"
3031
"github.com/AliceO2Group/Control/core/task/sm"
@@ -44,6 +45,8 @@ type GoErrorTransition struct {
4445
}
4546

4647
func (t GoErrorTransition) do(env *Environment) (err error) {
48+
metric := transitionMetric("goerror", env)
49+
defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)()
4750

4851
// we stop all tasks which are in RUNNING
4952
toStop := env.Workflow().GetTasks().Filtered(func(t *task.Task) bool {
@@ -72,5 +75,6 @@ func (t GoErrorTransition) do(env *Environment) (err error) {
7275
<-env.stateChangedCh
7376
}
7477

78+
metric.AddResult(monitoring.SUCCESS)
7579
return
7680
}

core/environment/transition_reset.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"errors"
2929

3030
"github.com/AliceO2Group/Control/common/event"
31+
"github.com/AliceO2Group/Control/common/monitoring"
3132
"github.com/AliceO2Group/Control/core/task"
3233
"github.com/AliceO2Group/Control/core/task/sm"
3334
"github.com/AliceO2Group/Control/core/workflow"
@@ -51,6 +52,9 @@ func (t ResetTransition) do(env *Environment) (err error) {
5152
return errors.New("cannot transition in NIL environment")
5253
}
5354

55+
metric := transitionMetric("reset", env)
56+
defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)()
57+
5458
taskmanMessage := task.NewTransitionTaskMessage(
5559
workflow.GetActiveTasks(env.Workflow()),
5660
sm.CONFIGURED.String(),
@@ -64,9 +68,11 @@ func (t ResetTransition) do(env *Environment) (err error) {
6468
incomingEv := <-env.stateChangedCh
6569
// If some tasks failed to transition
6670
if tasksStateErrors := incomingEv.GetTasksStateChangedError(); tasksStateErrors != nil {
71+
metric.AddResult(monitoring.ERROR)
6772
return tasksStateErrors
6873
}
6974

7075
env.sendEnvironmentEvent(&event.EnvironmentEvent{EnvironmentID: env.Id().String(), State: "RESET"})
76+
metric.AddResult(monitoring.SUCCESS)
7177
return
7278
}

core/environment/transition_startactivity.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333

3434
"github.com/AliceO2Group/Control/common/event"
3535
"github.com/AliceO2Group/Control/common/logger/infologger"
36+
"github.com/AliceO2Group/Control/common/monitoring"
3637
"github.com/AliceO2Group/Control/core/controlcommands"
3738
"github.com/AliceO2Group/Control/core/task"
3839
"github.com/iancoleman/strcase"
@@ -72,6 +73,9 @@ func (t StartActivityTransition) do(env *Environment) (err error) {
7273
return errors.New("cannot transition in NIL environment")
7374
}
7475

76+
metric := transitionMetric("startactivity", env)
77+
defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)()
78+
7579
runNumber := env.currentRunNumber
7680

7781
log.WithField(infologger.Run, runNumber).
@@ -120,6 +124,7 @@ func (t StartActivityTransition) do(env *Environment) (err error) {
120124
incomingEv := <-env.stateChangedCh
121125
// If some tasks failed to transition
122126
if tasksStateErrors := incomingEv.GetTasksStateChangedError(); tasksStateErrors != nil {
127+
metric.AddResult(monitoring.ERROR)
123128
return tasksStateErrors
124129
}
125130

@@ -133,5 +138,6 @@ func (t StartActivityTransition) do(env *Environment) (err error) {
133138
Run: env.currentRunNumber,
134139
})
135140

141+
metric.AddResult(monitoring.SUCCESS)
136142
return
137143
}

core/environment/transition_stopactivity.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929

3030
"github.com/AliceO2Group/Control/common/event"
3131
"github.com/AliceO2Group/Control/common/logger/infologger"
32+
"github.com/AliceO2Group/Control/common/monitoring"
3233
"github.com/AliceO2Group/Control/core/controlcommands"
3334
"github.com/AliceO2Group/Control/core/task"
3435
"github.com/AliceO2Group/Control/core/task/sm"
@@ -63,6 +64,9 @@ func (t StopActivityTransition) do(env *Environment) (err error) {
6364
return errors.New("cannot transition in NIL environment")
6465
}
6566

67+
metric := transitionMetric("stopactivity", env)
68+
defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)()
69+
6670
log.WithField(infologger.Run, env.currentRunNumber).
6771
WithField("partition", env.Id().String()).
6872
WithField(infologger.Level, infologger.IL_Support).
@@ -98,6 +102,7 @@ func (t StopActivityTransition) do(env *Environment) (err error) {
98102
incomingEv := <-env.stateChangedCh
99103
// If some tasks failed to transition
100104
if tasksStateErrors := incomingEv.GetTasksStateChangedError(); tasksStateErrors != nil {
105+
metric.AddResult(monitoring.ERROR)
101106
return tasksStateErrors
102107
}
103108
env.sendEnvironmentEvent(&event.EnvironmentEvent{EnvironmentID: env.Id().String(), State: "CONFIGURED"})
@@ -107,5 +112,6 @@ func (t StopActivityTransition) do(env *Environment) (err error) {
107112
WithField(infologger.Level, infologger.IL_Support).
108113
Info("run stopped")
109114

115+
metric.AddResult(monitoring.SUCCESS)
110116
return
111117
}

core/environment/utils.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"sort"
3434

3535
"github.com/AliceO2Group/Control/common/logger/infologger"
36+
"github.com/AliceO2Group/Control/common/monitoring"
3637
pb "github.com/AliceO2Group/Control/common/protos"
3738
"github.com/AliceO2Group/Control/core/task"
3839
"github.com/AliceO2Group/Control/core/task/sm"
@@ -52,7 +53,7 @@ type WorkflowPublicInfo struct {
5253
func parseWorkflowPublicInfo(workflowExpr string) (WorkflowPublicInfo, error) {
5354
repoManager := the.RepoManager()
5455

55-
resolvedWorkflowPath, _, err := repoManager.GetWorkflow(workflowExpr) //Will fail if repo unknown
56+
resolvedWorkflowPath, _, err := repoManager.GetWorkflow(workflowExpr) // Will fail if repo unknown
5657
if err != nil {
5758
return WorkflowPublicInfo{}, err
5859
}
@@ -166,3 +167,10 @@ func HandleFailedGoError(err error, env *Environment) {
166167
env.setState("ERROR")
167168
}
168169
}
170+
171+
func transitionMetric(transition string, env *Environment) monitoring.Metric {
172+
metric := monitoring.NewMetric("transition_do")
173+
metric.AddTag("transition", transition)
174+
metric.AddTag("envId", env.Id().String())
175+
return metric
176+
}

core/workflow/callable/call.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ func (c *Call) Call() error {
117117
WithField("level", infologger.IL_Devel).
118118
Debugf("calling hook function %s", c.Func)
119119

120-
metric := c.newMetric("callablecall")
120+
metric := c.callableMetric("callablecall")
121121
defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)()
122122

123123
the.EventWriterWithTopic(topic.Call).WriteEvent(&evpb.Ev_CallEvent{
@@ -227,7 +227,7 @@ func (c *Call) Call() error {
227227
return nil
228228
}
229229

230-
func (c *Call) newMetric(name string) monitoring.Metric {
230+
func (c *Call) callableMetric(name string) monitoring.Metric {
231231
metric := monitoring.NewMetric(name)
232232
metric.AddTag("runtype", c.getRunTypeTag())
233233
metric.AddTag("name", c.GetName())
@@ -241,7 +241,7 @@ func (c *Call) Start() {
241241
ctx, cancel := context.WithCancel(context.Background())
242242
c.awaitCancel = cancel
243243
go func() {
244-
metric := c.newMetric("callablewrapped")
244+
metric := c.callableMetric("callablewrapped")
245245
defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)()
246246

247247
callId := fmt.Sprintf("hook:%s:%s", c.GetTraits().Trigger, c.GetName())

0 commit comments

Comments
 (0)