Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
[
{
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator should work with risks from alerts",
"labels": {
"Local": {},
"OTA-1813": {},
"Serial": {}
},
"resources": {
"isolation": {}
},
"source": "openshift:payload:cluster-version-operator",
"lifecycle": "blocking",
"environmentSelector": {}
},
{
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator should work with accept risks",
"labels": {
Expand Down
4 changes: 2 additions & 2 deletions cmd/cluster-version-operator-tests/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func main() {
Name: "openshift/cluster-version-operator/conformance/parallel",
Parents: []string{"openshift/conformance/parallel"},
Qualifiers: []string{
`!(name.contains("[Serial]") || "Serial" in labels || name.contains("[Slow]"))`,
`!(name.contains("[Serial]") || "Serial" in labels || name.contains("[Slow]") || "Local" in labels)`,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you mind also adding `"Slow" in labels`?

Copy link
Member Author

@hongkailiu hongkailiu Mar 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I admit that I prefer checking labels over strings.contains on names (that way we do not need to rename a test case to control whether it qualifies for a suite).
But this is not really related to this pull, right?
We could do it by another separate pull.

Do you need the Slow label at the moment?

},
})

Expand All @@ -33,7 +33,7 @@ func main() {
Name: "openshift/cluster-version-operator/conformance/serial",
Parents: []string{"openshift/conformance/serial"},
Qualifiers: []string{
`name.contains("[Serial]") || "Serial" in labels`,
`(name.contains("[Serial]") || "Serial" in labels) && !("Local" in labels)`,
},
})

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ require (
github.com/operator-framework/api v0.17.1
github.com/operator-framework/operator-lifecycle-manager v0.22.0
github.com/pkg/errors v0.9.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.86.0
github.com/prometheus-operator/prometheus-operator/pkg/client v0.86.0
github.com/prometheus/client_golang v1.22.0
github.com/prometheus/client_model v0.6.1
Expand Down Expand Up @@ -73,7 +74,6 @@ require (
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.86.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/robfig/cron v1.2.0 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
Expand Down
95 changes: 95 additions & 0 deletions pkg/clusterconditions/promql/alerts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package promql

import (
"context"
"fmt"
"sync"
"time"

"github.com/prometheus/client_golang/api"
prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/config"

"k8s.io/klog/v2"

"github.com/openshift/cluster-version-operator/pkg/clusterconditions"
)

// Getter retrieves alerts.
type Getter interface {
// Get returns an alerts result, or an error if one could not be provided.
// The context is passed through to the implementation.
Get(ctx context.Context) (prometheusv1.AlertsResult, error)
}

// NewAlertGetter builds a Getter for the given PromQL target. The returned
// getter caches the alerts it fetches and considers the cache expired after
// one minute.
//
// It panics if the condition produced by NewPromQL is not a *PromQL, which
// would indicate a programming error in this package.
func NewAlertGetter(promQLTarget clusterconditions.PromQLTarget) Getter {
	promQL, isPromQL := NewPromQL(promQLTarget).Condition.(*PromQL)
	if !isPromQL {
		panic("invalid condition type")
	}

	getter := &ocAlertGetter{
		promQL:     promQL,
		expiration: time.Minute,
	}
	return getter
}

// ocAlertGetter is a Getter that fetches alerts through a PromQL condition's
// connection settings and caches the most recent result.
type ocAlertGetter struct {
// promQL supplies the URL, HTTP client configuration, and query timeout
// used when fetching alerts.
promQL *PromQL

// mutex is held by refresh while it updates cached and lastRefresh.
mutex sync.Mutex
// cached is the result of the most recent successful refresh.
cached prometheusv1.AlertsResult
// expiration is how long after lastRefresh the cache is considered fresh.
expiration time.Duration
// lastRefresh is the time of the most recent successful refresh; it is
// the zero time until a refresh has succeeded.
lastRefresh time.Time
}

// Get returns the cached alerts, refreshing them first when the cache is older
// than o.expiration (or has never been populated).
//
// A failed refresh is logged and the previously cached result is returned
// instead, so the returned error is always nil. If no refresh has ever
// succeeded, the returned result is the zero AlertsResult.
//
// Get is safe for concurrent use: the mutex is held for the whole call so the
// expiry check, the refresh, and the read of the cache are consistent with one
// another, and concurrent callers that find the cache expired trigger only a
// single refresh rather than one each.
func (o *ocAlertGetter) Get(ctx context.Context) (prometheusv1.AlertsResult, error) {
	o.mutex.Lock()
	defer o.mutex.Unlock()

	if time.Now().After(o.lastRefresh.Add(o.expiration)) {
		if err := o.refreshLocked(ctx); err != nil {
			klog.Errorf("Failed to refresh alerts, using stale cache instead: %v", err)
		}
	}
	return o.cached, nil
}

// refresh unconditionally fetches the current alerts and stores them in the
// cache. It acquires o.mutex itself and therefore must not be called while the
// mutex is already held; use refreshLocked in that case.
func (o *ocAlertGetter) refresh(ctx context.Context) error {
	o.mutex.Lock()
	defer o.mutex.Unlock()

	return o.refreshLocked(ctx)
}

// refreshLocked fetches the current alerts and, on success, replaces o.cached
// and sets o.lastRefresh to the current time. On failure the cache and
// lastRefresh are left untouched and a wrapped error is returned.
//
// The caller must hold o.mutex.
func (o *ocAlertGetter) refreshLocked(ctx context.Context) error {
	klog.Info("refresh alerts ...")
	p := o.promQL

	// Look up the host to query and point the PromQL URL at it.
	host, err := p.Host(ctx)
	if err != nil {
		return fmt.Errorf("failure determine thanos IP: %w", err)
	}
	p.url.Host = host
	clientConfig := api.Config{Address: p.url.String()}

	roundTripper, err := config.NewRoundTripperFromConfig(p.HTTPClientConfig, "cluster-conditions")
	if err != nil {
		return fmt.Errorf("creating PromQL round-tripper: %w", err)
	}
	clientConfig.RoundTripper = roundTripper

	promqlClient, err := api.NewClient(clientConfig)
	if err != nil {
		return fmt.Errorf("creating PromQL client: %w", err)
	}

	// Wrap the client in statusCodeNotImplementedForPostClient (defined
	// elsewhere in this package) before handing it to the v1 API.
	v1api := prometheusv1.NewAPI(&statusCodeNotImplementedForPostClient{
		client: promqlClient,
	})

	// Bound the query by the configured timeout, if one is set.
	queryContext := ctx
	if p.QueryTimeout > 0 {
		var cancel context.CancelFunc
		queryContext, cancel = context.WithTimeout(ctx, p.QueryTimeout)
		defer cancel()
	}

	r, err := v1api.Alerts(queryContext)
	if err != nil {
		return fmt.Errorf("failed to get alerts: %w", err)
	}

	o.cached = r
	o.lastRefresh = time.Now()
	klog.Infof("refreshed: %d alerts", len(o.cached.Alerts))
	return nil
}
Loading