Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ require (
github.com/opencontainers/go-digest v1.0.0
github.com/openshift-eng/openshift-tests-extension v0.0.0-20251218142942-7ecc8801b9df
github.com/openshift-kni/commatrix v0.0.5-0.20251111204857-e5a931eff73f
github.com/openshift/api v0.0.0-20251015095338-264e80a2b6e7
github.com/openshift/api v0.0.0-20260127135951-36c258ad56e8
github.com/openshift/apiserver-library-go v0.0.0-20251015164739-79d04067059d
github.com/openshift/build-machinery-go v0.0.0-20250530140348-dc5b2804eeee
github.com/openshift/client-go v0.0.0-20251015124057-db0dee36e235
github.com/openshift/client-go v0.0.0-20260108185524-48f4ccfc4e13
github.com/openshift/library-go v0.0.0-20251015151611-6fc7a74b67c5
github.com/operator-framework/api v0.36.0
github.com/ovn-org/ovn-kubernetes/go-controller v0.0.0-20250118001652-a8b9c3c31417
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -828,14 +828,14 @@ github.com/openshift-eng/openshift-tests-extension v0.0.0-20251218142942-7ecc880
github.com/openshift-eng/openshift-tests-extension v0.0.0-20251218142942-7ecc8801b9df/go.mod h1:6gkP5f2HL0meusT0Aim8icAspcD1cG055xxBZ9yC68M=
github.com/openshift-kni/commatrix v0.0.5-0.20251111204857-e5a931eff73f h1:E72Zoc+JImPehBrXkgaCbIDbSFuItvyX6RCaZ0FQE5k=
github.com/openshift-kni/commatrix v0.0.5-0.20251111204857-e5a931eff73f/go.mod h1:cDVdp0eda7EHE6tLuSeo4IqPWdAX/KJK+ogBirIGtsI=
github.com/openshift/api v0.0.0-20251015095338-264e80a2b6e7 h1:Ot2fbEEPmF3WlPQkyEW/bUCV38GMugH/UmZvxpWceNc=
github.com/openshift/api v0.0.0-20251015095338-264e80a2b6e7/go.mod h1:d5uzF0YN2nQQFA0jIEWzzOZ+edmo6wzlGLvx5Fhz4uY=
github.com/openshift/api v0.0.0-20260127135951-36c258ad56e8 h1:h43iARQcqIso/0VkfweqpGDkUtPW7OUcEEKkzGzIrlc=
github.com/openshift/api v0.0.0-20260127135951-36c258ad56e8/go.mod h1:d5uzF0YN2nQQFA0jIEWzzOZ+edmo6wzlGLvx5Fhz4uY=
github.com/openshift/apiserver-library-go v0.0.0-20251015164739-79d04067059d h1:Mfya3RxHWvidOrKyHj3bmFn5x2B89DLZIvDAhwm+C2s=
github.com/openshift/apiserver-library-go v0.0.0-20251015164739-79d04067059d/go.mod h1:zm2/rIUp0p83pz0/1kkSoKTqhTr3uUKSKQ9fP7Z3g7Y=
github.com/openshift/build-machinery-go v0.0.0-20250530140348-dc5b2804eeee h1:+Sp5GGnjHDhT/a/nQ1xdp43UscBMr7G5wxsYotyhzJ4=
github.com/openshift/build-machinery-go v0.0.0-20250530140348-dc5b2804eeee/go.mod h1:8jcm8UPtg2mCAsxfqKil1xrmRMI3a+XU2TZ9fF8A7TE=
github.com/openshift/client-go v0.0.0-20251015124057-db0dee36e235 h1:9JBeIXmnHlpXTQPi7LPmu1jdxznBhAE7bb1K+3D8gxY=
github.com/openshift/client-go v0.0.0-20251015124057-db0dee36e235/go.mod h1:L49W6pfrZkfOE5iC1PqEkuLkXG4W0BX4w8b+L2Bv7fM=
github.com/openshift/client-go v0.0.0-20260108185524-48f4ccfc4e13 h1:6rd4zSo2UaWQcAPZfHK9yzKVqH0BnMv1hqMzqXZyTds=
github.com/openshift/client-go v0.0.0-20260108185524-48f4ccfc4e13/go.mod h1:YvOmPmV7wcJxpfhTDuFqqs2Xpb3M3ovsM6Qs/i2ptq4=
github.com/openshift/kubernetes v1.30.1-0.20251017123720-96593f323733 h1:Mpab1CmJPLVWGB0CNGoWnup/NScvv55MVPe94c8JgUk=
github.com/openshift/kubernetes v1.30.1-0.20251017123720-96593f323733/go.mod h1:w3+IfrXNp5RosdDXg3LB55yijJqR/FwouvVntYHQf0o=
github.com/openshift/kubernetes/staging/src/k8s.io/api v0.0.0-20251017123720-96593f323733 h1:42lm41QwjG8JoSicx4FHcuIG2kxHxlUnz6c+ftg2e0E=
Expand Down
328 changes: 193 additions & 135 deletions test/extended/two_node/tnf_node_replacement.go

Large diffs are not rendered by default.

56 changes: 56 additions & 0 deletions test/extended/two_node/two_node.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package two_node

import (
"fmt"
"sort"
"strings"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"

"github.com/openshift/origin/test/extended/two_node/utils"
)

// ReportAfterSuite validates that no tests were skipped due to unmet preconditions.
// This runs after all tests complete and after AfterSuite/SynchronizedAfterSuite.
//
// The two-node test suite requires a fully healthy cluster to run disruptive recovery tests.
// Individual tests skip (not fail) when preconditions aren't met to maintain test stability,
// but this reporting hook ensures the overall suite fails with diagnostic information about
// which tests were skipped and why, making precondition failures visible to CI analysis services.
//
// This validation only triggers if SkipIfClusterIsNotHealthy() was called and recorded skips,
// which means it automatically scopes to runs that included two-node tests with health issues.
var _ = g.ReportAfterSuite("Two Node Suite Precondition Validation", func(report g.Report) {
	skips := utils.GetPreconditionSkips()
	if len(skips) == 0 {
		// No tests were skipped due to precondition failures.
		// Either no two-node tests ran, or the cluster was healthy.
		return
	}

	// Map iteration order is random; sort the test names so the report is
	// stable from run to run.
	testNames := make([]string, 0, len(skips))
	for testName := range skips {
		testNames = append(testNames, testName)
	}
	sort.Strings(testNames)

	// Build the detailed failure message, one entry per line.
	messages := []string{
		fmt.Sprintf("\n\n%d test(s) were skipped due to unmet cluster preconditions:", len(skips)),
		"This indicates the cluster was not in a healthy state when tests attempted to run.",
		"\nSkipped tests:",
	}
	for _, testName := range testNames {
		messages = append(messages,
			fmt.Sprintf("\n • %s", testName),
			fmt.Sprintf(" Reason: %s", skips[testName]),
		)
	}
	messages = append(messages,
		"\n\nThe two-node test suite requires a fully healthy cluster.",
		"Please investigate and resolve the cluster health issues before running this suite.",
	)

	failureMessage := strings.Join(messages, "\n")

	// Gomega treats the first description argument as a format string. Pass the
	// pre-built message through "%s" so a '%' inside a test name or skip reason
	// is printed literally instead of being parsed as a formatting verb.
	o.Expect(skips).To(o.BeEmpty(), "%s", failureMessage)
})
20 changes: 10 additions & 10 deletions test/extended/two_node/utils/apis/csr.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
e2e "k8s.io/kubernetes/test/e2e/framework"
)

// ApproveCSRs monitors and approves pending CSRs until timeout or expected count reached.
Expand All @@ -20,12 +20,12 @@ func ApproveCSRs(oc *exutil.CLI, timeout time.Duration, pollInterval time.Durati
startTime := time.Now()
approvedCount := 0

klog.V(2).Infof("Starting CSR approval monitoring for %v", timeout)
e2e.Logf("Starting CSR approval monitoring for %v", timeout)

wait.PollUntilContextTimeout(context.Background(), pollInterval, timeout, true, func(ctx context.Context) (done bool, err error) {
csrList, err := oc.AdminKubeClient().CertificatesV1().CertificateSigningRequests().List(ctx, v1.ListOptions{})
if err != nil {
klog.V(4).Infof("Failed to get CSRs: %v", err)
e2e.Logf("Failed to get CSRs: %v", err)
return false, nil
}
pendingCSRs := []string{}
Expand All @@ -36,12 +36,12 @@ func ApproveCSRs(oc *exutil.CLI, timeout time.Duration, pollInterval time.Durati
}

for _, csrName := range pendingCSRs {
klog.V(2).Infof("Approving CSR: %s", csrName)
e2e.Logf("Approving CSR: %s", csrName)

// Get the CSR
csr, err := oc.AdminKubeClient().CertificatesV1().CertificateSigningRequests().Get(ctx, csrName, v1.GetOptions{})
if err != nil {
klog.V(4).Infof("Failed to get CSR %s: %v", csrName, err)
e2e.Logf("Failed to get CSR %s: %v", csrName, err)
continue
}

Expand All @@ -58,25 +58,25 @@ func ApproveCSRs(oc *exutil.CLI, timeout time.Duration, pollInterval time.Durati
_, err = oc.AdminKubeClient().CertificatesV1().CertificateSigningRequests().UpdateApproval(ctx, csrName, csr, v1.UpdateOptions{})
if err == nil {
approvedCount++
klog.V(2).Infof("Approved CSR %s (total approved: %d)", csrName, approvedCount)
e2e.Logf("Approved CSR %s (total approved: %d)", csrName, approvedCount)
} else {
klog.V(4).Infof("Failed to approve CSR %s: %v", csrName, err)
e2e.Logf("Failed to approve CSR %s: %v", csrName, err)
}
}

// Continue monitoring until timeout
if len(pendingCSRs) > 0 {
klog.V(4).Infof("Approved %d CSRs this iteration, continuing to monitor (elapsed: %v)", len(pendingCSRs), time.Since(startTime))
e2e.Logf("Approved %d CSRs this iteration, continuing to monitor (elapsed: %v)", len(pendingCSRs), time.Since(startTime))
}

// Check if we've reached the expected count
if expectedCSRCount > 0 && approvedCount >= expectedCSRCount {
klog.V(2).Infof("All %d expected CSRs approved! (elapsed: %v)", approvedCount, time.Since(startTime))
e2e.Logf("All %d expected CSRs approved! (elapsed: %v)", approvedCount, time.Since(startTime))
return true, nil
}
return false, nil
})

klog.V(2).Infof("CSR approval monitoring complete: approved %d CSRs in %v", approvedCount, time.Since(startTime))
e2e.Logf("CSR approval monitoring complete: approved %d CSRs in %v", approvedCount, time.Since(startTime))
return approvedCount
}
198 changes: 198 additions & 0 deletions test/extended/two_node/utils/apis/pacemaker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
package apis

import (
"context"
"fmt"
"time"

etcdv1alpha1 "github.com/openshift/api/etcd/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/klog/v2"

"github.com/openshift/origin/test/extended/two_node/utils/core"
exutil "github.com/openshift/origin/test/extended/util"
)

// pacemakerClusterName is the name used to look up the singleton PacemakerCluster CR.
const pacemakerClusterName = "cluster"

// pacemakerClusterGVR identifies the pacemakerclusters resource of the
// etcd.openshift.io/v1alpha1 API for access through the dynamic client.
var pacemakerClusterGVR = schema.GroupVersion{
	Group:   "etcd.openshift.io",
	Version: "v1alpha1",
}.WithResource("pacemakerclusters")

// GetPacemakerCluster fetches the PacemakerCluster CR named by
// pacemakerClusterName through the admin dynamic client and converts the
// unstructured result into the typed etcdv1alpha1.PacemakerCluster.
//
// It returns a wrapped error when either the lookup or the conversion fails;
// the returned pointer is nil in that case.
func GetPacemakerCluster(oc *exutil.CLI) (*etcdv1alpha1.PacemakerCluster, error) {
	resourceClient := oc.AdminDynamicClient().Resource(pacemakerClusterGVR)

	unstructuredObj, getErr := resourceClient.Get(context.Background(), pacemakerClusterName, metav1.GetOptions{})
	if getErr != nil {
		return nil, fmt.Errorf("failed to get PacemakerCluster: %w", getErr)
	}

	typed := &etcdv1alpha1.PacemakerCluster{}
	convertErr := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredObj.UnstructuredContent(), typed)
	if convertErr != nil {
		return nil, fmt.Errorf("failed to convert PacemakerCluster: %w", convertErr)
	}

	return typed, nil
}

// PacemakerClusterExists reports whether the PacemakerCluster CR could be
// retrieved. Any error from GetPacemakerCluster (lookup or conversion) is
// reported as false; the cause of the failure is not distinguished.
func PacemakerClusterExists(oc *exutil.CLI) bool {
	if _, err := GetPacemakerCluster(oc); err != nil {
		return false
	}
	return true
}

// findCondition returns a pointer to the first entry of conditions whose Type
// equals conditionType, or nil when there is no such entry. The pointer refers
// to the element inside the given slice, not to a copy.
func findCondition(conditions []metav1.Condition, conditionType string) *metav1.Condition {
	for idx := range conditions {
		if candidate := &conditions[idx]; candidate.Type == conditionType {
			return candidate
		}
	}
	return nil
}

// getNodeStatus returns a pointer to the status entry whose NodeName equals
// nodeName, or nil when the node list is unset or contains no such entry.
// The pointer refers to the element stored in pc.Status.Nodes.
func getNodeStatus(pc *etcdv1alpha1.PacemakerCluster, nodeName string) *etcdv1alpha1.PacemakerClusterNodeStatus {
	nodesPtr := pc.Status.Nodes
	if nodesPtr == nil {
		return nil
	}

	nodes := *nodesPtr
	for idx := range nodes {
		if nodes[idx].NodeName != nodeName {
			continue
		}
		return &nodes[idx]
	}
	return nil
}

// getNodeCount returns how many node entries pc.Status.Nodes holds, treating
// an unset node list as zero.
func getNodeCount(pc *etcdv1alpha1.PacemakerCluster) int {
	if nodes := pc.Status.Nodes; nodes != nil {
		return len(*nodes)
	}
	return 0
}

// WaitForPacemakerClusterHealthy polls the PacemakerCluster CR via
// core.PollUntil until its cluster-healthy condition has status True.
//
// Errors fetching the CR are logged and treated as "not yet healthy" so that
// polling continues. The function's result is whatever core.PollUntil returns.
func WaitForPacemakerClusterHealthy(oc *exutil.CLI, timeout, pollInterval time.Duration) error {
	klog.V(2).Infof("Waiting for PacemakerCluster to become healthy (timeout: %v)", timeout)

	isHealthy := func() (bool, error) {
		cluster, getErr := GetPacemakerCluster(oc)
		if getErr != nil {
			klog.V(4).Infof("Error getting PacemakerCluster: %v", getErr)
			return false, nil
		}

		// reason stays empty when the condition has not been published yet.
		var reason string
		if healthy := findCondition(cluster.Status.Conditions, etcdv1alpha1.ClusterHealthyConditionType); healthy != nil {
			if healthy.Status == metav1.ConditionTrue {
				klog.V(2).Infof("PacemakerCluster is healthy")
				return true, nil
			}
			reason = healthy.Reason
		}

		klog.V(4).Infof("PacemakerCluster not yet healthy, reason: %s", reason)
		return false, nil
	}

	return core.PollUntil(isHealthy, timeout, pollInterval, "PacemakerCluster to become healthy")
}

// WaitForNodeOffline polls the PacemakerCluster CR via core.PollUntil until the
// named node's online condition has status False with the offline reason.
//
// A failed fetch, a node missing from the status, or a missing online
// condition are each logged and treated as "keep polling". The function's
// result is whatever core.PollUntil returns.
func WaitForNodeOffline(oc *exutil.CLI, nodeName string, timeout, pollInterval time.Duration) error {
	klog.V(2).Infof("Waiting for node %s to be reported as offline in PacemakerCluster (timeout: %v)", nodeName, timeout)

	waitingFor := fmt.Sprintf("node %s to be offline", nodeName)

	isOffline := func() (bool, error) {
		cluster, getErr := GetPacemakerCluster(oc)
		if getErr != nil {
			klog.V(4).Infof("Error getting PacemakerCluster: %v", getErr)
			return false, nil
		}

		status := getNodeStatus(cluster, nodeName)
		if status == nil {
			klog.V(4).Infof("Node %s not found in PacemakerCluster status", nodeName)
			return false, nil
		}

		online := findCondition(status.Conditions, etcdv1alpha1.NodeOnlineConditionType)
		if online == nil {
			klog.V(4).Infof("Online condition not found for node %s", nodeName)
			return false, nil
		}

		// Both the status and the reason must match before the node counts as offline.
		if online.Status != metav1.ConditionFalse || online.Reason != etcdv1alpha1.NodeOnlineReasonOffline {
			klog.V(4).Infof("Node %s Online condition: %s (reason: %s)", nodeName, online.Status, online.Reason)
			return false, nil
		}

		klog.V(2).Infof("Node %s is now reported as offline", nodeName)
		return true, nil
	}

	return core.PollUntil(isOffline, timeout, pollInterval, waitingFor)
}

// WaitForNodeCount polls the PacemakerCluster CR via core.PollUntil until the
// number of node entries in its status equals expectedCount exactly.
//
// Errors fetching the CR are logged and treated as "keep polling". The
// function's result is whatever core.PollUntil returns.
func WaitForNodeCount(oc *exutil.CLI, expectedCount int, timeout, pollInterval time.Duration) error {
	klog.V(2).Infof("Waiting for PacemakerCluster to have %d nodes (timeout: %v)", expectedCount, timeout)

	waitingFor := fmt.Sprintf("PacemakerCluster to have %d nodes", expectedCount)

	hasExpectedCount := func() (bool, error) {
		cluster, getErr := GetPacemakerCluster(oc)
		if getErr != nil {
			klog.V(4).Infof("Error getting PacemakerCluster: %v", getErr)
			return false, nil
		}

		current := getNodeCount(cluster)
		if current != expectedCount {
			klog.V(4).Infof("PacemakerCluster has %d nodes, waiting for %d", current, expectedCount)
			return false, nil
		}

		klog.V(2).Infof("PacemakerCluster has %d nodes", expectedCount)
		return true, nil
	}

	return core.PollUntil(hasExpectedCount, timeout, pollInterval, waitingFor)
}

// LogPacemakerClusterStatus logs a snapshot of the PacemakerCluster CR for
// debugging: the last-updated timestamp, the cluster-level healthy and
// node-count conditions (when present), the node count, and a one-line
// healthy/online summary for every node.
//
// logContext is a free-form label placed in square brackets at the start of
// the header line so the snapshot can be tied to a step in the test. If the
// CR cannot be fetched, the error is logged and nothing else is printed.
func LogPacemakerClusterStatus(oc *exutil.CLI, logContext string) {
	// The parameter is deliberately not named "context": that would shadow the
	// imported context package for the whole function body.
	pc, err := GetPacemakerCluster(oc)
	if err != nil {
		klog.V(2).Infof("[%s] Failed to get PacemakerCluster status: %v", logContext, err)
		return
	}

	klog.V(2).Infof("[%s] PacemakerCluster status:", logContext)
	klog.V(2).Infof(" LastUpdated: %s", pc.Status.LastUpdated.Format(time.RFC3339))
	if cond := findCondition(pc.Status.Conditions, etcdv1alpha1.ClusterHealthyConditionType); cond != nil {
		klog.V(2).Infof(" Healthy: %s (reason: %s)", cond.Status, cond.Reason)
	}
	if cond := findCondition(pc.Status.Conditions, etcdv1alpha1.ClusterNodeCountAsExpectedConditionType); cond != nil {
		klog.V(2).Infof(" NodeCountAsExpected: %s (reason: %s)", cond.Status, cond.Reason)
	}
	klog.V(2).Infof(" Node count: %d", getNodeCount(pc))

	if pc.Status.Nodes == nil {
		return
	}

	// summarize renders a node condition as "<status> (<reason>)", or
	// "unknown" when the node does not report that condition.
	summarize := func(conditions []metav1.Condition, conditionType string) string {
		cond := findCondition(conditions, conditionType)
		if cond == nil {
			return "unknown"
		}
		return fmt.Sprintf("%s (%s)", cond.Status, cond.Reason)
	}

	// Index into the slice rather than ranging by value to avoid copying each
	// node status struct.
	nodes := *pc.Status.Nodes
	for i := range nodes {
		node := &nodes[i]
		klog.V(2).Infof(" Node %s: Healthy=%s, Online=%s",
			node.NodeName,
			summarize(node.Conditions, etcdv1alpha1.NodeHealthyConditionType),
			summarize(node.Conditions, etcdv1alpha1.NodeOnlineConditionType),
		)
	}
}
Loading