Skip to content

Commit 5b8024b

Browse files
chore(ci): Add test to check if user changes are preserved
Add a test to ensure that OLM is not reverting user changes like kubectl rollout restart. Assisted-by: Cursor/Claude
1 parent 147cfa8 commit 5b8024b

2 files changed

Lines changed: 245 additions & 0 deletions

File tree

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
@BoxcutterRuntime
2+
Feature: Rollout Restart User Changes
3+
# Verifies that user-added pod template annotations persist after OLM reconciliation.
4+
# Fixes: https://github.com/operator-framework/operator-lifecycle-manager/issues/3392
5+
6+
Background:
7+
Given OLM is available
8+
And ClusterCatalog "test" serves bundles
9+
And ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}
10+
11+
Scenario: User-initiated deployment changes persist after OLM reconciliation
12+
When ClusterExtension is applied
13+
"""
14+
apiVersion: olm.operatorframework.io/v1
15+
kind: ClusterExtension
16+
metadata:
17+
name: ${NAME}
18+
spec:
19+
namespace: ${TEST_NAMESPACE}
20+
serviceAccount:
21+
name: olm-sa
22+
source:
23+
sourceType: Catalog
24+
catalog:
25+
packageName: test
26+
selector:
27+
matchLabels:
28+
"olm.operatorframework.io/metadata.name": test-catalog
29+
"""
30+
Then ClusterExtension is rolled out
31+
And ClusterExtension is available
32+
And resource "deployment/test-operator" is available
33+
When user performs rollout restart on "deployment/test-operator"
34+
Then deployment "test-operator" has restart annotation
35+
And deployment "test-operator" rollout is complete
36+
And deployment "test-operator" has 2 replica sets
37+
When ClusterExtension reconciliation is triggered
38+
And ClusterExtension has been reconciled the latest generation
39+
Then deployment "test-operator" has restart annotation
40+
And deployment "test-operator" rollout is complete

test/e2e/steps/steps.go

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,12 @@ func RegisterSteps(sc *godog.ScenarioContext) {
9090
sc.Step(`^(?i)resource apply fails with error msg containing "([^"]+)"$`, ResourceApplyFails)
9191
sc.Step(`^(?i)resource "([^"]+)" is eventually restored$`, ResourceRestored)
9292
sc.Step(`^(?i)resource "([^"]+)" matches$`, ResourceMatches)
93+
sc.Step(`^(?i)user performs rollout restart on "([^"]+)"$`, UserPerformsRolloutRestart)
94+
sc.Step(`^(?i)deployment "([^"]+)" has restart annotation$`, DeploymentHasRestartAnnotation)
95+
sc.Step(`^(?i)deployment "([^"]+)" rollout is progressing$`, DeploymentRolloutIsProgressing)
96+
sc.Step(`^(?i)deployment "([^"]+)" rollout is complete$`, DeploymentRolloutIsComplete)
97+
sc.Step(`^(?i)deployment "([^"]+)" has (\d+) replica sets?$`, DeploymentHasReplicaSets)
98+
sc.Step(`^(?i)ClusterExtension reconciliation is triggered$`, TriggerClusterExtensionReconciliation)
9399

94100
sc.Step(`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in test namespace$`, ServiceAccountWithNeededPermissionsIsAvailableInNamespace)
95101
sc.Step(`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in \${TEST_NAMESPACE}$`, ServiceAccountWithNeededPermissionsIsAvailableInNamespace)
@@ -1309,3 +1315,202 @@ func latestActiveRevisionForExtension(extName string) (*ocv1.ClusterExtensionRev
13091315

13101316
return latest, nil
13111317
}
1318+
1319+
// UserPerformsRolloutRestart simulates a user running "kubectl rollout restart deployment/<name>".
1320+
// This adds a restart annotation to trigger a rolling restart of pods.
1321+
// This is used to test the generic fix - OLM should not undo ANY user-added annotations.
1322+
// In OLMv0, OLM would undo this change. In OLMv1, it should stay because kubectl owns it.
1323+
// See: https://github.com/operator-framework/operator-lifecycle-manager/issues/3392
1324+
func UserPerformsRolloutRestart(ctx context.Context, resourceName string) error {
1325+
sc := scenarioCtx(ctx)
1326+
resourceName = substituteScenarioVars(resourceName, sc)
1327+
1328+
kind, deploymentName, ok := strings.Cut(resourceName, "/")
1329+
if !ok {
1330+
return fmt.Errorf("invalid resource name format: %q (expected kind/name)", resourceName)
1331+
}
1332+
1333+
if kind != "deployment" {
1334+
return fmt.Errorf("only deployment resources are supported for restart annotation, got: %q", kind)
1335+
}
1336+
1337+
// Run kubectl rollout restart to add the restart annotation.
1338+
// This is the real command users run, so we test actual user behavior.
1339+
out, err := k8sClient("rollout", "restart", resourceName, "-n", sc.namespace)
1340+
if err != nil {
1341+
return fmt.Errorf("failed to rollout restart %s: %w; stderr: %s", resourceName, err, stderrOutput(err))
1342+
}
1343+
1344+
logger.V(1).Info("Rollout restart initiated", "deployment", deploymentName, "output", out)
1345+
1346+
return nil
1347+
}
1348+
1349+
// DeploymentHasRestartAnnotation waits for the deployment's pod template to have
1350+
// the kubectl.kubernetes.io/restartedAt annotation. Uses JSON parsing to avoid
1351+
// JSONPath issues with dots in annotation keys. Polls with timeout.
1352+
func DeploymentHasRestartAnnotation(ctx context.Context, deploymentName string) error {
1353+
sc := scenarioCtx(ctx)
1354+
deploymentName = substituteScenarioVars(deploymentName, sc)
1355+
1356+
restartAnnotationKey := "kubectl.kubernetes.io/restartedAt"
1357+
waitFor(ctx, func() bool {
1358+
out, err := k8sClient("get", "deployment", deploymentName, "-n", sc.namespace, "-o", "json")
1359+
if err != nil {
1360+
return false
1361+
}
1362+
var d appsv1.Deployment
1363+
if err := json.Unmarshal([]byte(out), &d); err != nil {
1364+
return false
1365+
}
1366+
if v, found := d.Spec.Template.Annotations[restartAnnotationKey]; found {
1367+
logger.V(1).Info("Restart annotation found", "deployment", deploymentName, "restartedAt", v)
1368+
return true
1369+
}
1370+
logger.V(1).Info("Restart annotation not yet present", "deployment", deploymentName, "annotations", d.Spec.Template.Annotations)
1371+
return false
1372+
})
1373+
return nil
1374+
}
1375+
1376+
// TriggerClusterExtensionReconciliation patches the ClusterExtension spec to bump
1377+
// its metadata generation, forcing the controller to run a full reconciliation loop.
1378+
// Use with "ClusterExtension has been reconciled the latest generation" to confirm
1379+
// the controller processed the change before asserting on the cluster state.
1380+
//
1381+
// We set install.preflight.crdUpgradeSafety.enforcement to "None" because it is
1382+
// a real spec field that the API server will persist (unlike unknown fields, which
1383+
// are pruned by structural schemas). Any persisted spec change bumps
1384+
// .metadata.generation, giving us a reliable synchronization signal.
1385+
func TriggerClusterExtensionReconciliation(ctx context.Context) error {
1386+
sc := scenarioCtx(ctx)
1387+
payload := `{"spec":{"install":{"preflight":{"crdUpgradeSafety":{"enforcement":"None"}}}}}`
1388+
_, err := k8sClient("patch", "clusterextension", sc.clusterExtensionName,
1389+
"--type=merge",
1390+
"-p", payload)
1391+
if err != nil {
1392+
return fmt.Errorf("failed to trigger reconciliation for ClusterExtension %s: %w; stderr: %s", sc.clusterExtensionName, err, stderrOutput(err))
1393+
}
1394+
return nil
1395+
}
1396+
1397+
// DeploymentRolloutIsProgressing verifies that a deployment rollout is in progress.
1398+
// This checks that a new ReplicaSet has been created after the rollout restart.
1399+
func DeploymentRolloutIsProgressing(ctx context.Context, deploymentName string) error {
1400+
sc := scenarioCtx(ctx)
1401+
deploymentName = substituteScenarioVars(deploymentName, sc)
1402+
1403+
waitFor(ctx, func() bool {
1404+
out, err := k8sClient("rollout", "status", "deployment/"+deploymentName, "-n", sc.namespace, "--watch=false")
1405+
if err != nil {
1406+
return false
1407+
}
1408+
// Check if rollout is in progress (not complete yet)
1409+
if strings.Contains(out, "Waiting") || strings.Contains(out, "has been updated") {
1410+
logger.V(1).Info("Rollout is progressing", "deployment", deploymentName, "status", out)
1411+
return true
1412+
}
1413+
return false
1414+
})
1415+
return nil
1416+
}
1417+
1418+
// DeploymentRolloutIsComplete verifies that a deployment rollout has completed successfully.
1419+
// This ensures the new ReplicaSet is fully scaled up and the old one is scaled down.
1420+
func DeploymentRolloutIsComplete(ctx context.Context, deploymentName string) error {
1421+
sc := scenarioCtx(ctx)
1422+
deploymentName = substituteScenarioVars(deploymentName, sc)
1423+
1424+
waitFor(ctx, func() bool {
1425+
out, err := k8sClient("rollout", "status", "deployment/"+deploymentName, "-n", sc.namespace, "--watch=false")
1426+
if err != nil {
1427+
logger.V(1).Info("Failed to get rollout status", "deployment", deploymentName, "error", err)
1428+
return false
1429+
}
1430+
// Successful rollout shows "successfully rolled out"
1431+
if strings.Contains(out, "successfully rolled out") {
1432+
logger.V(1).Info("Rollout completed successfully", "deployment", deploymentName)
1433+
return true
1434+
}
1435+
logger.V(1).Info("Rollout not yet complete", "deployment", deploymentName, "status", out)
1436+
return false
1437+
})
1438+
return nil
1439+
}
1440+
1441+
// DeploymentHasReplicaSets verifies that a deployment has the expected number of ReplicaSets
1442+
// and that the latest one is active with pods running.
1443+
func DeploymentHasReplicaSets(ctx context.Context, deploymentName string, expectedCountStr string) error {
1444+
sc := scenarioCtx(ctx)
1445+
deploymentName = substituteScenarioVars(deploymentName, sc)
1446+
1447+
expectedCount := 2 // Default to 2 (original + restarted)
1448+
if n, err := fmt.Sscanf(expectedCountStr, "%d", &expectedCount); err != nil || n != 1 {
1449+
logger.V(1).Info("Failed to parse expected count, using default", "input", expectedCountStr, "default", 2)
1450+
expectedCount = 2
1451+
}
1452+
1453+
waitFor(ctx, func() bool {
1454+
// First, get the deployment to find its selector labels
1455+
deploymentOut, err := k8sClient("get", "deployment", deploymentName, "-n", sc.namespace, "-o", "json")
1456+
if err != nil {
1457+
logger.V(1).Info("Failed to get deployment", "deployment", deploymentName, "error", err)
1458+
return false
1459+
}
1460+
1461+
var deployment appsv1.Deployment
1462+
if err := json.Unmarshal([]byte(deploymentOut), &deployment); err != nil {
1463+
logger.V(1).Info("Failed to parse deployment", "error", err)
1464+
return false
1465+
}
1466+
1467+
// Get all ReplicaSets owned by this deployment using ownerReferences
1468+
out, err := k8sClient("get", "rs", "-n", sc.namespace, "-o", "json")
1469+
if err != nil {
1470+
logger.V(1).Info("Failed to get ReplicaSets", "deployment", deploymentName, "error", err)
1471+
return false
1472+
}
1473+
1474+
var allRsList struct {
1475+
Items []appsv1.ReplicaSet `json:"items"`
1476+
}
1477+
if err := json.Unmarshal([]byte(out), &allRsList); err != nil {
1478+
logger.V(1).Info("Failed to parse ReplicaSets", "error", err)
1479+
return false
1480+
}
1481+
1482+
// Filter ReplicaSets owned by this deployment
1483+
var rsList []appsv1.ReplicaSet
1484+
for _, rs := range allRsList.Items {
1485+
for _, owner := range rs.OwnerReferences {
1486+
if owner.Kind == "Deployment" && owner.Name == deploymentName {
1487+
rsList = append(rsList, rs)
1488+
break
1489+
}
1490+
}
1491+
}
1492+
1493+
if len(rsList) < expectedCount {
1494+
logger.V(1).Info("Not enough ReplicaSets yet", "deployment", deploymentName, "current", len(rsList), "expected", expectedCount)
1495+
return false
1496+
}
1497+
1498+
// Verify at least one ReplicaSet has active replicas
1499+
hasActiveRS := false
1500+
for _, rs := range rsList {
1501+
if rs.Status.Replicas > 0 && rs.Status.ReadyReplicas > 0 {
1502+
hasActiveRS = true
1503+
logger.V(1).Info("Found active ReplicaSet", "name", rs.Name, "replicas", rs.Status.Replicas, "ready", rs.Status.ReadyReplicas)
1504+
}
1505+
}
1506+
1507+
if !hasActiveRS {
1508+
logger.V(1).Info("No active ReplicaSet found yet", "deployment", deploymentName)
1509+
return false
1510+
}
1511+
1512+
logger.V(1).Info("ReplicaSet verification passed", "deployment", deploymentName, "count", len(rsList))
1513+
return true
1514+
})
1515+
return nil
1516+
}

0 commit comments

Comments
 (0)