Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,8 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te

cl, reconciler := newClientAndReconciler(t, func(d *deps) {
// Boxcutter keeps a rolling revision when apply fails. We mirror that state so the test uses
// the same inputs the runtime would see.
// the same inputs the runtime would see. The rolling-out revision has no ProgressDeadlineExceeded
// condition, so the controller reuses it without engaging the resolver.
d.RevisionStatesGetter = &MockRevisionStatesGetter{
RevisionStates: &controllers.RevisionStates{
RollingOut: []*controllers.RevisionMetadata{{}},
Expand Down Expand Up @@ -768,6 +769,172 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te
require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{}))
}

// TestClusterExtensionProgressDeadlineExceededReResolvesBundle verifies that when a
// rolling-out revision has exceeded its progression deadline, the controller re-resolves
// from the catalog. This is the primary mechanism for recovering from stuck rollouts.
//
// Scenario:
// - A revision for v1.0.2 is rolling out but has exceeded its progression deadline
// - The ClusterExtension spec requests v1.0.1 (admin changed spec to recover)
// - The controller detects ProgressDeadlineExceeded and re-resolves from catalog
// - The resolver returns v1.0.1, which is used for the new rollout
func TestClusterExtensionProgressDeadlineExceededReResolvesBundle(t *testing.T) {
require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime)))
t.Cleanup(func() {
require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime)))
})

resolverCalled := false
cl, reconciler := newClientAndReconciler(t, func(d *deps) {
d.RevisionStatesGetter = &MockRevisionStatesGetter{
RevisionStates: &controllers.RevisionStates{
Installed: &controllers.RevisionMetadata{
Package: "nginx88138",
Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.0-nginxolm88138",
BundleMetadata: ocv1.BundleMetadata{
Name: "nginx88138.v1.0.0",
Version: "1.0.0",
},
},
RollingOut: []*controllers.RevisionMetadata{{
RevisionName: "extension-88138-2",
Package: "nginx88138",
Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138",
BundleMetadata: ocv1.BundleMetadata{
Name: "nginx88138.v1.0.2",
Version: "1.0.2",
},
Conditions: []metav1.Condition{{
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
Status: metav1.ConditionFalse,
Reason: ocv1.ReasonProgressDeadlineExceeded,
}},
}},
},
}
d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
resolverCalled = true
v := bundle.VersionRelease{
Version: bsemver.MustParse("1.0.1"),
}
return &declcfg.Bundle{
Name: "nginx88138.v1.0.1",
Package: "nginx88138",
Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138",
}, &v, nil, nil
})
d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}}
d.Applier = &MockApplier{installCompleted: true}
})

ctx := context.Background()
extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))}

clusterExtension := &ocv1.ClusterExtension{
ObjectMeta: metav1.ObjectMeta{Name: extKey.Name},
Spec: ocv1.ClusterExtensionSpec{
Source: ocv1.SourceConfig{
SourceType: "Catalog",
Catalog: &ocv1.CatalogFilter{
PackageName: "nginx88138",
Version: "1.0.1",
},
},
Namespace: "default",
ServiceAccount: ocv1.ServiceAccountReference{
Name: "default",
},
},
}
require.NoError(t, cl.Create(ctx, clusterExtension))

res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
require.Equal(t, ctrl.Result{}, res)
require.NoError(t, err)

require.True(t, resolverCalled,
"resolver should be called because the rolling out revision exceeded its progression deadline")

require.NoError(t, cl.Get(ctx, extKey, clusterExtension))

installedCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeInstalled)
require.NotNil(t, installedCond)
require.Equal(t, metav1.ConditionTrue, installedCond.Status)
require.Contains(t, installedCond.Message, "1.0.1")

require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{}))
}

// TestClusterExtensionRollingOutNoDeadlineExceededReusesRevision verifies that when a
// revision is actively rolling out and has NOT exceeded its progression deadline,
// the controller reuses the existing revision without re-resolving from the catalog,
// regardless of whether the spec has changed.
func TestClusterExtensionRollingOutNoDeadlineExceededReusesRevision(t *testing.T) {
require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime)))
t.Cleanup(func() {
require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime)))
})

resolverCalled := false
cl, reconciler := newClientAndReconciler(t, func(d *deps) {
d.RevisionStatesGetter = &MockRevisionStatesGetter{
RevisionStates: &controllers.RevisionStates{
RollingOut: []*controllers.RevisionMetadata{{
Package: "nginx88138",
Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138",
BundleMetadata: ocv1.BundleMetadata{
Name: "nginx88138.v1.0.2",
Version: "1.0.2",
},
}},
},
}
d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
resolverCalled = true
v := bundle.VersionRelease{
Version: bsemver.MustParse("1.0.1"),
}
return &declcfg.Bundle{
Name: "nginx88138.v1.0.1",
Package: "nginx88138",
Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138",
}, &v, nil, nil
})
d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}}
d.Applier = &MockApplier{installCompleted: true}
})

ctx := context.Background()
extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))}

clusterExtension := &ocv1.ClusterExtension{
ObjectMeta: metav1.ObjectMeta{Name: extKey.Name},
Spec: ocv1.ClusterExtensionSpec{
Source: ocv1.SourceConfig{
SourceType: "Catalog",
Catalog: &ocv1.CatalogFilter{
PackageName: "nginx88138",
Version: "1.0.1",
},
},
Namespace: "default",
ServiceAccount: ocv1.ServiceAccountReference{
Name: "default",
},
},
}
require.NoError(t, cl.Create(ctx, clusterExtension))

_, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
require.NoError(t, err)

require.False(t, resolverCalled,
"resolver should NOT be called: rollout is active and progression deadline has not been exceeded, "+
"even though the spec (v1.0.1) differs from the rolling out revision (v1.0.2)")

require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{}))
}

func TestValidateClusterExtension(t *testing.T) {
tests := []struct {
name string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,15 +140,34 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
l := log.FromContext(ctx)

// If already rolling out, use existing revision and set deprecation to Unknown (no catalog check)
// When revisions are actively rolling out, the decision to re-engage the resolver
// is driven by the progression deadline — not by spec or catalog changes.
//
// If the latest rolling-out revision has NOT exceeded its progression deadline,
// we continue with the current rollout. Any spec changes the user made will be
// picked up once the rollout completes or the progression deadline is exceeded.
//
// If the latest rolling-out revision HAS exceeded its progression deadline,
// we re-resolve from the catalog. At that point the resolver will use the
// current spec (which may have been updated by the admin) and the installed
// bundle metadata to find the next appropriate version.
if len(state.revisionStates.RollingOut) > 0 {
installedBundleName := ""
if state.revisionStates.Installed != nil {
installedBundleName = state.revisionStates.Installed.Name
latestRollingOut := state.revisionStates.RollingOut[len(state.revisionStates.RollingOut)-1]

if !hasProgressDeadlineExceeded(latestRollingOut) {
installedBundleName := ""
if state.revisionStates.Installed != nil {
installedBundleName = state.revisionStates.Installed.Name
}
SetDeprecationStatus(ext, installedBundleName, nil, false)
state.resolvedRevisionMetadata = latestRollingOut
return nil, nil
}
SetDeprecationStatus(ext, installedBundleName, nil, false)
state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0]
return nil, nil

l.Info("progression deadline exceeded for rolling out revision, re-resolving bundle",
"revision", latestRollingOut.RevisionName,
"version", latestRollingOut.Version,
)
}

// Resolve a new bundle from the catalog
Expand Down Expand Up @@ -199,6 +218,14 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
}
}

// hasProgressDeadlineExceeded checks whether a rolling-out revision has exceeded
// its progression deadline. This is the primary trigger for re-engaging the resolver
// during an active rollout.
func hasProgressDeadlineExceeded(rm *RevisionMetadata) bool {
cnd := apimeta.FindStatusCondition(rm.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing)
return cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded
}

// handleResolutionError handles the case when bundle resolution fails.
//
// Decision logic (evaluated in order):
Expand Down
33 changes: 33 additions & 0 deletions test/e2e/features/update.feature
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,36 @@ Feature: Update ClusterExtension
And ClusterExtensionRevision "${NAME}-2" reports Progressing as True with Reason RollingOut
And ClusterExtensionRevision "${NAME}-2" reports Available as False with Reason ProbeFailure

@BoxcutterRuntime
@ProgressDeadline
Scenario: Progression deadline triggers re-resolution when rollout is stuck
Given min value for ClusterExtension .spec.progressDeadlineMinutes is set to 1
And min value for ClusterExtensionRevision .spec.progressDeadlineMinutes is set to 1
And ClusterExtension is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtension
metadata:
name: ${NAME}
spec:
namespace: ${TEST_NAMESPACE}
serviceAccount:
name: olm-sa
source:
sourceType: Catalog
catalog:
packageName: test
selector:
matchLabels:
"olm.operatorframework.io/metadata.name": test-catalog
version: 1.0.2
upgradeConstraintPolicy: SelfCertified
progressDeadlineMinutes: 1
"""
And ClusterExtensionRevision "${NAME}-1" reports Available as False with Reason ProbeFailure
When ClusterExtension is updated to version "1.0.1"
And ClusterExtensionRevision "${NAME}-1" reports Progressing as False with Reason ProgressDeadlineExceeded
Then ClusterExtension is rolled out
And ClusterExtension is available
And bundle "test-operator.1.0.1" is installed in version "1.0.1"

Loading