@@ -11,6 +11,7 @@ import (
1111
1212 batchv1 "k8s.io/api/batch/v1"
1313 corev1 "k8s.io/api/core/v1"
14+ apierrors "k8s.io/apimachinery/pkg/api/errors"
1415 "k8s.io/apimachinery/pkg/api/meta"
1516 "k8s.io/apimachinery/pkg/api/resource"
1617 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -72,18 +73,69 @@ func verifyCatalogEndpoint(ctx SpecContext, catalog, endpoint, query string) {
7273
7374 By (fmt .Sprintf ("Creating curl Job to hit: %s" , serviceURL ))
7475
75- jobNamePrefix := fmt .Sprintf ("verify-%s-%s" ,
76+ jobNamePrefix := fmt .Sprintf ("verify-%s-%s-%s " ,
7677 strings .ReplaceAll (endpoint , "?" , "" ),
77- strings .ReplaceAll (catalog , "-" , "" ))
78+ strings .ReplaceAll (catalog , "-" , "" ),
79+ rand .String (5 ), // unique id per job to avoid race condition problems
80+ )
7881
79- job := buildCurlJob (jobNamePrefix , "default" , serviceURL )
80- err = k8sClient .Create (ctx , job )
81- Expect (err ).NotTo (HaveOccurred (), "failed to create Job" )
82+ // Define the ServiceAccount first; it is created on the cluster after the cleanups are registered
83+ serviceAccount := & corev1.ServiceAccount {
84+ ObjectMeta : metav1.ObjectMeta {
85+ Name : jobNamePrefix ,
86+ Namespace : "default" ,
87+ },
88+ }
8289
90+ // Build the Job that runs under the ServiceAccount; it is created on the cluster further below
91+ job := buildCurlJob (jobNamePrefix , "default" , serviceURL , serviceAccount .Name )
92+
93+ // DeferCleanup() callbacks run in LIFO order, so they are registered in reverse of the intended deletion order below.
94+
95+ // 2. Delete ServiceAccount (should happen second)
8396 DeferCleanup (func (ctx SpecContext ) {
84- _ = k8sClient .Delete (ctx , job )
97+ // Poll for service account deletion as well.
98+ Eventually (func (ctx SpecContext ) error {
99+ err = k8sClient .Delete (ctx , serviceAccount )
100+ return client .IgnoreNotFound (err )
101+ }).WithTimeout (helpers .DefaultTimeout ).WithPolling (helpers .DefaultPolling ).Should (Succeed ())
102+ // Wait for the ServiceAccount to actually be deleted before continuing
103+ Eventually (func (g Gomega ) {
104+ err := k8sClient .Get (ctx , client .ObjectKeyFromObject (serviceAccount ), & corev1.ServiceAccount {})
105+ g .Expect (err ).To (WithTransform (apierrors .IsNotFound , BeTrue ()), "Expected ServiceAccount to be deleted" )
106+ }).WithTimeout (helpers .DefaultTimeout ).WithPolling (helpers .DefaultPolling ).Should (Succeed ())
85107 })
86108
109+ // 1. Delete Job (should happen first)
110+ DeferCleanup (func (ctx SpecContext ) {
111+ // Force delete job with zero grace period to ensure cleanup doesn't hang
112+ // Use Foreground propagation to ensure Pods are deleted before the Job is removed,
113+ // guaranteeing the ServiceAccount isn't deleted while Pods are still using it
114+ deletePolicy := metav1 .DeletePropagationForeground
115+ gracePeriod := int64 (0 )
116+ // Poll for Job deletion - in case we have race conditions
117+ // or a bad API call.
118+ Eventually (func (ctx SpecContext ) error {
119+ err := k8sClient .Delete (ctx , job , & client.DeleteOptions {
120+ GracePeriodSeconds : & gracePeriod ,
121+ PropagationPolicy : & deletePolicy ,
122+ })
123+ return client .IgnoreNotFound (err )
124+ }).WithContext (ctx ).WithTimeout (helpers .DefaultTimeout ).WithPolling (helpers .DefaultPolling ).Should (Succeed ())
125+ // While the delete call may be successful, we need to ensure the deletion itself has
126+ // occurred first before deleting the service account.
127+ Eventually (func (g Gomega ) {
128+ err := k8sClient .Get (ctx , client .ObjectKeyFromObject (job ), & batchv1.Job {})
129+ g .Expect (err ).To (WithTransform (apierrors .IsNotFound , BeTrue ()), "Expected a 'NotFound' error, but got: %v" , err )
130+ }).WithTimeout (helpers .DefaultTimeout ).WithPolling (helpers .DefaultPolling ).Should (Succeed ())
131+ })
132+
133+ err = k8sClient .Create (ctx , serviceAccount )
134+ Expect (err ).NotTo (HaveOccurred (), "failed to create ServiceAccount" )
135+
136+ err = k8sClient .Create (ctx , job )
137+ Expect (err ).NotTo (HaveOccurred (), "failed to create Job" )
138+
87139 By ("Waiting for Job to succeed" )
88140 Eventually (func (g Gomega ) {
89141 recheck := & batchv1.Job {}
@@ -94,7 +146,7 @@ func verifyCatalogEndpoint(ctx SpecContext, catalog, endpoint, query string) {
94146 return
95147 }
96148 if c .Type == batchv1 .JobFailed && c .Status == corev1 .ConditionTrue {
97- Fail (fmt .Sprintf ("Job failed: %s" , c .Message ))
149+ StopTrying (fmt .Sprintf ("Job failed: %s" , c .Message )). Now ( )
98150 }
99151 }
100152 }).WithTimeout (helpers .DefaultTimeout ).WithPolling (helpers .DefaultPolling ).Should (Succeed ())
@@ -203,7 +255,7 @@ var _ = Describe("[sig-olmv1][OCPFeatureGate:NewOLM][Skipped:Disconnected] OLMv1
203255 })
204256})
205257
206- func buildCurlJob (prefix , namespace , url string ) * batchv1.Job {
258+ func buildCurlJob (prefix , namespace , url , serviceAccountName string ) * batchv1.Job {
207259 backoff := int32 (1 )
208260 // This means the k8s garbage collector will automatically delete the job 5 minutes
209261 // after it has completed or failed.
@@ -232,7 +284,8 @@ func buildCurlJob(prefix, namespace, url string) *batchv1.Job {
232284 BackoffLimit : & backoff ,
233285 Template : corev1.PodTemplateSpec {
234286 Spec : corev1.PodSpec {
235- RestartPolicy : corev1 .RestartPolicyNever ,
287+ ServiceAccountName : serviceAccountName ,
288+ RestartPolicy : corev1 .RestartPolicyNever ,
236289 Containers : []corev1.Container {{
237290 Name : "api-tester" ,
238291 Image : "registry.redhat.io/rhel8/httpd-24:latest" ,
0 commit comments