Skip to content

Commit 65f9467

Browse files
authored
feat: enhance Gateway API plugin with in-progress label management (#155)
* feat: enhance Gateway API plugin with in-progress label management - Added support for automatic labeling of Gateway API routes during canary deployments to prevent GitOps drift. - Updated documentation to reflect new features, including the ability to customize or disable the in-progress label. - Improved tests to verify the addition and removal of the in-progress label for HTTP, gRPC, TCP, and TLS routes. This change enhances the integration with GitOps tools like Argo CD, ensuring smoother deployments and better resource management. Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * ci Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * fix ignorediff doc section Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * chore: Add test Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * update versions of rollouts and traefik Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * cleanup Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * test Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * add wait to sanity check Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * chore: move label change into it's own test Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * Fix lint Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * cleanup Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> * Enhance sanity check script to retry gateway class acceptance with a maximum of 5 attempts, improving reliability of e2e tests. Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com> --------- Signed-off-by: rick.stokkingreef <rick.stokkingreef@airalo.com>
1 parent 986c3af commit 65f9467

20 files changed

+639
-36
lines changed

Makefile

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ E2E_CLUSTER_NAME=gatewayapi-plugin-e2e
44
IS_E2E_CLUSTER=$(shell kind get clusters | grep -e "^${E2E_CLUSTER_NAME}$$")
55

66
# Versions of components used in e2e tests
7-
GATEWAY_API_VERSION=v1.1.0
7+
GATEWAY_API_VERSION=v1.4.0
88
# See more versions at https://artifacthub.io/packages/helm/argo/argo-rollouts
9-
ARGO_ROLLOUTS_HELM_VERSION=2.37.2 # Contains Argo Rollouts 1.7.1
9+
ARGO_ROLLOUTS_HELM_VERSION=2.40.5 # Contains Argo Rollouts 1.8.3
1010
# See more versions at https://artifacthub.io/packages/helm/traefik/traefik
11-
TRAEFIK_HELM_VERSION=31.0.0 # Contains Traefik proxy v3.1.2
11+
TRAEFIK_HELM_VERSION=37.4.0 # Contains Traefik proxy v3.6.2
1212

1313

1414

@@ -21,9 +21,9 @@ define add_helm_repo
2121
endef
2222

2323
define setup_cluster
24-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/${GATEWAY_API_VERSION}/experimental-install.yaml
2524
helm install argo-rollouts argo/argo-rollouts --values ./test/cluster-setup/argo-rollouts-values.yml --version ${ARGO_ROLLOUTS_HELM_VERSION} --wait
2625
helm install traefik traefik/traefik --values ./test/cluster-setup/traefik-values.yml --version ${TRAEFIK_HELM_VERSION} --wait
26+
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/${GATEWAY_API_VERSION}/experimental-install.yaml --server-side=true --force-conflicts
2727
endef
2828

2929
define install_k8s_resources
@@ -60,7 +60,7 @@ unit-tests:
6060
go test -v -count=1 ./pkg/...
6161

6262
.PHONY: setup-e2e-cluster
63-
setup-e2e-cluster:
63+
setup-e2e-cluster:
6464
make BIN_NAME=gatewayapi-plugin-linux-amd64 GOOS=linux GOARCH=amd64 gatewayapi-plugin-build
6565
ifeq (${IS_E2E_CLUSTER},)
6666
kind create cluster --name ${E2E_CLUSTER_NAME} --config ./test/cluster-setup/cluster-config.yml
@@ -79,20 +79,20 @@ endif
7979
sanity-check-e2e:
8080
./test/cluster-setup/sanity-check.sh
8181

82-
.PHONY: run-e2e-tests
82+
.PHONY: run-e2e-tests
8383
run-e2e-tests: sanity-check-e2e
8484
go test -v -timeout 5m -count=1 -run ${RUN} ./test/e2e/...
8585

8686
# Flaky tests usually fail on GitHub Actions. You should still be able to run them locally.
8787
.PHONY: e2e-tests-flaky
88-
e2e-tests-flaky: setup-e2e-cluster run-e2e-tests-flaky
88+
e2e-tests-flaky: setup-e2e-cluster run-e2e-tests-flaky
8989
ifeq (${CLUSTER_DELETE},true)
9090
make clear-e2e-cluster
9191
endif
9292

93-
.PHONY: run-e2e-tests-flaky
93+
.PHONY: run-e2e-tests-flaky
9494
run-e2e-tests-flaky: sanity-check-e2e
95-
go test -tags "flaky" -v -timeout 5m -count=1 -run ${RUN} ./test/e2e/...
95+
go test -tags "flaky" -v -timeout 5m -count=1 -run ${RUN} ./test/e2e/...
9696

9797
.PHONY: clear-e2e-cluster
9898
clear-e2e-cluster:

RELEASE_NOTES.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
* Added support for [TLSRoute](https://rollouts-plugin-trafficrouter-gatewayapi.readthedocs.io/en/latest/features/tls/).
2-
* You can now use [filters with Header based routing](https://github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/issues/87).
2+
* You can now use [filters with Header based routing](https://github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/issues/87).
3+
* Gateway API routes are labeled while a canary is running to avoid GitOps drift and the label is removed once traffic returns to 100% stable.

docs/features/multiple-routes.md

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ spec:
2323
parentRefs:
2424
- name: eg
2525
hostnames:
26-
- backend.example.com
26+
- backend.example.com
2727
rules:
2828
- matches:
2929
- path:
@@ -46,7 +46,7 @@ spec:
4646
parentRefs:
4747
- name: eg
4848
hostnames:
49-
- api.example.com
49+
- api.example.com
5050
rules:
5151
- matches:
5252
- path:
@@ -106,10 +106,36 @@ spec:
106106
- name: http
107107
containerPort: 8080
108108
protocol: TCP
109-
```
109+
```
110110

111111
If you now start a canary deployment both routes will change to 10%, 50% and 100% as the canary progresses to all its steps.
112112

113+
## Working with GitOps controllers
114+
115+
GitOps tools such as Argo CD continuously reconcile Gateway API resources and can revert the temporary weight changes that occur
116+
while a canary is progressing. The plugin automatically adds the label
117+
`rollouts.argoproj.io/gatewayapi-canary=in-progress` to every HTTPRoute/GRPCRoute/TCPRoute/TLSRoute it mutates so that you can
118+
configure your GitOps policy to ignore those resources during a rollout. The label disappears as soon as the stable service
119+
returns to 100% weight. You can customise the key/value or disable the feature altogether with the
120+
`inProgressLabelKey`, `inProgressLabelValue` and `disableInProgressLabel` fields under the plugin configuration.
121+
122+
### Argo CD `ignoreDifferences`
123+
124+
When you use Argo CD (either through the Application CRD or its Helm chart), add the following snippet so that Argo CD skips the
125+
temporary rule edits while the `rollouts.argoproj.io/gatewayapi-canary` label is present:
126+
127+
```yaml
128+
configs:
129+
cm:
130+
resource.customizations.ignoreDifferences.gateway.networking.k8s.io_HTTPRoute: |
131+
jqPathExpressions:
132+
- select(.metadata.labels["rollouts.argoproj.io/gatewayapi-canary"] == "in-progress") | .spec.rules
133+
```
134+
135+
Duplicate the block for `GRPCRoute`, `TCPRoute` and `TLSRoute` if you manage those kinds as well. If you have customised the
136+
label key or value on the plugin, update the `jqPathExpressions` condition to match your configuration. The same structure applies
137+
when you configure `resource.customizations` directly on an Application manifest (outside of Helm).
138+
113139
## Automatic Route Discovery with Label Selectors
114140

115141
Instead of explicitly listing each route name, you can use label selectors to automatically discover routes. This is particularly useful when managing many routes or when routes are created dynamically.
@@ -200,7 +226,7 @@ trafficRouting:
200226
The plugin supports selectors for different route types:
201227

202228
- `httpRouteSelector`: Discovers HTTPRoutes
203-
- `grpcRouteSelector`: Discovers GRPCRoutes
229+
- `grpcRouteSelector`: Discovers GRPCRoutes
204230
- `tcpRouteSelector`: Discovers TCPRoutes
205231

206232
You can use multiple selectors simultaneously:
@@ -247,4 +273,4 @@ To verify which routes will be discovered by your selector, use kubectl:
247273
kubectl get httproutes -n default -l app=my-app,canary-enabled=true
248274
```
249275

250-
The plugin logs discovered routes during reconciliation, which can help with debugging.
276+
The plugin logs discovered routes during reconciliation, which can help with debugging.

docs/quick-start.md

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ to control your Http Routes. In this guide we will see how to use [the Rollouts
55

66
You can find more examples at the [provider status page](provider-status.md).
77

8-
## Prerequisites
8+
## Prerequisites
99

1010
Get access to a Kubernetes cluster. You can use a cluster on the cloud or on your workstation like [k3s](https://k3s.io/), [k3d](https://k3d.io/) or [Docker for Desktop](https://www.docker.com/products/docker-desktop/).
1111

@@ -40,7 +40,7 @@ kubectl wait --timeout=5m -n envoy-gateway-system deployment/envoy-gateway --for
4040
!!! note
4141
This process needs to happen only once per cluster. The task is normally handled by infrastructure operators.
4242

43-
Create a Gateway
43+
Create a Gateway
4444

4545
```yaml
4646
---
@@ -63,7 +63,7 @@ spec:
6363
- name: http
6464
protocol: HTTP
6565
port: 80
66-
```
66+
```
6767
Apply the file with kubectl and then verify it works correctly with
6868
6969
```
@@ -109,12 +109,12 @@ subjects:
109109
- namespace: argo-rollouts
110110
kind: ServiceAccount
111111
name: argo-rollouts
112-
```
112+
```
113113
114114
Apply the file with kubectl. Note that this role is **NOT** to be used in production clusters as it is super permissive.
115115
116116
117-
## Step 4 - Create an HTTP route
117+
## Step 4 - Create an HTTP route
118118
119119
!!! note
120120
This process needs to happen only once per application. The task is normally handled by cluster operators or application developers.
@@ -135,7 +135,7 @@ spec:
135135
- matches:
136136
- path:
137137
type: PathPrefix
138-
value: /
138+
value: /
139139
backendRefs:
140140
- name: argo-rollouts-stable-service
141141
kind: Service
@@ -148,7 +148,7 @@ spec:
148148
Apply the file with kubectl.
149149
Verify it with `kubectl get httproutes`
150150

151-
## Step 5 - Create a Rollout
151+
## Step 5 - Create a Rollout
152152

153153
!!! note
154154
This process needs to happen only once per application. The task is normally handled by cluster operators or application developers.
@@ -207,6 +207,10 @@ spec:
207207
argoproj-labs/gatewayAPI:
208208
httpRoute: argo-rollouts-http-route # our created httproute
209209
namespace: default
210+
# Optional: customize or disable the temporary label that marks routes as managed during a canary
211+
# inProgressLabelKey: rollouts.argoproj.io/gatewayapi-canary
212+
# inProgressLabelValue: in-progress
213+
# disableInProgressLabel: false
210214
steps:
211215
- setWeight: 50
212216
- pause: {}
@@ -241,7 +245,7 @@ You should see that all requests return with blue color:
241245
![First deployment](images/quick-start/canary-start.png)
242246

243247

244-
## Daily Task - Perform a Canary
248+
## Daily Task - Perform a Canary
245249

246250
!!! note
247251
This process happens multiple times per day/week. The task is normally handled by application developers.
@@ -260,14 +264,34 @@ At this point each color should get 50% of requests. You can see this visually i
260264
261265
You should also inspect the HTTPRoute and verify that Argo Rollouts has changed the weights of the backend services.
262266
263-
Run
267+
Run
264268
265269
```
266270
kubectl get httproute -o yaml
267271
```
268272
269273
In the response you should see the following information about the weights for each backing service.
270274
275+
!!! info
276+
While the canary is running, the plugin adds the label `rollouts.argoproj.io/gatewayapi-canary=in-progress` to every managed
277+
Gateway API route so that GitOps tools such as Argo CD can be configured to ignore those temporary changes. The label is
278+
removed automatically once the stable service goes back to 100% weight. Use `disableInProgressLabel`, `inProgressLabelKey`
279+
or `inProgressLabelValue` if you need to adjust this behaviour.
280+
281+
**Argo CD example (Helm chart values)**
282+
283+
```yaml
284+
configs:
285+
cm:
286+
resource.customizations.ignoreDifferences.gateway.networking.k8s.io_HTTPRoute: |
287+
jqPathExpressions:
288+
- select(.metadata.labels["rollouts.argoproj.io/gatewayapi-canary"] == "in-progress") | .spec.rules
289+
```
290+
291+
Apply the same snippet to `GRPCRoute`, `TCPRoute` and `TLSRoute` kinds if you manage them. If you configure `resource.customizations`
292+
directly inside an Application manifest rather than Helm values, reuse the same structure under `spec.source.plugin` or
293+
`spec.source.helm.values`.
294+
271295
```yaml
272296
[...snip...]
273297
spec:
@@ -307,4 +331,4 @@ The application should gradually change now to yellow.
307331

308332
The deployment has finished. If you change the Rollout image again, the process will start over.
309333

310-
Feel free to learn more about all Rollout options in the [Specification documentation](https://argo-rollouts.readthedocs.io/en/stable/features/specification/).
334+
Feel free to learn more about all Rollout options in the [Specification documentation](https://argo-rollouts.readthedocs.io/en/stable/features/specification/).

internal/defaults/defaults.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
package defaults
22

3-
const ConfigMap = "argo-gatewayapi-configmap"
3+
// Default values shared by the Gateway API traffic-router plugin.
const (
	// ConfigMap is the default ConfigMap name used by the plugin.
	// NOTE(review): its consumers are not visible here; presumably it stores
	// the plugin's managed-route state — confirm against callers.
	ConfigMap = "argo-gatewayapi-configmap"

	// InProgressLabelKey is the label key applied to Gateway API routes while
	// a canary is in progress, used when the plugin configuration does not
	// override it.
	InProgressLabelKey = "rollouts.argoproj.io/gatewayapi-canary"

	// InProgressLabelValue is the label value paired with InProgressLabelKey,
	// used when the plugin configuration does not override it.
	InProgressLabelValue = "in-progress"
)

pkg/plugin/grpcroute.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ func (r *RpcPlugin) setGRPCRouteWeight(rollout *v1alpha1.Rollout, desiredWeight
5252
for _, ref := range stableBackendRefs {
5353
ref.Weight = &restWeight
5454
}
55+
ensureInProgressLabel(grpcRoute, desiredWeight, gatewayAPIConfig)
5556
updatedGRPCRoute, err := grpcRouteClient.Update(ctx, grpcRoute, metav1.UpdateOptions{})
5657
if r.IsTest {
5758
r.UpdatedGRPCRouteMock = updatedGRPCRoute
@@ -396,6 +397,16 @@ func removeManagedGRPCRouteEntry(managedRouteMap ManagedRouteMap, routeRuleList
396397
managedRouteMapKey := managedRouteName + "." + grpcRouteName
397398
return nil, fmt.Errorf(ManagedRouteMapEntryDeleteError, managedRouteMapKey, managedRouteMapKey)
398399
}
400+
if managedRouteIndex < 0 || managedRouteIndex >= len(routeRuleList) {
401+
// stale or corrupted managed route index; clean references for this route and continue gracefully
402+
for name, managedMap := range managedRouteMap {
403+
delete(managedMap, grpcRouteName)
404+
if len(managedMap) == 0 {
405+
delete(managedRouteMap, name)
406+
}
407+
}
408+
return routeRuleList, nil
409+
}
399410
delete(routeManagedRouteMap, grpcRouteName)
400411
if len(managedRouteMap[managedRouteName]) == 0 {
401412
delete(managedRouteMap, managedRouteName)

pkg/plugin/httproute.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ func (r *RpcPlugin) setHTTPRouteWeight(rollout *v1alpha1.Rollout, desiredWeight
5656
if err != nil {
5757
r.LogCtx.Error(err, "Failed to handle experiment services")
5858
}
59+
ensureInProgressLabel(httpRoute, desiredWeight, gatewayAPIConfig)
5960
updatedHTTPRoute, err := httpRouteClient.Update(ctx, httpRoute, metav1.UpdateOptions{})
6061
if r.IsTest {
6162
r.UpdatedHTTPRouteMock = updatedHTTPRoute
@@ -395,6 +396,16 @@ func removeManagedHTTPRouteEntry(managedRouteMap ManagedRouteMap, routeRuleList
395396
managedRouteMapKey := managedRouteName + "." + httpRouteName
396397
return nil, fmt.Errorf(ManagedRouteMapEntryDeleteError, managedRouteMapKey, managedRouteMapKey)
397398
}
399+
if managedRouteIndex < 0 || managedRouteIndex >= len(routeRuleList) {
400+
// stale or corrupted managed route index; clean references for this route and continue gracefully
401+
for name, managedMap := range managedRouteMap {
402+
delete(managedMap, httpRouteName)
403+
if len(managedMap) == 0 {
404+
delete(managedRouteMap, name)
405+
}
406+
}
407+
return routeRuleList, nil
408+
}
398409
delete(routeManagedRouteMap, httpRouteName)
399410
if len(managedRouteMap[managedRouteName]) == 0 {
400411
delete(managedRouteMap, managedRouteName)

pkg/plugin/labels.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package plugin
2+
3+
import (
4+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
5+
6+
"github.com/argoproj-labs/rollouts-plugin-trafficrouter-gatewayapi/internal/defaults"
7+
)
8+
9+
func ensureInProgressLabel(obj metav1.Object, desiredWeight int32, config *GatewayAPITrafficRouting) bool {
10+
if obj == nil || config == nil || config.DisableInProgressLabel {
11+
return false
12+
}
13+
14+
key := config.inProgressLabelKey()
15+
if key == "" {
16+
return false
17+
}
18+
19+
labels := obj.GetLabels()
20+
if desiredWeight == 0 {
21+
if labels == nil {
22+
return false
23+
}
24+
if _, ok := labels[key]; ok {
25+
delete(labels, key)
26+
obj.SetLabels(labels)
27+
return true
28+
}
29+
return false
30+
}
31+
32+
value := config.inProgressLabelValue()
33+
if labels == nil {
34+
labels = make(map[string]string)
35+
}
36+
if current, ok := labels[key]; ok && current == value {
37+
return false
38+
}
39+
labels[key] = value
40+
obj.SetLabels(labels)
41+
return true
42+
}
43+
44+
func (c *GatewayAPITrafficRouting) inProgressLabelKey() string {
45+
if c.InProgressLabelKey != "" {
46+
return c.InProgressLabelKey
47+
}
48+
return defaults.InProgressLabelKey
49+
}
50+
51+
func (c *GatewayAPITrafficRouting) inProgressLabelValue() string {
52+
if c.InProgressLabelValue != "" {
53+
return c.InProgressLabelValue
54+
}
55+
return defaults.InProgressLabelValue
56+
}

0 commit comments

Comments
 (0)