-
Notifications
You must be signed in to change notification settings - Fork 251
feat(rcv1p): unify cert bootstrap flow and add Windows CA refresh task #8096
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
daea5db
565657e
f6a3b37
d20ae96
cb6feb8
fe82404
31580f7
ef38f91
1c4eb30
8de2420
bcc496e
06efc8e
38a74b8
c8751d4
43e3e1c
9a46910
32e80ea
75338c9
164d5c6
eb47c45
6cd1b6b
33f2cd2
ae25575
1039e7d
597a523
54e4111
d59b611
bf3b006
da6f852
9fd2a4c
8189ca7
a10cc42
2bc7a70
b330111
ba9d9cc
6cb27c5
1b89b8a
5da27b7
29be2c6
3525706
6e92ae5
8cbb6f8
a4b005f
50b4189
eae935c
7fba23b
d467ca7
d0a9d98
59901f6
31f9a3c
47e5bbb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| name: $(Date:yyyyMMdd)$(Rev:.r) | ||
| variables: | ||
| TAGS_TO_RUN: "rcv1pcertmode=true" | ||
| SKIP_E2E_TESTS: false | ||
| E2E_GO_TEST_TIMEOUT: "75m" | ||
| schedules: | ||
| - cron: "0 11 * * *" | ||
| displayName: Daily 3am PST | ||
| branches: | ||
| include: | ||
| - main | ||
| always: true | ||
| trigger: none | ||
| pr: none | ||
| jobs: | ||
| - template: ./templates/e2e-template.yaml | ||
| parameters: | ||
| name: RCV1P Cert Mode Tests | ||
| IgnoreScenariosWithMissingVhd: false |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,7 @@ VHD_BUILD_ID="${VHD_BUILD_ID:-}" | |
| IGNORE_SCENARIOS_WITH_MISSING_VHD="${IGNORE_SCENARIOS_WITH_MISSING_VHD:-}" | ||
| LOGGING_DIR="${LOGGING_DIR:-}" | ||
| E2E_SUBSCRIPTION_ID="${E2E_SUBSCRIPTION_ID:-}" | ||
| RCV1P_SUBSCRIPTION_ID="${RCV1P_SUBSCRIPTION_ID:-}" | ||
| ENABLE_SECURE_TLS_BOOTSTRAPPING="${ENABLE_SECURE_TLS_BOOTSTRAPPING:-true}" | ||
| TAGS_TO_SKIP="${TAGS_TO_SKIP:-}" | ||
| TAGS_TO_RUN="${TAGS_TO_RUN:-}" | ||
|
|
@@ -47,6 +48,7 @@ echo "VHD_BUILD_ID: ${VHD_BUILD_ID}" | |
| echo "IGNORE_SCENARIOS_WITH_MISSING_VHD: ${IGNORE_SCENARIOS_WITH_MISSING_VHD}" | ||
| echo "LOGGING_DIR: ${LOGGING_DIR}" | ||
| echo "E2E_SUBSCRIPTION_ID: ${E2E_SUBSCRIPTION_ID}" | ||
| echo "RCV1P_SUBSCRIPTION_ID: ${RCV1P_SUBSCRIPTION_ID}" | ||
| echo "ENABLE_SECURE_TLS_BOOTSTRAPPING: ${ENABLE_SECURE_TLS_BOOTSTRAPPING}" | ||
| echo "TAGS_TO_SKIP: ${TAGS_TO_SKIP}" | ||
| echo "TAGS_TO_RUN: ${TAGS_TO_RUN}" | ||
|
|
@@ -95,10 +97,11 @@ tar -xzf "$temp_file" -C bin | |
| chmod +x bin/gotestsum | ||
| rm -f "$temp_file" | ||
|
|
||
| # REVERT ME: added -v to see t.Logf output from passing tests (azcopy/wireserver diagnostics) | ||
| # gotestsum configure to only show logs for failed tests, json file for detailed logs | ||
| # Run the tests! Yey! | ||
| test_exit_code=0 | ||
| ./bin/gotestsum --format testdox --junitfile "${BUILD_SRC_DIR}/e2e/report.xml" --jsonfile "${BUILD_SRC_DIR}/e2e/test-log.json" -- -parallel 60 -timeout "${E2E_GO_TEST_TIMEOUT}" || test_exit_code=$? | ||
| ./bin/gotestsum --format testdox --junitfile "${BUILD_SRC_DIR}/e2e/report.xml" --jsonfile "${BUILD_SRC_DIR}/e2e/test-log.json" -- -v -parallel 60 -timeout "${E2E_GO_TEST_TIMEOUT}" || test_exit_code=$? | ||
|
|
||
|
Comment on lines
+100
to
105
|
||
| # Upload test results as Azure DevOps artifacts | ||
| echo "##vso[artifact.upload containerfolder=test-results;artifactname=e2e-test-log]${BUILD_SRC_DIR}/e2e/test-log.json" | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,6 +1,7 @@ | ||||||||||||||||
| echo $(date),$(hostname) > ${PROVISION_OUTPUT}; | ||||||||||||||||
| {{if getIsAksCustomCloud .CustomCloudConfig}} | ||||||||||||||||
| REPO_DEPOT_ENDPOINT="{{.CustomCloudConfig.RepoDepotEndpoint}}" | ||||||||||||||||
| {{getInitAKSCustomCloudFilepath}} >> /var/log/azure/cluster-provision.log 2>&1; | ||||||||||||||||
| {{end}} | ||||||||||||||||
| LOCATION="{{getCloudLocation .}}" | ||||||||||||||||
|
rchincha marked this conversation as resolved.
|
||||||||||||||||
| LOCATION="{{getCloudLocation .}}" | |
| export LOCATION="{{getCloudLocation .}}" |
Copilot
AI
Mar 26, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
{{getInitAKSCustomCloudFilepath}} is executed even when getIsAksCustomCloud is false, which would run the custom-cloud init logic (repo/chrony/cert refresh) on all clusters. Also LOCATION is assigned but not exported, so the init script won’t receive it when executed as a child process. Move export LOCATION=... and the init script invocation back under the {{if getIsAksCustomCloud ...}} block (or pass location as an argument).
| {{end}} | |
| LOCATION="{{getCloudLocation .}}" | |
| {{getInitAKSCustomCloudFilepath}} >> /var/log/azure/cluster-provision.log 2>&1; | |
| LOCATION="{{getCloudLocation .}}" | |
| export LOCATION | |
| {{getInitAKSCustomCloudFilepath}} >> /var/log/azure/cluster-provision.log 2>&1; | |
| {{end}} |
Copilot
AI
Apr 16, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LOCATION (and REPO_DEPOT_ENDPOINT) are set but not exported before running init-aks-custom-cloud.sh. Because the init script runs in a separate process, it won’t see these variables unless exported (or passed as explicit args), which can break cert mode selection and repo depot/chrony initialization.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -299,22 +299,23 @@ func getFirewall(ctx context.Context, location, firewallSubnetID, publicIPID str | |
| } | ||
|
|
||
| func addFirewallRules( | ||
| ctx context.Context, clusterModel *armcontainerservice.ManagedCluster, | ||
| ctx context.Context, infra *ClusterInfra, clusterModel *armcontainerservice.ManagedCluster, | ||
| ) error { | ||
| location := *clusterModel.Location | ||
| defer toolkit.LogStepCtx(ctx, "adding firewall rules")() | ||
|
|
||
| rg := *clusterModel.Properties.NodeResourceGroup | ||
| vnet, err := getClusterVNet(ctx, rg) | ||
| vnet, err := getClusterVNet(ctx, infra, rg) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| // For kubenet, the AKS-managed route table must stay attached so that pod | ||
| // routes (managed by cloud-provider-azure) and firewall routes coexist. | ||
| // For Azure CNI variants, the subnet may not have any route table, so we | ||
| // create and associate a dedicated one before adding the firewall routes. | ||
| aksSubnetResp, err := config.Azure.Subnet.Get(ctx, rg, vnet.name, "aks-subnet", nil) | ||
| // Find the AKS-managed route table currently associated with the subnet. | ||
| // We add firewall routes directly to this table so that both pod routes | ||
| // (managed by cloud-provider-azure) and firewall routes coexist. Creating | ||
| // a separate route table and swapping the subnet association disconnects | ||
| // the pod routes and breaks kubenet networking. | ||
| aksSubnetResp, err := infra.Azure.Subnet.Get(ctx, rg, vnet.name, "aks-subnet", nil) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to get AKS subnet: %w", err) | ||
| } | ||
|
|
@@ -332,7 +333,7 @@ func addFirewallRules( | |
| } | ||
|
|
||
| toolkit.Logf(ctx, "Creating subnet %s in VNet %s", firewallSubnetName, vnet.name) | ||
| subnetPoller, err := config.Azure.Subnet.BeginCreateOrUpdate( | ||
| subnetPoller, err := infra.Azure.Subnet.BeginCreateOrUpdate( | ||
| ctx, | ||
| rg, | ||
| vnet.name, | ||
|
|
@@ -365,7 +366,7 @@ func addFirewallRules( | |
| } | ||
|
|
||
| toolkit.Logf(ctx, "Creating public IP %s", publicIPName) | ||
| pipPoller, err := config.Azure.PublicIPAddresses.BeginCreateOrUpdate( | ||
| pipPoller, err := infra.Azure.PublicIPAddresses.BeginCreateOrUpdate( | ||
| ctx, | ||
| rg, | ||
| publicIPName, | ||
|
|
@@ -386,7 +387,7 @@ func addFirewallRules( | |
|
|
||
| firewallName := "abe2e-fw" | ||
| firewall := getFirewall(ctx, location, firewallSubnetID, publicIPID) | ||
| fwPoller, err := config.Azure.AzureFirewall.BeginCreateOrUpdate(ctx, rg, firewallName, *firewall, nil) | ||
| fwPoller, err := infra.Azure.AzureFirewall.BeginCreateOrUpdate(ctx, rg, firewallName, *firewall, nil) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to start Firewall creation: %w", err) | ||
| } | ||
|
|
@@ -432,7 +433,7 @@ func addFirewallRules( | |
|
|
||
| for _, route := range firewallRoutes { | ||
| toolkit.Logf(ctx, "Adding route %q to AKS route table %q", *route.Name, aksRTName) | ||
| poller, err := config.Azure.Routes.BeginCreateOrUpdate(ctx, rg, aksRTName, *route.Name, route, nil) | ||
| poller, err := infra.Azure.Routes.BeginCreateOrUpdate(ctx, rg, aksRTName, *route.Name, route, nil) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to start adding route %q: %w", *route.Name, err) | ||
| } | ||
|
|
@@ -510,7 +511,7 @@ func addPrivateAzureContainerRegistry(ctx context.Context, cluster *armcontainer | |
| if err := createPrivateAzureContainerRegistryPullSecret(ctx, cluster, kube, resourceGroupName, isNonAnonymousPull); err != nil { | ||
| return fmt.Errorf("create private acr pull secret: %w", err) | ||
| } | ||
| vnet, err := getClusterVNet(ctx, *cluster.Properties.NodeResourceGroup) | ||
| vnet, err := getClusterVNet(ctx, DefaultClusterInfra, *cluster.Properties.NodeResourceGroup) | ||
|
||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -531,7 +532,7 @@ func addNetworkIsolatedSettings(ctx context.Context, clusterModel *armcontainers | |
| location := *clusterModel.Location | ||
| defer toolkit.LogStepCtx(ctx, fmt.Sprintf("Adding network settings for network isolated cluster %s in rg %s", *clusterModel.Name, *clusterModel.Properties.NodeResourceGroup)) | ||
|
|
||
| vnet, err := getClusterVNet(ctx, *clusterModel.Properties.NodeResourceGroup) | ||
| vnet, err := getClusterVNet(ctx, DefaultClusterInfra, *clusterModel.Properties.NodeResourceGroup) | ||
|
||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -678,7 +679,7 @@ func createPrivateAzureContainerRegistry(ctx context.Context, cluster *armcontai | |
| } | ||
| // if ACR gets recreated so should the cluster | ||
| toolkit.Logf(ctx, "Private ACR deleted, deleting cluster %s", *cluster.Name) | ||
| if err := deleteCluster(ctx, *cluster.Name, resourceGroup); err != nil { | ||
| if err := deleteCluster(ctx, DefaultClusterInfra, *cluster.Name, resourceGroup); err != nil { | ||
| return fmt.Errorf("failed to delete cluster: %w", err) | ||
| } | ||
| } else { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The pipeline test runner now forces
go test -v (explicitly marked REVERT ME). This will significantly increase log volume for all E2E runs and can slow CI / inflate artifact sizes. Please remove this or gate it behind an env var (e.g., E2E_VERBOSE=true) before merging.