Skip to content

Commit c08fb3a

Browse files
Ambient Code Bot and claude committed
fix(ci,pr-test): address PR #1032 review comments
- components-build-deploy: push an immutable SHA-tagged PR image alongside the mutable pr-<N>-<arch> tag to prevent race conditions across commits
- pr-e2e-openshift: make the concurrency group null-safe (fall back to workflow_run.id); clamp SLUG_MAX to a minimum of 0 and omit the branch slug when it is empty; pass --insecure-skip-tls-verify=false explicitly to oc login so TLS verification is never skipped; install kustomize for the kustomize-edit steps
- install.sh: add both :latest and untagged kustomize set-image mappings so all image name variants are rewritten; add vteam_control_plane and vteam_mcp; replace kustomize build with oc kustomize; make the health check fail fast with curl timeouts and exit on missing routes
- provision.sh: take an atomic ConfigMap lock before the capacity check (TOCTOU fix); wait for the TenantNamespace Ready condition, not only Namespace Active; fix destroy() to treat only (NotFound) as success, not all oc errors; delete the lock ConfigMap on destroy
- openshift.md: add vteam_state_sync to the internal registry push loop

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent e3c6210 commit c08fb3a

5 files changed

Lines changed: 81 additions & 17 deletions

File tree

.github/workflows/components-build-deploy.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,10 @@ jobs:
222222
context: ${{ matrix.component.context }}
223223
file: ${{ matrix.component.dockerfile }}
224224
platforms: ${{ matrix.arch.platform }}
225-
push: false
226-
tags: ${{ matrix.component.image }}:pr-${{ github.event.pull_request.number }}-${{ matrix.arch.suffix }}
225+
push: true
226+
tags: |
227+
${{ matrix.component.image }}:pr-${{ github.event.pull_request.number }}-${{ matrix.arch.suffix }}
228+
${{ matrix.component.image }}:pr-${{ github.event.pull_request.number }}-${{ github.sha }}-${{ matrix.arch.suffix }}
227229
build-args: AMBIENT_VERSION=${{ github.sha }}
228230
cache-from: type=gha,scope=${{ matrix.component.name }}-${{ matrix.arch.suffix }}
229231

.github/workflows/pr-e2e-openshift.yml

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ on:
66
types: [completed]
77

88
concurrency:
9-
group: pr-e2e-openshift-${{ github.event.workflow_run.pull_requests[0].number }}
9+
group: pr-e2e-openshift-${{ github.event.workflow_run.pull_requests[0].number || github.event.workflow_run.id }}
1010
cancel-in-progress: true
1111

1212
jobs:
@@ -38,9 +38,14 @@ jobs:
3838
# Max namespace = 63. Slug budget = 63 - 14 - 4 - ${#PR_NUMBER}
3939
PR_LEN=${#PR_NUMBER}
4040
SLUG_MAX=$(( 63 - 14 - 4 - PR_LEN ))
41+
[[ $SLUG_MAX -lt 0 ]] && SLUG_MAX=0
4142
BRANCH_SLUG="${SAFE_BRANCH:0:$SLUG_MAX}"
4243
43-
INSTANCE_ID="pr-${PR_NUMBER}-${BRANCH_SLUG}"
44+
if [[ -n "$BRANCH_SLUG" ]]; then
45+
INSTANCE_ID="pr-${PR_NUMBER}-${BRANCH_SLUG}"
46+
else
47+
INSTANCE_ID="pr-${PR_NUMBER}"
48+
fi
4449
NAMESPACE="ambient-code--${INSTANCE_ID}"
4550
IMAGE_TAG="pr-${PR_NUMBER}-amd64"
4651
@@ -74,7 +79,8 @@ jobs:
7479
- name: Log in to OpenShift
7580
run: |
7681
oc login "${{ secrets.TEST_OPENSHIFT_SERVER }}" \
77-
--token="${{ secrets.TEST_OPENSHIFT_TOKEN }}"
82+
--token="${{ secrets.TEST_OPENSHIFT_TOKEN }}" \
83+
--insecure-skip-tls-verify=false
7884
7985
- name: Provision namespace
8086
env:
@@ -99,10 +105,16 @@ jobs:
99105
with:
100106
oc_version: 'latest'
101107

108+
- name: Install kustomize
109+
uses: imranismail/setup-kustomize@v2
110+
with:
111+
kustomize-version: '5.4.3'
112+
102113
- name: Log in to OpenShift
103114
run: |
104115
oc login "${{ secrets.TEST_OPENSHIFT_SERVER }}" \
105-
--token="${{ secrets.TEST_OPENSHIFT_TOKEN }}"
116+
--token="${{ secrets.TEST_OPENSHIFT_TOKEN }}" \
117+
--insecure-skip-tls-verify=false
106118
107119
- name: Install Ambient
108120
id: install
@@ -175,7 +187,8 @@ jobs:
175187
- name: Log in to OpenShift
176188
run: |
177189
oc login "${{ secrets.TEST_OPENSHIFT_SERVER }}" \
178-
--token="${{ secrets.TEST_OPENSHIFT_TOKEN }}"
190+
--token="${{ secrets.TEST_OPENSHIFT_TOKEN }}" \
191+
--insecure-skip-tls-verify=false
179192
180193
- name: Destroy namespace
181194
env:

components/pr-test/install.sh

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,23 @@ pushd "$TMPOVERLAY" > /dev/null
9898
kustomize edit set namespace "$NAMESPACE"
9999
kustomize edit set image \
100100
"quay.io/ambient_code/vteam_frontend:latest=quay.io/ambient_code/vteam_frontend:${IMAGE_TAG}" \
101+
"quay.io/ambient_code/vteam_frontend=quay.io/ambient_code/vteam_frontend:${IMAGE_TAG}" \
101102
"quay.io/ambient_code/vteam_backend:latest=quay.io/ambient_code/vteam_backend:${IMAGE_TAG}" \
103+
"quay.io/ambient_code/vteam_backend=quay.io/ambient_code/vteam_backend:${IMAGE_TAG}" \
102104
"quay.io/ambient_code/vteam_operator:latest=quay.io/ambient_code/vteam_operator:${IMAGE_TAG}" \
105+
"quay.io/ambient_code/vteam_operator=quay.io/ambient_code/vteam_operator:${IMAGE_TAG}" \
103106
"quay.io/ambient_code/vteam_claude_runner:latest=quay.io/ambient_code/vteam_claude_runner:${IMAGE_TAG}" \
107+
"quay.io/ambient_code/vteam_claude_runner=quay.io/ambient_code/vteam_claude_runner:${IMAGE_TAG}" \
104108
"quay.io/ambient_code/vteam_state_sync:latest=quay.io/ambient_code/vteam_state_sync:${IMAGE_TAG}" \
109+
"quay.io/ambient_code/vteam_state_sync=quay.io/ambient_code/vteam_state_sync:${IMAGE_TAG}" \
105110
"quay.io/ambient_code/vteam_api_server:latest=quay.io/ambient_code/vteam_api_server:${IMAGE_TAG}" \
106-
"quay.io/ambient_code/vteam_public_api:latest=quay.io/ambient_code/vteam_public_api:${IMAGE_TAG}"
111+
"quay.io/ambient_code/vteam_api_server=quay.io/ambient_code/vteam_api_server:${IMAGE_TAG}" \
112+
"quay.io/ambient_code/vteam_public_api:latest=quay.io/ambient_code/vteam_public_api:${IMAGE_TAG}" \
113+
"quay.io/ambient_code/vteam_public_api=quay.io/ambient_code/vteam_public_api:${IMAGE_TAG}" \
114+
"quay.io/ambient_code/vteam_control_plane:latest=quay.io/ambient_code/vteam_control_plane:${IMAGE_TAG}" \
115+
"quay.io/ambient_code/vteam_control_plane=quay.io/ambient_code/vteam_control_plane:${IMAGE_TAG}" \
116+
"quay.io/ambient_code/vteam_mcp:latest=quay.io/ambient_code/vteam_mcp:${IMAGE_TAG}" \
117+
"quay.io/ambient_code/vteam_mcp=quay.io/ambient_code/vteam_mcp:${IMAGE_TAG}"
107118

108119
FILTER_SCRIPT="$TMPDIR/filter.py"
109120
cat > "$FILTER_SCRIPT" << 'PYEOF'
@@ -148,7 +159,7 @@ for doc in sys.stdin.read().split('\n---\n'):
148159
print(doc)
149160
PYEOF
150161

151-
kustomize build . \
162+
oc kustomize . \
152163
| NAMESPACE="$NAMESPACE" PR_ID="$PR_ID" \
153164
python3 "$FILTER_SCRIPT" \
154165
| oc apply --token="$ARGOCD_TOKEN" -n "$NAMESPACE" -f -
@@ -196,9 +207,26 @@ echo "==> Step 9: Verifying health"
196207
FRONTEND_URL=$(oc get route frontend-route -n "$NAMESPACE" \
197208
-o jsonpath='https://{.spec.host}' 2>/dev/null || true)
198209

210+
if [[ -z "$FRONTEND_URL" ]]; then
211+
echo "ERROR: frontend-route not found in $NAMESPACE"
212+
exit 1
213+
fi
214+
215+
BACKEND_HOST=$(oc get route backend-api-route -n "$NAMESPACE" \
216+
-o jsonpath='{.spec.host}' 2>/dev/null || true)
217+
218+
if [[ -z "$BACKEND_HOST" ]]; then
219+
echo "ERROR: backend-api-route not found in $NAMESPACE"
220+
exit 1
221+
fi
222+
223+
HEALTH=$(curl -fsS --connect-timeout 5 --max-time 20 \
224+
--retry 3 --retry-all-errors "https://${BACKEND_HOST}/health" || true)
225+
echo " Backend health: ${HEALTH:-<no response>}"
226+
199227
echo ""
200228
echo "==> Ambient installed successfully in $NAMESPACE"
201-
echo " Frontend: ${FRONTEND_URL:-<no route yet>}"
229+
echo " Frontend: ${FRONTEND_URL}"
202230
echo " Image tag: $IMAGE_TAG"
203231

204232
if [[ -n "${GITHUB_OUTPUT:-}" ]]; then

components/pr-test/provision.sh

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,17 @@ usage() {
2727
NAMESPACE="ambient-code--${INSTANCE_ID}"
2828

2929
create() {
30+
echo "==> Reserving slot via ConfigMap lock..."
31+
LOCK_NAME="pr-test-slot-${INSTANCE_ID}"
32+
if ! oc create configmap "$LOCK_NAME" -n "$CONFIG_NAMESPACE" \
33+
--from-literal=instance="$INSTANCE_ID" \
34+
--from-literal=created="$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
35+
2>/dev/null; then
36+
echo "ERROR: Slot already reserved for instance $INSTANCE_ID (lock $LOCK_NAME exists)"
37+
exit 1
38+
fi
39+
echo " Slot reserved: $LOCK_NAME"
40+
3041
echo "==> Checking S0.x instance capacity..."
3142
ACTIVE=$(oc get tenantnamespace -n "$CONFIG_NAMESPACE" \
3243
-l ambient-code/instance-type=s0x --no-headers 2>/dev/null | wc -l | tr -d ' ')
@@ -36,6 +47,7 @@ create() {
3647
echo "Active instances:"
3748
oc get tenantnamespace -n "$CONFIG_NAMESPACE" \
3849
-l ambient-code/instance-type=s0x -o name
50+
oc delete configmap "$LOCK_NAME" -n "$CONFIG_NAMESPACE" --ignore-not-found=true
3951
exit 1
4052
fi
4153
echo " Capacity OK: $ACTIVE/$MAX_S0X_INSTANCES"
@@ -60,17 +72,19 @@ EOF
6072
echo "==> Waiting for namespace ${NAMESPACE} to become Active (timeout: ${READY_TIMEOUT}s)..."
6173
DEADLINE=$((SECONDS + READY_TIMEOUT))
6274
while [ $SECONDS -lt $DEADLINE ]; do
63-
STATUS=$(oc get namespace "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || true)
64-
if [ "$STATUS" == "Active" ]; then
65-
echo " Namespace ${NAMESPACE} is Active."
75+
NS_STATUS=$(oc get namespace "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || true)
76+
TN_READY=$(oc get tenantnamespace "$INSTANCE_ID" -n "$CONFIG_NAMESPACE" \
77+
-o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)
78+
if [ "$NS_STATUS" == "Active" ] && [ "$TN_READY" == "True" ]; then
79+
echo " Namespace ${NAMESPACE} is Active and TenantNamespace is Ready."
6680
echo "$NAMESPACE"
6781
exit 0
6882
fi
69-
echo " status=${STATUS:-NotFound}, retrying..."
83+
echo " ns=${NS_STATUS:-NotFound} tn-ready=${TN_READY:-unknown}, retrying..."
7084
sleep 3
7185
done
7286

73-
echo "ERROR: Namespace ${NAMESPACE} did not become Active within ${READY_TIMEOUT}s."
87+
echo "ERROR: Namespace ${NAMESPACE} did not become Active+Ready within ${READY_TIMEOUT}s."
7488
oc describe tenantnamespace "$INSTANCE_ID" -n "$CONFIG_NAMESPACE" || true
7589
exit 1
7690
}
@@ -85,10 +99,17 @@ destroy() {
8599
oc delete tenantnamespace "$INSTANCE_ID" -n "$CONFIG_NAMESPACE" \
86100
--ignore-not-found=true
87101

102+
LOCK_NAME="pr-test-slot-${INSTANCE_ID}"
103+
oc delete configmap "$LOCK_NAME" -n "$CONFIG_NAMESPACE" --ignore-not-found=true 2>/dev/null || true
104+
88105
echo "==> Waiting for namespace ${NAMESPACE} to be deleted (timeout: ${DELETE_TIMEOUT}s)..."
89106
DEADLINE=$((SECONDS + DELETE_TIMEOUT))
90107
while [ $SECONDS -lt $DEADLINE ]; do
91-
if ! oc get namespace "$NAMESPACE" &>/dev/null; then
108+
NS_CHECK=$(oc get namespace "$NAMESPACE" 2>&1 || true)
109+
if echo "$NS_CHECK" | grep -q '(NotFound)\|not found'; then
110+
echo " Namespace ${NAMESPACE} deleted."
111+
exit 0
112+
elif [ -z "$(oc get namespace "$NAMESPACE" -o name 2>/dev/null || true)" ]; then
92113
echo " Namespace ${NAMESPACE} deleted."
93114
exit 0
94115
fi

docs/internal/developer/local-development/openshift.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ REGISTRY_HOST=$(oc get route default-route -n openshift-image-registry \
4949
--template='{{ .spec.host }}')
5050
INTERNAL_REG="image-registry.openshift-image-registry.svc:5000/ambient-code"
5151

52-
for img in vteam_frontend vteam_backend vteam_operator vteam_public_api vteam_claude_runner vteam_api_server vteam_mcp vteam_control_plane; do
52+
for img in vteam_frontend vteam_backend vteam_operator vteam_public_api vteam_claude_runner vteam_state_sync vteam_api_server vteam_mcp vteam_control_plane; do
5353
podman tag localhost/${img}:latest ${REGISTRY_HOST}/ambient-code/${img}:latest
5454
podman push ${REGISTRY_HOST}/ambient-code/${img}:latest
5555
done

0 commit comments

Comments
 (0)