-
Notifications
You must be signed in to change notification settings - Fork 0
39 lines (32 loc) · 1.19 KB
/
test.yml
File metadata and controls
39 lines (32 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Workflow that runs the GPU test pod against the AKS cluster named below.
name: Step 4 - Test GPU Operator Installation

# Manual trigger only: the workflow starts when dispatched by hand.
on:
  workflow_dispatch:

# Workflow-level environment: identifies the target AKS cluster for the
# `az aks` commands issued by the job's shell step.
env:
  RESOURCE_GROUP: rg-pvt-aks-h100
  CLUSTER_NAME: pvt-aks-h100
jobs:
  # Applies the nvidia-smi test pod manifest to the AKS cluster, waits for the
  # pod to finish, and prints its logs.
  test-gpu-operator:
    runs-on: ubuntu-latest
    permissions:
      # Allows the job to request an OIDC token, which azure/login exchanges
      # for Azure credentials (federated identity, no stored client secret).
      id-token: write
      contents: read
    steps:
      # The manifest pod-check-nvidia-smi.yml is read from the repository.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Log in to Azure with federated identity (User Assigned Managed Identity)
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

      - name: Apply pod-check-nvidia-smi.yml and check logs
        run: |
          # `az aks command invoke` runs the kubectl command inside the
          # cluster; --file uploads the manifest so kubectl can read it there.
          #
          # "Succeeded" is a pod *phase*, not a pod condition, so
          # `--for=condition=Succeeded` can never be satisfied and always runs
          # into the timeout. Wait on .status.phase via jsonpath instead.
          #
          # NOTE(review): confirm that the installed Azure CLI version makes
          # `az aks command invoke` exit non-zero when the in-cluster command
          # fails; otherwise a failed wait will not fail this step.
          az aks command invoke \
            --resource-group "$RESOURCE_GROUP" \
            --name "$CLUSTER_NAME" \
            --command "kubectl apply -f pod-check-nvidia-smi.yml -n default && kubectl wait --for=jsonpath='{.status.phase}'=Succeeded pod/nvidia-gpu-test -n default --timeout=120s" \
            --file pod-check-nvidia-smi.yml

          # Print the test pod's output (expected to be the nvidia-smi report).
          az aks command invoke \
            --resource-group "$RESOURCE_GROUP" \
            --name "$CLUSTER_NAME" \
            --command "kubectl logs nvidia-gpu-test -n default"