|
---
# Play: install the NVIDIA container toolkit and the Nomad NVIDIA device
# plugin on the targeted hosts.
- name: Install nomad nvidia plugin
  # Targets the hosts/group named by `hosts_limit` when it is defined,
  # otherwise the `infrastructure` group.
  hosts: "{{ hosts_limit | default('infrastructure') }}"
  become: true
  # Facts are required: the build task reads `ansible_env.PATH`.
  gather_facts: true
  # Stop the play for all hosts as soon as any host fails.
  any_errors_fatal: true

  vars:
    # Parent directory of the plugin source checkout.
    build_work_dir: /tmp
    # Upstream git repository of the Nomad NVIDIA device plugin.
    upstream_file_url: 'https://github.com/hashicorp/nomad-device-nvidia.git'
    # Version pinned for every NVIDIA container toolkit package.
    nvidia_container_toolkit_version: '1.17.8-1'
    # Remote locations of the repository signing key and apt source list.
    nvidia_gpg_key_url: 'https://nvidia.github.io/libnvidia-container/gpgkey'
    nvidia_repo_list_url: 'https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list'
    # Local destinations for the dearmored keyring and the apt source list.
    nvidia_keyring_path: '/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg'
    nvidia_list_path: '/etc/apt/sources.list.d/nvidia-container-toolkit.list'

  # Roles are executed after `pre_tasks` and before `tasks`.
  # NOTE(review): presumably this role provides the Go toolchain under
  # /usr/local/go used by the build task - confirm against the role.
  roles:
    - golang
| 18 | + |
| 19 | + pre_tasks: |
| 20 | + |
| 21 | + - name: Créer le répertoire du keyring s'il n'existe pas |
| 22 | + ansible.builtin.file: |
| 23 | + path: "{{ nvidia_keyring_path | dirname }}" |
| 24 | + state: directory |
| 25 | + mode: "0755" |
| 26 | + |
| 27 | + - name: Télécharger et enregistrer la clé GPG NVIDIA |
| 28 | + ansible.builtin.get_url: |
| 29 | + url: "{{ nvidia_gpg_key_url }}" |
| 30 | + dest: /tmp/nvidia-container-toolkit.gpg |
| 31 | + mode: "0644" |
| 32 | + |
| 33 | + - name: Convertir la clé GPG en format keyring |
| 34 | + ansible.builtin.command: |
| 35 | + cmd: "gpg --dearmor -o {{ nvidia_keyring_path }} /tmp/nvidia-container-toolkit.gpg" |
| 36 | + creates: "{{ nvidia_keyring_path }}" |
| 37 | + |
| 38 | + - name: Télécharger le fichier de dépôt NVIDIA et ajouter le signed-by |
| 39 | + ansible.builtin.shell: | |
| 40 | + curl -s -L {{ nvidia_repo_list_url }} | \ |
| 41 | + sed 's#deb https://#deb [signed-by={{ nvidia_keyring_path }}] https://#g' > {{ nvidia_list_path }} |
| 42 | + args: |
| 43 | + creates: "{{ nvidia_list_path }}" |
| 44 | + |
| 45 | + - name: Activer la section experimental (décommenter) |
| 46 | + ansible.builtin.replace: |
| 47 | + path: "{{ nvidia_list_path }}" |
| 48 | + regexp: '^#(.*experimental.*)$' |
| 49 | + replace: '\1' |
| 50 | + |
| 51 | + - name: Mettre à jour la liste des paquets |
| 52 | + ansible.builtin.apt: |
| 53 | + update_cache: true |
| 54 | + |
| 55 | + - name: Installer les paquets NVIDIA Container Toolkit |
| 56 | + ansible.builtin.apt: |
| 57 | + name: |
| 58 | + - "nvidia-container-toolkit={{ nvidia_container_toolkit_version }}" |
| 59 | + - "nvidia-container-toolkit-base={{ nvidia_container_toolkit_version }}" |
| 60 | + - "libnvidia-container-tools={{ nvidia_container_toolkit_version }}" |
| 61 | + - "libnvidia-container1={{ nvidia_container_toolkit_version }}" |
| 62 | + state: present |
| 63 | + |
| 64 | + tasks: |
| 65 | + - name: Install dependencies |
| 66 | + ansible.builtin.apt: |
| 67 | + pkg: |
| 68 | + - nvidia-utils-580 |
| 69 | + - nvidia-driver-580 |
| 70 | + - nvidia-container-runtime |
| 71 | + - nomad-device-nvidia |
| 72 | + state: present |
| 73 | + install_recommends: true |
| 74 | + update_cache: true |
| 75 | + register: apt_status |
| 76 | + until: apt_status is success |
| 77 | + delay: 6 |
| 78 | + retries: 10 |
| 79 | + |
| 80 | + - name: Nomad-nvidia-plugin | Git checkout |
| 81 | + ansible.builtin.git: |
| 82 | + repo: https://github.com/hashicorp/nomad-device-nvidia.git |
| 83 | + dest: "{{ build_work_dir }}/nomad-device-nvidia" |
| 84 | + version: main |
| 85 | + force: true |
| 86 | + |
| 87 | + - name: Nomad-nvidia-plugin | Build binary |
| 88 | + ansible.builtin.command: |
| 89 | + cmd: make compile |
| 90 | + chdir: "{{ build_work_dir }}/nomad-device-nvidia" |
| 91 | + environment: |
| 92 | + PATH: "/usr/local/go/bin:{{ ansible_env.PATH }}" |
| 93 | + register: my_output |
| 94 | + changed_when: my_output.rc != 0 |
| 95 | + |
| 96 | + - name: Create nomad plugin directory |
| 97 | + ansible.builtin.file: |
| 98 | + path: /opt/nomad/plugins |
| 99 | + state: directory |
| 100 | + owner: root |
| 101 | + group: root |
| 102 | + mode: "0755" |
| 103 | + |
| 104 | + - name: Nomad-nvidia-plugin | Copy binary |
| 105 | + ansible.builtin.copy: |
| 106 | + src: /tmp/nomad-plugins/nomad-device-nvidia |
| 107 | + dest: /opt/nomad/plugins/nomad-device-nvidia |
| 108 | + owner: root |
| 109 | + group: root |
| 110 | + mode: '0755' |
| 111 | + remote_src: true |
| 112 | + |
| 113 | + |
| 114 | + - name: Copy using inline content |
| 115 | + ansible.builtin.copy: |
| 116 | + content: | |
| 117 | + plugin "nomad-device-nvidia" { |
| 118 | + config { |
| 119 | + enabled = true |
| 120 | + fingerprint_period = "5s" |
| 121 | + } |
| 122 | + } |
| 123 | + dest: /etc/nomad.d/nvidia.hcl |
| 124 | + owner: root |
| 125 | + group: root |
| 126 | + mode: '0644' |
| 127 | + |
| 128 | + # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html |
| 129 | + - name: Nomad-nvidia-plugin | Test nvidia support |
| 130 | + ansible.builtin.command: nvidia-ctk runtime configure --runtime=docker |
| 131 | + |
| 132 | + - name: Nomad-nvidia-plugin | Restart docker |
| 133 | + ansible.builtin.command: systemctl restart docker |
| 134 | + |
| 135 | + # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/sample-workload.html |
| 136 | + - name: Nomad-nvidia-plugin | Test nvidia support |
| 137 | + ansible.builtin.command: docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi |
| 138 | + register: docker_run |
| 139 | + |
| 140 | + - name: Nomad-nvidia-plugin | Debug |
| 141 | + ansible.builtin.debug: |
| 142 | + msg: "{{ docker_run }}" |
0 commit comments