# Listing metadata from the file viewer: 101 lines, 2.6 KiB, YAML
---
# Playbook: prepares hosts matched by `graphics_devices` for GPU workloads.
#
# In order, it:
#   1. installs build prerequisites via APT,
#   2. adds the graphics-drivers PPA and the NVIDIA container runtime APT repo,
#   3. installs the NVIDIA driver, the CUDA toolkit and the container runtime,
#   4. writes the NVIDIA runtime settings into /etc/docker/daemon.json,
#   5. reboots, then checks the driver by running `nvidia-smi`.
#
# NOTE(review): the play name says "debian", but the `ppa:` shorthand and the
# `ubuntu18.04` repository path look Ubuntu-specific - confirm the target
# distribution before running this against Debian hosts.
- name: Bootstrap debian-nvidia-cuda
  hosts: graphics_devices
  become: true
  vars_files:
    # Secrets
    # NOTE(review): no variable from this file is referenced by the tasks
    # below - confirm whether it is still needed.
    - ../secrets/gluttonycluster-credentials.yaml

  tasks:
    - name: Update APT package index
      apt:
        update_cache: true

    - name: Install prerequisites
      apt:
        name: "{{ packages }}"
      vars:
        packages:
          - build-essential
          - dkms
          - curl
          - gnupg2
          - ca-certificates
          - software-properties-common

    - name: Add NVIDIA PPA repository
      apt_repository:
        repo: "ppa:graphics-drivers/ppa"
        state: present

    # NOTE(review): the Ansible documentation marks apt_key as deprecated;
    # consider a keyring file plus `signed-by` in a follow-up change.
    - name: Add NVIDIA container runtime GPG key
      apt_key:
        url: https://nvidia.github.io/nvidia-docker/gpgkey
        state: present

    # `$(ARCH)` is written to the sources list verbatim; it is not an Ansible
    # or shell variable.
    - name: Add NVIDIA container runtime repository
      apt_repository:
        repo: "deb https://nvidia.github.io/libnvidia-container/stable/ubuntu18.04/$(ARCH) /"
        filename: "nvidia-container-runtime"
        state: present

    - name: Update APT package index after adding PPA
      apt:
        update_cache: true

    - name: Install the latest NVIDIA driver
      apt:
        name: "{{ nvidia_driver }}"
        state: present
      vars:
        nvidia_driver: "nvidia-driver-535"  # Replace with the latest driver version if needed

    - name: Install CUDA toolkit (optional)
      apt:
        name: "{{ cuda_packages }}"
        state: present
      vars:
        cuda_packages:
          - nvidia-cuda-toolkit

    - name: Install NVIDIA container runtime
      apt:
        name: "{{ nvidia_container_packages }}"
        state: present
      vars:
        nvidia_container_packages:
          - nvidia-container-toolkit
          - nvidia-container-runtime

    # daemon.json is a JSON document, so it is read, merged and rewritten as a
    # whole instead of being edited line by line: inserting a bare
    # `"default-runtime": "nvidia"` line produces invalid JSON.
    - name: Ensure the Docker configuration directory exists
      file:
        path: /etc/docker
        state: directory
        mode: "0755"

    - name: Check for an existing Docker daemon configuration
      stat:
        path: /etc/docker/daemon.json
      register: docker_daemon_stat

    - name: Read the existing Docker daemon configuration
      slurp:
        src: /etc/docker/daemon.json
      register: docker_daemon_raw
      when: docker_daemon_stat.stat.exists

    - name: Configure Docker to use the NVIDIA runtime
      copy:
        dest: /etc/docker/daemon.json
        content: "{{ daemon_current | combine(daemon_nvidia, recursive=True) | to_nice_json }}\n"
        mode: "0644"
      vars:
        # Existing settings, or an empty mapping when the file is missing or
        # empty, so unrelated keys already present are preserved by the merge.
        daemon_current: >-
          {{ (docker_daemon_raw.content | b64decode | trim
          | default('{}', true) | from_json)
          if docker_daemon_stat.stat.exists else {} }}
        # Settings merged on top: register the runtime and make it the default.
        daemon_nvidia:
          default-runtime: nvidia
          runtimes:
            nvidia:
              path: nvidia-container-runtime
              runtimeArgs: []
      notify: Restart Docker

    # NOTE(review): this reboot is unconditional, so it happens on every run
    # of the play, not only when the driver changed.
    - name: Reboot the server to apply changes
      reboot:
        msg: "Rebooting to apply NVIDIA driver installation"

    # Read-only probe: never reports "changed". Errors are ignored here so the
    # output can be displayed before the explicit failure check below.
    - name: Verify NVIDIA driver installation
      command: nvidia-smi
      register: nvidia_smi_output
      changed_when: false
      ignore_errors: true

    - name: Display NVIDIA driver installation result
      debug:
        var: nvidia_smi_output.stdout

    # default('') keeps the check well-defined if the probe returned no stdout.
    - name: Fail if NVIDIA driver is not installed correctly
      fail:
        msg: "NVIDIA driver installation failed. Please check the output."
      when: "'NVIDIA-SMI' not in (nvidia_smi_output.stdout | default(''))"

  handlers:
    # Triggered by the daemon.json task when the file content changes.
    - name: Restart Docker
      service:
        name: docker
        state: restarted