summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Suren A. Chilingaryan <csa@suren.me> 2019-10-06 04:37:01 +0200
committer Suren A. Chilingaryan <csa@suren.me> 2019-10-06 04:37:01 +0200
commit b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26 (patch)
tree 35b03ed3f6349de7db2482761ff6295aee5ae813
parent 1c830e285e19f2d571cf50ef912f01f0f7d68e10 (diff)
download itm-b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26.tar.gz
itm-b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26.tar.bz2
itm-b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26.tar.xz
itm-b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26.zip
Integration of CentOS8 and ipecompute nodes
-rw-r--r-- docker.yml 6
-rw-r--r-- install.yml 20
-rw-r--r-- inventories/ipe.erb 17
-rw-r--r-- rocm.yml 7
-rw-r--r-- roles/common/tasks/main.yml 13
-rw-r--r-- roles/common/tasks/main_dnf.yml 15
-rw-r--r-- roles/common/tasks/main_yum.yml 24
-rw-r--r-- roles/common/tasks/software.yml 17
-rw-r--r-- roles/cuda/vars/centos-8.yml 4
-rw-r--r-- roles/cuda/vars/redhat-8.yml 4
-rw-r--r-- roles/docker/defaults/main.yml 8
-rw-r--r-- roles/docker/handlers/main.yml 4
-rw-r--r-- roles/docker/tasks/configure_apt.yml 15
-rw-r--r-- roles/docker/tasks/configure_dnf.yml 18
-rw-r--r-- roles/docker/tasks/configure_yum.yml 38
-rw-r--r-- roles/docker/tasks/install_docker.yml 14
-rw-r--r-- roles/docker/tasks/install_podman.yml 12
-rw-r--r-- roles/docker/tasks/main.yml 24
-rw-r--r-- roles/docker/vars/centos-7.yml 3
-rw-r--r-- roles/docker/vars/centos-8.yml 11
-rw-r--r-- roles/rocm/tasks/main.yml 20
-rw-r--r-- roles/storage/defaults/main.yml 1
-rw-r--r-- roles/storage/tasks/ipecompute2.yml 17
-rw-r--r-- roles/storage/tasks/ipecompute4.yml 35
-rw-r--r-- roles/storage/tasks/main.yml 20
-rw-r--r-- roles/storage/tasks/nfs.yml 12
26 files changed, 348 insertions, 31 deletions
diff --git a/docker.yml b/docker.yml
new file mode 100644
index 0000000..ea91aed
--- /dev/null
+++ b/docker.yml
@@ -0,0 +1,6 @@
+- name: Docker
+ hosts: all
+ remote_user: root
+ roles:
+ - role: docker
+
diff --git a/install.yml b/install.yml
index 278dac9..f1acdd8 100644
--- a/install.yml
+++ b/install.yml
@@ -15,9 +15,29 @@
remote_user: root
roles:
- role: cuda
+
+# The AMD driver is "surprisingly" crashing
+#- name: ROCM
+# hosts: rocm
+# remote_user: root
+# roles:
+# - role: rocm
+
+
+- name: Docker
+ hosts: docker
+ remote_user: root
+ roles:
+ - role: docker
- name: Desktop
hosts: desktop
remote_user: root
roles:
- role: desktop
+
+- name: Additional Local and Network Storage
+ hosts: infra
+ remote_user: root
+ roles:
+ - role: storage
diff --git a/inventories/ipe.erb b/inventories/ipe.erb
index df62890..20edf72 100644
--- a/inventories/ipe.erb
+++ b/inventories/ipe.erb
@@ -1,6 +1,9 @@
[ands]
192.168.26.[140:149]
+[compute]
+192.168.26.[130:139]
+
[camera]
192.168.26.[80:89]
@@ -13,7 +16,21 @@ student
[cuda]
192.168.26.[80:84]
192.168.26.[86:89]
+192.168.26.[131:133]
+
+[rocm]
+192.168.26.134
+
+[docker]
+192.168.26.[131:139]
[ib]
192.168.26.[60:69]
192.168.26.[80:89]
+192.168.26.[130:139]
+192.168.26.[140:149]
+
+[infra]
+192.168.26.[80:89]
+192.168.26.[130:139]
+192.168.26.[140:149]
diff --git a/rocm.yml b/rocm.yml
new file mode 100644
index 0000000..c76c068
--- /dev/null
+++ b/rocm.yml
@@ -0,0 +1,9 @@
+---
+# Stand-alone playbook that applies only the `rocm` role.
+# NOTE(review): `hosts: all` targets every inventory host; presumably this is
+# meant to be run with `--limit` against the ROCm machines -- confirm.
+- name: ROCm
+  hosts: all
+  remote_user: root
+  roles:
+    - role: rocm
diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml
index 286a027..9f3cf79 100644
--- a/roles/common/tasks/main.yml
+++ b/roles/common/tasks/main.yml
@@ -5,14 +5,11 @@
- epel-release
when: ansible_distribution == 'CentOS' or ansible_distribution == 'Red Hat Enterprise Linux'
-- name: Add our repository with updates and overrides
- yum_repository: name="{{ item.name }}" description= "{{ item.description | default('Ands repository') }}" baseurl="{{ item.url }}" enabled="yes" gpgcheck="no" cost="{{ item.cost | default(1) }}"
- with_items: "{{ ands_repositories | default([]) }}"
-
-# We always update on first install and if requested
-- name: Update CentOS
- yum: name=* state=latest update_cache=yes
- when: (result | changed) or (os_update | default(false))
+- include_tasks: main_yum.yml
+ when: ansible_pkg_mgr == 'yum'
+
+- include_tasks: main_dnf.yml
+ when: ansible_pkg_mgr == 'dnf'
- name: Install additional software
include_tasks: software.yml
diff --git a/roles/common/tasks/main_dnf.yml b/roles/common/tasks/main_dnf.yml
new file mode 100644
index 0000000..0572132
--- /dev/null
+++ b/roles/common/tasks/main_dnf.yml
@@ -0,0 +1,34 @@
+---
+# dnf-specific tasks of the common role: repository setup, system update and
+# the packages listed as Ansible requirements.
+
+# One repository entry per item of `ands_repositories` (empty list if unset).
+- name: Add our repository with updates and overrides
+  yum_repository:
+    name: "{{ item.name }}"
+    description: "{{ item.description | default('Ands repository') }}"
+    baseurl: "{{ item.url }}"
+    enabled: true
+    gpgcheck: false
+    cost: "{{ item.cost | default(1) }}"
+  with_items: "{{ ands_repositories | default([]) }}"
+
+# We always update on first install and if requested.
+# `result is changed` is the Jinja2 test form; the `result | changed` filter
+# form was deprecated in Ansible 2.5 and removed in 2.9.
+# NOTE(review): `result` is not registered in this file; presumably an earlier
+# task in the including file registers it -- confirm.
+- name: Update CentOS
+  dnf:
+    name: "*"
+    state: latest
+  when: (result is changed) or (os_update | default(false))
+
+- name: Install various ansible requirements
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - yum-plugin-versionlock
+    - python-rhsm-certificates
+#    - iptables-services
diff --git a/roles/common/tasks/main_yum.yml b/roles/common/tasks/main_yum.yml
new file mode 100644
index 0000000..2b320d5
--- /dev/null
+++ b/roles/common/tasks/main_yum.yml
@@ -0,0 +1,44 @@
+---
+# yum-specific tasks of the common role: repository setup, system update and
+# the packages listed as Ansible requirements.
+
+# One repository entry per item of `ands_repositories` (empty list if unset).
+- name: Add our repository with updates and overrides
+  yum_repository:
+    name: "{{ item.name }}"
+    description: "{{ item.description | default('Ands repository') }}"
+    baseurl: "{{ item.url }}"
+    enabled: true
+    gpgcheck: false
+    cost: "{{ item.cost | default(1) }}"
+  with_items: "{{ ands_repositories | default([]) }}"
+
+# We always update on first install and if requested.
+# `result is changed` is the Jinja2 test form; the `result | changed` filter
+# form was deprecated in Ansible 2.5 and removed in 2.9.
+# NOTE(review): `result` is not registered in this file; presumably an earlier
+# task in the including file registers it -- confirm.
+- name: Update CentOS
+  yum:
+    name: "*"
+    state: latest
+    update_cache: true
+  when: (result is changed) or (os_update | default(false))
+
+- name: Install various ansible requirements
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - yum-plugin-versionlock
+    - libselinux-python
+    - libsemanage-python
+    - yamllint
+    - pyOpenSSL
+    - python-passlib
+    - python2-ruamel-yaml
+    - python2-jmespath
+    - python-ipaddress
+    - iptables-services
+    - PyYAML
+    - python-rhsm-certificates
diff --git a/roles/common/tasks/software.yml b/roles/common/tasks/software.yml
index c621ef3..3a1a5c1 100644
--- a/roles/common/tasks/software.yml
+++ b/roles/common/tasks/software.yml
@@ -1,19 +1,3 @@
-- name: Install various ansible requirements
- package: name={{item}} state=present
- with_items:
- - yum-plugin-versionlock
- - libselinux-python
- - libsemanage-python
- - yamllint
- - pyOpenSSL
- - python-passlib
- - python2-ruamel-yaml
- - python2-jmespath
- - python-ipaddress
- - iptables-services
- - PyYAML
- - python-rhsm-certificates
-
- name: Install various administrative tools
package: name={{item}} state=present
with_items:
@@ -21,7 +5,6 @@
- telnet
- lsof
- strace
- - bzr
- git
- pciutils
diff --git a/roles/cuda/vars/centos-8.yml b/roles/cuda/vars/centos-8.yml
new file mode 100644
index 0000000..935e84d
--- /dev/null
+++ b/roles/cuda/vars/centos-8.yml
@@ -0,0 +1,4 @@
+---
+cuda_repo_subfolder: rhel8
+
+# vim:ft=ansible: \ No newline at end of file
diff --git a/roles/cuda/vars/redhat-8.yml b/roles/cuda/vars/redhat-8.yml
new file mode 100644
index 0000000..935e84d
--- /dev/null
+++ b/roles/cuda/vars/redhat-8.yml
@@ -0,0 +1,4 @@
+---
+cuda_repo_subfolder: rhel8
+
+# vim:ft=ansible: \ No newline at end of file
diff --git a/roles/docker/defaults/main.yml b/roles/docker/defaults/main.yml
new file mode 100644
index 0000000..a5bcb04
--- /dev/null
+++ b/roles/docker/defaults/main.yml
@@ -0,0 +1,8 @@
+---
+docker_repo_url: "https://download.docker.com/linux"
+nvidia_docker_repo_url: "https://nvidia.github.io"
+
+nvidia_repos:
+ - libnvidia-container
+ - nvidia-container-runtime
+ - nvidia-docker
diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml
new file mode 100644
index 0000000..3eb0349
--- /dev/null
+++ b/roles/docker/handlers/main.yml
@@ -0,0 +1,4 @@
+---
+- name: restart docker
+ systemd: name="docker" daemon_reload="yes" state="restarted"
+ become: yes
diff --git a/roles/docker/tasks/configure_apt.yml b/roles/docker/tasks/configure_apt.yml
new file mode 100644
index 0000000..3fd961a
--- /dev/null
+++ b/roles/docker/tasks/configure_apt.yml
@@ -0,0 +1,15 @@
+---
+# tasks file for the docker role: NVIDIA repository setup (apt)
+- name: Trust packaging key for Nvidia repositories (apt)
+ apt_key:
+ data: "{{ lookup('file', 'files/nvidia_docker_packaging_key.asc') }}"
+ id: "{{ nvidia_docker_packaging_key_id }}"
+ state: present
+
+- name: Configure Nvidia repository (apt)
+ apt_repository:
+ repo: "deb {{ nvidia_docker_repo_url }}/{{ nvidia_docker_repo_subfolder }} /"
+ filename: nvidia_docker
+ state: present
+
+# vim:ft=ansible:
diff --git a/roles/docker/tasks/configure_dnf.yml b/roles/docker/tasks/configure_dnf.yml
new file mode 100644
index 0000000..73ecb30
--- /dev/null
+++ b/roles/docker/tasks/configure_dnf.yml
@@ -0,0 +1,18 @@
+---
+- name: Import NVIDIA Docker repository gpg keys
+ rpm_key:
+ key: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey"
+ state: present
+ with_items: "{{ nvidia_repos }}"
+ when: "'cuda' in group_names"
+
+- name: Configure Nvidia repositories (yum)
+ yum_repository:
+ name: "{{ item }}"
+ description: Official {{ item }} repository
+ baseurl: "{{ nvidia_docker_repo_url }}/{{ item }}/{{ nvidia_docker_repo_subfolder }}/x86_64/"
+ gpgkey: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey"
+ gpgcheck: no
+ enabled: yes
+ with_items: "{{ nvidia_repos }}"
+ when: "'cuda' in group_names"
diff --git a/roles/docker/tasks/configure_yum.yml b/roles/docker/tasks/configure_yum.yml
new file mode 100644
index 0000000..99a2743
--- /dev/null
+++ b/roles/docker/tasks/configure_yum.yml
@@ -0,0 +1,38 @@
+---
+- name: Upload packaging key for docker repositories
+ copy:
+ src: docker_packaging_key.asc
+ dest: "{{ docker_rpm_key_path }}"
+ mode: 0644
+
+- name: Import Docker CE repository gpg key
+ rpm_key:
+ key: https://download.docker.com/linux/centos/gpg
+ state: present
+
+- name: Import NVIDIA Docker repository gpg keys
+ rpm_key:
+ key: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey"
+ state: present
+ with_items: "{{ nvidia_repos }}"
+
+- name: Configure docker repositories (yum)
+ yum_repository:
+ name: "docker-ce-{{ item }}"
+ description: Official docker-ce repository
+ baseurl: "{{ docker_repo_url }}/{{ docker_repo_subfolder }}/x86_64/{{ item }}"
+ gpgkey: https://download.docker.com/linux/centos/gpg
+ gpgcheck: yes
+ enabled: yes
+ with_items:
+ - stable
+
+- name: Configure Nvidia repositories (yum)
+ yum_repository:
+ name: "{{ item }}"
+ description: Official {{ item }} repository
+ baseurl: "{{ nvidia_docker_repo_url }}/{{ item }}/{{ nvidia_docker_repo_subfolder }}/x86_64/"
+ gpgkey: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey"
+ gpgcheck: no
+ enabled: yes
+ with_items: "{{ nvidia_repos }}"
diff --git a/roles/docker/tasks/install_docker.yml b/roles/docker/tasks/install_docker.yml
new file mode 100644
index 0000000..9ae0cb9
--- /dev/null
+++ b/roles/docker/tasks/install_docker.yml
@@ -0,0 +1,14 @@
+- name: Install requirements
+ package: name="{{ item }}" state=present
+ with_items:
+ - lvm2
+
+- name: Install nvidia docker
+ package: name="nvidia-docker2" state=present
+ notify:
+ - restart docker
+
+- name: Install additional packages
+ package: name="{{ item }}" state=present
+ with_items:
+ - docker-compose
diff --git a/roles/docker/tasks/install_podman.yml b/roles/docker/tasks/install_podman.yml
new file mode 100644
index 0000000..3498aa7
--- /dev/null
+++ b/roles/docker/tasks/install_podman.yml
@@ -0,0 +1,12 @@
+- name: Install docker packages
+ package: name="{{ item }}" state=present
+ with_items:
+ - podman
+ - buildah
+ - skopeo
+
+- name: Install NVIDIA packages
+ package: name="{{ item }}" state=present
+ with_items:
+ - nvidia-container-runtime
+ when: "'cuda' in group_names"
diff --git a/roles/docker/tasks/main.yml b/roles/docker/tasks/main.yml
new file mode 100644
index 0000000..f13f99f
--- /dev/null
+++ b/roles/docker/tasks/main.yml
@@ -0,0 +1,24 @@
+---
+- name: "Gather OS specific variables"
+ include_vars: "{{ item }}"
+ with_first_found:
+ - "{{ ansible_distribution|lower }}-{{ ansible_distribution_version }}.yml"
+ - "{{ ansible_distribution|lower }}-{{ ansible_distribution_major_version }}.yml"
+ - "{{ ansible_distribution|lower }}.yml"
+ - "{{ ansible_os_family|lower }}.yml"
+
+- include_tasks: configure_yum.yml
+ when: ansible_pkg_mgr == 'yum'
+
+- include_tasks: configure_dnf.yml
+ when: ansible_pkg_mgr == 'dnf'
+
+- include_tasks: configure_apt.yml
+ when: ansible_pkg_mgr == 'apt'
+
+- include_tasks: install_docker.yml
+ when: ansible_pkg_mgr == 'yum' or ansible_pkg_mgr == 'apt'
+
+- include_tasks: install_podman.yml
+ when: ansible_pkg_mgr == 'dnf'
+
diff --git a/roles/docker/vars/centos-7.yml b/roles/docker/vars/centos-7.yml
new file mode 100644
index 0000000..e681468
--- /dev/null
+++ b/roles/docker/vars/centos-7.yml
@@ -0,0 +1,3 @@
+---
+nvidia_docker_repo_subfolder: centos7
+docker_repo_subfolder: centos/7
diff --git a/roles/docker/vars/centos-8.yml b/roles/docker/vars/centos-8.yml
new file mode 100644
index 0000000..d4d24fe
--- /dev/null
+++ b/roles/docker/vars/centos-8.yml
@@ -0,0 +1,11 @@
+---
+# While we have RHEL8 repo, in fact it references centos7 packages
+# https://nvidia.github.io/nvidia-docker/rhel8.0/nvidia-docker.repo
+# nvidia_docker_repo_subfolder: rhel8.0
+nvidia_docker_repo_subfolder: centos7
+#docker_repo_subfolder: centos/8
+
+
+nvidia_repos:
+ - libnvidia-container
+ - nvidia-container-runtime
diff --git a/roles/rocm/tasks/main.yml b/roles/rocm/tasks/main.yml
new file mode 100644
index 0000000..4ae1a87
--- /dev/null
+++ b/roles/rocm/tasks/main.yml
@@ -0,0 +1,20 @@
+- name: Configure DarkSoft repositories (for packages mangling provides/requires to suit ROCm)
+ yum_repository:
+ name: "ands_centos8"
+ description: Various packages for CentOS8
+ baseurl: "http://ufo.kit.edu/ands/repos/centos8/centos8/"
+ gpgcheck: no
+ enabled: yes
+
+- name: Configure ROCm repositories (yum)
+ yum_repository:
+ name: "rocm"
+ description: AMD ROCm Drivers and Infrastructure
+ baseurl: "http://repo.radeon.com/rocm/yum/rpm/"
+ gpgcheck: no
+ enabled: yes
+
+- name: Install ROCm drivers and packages
+ package: name="{{ item }}" state=present
+ with_items:
+ - rocm-dkms
diff --git a/roles/storage/defaults/main.yml b/roles/storage/defaults/main.yml
new file mode 100644
index 0000000..ca36e70
--- /dev/null
+++ b/roles/storage/defaults/main.yml
@@ -0,0 +1 @@
+compute4_ssds: ['sda','sdb','sdc','sdd','sde','sdf','sdg','sdh']
diff --git a/roles/storage/tasks/ipecompute2.yml b/roles/storage/tasks/ipecompute2.yml
new file mode 100644
index 0000000..9b2cef8
--- /dev/null
+++ b/roles/storage/tasks/ipecompute2.yml
@@ -0,0 +1,17 @@
+- name: Delete partitions
+ parted: device="/dev/sda" label="gpt" number="{{ item }}" state="absent"
+ with_items: [ 2, 3, 4 ]
+
+- name: Create partition
+ parted:
+ device: "/dev/sda"
+ label: "gpt"
+ number: 1
+ name: "fast"
+ state: "present"
+
+- name: arrays | Creating Array(s) Filesystem
+ filesystem: dev="/dev/sda1" fstype="xfs"
+
+- name: arrays | Mounting Array(s)
+ mount: name="/mnt/fast" src="/dev/sda1" fstype="xfs" state="mounted"
diff --git a/roles/storage/tasks/ipecompute4.yml b/roles/storage/tasks/ipecompute4.yml
new file mode 100644
index 0000000..5b3a88f
--- /dev/null
+++ b/roles/storage/tasks/ipecompute4.yml
@@ -0,0 +1,35 @@
+---
+#- name: Delete partitions
+# parted: device="/dev/{{ item[0] }}" label="gpt" number="{{ item[1] }}" state="absent"
+# with_nested:
+# - "{{ compute4_ssds }}"
+# - [ 2, 3, 4 ]
+
+- name: Create partition
+ parted:
+ device: "/dev/{{ item }}"
+ label: "gpt"
+ number: 1
+ name: "softraid"
+ flags: [raid]
+ state: "present"
+ failed_when: false
+ with_items: "{{ compute4_ssds }}"
+
+- name: arrays | Checking Status Of Array(s)
+ shell: "cat /proc/mdstat | grep md10"
+ register: "array_check"
+ changed_when: false
+ failed_when: false
+ check_mode: no
+
+- name: arrays | Creating Array(s)
+ shell: "yes | mdadm --create /dev/md10 --level=0 --raid-devices={{ compute4_ssds | count }} {{ compute4_ssds | map('regex_replace', '(.*)', '/dev/\\1') | join ('1 ') }}1"
+ register: "array_created"
+ when: array_check.rc != 0
+
+- name: arrays | Creating Array(s) Filesystem
+ filesystem: dev="/dev/md10" fstype="xfs"
+
+- name: arrays | Mounting Array(s)
+ mount: name="/mnt/fast" src="/dev/md10" fstype="xfs" state="mounted"
diff --git a/roles/storage/tasks/main.yml b/roles/storage/tasks/main.yml
index 871e785..014e396 100644
--- a/roles/storage/tasks/main.yml
+++ b/roles/storage/tasks/main.yml
@@ -1,9 +1,17 @@
---
-- name: Ensure NFS common is installed.
- package: name=nfs-utils state=present
+- name: Ensure required software is installed.
+ package: name="{{ item }}" state=present
+ with_items: [ 'parted', 'mdadm', 'nfs-utils' ]
-- name: Create mountable dir
- file: path=/mnt/ands state=directory mode=755 owner=root group=root
+- debug: msg="{{ inventory_hostname }}"
-- name: set mountpoints
- mount: name=/mnt/ands src=192.168.26.140:/mnt/ands fstype=nfs4 opts=defaults,minorversion=1,_netdev,nofail,soft,nodiratime,noatime dump=0 passno=0 state=mounted
+- name: configure network fs
+ include_tasks: nfs.yml
+
+- name: configure ipepdvcompute2
+ include_tasks: ipecompute2.yml
+ when: inventory_hostname == '192.168.26.132'
+
+- name: configure ipepdvcompute4
+ include_tasks: ipecompute4.yml
+ when: inventory_hostname == '192.168.26.134'
diff --git a/roles/storage/tasks/nfs.yml b/roles/storage/tasks/nfs.yml
new file mode 100644
index 0000000..9dbd467
--- /dev/null
+++ b/roles/storage/tasks/nfs.yml
@@ -0,0 +1,39 @@
+---
+# Network file systems: removes the /mnt/ands NFS mount and mounts /mnt/pdv.
+
+- name: Create mount point /mnt/ands
+  file:
+    path: /mnt/ands
+    state: directory
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Create mount point /mnt/pdv
+  file:
+    path: /mnt/pdv
+    state: directory
+    mode: "0755"
+    owner: root
+    group: root
+
+# state is `absent` here: the /mnt/ands NFS mount is removed, not mounted.
+- name: Remove /mnt/ands NFS mount
+  mount:
+    name: /mnt/ands
+    src: "192.168.26.140:/mnt/ands"
+    fstype: nfs4
+    opts: "defaults,minorversion=1,_netdev,nofail,soft,nodiratime,noatime"
+    dump: "0"
+    passno: "0"
+    state: absent
+
+- name: Mount /mnt/pdv over NFS
+  mount:
+    name: /mnt/pdv
+    src: "192.168.26.170:/pdv"
+    fstype: nfs
+    opts: "defaults,_netdev,nofail,soft,nodiratime,noatime"
+    dump: "0"
+    passno: "0"
+    state: mounted