diff options
Diffstat (limited to 'playbooks/common/openshift-cluster/upgrades/etcd')
10 files changed, 325 insertions, 0 deletions
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml new file mode 100644 index 000000000..d0eadf1fc --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -0,0 +1,94 @@ +--- +- name: Backup etcd + hosts: etcd_hosts_to_backup + vars: + embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" + etcdctl_command: "{{ 'etcdctl' if not openshift.common.is_containerized or embedded_etcd else 'docker exec etcd_container etcdctl' }}" + roles: + - openshift_facts + tasks: + # Ensure we persist the etcd role for this host in openshift_facts + - openshift_facts: + role: etcd + local_facts: {} + when: "'etcd' not in openshift" + + - stat: path=/var/lib/openshift + register: var_lib_openshift + + - stat: path=/var/lib/origin + register: var_lib_origin + + - name: Create origin symlink if necessary + file: src=/var/lib/openshift/ dest=/var/lib/origin state=link + when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False + + # TODO: replace shell module with command and update later checks + # We assume to be using the data dir for all backups. + - name: Check available disk space for etcd backup + shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 + register: avail_disk + + # TODO: replace shell module with command and update later checks + - name: Check current embedded etcd disk usage + shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 + register: etcd_disk_usage + when: embedded_etcd | bool + + - name: Abort if insufficient disk space for etcd backup + fail: + msg: > + {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, + {{ avail_disk.stdout }} Kb available. + when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + + # For non containerized and non embedded we should have the correct version of + # etcd installed already. So don't do anything. + # + # For embedded or containerized we need to use the latest because OCP 3.3 uses + # a version of etcd that can only be backed up with etcd-3.x and if it's + # containerized then etcd version may be newer than that on the host so + # upgrade it. + # + # On atomic we have neither yum nor dnf so ansible throws a hard to debug error + # if you use package there, like this: "Could not find a module for unknown." + # see https://bugzilla.redhat.com/show_bug.cgi?id=1408668 + # + # TODO - We should refactor all containerized backups to use the containerized + # version of etcd to perform the backup rather than relying on the host's + # binaries. Until we do that we'll continue to have problems backing up etcd + # when atomic host has an older version than the version that's running in the + # container whether that's embedded or not + - name: Install latest etcd for containerized or embedded + package: + name: etcd + state: latest + when: ( embedded_etcd | bool or openshift.common.is_containerized ) and not openshift.common.is_atomic + + - name: Generate etcd backup + command: > + {{ etcdctl_command }} backup --data-dir={{ openshift.etcd.etcd_data_dir }} + --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }} + + - set_fact: + etcd_backup_complete: True + + - name: Display location of etcd backup + debug: + msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}" + +- name: Gate on etcd backup + hosts: localhost + connection: local + become: no + tasks: + - set_fact: + etcd_backup_completed: "{{ hostvars + | oo_select_keys(groups.etcd_hosts_to_backup) + | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" + - set_fact: + etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" + - fail: + msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" + when: etcd_backup_failed | length > 0 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml new file mode 100644 index 000000000..5f8b59e17 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml @@ -0,0 +1,46 @@ +--- +- name: Verify cluster is healthy pre-upgrade + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Get current image + shell: grep 'ExecStart=' /etc/systemd/system/etcd_container.service | awk '{print $NF}' + register: current_image + +- name: Set new_etcd_image + set_fact: + new_etcd_image: "{{ current_image.stdout | regex_replace('/etcd.*$','/etcd:' ~ upgrade_version ) }}" + +- name: Pull new etcd image + command: "docker pull {{ new_etcd_image }}" + +- name: Update to latest etcd image + replace: + dest: /etc/systemd/system/etcd_container.service + regexp: "{{ current_image.stdout }}$" + replace: "{{ new_etcd_image }}" + +- name: Restart etcd_container + systemd: + name: etcd_container + daemon_reload: yes + state: restarted + +## TODO: probably should just move this into the backup playbooks, also this +## will fail on atomic host. We need to revisit how to do etcd backups there as +## the container may be newer than etcdctl on the host. Assumes etcd3 obsoletes etcd (7.3.1) +- name: Upgrade etcd for etcdctl when not atomic + package: name=etcd state=latest + when: not openshift.common.is_atomic | bool + +- name: Verify cluster is healthy + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 + +- name: Store new etcd_image + openshift_facts: + role: etcd + local_facts: + etcd_image: "{{ new_etcd_image }}" diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml new file mode 100644 index 000000000..30232110e --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml @@ -0,0 +1,23 @@ +--- +# F23 GA'd with etcd 2.0, currently has 2.2 in updates +# F24 GA'd with etcd-2.2, currently has 2.2 in updates +# F25 Beta currently has etcd 3.0 +- name: Verify cluster is healthy pre-upgrade + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Update etcd + package: + name: "etcd" + state: "latest" + +- name: Restart etcd + service: + name: etcd + state: restarted + +- name: Verify cluster is healthy + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh new file mode 120000 index 000000000..641e04e44 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh @@ -0,0 +1 @@ +../roles/etcd/files/etcdctl.sh
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins new file mode 120000 index 000000000..27ddaa18b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins @@ -0,0 +1 @@ +../../../../../filter_plugins
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins new file mode 120000 index 000000000..cf407f69b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins @@ -0,0 +1 @@ +../../../../../lookup_plugins
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml new file mode 100644 index 000000000..8268adc2e --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml @@ -0,0 +1,44 @@ +--- +# For 1.4/3.4 we want to upgrade everyone to etcd-3.0. etcd docs say to +# upgrade from 2.0.x to 2.1.x to 2.2.x to 2.3.x to 3.0.x. While this is a tedius +# task for RHEL and CENTOS it's simply not possible in Fedora unless you've +# mirrored packages on your own because only the GA and latest versions are +# available in the repos. So for Fedora we'll simply skip this, sorry. + +- include: ../../evaluate_groups.yml + tags: + - always + +# We use two groups one for hosts we're upgrading which doesn't include embedded etcd +# The other for backing up which includes the embedded etcd host, there's no need to +# upgrade embedded etcd that just happens when the master is updated. +- name: Evaluate additional groups for etcd + hosts: localhost + connection: local + become: no + tasks: + - name: Evaluate etcd_hosts_to_upgrade + add_host: + name: "{{ item }}" + groups: etcd_hosts_to_upgrade + with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else [] }}" + - name: Evaluate etcd_hosts_to_backup + add_host: + name: "{{ item }}" + groups: etcd_hosts_to_backup + with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master }}" + +- name: Backup etcd before upgrading anything + include: backup.yml + vars: + backup_tag: "pre-upgrade-" + when: openshift_etcd_backup | default(true) | bool + +- name: Drop etcdctl profiles + hosts: etcd_hosts_to_upgrade + tasks: + - include: roles/etcd/tasks/etcdctl.yml + +- name: Perform etcd upgrade + include: ./upgrade.yml + when: openshift_etcd_upgrade | default(true) | bool diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml new file mode 100644 index 000000000..3a972e8ab --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml @@ -0,0 +1,20 @@ +--- +- name: Verify cluster is healthy pre-upgrade + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Update etcd RPM + package: + name: etcd-{{ upgrade_version }}* + state: latest + +- name: Restart etcd + service: + name: etcd + state: restarted + +- name: Verify cluster is healthy + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/roles b/playbooks/common/openshift-cluster/upgrades/etcd/roles new file mode 120000 index 000000000..6bc1a7aef --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/roles @@ -0,0 +1 @@ +../../../../../roles
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml new file mode 100644 index 000000000..0f8d94737 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/upgrade.yml @@ -0,0 +1,94 @@ +--- +- name: Determine etcd version + hosts: etcd_hosts_to_upgrade + tasks: + - name: Record RPM based etcd version + command: rpm -qa --qf '%{version}' etcd\* + args: + warn: no + register: etcd_rpm_version + failed_when: false + when: not openshift.common.is_containerized | bool + - name: Record containerized etcd version + command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\* + register: etcd_container_version + failed_when: false + when: openshift.common.is_containerized | bool + +# I really dislike this copy/pasta but I wasn't able to find a way to get it to loop +# through hosts, then loop through tasks only when appropriate +- name: Upgrade to 2.1 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '2.1' + tasks: + - include: rhel_tasks.yml + when: etcd_rpm_version.stdout | default('99') | version_compare('2.1','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 2.2 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '2.2' + tasks: + - include: rhel_tasks.yml + when: etcd_rpm_version.stdout | default('99') | version_compare('2.2','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to 2.2.5 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: 2.2.5 + tasks: + - include: containerized_tasks.yml + when: etcd_container_version.stdout | default('99') | version_compare('2.2','<') and openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 2.3 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '2.3' + tasks: + - include: rhel_tasks.yml + when: etcd_rpm_version.stdout | default('99') | version_compare('2.3','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to 2.3.7 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: 2.3.7 + tasks: + - include: containerized_tasks.yml + when: etcd_container_version.stdout | default('99') | version_compare('2.3','<') and openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 3.0 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '3.0' + tasks: + - include: rhel_tasks.yml + when: etcd_rpm_version.stdout | default('99') | version_compare('3.0','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to etcd3 image + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: 3.0.15 + tasks: + - include: containerized_tasks.yml + when: etcd_container_version.stdout | default('99') | version_compare('3.0','<') and openshift.common.is_containerized | bool + +- name: Upgrade fedora to latest + hosts: etcd_hosts_to_upgrade + serial: 1 + tasks: + - include: fedora_tasks.yml + when: ansible_distribution == 'Fedora' and not openshift.common.is_containerized | bool + +- name: Backup etcd + include: backup.yml + vars: + backup_tag: "post-3.0-" + when: openshift_etcd_backup | default(true) | bool |