summaryrefslogtreecommitdiffstats
path: root/roles/etcd
diff options
context:
space:
mode:
Diffstat (limited to 'roles/etcd')
-rw-r--r--roles/etcd/meta/main.yml1
-rw-r--r--roles/etcd/tasks/auxiliary/clean_data.yml5
-rw-r--r--roles/etcd/tasks/clean_data.yml2
-rw-r--r--roles/etcd/tasks/migrate.add_ttls.yml2
-rw-r--r--roles/etcd/tasks/migrate.configure_master.yml2
-rw-r--r--roles/etcd/tasks/migrate.pre_check.yml2
-rw-r--r--roles/etcd/tasks/migrate.yml2
-rw-r--r--roles/etcd/tasks/migration/add_ttls.yml34
-rw-r--r--roles/etcd/tasks/migration/check.yml56
-rw-r--r--roles/etcd/tasks/migration/check_cluster_health.yml23
-rw-r--r--roles/etcd/tasks/migration/check_cluster_status.yml32
-rw-r--r--roles/etcd/tasks/migration/configure_master.yml13
-rw-r--r--roles/etcd/tasks/migration/migrate.yml56
-rw-r--r--roles/etcd/tasks/upgrade/upgrade_image.yml55
-rw-r--r--roles/etcd/tasks/upgrade/upgrade_rpm.yml32
-rw-r--r--roles/etcd/tasks/upgrade_image.yml2
-rw-r--r--roles/etcd/tasks/upgrade_rpm.yml2
17 files changed, 321 insertions, 0 deletions
diff --git a/roles/etcd/meta/main.yml b/roles/etcd/meta/main.yml
index d69366a39..537c01c0e 100644
--- a/roles/etcd/meta/main.yml
+++ b/roles/etcd/meta/main.yml
@@ -18,4 +18,5 @@ galaxy_info:
dependencies:
- role: lib_openshift
- role: lib_os_firewall
+- role: lib_utils
- role: etcd_common
diff --git a/roles/etcd/tasks/auxiliary/clean_data.yml b/roles/etcd/tasks/auxiliary/clean_data.yml
new file mode 100644
index 000000000..95a0e7c0a
--- /dev/null
+++ b/roles/etcd/tasks/auxiliary/clean_data.yml
@@ -0,0 +1,5 @@
+---
+- name: Remove member data
+ file:
+ path: /var/lib/etcd/member
+ state: absent
diff --git a/roles/etcd/tasks/clean_data.yml b/roles/etcd/tasks/clean_data.yml
new file mode 100644
index 000000000..d131ffd21
--- /dev/null
+++ b/roles/etcd/tasks/clean_data.yml
@@ -0,0 +1,2 @@
+---
+- include: auxiliary/clean_data.yml
diff --git a/roles/etcd/tasks/migrate.add_ttls.yml b/roles/etcd/tasks/migrate.add_ttls.yml
new file mode 100644
index 000000000..bc27e4ea1
--- /dev/null
+++ b/roles/etcd/tasks/migrate.add_ttls.yml
@@ -0,0 +1,2 @@
+---
+- include: migration/add_ttls.yml
diff --git a/roles/etcd/tasks/migrate.configure_master.yml b/roles/etcd/tasks/migrate.configure_master.yml
new file mode 100644
index 000000000..3ada6e362
--- /dev/null
+++ b/roles/etcd/tasks/migrate.configure_master.yml
@@ -0,0 +1,2 @@
+---
+- include: migration/configure_master.yml
diff --git a/roles/etcd/tasks/migrate.pre_check.yml b/roles/etcd/tasks/migrate.pre_check.yml
new file mode 100644
index 000000000..124d21561
--- /dev/null
+++ b/roles/etcd/tasks/migrate.pre_check.yml
@@ -0,0 +1,2 @@
+---
+- include: migration/check.yml
diff --git a/roles/etcd/tasks/migrate.yml b/roles/etcd/tasks/migrate.yml
new file mode 100644
index 000000000..5d5385873
--- /dev/null
+++ b/roles/etcd/tasks/migrate.yml
@@ -0,0 +1,2 @@
+---
+- include: migration/migrate.yml
diff --git a/roles/etcd/tasks/migration/add_ttls.yml b/roles/etcd/tasks/migration/add_ttls.yml
new file mode 100644
index 000000000..14625e49e
--- /dev/null
+++ b/roles/etcd/tasks/migration/add_ttls.yml
@@ -0,0 +1,34 @@
+---
+# To be executed on first master
+- slurp:
+ src: "{{ openshift.common.config_base }}/master/master-config.yaml"
+ register: g_master_config_output
+
+- set_fact:
+ accessTokenMaxAgeSeconds: "{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.accessTokenMaxAgeSeconds | default(86400) }}"
+ authroizeTokenMaxAgeSeconds: "{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.authroizeTokenMaxAgeSeconds | default(500) }}"
+ controllerLeaseTTL: "{{ (g_master_config_output.content|b64decode|from_yaml).controllerLeaseTTL | default(30) }}"
+
+- name: Re-introduce leases (as a replacement for key TTLs)
+ command: >
+ oadm migrate etcd-ttl \
+ --cert {{ r_etcd_common_master_peer_cert_file }} \
+ --key {{ r_etcd_common_master_peer_key_file }} \
+ --cacert {{ r_etcd_common_master_peer_ca_file }} \
+ --etcd-address 'https://{{ etcd_peer }}:{{ etcd_client_port }}' \
+ --ttl-keys-prefix {{ item.keys }} \
+ --lease-duration {{ item.ttl }}
+ environment:
+ ETCDCTL_API: 3
+ PATH: "/usr/local/bin:/var/usrlocal/bin:{{ ansible_env.PATH }}"
+ with_items:
+ - keys: "/kubernetes.io/events"
+ ttl: "1h"
+ - keys: "/kubernetes.io/masterleases"
+ ttl: "10s"
+ - keys: "/openshift.io/oauth/accesstokens"
+ ttl: "{{ accessTokenMaxAgeSeconds }}s"
+ - keys: "/openshift.io/oauth/authorizetokens"
+ ttl: "{{ authroizeTokenMaxAgeSeconds }}s"
+ - keys: "/openshift.io/leases/controllers"
+ ttl: "{{ controllerLeaseTTL }}s"
diff --git a/roles/etcd/tasks/migration/check.yml b/roles/etcd/tasks/migration/check.yml
new file mode 100644
index 000000000..0804d9e1c
--- /dev/null
+++ b/roles/etcd/tasks/migration/check.yml
@@ -0,0 +1,56 @@
+---
+
+# Check the cluster is healthy
+- include: check_cluster_health.yml
+
+# Check if the member has v3 data already
+# Run the migration only if the data are v2
+- name: Check if there are any v3 data
+ command: >
+ etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' get "" --from-key --keys-only -w json --limit 1
+ environment:
+ ETCDCTL_API: 3
+ register: l_etcdctl_output
+
+- fail:
+ msg: "Unable to get a number of v3 keys"
+ when: l_etcdctl_output.rc != 0
+
+- fail:
+ msg: "The etcd has at least one v3 key"
+ when: "'count' in (l_etcdctl_output.stdout | from_json) and (l_etcdctl_output.stdout | from_json).count != 0"
+
+
+# TODO(jchaloup): once the until loop can be used over include/block,
+# remove the repetive code
+# - until loop not supported over include statement (nor block)
+# https://github.com/ansible/ansible/issues/17098
+# - with_items not supported over block
+
+# Check the cluster status for the first time
+- include: check_cluster_status.yml
+
+# Check the cluster status for the second time
+- block:
+ - debug:
+ msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}"
+ - name: Wait a while before another check
+ pause:
+ seconds: 5
+ when: not l_etcd_cluster_status_ok | bool
+
+ - include: check_cluster_status.yml
+ when: not l_etcd_cluster_status_ok | bool
+
+
+# Check the cluster status for the third time
+- block:
+ - debug:
+ msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}"
+ - name: Wait a while before another check
+ pause:
+ seconds: 5
+ when: not l_etcd_cluster_status_ok | bool
+
+ - include: check_cluster_status.yml
+ when: not l_etcd_cluster_status_ok | bool
diff --git a/roles/etcd/tasks/migration/check_cluster_health.yml b/roles/etcd/tasks/migration/check_cluster_health.yml
new file mode 100644
index 000000000..201d83f99
--- /dev/null
+++ b/roles/etcd/tasks/migration/check_cluster_health.yml
@@ -0,0 +1,23 @@
+---
+- name: Check cluster health
+ command: >
+ etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health
+ register: etcd_cluster_health
+ changed_when: false
+ failed_when: false
+
+- name: Assume a member is not healthy
+ set_fact:
+ etcd_member_healthy: false
+
+- name: Get member item health status
+ set_fact:
+ etcd_member_healthy: true
+ with_items: "{{ etcd_cluster_health.stdout_lines }}"
+ when: "(etcd_peer in item) and ('is healthy' in item)"
+
+- name: Check the etcd cluster health
+ # TODO(jchaloup): should we fail or ask user if he wants to continue? Or just wait until the cluster is healthy?
+ fail:
+ msg: "Etcd member {{ etcd_peer }} is not healthy"
+ when: not etcd_member_healthy
diff --git a/roles/etcd/tasks/migration/check_cluster_status.yml b/roles/etcd/tasks/migration/check_cluster_status.yml
new file mode 100644
index 000000000..b69fb5a52
--- /dev/null
+++ b/roles/etcd/tasks/migration/check_cluster_status.yml
@@ -0,0 +1,32 @@
+---
+# etcd_ip originates from etcd_common role
+- name: Check cluster status
+ command: >
+ etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' -w json endpoint status
+ environment:
+ ETCDCTL_API: 3
+ register: l_etcd_cluster_status
+
+- name: Retrieve raftIndex
+ set_fact:
+ etcd_member_raft_index: "{{ (l_etcd_cluster_status.stdout | from_json)[0]['Status']['raftIndex'] }}"
+
+- block:
+ # http://docs.ansible.com/ansible/playbooks_filters.html#extracting-values-from-containers
+ - name: Group all raftIndices into a list
+ set_fact:
+ etcd_members_raft_indices: "{{ groups['oo_etcd_to_migrate'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}"
+
+ - name: Check the minimum and the maximum of raftIndices is at most 1
+ set_fact:
+ etcd_members_raft_indices_diff: "{{ ((etcd_members_raft_indices | max | int) - (etcd_members_raft_indices | min | int)) | int }}"
+
+ - debug:
+ msg: "Raft indices difference: {{ etcd_members_raft_indices_diff }}"
+
+ when: inventory_hostname in groups.oo_etcd_to_migrate[0]
+
+# The cluster raft status is ok if the difference of the max and min raft index is at most 1
+- name: capture the status
+ set_fact:
+ l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_migrate[0]]['etcd_members_raft_indices_diff'] | int < 2 }}"
diff --git a/roles/etcd/tasks/migration/configure_master.yml b/roles/etcd/tasks/migration/configure_master.yml
new file mode 100644
index 000000000..a305d5bf3
--- /dev/null
+++ b/roles/etcd/tasks/migration/configure_master.yml
@@ -0,0 +1,13 @@
+---
+- name: Configure master to use etcd3 storage backend
+ yedit:
+ src: /etc/origin/master/master-config.yaml
+ key: "{{ item.key }}"
+ value: "{{ item.value }}"
+ with_items:
+ - key: kubernetesMasterConfig.apiServerArguments.storage-backend
+ value:
+ - etcd3
+ - key: kubernetesMasterConfig.apiServerArguments.storage-media-type
+ value:
+ - application/vnd.kubernetes.protobuf
diff --git a/roles/etcd/tasks/migration/migrate.yml b/roles/etcd/tasks/migration/migrate.yml
new file mode 100644
index 000000000..54a9c74ff
--- /dev/null
+++ b/roles/etcd/tasks/migration/migrate.yml
@@ -0,0 +1,56 @@
+---
+# Should this be run in a serial manner?
+- set_fact:
+ l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}"
+
+- name: Migrate etcd data
+ command: >
+ etcdctl migrate --data-dir={{ etcd_data_dir }}
+ environment:
+ ETCDCTL_API: 3
+ register: l_etcdctl_migrate
+# TODO(jchaloup): If any of the members fails, we need to restore all members to v2 from the pre-migrate backup
+- name: Check the etcd v2 data are correctly migrated
+ fail:
+ msg: "Failed to migrate a member"
+ when: "'finished transforming keys' not in l_etcdctl_migrate.stdout and 'no v2 keys to migrate' not in l_etcdctl_migrate.stdout"
+- name: Migration message
+ debug:
+ msg: "Etcd migration finished with: {{ l_etcdctl_migrate.stdout }}"
+- name: Set ETCD_FORCE_NEW_CLUSTER=true on first etcd host
+ lineinfile:
+ line: "ETCD_FORCE_NEW_CLUSTER=true"
+ dest: /etc/etcd/etcd.conf
+ backup: true
+- name: Start etcd
+ systemd:
+ name: "{{ l_etcd_service }}"
+ state: started
+- name: Wait for cluster to become healthy after bringing up first member
+ command: >
+ etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health
+ register: l_etcd_migrate_health
+ until: l_etcd_migrate_health.rc == 0
+ retries: 3
+ delay: 30
+- name: Unset ETCD_FORCE_NEW_CLUSTER=true on first etcd host
+ lineinfile:
+ line: "ETCD_FORCE_NEW_CLUSTER=true"
+ dest: /etc/etcd/etcd.conf
+ state: absent
+ backup: true
+- name: Restart first etcd host
+ systemd:
+ name: "{{ l_etcd_service }}"
+ state: restarted
+
+- name: Wait for cluster to become healthy after bringing up first member
+ command: >
+ etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health
+ register: l_etcd_migrate_health
+ until: l_etcd_migrate_health.rc == 0
+ retries: 3
+ delay: 30
+
+- set_fact:
+ r_etcd_migrate_success: true
diff --git a/roles/etcd/tasks/upgrade/upgrade_image.yml b/roles/etcd/tasks/upgrade/upgrade_image.yml
new file mode 100644
index 000000000..cea95a1b3
--- /dev/null
+++ b/roles/etcd/tasks/upgrade/upgrade_image.yml
@@ -0,0 +1,55 @@
+---
+# INPUT r_etcd_upgrade_version
+- name: Verify cluster is healthy pre-upgrade
+ command: "{{ etcdctlv2 }} cluster-health"
+
+- name: Get current image
+ shell: "grep 'ExecStart=' {{ etcd_service_file }} | awk '{print $NF}'"
+ register: current_image
+
+- name: Set new_etcd_image
+ set_fact:
+ new_etcd_image: "{{ current_image.stdout | regex_replace('/etcd.*$','/etcd:' ~ r_etcd_upgrade_version ) }}"
+
+- name: Pull new etcd image
+ command: "docker pull {{ new_etcd_image }}"
+
+- name: Update to latest etcd image
+ replace:
+ dest: "{{ etcd_service_file }}"
+ regexp: "{{ current_image.stdout }}$"
+ replace: "{{ new_etcd_image }}"
+
+- name: Restart etcd_container
+ systemd:
+ name: "{{ etcd_service }}"
+ daemon_reload: yes
+ state: restarted
+
+## TODO: probably should just move this into the backup playbooks, also this
+## will fail on atomic host. We need to revisit how to do etcd backups there as
+## the container may be newer than etcdctl on the host. Assumes etcd3 obsoletes etcd (7.3.1)
+- name: Detecting Atomic Host Operating System
+ stat:
+ path: /run/ostree-booted
+ register: l_ostree_booted
+
+- name: Upgrade etcd for etcdctl when not atomic
+ package:
+ name: etcd
+ state: latest
+ when: not l_ostree_booted.stat.exists | bool
+
+- name: Verify cluster is healthy
+ command: "{{ etcdctlv2 }} cluster-health"
+ register: etcdctl
+ until: etcdctl.rc == 0
+ retries: 3
+ delay: 10
+
+- name: Store new etcd_image
+ # DEPENDENCY openshift_facts
+ openshift_facts:
+ role: etcd
+ local_facts:
+ etcd_image: "{{ new_etcd_image }}"
diff --git a/roles/etcd/tasks/upgrade/upgrade_rpm.yml b/roles/etcd/tasks/upgrade/upgrade_rpm.yml
new file mode 100644
index 000000000..324b69605
--- /dev/null
+++ b/roles/etcd/tasks/upgrade/upgrade_rpm.yml
@@ -0,0 +1,32 @@
+---
+# INPUT r_etcd_upgrade_version?
+
+# F23 GA'd with etcd 2.0, currently has 2.2 in updates
+# F24 GA'd with etcd-2.2, currently has 2.2 in updates
+# F25 Beta currently has etcd 3.0
+# RHEL 7.3.4 with etcd-3.1.3-1.el7
+# RHEL 7.3.3 with etcd-3.1.0-2.el7
+# RHEL 7.3.2 with etcd-3.0.15-1.el7
+
+- name: Verify cluster is healthy pre-upgrade
+ command: "{{ etcdctlv2 }} cluster-health"
+
+- set_fact:
+ l_etcd_target_package: "{{ 'etcd' if r_etcd_upgrade_version is not defined else 'etcd-'+r_etcd_upgrade_version+'*' }}"
+
+- name: Update etcd RPM to {{ l_etcd_target_package }}
+ package:
+ name: "{{ l_etcd_target_package }}"
+ state: latest
+
+- name: Restart etcd
+ service:
+ name: "{{ etcd_service }}"
+ state: restarted
+
+- name: Verify cluster is healthy
+ command: "{{ etcdctlv2 }} cluster-health"
+ register: etcdctl
+ until: etcdctl.rc == 0
+ retries: 3
+ delay: 10
diff --git a/roles/etcd/tasks/upgrade_image.yml b/roles/etcd/tasks/upgrade_image.yml
new file mode 100644
index 000000000..9e69027eb
--- /dev/null
+++ b/roles/etcd/tasks/upgrade_image.yml
@@ -0,0 +1,2 @@
+---
+- include: upgrade/upgrade_image.yml
diff --git a/roles/etcd/tasks/upgrade_rpm.yml b/roles/etcd/tasks/upgrade_rpm.yml
new file mode 100644
index 000000000..29603d2b6
--- /dev/null
+++ b/roles/etcd/tasks/upgrade_rpm.yml
@@ -0,0 +1,2 @@
+---
+- include: upgrade/upgrade_rpm.yml