From f8664e17ef5e6bead61d8471facd2859fd10c180 Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Tue, 19 Sep 2017 15:14:18 +0200 Subject: consolidate etcd_migrate role --- playbooks/common/openshift-etcd/migrate.yml | 67 ++++++++++++---------- roles/etcd/meta/main.yml | 1 + roles/etcd/tasks/auxiliary/clean_data.yml | 5 ++ roles/etcd/tasks/clean_data.yml | 2 + roles/etcd/tasks/migrate.add_ttls.yml | 2 + roles/etcd/tasks/migrate.configure_master.yml | 2 + roles/etcd/tasks/migrate.pre_check.yml | 2 + roles/etcd/tasks/migrate.yml | 2 + roles/etcd/tasks/migration/add_ttls.yml | 34 +++++++++++ roles/etcd/tasks/migration/check.yml | 56 ++++++++++++++++++ .../etcd/tasks/migration/check_cluster_health.yml | 23 ++++++++ .../etcd/tasks/migration/check_cluster_status.yml | 32 +++++++++++ roles/etcd/tasks/migration/configure_master.yml | 13 +++++ roles/etcd/tasks/migration/migrate.yml | 56 ++++++++++++++++++ roles/etcd_migrate/README.md | 53 ----------------- roles/etcd_migrate/defaults/main.yml | 3 - roles/etcd_migrate/meta/main.yml | 17 ------ roles/etcd_migrate/tasks/add_ttls.yml | 33 ----------- roles/etcd_migrate/tasks/check.yml | 56 ------------------ roles/etcd_migrate/tasks/check_cluster_health.yml | 23 -------- roles/etcd_migrate/tasks/check_cluster_status.yml | 32 ----------- roles/etcd_migrate/tasks/clean_data.yml | 5 -- roles/etcd_migrate/tasks/configure.yml | 13 ----- roles/etcd_migrate/tasks/main.yml | 25 -------- roles/etcd_migrate/tasks/migrate.yml | 56 ------------------ 25 files changed, 267 insertions(+), 346 deletions(-) create mode 100644 roles/etcd/tasks/auxiliary/clean_data.yml create mode 100644 roles/etcd/tasks/clean_data.yml create mode 100644 roles/etcd/tasks/migrate.add_ttls.yml create mode 100644 roles/etcd/tasks/migrate.configure_master.yml create mode 100644 roles/etcd/tasks/migrate.pre_check.yml create mode 100644 roles/etcd/tasks/migrate.yml create mode 100644 roles/etcd/tasks/migration/add_ttls.yml create mode 100644 roles/etcd/tasks/migration/check.yml create mode 100644 roles/etcd/tasks/migration/check_cluster_health.yml create mode 100644 roles/etcd/tasks/migration/check_cluster_status.yml create mode 100644 roles/etcd/tasks/migration/configure_master.yml create mode 100644 roles/etcd/tasks/migration/migrate.yml delete mode 100644 roles/etcd_migrate/README.md delete mode 100644 roles/etcd_migrate/defaults/main.yml delete mode 100644 roles/etcd_migrate/meta/main.yml delete mode 100644 roles/etcd_migrate/tasks/add_ttls.yml delete mode 100644 roles/etcd_migrate/tasks/check.yml delete mode 100644 roles/etcd_migrate/tasks/check_cluster_health.yml delete mode 100644 roles/etcd_migrate/tasks/check_cluster_status.yml delete mode 100644 roles/etcd_migrate/tasks/clean_data.yml delete mode 100644 roles/etcd_migrate/tasks/configure.yml delete mode 100644 roles/etcd_migrate/tasks/main.yml delete mode 100644 roles/etcd_migrate/tasks/migrate.yml diff --git a/playbooks/common/openshift-etcd/migrate.yml b/playbooks/common/openshift-etcd/migrate.yml index e4ab0aa41..06e0607bd 100644 --- a/playbooks/common/openshift-etcd/migrate.yml +++ b/playbooks/common/openshift-etcd/migrate.yml @@ -1,11 +1,13 @@ --- - name: Run pre-checks hosts: oo_etcd_to_migrate - roles: - - role: etcd_migrate - r_etcd_migrate_action: check - r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - etcd_peer: "{{ ansible_default_ipv4.address }}" + tasks: + - include_role: + name: etcd + tasks_from: migrate.pre_check + vars: + r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + etcd_peer: "{{ ansible_default_ipv4.address }}" # TODO: This will be different for release-3.6 branch - name: Prepare masters for etcd data migration @@ -65,25 +67,28 @@ - name: Migrate data on first etcd hosts: oo_etcd_to_migrate[0] gather_facts: no - roles: - - role: etcd_migrate - r_etcd_migrate_action: migrate - r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - etcd_peer: "{{ openshift.common.ip }}" - etcd_url_scheme: "https" - etcd_peer_url_scheme: "https" + tasks: + - include_role: + name: etcd + tasks_from: migrate + vars: + r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + etcd_peer: "{{ openshift.common.ip }}" + etcd_url_scheme: "https" + etcd_peer_url_scheme: "https" - name: Clean data stores on remaining etcd hosts hosts: oo_etcd_to_migrate[1:] gather_facts: no - roles: - - role: etcd_migrate - r_etcd_migrate_action: clean_data - r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - etcd_peer: "{{ openshift.common.ip }}" - etcd_url_scheme: "https" - etcd_peer_url_scheme: "https" - post_tasks: + tasks: + - include_role: + name: etcd + tasks_from: clean_data + vars: + r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + etcd_peer: "{{ openshift.common.ip }}" + etcd_url_scheme: "https" + etcd_peer_url_scheme: "https" - name: Add etcd hosts delegate_to: localhost add_host: @@ -112,21 +117,23 @@ - name: Add TTLs on the first master hosts: oo_first_master[0] - roles: - - role: etcd_migrate - r_etcd_migrate_action: add_ttls - etcd_peer: "{{ hostvars[groups.oo_etcd_to_migrate.0].openshift.common.ip }}" - etcd_url_scheme: "https" - etcd_peer_url_scheme: "https" + tasks: + - include_role: + name: etcd + tasks_from: migrate.add_ttls + vars: + etcd_peer: "{{ hostvars[groups.oo_etcd_to_migrate.0].openshift.common.ip }}" + etcd_url_scheme: "https" + etcd_peer_url_scheme: "https" when: etcd_migration_failed | length == 0 - name: Configure masters if etcd data migration is succesfull hosts: oo_masters_to_config - roles: - - role: etcd_migrate - r_etcd_migrate_action: configure - when: etcd_migration_failed | length == 0 tasks: + - include_role: + name: etcd + tasks_from: migrate.configure_master + when: etcd_migration_failed | length == 0 - debug: msg: "Skipping master re-configuration since migration failed." when: diff --git a/roles/etcd/meta/main.yml b/roles/etcd/meta/main.yml index d69366a39..537c01c0e 100644 --- a/roles/etcd/meta/main.yml +++ b/roles/etcd/meta/main.yml @@ -18,4 +18,5 @@ galaxy_info: dependencies: - role: lib_openshift - role: lib_os_firewall +- role: lib_utils - role: etcd_common diff --git a/roles/etcd/tasks/auxiliary/clean_data.yml b/roles/etcd/tasks/auxiliary/clean_data.yml new file mode 100644 index 000000000..95a0e7c0a --- /dev/null +++ b/roles/etcd/tasks/auxiliary/clean_data.yml @@ -0,0 +1,5 @@ +--- +- name: Remove member data + file: + path: /var/lib/etcd/member + state: absent diff --git a/roles/etcd/tasks/clean_data.yml b/roles/etcd/tasks/clean_data.yml new file mode 100644 index 000000000..d131ffd21 --- /dev/null +++ b/roles/etcd/tasks/clean_data.yml @@ -0,0 +1,2 @@ +--- +- include: auxiliary/clean_data.yml diff --git a/roles/etcd/tasks/migrate.add_ttls.yml b/roles/etcd/tasks/migrate.add_ttls.yml new file mode 100644 index 000000000..bc27e4ea1 --- /dev/null +++ b/roles/etcd/tasks/migrate.add_ttls.yml @@ -0,0 +1,2 @@ +--- +- include: migration/add_ttls.yml diff --git a/roles/etcd/tasks/migrate.configure_master.yml b/roles/etcd/tasks/migrate.configure_master.yml new file mode 100644 index 000000000..3ada6e362 --- /dev/null +++ b/roles/etcd/tasks/migrate.configure_master.yml @@ -0,0 +1,2 @@ +--- +- include: migration/configure_master.yml diff --git a/roles/etcd/tasks/migrate.pre_check.yml b/roles/etcd/tasks/migrate.pre_check.yml new file mode 100644 index 000000000..124d21561 --- /dev/null +++ b/roles/etcd/tasks/migrate.pre_check.yml @@ -0,0 +1,2 @@ +--- +- include: migration/check.yml diff --git a/roles/etcd/tasks/migrate.yml b/roles/etcd/tasks/migrate.yml new file mode 100644 index 000000000..5d5385873 --- /dev/null +++ b/roles/etcd/tasks/migrate.yml @@ -0,0 +1,2 @@ +--- +- include: migration/migrate.yml diff --git a/roles/etcd/tasks/migration/add_ttls.yml b/roles/etcd/tasks/migration/add_ttls.yml new file mode 100644 index 000000000..14625e49e --- /dev/null +++ b/roles/etcd/tasks/migration/add_ttls.yml @@ -0,0 +1,34 @@ +--- +# To be executed on first master +- slurp: + src: "{{ openshift.common.config_base }}/master/master-config.yaml" + register: g_master_config_output + +- set_fact: + accessTokenMaxAgeSeconds: "{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.accessTokenMaxAgeSeconds | default(86400) }}" + authroizeTokenMaxAgeSeconds: "{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.authroizeTokenMaxAgeSeconds | default(500) }}" + controllerLeaseTTL: "{{ (g_master_config_output.content|b64decode|from_yaml).controllerLeaseTTL | default(30) }}" + +- name: Re-introduce leases (as a replacement for key TTLs) + command: > + oadm migrate etcd-ttl \ + --cert {{ r_etcd_common_master_peer_cert_file }} \ + --key {{ r_etcd_common_master_peer_key_file }} \ + --cacert {{ r_etcd_common_master_peer_ca_file }} \ + --etcd-address 'https://{{ etcd_peer }}:{{ etcd_client_port }}' \ + --ttl-keys-prefix {{ item.keys }} \ + --lease-duration {{ item.ttl }} + environment: + ETCDCTL_API: 3 + PATH: "/usr/local/bin:/var/usrlocal/bin:{{ ansible_env.PATH }}" + with_items: + - keys: "/kubernetes.io/events" + ttl: "1h" + - keys: "/kubernetes.io/masterleases" + ttl: "10s" + - keys: "/openshift.io/oauth/accesstokens" + ttl: "{{ accessTokenMaxAgeSeconds }}s" + - keys: "/openshift.io/oauth/authorizetokens" + ttl: "{{ authroizeTokenMaxAgeSeconds }}s" + - keys: "/openshift.io/leases/controllers" + ttl: "{{ controllerLeaseTTL }}s" diff --git a/roles/etcd/tasks/migration/check.yml b/roles/etcd/tasks/migration/check.yml new file mode 100644 index 000000000..0804d9e1c --- /dev/null +++ b/roles/etcd/tasks/migration/check.yml @@ -0,0 +1,56 @@ +--- + +# Check the cluster is healthy +- include: check_cluster_health.yml + +# Check if the member has v3 data already +# Run the migration only if the data are v2 +- name: Check if there are any v3 data + command: > + etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' get "" --from-key --keys-only -w json --limit 1 + environment: + ETCDCTL_API: 3 + register: l_etcdctl_output + +- fail: + msg: "Unable to get a number of v3 keys" + when: l_etcdctl_output.rc != 0 + +- fail: + msg: "The etcd has at least one v3 key" + when: "'count' in (l_etcdctl_output.stdout | from_json) and (l_etcdctl_output.stdout | from_json).count != 0" + + +# TODO(jchaloup): once the until loop can be used over include/block, +# remove the repetive code +# - until loop not supported over include statement (nor block) +# https://github.com/ansible/ansible/issues/17098 +# - with_items not supported over block + +# Check the cluster status for the first time +- include: check_cluster_status.yml + +# Check the cluster status for the second time +- block: + - debug: + msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" + - name: Wait a while before another check + pause: + seconds: 5 + when: not l_etcd_cluster_status_ok | bool + + - include: check_cluster_status.yml + when: not l_etcd_cluster_status_ok | bool + + +# Check the cluster status for the third time +- block: + - debug: + msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" + - name: Wait a while before another check + pause: + seconds: 5 + when: not l_etcd_cluster_status_ok | bool + + - include: check_cluster_status.yml + when: not l_etcd_cluster_status_ok | bool diff --git a/roles/etcd/tasks/migration/check_cluster_health.yml b/roles/etcd/tasks/migration/check_cluster_health.yml new file mode 100644 index 000000000..201d83f99 --- /dev/null +++ b/roles/etcd/tasks/migration/check_cluster_health.yml @@ -0,0 +1,23 @@ +--- +- name: Check cluster health + command: > + etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health + register: etcd_cluster_health + changed_when: false + failed_when: false + +- name: Assume a member is not healthy + set_fact: + etcd_member_healthy: false + +- name: Get member item health status + set_fact: + etcd_member_healthy: true + with_items: "{{ etcd_cluster_health.stdout_lines }}" + when: "(etcd_peer in item) and ('is healthy' in item)" + +- name: Check the etcd cluster health + # TODO(jchaloup): should we fail or ask user if he wants to continue? Or just wait until the cluster is healthy? + fail: + msg: "Etcd member {{ etcd_peer }} is not healthy" + when: not etcd_member_healthy diff --git a/roles/etcd/tasks/migration/check_cluster_status.yml b/roles/etcd/tasks/migration/check_cluster_status.yml new file mode 100644 index 000000000..b69fb5a52 --- /dev/null +++ b/roles/etcd/tasks/migration/check_cluster_status.yml @@ -0,0 +1,32 @@ +--- +# etcd_ip originates from etcd_common role +- name: Check cluster status + command: > + etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' -w json endpoint status + environment: + ETCDCTL_API: 3 + register: l_etcd_cluster_status + +- name: Retrieve raftIndex + set_fact: + etcd_member_raft_index: "{{ (l_etcd_cluster_status.stdout | from_json)[0]['Status']['raftIndex'] }}" + +- block: + # http://docs.ansible.com/ansible/playbooks_filters.html#extracting-values-from-containers + - name: Group all raftIndices into a list + set_fact: + etcd_members_raft_indices: "{{ groups['oo_etcd_to_migrate'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}" + + - name: Check the minimum and the maximum of raftIndices is at most 1 + set_fact: + etcd_members_raft_indices_diff: "{{ ((etcd_members_raft_indices | max | int) - (etcd_members_raft_indices | min | int)) | int }}" + + - debug: + msg: "Raft indices difference: {{ etcd_members_raft_indices_diff }}" + + when: inventory_hostname in groups.oo_etcd_to_migrate[0] + +# The cluster raft status is ok if the difference of the max and min raft index is at most 1 +- name: capture the status + set_fact: + l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_migrate[0]]['etcd_members_raft_indices_diff'] | int < 2 }}" diff --git a/roles/etcd/tasks/migration/configure_master.yml b/roles/etcd/tasks/migration/configure_master.yml new file mode 100644 index 000000000..a305d5bf3 --- /dev/null +++ b/roles/etcd/tasks/migration/configure_master.yml @@ -0,0 +1,13 @@ +--- +- name: Configure master to use etcd3 storage backend + yedit: + src: /etc/origin/master/master-config.yaml + key: "{{ item.key }}" + value: "{{ item.value }}" + with_items: + - key: kubernetesMasterConfig.apiServerArguments.storage-backend + value: + - etcd3 + - key: kubernetesMasterConfig.apiServerArguments.storage-media-type + value: + - application/vnd.kubernetes.protobuf diff --git a/roles/etcd/tasks/migration/migrate.yml b/roles/etcd/tasks/migration/migrate.yml new file mode 100644 index 000000000..54a9c74ff --- /dev/null +++ b/roles/etcd/tasks/migration/migrate.yml @@ -0,0 +1,56 @@ +--- +# Should this be run in a serial manner? +- set_fact: + l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}" + +- name: Migrate etcd data + command: > + etcdctl migrate --data-dir={{ etcd_data_dir }} + environment: + ETCDCTL_API: 3 + register: l_etcdctl_migrate +# TODO(jchaloup): If any of the members fails, we need to restore all members to v2 from the pre-migrate backup +- name: Check the etcd v2 data are correctly migrated + fail: + msg: "Failed to migrate a member" + when: "'finished transforming keys' not in l_etcdctl_migrate.stdout and 'no v2 keys to migrate' not in l_etcdctl_migrate.stdout" +- name: Migration message + debug: + msg: "Etcd migration finished with: {{ l_etcdctl_migrate.stdout }}" +- name: Set ETCD_FORCE_NEW_CLUSTER=true on first etcd host + lineinfile: + line: "ETCD_FORCE_NEW_CLUSTER=true" + dest: /etc/etcd/etcd.conf + backup: true +- name: Start etcd + systemd: + name: "{{ l_etcd_service }}" + state: started +- name: Wait for cluster to become healthy after bringing up first member + command: > + etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health + register: l_etcd_migrate_health + until: l_etcd_migrate_health.rc == 0 + retries: 3 + delay: 30 +- name: Unset ETCD_FORCE_NEW_CLUSTER=true on first etcd host + lineinfile: + line: "ETCD_FORCE_NEW_CLUSTER=true" + dest: /etc/etcd/etcd.conf + state: absent + backup: true +- name: Restart first etcd host + systemd: + name: "{{ l_etcd_service }}" + state: restarted + +- name: Wait for cluster to become healthy after bringing up first member + command: > + etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health + register: l_etcd_migrate_health + until: l_etcd_migrate_health.rc == 0 + retries: 3 + delay: 30 + +- set_fact: + r_etcd_migrate_success: true diff --git a/roles/etcd_migrate/README.md b/roles/etcd_migrate/README.md deleted file mode 100644 index 369e78ff2..000000000 --- a/roles/etcd_migrate/README.md +++ /dev/null @@ -1,53 +0,0 @@ -Role Name -========= - -Offline etcd migration of data from v2 to v3 - -Requirements ------------- - -It is expected all consumers of the etcd data are not accessing the data. -Otherwise the migrated data can be out-of-sync with the v2 and can result in unhealthy etcd cluster. - -The role itself is responsible for: -- checking etcd cluster health and raft status before the migration -- checking of presence of any v3 data (in that case the migration is stopped) -- migration of v2 data to v3 data (including attaching leases of keys prefixed with "/kubernetes.io/events" and "/kubernetes.io/masterleases" string) -- validation of migrated data (all v2 keys and in v3 keys and are set to the identical value) - -The migration itself requires an etcd member to be down in the process. Once the migration is done, the etcd member is started. - -Role Variables --------------- - -TBD - -Dependencies ------------- - -- etcd_common -- lib_utils - -Example Playbook ----------------- - -```yaml -- name: Migrate etcd data from v2 to v3 - hosts: oo_etcd_to_config - gather_facts: no - tasks: - - include_role: - name: openshift_etcd_migrate - vars: - etcd_peer: "{{ ansible_default_ipv4.address }}" -``` - -License -------- - -Apache License, Version 2.0 - -Author Information ------------------- - -Jan Chaloupka (jchaloup@redhat.com) diff --git a/roles/etcd_migrate/defaults/main.yml b/roles/etcd_migrate/defaults/main.yml deleted file mode 100644 index 05cf41fbb..000000000 --- a/roles/etcd_migrate/defaults/main.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -# Default action when calling this role, choices: check, migrate, configure -r_etcd_migrate_action: migrate diff --git a/roles/etcd_migrate/meta/main.yml b/roles/etcd_migrate/meta/main.yml deleted file mode 100644 index f3cabbef6..000000000 --- a/roles/etcd_migrate/meta/main.yml +++ /dev/null @@ -1,17 +0,0 @@ ---- -galaxy_info: - author: Jan Chaloupka - description: Etcd migration - company: Red Hat, Inc. - license: Apache License, Version 2.0 - min_ansible_version: 2.1 - platforms: - - name: EL - versions: - - 7 - categories: - - cloud - - system -dependencies: -- { role: etcd_common } -- { role: lib_utils } diff --git a/roles/etcd_migrate/tasks/add_ttls.yml b/roles/etcd_migrate/tasks/add_ttls.yml deleted file mode 100644 index c10465af9..000000000 --- a/roles/etcd_migrate/tasks/add_ttls.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- -# To be executed on first master -- slurp: - src: "{{ openshift.common.config_base }}/master/master-config.yaml" - register: g_master_config_output - -- set_fact: - accessTokenMaxAgeSeconds: "{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.accessTokenMaxAgeSeconds | default(86400) }}" - authroizeTokenMaxAgeSeconds: "{{ (g_master_config_output.content|b64decode|from_yaml).oauthConfig.tokenConfig.authroizeTokenMaxAgeSeconds | default(500) }}" - controllerLeaseTTL: "{{ (g_master_config_output.content|b64decode|from_yaml).controllerLeaseTTL | default(30) }}" -- name: Re-introduce leases (as a replacement for key TTLs) - command: > - oadm migrate etcd-ttl \ - --cert {{ r_etcd_common_master_peer_cert_file }} \ - --key {{ r_etcd_common_master_peer_key_file }} \ - --cacert {{ r_etcd_common_master_peer_ca_file }} \ - --etcd-address 'https://{{ etcd_peer }}:{{ etcd_client_port }}' \ - --ttl-keys-prefix {{ item.keys }} \ - --lease-duration {{ item.ttl }} - environment: - ETCDCTL_API: 3 - PATH: "/usr/local/bin:/var/usrlocal/bin:{{ ansible_env.PATH }}" - with_items: - - keys: "/kubernetes.io/events" - ttl: "1h" - - keys: "/kubernetes.io/masterleases" - ttl: "10s" - - keys: "/openshift.io/oauth/accesstokens" - ttl: "{{ accessTokenMaxAgeSeconds }}s" - - keys: "/openshift.io/oauth/authorizetokens" - ttl: "{{ authroizeTokenMaxAgeSeconds }}s" - - keys: "/openshift.io/leases/controllers" - ttl: "{{ controllerLeaseTTL }}s" diff --git a/roles/etcd_migrate/tasks/check.yml b/roles/etcd_migrate/tasks/check.yml deleted file mode 100644 index 0804d9e1c..000000000 --- a/roles/etcd_migrate/tasks/check.yml +++ /dev/null @@ -1,56 +0,0 @@ ---- - -# Check the cluster is healthy -- include: check_cluster_health.yml - -# Check if the member has v3 data already -# Run the migration only if the data are v2 -- name: Check if there are any v3 data - command: > - etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' get "" --from-key --keys-only -w json --limit 1 - environment: - ETCDCTL_API: 3 - register: l_etcdctl_output - -- fail: - msg: "Unable to get a number of v3 keys" - when: l_etcdctl_output.rc != 0 - -- fail: - msg: "The etcd has at least one v3 key" - when: "'count' in (l_etcdctl_output.stdout | from_json) and (l_etcdctl_output.stdout | from_json).count != 0" - - -# TODO(jchaloup): once the until loop can be used over include/block, -# remove the repetive code -# - until loop not supported over include statement (nor block) -# https://github.com/ansible/ansible/issues/17098 -# - with_items not supported over block - -# Check the cluster status for the first time -- include: check_cluster_status.yml - -# Check the cluster status for the second time -- block: - - debug: - msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" - - name: Wait a while before another check - pause: - seconds: 5 - when: not l_etcd_cluster_status_ok | bool - - - include: check_cluster_status.yml - when: not l_etcd_cluster_status_ok | bool - - -# Check the cluster status for the third time -- block: - - debug: - msg: "l_etcd_cluster_status_ok: {{ l_etcd_cluster_status_ok }}" - - name: Wait a while before another check - pause: - seconds: 5 - when: not l_etcd_cluster_status_ok | bool - - - include: check_cluster_status.yml - when: not l_etcd_cluster_status_ok | bool diff --git a/roles/etcd_migrate/tasks/check_cluster_health.yml b/roles/etcd_migrate/tasks/check_cluster_health.yml deleted file mode 100644 index 201d83f99..000000000 --- a/roles/etcd_migrate/tasks/check_cluster_health.yml +++ /dev/null @@ -1,23 +0,0 @@ ---- -- name: Check cluster health - command: > - etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health - register: etcd_cluster_health - changed_when: false - failed_when: false - -- name: Assume a member is not healthy - set_fact: - etcd_member_healthy: false - -- name: Get member item health status - set_fact: - etcd_member_healthy: true - with_items: "{{ etcd_cluster_health.stdout_lines }}" - when: "(etcd_peer in item) and ('is healthy' in item)" - -- name: Check the etcd cluster health - # TODO(jchaloup): should we fail or ask user if he wants to continue? Or just wait until the cluster is healthy? - fail: - msg: "Etcd member {{ etcd_peer }} is not healthy" - when: not etcd_member_healthy diff --git a/roles/etcd_migrate/tasks/check_cluster_status.yml b/roles/etcd_migrate/tasks/check_cluster_status.yml deleted file mode 100644 index b69fb5a52..000000000 --- a/roles/etcd_migrate/tasks/check_cluster_status.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# etcd_ip originates from etcd_common role -- name: Check cluster status - command: > - etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' -w json endpoint status - environment: - ETCDCTL_API: 3 - register: l_etcd_cluster_status - -- name: Retrieve raftIndex - set_fact: - etcd_member_raft_index: "{{ (l_etcd_cluster_status.stdout | from_json)[0]['Status']['raftIndex'] }}" - -- block: - # http://docs.ansible.com/ansible/playbooks_filters.html#extracting-values-from-containers - - name: Group all raftIndices into a list - set_fact: - etcd_members_raft_indices: "{{ groups['oo_etcd_to_migrate'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}" - - - name: Check the minimum and the maximum of raftIndices is at most 1 - set_fact: - etcd_members_raft_indices_diff: "{{ ((etcd_members_raft_indices | max | int) - (etcd_members_raft_indices | min | int)) | int }}" - - - debug: - msg: "Raft indices difference: {{ etcd_members_raft_indices_diff }}" - - when: inventory_hostname in groups.oo_etcd_to_migrate[0] - -# The cluster raft status is ok if the difference of the max and min raft index is at most 1 -- name: capture the status - set_fact: - l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_migrate[0]]['etcd_members_raft_indices_diff'] | int < 2 }}" diff --git a/roles/etcd_migrate/tasks/clean_data.yml b/roles/etcd_migrate/tasks/clean_data.yml deleted file mode 100644 index 95a0e7c0a..000000000 --- a/roles/etcd_migrate/tasks/clean_data.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -- name: Remove member data - file: - path: /var/lib/etcd/member - state: absent diff --git a/roles/etcd_migrate/tasks/configure.yml b/roles/etcd_migrate/tasks/configure.yml deleted file mode 100644 index a305d5bf3..000000000 --- a/roles/etcd_migrate/tasks/configure.yml +++ /dev/null @@ -1,13 +0,0 @@ ---- -- name: Configure master to use etcd3 storage backend - yedit: - src: /etc/origin/master/master-config.yaml - key: "{{ item.key }}" - value: "{{ item.value }}" - with_items: - - key: kubernetesMasterConfig.apiServerArguments.storage-backend - value: - - etcd3 - - key: kubernetesMasterConfig.apiServerArguments.storage-media-type - value: - - application/vnd.kubernetes.protobuf diff --git a/roles/etcd_migrate/tasks/main.yml b/roles/etcd_migrate/tasks/main.yml deleted file mode 100644 index e82f6a6b4..000000000 --- a/roles/etcd_migrate/tasks/main.yml +++ /dev/null @@ -1,25 +0,0 @@ ---- -- name: Fail if invalid r_etcd_migrate_action provided - fail: - msg: "etcd_migrate role can only be called with 'check', 'migrate', 'configure', 'add_ttls', or 'clean_data'" - when: r_etcd_migrate_action not in ['check', 'migrate', 'configure', 'add_ttls', 'clean_data'] - -- name: Include main action task file - include: "{{ r_etcd_migrate_action }}.yml" - -# 2. migrate v2 datadir into v3: -# ETCDCTL_API=3 ./etcdctl migrate --data-dir=${data_dir} --no-ttl -# backup the etcd datadir first -# Provide a way for an operator to specify transformer - -# 3. re-configure OpenShift master at /etc/origin/master/master-config.yml -# set storage-backend to “etcd3” -# 4. we could leave the master restart to current logic (there is already the code ready (single vs. HA master)) - -# Run -# etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt --endpoint https://172.16.186.45:2379 cluster-health -# to check the cluster health (from the etcdctl.sh aliases file) - -# Another assumption: -# - in order to migrate all etcd v2 data into v3, we need to shut down the cluster (let's verify that on Wednesday meeting) -# - diff --git a/roles/etcd_migrate/tasks/migrate.yml b/roles/etcd_migrate/tasks/migrate.yml deleted file mode 100644 index 54a9c74ff..000000000 --- a/roles/etcd_migrate/tasks/migrate.yml +++ /dev/null @@ -1,56 +0,0 @@ ---- -# Should this be run in a serial manner? -- set_fact: - l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}" - -- name: Migrate etcd data - command: > - etcdctl migrate --data-dir={{ etcd_data_dir }} - environment: - ETCDCTL_API: 3 - register: l_etcdctl_migrate -# TODO(jchaloup): If any of the members fails, we need to restore all members to v2 from the pre-migrate backup -- name: Check the etcd v2 data are correctly migrated - fail: - msg: "Failed to migrate a member" - when: "'finished transforming keys' not in l_etcdctl_migrate.stdout and 'no v2 keys to migrate' not in l_etcdctl_migrate.stdout" -- name: Migration message - debug: - msg: "Etcd migration finished with: {{ l_etcdctl_migrate.stdout }}" -- name: Set ETCD_FORCE_NEW_CLUSTER=true on first etcd host - lineinfile: - line: "ETCD_FORCE_NEW_CLUSTER=true" - dest: /etc/etcd/etcd.conf - backup: true -- name: Start etcd - systemd: - name: "{{ l_etcd_service }}" - state: started -- name: Wait for cluster to become healthy after bringing up first member - command: > - etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health - register: l_etcd_migrate_health - until: l_etcd_migrate_health.rc == 0 - retries: 3 - delay: 30 -- name: Unset ETCD_FORCE_NEW_CLUSTER=true on first etcd host - lineinfile: - line: "ETCD_FORCE_NEW_CLUSTER=true" - dest: /etc/etcd/etcd.conf - state: absent - backup: true -- name: Restart first etcd host - systemd: - name: "{{ l_etcd_service }}" - state: restarted - -- name: Wait for cluster to become healthy after bringing up first member - command: > - etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health - register: l_etcd_migrate_health - until: l_etcd_migrate_health.rc == 0 - retries: 3 - delay: 30 - -- set_fact: - r_etcd_migrate_success: true -- cgit v1.2.3