From 2b521c8ae6c468fd2bb98362221483eced103696 Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Fri, 23 Jun 2017 12:14:24 +0200 Subject: attach leases via the first master only and only once - move openshift-etcd/migrate.yml from byo to common and keep just the entry point - replace std_include with essential plays (e.g. no need to detect openshift version) - delegate the ttl re-attaching to the first master --- playbooks/byo/openshift-etcd/migrate.yml | 118 +------------------- .../common/openshift-cluster/evaluate_groups.yml | 9 ++ playbooks/common/openshift-etcd/migrate.yml | 120 +++++++++++++++++++++ roles/etcd_migrate/tasks/check.yml | 2 +- roles/etcd_migrate/tasks/check_cluster_health.yml | 2 +- roles/etcd_migrate/tasks/check_cluster_status.yml | 8 +- roles/etcd_migrate/tasks/migrate.yml | 17 ++- 7 files changed, 144 insertions(+), 132 deletions(-) create mode 100644 playbooks/common/openshift-etcd/migrate.yml diff --git a/playbooks/byo/openshift-etcd/migrate.yml b/playbooks/byo/openshift-etcd/migrate.yml index fd02e066e..143016159 100644 --- a/playbooks/byo/openshift-etcd/migrate.yml +++ b/playbooks/byo/openshift-etcd/migrate.yml @@ -3,122 +3,6 @@ tags: - always -- include: ../../common/openshift-cluster/evaluate_groups.yml +- include: ../../common/openshift-etcd/migrate.yml tags: - always - -- name: Run pre-checks - hosts: oo_etcd_to_config - tags: - - always - roles: - - role: etcd_migrate - r_etcd_migrate_action: check - etcd_peer: "{{ ansible_default_ipv4.address }}" - -# TODO(jchaloup): replace the std_include with something minimal so the entire playbook is faster -# e.g. I don't need to detect the OCP version, install deps, etc. -- include: ../../common/openshift-cluster/std_include.yml - tags: - - always - -- name: Backup v2 data - hosts: oo_etcd_to_config - gather_facts: no - tags: - - always - roles: - - role: openshift_facts - - role: etcd_common - r_etcd_common_action: backup - r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" - r_etcd_common_backup_tag: pre-migration - r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" - r_etcd_common_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" - -- name: Gate on etcd backup - hosts: localhost - connection: local - become: no - tasks: - - set_fact: - etcd_backup_completed: "{{ hostvars - | oo_select_keys(groups.oo_etcd_to_config) - | oo_collect('inventory_hostname', {'r_etcd_common_backup_complete': true}) }}" - - set_fact: - etcd_backup_failed: "{{ groups.oo_etcd_to_config | difference(etcd_backup_completed) }}" - - fail: - msg: "Migration cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" - when: - - etcd_backup_failed | length > 0 - -- name: Prepare masters for etcd data migration - hosts: oo_masters_to_config - tasks: - - set_fact: - master_services: - - "{{ openshift.common.service_type + '-master' }}" - - set_fact: - master_services: - - "{{ openshift.common.service_type + '-master-controllers' }}" - - "{{ openshift.common.service_type + '-master-api' }}" - when: - - (openshift_master_cluster_method is defined and openshift_master_cluster_method == "native") or openshift.common.is_master_system_container | bool - - debug: - msg: "master service name: {{ master_services }}" - - name: Stop masters - service: - name: "{{ item }}" - state: stopped - with_items: "{{ master_services }}" - -- name: Migrate etcd data from v2 to v3 - hosts: oo_etcd_to_config - gather_facts: no - tags: - - always - roles: - - role: etcd_migrate - r_etcd_migrate_action: migrate - etcd_peer: "{{ ansible_default_ipv4.address }}" - -- name: Gate on etcd migration - hosts: oo_masters_to_config - gather_facts: no - tasks: - - set_fact: - etcd_migration_completed: "{{ hostvars - | oo_select_keys(groups.oo_etcd_to_config) - | oo_collect('inventory_hostname', {'r_etcd_migrate_success': true}) }}" - - set_fact: - etcd_migration_failed: "{{ groups.oo_etcd_to_config | difference(etcd_migration_completed) }}" - -- name: Configure masters if etcd data migration is succesfull - hosts: oo_masters_to_config - roles: - - role: etcd_migrate - r_etcd_migrate_action: configure - when: etcd_migration_failed | length == 0 - tasks: - - debug: - msg: "Skipping master re-configuration since migration failed." - when: - - etcd_migration_failed | length > 0 - -- name: Start masters after etcd data migration - hosts: oo_masters_to_config - tasks: - - name: Start master services - service: - name: "{{ item }}" - state: started - register: service_status - # Sometimes the master-api, resp. master-controllers fails to start for the first time - until: service_status.state is defined and service_status.state == "started" - retries: 5 - delay: 10 - with_items: "{{ master_services[::-1] }}" - - fail: - msg: "Migration failed. The following hosts were not properly migrated: {{ etcd_migration_failed | join(',') }}" - when: - - etcd_migration_failed | length > 0 diff --git a/playbooks/common/openshift-cluster/evaluate_groups.yml b/playbooks/common/openshift-cluster/evaluate_groups.yml index c28ce4c14..baca72c58 100644 --- a/playbooks/common/openshift-cluster/evaluate_groups.yml +++ b/playbooks/common/openshift-cluster/evaluate_groups.yml @@ -157,3 +157,12 @@ ansible_become: "{{ g_sudo | default(omit) }}" with_items: "{{ g_glusterfs_hosts | union(g_glusterfs_registry_hosts | default([])) }}" changed_when: no + + - name: Evaluate oo_etcd_to_migrate + add_host: + name: "{{ item }}" + groups: oo_etcd_to_migrate + ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" + ansible_become: "{{ g_sudo | default(omit) }}" + with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config | default([]) | length != 0 else groups.oo_first_master }}" + changed_when: no diff --git a/playbooks/common/openshift-etcd/migrate.yml b/playbooks/common/openshift-etcd/migrate.yml new file mode 100644 index 000000000..c655449fa --- /dev/null +++ b/playbooks/common/openshift-etcd/migrate.yml @@ -0,0 +1,120 @@ +--- +- include: ../openshift-cluster/evaluate_groups.yml + tags: + - always + +- name: Run pre-checks + hosts: oo_etcd_to_migrate + tags: + - always + roles: + - role: etcd_migrate + r_etcd_migrate_action: check + r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + etcd_peer: "{{ ansible_default_ipv4.address }}" + +- include: ../openshift-cluster/initialize_facts.yml + tags: + - always + +- name: Backup v2 data + hosts: oo_etcd_to_migrate + gather_facts: no + tags: + - always + roles: + - role: openshift_facts + - role: etcd_common + r_etcd_common_action: backup + r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" + r_etcd_common_backup_tag: pre-migration + r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + r_etcd_common_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" + +- name: Gate on etcd backup + hosts: localhost + connection: local + become: no + tasks: + - set_fact: + etcd_backup_completed: "{{ hostvars + | oo_select_keys(groups.oo_etcd_to_migrate) + | oo_collect('inventory_hostname', {'r_etcd_common_backup_complete': true}) }}" + - set_fact: + etcd_backup_failed: "{{ groups.oo_etcd_to_migrate | difference(etcd_backup_completed) }}" + - fail: + msg: "Migration cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" + when: + - etcd_backup_failed | length > 0 + +- name: Prepare masters for etcd data migration + hosts: oo_masters_to_config + tasks: + - set_fact: + master_services: + - "{{ openshift.common.service_type + '-master' }}" + - set_fact: + master_services: + - "{{ openshift.common.service_type + '-master-controllers' }}" + - "{{ openshift.common.service_type + '-master-api' }}" + when: + - (openshift_master_cluster_method is defined and openshift_master_cluster_method == "native") or openshift.common.is_master_system_container | bool + - debug: + msg: "master service name: {{ master_services }}" + - name: Stop masters + service: + name: "{{ item }}" + state: stopped + with_items: "{{ master_services }}" + +- name: Migrate etcd data from v2 to v3 + hosts: oo_etcd_to_migrate + gather_facts: no + tags: + - always + roles: + - role: etcd_migrate + r_etcd_migrate_action: migrate + r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + etcd_peer: "{{ ansible_default_ipv4.address }}" + +- name: Gate on etcd migration + hosts: oo_masters_to_config + gather_facts: no + tasks: + - set_fact: + etcd_migration_completed: "{{ hostvars + | oo_select_keys(groups.oo_etcd_to_migrate) + | oo_collect('inventory_hostname', {'r_etcd_migrate_success': true}) }}" + - set_fact: + etcd_migration_failed: "{{ groups.oo_etcd_to_migrate | difference(etcd_migration_completed) }}" + +- name: Configure masters if etcd data migration is succesfull + hosts: oo_masters_to_config + roles: + - role: etcd_migrate + r_etcd_migrate_action: configure + when: etcd_migration_failed | length == 0 + tasks: + - debug: + msg: "Skipping master re-configuration since migration failed." + when: + - etcd_migration_failed | length > 0 + +- name: Start masters after etcd data migration + hosts: oo_masters_to_config + tasks: + - name: Start master services + service: + name: "{{ item }}" + state: started + register: service_status + # Sometimes the master-api, resp. master-controllers fails to start for the first time + until: service_status.state is defined and service_status.state == "started" + retries: 5 + delay: 10 + with_items: "{{ master_services[::-1] }}" + - fail: + msg: "Migration failed. The following hosts were not properly migrated: {{ etcd_migration_failed | join(',') }}" + when: + - etcd_migration_failed | length > 0 diff --git a/roles/etcd_migrate/tasks/check.yml b/roles/etcd_migrate/tasks/check.yml index 2f07713bc..800073873 100644 --- a/roles/etcd_migrate/tasks/check.yml +++ b/roles/etcd_migrate/tasks/check.yml @@ -6,7 +6,7 @@ # Run the migration only if the data are v2 - name: Check if there are any v3 data command: > - etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:2379' get "" --from-key --keys-only -w json --limit 1 + etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' get "" --from-key --keys-only -w json --limit 1 environment: ETCDCTL_API: 3 register: l_etcdctl_output diff --git a/roles/etcd_migrate/tasks/check_cluster_health.yml b/roles/etcd_migrate/tasks/check_cluster_health.yml index 1abd6a32f..201d83f99 100644 --- a/roles/etcd_migrate/tasks/check_cluster_health.yml +++ b/roles/etcd_migrate/tasks/check_cluster_health.yml @@ -1,7 +1,7 @@ --- - name: Check cluster health command: > - etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt --endpoint https://{{ etcd_peer }}:2379 cluster-health + etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health register: etcd_cluster_health changed_when: false failed_when: false diff --git a/roles/etcd_migrate/tasks/check_cluster_status.yml b/roles/etcd_migrate/tasks/check_cluster_status.yml index 90fe385c1..b69fb5a52 100644 --- a/roles/etcd_migrate/tasks/check_cluster_status.yml +++ b/roles/etcd_migrate/tasks/check_cluster_status.yml @@ -2,7 +2,7 @@ # etcd_ip originates from etcd_common role - name: Check cluster status command: > - etcdctl --cert /etc/etcd/peer.crt --key /etc/etcd/peer.key --cacert /etc/etcd/ca.crt --endpoints 'https://{{ etcd_peer }}:2379' -w json endpoint status + etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' -w json endpoint status environment: ETCDCTL_API: 3 register: l_etcd_cluster_status @@ -15,7 +15,7 @@ # http://docs.ansible.com/ansible/playbooks_filters.html#extracting-values-from-containers - name: Group all raftIndices into a list set_fact: - etcd_members_raft_indices: "{{ groups['oo_etcd_to_config'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}" + etcd_members_raft_indices: "{{ groups['oo_etcd_to_migrate'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}" - name: Check the minimum and the maximum of raftIndices is at most 1 set_fact: @@ -24,9 +24,9 @@ - debug: msg: "Raft indices difference: {{ etcd_members_raft_indices_diff }}" - when: inventory_hostname in groups.oo_etcd_to_config[0] + when: inventory_hostname in groups.oo_etcd_to_migrate[0] # The cluster raft status is ok if the difference of the max and min raft index is at most 1 - name: capture the status set_fact: - l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_config[0]]['etcd_members_raft_indices_diff'] | int < 2 }}" + l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_migrate[0]]['etcd_members_raft_indices_diff'] | int < 2 }}" diff --git a/roles/etcd_migrate/tasks/migrate.yml b/roles/etcd_migrate/tasks/migrate.yml index cb479b0cc..27eb945aa 100644 --- a/roles/etcd_migrate/tasks/migrate.yml +++ b/roles/etcd_migrate/tasks/migrate.yml @@ -20,10 +20,12 @@ - name: Check the etcd v2 data are correctly migrated fail: msg: "Failed to migrate a member" - when: "'finished transforming keys' not in l_etcdctl_migrate.stdout" + when: "'finished transforming keys' not in l_etcdctl_migrate.stdout and 'no v2 keys to migrate' not in l_etcdctl_migrate.stdout" + +- name: Migration message + debug: + msg: "Etcd migration finished with: {{ l_etcdctl_migrate.stdout }}" -# TODO(jchaloup): start the etcd on a different port so noone can access it -# Once the validation is done - name: Enable etcd member service: name: "{{ l_etcd_service }}" @@ -35,7 +37,7 @@ --cert {{ etcd_peer_cert_file }} \ --key {{ etcd_peer_key_file }} \ --cacert {{ etcd_peer_ca_file }} \ - --etcd-address 'https://{{ etcd_peer }}:2379' \ + --etcd-address 'https://{{ etcd_peer }}:{{ etcd_client_port }}' \ --ttl-keys-prefix {{ item }} \ --lease-duration 1h environment: @@ -43,11 +45,8 @@ with_items: - "/kubernetes.io/events" - "/kubernetes.io/masterleases" + delegate_to: "{{ groups.oo_first_master[0] }}" + run_once: true - set_fact: r_etcd_migrate_success: true - -- name: Enable etcd member - service: - name: "{{ l_etcd_service }}" - state: started -- cgit v1.2.3