diff options
Diffstat (limited to 'roles/openshift_node_upgrade')
15 files changed, 524 insertions, 0 deletions
| diff --git a/roles/openshift_node_upgrade/README.md b/roles/openshift_node_upgrade/README.md new file mode 100644 index 000000000..e21bee412 --- /dev/null +++ b/roles/openshift_node_upgrade/README.md @@ -0,0 +1,108 @@ +OpenShift/Atomic Enterprise Node upgrade +========= + +Role responsible for a single node upgrade. +It is expected a node is functioning and a part of an OpenShift cluster. + +Requirements +------------ + +TODO + +Role Variables +-------------- +From this role: + +| Name                           | Default value         |                                                        | +|--------------------------------|-----------------------|--------------------------------------------------------| +| deployment_type                |                       | Inventory var                                          | +| docker_upgrade_nuke_images     |                       | Optional inventory var                                 | +| docker_version                 |                       | Optional inventory var                                 | +| l_docker_upgrade               |                       |                                                        | +| node_config_hook               |                       |                                                        | +| openshift.docker.gte_1_10      |                       |                                                        | +| openshift_image_tag            |                       | Set by openshift_version role                          | +| openshift_pkg_version          |                       | Set by openshift_version role                          | +| openshift_release              |                       | Set by openshift_version role                          | +| skip_docker_restart            |                       |                                                        | +| openshift_cloudprovider_kind   |                       |                                                    
    | + +From openshift.common: + +| Name                               |  Default Value      |                     | +|------------------------------------|---------------------|---------------------| +| openshift.common.config_base       |---------------------|---------------------| +| openshift.common.data_dir          |---------------------|---------------------| +| openshift.common.hostname          |---------------------|---------------------| +| openshift.common.http_proxy        |---------------------|---------------------| +| openshift.common.is_atomic         |---------------------|---------------------| +| openshift.common.is_containerized  |---------------------|---------------------| +| openshift.common.portal_net        |---------------------|---------------------| +| openshift.common.service_type      |---------------------|---------------------| +| openshift.common.use_openshift_sdn |---------------------|---------------------| + +From openshift.master: + +| Name                               |  Default Value      |                     | +|------------------------------------|---------------------|---------------------| +| openshift.master.api_port          |---------------------|---------------------| + +From openshift.node: + +| Name                               |  Default Value      |                     | +|------------------------------------|---------------------|---------------------| +| openshift.node.debug_level         |---------------------|---------------------| +| openshift.node.node_image          |---------------------|---------------------| +| openshift.node.ovs_image           |---------------------|---------------------| + + +Dependencies +------------ +openshift_common + +TODO + +Example Playbook +---------------- + +Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: + +``` +--- +- name: Upgrade nodes +  hosts: oo_nodes_to_upgrade +  serial: 1 +  
any_errors_fatal: true + +  pre_tasks: +  - name: Mark unschedulable +    command: > +      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=false +    delegate_to: "{{ groups.oo_first_master.0 }}" + +  - name: Drain Node for Kubelet upgrade +    command: > +      {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --force --delete-local-data +    delegate_to: "{{ groups.oo_first_master.0 }}" + +  roles: +  - openshift_facts +  - docker +  - openshift_node_upgrade + +  post_tasks: +  - name: Set node schedulability +    command: > +      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true +    delegate_to: "{{ groups.oo_first_master.0 }}" +``` + +License +------- + +Apache License, Version 2.0 + +Author Information +------------------ + +TODO diff --git a/roles/openshift_node_upgrade/files/nuke_images.sh b/roles/openshift_node_upgrade/files/nuke_images.sh new file mode 100644 index 000000000..8635eab0d --- /dev/null +++ b/roles/openshift_node_upgrade/files/nuke_images.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Stop any running containers +running_container_ids=`docker ps -q` +if test -n "$running_container_ids" +then +    docker stop $running_container_ids +fi + +# Delete all containers +container_ids=`docker ps -a -q` +if test -n "$container_ids" +then +    docker rm -f -v $container_ids +fi + +# Delete all images (forcefully) +image_ids=`docker images -aq` +if test -n "$image_ids" +then +    # Some layers are deleted recursively and are no longer present +    # when docker goes to remove them: +    docker rmi -f `docker images -aq` || true +fi + diff --git a/roles/openshift_node_upgrade/handlers/main.yml b/roles/openshift_node_upgrade/handlers/main.yml new file mode 100644 index 000000000..cb51416d4 --- /dev/null +++ 
b/roles/openshift_node_upgrade/handlers/main.yml @@ -0,0 +1,14 @@ +--- +- name: restart openvswitch +  systemd: name=openvswitch state=restarted +  when: (not skip_node_svc_handlers | default(False) | bool) and not (ovs_service_status_changed | default(false) | bool) and openshift.common.use_openshift_sdn | bool +  notify: +  - restart openvswitch pause + +- name: restart openvswitch pause +  pause: seconds=15 +  when: (not skip_node_svc_handlers | default(False) | bool) and openshift.common.is_containerized | bool + +- name: restart node +  systemd: name={{ openshift.common.service_type }}-node state=restarted +  when: (not skip_node_svc_handlers | default(False) | bool) and not (node_service_status_changed | default(false) | bool) diff --git a/roles/openshift_node_upgrade/meta/main.yml b/roles/openshift_node_upgrade/meta/main.yml new file mode 100644 index 000000000..cd2f362aa --- /dev/null +++ b/roles/openshift_node_upgrade/meta/main.yml @@ -0,0 +1,13 @@ +--- +galaxy_info: +  author: your name +  description: OpenShift Node upgrade +  company: Red Hat, Inc. +  license: Apache License, Version 2.0 +  min_ansible_version: 2.1 +  platforms: +  - name: EL +    versions: +    - 7 +dependencies: +- role: openshift_common diff --git a/roles/openshift_node_upgrade/tasks/containerized_node_upgrade.yml b/roles/openshift_node_upgrade/tasks/containerized_node_upgrade.yml new file mode 100644 index 000000000..07b0ac715 --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/containerized_node_upgrade.yml @@ -0,0 +1,14 @@ +--- +# This is a hack to allow us to use systemd_units.yml, but skip the handlers which +# restart services. 
We will unconditionally restart all containerized services +# because we have to unconditionally restart Docker: +- set_fact: +    skip_node_svc_handlers: True + +- name: Update systemd units +  include: systemd_units.yml + +# This is a no-op because of skip_node_svc_handlers, but lets us trigger it before end of +# play when the node has already been marked schedulable again. (this would look strange +# in logs otherwise) +- meta: flush_handlers diff --git a/roles/openshift_node_upgrade/tasks/docker/restart.yml b/roles/openshift_node_upgrade/tasks/docker/restart.yml new file mode 100644 index 000000000..176fc3c0b --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/docker/restart.yml @@ -0,0 +1,33 @@ +--- +# input variables: +# - openshift.common.service_type +# - openshift.common.is_containerized +# - openshift.common.hostname +# - openshift.master.api_port + +- name: Restart docker +  service: name=docker state=restarted + +- name: Update docker facts +  openshift_facts: +    role: docker + +- name: Restart containerized services +  service: name={{ item }} state=started +  with_items: +    - etcd_container +    - openvswitch +    - "{{ openshift.common.service_type }}-master" +    - "{{ openshift.common.service_type }}-master-api" +    - "{{ openshift.common.service_type }}-master-controllers" +    - "{{ openshift.common.service_type }}-node" +  failed_when: false +  when: openshift.common.is_containerized | bool + +- name: Wait for master API to come back online +  wait_for: +    host: "{{ openshift.common.hostname }}" +    state: started +    delay: 10 +    port: "{{ openshift.master.api_port }}" +  when: inventory_hostname in groups.oo_masters_to_config diff --git a/roles/openshift_node_upgrade/tasks/docker/upgrade.yml b/roles/openshift_node_upgrade/tasks/docker/upgrade.yml new file mode 100644 index 000000000..e91891ca9 --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/docker/upgrade.yml @@ -0,0 +1,49 @@ +--- +# input variables: +# - 
openshift.common.service_type +# - openshift.common.is_containerized +# - docker_upgrade_nuke_images +# - docker_version (optional; when unset or empty the package name is left unversioned) +# - skip_docker_restart + +# We need docker service up to remove all the images, but these services will keep +# trying to re-start and thus re-pull the images we're trying to delete. +- name: Stop containerized services +  service: name={{ item }} state=stopped +  with_items: +    - "{{ openshift.common.service_type }}-master" +    - "{{ openshift.common.service_type }}-master-api" +    - "{{ openshift.common.service_type }}-master-controllers" +    - "{{ openshift.common.service_type }}-node" +    - etcd_container +    - openvswitch +  failed_when: false +  when: openshift.common.is_containerized | bool + +- name: Check Docker image count +  shell: "docker images -aq | wc -l" +  register: docker_image_count + +- debug: var=docker_image_count.stdout + +# TODO(jchaloup): put all docker_upgrade_nuke_images into a block with only one condition +- name: Remove all containers and images +  script: nuke_images.sh +  register: nuke_images_result +  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- name: Check Docker image count +  shell: "docker images -aq | wc -l" +  register: docker_image_count +  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- debug: var=docker_image_count.stdout +  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- service: name=docker state=stopped + +- name: Upgrade Docker +  package: name=docker{{ '-' + docker_version if docker_version | default('') != '' else '' }} state=present + +- include: restart.yml +  when: not skip_docker_restart | default(False) | bool diff --git a/roles/openshift_node_upgrade/tasks/main.yml b/roles/openshift_node_upgrade/tasks/main.yml new file mode 100644 index 000000000..b1d5f0e0f --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/main.yml @@ -0,0 +1,77 @@ +--- +# input variables: +# - l_docker_upgrade +# - 
openshift.common.is_atomic +# - node_config_hook +# - openshift_pkg_version +# - openshift.common.is_containerized +# - deployment_type +# - openshift_release + +# tasks file for openshift_node_upgrade +- include: docker/upgrade.yml +  vars: +    # We will restart Docker ourselves after everything is ready: +    skip_docker_restart: True +  when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool + +- include: "{{ node_config_hook }}" +  when: node_config_hook is defined + +- include: rpm_upgrade.yml +  vars: +    component: "node" +    openshift_version: "{{ openshift_pkg_version | default('') }}" +  when: not openshift.common.is_containerized | bool + +- name: Remove obsolete docker-sdn-ovs.conf +  file: path=/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf state=absent +  when: (deployment_type == 'openshift-enterprise' and openshift_release | version_compare('3.4', '>=')) or (deployment_type == 'origin' and openshift_release | version_compare('1.4', '>=')) + +- include: containerized_node_upgrade.yml +  when: openshift.common.is_containerized | bool + +- name: Ensure containerized services stopped before Docker restart +  service: name={{ item }} state=stopped +  with_items: +  - etcd_container +  - openvswitch +  - "{{ openshift.common.service_type }}-master" +  - "{{ openshift.common.service_type }}-master-api" +  - "{{ openshift.common.service_type }}-master-controllers" +  - "{{ openshift.common.service_type }}-node" +  failed_when: false +  when: openshift.common.is_containerized | bool + +- name: Upgrade openvswitch +  package: +    name: openvswitch +    state: latest +  register: ovs_pkg +  when: not openshift.common.is_containerized | bool + +- name: Restart openvswitch +  systemd: +    name: openvswitch +    state: restarted +  when: +  - not openshift.common.is_containerized | bool +  - ovs_pkg | changed + +# Mandatory Docker restart, ensure all containerized services are running: +- include: 
docker/restart.yml + +- name: Restart rpm node service +  service: name="{{ openshift.common.service_type }}-node" state=restarted +  when: not openshift.common.is_containerized | bool + +- name: Wait for node to be ready +  command: > +    {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} get node {{ openshift.common.hostname | lower }} --no-headers +  register: node_output +  delegate_to: "{{ groups.oo_first_master.0 }}" +  until: "node_output.stdout.split() | length > 1 and node_output.stdout.split()[1].startswith('Ready')" +  # Give the node two minutes to come back online. Note that we pre-pull images now +  # so containerized services should restart quickly as well. +  retries: 24 +  delay: 5 diff --git a/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml b/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml new file mode 100644 index 000000000..480e87d58 --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/rpm_upgrade.yml @@ -0,0 +1,14 @@ +--- +# input variables: +# - openshift.common.service_type +# - component +# - openshift_pkg_version +# - openshift.common.is_atomic + +# We verified latest rpm available is suitable, so just yum update. 
+- name: Upgrade packages +  package: "name={{ openshift.common.service_type }}-{{ component }}{{ openshift_pkg_version | default('') }} state=present" + +- name: Ensure python-yaml present for config upgrade +  package: name=PyYAML state=present +  when: not openshift.common.is_atomic | bool diff --git a/roles/openshift_node_upgrade/tasks/systemd_units.yml b/roles/openshift_node_upgrade/tasks/systemd_units.yml new file mode 100644 index 000000000..862cd19c4 --- /dev/null +++ b/roles/openshift_node_upgrade/tasks/systemd_units.yml @@ -0,0 +1,119 @@ +--- +# input variables +# - openshift.node.node_image +# - openshift_image_tag +# - openshift.common.is_containerized +# - openshift.node.ovs_image +# - openshift.common.use_openshift_sdn +# - openshift.common.service_type +# - openshift.node.debug_level +# - openshift.common.config_base +# - openshift.common.http_proxy +# - openshift.common.portal_net +# - openshift.common +# - openshift.common.https_proxy +# notify: +# - restart openvswitch +# - restart node + +# This file is included both in the openshift_master role and in the upgrade +# playbooks. 
+ +- name: Pre-pull node image +  command: > +    docker pull {{ openshift.node.node_image }}:{{ openshift_image_tag }} +  register: pull_result +  changed_when: "'Downloaded newer image' in pull_result.stdout" +  when: openshift.common.is_containerized | bool + +- name: Pre-pull openvswitch image +  command: > +    docker pull {{ openshift.node.ovs_image }}:{{ openshift_image_tag }} +  register: pull_result +  changed_when: "'Downloaded newer image' in pull_result.stdout" +  when: openshift.common.is_containerized | bool and openshift.common.use_openshift_sdn | bool + +- name: Install Node dependencies docker service file +  template: +    dest: "/etc/systemd/system/{{ openshift.common.service_type }}-node-dep.service" +    src: openshift.docker.node.dep.service +  register: install_node_dep_result +  when: openshift.common.is_containerized | bool + +- name: Install Node docker service file +  template: +    dest: "/etc/systemd/system/{{ openshift.common.service_type }}-node.service" +    src: openshift.docker.node.service +  register: install_node_result +  when: openshift.common.is_containerized | bool + +- name: Create the openvswitch service env file +  template: +    src: openvswitch.sysconfig.j2 +    dest: /etc/sysconfig/openvswitch +  when: openshift.common.is_containerized | bool +  register: install_ovs_sysconfig +  notify: +  - restart openvswitch + +# May be a temporary workaround. 
+# https://bugzilla.redhat.com/show_bug.cgi?id=1331590 +- name: Create OpenvSwitch service.d directory +  file: path=/etc/systemd/system/openvswitch.service.d/ state=directory +  when: openshift.common.use_openshift_sdn | default(true) | bool + +- name: Install OpenvSwitch service OOM fix +  template: +    dest: "/etc/systemd/system/openvswitch.service.d/01-avoid-oom.conf" +    src: openvswitch-avoid-oom.conf +  when: openshift.common.use_openshift_sdn | default(true) | bool +  register: install_oom_fix_result +  notify: +  - restart openvswitch + +- name: Install OpenvSwitch docker service file +  template: +    dest: "/etc/systemd/system/openvswitch.service" +    src: openvswitch.docker.service +  when: openshift.common.is_containerized | bool and openshift.common.use_openshift_sdn | default(true) | bool +  notify: +  - restart openvswitch + +- name: Configure Node settings +  lineinfile: +    dest: /etc/sysconfig/{{ openshift.common.service_type }}-node +    regexp: "{{ item.regex }}" +    line: "{{ item.line }}" +    create: true +  with_items: +  - regex: '^OPTIONS=' +    line: "OPTIONS=--loglevel={{ openshift.node.debug_level | default(2) }}" +  - regex: '^CONFIG_FILE=' +    line: "CONFIG_FILE={{ openshift.common.config_base }}/node/node-config.yaml" +  - regex: '^IMAGE_VERSION=' +    line: "IMAGE_VERSION={{ openshift_image_tag }}" +  notify: +  - restart node + +- name: Configure Proxy Settings +  lineinfile: +    dest: /etc/sysconfig/{{ openshift.common.service_type }}-node +    regexp: "{{ item.regex }}" +    line: "{{ item.line }}" +    create: true +  with_items: +  - regex: '^HTTP_PROXY=' +    line: "HTTP_PROXY={{ openshift.common.http_proxy | default('') }}" +  - regex: '^HTTPS_PROXY=' +    line: "HTTPS_PROXY={{ openshift.common.https_proxy | default('') }}" +  - regex: '^NO_PROXY=' +    line: "NO_PROXY={{ openshift.common.no_proxy | default([]) }},{{ openshift.common.portal_net }},{{ 
hostvars[groups.oo_first_master.0].openshift.master.sdn_cluster_network_cidr }}" +  when: ('http_proxy' in openshift.common and openshift.common.http_proxy != '') +  notify: +  - restart node + +- name: Reload systemd units +  command: systemctl daemon-reload +  when: (openshift.common.is_containerized | bool and (install_node_result | changed or install_ovs_sysconfig | changed or install_node_dep_result | changed)) or install_oom_fix_result | changed +  notify: +  - restart node diff --git a/roles/openshift_node_upgrade/templates/openshift.docker.node.dep.service b/roles/openshift_node_upgrade/templates/openshift.docker.node.dep.service new file mode 100644 index 000000000..0fb34cffd --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openshift.docker.node.dep.service @@ -0,0 +1,11 @@ +[Unit] +Requires=docker.service +After=docker.service +PartOf={{ openshift.common.service_type }}-node.service +Before={{ openshift.common.service_type }}-node.service + + +[Service] +ExecStart=/bin/bash -c "if [[ -f /usr/bin/docker-current ]]; then echo \"DOCKER_ADDTL_BIND_MOUNTS=--volume=/usr/bin/docker-current:/usr/bin/docker-current:ro --volume=/etc/sysconfig/docker:/etc/sysconfig/docker:ro\" > /etc/sysconfig/{{ openshift.common.service_type }}-node-dep; else echo \"#DOCKER_ADDTL_BIND_MOUNTS=\" > /etc/sysconfig/{{ openshift.common.service_type }}-node-dep; fi" +ExecStop= +SyslogIdentifier={{ openshift.common.service_type }}-node-dep diff --git a/roles/openshift_node_upgrade/templates/openshift.docker.node.service b/roles/openshift_node_upgrade/templates/openshift.docker.node.service new file mode 100644 index 000000000..e33d5d497 --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openshift.docker.node.service @@ -0,0 +1,26 @@ +[Unit] +After={{ openshift.common.service_type }}-master.service +After=docker.service +After=openvswitch.service +PartOf=docker.service +Requires=docker.service +{% if openshift.common.use_openshift_sdn %} +Requires=openvswitch.service +{% 
endif %} +Wants={{ openshift.common.service_type }}-master.service +Requires={{ openshift.common.service_type }}-node-dep.service +After={{ openshift.common.service_type }}-node-dep.service + +[Service] +EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-node +EnvironmentFile=/etc/sysconfig/{{ openshift.common.service_type }}-node-dep +ExecStartPre=-/usr/bin/docker rm -f {{ openshift.common.service_type }}-node +ExecStart=/usr/bin/docker run --name {{ openshift.common.service_type }}-node --rm --privileged --net=host --pid=host --env-file=/etc/sysconfig/{{ openshift.common.service_type }}-node -v /:/rootfs:ro -e CONFIG_FILE=${CONFIG_FILE} -e OPTIONS=${OPTIONS} -e HOST=/rootfs -e HOST_ETC=/host-etc -v {{ openshift.common.data_dir }}:{{ openshift.common.data_dir }}{{ ':rslave' if openshift.docker.gte_1_10 | default(False) | bool else '' }} -v {{ openshift.common.config_base }}/node:{{ openshift.common.config_base }}/node {% if openshift_cloudprovider_kind | default('') != '' -%} -v {{ openshift.common.config_base }}/cloudprovider:{{ openshift.common.config_base}}/cloudprovider {% endif -%} -v /etc/localtime:/etc/localtime:ro -v /etc/machine-id:/etc/machine-id:ro -v /run:/run -v /sys:/sys:rw -v /sys/fs/cgroup:/sys/fs/cgroup:rw -v /usr/bin/docker:/usr/bin/docker:ro -v /var/lib/docker:/var/lib/docker -v /lib/modules:/lib/modules -v /etc/origin/openvswitch:/etc/openvswitch -v /etc/origin/sdn:/etc/openshift-sdn -v /etc/systemd/system:/host-etc/systemd/system -v /var/log:/var/log -v /dev:/dev $DOCKER_ADDTL_BIND_MOUNTS {{ openshift.node.node_image }}:${IMAGE_VERSION} +ExecStartPost=/usr/bin/sleep 10 +ExecStop=/usr/bin/docker stop {{ openshift.common.service_type }}-node +SyslogIdentifier={{ openshift.common.service_type }}-node +Restart=always +RestartSec=5s + +[Install] +WantedBy=docker.service diff --git a/roles/openshift_node_upgrade/templates/openvswitch-avoid-oom.conf b/roles/openshift_node_upgrade/templates/openvswitch-avoid-oom.conf new file mode 
100644 index 000000000..3229bc56b --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openvswitch-avoid-oom.conf @@ -0,0 +1,3 @@ +# Avoid the OOM killer for openvswitch and its children: +[Service] +OOMScoreAdjust=-1000 diff --git a/roles/openshift_node_upgrade/templates/openvswitch.docker.service b/roles/openshift_node_upgrade/templates/openvswitch.docker.service new file mode 100644 index 000000000..1e1f8967d --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openvswitch.docker.service @@ -0,0 +1,17 @@ +[Unit] +After=docker.service +Requires=docker.service +PartOf=docker.service + +[Service] +EnvironmentFile=/etc/sysconfig/openvswitch +ExecStartPre=-/usr/bin/docker rm -f openvswitch +ExecStart=/usr/bin/docker run --name openvswitch --rm --privileged --net=host --pid=host -v /lib/modules:/lib/modules -v /run:/run -v /sys:/sys:ro -v /etc/origin/openvswitch:/etc/openvswitch {{ openshift.node.ovs_image }}:${IMAGE_VERSION} +ExecStartPost=/usr/bin/sleep 5 +ExecStop=/usr/bin/docker stop openvswitch +SyslogIdentifier=openvswitch +Restart=always +RestartSec=5s + +[Install] +WantedBy=docker.service diff --git a/roles/openshift_node_upgrade/templates/openvswitch.sysconfig.j2 b/roles/openshift_node_upgrade/templates/openvswitch.sysconfig.j2 new file mode 100644 index 000000000..da7c3742a --- /dev/null +++ b/roles/openshift_node_upgrade/templates/openvswitch.sysconfig.j2 @@ -0,0 +1 @@ +IMAGE_VERSION={{ openshift_image_tag }} | 
