diff options
Diffstat (limited to 'playbooks/openshift-checks')
18 files changed, 351 insertions, 0 deletions
| diff --git a/playbooks/openshift-checks/README.md b/playbooks/openshift-checks/README.md new file mode 100644 index 000000000..0b7ea91ff --- /dev/null +++ b/playbooks/openshift-checks/README.md @@ -0,0 +1,104 @@ +# OpenShift health checks + +This directory contains Ansible playbooks for detecting potential problems prior +to an install, as well as health checks to run on existing OpenShift clusters. + +Ansible's default operation mode is to fail fast, on the first error. However, +when performing checks, it is useful to gather as much information about +problems as possible in a single run. + +Thus, the playbooks run a battery of checks against the inventory hosts and +gather intermediate errors, giving a more complete diagnostic of the state of +each host. If any check failed, the playbook run will be marked as failed. + +To facilitate understanding the problems that were encountered, a custom +callback plugin summarizes execution errors at the end of a playbook run. + +## Available playbooks + +1. Pre-install playbook ([pre-install.yml](pre-install.yml)) - verifies system +   requirements and look for common problems that can prevent a successful +   installation of a production cluster. + +2. Diagnostic playbook ([health.yml](health.yml)) - check an existing cluster +   for known signs of problems. + +3. Certificate expiry playbooks ([certificate_expiry](certificate_expiry)) - +   check that certificates in use are valid and not expiring soon. + +4. Adhoc playbook ([adhoc.yml](adhoc.yml)) - use it to run adhoc checks or to +   list existing checks. +   See the [next section](#the-adhoc-playbook) for a usage example. + +## Running + +With a [recent installation of Ansible](../../../README.md#setup), run the playbook +against your inventory file. Here is the step-by-step: + +1. If you haven't done it yet, clone this repository: + +    ```console +    $ git clone https://github.com/openshift/openshift-ansible +    $ cd openshift-ansible +    ``` + +2. Install the [dependencies](../../../README.md#setup) + +3. Run the appropriate playbook: + +    ```console +    $ ansible-playbook -i <inventory file> playbooks/openshift-checks/pre-install.yml +    ``` + +    or + +    ```console +    $ ansible-playbook -i <inventory file> playbooks/openshift-checks/health.yml +    ``` + +    or + +    ```console +    $ ansible-playbook -i <inventory file> playbooks/openshift-checks/certificate_expiry/default.yaml -v +    ``` + +### The adhoc playbook + +The adhoc playbook gives flexibility to run any check or a custom group of +checks. What will be run is determined by the `openshift_checks` variable, +which, among other ways supported by Ansible, can be set on the command line +using the `-e` flag. + +For example, to run the `docker_storage` check: + +```console +$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml -e openshift_checks=docker_storage +``` + +To run more checks, use a comma-separated list of check names: + +```console +$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml -e openshift_checks=docker_storage,disk_availability +``` + +To run an entire class of checks, use the name of a check group tag, prefixed by `@`. This will run all checks tagged `preflight`: + +```console +$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml -e openshift_checks=@preflight +``` + +It is valid to specify multiple check tags and individual check names together +in a comma-separated list. + +To list all of the available checks and tags, run the adhoc playbook without +setting the `openshift_checks` variable: + +```console +$ ansible-playbook -i <inventory file> playbooks/openshift-checks/adhoc.yml +``` + +## Running in a container + +This repository is built into a Docker image including Ansible so that it can +be run anywhere Docker is available, without the need to manually install dependencies. +Instructions for doing so may be found [in the README](../../../README_CONTAINER_IMAGE.md). diff --git a/playbooks/openshift-checks/adhoc.yml b/playbooks/openshift-checks/adhoc.yml new file mode 100644 index 000000000..414090733 --- /dev/null +++ b/playbooks/openshift-checks/adhoc.yml @@ -0,0 +1,25 @@ +--- +# NOTE: ideally this would be just part of a single play in +# private/adhoc.yml that lists the existing checks when +# openshift_checks is not set or run the requested checks. However, to actually +# run the checks we need to have the included dependencies to run first and that +# takes time. To speed up listing checks, we use this separate play that runs +# before the include of dependencies to save time and improve the UX. +- name: OpenShift health checks +  # NOTE: though the openshift_checks variable could be potentially defined on +  # individual hosts while not defined for localhost, we do not support that +  # usage. Running this play only in localhost speeds up execution. +  hosts: localhost +  connection: local +  roles: +  - openshift_health_checker +  vars: +  - r_openshift_health_checker_playbook_context: adhoc +  pre_tasks: +  - name: List known health checks +    action: openshift_health_check +    when: openshift_checks is undefined or not openshift_checks + +- import_playbook: ../init/main.yml + +- import_playbook: private/adhoc.yml diff --git a/playbooks/openshift-checks/certificate_expiry/default.yaml b/playbooks/openshift-checks/certificate_expiry/default.yaml new file mode 100644 index 000000000..630135cae --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/default.yaml @@ -0,0 +1,10 @@ +--- +# Default behavior, you will need to ensure you run ansible with the +# -v option to see report results: + +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  roles: +    - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml b/playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml new file mode 100644 index 000000000..378d1f154 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/easy-mode-upload.yaml @@ -0,0 +1,40 @@ +# This example generates HTML and JSON reports and +# +# Copies of the generated HTML and JSON reports are uploaded to the masters, +# which is particularly useful when this playbook is run from a container. +# +# All certificates (healthy or not) are included in the results +# +# Optional environment variables to alter the behaviour of the playbook: +# CERT_EXPIRY_WARN_DAYS:  Length of the warning window in days (45) +# COPY_TO_PATH: path to copy reports to in the masters (/etc/origin/certificate_expiration_report) +--- +- name: Generate certificate expiration reports +  hosts: nodes:masters:etcd +  gather_facts: no +  vars: +    openshift_certificate_expiry_save_json_results: yes +    openshift_certificate_expiry_generate_html_report: yes +    openshift_certificate_expiry_show_all: yes +    openshift_certificate_expiry_warning_days: "{{ lookup('env', 'CERT_EXPIRY_WARN_DAYS') | default('45', true) }}" +  roles: +    - role: openshift_certificate_expiry + +- name: Upload reports to master +  hosts: masters +  gather_facts: no +  vars: +    destination_path: "{{ lookup('env', 'COPY_TO_PATH') | default('/etc/origin/certificate_expiration_report', true) }}" +    timestamp: "{{ lookup('pipe', 'date +%Y%m%d') }}" +  tasks: +    - name: Ensure that the target directory exists +      file: +        path: "{{ destination_path }}" +        state: directory +    - name: Copy the reports +      copy: +        dest: "{{ destination_path }}/{{ timestamp }}-{{ item }}" +        src: "/tmp/{{ item }}" +      with_items: +        - "cert-expiry-report.html" +        - "cert-expiry-report.json" diff --git a/playbooks/openshift-checks/certificate_expiry/easy-mode.yaml b/playbooks/openshift-checks/certificate_expiry/easy-mode.yaml new file mode 100644 index 000000000..ae41c7c14 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/easy-mode.yaml @@ -0,0 +1,18 @@ +--- +# This example playbook is great if you're just wanting to try the +# role out. +# +# This example enables HTML and JSON reports +# +# All certificates (healthy or not) are included in the results + +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  vars: +    openshift_certificate_expiry_save_json_results: yes +    openshift_certificate_expiry_generate_html_report: yes +    openshift_certificate_expiry_show_all: yes +  roles: +    - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml b/playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml new file mode 100644 index 000000000..d80cb6ff4 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml @@ -0,0 +1,12 @@ +--- +# Generate HTML and JSON artifacts in their default paths: + +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  vars: +    openshift_certificate_expiry_generate_html_report: yes +    openshift_certificate_expiry_save_json_results: yes +  roles: +    - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml b/playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml new file mode 100644 index 000000000..2189455b7 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml @@ -0,0 +1,16 @@ +--- +# Generate timestamped HTML and JSON reports in /var/lib/certcheck + +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  vars: +    openshift_certificate_expiry_generate_html_report: yes +    openshift_certificate_expiry_save_json_results: yes +    openshift_certificate_expiry_show_all: yes +    timestamp: "{{ lookup('pipe', 'date +%Y%m%d') }}" +    openshift_certificate_expiry_html_report_path: "/var/lib/certcheck/{{ timestamp }}-cert-expiry-report.html" +    openshift_certificate_expiry_json_results_path: "/var/lib/certcheck/{{ timestamp }}-cert-expiry-report.json" +  roles: +    - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml b/playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml new file mode 100644 index 000000000..87a0f3be4 --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml @@ -0,0 +1,13 @@ +--- +# Change the expiration warning window to 1500 days (good for testing +# the module out) and save the results as a JSON file: + +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  vars: +    openshift_certificate_expiry_warning_days: 1500 +    openshift_certificate_expiry_save_json_results: yes +  roles: +    - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml b/playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml new file mode 100644 index 000000000..960457c4b --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/longer_warning_period.yaml @@ -0,0 +1,12 @@ +--- +# Change the expiration warning window to 1500 days (good for testing +# the module out): + +- name: Check cert expirys +  hosts: nodes:masters:etcd +  become: yes +  gather_facts: no +  vars: +    openshift_certificate_expiry_warning_days: 1500 +  roles: +    - role: openshift_certificate_expiry diff --git a/playbooks/openshift-checks/certificate_expiry/roles b/playbooks/openshift-checks/certificate_expiry/roles new file mode 120000 index 000000000..20c4c58cf --- /dev/null +++ b/playbooks/openshift-checks/certificate_expiry/roles @@ -0,0 +1 @@ +../../../roles
\ No newline at end of file diff --git a/playbooks/openshift-checks/health.yml b/playbooks/openshift-checks/health.yml new file mode 100644 index 000000000..caac06626 --- /dev/null +++ b/playbooks/openshift-checks/health.yml @@ -0,0 +1,4 @@ +--- +- import_playbook: ../init/main.yml + +- import_playbook: private/health.yml diff --git a/playbooks/openshift-checks/pre-install.yml b/playbooks/openshift-checks/pre-install.yml new file mode 100644 index 000000000..4511f6e3c --- /dev/null +++ b/playbooks/openshift-checks/pre-install.yml @@ -0,0 +1,4 @@ +--- +- import_playbook: ../init/main.yml + +- import_playbook: private/pre-install.yml diff --git a/playbooks/openshift-checks/private/adhoc.yml b/playbooks/openshift-checks/private/adhoc.yml new file mode 100644 index 000000000..d0deaeb65 --- /dev/null +++ b/playbooks/openshift-checks/private/adhoc.yml @@ -0,0 +1,13 @@ +--- +- name: OpenShift Health Checks +  hosts: oo_all_hosts + +  roles: +  - openshift_health_checker +  vars: +  - r_openshift_health_checker_playbook_context: adhoc +  post_tasks: +  - name: Run health checks (adhoc) +    action: openshift_health_check +    args: +      checks: '{{ openshift_checks | default([]) }}' diff --git a/playbooks/openshift-checks/private/health.yml b/playbooks/openshift-checks/private/health.yml new file mode 100644 index 000000000..d0921b9d3 --- /dev/null +++ b/playbooks/openshift-checks/private/health.yml @@ -0,0 +1,13 @@ +--- +- name: OpenShift Health Checks +  hosts: oo_all_hosts + +  roles: +  - openshift_health_checker +  vars: +  - r_openshift_health_checker_playbook_context: health +  post_tasks: +  - name: Run health checks (@health) +    action: openshift_health_check +    args: +      checks: ['@health'] diff --git a/playbooks/openshift-checks/private/install.yml b/playbooks/openshift-checks/private/install.yml new file mode 100644 index 000000000..93cf6c359 --- /dev/null +++ b/playbooks/openshift-checks/private/install.yml @@ -0,0 +1,51 @@ +--- +- name: Health Check Checkpoint Start +  hosts: all +  gather_facts: false +  tasks: +  - name: Set Health Check 'In Progress' +    run_once: true +    set_stats: +      data: +        installer_phase_health: +          status: "In Progress" +          start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" + +- name: OpenShift Health Checks +  hosts: oo_all_hosts +  any_errors_fatal: true +  roles: +  - openshift_health_checker +  vars: +  - r_openshift_health_checker_playbook_context: install +  post_tasks: +  - name: Run health checks (install) - EL +    when: ansible_distribution != "Fedora" +    action: openshift_health_check +    args: +      checks: +      - disk_availability +      - memory_availability +      - package_availability +      - package_version +      - docker_image_availability +      - docker_storage + +  - name: Run health checks (install) - Fedora +    when: ansible_distribution == "Fedora" +    action: openshift_health_check +    args: +      checks: +      - docker_image_availability + +- name: Health Check Checkpoint End +  hosts: all +  gather_facts: false +  tasks: +  - name: Set Health Check 'Complete' +    run_once: true +    set_stats: +      data: +        installer_phase_health: +          status: "Complete" +          end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" diff --git a/playbooks/openshift-checks/private/pre-install.yml b/playbooks/openshift-checks/private/pre-install.yml new file mode 100644 index 000000000..32449d4e4 --- /dev/null +++ b/playbooks/openshift-checks/private/pre-install.yml @@ -0,0 +1,13 @@ +--- +- name: OpenShift Health Checks +  hosts: oo_all_hosts + +  roles: +  - openshift_health_checker +  vars: +  - r_openshift_health_checker_playbook_context: pre-install +  post_tasks: +  - name: Run health checks (@preflight) +    action: openshift_health_check +    args: +      checks: ['@preflight'] diff --git a/playbooks/openshift-checks/private/roles b/playbooks/openshift-checks/private/roles new file mode 120000 index 000000000..20c4c58cf --- /dev/null +++ b/playbooks/openshift-checks/private/roles @@ -0,0 +1 @@ +../../../roles
\ No newline at end of file diff --git a/playbooks/openshift-checks/roles b/playbooks/openshift-checks/roles new file mode 120000 index 000000000..b741aa3db --- /dev/null +++ b/playbooks/openshift-checks/roles @@ -0,0 +1 @@ +../../roles
\ No newline at end of file | 
