From 82ef5bc291006b51207b9e32626251a03c776548 Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Mon, 19 Dec 2016 14:47:10 +0100 Subject: Add RPM checks as an adhoc playbook --- ansible.cfg.example | 4 + callback_plugins/default_plus_summary.py | 94 ++++++++++++++++++ library/aos_version.py | 100 +++++++++++++++++++ library/check_yum_update.py | 116 ++++++++++++++++++++++ playbooks/adhoc/preflight/README.md | 51 ++++++++++ playbooks/adhoc/preflight/check.yml | 159 +++++++++++++++++++++++++++++++ playbooks/adhoc/preflight/library | 1 + 7 files changed, 525 insertions(+) create mode 100644 callback_plugins/default_plus_summary.py create mode 100755 library/aos_version.py create mode 100755 library/check_yum_update.py create mode 100644 playbooks/adhoc/preflight/README.md create mode 100644 playbooks/adhoc/preflight/check.yml create mode 120000 playbooks/adhoc/preflight/library diff --git a/ansible.cfg.example b/ansible.cfg.example index 6a7722ad8..c8cc5db03 100644 --- a/ansible.cfg.example +++ b/ansible.cfg.example @@ -13,6 +13,10 @@ roles_path = roles/ # Set the log_path log_path = /tmp/ansible.log +# prints a descriptive summary of failed tasks +callback_plugins = ./callback_plugins +stdout_callback = default_plus_summary + # Uncomment to use the provided BYO inventory #hostfile = inventory/byo/hosts diff --git a/callback_plugins/default_plus_summary.py b/callback_plugins/default_plus_summary.py new file mode 100644 index 000000000..610a3e98a --- /dev/null +++ b/callback_plugins/default_plus_summary.py @@ -0,0 +1,94 @@ +# vim: expandtab:tabstop=4:shiftwidth=4 +''' +Ansible callback plugin. +''' + +from ansible.plugins.callback.default import CallbackModule as CallbackModule_default +from ansible import constants as C +from ansible.utils.color import stringc + + +class CallbackModule(CallbackModule_default): + ''' + This is like the default callback plugin, but also stores results and + summarizes failures. + ''' + + CALLBACK_VERSION = 2.0 + CALLBACK_TYPE = 'stdout' + CALLBACK_NAME = 'default_plus_summary' + + def __init__(self): + super(CallbackModule, self).__init__() + self.__failures = [] + + def v2_runner_on_failed(self, result, ignore_errors=False): + super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors) + self.__failures.append(dict(result=result, ignore_errors=ignore_errors)) + + def v2_playbook_on_stats(self, stats): + super(CallbackModule, self).v2_playbook_on_stats(stats) + # TODO: update condition to consider a host var or env var to + # enable/disable the summary, so that we can control the output from a + # play. + if self.__failures: + self._print_failure_summary() + + def _print_failure_summary(self): + '''Print a summary of failed tasks (including ignored failures).''' + self._display.display(u'\nFailure summary:\n') + + # TODO: group failures by host or by task. If grouped by host, it is + # easy to see all problems of a given host. If grouped by task, it is + # easy to see what hosts needs the same fix. + + width = len(str(len(self.__failures))) + initial_indent_format = u' {{:>{width}}}. '.format(width=width) + initial_indent_len = len(initial_indent_format.format(0)) + subsequent_indent = u' ' * initial_indent_len + subsequent_extra_indent = u' ' * (initial_indent_len + 10) + + for i, failure in enumerate(self.__failures, 1): + lines = _format_failure(failure) + self._display.display(u'\n{}{}'.format(initial_indent_format.format(i), lines[0])) + for line in lines[1:]: + line = line.replace(u'\n', u'\n' + subsequent_extra_indent) + indented = u'{}{}'.format(subsequent_indent, line) + self._display.display(indented) + + +# Reason: disable pylint protected-access because we need to access _* +# attributes of a task result to implement this method. +# Status: permanently disabled unless Ansible's API changes. +# pylint: disable=protected-access +def _format_failure(failure): + '''Return a list of pretty-formatted lines describing a failure, including + relevant information about it. Line separators are not included.''' + result = failure['result'] + host = result._host.get_name() + play = _get_play(result._task) + if play: + play = play.get_name() + task = result._task.get_name() + msg = result._result.get('msg', u'???') + rows = ( + (u'Host', host), + (u'Play', play), + (u'Task', task), + (u'Message', stringc(msg, C.COLOR_ERROR)), + ) + row_format = '{:10}{}' + return [row_format.format(header + u':', body) for header, body in rows] + + +# Reason: disable pylint protected-access because we need to access _* +# attributes of obj to implement this function. +# This is inspired by ansible.playbook.base.Base.dump_me. +# Status: permanently disabled unless Ansible's API changes. +# pylint: disable=protected-access +def _get_play(obj): + '''Given a task or block, recursively tries to find its parent play.''' + if hasattr(obj, '_play'): + return obj._play + if getattr(obj, '_parent'): + return _get_play(obj._parent) diff --git a/library/aos_version.py b/library/aos_version.py new file mode 100755 index 000000000..f7fcb6da5 --- /dev/null +++ b/library/aos_version.py @@ -0,0 +1,100 @@ +#!/usr/bin/python +# vim: expandtab:tabstop=4:shiftwidth=4 +''' +An ansible module for determining if more than one minor version +of any atomic-openshift package is available, which would indicate +that multiple repos are enabled for different versions of the same +thing which may cause problems. + +Also, determine if the version requested is available down to the +precision requested. +''' + +# import os +# import sys +import yum # pylint: disable=import-error +from ansible.module_utils.basic import AnsibleModule + + +def main(): # pylint: disable=missing-docstring + module = AnsibleModule( + argument_spec=dict( + version=dict(required=True) + ), + supports_check_mode=True + ) + + # NOTE(rhcarvalho): sosiouxme added _unmute, but I couldn't find a case yet + # for when it is actually necessary. Leaving it commented out for now, + # though this comment and the commented out code related to _unmute should + # be deleted later if not proven necessary. + + # sys.stdout = os.devnull # mute yum so it doesn't break our output + # sys.stderr = os.devnull # mute yum so it doesn't break our output + + # def _unmute(): # pylint: disable=missing-docstring + # sys.stdout = sys.__stdout__ + + def bail(error): # pylint: disable=missing-docstring + # _unmute() + module.fail_json(msg=error) + + yb = yum.YumBase() # pylint: disable=invalid-name + + # search for package versions available for aos pkgs + expected_pkgs = [ + 'atomic-openshift', + 'atomic-openshift-master', + 'atomic-openshift-node', + ] + try: + pkgs = yb.pkgSack.returnPackages(patterns=expected_pkgs) + except yum.Errors.PackageSackError as e: # pylint: disable=invalid-name + # you only hit this if *none* of the packages are available + bail('Unable to find any atomic-openshift packages. \nCheck your subscription and repo settings. \n%s' % e) + + # determine what level of precision we're expecting for the version + expected_version = module.params['version'] + if expected_version.startswith('v'): # v3.3 => 3.3 + expected_version = expected_version[1:] + num_dots = expected_version.count('.') + + pkgs_by_name_version = {} + pkgs_precise_version_found = {} + for pkg in pkgs: + # get expected version precision + match_version = '.'.join(pkg.version.split('.')[:num_dots + 1]) + if match_version == expected_version: + pkgs_precise_version_found[pkg.name] = True + # get x.y version precision + minor_version = '.'.join(pkg.version.split('.')[:2]) + if pkg.name not in pkgs_by_name_version: + pkgs_by_name_version[pkg.name] = {} + pkgs_by_name_version[pkg.name][minor_version] = True + + # see if any packages couldn't be found at requested version + # see if any packages are available in more than one minor version + not_found = [] + multi_found = [] + for name in expected_pkgs: + if name not in pkgs_precise_version_found: + not_found.append(name) + if name in pkgs_by_name_version and len(pkgs_by_name_version[name]) > 1: + multi_found.append(name) + if not_found: + msg = 'Not all of the required packages are available at requested version %s:\n' % expected_version + for name in not_found: + msg += ' %s\n' % name + bail(msg + 'Please check your subscriptions and enabled repositories.') + if multi_found: + msg = 'Multiple minor versions of these packages are available\n' + for name in multi_found: + msg += ' %s\n' % name + bail(msg + "There should only be one OpenShift version's repository enabled at a time.") + + # _unmute() + module.exit_json(changed=False) + + +if __name__ == '__main__': + main() diff --git a/library/check_yum_update.py b/library/check_yum_update.py new file mode 100755 index 000000000..0c1974324 --- /dev/null +++ b/library/check_yum_update.py @@ -0,0 +1,116 @@ +#!/usr/bin/python +# vim: expandtab:tabstop=4:shiftwidth=4 +''' +Ansible module to test whether a yum update or install will succeed, +without actually performing it or running yum. +parameters: + packages: (optional) A list of package names to install or update. + If omitted, all installed RPMs are considered for updates. +''' + +# import os +import sys +import yum # pylint: disable=import-error +from ansible.module_utils.basic import AnsibleModule + + +def main(): # pylint: disable=missing-docstring,too-many-branches + module = AnsibleModule( + argument_spec=dict( + packages=dict(type='list', default=[]) + ), + supports_check_mode=True + ) + + # NOTE(rhcarvalho): sosiouxme added _unmute, but I couldn't find a case yet + # for when it is actually necessary. Leaving it commented out for now, + # though this comment and the commented out code related to _unmute should + # be deleted later if not proven necessary. + + # sys.stdout = os.devnull # mute yum so it doesn't break our output + + # def _unmute(): # pylint: disable=missing-docstring + # sys.stdout = sys.__stdout__ + + def bail(error): # pylint: disable=missing-docstring + # _unmute() + module.fail_json(msg=error) + + yb = yum.YumBase() # pylint: disable=invalid-name + # determine if the existing yum configuration is valid + try: + yb.repos.populateSack(mdtype='metadata', cacheonly=1) + # for error of type: + # 1. can't reach the repo URL(s) + except yum.Errors.NoMoreMirrorsRepoError as e: # pylint: disable=invalid-name + bail('Error getting data from at least one yum repository: %s' % e) + # 2. invalid repo definition + except yum.Errors.RepoError as e: # pylint: disable=invalid-name + bail('Error with yum repository configuration: %s' % e) + # 3. other/unknown + # * just report the problem verbatim + except: # pylint: disable=bare-except + bail('Unexpected error with yum repository: %s' % sys.exc_info()[1]) + + packages = module.params['packages'] + no_such_pkg = [] + for pkg in packages: + try: + yb.install(name=pkg) + except yum.Errors.InstallError as e: # pylint: disable=invalid-name + no_such_pkg.append(pkg) + except: # pylint: disable=bare-except + bail('Unexpected error with yum install/update: %s' % + sys.exc_info()[1]) + if not packages: + # no packages requested means test a yum update of everything + yb.update() + elif no_such_pkg: + # wanted specific packages to install but some aren't available + user_msg = 'Cannot install all of the necessary packages. Unavailable:\n' + for pkg in no_such_pkg: + user_msg += ' %s\n' % pkg + user_msg += 'You may need to enable one or more yum repositories to make this content available.' + bail(user_msg) + + try: + txn_result, txn_msgs = yb.buildTransaction() + except: # pylint: disable=bare-except + bail('Unexpected error during dependency resolution for yum update: \n %s' % + sys.exc_info()[1]) + + # find out if there are any errors with the update/install + if txn_result == 0: # 'normal exit' meaning there's nothing to install/update + pass + elif txn_result == 1: # error with transaction + user_msg = 'Could not perform a yum update.\n' + if len(txn_msgs) > 0: + user_msg += 'Errors from dependency resolution:\n' + for msg in txn_msgs: + user_msg += ' %s\n' % msg + user_msg += 'You should resolve these issues before proceeding with an install.\n' + user_msg += 'You may need to remove or downgrade packages or enable/disable yum repositories.' + bail(user_msg) + # TODO: it would be nice depending on the problem: + # 1. dependency for update not found + # * construct the dependency tree + # * find the installed package(s) that required the missing dep + # * determine if any of these packages matter to openshift + # * build helpful error output + # 2. conflicts among packages in available content + # * analyze dependency tree and build helpful error output + # 3. other/unknown + # * report the problem verbatim + # * add to this list as we come across problems we can clearly diagnose + elif txn_result == 2: # everything resolved fine + pass + else: + bail('Unknown error(s) from dependency resolution. Exit Code: %d:\n%s' % + (txn_result, txn_msgs)) + + # _unmute() + module.exit_json(changed=False) + + +if __name__ == '__main__': + main() diff --git a/playbooks/adhoc/preflight/README.md b/playbooks/adhoc/preflight/README.md new file mode 100644 index 000000000..871c6af01 --- /dev/null +++ b/playbooks/adhoc/preflight/README.md @@ -0,0 +1,51 @@ +# Preflight checks + +Here we provide an Ansible playbook for detecting potential roadblocks prior to +an install or upgrade. + +Ansible's default operation mode is to fail fast, on the first error. However, +when performing checks, it is useful to gather as much information about +problems as possible in a single run. + +The `check.yml` playbook runs a battery of checks against the inventory hosts +and tells Ansible to ignore intermediate errors, thus giving a more complete +diagnostic of the state of each host. Still, if any check failed, the playbook +run will be marked as having failed. + +To facilitate understanding the problems that were encountered, we provide a +custom callback plugin to summarize execution errors at the end of a playbook +run. + +--- + +*Note that currently the `check.yml` playbook is only useful for RPM-based +installations. Containerized installs are excluded from checks for now, but +might be included in the future if there is demand for that.* + +--- + +## Running + +With an installation of Ansible 2.2 or greater, run the playbook directly +against your inventory file. Here is the step-by-step: + +1. If you haven't done it yet, clone this repository: + + ```console + $ git clone https://github.com/openshift/openshift-ansible + $ cd openshift-ansible + ``` + +2. Configure a custom callback plugin to get a summary of problems at the end of +the playbook run: + + ```console + $ export ANSIBLE_CALLBACK_PLUGINS=callback_plugins \ + ANSIBLE_STDOUT_CALLBACK=default_plus_summary + ``` + +3. Run the playbook: + + ```console + $ ansible-playbook -i playbooks/adhoc/preflight/check.yml + ``` diff --git a/playbooks/adhoc/preflight/check.yml b/playbooks/adhoc/preflight/check.yml new file mode 100644 index 000000000..b66c1a824 --- /dev/null +++ b/playbooks/adhoc/preflight/check.yml @@ -0,0 +1,159 @@ +--- +- hosts: OSEv3 + gather_facts: no + tasks: + - set_fact: + deployment_type: "{{ deployment_type | default('openshift-enterprise') }}" + containerized: "{{ containerized | default('no') | bool }}" + openshift_release: "{{ openshift_release | default('3.3') }}" + oo_preflight_check_results: "{{ oo_preflight_check_results | default([]) }}" + +- hosts: OSEv3 + name: check content available on all hosts + gather_facts: no + ignore_errors: yes + tasks: + - when: + - not containerized + block: + + - name: determine if yum update will work + action: check_yum_update + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'check content available on all hosts'})] }}" + + - name: determine if expected version matches what is available + aos_version: + version: "{{ openshift_release }}" + when: + - deployment_type == "openshift-enterprise" + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'determine if expected version matches what is available'})] }}" + +- hosts: masters + name: determine if yum install of master pkgs will work + gather_facts: no + ignore_errors: yes + tasks: + - when: + - not containerized + block: + + - name: main packages for enterprise + when: + - deployment_type == "openshift-enterprise" + check_yum_update: + packages: + - atomic-openshift + - atomic-openshift-clients + - atomic-openshift-master + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main packages for enterprise'})] }}" + + - name: main packages for origin + when: + - deployment_type == "origin" + check_yum_update: + packages: + - origin + - origin-clients + - origin-master + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main packages for origin'})] }}" + + - name: other master packages + check_yum_update: + packages: + - etcd + - bash-completion + - cockpit-bridge + - cockpit-docker + - cockpit-kubernetes + - cockpit-shell + - cockpit-ws + - httpd-tools + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'other master packages'})] }}" + +- hosts: nodes + name: determine if yum install of node pkgs will work + gather_facts: no + ignore_errors: yes + tasks: + - when: + - not containerized + block: + + - name: main packages for enterprise + when: + - deployment_type == "openshift-enterprise" + check_yum_update: + packages: + - atomic-openshift + - atomic-openshift-node + - atomic-openshift-sdn-ovs + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main packages for enterprise'})] }}" + + - name: main packages for origin + when: + - deployment_type == "origin" + check_yum_update: + packages: + - origin + - origin-node + - origin-sdn-ovs + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main packages for origin'})] }}" + + - name: other node packages + check_yum_update: + packages: + - docker + - PyYAML + - firewalld + - iptables + - iptables-services + - nfs-utils + - ntp + - yum-utils + - dnsmasq + - libselinux-python + - ceph-common + - glusterfs-fuse + - iscsi-initiator-utils + - pyparted + - python-httplib2 + - openssl + - flannel + - bind + register: r + + - set_fact: + oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'other node packages'})] }}" + +- hosts: OSEv3 + name: verify check results + gather_facts: no + tasks: + + - set_fact: + oo_preflight_check_failures: "{{ oo_preflight_check_results | select('failed', 'equalto', True) | list }}" + + - name: ensure all checks succeed + action: fail + when: oo_preflight_check_failures diff --git a/playbooks/adhoc/preflight/library b/playbooks/adhoc/preflight/library new file mode 120000 index 000000000..ba40d2f56 --- /dev/null +++ b/playbooks/adhoc/preflight/library @@ -0,0 +1 @@ +../../../library \ No newline at end of file -- cgit v1.2.3