diff options
Diffstat (limited to 'roles/openshift_health_checker')
23 files changed, 643 insertions, 228 deletions
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py index 8d35db6b5..326176273 100644 --- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -3,7 +3,10 @@ Ansible action plugin to execute health checks in OpenShift clusters. """ import sys import os +import base64 import traceback +import errno +import json from collections import defaultdict from ansible.plugins.action import ActionBase @@ -38,8 +41,13 @@ class ActionModule(ActionBase): # storing the information we need in the result. result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context') + # if the user wants to write check results to files, they provide this directory: + output_dir = task_vars.get("openshift_checks_output_dir") + if output_dir: + output_dir = os.path.join(output_dir, task_vars["ansible_host"]) + try: - known_checks = self.load_known_checks(tmp, task_vars) + known_checks = self.load_known_checks(tmp, task_vars, output_dir) args = self._task.args requested_checks = normalize(args.get('checks', [])) @@ -65,21 +73,20 @@ class ActionModule(ActionBase): for name in resolved_checks: display.banner("CHECK [{} : {}]".format(name, task_vars["ansible_host"])) - check = known_checks[name] - check_results[name] = run_check(name, check, user_disabled_checks) - if check.changed: - check_results[name]["changed"] = True + check_results[name] = run_check(name, known_checks[name], user_disabled_checks, output_dir) result["changed"] = any(r.get("changed") for r in check_results.values()) if any(r.get("failed") for r in check_results.values()): result["failed"] = True result["msg"] = "One or more checks failed" + write_result_to_output_dir(output_dir, result) return result - def load_known_checks(self, tmp, task_vars): + def load_known_checks(self, tmp, task_vars, output_dir=None): """Find all existing checks and return a mapping of names to instances.""" load_checks() + want_full_results = bool(output_dir) known_checks = {} for cls in OpenShiftCheck.subclasses(): @@ -90,7 +97,12 @@ class ActionModule(ActionBase): "duplicate check name '{}' in: '{}' and '{}'" "".format(name, full_class_name(cls), full_class_name(other_cls)) ) - known_checks[name] = cls(execute_module=self._execute_module, tmp=tmp, task_vars=task_vars) + known_checks[name] = cls( + execute_module=self._execute_module, + tmp=tmp, + task_vars=task_vars, + want_full_results=want_full_results + ) return known_checks @@ -185,9 +197,11 @@ def normalize(checks): return [name.strip() for name in checks if name.strip()] -def run_check(name, check, user_disabled_checks): +def run_check(name, check, user_disabled_checks, output_dir=None): """Run a single check if enabled and return a result dict.""" - if name in user_disabled_checks: + + # determine if we're going to run the check (not inactive or disabled) + if name in user_disabled_checks or '*' in user_disabled_checks: return dict(skipped=True, skipped_reason="Disabled by user request") # pylint: disable=broad-except; capturing exceptions broadly is intentional, @@ -201,12 +215,134 @@ def run_check(name, check, user_disabled_checks): if not is_active: return dict(skipped=True, skipped_reason="Not active for this host") + # run the check + result = {} try: - return check.run() + result = check.run() except OpenShiftCheckException as exc: - return dict(failed=True, msg=str(exc)) + check.register_failure(exc) + except Exception as exc: + check.register_failure("\n".join([str(exc), traceback.format_exc()])) + + # process the check state; compose the result hash, write files as needed + if check.changed: + result["changed"] = True + if check.failures or result.get("failed"): + if "msg" in result: # failure result has msg; combine with any registered failures + check.register_failure(result.get("msg")) + result["failures"] = [(fail.name, str(fail)) for fail in check.failures] + result["failed"] = True + result["msg"] = "\n".join(str(fail) for fail in check.failures) + write_to_output_file(output_dir, name + ".failures.json", result["failures"]) + if check.logs: + write_to_output_file(output_dir, name + ".log.json", check.logs) + if check.files_to_save: + write_files_to_save(output_dir, check) + + return result + + +def prepare_output_dir(dirname): + """Create the directory, including parents. Return bool for success/failure.""" + try: + os.makedirs(dirname) + return True + except OSError as exc: + # trying to create existing dir leads to error; + # that error is fine, but for any other, assume the dir is not there + return exc.errno == errno.EEXIST + + +def copy_remote_file_to_dir(check, file_to_save, output_dir, fname): + """Copy file from remote host to local file in output_dir, if given.""" + if not output_dir or not prepare_output_dir(output_dir): + return + local_file = os.path.join(output_dir, fname) + + # pylint: disable=broad-except; do not need to do anything about failure to write dir/file + # and do not want exceptions to break anything. + try: + # NOTE: it would have been nice to copy the file directly without loading it into + # memory, but there does not seem to be a good way to do this via ansible. + result = check.execute_module("slurp", dict(src=file_to_save), register=False) + if result.get("failed"): + display.warning("Could not retrieve file {}: {}".format(file_to_save, result.get("msg"))) + return + + content = result["content"] + if result.get("encoding") == "base64": + content = base64.b64decode(content) + with open(local_file, "wb") as outfile: + outfile.write(content) + except Exception as exc: + display.warning("Failed writing remote {} to local {}: {}".format(file_to_save, local_file, exc)) + return + + +def _no_fail(obj): + # pylint: disable=broad-except; do not want serialization to fail for any reason + try: + return str(obj) + except Exception: + return "[not serializable]" + + +def write_to_output_file(output_dir, filename, data): + """If output_dir provided, write data to file. Serialize as JSON if data is not a string.""" + + if not output_dir or not prepare_output_dir(output_dir): + return + filename = os.path.join(output_dir, filename) + try: + with open(filename, 'w') as outfile: + if isinstance(data, string_types): + outfile.write(data) + else: + json.dump(data, outfile, sort_keys=True, indent=4, default=_no_fail) + # pylint: disable=broad-except; do not want serialization/write to break for any reason + except Exception as exc: + display.warning("Could not write output file {}: {}".format(filename, exc)) + + +def write_result_to_output_dir(output_dir, result): + """If output_dir provided, write the result as json to result.json. + + Success/failure of the write is recorded as "output_files" in the result hash afterward. + Otherwise this is much like write_to_output_file. + """ + + if not output_dir: + return + if not prepare_output_dir(output_dir): + result["output_files"] = "Error creating output directory " + output_dir + return + + filename = os.path.join(output_dir, "result.json") + try: + with open(filename, 'w') as outfile: + json.dump(result, outfile, sort_keys=True, indent=4, default=_no_fail) + result["output_files"] = "Check results for this host written to " + filename + # pylint: disable=broad-except; do not want serialization/write to break for any reason except Exception as exc: - return dict(failed=True, msg=str(exc), exception=traceback.format_exc()) + result["output_files"] = "Error writing check results to {}:\n{}".format(filename, exc) + + +def write_files_to_save(output_dir, check): + """Write files to check subdir in output dir.""" + if not output_dir: + return + output_dir = os.path.join(output_dir, check.name) + seen_file = defaultdict(lambda: 0) + for file_to_save in check.files_to_save: + fname = file_to_save.filename + while seen_file[fname]: # just to be sure we never re-write a file, append numbers as needed + seen_file[fname] += 1 + fname = "{}.{}".format(fname, seen_file[fname]) + seen_file[fname] += 1 + if file_to_save.remote_filename: + copy_remote_file_to_dir(check, file_to_save.remote_filename, output_dir, fname) + else: + write_to_output_file(output_dir, fname, file_to_save.contents) def full_class_name(cls): diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py index 349655966..dcaf87eca 100644 --- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py +++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py @@ -10,6 +10,7 @@ import traceback from ansible.plugins.callback import CallbackBase from ansible import constants as C from ansible.utils.color import stringc +from ansible.module_utils.six import string_types FAILED_NO_MSG = u'Failed without returning a message.' @@ -140,11 +141,19 @@ def deduplicate_failures(failures): Returns a new list of failures such that identical failures from different hosts are grouped together in a single entry. The relative order of failures is preserved. + + If failures is unhashable, the original list of failures is returned. """ groups = defaultdict(list) for failure in failures: group_key = tuple(sorted((key, value) for key, value in failure.items() if key != 'host')) - groups[group_key].append(failure) + try: + groups[group_key].append(failure) + except TypeError: + # abort and return original list of failures when failures has an + # unhashable type. + return failures + result = [] for failure in failures: group_key = tuple(sorted((key, value) for key, value in failure.items() if key != 'host')) @@ -159,7 +168,10 @@ def format_failure(failure): """Return a list of pretty-formatted text entries describing a failure, including relevant information about it. Expect that the list of text entries will be joined by a newline separator when output to the user.""" - host = u', '.join(failure['host']) + if isinstance(failure['host'], string_types): + host = failure['host'] + else: + host = u', '.join(failure['host']) play = failure['play'] task = failure['task'] msg = failure['msg'] diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index 02ee1d0f9..28cb53cc5 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -2,8 +2,11 @@ Health checks for OpenShift clusters. """ +import json import operator import os +import time +import collections from abc import ABCMeta, abstractmethod, abstractproperty from importlib import import_module @@ -27,7 +30,7 @@ class OpenShiftCheckException(Exception): class OpenShiftCheckExceptionList(OpenShiftCheckException): - """A container for multiple logging errors that may be detected in one check.""" + """A container for multiple errors that may be detected in one check.""" def __init__(self, errors): self.errors = errors super(OpenShiftCheckExceptionList, self).__init__( @@ -40,26 +43,53 @@ class OpenShiftCheckExceptionList(OpenShiftCheckException): return self.errors[index] +FileToSave = collections.namedtuple("FileToSave", "filename contents remote_filename") + + +# pylint: disable=too-many-instance-attributes; all represent significantly different state. +# Arguably they could be separated into two hashes, one for storing parameters, and one for +# storing result state; but that smells more like clutter than clarity. @six.add_metaclass(ABCMeta) class OpenShiftCheck(object): - """ - A base class for defining checks for an OpenShift cluster environment. + """A base class for defining checks for an OpenShift cluster environment. - Expect optional params: method execute_module, dict task_vars, and string tmp. + Optional init params: method execute_module, dict task_vars, and string tmp execute_module is expected to have a signature compatible with _execute_module from ansible plugins/action/__init__.py, e.g.: def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *args): This is stored so that it can be invoked in subclasses via check.execute_module("name", args) which provides the check's stored task_vars and tmp. + + Optional init param: want_full_results + If the check can gather logs, tarballs, etc., do so when True; but no need to spend + the time if they're not wanted (won't be written to output directory). """ - def __init__(self, execute_module=None, task_vars=None, tmp=None): + def __init__(self, execute_module=None, task_vars=None, tmp=None, want_full_results=False): + # store a method for executing ansible modules from the check self._execute_module = execute_module + # the task variables and tmpdir passed into the health checker task self.task_vars = task_vars or {} self.tmp = tmp + # a boolean for disabling the gathering of results (files, computations) that won't + # actually be recorded/used + self.want_full_results = want_full_results + + # mainly for testing purposes; see execute_module_with_retries + self._module_retries = 3 + self._module_retry_interval = 5 # seconds + # state to be recorded for inspection after the check runs: + # # set to True when the check changes the host, for accurate total "changed" count self.changed = False + # list of OpenShiftCheckException for check to report (alternative to returning a failed result) + self.failures = [] + # list of FileToSave - files the check specifies to be written locally if so configured + self.files_to_save = [] + # log messages for the check - tuples of (description, msg) where msg is serializable. + # These are intended to be a sequential record of what the check observed and determined. + self.logs = [] @abstractproperty def name(self): @@ -82,7 +112,13 @@ class OpenShiftCheck(object): @abstractmethod def run(self): - """Executes a check, normally implemented as a module.""" + """Executes a check against a host and returns a result hash similar to Ansible modules. + + Actually the direction ahead is to record state in the attributes and + not bother building a result hash. Instead, return an empty hash and let + the action plugin fill it in. Or raise an OpenShiftCheckException. + Returning a hash may become deprecated if it does not prove necessary. + """ return {} @classmethod @@ -94,7 +130,43 @@ class OpenShiftCheck(object): for subclass in subclass.subclasses(): yield subclass - def execute_module(self, module_name=None, module_args=None): + def register_failure(self, error): + """Record in the check that a failure occurred. + + Recorded failures are merged into the result hash for now. They are also saved to output directory + (if provided) <check>.failures.json and registered as a log entry for context <check>.log.json. + """ + # It should be an exception; make it one if not + if not isinstance(error, OpenShiftCheckException): + error = OpenShiftCheckException(str(error)) + self.failures.append(error) + # duplicate it in the logs so it can be seen in the context of any + # information that led to the failure + self.register_log("failure: " + error.name, str(error)) + + def register_log(self, context, msg): + """Record an entry for the check log. + + Notes are intended to serve as context of the whole sequence of what the check observed. + They are be saved as an ordered list in a local check log file. + They are not to included in the result or in the ansible log; it's just for the record. + """ + self.logs.append([context, msg]) + + def register_file(self, filename, contents=None, remote_filename=""): + """Record a file that a check makes available to be saved individually to output directory. + + Either file contents should be passed in, or a file to be copied from the remote host + should be specified. Contents that are not a string are to be serialized as JSON. + + NOTE: When copying a file from remote host, it is slurped into memory as base64, meaning + you should avoid using this on huge files (more than say 10M). + """ + if contents is None and not remote_filename: + raise OpenShiftCheckException("File data/source not specified; this is a bug in the check.") + self.files_to_save.append(FileToSave(filename, contents, remote_filename)) + + def execute_module(self, module_name=None, module_args=None, save_as_name=None, register=True): """Invoke an Ansible module from a check. Invoke stored _execute_module, normally copied from the action @@ -106,6 +178,12 @@ class OpenShiftCheck(object): Ansible version). So e.g. check.execute_module("foo", dict(arg1=...)) + + save_as_name specifies a file name for saving the result to an output directory, + if needed, and is intended to uniquely identify the result of invoking execute_module. + If not provided, the module name will be used. + If register is set False, then the result won't be registered in logs or files to save. + Return: result hash from module execution. """ if self._execute_module is None: @@ -113,7 +191,33 @@ class OpenShiftCheck(object): self.__class__.__name__ + " invoked execute_module without providing the method at initialization." ) - return self._execute_module(module_name, module_args, self.tmp, self.task_vars) + result = self._execute_module(module_name, module_args, self.tmp, self.task_vars) + if result.get("changed"): + self.changed = True + for output in ["result", "stdout"]: + # output is often JSON; attempt to decode + try: + result[output + "_json"] = json.loads(result[output]) + except (KeyError, ValueError): + pass + + if register: + self.register_log("execute_module: " + module_name, result) + self.register_file(save_as_name or module_name + ".json", result) + return result + + def execute_module_with_retries(self, module_name, module_args): + """Run execute_module and retry on failure.""" + result = {} + tries = 0 + while True: + res = self.execute_module(module_name, module_args) + if tries > self._module_retries or not res.get("failed"): + result.update(res) + return result + result["last_failed"] = res + tries += 1 + time.sleep(self._module_retry_interval) def get_var(self, *keys, **kwargs): """Get deeply nested values from task_vars. @@ -171,8 +275,12 @@ class OpenShiftCheck(object): 'There is a bug in this check. While trying to convert variable \n' ' "{var}={value}"\n' 'the given converter cannot be used or failed unexpectedly:\n' - '{error}'.format(var=".".join(keys), value=value, error=error) - ) + '{type}: {error}'.format( + var=".".join(keys), + value=value, + type=error.__class__.__name__, + error=error + )) @staticmethod def get_major_minor_version(openshift_image_tag): @@ -214,7 +322,9 @@ class OpenShiftCheck(object): mount_point = os.path.dirname(mount_point) try: - return mount_for_path[mount_point] + mount = mount_for_path[mount_point] + self.register_log("mount point for " + path, mount) + return mount except KeyError: known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) raise OpenShiftCheckException( @@ -242,7 +352,7 @@ def load_checks(path=None, subpkg=""): modules = modules + load_checks(os.path.join(path, name), subpkg + "." + name) continue - if name.endswith(".py") and not name.startswith(".") and name not in LOADER_EXCLUDES: + if name.endswith(".py") and name not in LOADER_EXCLUDES: modules.append(import_module(__package__ + subpkg + "." + name[:-3])) return modules diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index f302fd14b..cdf56e959 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -70,6 +70,10 @@ class DiskAvailability(OpenShiftCheck): # If it is not a number, then it should be a nested dict. pass + self.register_log("recommended thresholds", self.recommended_disk_space_bytes) + if user_config: + self.register_log("user-configured thresholds", user_config) + # TODO: as suggested in # https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021, # maybe we could support checking disk availability in paths that are @@ -113,10 +117,7 @@ class DiskAvailability(OpenShiftCheck): 'in your Ansible inventory, and lower the recommended disk space availability\n' 'if necessary for this upgrade.').format(config_bytes) - return { - 'failed': True, - 'msg': msg, - } + self.register_failure(msg) return {} diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 866c74d7c..9c35f0f92 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -32,7 +32,12 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): # we use python-docker-py to check local docker for images, and skopeo # to look for images available remotely without waiting to pull them. dependencies = ["python-docker-py", "skopeo"] - skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false" + skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false docker://{registry}/{image}" + + def __init__(self, *args, **kwargs): + super(DockerImageAvailability, self).__init__(*args, **kwargs) + # record whether we could reach a registry or not (and remember results) + self.reachable_registries = {} def is_active(self): """Skip hosts with unsupported deployment types.""" @@ -64,15 +69,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): unavailable_images = set(missing_images) - set(available_images) if unavailable_images: - return { - "failed": True, - "msg": ( - "One or more required Docker images are not available:\n {}\n" - "Configured registries: {}\n" - "Checked by: {}" - ).format(",\n ".join(sorted(unavailable_images)), ", ".join(registries), - self.skopeo_img_check_command), - } + registries = [ + reg if self.reachable_registries.get(reg, True) else reg + " (unreachable)" + for reg in registries + ] + msg = ( + "One or more required Docker images are not available:\n {}\n" + "Configured registries: {}\n" + "Checked by: {}" + ).format( + ",\n ".join(sorted(unavailable_images)), + ", ".join(registries), + self.skopeo_img_check_command + ) + + return dict(failed=True, msg=msg) return {} @@ -128,31 +139,31 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): def local_images(self, images): """Filter a list of images and return those available locally.""" - return [ - image for image in images - if self.is_image_local(image) - ] + registries = self.known_docker_registries() + found_images = [] + for image in images: + # docker could have the image name as-is or prefixed with any registry + imglist = [image] + [reg + "/" + image for reg in registries] + if self.is_image_local(imglist): + found_images.append(image) + return found_images def is_image_local(self, image): """Check if image is already in local docker index.""" result = self.execute_module("docker_image_facts", {"name": image}) - if result.get("failed", False): - return False - - return bool(result.get("images", [])) + return bool(result.get("images")) and not result.get("failed") def known_docker_registries(self): """Build a list of docker registries available according to inventory vars.""" - docker_facts = self.get_var("openshift", "docker") - regs = set(docker_facts["additional_registries"]) + regs = list(self.get_var("openshift.docker.additional_registries", default=[])) deployment_type = self.get_var("openshift_deployment_type") - if deployment_type == "origin": - regs.update(["docker.io"]) - elif "enterprise" in deployment_type: - regs.update(["registry.access.redhat.com"]) + if deployment_type == "origin" and "docker.io" not in regs: + regs.append("docker.io") + elif "enterprise" in deployment_type and "registry.access.redhat.com" not in regs: + regs.append("registry.access.redhat.com") - return list(regs) + return regs def available_images(self, images, default_registries): """Search remotely for images. Returns: list of images found.""" @@ -165,17 +176,35 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Use Skopeo to determine if required image exists in known registry(s).""" registries = default_registries - # if image already includes a registry, only use that + # If image already includes a registry, only use that. + # NOTE: This logic would incorrectly identify images that do not use a namespace, e.g. + # registry.access.redhat.com/rhel7 as if the registry were a namespace. + # It's not clear that there's any way to distinguish them, but fortunately + # the current set of images all look like [registry/]namespace/name[:version]. if image.count("/") > 1: registry, image = image.split("/", 1) registries = [registry] for registry in registries: - args = { - "_raw_params": self.skopeo_img_check_command + " docker://{}/{}".format(registry, image) - } - result = self.execute_module("command", args) + if registry not in self.reachable_registries: + self.reachable_registries[registry] = self.connect_to_registry(registry) + if not self.reachable_registries[registry]: + continue + + args = {"_raw_params": self.skopeo_img_check_command.format(registry=registry, image=image)} + result = self.execute_module_with_retries("command", args) if result.get("rc", 0) == 0 and not result.get("failed"): return True + if result.get("rc") == 124: # RC 124 == timed out; mark unreachable + self.reachable_registries[registry] = False return False + + def connect_to_registry(self, registry): + """Use ansible wait_for module to test connectivity from host to registry. Returns bool.""" + # test a simple TCP connection + host, _, port = registry.partition(":") + port = port or 443 + args = dict(host=host, port=port, state="started", timeout=30) + result = self.execute_module("wait_for", args) + return result.get("rc", 0) == 0 and not result.get("failed") diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py index 7fc843fd7..986a01f38 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py +++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py @@ -72,7 +72,7 @@ class Elasticsearch(LoggingCheck): for pod_name in pods_by_name.keys(): # Compare what each ES node reports as master and compare for split brain get_master_cmd = self._build_es_curl_cmd(pod_name, "https://localhost:9200/_cat/master") - master_name_str = self.exec_oc(get_master_cmd, []) + master_name_str = self.exec_oc(get_master_cmd, [], save_as_name="get_master_names.json") master_names = (master_name_str or '').split(' ') if len(master_names) > 1: es_master_names.add(master_names[1]) @@ -113,7 +113,7 @@ class Elasticsearch(LoggingCheck): # get ES cluster nodes node_cmd = self._build_es_curl_cmd(list(pods_by_name.keys())[0], 'https://localhost:9200/_nodes') - cluster_node_data = self.exec_oc(node_cmd, []) + cluster_node_data = self.exec_oc(node_cmd, [], save_as_name="get_es_nodes.json") try: cluster_nodes = json.loads(cluster_node_data)['nodes'] except (ValueError, KeyError): @@ -142,7 +142,7 @@ class Elasticsearch(LoggingCheck): errors = [] for pod_name in pods_by_name.keys(): cluster_health_cmd = self._build_es_curl_cmd(pod_name, 'https://localhost:9200/_cluster/health?pretty=true') - cluster_health_data = self.exec_oc(cluster_health_cmd, []) + cluster_health_data = self.exec_oc(cluster_health_cmd, [], save_as_name='get_es_health.json') try: health_res = json.loads(cluster_health_data) if not health_res or not health_res.get('status'): @@ -171,7 +171,7 @@ class Elasticsearch(LoggingCheck): errors = [] for pod_name in pods_by_name.keys(): df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name) - disk_output = self.exec_oc(df_cmd, []) + disk_output = self.exec_oc(df_cmd, [], save_as_name='get_pv_diskspace.json') lines = disk_output.splitlines() # expecting one header looking like 'IUse% Use%' and one body line body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$' diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py index ecd8adb64..06bdfebf6 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py @@ -78,7 +78,7 @@ class LoggingCheck(OpenShiftCheck): """Returns the namespace in which logging is configured to deploy.""" return self.get_var("openshift_logging_namespace", default="logging") - def exec_oc(self, cmd_str="", extra_args=None): + def exec_oc(self, cmd_str="", extra_args=None, save_as_name=None): """ Execute an 'oc' command in the remote host. Returns: output of command and namespace, @@ -92,7 +92,7 @@ class LoggingCheck(OpenShiftCheck): "extra_args": list(extra_args) if extra_args else [], } - result = self.execute_module("ocutil", args) + result = self.execute_module("ocutil", args, save_as_name=save_as_name) if result.get("failed"): if result['result'] == '[Errno 2] No such file or directory': raise CouldNotUseOc( diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py index d781db649..cacdf4213 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging_index_time.py @@ -104,7 +104,7 @@ class LoggingIndexTime(LoggingCheck): "https://logging-es:9200/project.{namespace}*/_count?q=message:{uuid}" ) exec_cmd = exec_cmd.format(pod_name=pod_name, namespace=self.logging_namespace(), uuid=uuid) - result = self.exec_oc(exec_cmd, []) + result = self.exec_oc(exec_cmd, [], save_as_name="query_for_uuid.json") try: count = json.loads(result)["count"] diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index e9bae60a3..b90ebf6dd 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -36,7 +36,7 @@ class DockerHostMixin(object): # NOTE: we would use the "package" module but it's actually an action plugin # and it's not clear how to invoke one of those. This is about the same anyway: - result = self.execute_module( + result = self.execute_module_with_retries( self.get_var("ansible_pkg_mgr", default="yum"), {"name": self.dependencies, "state": "present"}, ) @@ -49,5 +49,4 @@ class DockerHostMixin(object): " {deps}\n{msg}" ).format(deps=',\n '.join(self.dependencies), msg=msg) failed = result.get("failed", False) or result.get("rc", 0) != 0 - self.changed = result.get("changed", False) return msg, failed diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index a86180b00..21355c2f0 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -26,7 +26,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} - return self.execute_module("check_yum_update", args) + return self.execute_module_with_retries("check_yum_update", args) @staticmethod def master_packages(rpm_prefix): diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index 1e9aecbe0..8464e8a5e 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck): def run(self): args = {"packages": []} - return self.execute_module("check_yum_update", args) + return self.execute_module_with_retries("check_yum_update", args) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 0b795b6c4..d4aec3ed8 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -76,7 +76,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): ], } - return self.execute_module("aos_version", args) + return self.execute_module_with_retries("aos_version", args) def get_required_ovs_version(self): """Return the correct Open vSwitch version(s) for the current OpenShift version.""" diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py index c109ebd24..f14887303 100644 --- a/roles/openshift_health_checker/test/action_plugin_test.py +++ b/roles/openshift_health_checker/test/action_plugin_test.py @@ -3,10 +3,12 @@ import pytest from ansible.playbook.play_context import PlayContext from openshift_health_check import ActionModule, resolve_checks -from openshift_checks import OpenShiftCheckException +from openshift_health_check import copy_remote_file_to_dir, write_result_to_output_dir, write_to_output_file +from openshift_checks import OpenShiftCheckException, FileToSave -def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, run_exception=None, changed=False): +def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, run_exception=None, + run_logs=None, run_files=None, changed=False, get_var_return=None): """Returns a new class that is compatible with OpenShiftCheck for testing.""" _name, _tags = name, tags @@ -14,12 +16,16 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru class FakeCheck(object): name = _name tags = _tags or [] - changed = False - def __init__(self, execute_module=None, task_vars=None, tmp=None): - pass + def __init__(self, **_): + self.changed = False + self.failures = [] + self.logs = run_logs or [] + self.files_to_save = run_files or [] def is_active(self): + if isinstance(is_active, Exception): + raise is_active return is_active def run(self): @@ -28,6 +34,13 @@ def fake_check(name='fake_check', tags=None, is_active=True, run_return=None, ru raise run_exception return run_return + def get_var(*args, **_): + return get_var_return + + def register_failure(self, exc): + self.failures.append(OpenShiftCheckException(str(exc))) + return + return FakeCheck @@ -98,23 +111,33 @@ def test_action_plugin_cannot_load_checks_with_the_same_name(plugin, task_vars, assert failed(result, msg_has=['duplicate', 'duplicate_name', 'FakeCheck']) -def test_action_plugin_skip_non_active_checks(plugin, task_vars, monkeypatch): - checks = [fake_check(is_active=False)] +@pytest.mark.parametrize('is_active, skipped_reason', [ + (False, "Not active for this host"), + (Exception("borked"), "exception"), +]) +def test_action_plugin_skip_non_active_checks(is_active, skipped_reason, plugin, task_vars, monkeypatch): + checks = [fake_check(is_active=is_active)] monkeypatch.setattr('openshift_checks.OpenShiftCheck.subclasses', classmethod(lambda cls: checks)) result = plugin.run(tmp=None, task_vars=task_vars) - assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Not active for this host") + assert result['checks']['fake_check'].get('skipped') + assert skipped_reason in result['checks']['fake_check'].get('skipped_reason') assert not failed(result) assert not changed(result) assert not skipped(result) -def test_action_plugin_skip_disabled_checks(plugin, task_vars, monkeypatch): +@pytest.mark.parametrize('to_disable', [ + 'fake_check', + ['fake_check', 'spam'], + '*,spam,eggs', +]) +def test_action_plugin_skip_disabled_checks(to_disable, plugin, task_vars, monkeypatch): checks = [fake_check('fake_check', is_active=True)] monkeypatch.setattr('openshift_checks.OpenShiftCheck.subclasses', classmethod(lambda cls: checks)) - task_vars['openshift_disable_check'] = 'fake_check' + task_vars['openshift_disable_check'] = to_disable result = plugin.run(tmp=None, task_vars=task_vars) assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Disabled by user request") @@ -123,10 +146,21 @@ def test_action_plugin_skip_disabled_checks(plugin, task_vars, monkeypatch): assert not skipped(result) +def test_action_plugin_run_list_checks(monkeypatch): + task = FakeTask('openshift_health_check', {'checks': []}) + plugin = ActionModule(task, None, PlayContext(), None, None, None) + monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {}) + result = plugin.run() + + assert failed(result, msg_has="Available checks") + assert not changed(result) + assert not skipped(result) + + def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch): check_return_value = {'ok': 'test'} - check_class = fake_check(run_return=check_return_value) - monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) + check_class = fake_check(run_return=check_return_value, run_files=[None]) + monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) @@ -140,7 +174,7 @@ def test_action_plugin_run_check_ok(plugin, task_vars, monkeypatch): def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch): check_return_value = {'ok': 'test'} check_class = fake_check(run_return=check_return_value, changed=True) - monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) + monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) @@ -153,9 +187,9 @@ def test_action_plugin_run_check_changed(plugin, task_vars, monkeypatch): def test_action_plugin_run_check_fail(plugin, task_vars, monkeypatch): - check_return_value = {'failed': True} + check_return_value = {'failed': True, 'msg': 'this is a failure'} check_class = fake_check(run_return=check_return_value) - monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) + monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) @@ -166,24 +200,51 @@ def test_action_plugin_run_check_fail(plugin, task_vars, monkeypatch): assert not skipped(result) -def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch): +@pytest.mark.parametrize('exc_class, expect_traceback', [ + (OpenShiftCheckException, False), + (Exception, True), +]) +def test_action_plugin_run_check_exception(plugin, task_vars, exc_class, expect_traceback, monkeypatch): exception_msg = 'fake check has an exception' - run_exception = OpenShiftCheckException(exception_msg) + run_exception = exc_class(exception_msg) check_class = fake_check(run_exception=run_exception, changed=True) - monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {'fake_check': check_class()}) + monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()}) monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) result = plugin.run(tmp=None, task_vars=task_vars) assert failed(result['checks']['fake_check'], msg_has=exception_msg) + assert expect_traceback == ("Traceback" in result['checks']['fake_check']['msg']) assert failed(result, msg_has=['failed']) assert changed(result['checks']['fake_check']) assert changed(result) assert not skipped(result) +def test_action_plugin_run_check_output_dir(plugin, task_vars, tmpdir, monkeypatch): + check_class = fake_check( + run_return={}, + run_logs=[('thing', 'note')], + run_files=[ + FileToSave('save.file', 'contents', None), + FileToSave('save.file', 'duplicate', None), + FileToSave('copy.file', None, 'foo'), # note: copy runs execute_module => exception + ], + ) + task_vars['openshift_checks_output_dir'] = str(tmpdir) + check_class.get_var = lambda self, name, **_: task_vars.get(name) + monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {'fake_check': check_class()}) + monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check']) + + plugin.run(tmp=None, task_vars=task_vars) + assert any(path.basename == task_vars['ansible_host'] for path in tmpdir.listdir()) + assert any(path.basename == 'fake_check.log.json' for path in tmpdir.visit()) + assert any(path.basename == 'save.file' for path in tmpdir.visit()) + assert any(path.basename == 'save.file.2' for path in tmpdir.visit()) + + def test_action_plugin_resolve_checks_exception(plugin, task_vars, monkeypatch): - monkeypatch.setattr(plugin, 'load_known_checks', lambda tmp, task_vars: {}) + monkeypatch.setattr(plugin, 'load_known_checks', lambda *_: {}) result = plugin.run(tmp=None, task_vars=task_vars) @@ -249,3 +310,38 @@ def test_resolve_checks_failure(names, all_checks, words_in_exception): resolve_checks(names, all_checks) for word in words_in_exception: assert word in str(excinfo.value) + + +@pytest.mark.parametrize('give_output_dir, result, expect_file', [ + (False, None, False), + (True, dict(content="c3BhbQo=", encoding="base64"), True), + (True, dict(content="encoding error", encoding="base64"), False), + (True, dict(content="spam", no_encoding=None), True), + (True, dict(failed=True, msg="could not slurp"), False), +]) +def test_copy_remote_file_to_dir(give_output_dir, result, expect_file, tmpdir): + check = fake_check()() + check.execute_module = lambda *args, **_: result + copy_remote_file_to_dir(check, "remote_file", str(tmpdir) if give_output_dir else "", "local_file") + assert expect_file == any(path.basename == "local_file" for path in tmpdir.listdir()) + + +def test_write_to_output_exceptions(tmpdir, monkeypatch, capsys): + + class Spam(object): + def __str__(self): + raise Exception("break str") + + test = {1: object(), 2: Spam()} + test[3] = test + write_result_to_output_dir(str(tmpdir), test) + assert "Error writing" in test["output_files"] + + output_dir = tmpdir.join("eggs") + output_dir.write("spam") # so now it's not a dir + write_to_output_file(str(output_dir), "somefile", "somedata") + assert "Could not write" in capsys.readouterr()[1] + + monkeypatch.setattr("openshift_health_check.prepare_output_dir", lambda *_: False) + write_result_to_output_dir(str(tmpdir), test) + assert "Error creating" in test["output_files"] diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index f4fd2dfed..9ae679b79 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -183,11 +183,12 @@ def test_fails_with_insufficient_disk_space(name, group_names, configured_min, a ansible_mounts=ansible_mounts, ) - result = DiskAvailability(fake_execute_module, task_vars).run() + check = DiskAvailability(fake_execute_module, task_vars) + check.run() - assert result['failed'] + assert check.failures for chunk in 'below recommended'.split() + expect_chunks: - assert chunk in result.get('msg', '') + assert chunk in str(check.failures[0]) @pytest.mark.parametrize('name,group_names,context,ansible_mounts,failed,extra_words', [ @@ -237,11 +238,11 @@ def test_min_required_space_changes_with_upgrade_context(name, group_names, cont ) check = DiskAvailability(fake_execute_module, task_vars) - result = check.run() + check.run() - assert result.get("failed", False) == failed + assert bool(check.failures) == failed for word in extra_words: - assert word in result.get('msg', '') + assert word in str(check.failures[0]) def fake_execute_module(*args): diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 8d0a53df9..6a7c16c7e 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -3,6 +3,23 @@ import pytest from openshift_checks.docker_image_availability import DockerImageAvailability +@pytest.fixture() +def task_vars(): + return dict( + openshift=dict( + common=dict( + service_type='origin', + is_containerized=False, + is_atomic=False, + ), + docker=dict(), + ), + openshift_deployment_type='origin', + openshift_image_tag='', + group_names=['nodes', 'masters'], + ) + + @pytest.mark.parametrize('deployment_type, is_containerized, group_names, expect_active', [ ("origin", True, [], True), ("openshift-enterprise", True, [], True), @@ -15,12 +32,10 @@ from openshift_checks.docker_image_availability import DockerImageAvailability ("origin", False, ["nodes", "masters"], True), ("openshift-enterprise", False, ["etcd"], False), ]) -def test_is_active(deployment_type, is_containerized, group_names, expect_active): - task_vars = dict( - openshift=dict(common=dict(is_containerized=is_containerized)), - openshift_deployment_type=deployment_type, - group_names=group_names, - ) +def test_is_active(task_vars, deployment_type, is_containerized, group_names, expect_active): + task_vars['openshift_deployment_type'] = deployment_type + task_vars['openshift']['common']['is_containerized'] = is_containerized + task_vars['group_names'] = group_names assert DockerImageAvailability(None, task_vars).is_active() == expect_active @@ -30,10 +45,10 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active (True, False), (False, True), ]) -def test_all_images_available_locally(is_containerized, is_atomic): +def test_all_images_available_locally(task_vars, is_containerized, is_atomic): def execute_module(module_name, module_args, *_): if module_name == "yum": - return {"changed": True} + return {} assert module_name == "docker_image_facts" assert 'name' in module_args @@ -42,19 +57,9 @@ def test_all_images_available_locally(is_containerized, is_atomic): 'images': [module_args['name']], } - result = DockerImageAvailability(execute_module, task_vars=dict( - openshift=dict( - common=dict( - service_type='origin', - is_containerized=is_containerized, - is_atomic=is_atomic, - ), - docker=dict(additional_registries=["docker.io"]), - ), - openshift_deployment_type='origin', - openshift_image_tag='3.4', - group_names=['nodes', 'masters'], - )).run() + task_vars['openshift']['common']['is_containerized'] = is_containerized + task_vars['openshift']['common']['is_atomic'] = is_atomic + result = DockerImageAvailability(execute_module, task_vars).run() assert not result.get('failed', False) @@ -63,53 +68,36 @@ def test_all_images_available_locally(is_containerized, is_atomic): False, True, ]) -def test_all_images_available_remotely(available_locally): +def test_all_images_available_remotely(task_vars, available_locally): def execute_module(module_name, *_): if module_name == 'docker_image_facts': return {'images': [], 'failed': available_locally} - return {'changed': False} + return {} - result = DockerImageAvailability(execute_module, task_vars=dict( - openshift=dict( - common=dict( - service_type='origin', - is_containerized=False, - is_atomic=False, - ), - docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]), - ), - openshift_deployment_type='origin', - openshift_image_tag='v3.4', - group_names=['nodes', 'masters'], - )).run() + task_vars['openshift']['docker']['additional_registries'] = ["docker.io", "registry.access.redhat.com"] + task_vars['openshift_image_tag'] = 'v3.4' + check = DockerImageAvailability(execute_module, task_vars) + check._module_retry_interval = 0 + result = check.run() assert not result.get('failed', False) -def test_all_images_unavailable(): - def execute_module(module_name=None, *_): - if module_name == "command": - return { - 'failed': True, - } +def test_all_images_unavailable(task_vars): + def execute_module(module_name=None, *args): + if module_name == "wait_for": + return {} + elif module_name == "command": + return {'failed': True} - return { - 'changed': False, - } + return {} # docker_image_facts failure - actual = DockerImageAvailability(execute_module, task_vars=dict( - openshift=dict( - common=dict( - service_type='origin', - is_containerized=False, - is_atomic=False, - ), - docker=dict(additional_registries=["docker.io"]), - ), - openshift_deployment_type="openshift-enterprise", - openshift_image_tag='latest', - group_names=['nodes', 'masters'], - )).run() + task_vars['openshift']['docker']['additional_registries'] = ["docker.io"] + task_vars['openshift_deployment_type'] = "openshift-enterprise" + task_vars['openshift_image_tag'] = 'latest' + check = DockerImageAvailability(execute_module, task_vars) + check._module_retry_interval = 0 + actual = check.run() assert actual['failed'] assert "required Docker images are not available" in actual['msg'] @@ -125,62 +113,63 @@ def test_all_images_unavailable(): ["dependencies can be installed via `yum`"] ), ]) -def test_skopeo_update_failure(message, extra_words): +def test_skopeo_update_failure(task_vars, message, extra_words): def execute_module(module_name=None, *_): if module_name == "yum": return { "failed": True, "msg": message, - "changed": False, } - return {'changed': False} + return {} - actual = DockerImageAvailability(execute_module, task_vars=dict( - openshift=dict( - common=dict( - service_type='origin', - is_containerized=False, - is_atomic=False, - ), - docker=dict(additional_registries=["unknown.io"]), - ), - openshift_deployment_type="openshift-enterprise", - openshift_image_tag='', - group_names=['nodes', 'masters'], - )).run() + task_vars['openshift']['docker']['additional_registries'] = ["unknown.io"] + task_vars['openshift_deployment_type'] = "openshift-enterprise" + check = DockerImageAvailability(execute_module, task_vars) + check._module_retry_interval = 0 + actual = check.run() assert actual["failed"] for word in extra_words: assert word in actual["msg"] -@pytest.mark.parametrize("deployment_type,registries", [ - ("origin", ["unknown.io"]), - ("openshift-enterprise", ["registry.access.redhat.com"]), - ("openshift-enterprise", []), -]) -def test_registry_availability(deployment_type, registries): +@pytest.mark.parametrize( + "image, registries, connection_test_failed, skopeo_failed, " + "expect_success, expect_registries_reached", [ + ( + "spam/eggs:v1", ["test.reg"], + True, True, + False, + {"test.reg": False}, + ), + ( + "spam/eggs:v1", ["test.reg"], + False, True, + False, + {"test.reg": True}, + ), + ( + "eggs.reg/spam/eggs:v1", ["test.reg"], + False, False, + True, + {"eggs.reg": True}, + ), + ]) +def test_registry_availability(image, registries, connection_test_failed, skopeo_failed, + expect_success, expect_registries_reached): def execute_module(module_name=None, *_): - return { - 'changed': False, - } + if module_name == "wait_for": + return dict(msg="msg", failed=connection_test_failed) + elif module_name == "command": + return dict(msg="msg", failed=skopeo_failed) - actual = DockerImageAvailability(execute_module, task_vars=dict( - openshift=dict( - common=dict( - service_type='origin', - is_containerized=False, - is_atomic=False, - ), - docker=dict(additional_registries=registries), - ), - openshift_deployment_type=deployment_type, - openshift_image_tag='', - group_names=['nodes', 'masters'], - )).run() + check = DockerImageAvailability(execute_module, task_vars()) + check._module_retry_interval = 0 - assert not actual.get("failed", False) + available = check.is_available_skopeo_image(image, registries) + assert available == expect_success + assert expect_registries_reached == check.reachable_registries @pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [ @@ -257,7 +246,7 @@ def test_required_images(deployment_type, is_containerized, groups, oreg_url, ex openshift_image_tag='vtest', ) - assert expected == DockerImageAvailability("DUMMY", task_vars).required_images() + assert expected == DockerImageAvailability(task_vars=task_vars).required_images() def test_containerized_etcd(): @@ -271,4 +260,4 @@ def test_containerized_etcd(): group_names=['etcd'], ) expected = set(['registry.access.redhat.com/rhel7/etcd']) - assert expected == DockerImageAvailability("DUMMY", task_vars).required_images() + assert expected == DockerImageAvailability(task_vars=task_vars).required_images() diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py index 09bacd9ac..3fa5e8929 100644 --- a/roles/openshift_health_checker/test/elasticsearch_test.py +++ b/roles/openshift_health_checker/test/elasticsearch_test.py @@ -72,7 +72,7 @@ def test_check_elasticsearch(): assert_error_in_list('NoRunningPods', excinfo.value) # canned oc responses to match so all the checks pass - def exec_oc(cmd, args): + def exec_oc(cmd, args, **_): if '_cat/master' in cmd: return 'name logging-es' elif '/_nodes' in cmd: @@ -97,7 +97,7 @@ def test_check_running_es_pods(): def test_check_elasticsearch_masters(): pods = [plain_es_pod] - check = canned_elasticsearch(task_vars_config_base, lambda *_: plain_es_pod['_test_master_name_str']) + check = canned_elasticsearch(task_vars_config_base, lambda *args, **_: plain_es_pod['_test_master_name_str']) assert not check.check_elasticsearch_masters(pods_by_name(pods)) @@ -117,7 +117,7 @@ def test_check_elasticsearch_masters(): ]) def test_check_elasticsearch_masters_error(pods, expect_error): test_pods = list(pods) - check = canned_elasticsearch(task_vars_config_base, lambda *_: test_pods.pop(0)['_test_master_name_str']) + check = canned_elasticsearch(task_vars_config_base, lambda *args, **_: test_pods.pop(0)['_test_master_name_str']) assert_error_in_list(expect_error, check.check_elasticsearch_masters(pods_by_name(pods))) @@ -129,7 +129,7 @@ es_node_list = { def test_check_elasticsearch_node_list(): - check = canned_elasticsearch(task_vars_config_base, lambda *_: json.dumps(es_node_list)) + check = canned_elasticsearch(task_vars_config_base, lambda *args, **_: json.dumps(es_node_list)) assert not check.check_elasticsearch_node_list(pods_by_name([plain_es_pod])) @@ -151,13 +151,13 @@ def test_check_elasticsearch_node_list(): ), ]) def test_check_elasticsearch_node_list_errors(pods, node_list, expect_error): - check = canned_elasticsearch(task_vars_config_base, lambda cmd, args: json.dumps(node_list)) + check = canned_elasticsearch(task_vars_config_base, lambda cmd, args, **_: json.dumps(node_list)) assert_error_in_list(expect_error, check.check_elasticsearch_node_list(pods_by_name(pods))) def test_check_elasticsearch_cluster_health(): test_health_data = [{"status": "green"}] - check = canned_elasticsearch(exec_oc=lambda *_: json.dumps(test_health_data.pop(0))) + check = canned_elasticsearch(exec_oc=lambda *args, **_: json.dumps(test_health_data.pop(0))) assert not check.check_es_cluster_health(pods_by_name([plain_es_pod])) @@ -175,12 +175,12 @@ def test_check_elasticsearch_cluster_health(): ]) def test_check_elasticsearch_cluster_health_errors(pods, health_data, expect_error): test_health_data = list(health_data) - check = canned_elasticsearch(exec_oc=lambda *_: json.dumps(test_health_data.pop(0))) + check = canned_elasticsearch(exec_oc=lambda *args, **_: json.dumps(test_health_data.pop(0))) assert_error_in_list(expect_error, check.check_es_cluster_health(pods_by_name(pods))) def test_check_elasticsearch_diskspace(): - check = canned_elasticsearch(exec_oc=lambda *_: 'IUse% Use%\n 3% 4%\n') + check = canned_elasticsearch(exec_oc=lambda *args, **_: 'IUse% Use%\n 3% 4%\n') assert not check.check_elasticsearch_diskspace(pods_by_name([plain_es_pod])) @@ -199,5 +199,5 @@ def test_check_elasticsearch_diskspace(): ), ]) def test_check_elasticsearch_diskspace_errors(disk_data, expect_error): - check = canned_elasticsearch(exec_oc=lambda *_: disk_data) + check = canned_elasticsearch(exec_oc=lambda *args, **_: disk_data) assert_error_in_list(expect_error, check.check_elasticsearch_diskspace(pods_by_name([plain_es_pod]))) diff --git a/roles/openshift_health_checker/test/logging_index_time_test.py b/roles/openshift_health_checker/test/logging_index_time_test.py index 22566b295..c48ade9b8 100644 --- a/roles/openshift_health_checker/test/logging_index_time_test.py +++ b/roles/openshift_health_checker/test/logging_index_time_test.py @@ -102,7 +102,7 @@ def test_with_running_pods(): ), ], ids=lambda argval: argval[0]) def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout): - check = canned_loggingindextime(lambda *_: json.dumps(json_response)) + check = canned_loggingindextime(lambda *args, **_: json.dumps(json_response)) check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, uuid, timeout) @@ -131,7 +131,7 @@ def test_wait_until_cmd_or_err_succeeds(name, json_response, uuid, timeout): ) ], ids=lambda argval: argval[0]) def test_wait_until_cmd_or_err(name, json_response, timeout, expect_error): - check = canned_loggingindextime(lambda *_: json.dumps(json_response)) + check = canned_loggingindextime(lambda *args, **_: json.dumps(json_response)) with pytest.raises(OpenShiftCheckException) as error: check.wait_until_cmd_or_err(plain_running_elasticsearch_pod, SAMPLE_UUID, timeout) @@ -139,7 +139,7 @@ def test_wait_until_cmd_or_err(name, json_response, timeout, expect_error): def test_curl_kibana_with_uuid(): - check = canned_loggingindextime(lambda *_: json.dumps({"statusCode": 404})) + check = canned_loggingindextime(lambda *args, **_: json.dumps({"statusCode": 404})) check.generate_uuid = lambda: SAMPLE_UUID assert SAMPLE_UUID == check.curl_kibana_with_uuid(plain_running_kibana_pod) @@ -161,7 +161,7 @@ def test_curl_kibana_with_uuid(): ), ], ids=lambda argval: argval[0]) def test_failed_curl_kibana_with_uuid(name, json_response, expect_error): - check = canned_loggingindextime(lambda *_: json.dumps(json_response)) + check = canned_loggingindextime(lambda *args, **_: json.dumps(json_response)) check.generate_uuid = lambda: SAMPLE_UUID with pytest.raises(OpenShiftCheckException) as error: diff --git a/roles/openshift_health_checker/test/openshift_check_test.py b/roles/openshift_health_checker/test/openshift_check_test.py index 789784c77..bc0c3b26c 100644 --- a/roles/openshift_health_checker/test/openshift_check_test.py +++ b/roles/openshift_health_checker/test/openshift_check_test.py @@ -106,13 +106,40 @@ def test_get_var_convert(task_vars, keys, convert, expected): assert dummy_check(task_vars).get_var(*keys, convert=convert) == expected -@pytest.mark.parametrize("keys, convert", [ - (("bar", "baz"), int), - (("bar.baz"), float), - (("foo"), "bogus"), - (("foo"), lambda a, b: 1), - (("foo"), lambda a: 1 / 0), +def convert_oscexc(_): + raise OpenShiftCheckException("known failure") + + +def convert_exc(_): + raise Exception("failure unknown") + + +@pytest.mark.parametrize("keys, convert, expect_text", [ + (("bar", "baz"), int, "Cannot convert"), + (("bar.baz",), float, "Cannot convert"), + (("foo",), "bogus", "TypeError"), + (("foo",), lambda a, b: 1, "TypeError"), + (("foo",), lambda a: 1 / 0, "ZeroDivisionError"), + (("foo",), convert_oscexc, "known failure"), + (("foo",), convert_exc, "failure unknown"), ]) -def test_get_var_convert_error(task_vars, keys, convert): - with pytest.raises(OpenShiftCheckException): +def test_get_var_convert_error(task_vars, keys, convert, expect_text): + with pytest.raises(OpenShiftCheckException) as excinfo: dummy_check(task_vars).get_var(*keys, convert=convert) + assert expect_text in str(excinfo.value) + + +def test_register(task_vars): + check = dummy_check(task_vars) + + check.register_failure(OpenShiftCheckException("spam")) + assert "spam" in str(check.failures[0]) + + with pytest.raises(OpenShiftCheckException) as excinfo: + check.register_file("spam") # no file contents specified + assert "not specified" in str(excinfo.value) + + # normally execute_module registers the result file; test disabling that + check._execute_module = lambda *args, **_: dict() + check.execute_module("eggs", module_args={}, register=False) + assert not check.files_to_save diff --git a/roles/openshift_health_checker/test/ovs_version_test.py b/roles/openshift_health_checker/test/ovs_version_test.py index e1bf29d2a..602f32989 100644 --- a/roles/openshift_health_checker/test/ovs_version_test.py +++ b/roles/openshift_health_checker/test/ovs_version_test.py @@ -50,7 +50,7 @@ def test_ovs_package_version(openshift_release, expected_ovs_version): openshift_release=openshift_release, openshift_image_tag='v' + openshift_release, ) - return_value = object() + return_value = {} # note: check.execute_module modifies return hash contents def execute_module(module_name=None, module_args=None, *_): assert module_name == 'rpm_version' diff --git a/roles/openshift_health_checker/test/package_availability_test.py b/roles/openshift_health_checker/test/package_availability_test.py index 1fe648b75..b34e8fbfc 100644 --- a/roles/openshift_health_checker/test/package_availability_test.py +++ b/roles/openshift_health_checker/test/package_availability_test.py @@ -49,14 +49,14 @@ def test_is_active(pkg_mgr, is_containerized, is_active): ), ]) def test_package_availability(task_vars, must_have_packages, must_not_have_packages): - return_value = object() + return_value = {} def execute_module(module_name=None, module_args=None, *_): assert module_name == 'check_yum_update' assert 'packages' in module_args assert set(module_args['packages']).issuperset(must_have_packages) assert not set(module_args['packages']).intersection(must_not_have_packages) - return return_value + return {'foo': return_value} result = PackageAvailability(execute_module, task_vars).run() - assert result is return_value + assert result['foo'] is return_value diff --git a/roles/openshift_health_checker/test/package_update_test.py b/roles/openshift_health_checker/test/package_update_test.py index 06489b0d7..85d3c9cab 100644 --- a/roles/openshift_health_checker/test/package_update_test.py +++ b/roles/openshift_health_checker/test/package_update_test.py @@ -2,14 +2,14 @@ from openshift_checks.package_update import PackageUpdate def test_package_update(): - return_value = object() + return_value = {} def execute_module(module_name=None, module_args=None, *_): assert module_name == 'check_yum_update' assert 'packages' in module_args # empty list of packages means "generic check if 'yum update' will work" assert module_args['packages'] == [] - return return_value + return {'foo': return_value} result = PackageUpdate(execute_module).run() - assert result is return_value + assert result['foo'] is return_value diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py index e871f39f0..8564cd4db 100644 --- a/roles/openshift_health_checker/test/package_version_test.py +++ b/roles/openshift_health_checker/test/package_version_test.py @@ -52,7 +52,7 @@ def test_invalid_openshift_release_format(): ]) def test_package_version(openshift_release): - return_value = object() + return_value = {"foo": object()} def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *_): assert module_name == 'aos_version' @@ -66,7 +66,7 @@ def test_package_version(openshift_release): check = PackageVersion(execute_module, task_vars_for(openshift_release, 'origin')) result = check.run() - assert result is return_value + assert result == return_value @pytest.mark.parametrize('deployment_type,openshift_release,expected_docker_version', [ @@ -79,7 +79,7 @@ def test_package_version(openshift_release): ]) def test_docker_package_version(deployment_type, openshift_release, expected_docker_version): - return_value = object() + return_value = {"foo": object()} def execute_module(module_name=None, module_args=None, *_): assert module_name == 'aos_version' @@ -93,7 +93,7 @@ def test_docker_package_version(deployment_type, openshift_release, expected_doc check = PackageVersion(execute_module, task_vars_for(openshift_release, deployment_type)) result = check.run() - assert result is return_value + assert result == return_value @pytest.mark.parametrize('group_names,is_containerized,is_active', [ diff --git a/roles/openshift_health_checker/test/zz_failure_summary_test.py b/roles/openshift_health_checker/test/zz_failure_summary_test.py index 0fc258133..69f27653c 100644 --- a/roles/openshift_health_checker/test/zz_failure_summary_test.py +++ b/roles/openshift_health_checker/test/zz_failure_summary_test.py @@ -65,6 +65,21 @@ import pytest }, ], ), + # if a failure contain an unhashable value, it will not be deduplicated + ( + [ + { + 'host': 'master1', + 'msg': {'unhashable': 'value'}, + }, + ], + [ + { + 'host': 'master1', + 'msg': {'unhashable': 'value'}, + }, + ], + ), ]) def test_deduplicate_failures(failures, deduplicated): assert deduplicate_failures(failures) == deduplicated |