Diffstat (limited to 'roles/openshift_health_checker')
21 files changed, 705 insertions, 283 deletions
diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py index a62e4331e..0390dc82e 100644 --- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -38,14 +38,13 @@ class ActionModule(ActionBase): try: known_checks = self.load_known_checks() + args = self._task.args + resolved_checks = resolve_checks(args.get("checks", []), known_checks.values()) except OpenShiftCheckException as e: result["failed"] = True result["msg"] = str(e) return result - args = self._task.args - resolved_checks = resolve_checks(args.get("checks", []), known_checks.values()) - result["checks"] = check_results = {} user_disabled_checks = [ diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py index 64c29a8d9..443b76ea1 100644 --- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py +++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py @@ -39,7 +39,8 @@ class CallbackModule(CallbackBase): def v2_runner_on_failed(self, result, ignore_errors=False): super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors) - self.__failures.append(dict(result=result, ignore_errors=ignore_errors)) + if not ignore_errors: + self.__failures.append(dict(result=result, ignore_errors=ignore_errors)) def v2_playbook_on_stats(self, stats): super(CallbackModule, self).v2_playbook_on_stats(stats) diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py index 4c205e48c..4f43ee751 100755 --- a/roles/openshift_health_checker/library/aos_version.py +++ b/roles/openshift_health_checker/library/aos_version.py @@ -19,6 +19,10 @@ the inventory, the version comparison checks just pass. ''' from ansible.module_utils.basic import AnsibleModule +# NOTE: because of the dependency on yum (Python 2-only), this module does not +# work under Python 3. But since we run unit tests against both Python 2 and +# Python 3, we use six for cross compatibility in this module alone: +from ansible.module_utils.six import string_types IMPORT_EXCEPTION = None try: @@ -122,12 +126,15 @@ def _check_precise_version_found(pkgs, expected_pkgs_dict): for pkg in pkgs: if pkg.name not in expected_pkgs_dict: continue - # does the version match, to the precision requested? - # and, is it strictly greater, at the precision requested? - expected_pkg_version = expected_pkgs_dict[pkg.name]["version"] - match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1]) - if match_version == expected_pkg_version: - pkgs_precise_version_found.add(pkg.name) + expected_pkg_versions = expected_pkgs_dict[pkg.name]["version"] + if isinstance(expected_pkg_versions, string_types): + expected_pkg_versions = [expected_pkg_versions] + for expected_pkg_version in expected_pkg_versions: + # does the version match, to the precision requested? + # and, is it strictly greater, at the precision requested? 
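# Illustrative sketch (not part of this patch): how a package version is matched
# against one or more expected versions at the precision each of them requests,
# mirroring the loop above. Function and variable names are hypothetical; the
# patch itself uses six.string_types rather than a plain str check.
def matches_expected(installed_version, expected_versions):
    if isinstance(expected_versions, str):        # a single version string is also accepted
        expected_versions = [expected_versions]
    for expected in expected_versions:
        precision = expected.count('.') + 1       # e.g. "2.6" -> compare two segments
        truncated = '.'.join(installed_version.split('.')[:precision])
        if truncated == expected:
            return True
    return False

matches_expected('2.6.1', ['2.6', '2.7'])   # True: "2.6.1" truncates to "2.6"
matches_expected('2.8.0', ['2.6', '2.7'])   # False: no expected version matches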
+ match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1]) + if match_version == expected_pkg_version: + pkgs_precise_version_found.add(pkg.name) not_found = [] for name, pkg in expected_pkgs_dict.items(): @@ -157,8 +164,13 @@ def _check_higher_version_found(pkgs, expected_pkgs_dict): for pkg in pkgs: if pkg.name not in expected_pkg_names: continue - expected_pkg_version = expected_pkgs_dict[pkg.name]["version"] - req_release_arr = [int(segment) for segment in expected_pkg_version.split(".")] + expected_pkg_versions = expected_pkgs_dict[pkg.name]["version"] + if isinstance(expected_pkg_versions, string_types): + expected_pkg_versions = [expected_pkg_versions] + # NOTE: the list of versions is assumed to be sorted so that the highest + # desirable version is the last. + highest_desirable_version = expected_pkg_versions[-1] + req_release_arr = [int(segment) for segment in highest_desirable_version.split(".")] version = [int(segment) for segment in pkg.version.split(".")] too_high = version[:len(req_release_arr)] > req_release_arr higher_than_seen = version > higher_version_for_pkg.get(pkg.name, []) diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index 962148cb8..e93e81efa 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -1,9 +1,12 @@ -# pylint: disable=missing-docstring +"""Check that there is enough disk space in predefined paths.""" + +import os.path +import tempfile + from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var -from openshift_checks.mixins import NotContainerizedMixin -class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): +class DiskAvailability(OpenShiftCheck): """Check that recommended disk space is available before a first-time install.""" name = "disk_availability" @@ -12,56 +15,101 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): # Values taken from the official installation documentation: # https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements recommended_disk_space_bytes = { - "masters": 40 * 10**9, - "nodes": 15 * 10**9, - "etcd": 20 * 10**9, + '/var': { + 'masters': 40 * 10**9, + 'nodes': 15 * 10**9, + 'etcd': 20 * 10**9, + }, + # Used to copy client binaries into, + # see roles/openshift_cli/library/openshift_container_binary_sync.py. + '/usr/local/bin': { + 'masters': 1 * 10**9, + 'nodes': 1 * 10**9, + 'etcd': 1 * 10**9, + }, + # Used as temporary storage in several cases. 
+ tempfile.gettempdir(): { + 'masters': 1 * 10**9, + 'nodes': 1 * 10**9, + 'etcd': 1 * 10**9, + }, } @classmethod def is_active(cls, task_vars): """Skip hosts that do not have recommended disk space requirements.""" group_names = get_var(task_vars, "group_names", default=[]) - has_disk_space_recommendation = bool(set(group_names).intersection(cls.recommended_disk_space_bytes)) + active_groups = set() + for recommendation in cls.recommended_disk_space_bytes.values(): + active_groups.update(recommendation.keys()) + has_disk_space_recommendation = bool(active_groups.intersection(group_names)) return super(DiskAvailability, cls).is_active(task_vars) and has_disk_space_recommendation def run(self, tmp, task_vars): group_names = get_var(task_vars, "group_names") ansible_mounts = get_var(task_vars, "ansible_mounts") - free_bytes = self.openshift_available_disk(ansible_mounts) - - recommended_min = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names) - configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9 - min_free_bytes = configured_min or recommended_min - - if free_bytes < min_free_bytes: - return { - 'failed': True, - 'msg': ( - 'Available disk space ({:.1f} GB) for the volume containing ' - '"/var" is below minimum recommended space ({:.1f} GB)' - ).format(float(free_bytes) / 10**9, float(min_free_bytes) / 10**9) + ansible_mounts = {mount['mount']: mount for mount in ansible_mounts} + + user_config = get_var(task_vars, "openshift_check_min_host_disk_gb", default={}) + try: + # For backwards-compatibility, if openshift_check_min_host_disk_gb + # is a number, then it overrides the required config for '/var'. + number = float(user_config) + user_config = { + '/var': { + 'masters': number, + 'nodes': number, + 'etcd': number, + }, } + except TypeError: + # If it is not a number, then it should be a nested dict. + pass + + # TODO: as suggested in + # https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021, + # maybe we could support checking disk availability in paths that are + # not part of the official recommendation but present in the user + # configuration. + for path, recommendation in self.recommended_disk_space_bytes.items(): + free_bytes = self.free_bytes(path, ansible_mounts) + recommended_bytes = max(recommendation.get(name, 0) for name in group_names) + + config = user_config.get(path, {}) + # NOTE: the user config is in GB, but we compare bytes, thus the + # conversion. + config_bytes = max(config.get(name, 0) for name in group_names) * 10**9 + recommended_bytes = config_bytes or recommended_bytes + + if free_bytes < recommended_bytes: + free_gb = float(free_bytes) / 10**9 + recommended_gb = float(recommended_bytes) / 10**9 + return { + 'failed': True, + 'msg': ( + 'Available disk space in "{}" ({:.1f} GB) ' + 'is below minimum recommended ({:.1f} GB)' + ).format(path, free_gb, recommended_gb) + } return {} @staticmethod - def openshift_available_disk(ansible_mounts): - """Determine the available disk space for an OpenShift installation. - - ansible_mounts should be a list of dicts like the 'setup' Ansible module - returns. 
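# Illustrative sketch (not part of this patch): how a plain numeric
# openshift_check_min_host_disk_gb value is folded into the per-path,
# per-group layout used by the new run() above. The helper name is hypothetical.
def normalize_disk_config(user_config):
    try:
        number = float(user_config)   # plain number: applies to '/var' only, for backwards compatibility
        return {'/var': {'masters': number, 'nodes': number, 'etcd': number}}
    except TypeError:
        return user_config            # already a nested dict: use it as given

normalize_disk_config(50)                        # -> {'/var': {'masters': 50.0, 'nodes': 50.0, 'etcd': 50.0}}
normalize_disk_config({'/var': {'nodes': 20}})   # -> unchanged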
- """ - # priority list in descending order - supported_mnt_paths = ["/var", "/"] - available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts} + def free_bytes(path, ansible_mounts): + """Return the size available in path based on ansible_mounts.""" + mount_point = path + # arbitry value to prevent an infinite loop, in the unlike case that '/' + # is not in ansible_mounts. + max_depth = 32 + while mount_point not in ansible_mounts and max_depth > 0: + mount_point = os.path.dirname(mount_point) + max_depth -= 1 try: - for path in supported_mnt_paths: - if path in available_mnts: - return available_mnts[path]["size_available"] + free_bytes = ansible_mounts[mount_point]['size_available'] except KeyError: - pass + known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(ansible_mounts)) or 'none' + msg = 'Unable to determine disk availability for "{}". Known mount points: {}.' + raise OpenShiftCheckException(msg.format(path, known_mounts)) - paths = ''.join(sorted(available_mnts)) or 'none' - msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths) - raise OpenShiftCheckException(msg) + return free_bytes diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 27e6fe383..bde81ad2c 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,8 +1,24 @@ -# pylint: disable=missing-docstring +"""Check that required Docker images are available.""" + from openshift_checks import OpenShiftCheck, get_var from openshift_checks.mixins import DockerHostMixin +NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"] +DEPLOYMENT_IMAGE_INFO = { + "origin": { + "namespace": "openshift", + "name": "origin", + "registry_console_image": "cockpit/kubernetes", + }, + "openshift-enterprise": { + "namespace": "openshift3", + "name": "ose", + "registry_console_image": "registry.access.redhat.com/openshift3/registry-console", + }, +} + + class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Check that required Docker images are available. 
@@ -13,25 +29,13 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): name = "docker_image_availability" tags = ["preflight"] - dependencies = ["skopeo", "python-docker-py"] - deployment_image_info = { - "origin": { - "namespace": "openshift", - "name": "origin", - }, - "openshift-enterprise": { - "namespace": "openshift3", - "name": "ose", - }, - } - @classmethod def is_active(cls, task_vars): """Skip hosts with unsupported deployment types.""" deployment_type = get_var(task_vars, "openshift_deployment_type") - has_valid_deployment_type = deployment_type in cls.deployment_image_info + has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type @@ -70,51 +74,56 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): return {"changed": changed} - def required_images(self, task_vars): - deployment_type = get_var(task_vars, "openshift_deployment_type") - image_info = self.deployment_image_info[deployment_type] - - openshift_release = get_var(task_vars, "openshift_release", default="latest") - openshift_image_tag = get_var(task_vars, "openshift_image_tag") - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - - images = set(self.required_docker_images( - image_info["namespace"], - image_info["name"], - ["registry-console"] if "enterprise" in deployment_type else [], # include enterprise-only image names - openshift_release, - is_containerized, - )) - - # append images with qualified image tags to our list of required images. - # these are images with a (v0.0.0.0) tag, rather than a standard release - # format tag (v0.0). We want to check this set in both containerized and - # non-containerized installations. - images.update( - self.required_qualified_docker_images( - image_info["namespace"], - image_info["name"], - openshift_image_tag, - ), - ) - - return images - @staticmethod - def required_docker_images(namespace, name, additional_image_names, version, is_containerized): - if is_containerized: - return ["{}/{}:{}".format(namespace, name, version)] if name else [] - - # include additional non-containerized images specific to the current deployment type - return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names] - - @staticmethod - def required_qualified_docker_images(namespace, name, version): - # pylint: disable=invalid-name - return [ - "{}/{}-{}:{}".format(namespace, name, suffix, version) - for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"] - ] + def required_images(task_vars): + """ + Determine which images we expect to need for this host. + Returns: a set of required images like 'openshift/origin:v3.6' + + The thorny issue of determining the image names from the variables is under consideration + via https://github.com/openshift/openshift-ansible/issues/4415 + + For now we operate as follows: + * For containerized components (master, node, ...) we look at the deployment type and + use openshift/origin or openshift3/ose as the base for those component images. The + version is openshift_image_tag as determined by the openshift_version role. + * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if + it is defined; otherwise we again use the base that depends on the deployment type. + Registry is not included in constructed images. It may be in oreg_url or etcd image. 
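# Illustrative sketch (not part of this patch): how the ${component}/${version}
# placeholders in the default image template (or in oreg_url, when set) are
# filled in for the OpenShift-managed infrastructure images described above.
# The function name is hypothetical; NODE_IMAGE_SUFFIXES comes from this module.
NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"]

def infrastructure_images(namespace, name, image_tag, oreg_url=""):
    template = oreg_url or "{}/{}-${{component}}:${{version}}".format(namespace, name)
    return set(
        template.replace("${component}", suffix).replace("${version}", image_tag)
        for suffix in NODE_IMAGE_SUFFIXES
    )

infrastructure_images("openshift", "origin", "v3.6")
# -> {'openshift/origin-haproxy-router:v3.6', 'openshift/origin-docker-registry:v3.6',
#     'openshift/origin-deployer:v3.6', 'openshift/origin-pod:v3.6'}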
+ """ + required = set() + deployment_type = get_var(task_vars, "openshift_deployment_type") + host_groups = get_var(task_vars, "group_names") + # containerized etcd may not have openshift_image_tag, see bz 1466622 + image_tag = get_var(task_vars, "openshift_image_tag", default="latest") + image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] + if not image_info: + return required + + # template for images that run on top of OpenShift + image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") + image_url = get_var(task_vars, "oreg_url", default="") or image_url + if 'nodes' in host_groups: + for suffix in NODE_IMAGE_SUFFIXES: + required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) + # The registry-console is for some reason not prefixed with ose- like the other components. + # Nor is it versioned the same, so just look for latest. + # Also a completely different name is used for Origin. + required.add(image_info["registry_console_image"]) + + # images for containerized components + if get_var(task_vars, "openshift", "common", "is_containerized"): + components = set() + if 'nodes' in host_groups: + components.update(["node", "openvswitch"]) + if 'masters' in host_groups: # name is "origin" or "ose" + components.add(image_info["name"]) + for component in components: + required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag)) + if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise + required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag + + return required def local_images(self, images, task_vars): """Filter a list of images and return those available locally.""" @@ -124,7 +133,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): ] def is_image_local(self, image, task_vars): - result = self.module_executor("docker_image_facts", {"name": image}, task_vars) + """Check if image is already in local docker index.""" + result = self.execute_module("docker_image_facts", {"name": image}, task_vars=task_vars) if result.get("failed", False): return False @@ -132,6 +142,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): @staticmethod def known_docker_registries(task_vars): + """Build a list of docker registries available according to inventory vars.""" docker_facts = get_var(task_vars, "openshift", "docker") regs = set(docker_facts["additional_registries"]) @@ -147,17 +158,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Inspect existing images using Skopeo and return all images successfully inspected.""" return [ image for image in images - if any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries) + if self.is_available_skopeo_image(image, registries, task_vars) ] - def is_available_skopeo_image(self, image, registry, task_vars): - """Uses Skopeo to determine if required image exists in a given registry.""" + def is_available_skopeo_image(self, image, registries, task_vars): + """Use Skopeo to determine if required image exists in known registry(s).""" + + # if image does already includes a registry, just use that + if image.count("/") > 1: + registry, image = image.split("/", 1) + registries = [registry] - cmd_str = "skopeo inspect docker://{registry}/{image}".format( - registry=registry, - image=image, - ) + for registry in registries: + args = {"_raw_params": "skopeo inspect --tls-verify=false docker://{}/{}".format(registry, image)} + result = 
self.execute_module("command", args, task_vars=task_vars) + if result.get("rc", 0) == 0 and not result.get("failed"): + return True - args = {"_raw_params": cmd_str} - result = self.module_executor("command", args, task_vars) - return not result.get("failed", False) and result.get("rc", 0) == 0 + return False diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index 7f1751b36..d2227d244 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -1,5 +1,6 @@ """Check Docker storage driver and usage.""" import json +import os.path import re from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var from openshift_checks.mixins import DockerHostMixin @@ -17,13 +18,30 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): tags = ["pre-install", "health", "preflight"] dependencies = ["python-docker-py"] - storage_drivers = ["devicemapper", "overlay2"] + storage_drivers = ["devicemapper", "overlay", "overlay2"] max_thinpool_data_usage_percent = 90.0 max_thinpool_meta_usage_percent = 90.0 + max_overlay_usage_percent = 90.0 + + # TODO(lmeyer): mention these in the output when check fails + configuration_variables = [ + ( + "max_thinpool_data_usage_percent", + "For 'devicemapper' storage driver, usage threshold percentage for data. " + "Format: float. Default: {:.1f}".format(max_thinpool_data_usage_percent), + ), + ( + "max_thinpool_meta_usage_percent", + "For 'devicemapper' storage driver, usage threshold percentage for metadata. " + "Format: float. Default: {:.1f}".format(max_thinpool_meta_usage_percent), + ), + ( + "max_overlay_usage_percent", + "For 'overlay' or 'overlay2' storage driver, usage threshold percentage. " + "Format: float. Default: {:.1f}".format(max_overlay_usage_percent), + ), + ] - # pylint: disable=too-many-return-statements - # Reason: permanent stylistic exception; - # it is clearer to return on failures and there are just many ways to fail here. def run(self, tmp, task_vars): msg, failed, changed = self.ensure_dependencies(task_vars) if failed: @@ -34,17 +52,17 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): } # attempt to get the docker info hash from the API - info = self.execute_module("docker_info", {}, task_vars) - if info.get("failed"): + docker_info = self.execute_module("docker_info", {}, task_vars=task_vars) + if docker_info.get("failed"): return {"failed": True, "changed": changed, "msg": "Failed to query Docker API. 
Is docker running on this host?"} - if not info.get("info"): # this would be very strange + if not docker_info.get("info"): # this would be very strange return {"failed": True, "changed": changed, - "msg": "Docker API query missing info:\n{}".format(json.dumps(info))} - info = info["info"] + "msg": "Docker API query missing info:\n{}".format(json.dumps(docker_info))} + docker_info = docker_info["info"] # check if the storage driver we saw is valid - driver = info.get("Driver", "[NONE]") + driver = docker_info.get("Driver", "[NONE]") if driver not in self.storage_drivers: msg = ( "Detected unsupported Docker storage driver '{driver}'.\n" @@ -53,26 +71,34 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): return {"failed": True, "changed": changed, "msg": msg} # driver status info is a list of tuples; convert to dict and validate based on driver - driver_status = {item[0]: item[1] for item in info.get("DriverStatus", [])} + driver_status = {item[0]: item[1] for item in docker_info.get("DriverStatus", [])} + + result = {} + if driver == "devicemapper": - if driver_status.get("Data loop file"): - msg = ( - "Use of loopback devices with the Docker devicemapper storage driver\n" - "(the default storage configuration) is unsupported in production.\n" - "Please use docker-storage-setup to configure a backing storage volume.\n" - "See http://red.ht/2rNperO for further information." - ) - return {"failed": True, "changed": changed, "msg": msg} - result = self._check_dm_usage(driver_status, task_vars) - result['changed'] = result.get('changed', False) or changed - return result + result = self.check_devicemapper_support(driver_status, task_vars) - # TODO(lmeyer): determine how to check usage for overlay2 + if driver in ['overlay', 'overlay2']: + result = self.check_overlay_support(docker_info, driver_status, task_vars) - return {"changed": changed} + result['changed'] = result.get('changed', False) or changed + return result - def _check_dm_usage(self, driver_status, task_vars): - """ + def check_devicemapper_support(self, driver_status, task_vars): + """Check if dm storage driver is supported as configured. Return: result dict.""" + if driver_status.get("Data loop file"): + msg = ( + "Use of loopback devices with the Docker devicemapper storage driver\n" + "(the default storage configuration) is unsupported in production.\n" + "Please use docker-storage-setup to configure a backing storage volume.\n" + "See http://red.ht/2rNperO for further information." + ) + return {"failed": True, "msg": msg} + result = self.check_dm_usage(driver_status, task_vars) + return result + + def check_dm_usage(self, driver_status, task_vars): + """Check usage thresholds for Docker dm storage driver. Return: result dict. Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures devicemapper storage. The LV is "thin" because it does not use all available storage @@ -83,7 +109,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): could run out of space first; so we check both. 
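# Illustrative sketch (not part of this patch): how the human-readable sizes that
# the Docker API reports for a devicemapper thin pool can be turned into a usage
# percentage for comparison against the thresholds above. The conversion here is
# a simplified stand-in for the convert_to_bytes helper defined later in this
# check; names and sample values are hypothetical.
def convert_to_bytes(string):
    units = dict(b=1, k=1024, m=1024**2, g=1024**3, t=1024**4, p=1024**5)
    number, unit = string.split()
    return float(number) * units[unit[0].lower()]

def data_usage_percent(driver_status):
    used = convert_to_bytes(driver_status["Data Space Used"])
    total = convert_to_bytes(driver_status["Data Space Total"])
    return 100.0 * used / total

data_usage_percent({"Data Space Used": "19.92 GB", "Data Space Total": "40 GB"})  # ~49.8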
""" vals = dict( - vg_free=self._get_vg_free(driver_status.get("Pool Name"), task_vars), + vg_free=self.get_vg_free(driver_status.get("Pool Name"), task_vars), data_used=driver_status.get("Data Space Used"), data_total=driver_status.get("Data Space Total"), metadata_used=driver_status.get("Metadata Space Used"), @@ -93,7 +119,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): # convert all human-readable strings to bytes for key, value in vals.copy().items(): try: - vals[key + "_bytes"] = self._convert_to_bytes(value) + vals[key + "_bytes"] = self.convert_to_bytes(value) except ValueError as err: # unlikely to hit this from API info, but just to be safe return { "failed": True, @@ -131,10 +157,12 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."]) return vals - def _get_vg_free(self, pool, task_vars): - # Determine which VG to examine according to the pool name, the only indicator currently - # available from the Docker API driver info. We assume a name that looks like - # "vg--name-docker--pool"; vg and lv names with inner hyphens doubled, joined by a hyphen. + def get_vg_free(self, pool, task_vars): + """Determine which VG to examine according to the pool name. Return: size vgs reports. + Pool name is the only indicator currently available from the Docker API driver info. + We assume a name that looks like "vg--name-docker--pool"; + vg and lv names with inner hyphens doubled, joined by a hyphen. + """ match = re.match(r'((?:[^-]|--)+)-(?!-)', pool) # matches up to the first single hyphen if not match: # unlikely, but... be clear if we assumed wrong raise OpenShiftCheckException( @@ -143,10 +171,10 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): "so the available storage in the VG cannot be determined.".format(pool) ) vg_name = match.groups()[0].replace("--", "-") - vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name + vgs_cmd = "/sbin/vgs --noheadings -o vg_free --units g --select vg_name=" + vg_name # should return free space like " 12.00g" if the VG exists; empty if it does not - ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars) + ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars) if ret.get("failed") or ret.get("rc", 0) != 0: raise OpenShiftCheckException( "Is LVM installed? Failed to run /sbin/vgs " @@ -163,7 +191,8 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): return size @staticmethod - def _convert_to_bytes(string): + def convert_to_bytes(string): + """Convert string like "10.3 G" to bytes (binary units assumed). Return: float bytes.""" units = dict( b=1, k=1024, @@ -183,3 +212,87 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): raise ValueError("Cannot convert to a byte size: " + string) return float(number) * multiplier + + def check_overlay_support(self, docker_info, driver_status, task_vars): + """Check if overlay storage driver is supported for this host. Return: result dict.""" + # check for xfs as backing store + backing_fs = driver_status.get("Backing Filesystem", "[NONE]") + if backing_fs != "xfs": + msg = ( + "Docker storage drivers 'overlay' and 'overlay2' are only supported with\n" + "'xfs' as the backing storage, but this host's storage is type '{fs}'." 
+ ).format(fs=backing_fs) + return {"failed": True, "msg": msg} + + # check support for OS and kernel version + o_s = docker_info.get("OperatingSystem", "[NONE]") + if "Red Hat Enterprise Linux" in o_s or "CentOS" in o_s: + # keep it simple, only check enterprise kernel versions; assume everyone else is good + kernel = docker_info.get("KernelVersion", "[NONE]") + kernel_arr = [int(num) for num in re.findall(r'\d+', kernel)] + if kernel_arr < [3, 10, 0, 514]: # rhel < 7.3 + msg = ( + "Docker storage drivers 'overlay' and 'overlay2' are only supported beginning with\n" + "kernel version 3.10.0-514; but Docker reports kernel version {version}." + ).format(version=kernel) + return {"failed": True, "msg": msg} + # NOTE: we could check for --selinux-enabled here but docker won't even start with + # that option until it's supported in the kernel so we don't need to. + + return self.check_overlay_usage(docker_info, task_vars) + + def check_overlay_usage(self, docker_info, task_vars): + """Check disk usage on OverlayFS backing store volume. Return: result dict.""" + path = docker_info.get("DockerRootDir", "/var/lib/docker") + "/" + docker_info["Driver"] + + threshold = get_var(task_vars, "max_overlay_usage_percent", default=self.max_overlay_usage_percent) + try: + threshold = float(threshold) + except ValueError: + return { + "failed": True, + "msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(threshold), + } + + mount = self.find_ansible_mount(path, get_var(task_vars, "ansible_mounts")) + try: + free_bytes = mount['size_available'] + total_bytes = mount['size_total'] + usage = 100.0 * (total_bytes - free_bytes) / total_bytes + except (KeyError, ZeroDivisionError): + return { + "failed": True, + "msg": "The ansible_mount found for path {} is invalid.\n" + "This is likely to be an Ansible bug. The record was:\n" + "{}".format(path, json.dumps(mount, indent=2)), + } + + if usage > threshold: + return { + "failed": True, + "msg": ( + "For Docker OverlayFS mount point {path},\n" + "usage percentage {pct:.1f} is higher than threshold {thresh:.1f}." + ).format(path=mount["mount"], pct=usage, thresh=threshold) + } + + return {} + + # TODO(lmeyer): migrate to base class + @staticmethod + def find_ansible_mount(path, ansible_mounts): + """Return the mount point for path from ansible_mounts.""" + + mount_for_path = {mount['mount']: mount for mount in ansible_mounts} + mount_point = path + while mount_point not in mount_for_path: + if mount_point in ["/", ""]: # "/" not in ansible_mounts??? + break + mount_point = os.path.dirname(mount_point) + + try: + return mount_for_path[mount_point] + except KeyError: + known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) or 'none' + msg = 'Unable to determine mount point for path "{}". Known mount points: {}.' 
+ raise OpenShiftCheckException(msg.format(path, known_mounts)) diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py index 442f407b1..551e8dfa0 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py +++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py @@ -62,7 +62,7 @@ class Kibana(LoggingCheck): # TODO(lmeyer): give users option to validate certs status_code=302, ) - result = self.execute_module('uri', args, task_vars) + result = self.execute_module('uri', args, None, task_vars) if result.get('failed'): return result['msg'] return None diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py index 05b4d300c..6e951e82c 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py @@ -54,12 +54,12 @@ class LoggingCheck(OpenShiftCheck): """Returns: list of pods not in a ready and running state""" return [ pod for pod in pods - if any( + if not pod.get("status", {}).get("containerStatuses") or any( container['ready'] is False for container in pod['status']['containerStatuses'] ) or not any( condition['type'] == 'Ready' and condition['status'] == 'True' - for condition in pod['status']['conditions'] + for condition in pod['status'].get('conditions', []) ) ] @@ -78,7 +78,7 @@ class LoggingCheck(OpenShiftCheck): "extra_args": list(extra_args) if extra_args else [], } - result = execute_module("ocutil", args, task_vars) + result = execute_module("ocutil", args, None, task_vars) if result.get("failed"): msg = ( 'Unexpected error using `oc` to validate the logging stack components.\n' diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 7f3d78cc4..2cb2e21aa 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -40,8 +40,11 @@ class DockerHostMixin(object): # NOTE: we would use the "package" module but it's actually an action plugin # and it's not clear how to invoke one of those. 
This is about the same anyway: - pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum") - result = self.module_executor(pkg_manager, {"name": self.dependencies, "state": "present"}, task_vars) + result = self.execute_module( + get_var(task_vars, "ansible_pkg_mgr", default="yum"), + {"name": self.dependencies, "state": "present"}, + task_vars=task_vars, + ) msg = result.get("msg", "") if result.get("failed"): if "No package matching" in msg: diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 1e45ae3af..2dd045f1f 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -43,7 +43,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): }, ], } - return self.execute_module("rpm_version", args, task_vars) + return self.execute_module("rpm_version", args, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version""" diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index a7eb720fd..0dd2b1286 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -25,7 +25,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) @staticmethod def master_packages(rpm_prefix): @@ -36,8 +36,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): "bash-completion", "cockpit-bridge", "cockpit-docker", - "cockpit-kubernetes", - "cockpit-shell", + "cockpit-system", "cockpit-ws", "etcd", "httpd-tools", diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index fd0c0a755..f432380c6 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck): def run(self, tmp, task_vars): args = {"packages": []} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 2e737818b..204752bd0 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -10,8 +10,8 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): tags = ["preflight"] openshift_to_ovs_version = { - "3.6": "2.6", - "3.5": "2.6", + "3.6": ["2.6", "2.7"], + "3.5": ["2.6", "2.7"], "3.4": "2.4", } @@ -71,7 +71,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): ], } - return self.execute_module("aos_version", args, tmp, task_vars) + return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open 
vSwitch version for the current OpenShift version. diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py index 6ebf0ebb2..9383b233c 100644 --- a/roles/openshift_health_checker/test/action_plugin_test.py +++ b/roles/openshift_health_checker/test/action_plugin_test.py @@ -59,7 +59,7 @@ def failed(result, msg_has=None): if msg_has is not None: assert 'msg' in result for term in msg_has: - assert term in result['msg'] + assert term.lower() in result['msg'].lower() return result.get('failed', False) @@ -178,6 +178,16 @@ def test_action_plugin_run_check_exception(plugin, task_vars, monkeypatch): assert not skipped(result) +def test_action_plugin_resolve_checks_exception(plugin, task_vars, monkeypatch): + monkeypatch.setattr(plugin, 'load_known_checks', lambda: {}) + + result = plugin.run(tmp=None, task_vars=task_vars) + + assert failed(result, msg_has=['unknown', 'name']) + assert not changed(result) + assert not skipped(result) + + @pytest.mark.parametrize('names,all_checks,expected', [ ([], [], set()), ( diff --git a/roles/openshift_health_checker/test/aos_version_test.py b/roles/openshift_health_checker/test/aos_version_test.py index 697805dd2..4100f6c70 100644 --- a/roles/openshift_health_checker/test/aos_version_test.py +++ b/roles/openshift_health_checker/test/aos_version_test.py @@ -18,7 +18,43 @@ expected_pkgs = { } -@pytest.mark.parametrize('pkgs, expect_not_found', [ +@pytest.mark.parametrize('pkgs,expected_pkgs_dict', [ + ( + # all found + [Package('spam', '3.2.1'), Package('eggs', '3.2.1')], + expected_pkgs, + ), + ( + # found with more specific version + [Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')], + expected_pkgs, + ), + ( + [Package('ovs', '2.6'), Package('ovs', '2.4')], + { + "ovs": { + "name": "ovs", + "version": ["2.6", "2.7"], + "check_multi": False, + } + }, + ), + ( + [Package('ovs', '2.7')], + { + "ovs": { + "name": "ovs", + "version": ["2.6", "2.7"], + "check_multi": False, + } + }, + ), +]) +def test_check_precise_version_found(pkgs, expected_pkgs_dict): + aos_version._check_precise_version_found(pkgs, expected_pkgs_dict) + + +@pytest.mark.parametrize('pkgs,expect_not_found', [ ( [], { @@ -55,14 +91,6 @@ expected_pkgs = { }, # not the right version ), ( - [Package('spam', '3.2.1'), Package('eggs', '3.2.1')], - {}, # all found - ), - ( - [Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')], - {}, # found with more specific version - ), - ( [Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5')], { "spam": { @@ -73,64 +101,86 @@ expected_pkgs = { }, # eggs found with multiple versions ), ]) -def test_check_pkgs_for_precise_version(pkgs, expect_not_found): - if expect_not_found: - with pytest.raises(aos_version.PreciseVersionNotFound) as e: - aos_version._check_precise_version_found(pkgs, expected_pkgs) - - assert list(expect_not_found.values()) == e.value.problem_pkgs - else: +def test_check_precise_version_found_fail(pkgs, expect_not_found): + with pytest.raises(aos_version.PreciseVersionNotFound) as e: aos_version._check_precise_version_found(pkgs, expected_pkgs) + assert list(expect_not_found.values()) == e.value.problem_pkgs -@pytest.mark.parametrize('pkgs, expect_higher', [ +@pytest.mark.parametrize('pkgs,expected_pkgs_dict', [ ( [], - [], + expected_pkgs, ), ( + # more precise but not strictly higher [Package('spam', '3.2.1.9')], - [], # more precise but not strictly higher + expected_pkgs, ), ( + [Package('ovs', '2.7')], + { + "ovs": { + "name": "ovs", + "version": ["2.6", 
"2.7"], + "check_multi": False, + } + }, + ), +]) +def test_check_higher_version_found(pkgs, expected_pkgs_dict): + aos_version._check_higher_version_found(pkgs, expected_pkgs_dict) + + +@pytest.mark.parametrize('pkgs,expected_pkgs_dict,expect_higher', [ + ( [Package('spam', '3.3')], + expected_pkgs, ['spam-3.3'], # lower precision, but higher ), ( [Package('spam', '3.2.1'), Package('eggs', '3.3.2')], + expected_pkgs, ['eggs-3.3.2'], # one too high ), ( [Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5'), Package('eggs', '3.4')], + expected_pkgs, ['eggs-3.4'], # multiple versions, one is higher ), ( [Package('eggs', '3.2.1'), Package('eggs', '3.4'), Package('eggs', '3.3')], + expected_pkgs, ['eggs-3.4'], # multiple versions, two are higher ), + ( + [Package('ovs', '2.8')], + { + "ovs": { + "name": "ovs", + "version": ["2.6", "2.7"], + "check_multi": False, + } + }, + ['ovs-2.8'], + ), ]) -def test_check_pkgs_for_greater_version(pkgs, expect_higher): - if expect_higher: - with pytest.raises(aos_version.FoundHigherVersion) as e: - aos_version._check_higher_version_found(pkgs, expected_pkgs) - assert set(expect_higher) == set(e.value.problem_pkgs) - else: - aos_version._check_higher_version_found(pkgs, expected_pkgs) +def test_check_higher_version_found_fail(pkgs, expected_pkgs_dict, expect_higher): + with pytest.raises(aos_version.FoundHigherVersion) as e: + aos_version._check_higher_version_found(pkgs, expected_pkgs_dict) + assert set(expect_higher) == set(e.value.problem_pkgs) -@pytest.mark.parametrize('pkgs, expect_to_flag_pkgs', [ - ( - [], - [], - ), - ( - [Package('spam', '3.2.1')], - [], - ), - ( - [Package('spam', '3.2.1'), Package('eggs', '3.2.2')], - [], - ), +@pytest.mark.parametrize('pkgs', [ + [], + [Package('spam', '3.2.1')], + [Package('spam', '3.2.1'), Package('eggs', '3.2.2')], +]) +def test_check_multi_minor_release(pkgs): + aos_version._check_multi_minor_release(pkgs, expected_pkgs) + + +@pytest.mark.parametrize('pkgs,expect_to_flag_pkgs', [ ( [Package('spam', '3.2.1'), Package('spam', '3.3.2')], ['spam'], @@ -140,10 +190,7 @@ def test_check_pkgs_for_greater_version(pkgs, expect_higher): ['eggs'], ), ]) -def test_check_pkgs_for_multi_release(pkgs, expect_to_flag_pkgs): - if expect_to_flag_pkgs: - with pytest.raises(aos_version.FoundMultiRelease) as e: - aos_version._check_multi_minor_release(pkgs, expected_pkgs) - assert set(expect_to_flag_pkgs) == set(e.value.problem_pkgs) - else: +def test_check_multi_minor_release_fail(pkgs, expect_to_flag_pkgs): + with pytest.raises(aos_version.FoundMultiRelease) as e: aos_version._check_multi_minor_release(pkgs, expected_pkgs) + assert set(expect_to_flag_pkgs) == set(e.value.problem_pkgs) diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index b353fa610..945b9eafc 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -3,22 +3,19 @@ import pytest from openshift_checks.disk_availability import DiskAvailability, OpenShiftCheckException -@pytest.mark.parametrize('group_names,is_containerized,is_active', [ - (['masters'], False, True), - # ensure check is skipped on containerized installs - (['masters'], True, False), - (['nodes'], False, True), - (['etcd'], False, True), - (['masters', 'nodes'], False, True), - (['masters', 'etcd'], False, True), - ([], False, False), - (['lb'], False, False), - (['nfs'], False, False), 
+@pytest.mark.parametrize('group_names,is_active', [ + (['masters'], True), + (['nodes'], True), + (['etcd'], True), + (['masters', 'nodes'], True), + (['masters', 'etcd'], True), + ([], False), + (['lb'], False), + (['nfs'], False), ]) -def test_is_active(group_names, is_containerized, is_active): +def test_is_active(group_names, is_active): task_vars = dict( group_names=group_names, - openshift=dict(common=dict(is_containerized=is_containerized)), ) assert DiskAvailability.is_active(task_vars=task_vars) == is_active @@ -38,7 +35,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): with pytest.raises(OpenShiftCheckException) as excinfo: check.run(tmp=None, task_vars=task_vars) - for word in 'determine available disk'.split() + extra_words: + for word in 'determine disk availability'.split() + extra_words: assert word in str(excinfo.value) @@ -81,7 +78,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words): [{ # not enough space on / ... 'mount': '/', - 'size_available': 0, + 'size_available': 2 * 10**9, }, { # ... but enough on /var 'mount': '/var', diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 197c65f51..3b9e097fb 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -31,15 +31,15 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active (False, True), ]) def test_all_images_available_locally(is_containerized, is_atomic): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == "yum": return {"changed": True} assert module_name == "docker_image_facts" - assert 'name' in args - assert args['name'] + assert 'name' in module_args + assert module_args['name'] return { - 'images': [args['name']], + 'images': [module_args['name']], } result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( @@ -52,8 +52,8 @@ def test_all_images_available_locally(is_containerized, is_atomic): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type='origin', - openshift_release='v3.4', openshift_image_tag='3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -64,7 +64,7 @@ def test_all_images_available_locally(is_containerized, is_atomic): True, ]) def test_all_images_available_remotely(available_locally): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == 'docker_image_facts': return {'images': [], 'failed': available_locally} return {'changed': False} @@ -79,8 +79,8 @@ def test_all_images_available_remotely(available_locally): docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]), ), openshift_deployment_type='origin', - openshift_release='3.4', openshift_image_tag='v3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -108,8 +108,8 @@ def test_all_images_unavailable(): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release=None, - openshift_image_tag='latest' + openshift_image_tag='latest', + group_names=['nodes', 'masters'], )) assert actual['failed'] @@ -147,8 +147,8 @@ def test_skopeo_update_failure(message, extra_words): 
docker=dict(additional_registries=["unknown.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert actual["failed"] @@ -177,8 +177,99 @@ def test_registry_availability(deployment_type, registries): docker=dict(additional_registries=registries), ), openshift_deployment_type=deployment_type, - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert not actual.get("failed", False) + + +@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [ + ( # standard set of stuff required on nodes + "origin", False, ['nodes'], None, + set([ + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # origin version of registry-console + ]) + ), + ( # set a different URL for images + "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}', + set([ + 'foo.io/openshift/origin-pod:vtest', + 'foo.io/openshift/origin-deployer:vtest', + 'foo.io/openshift/origin-docker-registry:vtest', + 'foo.io/openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # AFAICS this is not built from the URL + ]) + ), + ( + "origin", True, ['nodes', 'masters', 'etcd'], None, + set([ + # images running on top of openshift + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', + # containerized component images + 'openshift/origin:vtest', + 'openshift/node:vtest', + 'openshift/openvswitch:vtest', + 'registry.access.redhat.com/rhel7/etcd', + ]) + ), + ( # enterprise images + "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'foo.io/openshift3/ose-pod:f13ac45', + 'foo.io/openshift3/ose-deployer:f13ac45', + 'foo.io/openshift3/ose-docker-registry:f13ac45', + 'foo.io/openshift3/ose-haproxy-router:f13ac45', + # registry-console is not constructed/versioned the same as the others. 
+ 'registry.access.redhat.com/openshift3/registry-console', + # containerized images aren't built from oreg_url + 'openshift3/node:vtest', + 'openshift3/openvswitch:vtest', + ]) + ), + ( + "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'registry.access.redhat.com/rhel7/etcd', + # lb does not yet come in a containerized version + ]) + ), + +]) +def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected): + task_vars = dict( + openshift=dict( + common=dict( + is_containerized=is_containerized, + is_atomic=False, + ), + ), + openshift_deployment_type=deployment_type, + group_names=groups, + oreg_url=oreg_url, + openshift_image_tag='vtest', + ) + + assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) + + +def test_containerized_etcd(): + task_vars = dict( + openshift=dict( + common=dict( + is_containerized=True, + ), + ), + openshift_deployment_type="origin", + group_names=['etcd'], + ) + expected = set(['registry.access.redhat.com/rhel7/etcd']) + assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py index 292a323db..99c529054 100644 --- a/roles/openshift_health_checker/test/docker_storage_test.py +++ b/roles/openshift_health_checker/test/docker_storage_test.py @@ -23,7 +23,8 @@ def test_is_active(is_containerized, group_names, is_active): assert DockerStorage.is_active(task_vars=task_vars) == is_active -non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} +def non_atomic_task_vars(): + return {"openshift": {"common": {"is_atomic": False}}} @pytest.mark.parametrize('docker_info, failed, expect_msg', [ @@ -56,7 +57,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} ( dict(info={ "Driver": "overlay2", - "DriverStatus": [] + "DriverStatus": [("Backing Filesystem", "xfs")], }), False, [], @@ -64,9 +65,30 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} ( dict(info={ "Driver": "overlay", + "DriverStatus": [("Backing Filesystem", "btrfs")], }), True, - ["unsupported Docker storage driver"], + ["storage is type 'btrfs'", "only supported with\n'xfs'"], + ), + ( + dict(info={ + "Driver": "overlay2", + "DriverStatus": [("Backing Filesystem", "xfs")], + "OperatingSystem": "Red Hat Enterprise Linux Server release 7.2 (Maipo)", + "KernelVersion": "3.10.0-327.22.2.el7.x86_64", + }), + True, + ["Docker reports kernel version 3.10.0-327"], + ), + ( + dict(info={ + "Driver": "overlay", + "DriverStatus": [("Backing Filesystem", "xfs")], + "OperatingSystem": "CentOS", + "KernelVersion": "3.10.0-514", + }), + False, + [], ), ( dict(info={ @@ -77,7 +99,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} ), ]) def test_check_storage_driver(docker_info, failed, expect_msg): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if module_name == "yum": return {} if module_name != "docker_info": @@ -85,8 +107,9 @@ def test_check_storage_driver(docker_info, failed, expect_msg): return docker_info check = dummy_check(execute_module=execute_module) - check._check_dm_usage = lambda status, task_vars: dict() # stub out for this test - result = check.run(tmp=None, task_vars=non_atomic_task_vars) + check.check_dm_usage = lambda status, task_vars: dict() # stub out for this test + 
check.check_overlay_usage = lambda info, task_vars: dict() # stub out for this test + result = check.run(tmp=None, task_vars=non_atomic_task_vars()) if failed: assert result["failed"] @@ -146,8 +169,8 @@ not_enough_space = { ]) def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): check = dummy_check() - check._get_vg_free = lambda pool, task_vars: vg_free - result = check._check_dm_usage(driver_status, task_vars) + check.get_vg_free = lambda pool, task_vars: vg_free + result = check.check_dm_usage(driver_status, task_vars) result_success = not result.get("failed") assert result_success is success @@ -187,7 +210,7 @@ def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): ) ]) def test_vg_free(pool, command_returns, raises, returns): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if module_name != "command": raise ValueError("not expecting module " + module_name) return command_returns @@ -195,10 +218,10 @@ def test_vg_free(pool, command_returns, raises, returns): check = dummy_check(execute_module=execute_module) if raises: with pytest.raises(OpenShiftCheckException) as err: - check._get_vg_free(pool, {}) + check.get_vg_free(pool, {}) assert raises in str(err.value) else: - ret = check._get_vg_free(pool, {}) + ret = check.get_vg_free(pool, {}) assert ret == returns @@ -209,7 +232,7 @@ def test_vg_free(pool, command_returns, raises, returns): ("12g", 12.0 * 1024**3), ]) def test_convert_to_bytes(string, expect_bytes): - got = DockerStorage._convert_to_bytes(string) + got = DockerStorage.convert_to_bytes(string) assert got == expect_bytes @@ -219,6 +242,70 @@ def test_convert_to_bytes(string, expect_bytes): ]) def test_convert_to_bytes_error(string): with pytest.raises(ValueError) as err: - DockerStorage._convert_to_bytes(string) + DockerStorage.convert_to_bytes(string) assert "Cannot convert" in str(err.value) assert string in str(err.value) + + +ansible_mounts_enough = [{ + 'mount': '/var/lib/docker', + 'size_available': 50 * 10**9, + 'size_total': 50 * 10**9, +}] +ansible_mounts_not_enough = [{ + 'mount': '/var/lib/docker', + 'size_available': 0, + 'size_total': 50 * 10**9, +}] +ansible_mounts_missing_fields = [dict(mount='/var/lib/docker')] +ansible_mounts_zero_size = [{ + 'mount': '/var/lib/docker', + 'size_available': 0, + 'size_total': 0, +}] + + +@pytest.mark.parametrize('ansible_mounts, threshold, expect_fail, expect_msg', [ + ( + ansible_mounts_enough, + None, + False, + [], + ), + ( + ansible_mounts_not_enough, + None, + True, + ["usage percentage", "higher than threshold"], + ), + ( + ansible_mounts_not_enough, + "bogus percent", + True, + ["is not a percentage"], + ), + ( + ansible_mounts_missing_fields, + None, + True, + ["Ansible bug"], + ), + ( + ansible_mounts_zero_size, + None, + True, + ["Ansible bug"], + ), +]) +def test_overlay_usage(ansible_mounts, threshold, expect_fail, expect_msg): + check = dummy_check() + task_vars = non_atomic_task_vars() + task_vars["ansible_mounts"] = ansible_mounts + if threshold is not None: + task_vars["max_overlay_usage_percent"] = threshold + docker_info = dict(DockerRootDir="/var/lib/docker", Driver="overlay") + result = check.check_overlay_usage(docker_info, task_vars) + + assert expect_fail == bool(result.get("failed")) + for msg in expect_msg: + assert msg in result["msg"] diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py index 
19140a1b6..40a5d19d8 100644 --- a/roles/openshift_health_checker/test/kibana_test.py +++ b/roles/openshift_health_checker/test/kibana_test.py @@ -169,7 +169,7 @@ def test_get_kibana_url(route, expect_url, expect_error): ), ]) def test_verify_url_internal_failure(exec_result, expect): - check = Kibana(execute_module=lambda module_name, args, task_vars: dict(failed=True, msg=exec_result)) + check = Kibana(execute_module=lambda module_name, args, tmp, task_vars: dict(failed=True, msg=exec_result)) check._get_kibana_url = lambda task_vars: ('url', None) error = check._check_kibana_route({}) diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py index b6db34fe3..128b76b12 100644 --- a/roles/openshift_health_checker/test/logging_check_test.py +++ b/roles/openshift_health_checker/test/logging_check_test.py @@ -50,6 +50,16 @@ plain_kibana_pod = { } } +plain_kibana_pod_no_containerstatus = { + "metadata": { + "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, + "name": "logging-kibana-1", + }, + "status": { + "conditions": [{"status": "True", "type": "Ready"}], + } +} + fluentd_pod_node1 = { "metadata": { "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"}, @@ -80,7 +90,7 @@ plain_curator_pod = { ("Permission denied", "Unexpected error using `oc`"), ]) def test_oc_failure(problem, expect): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, args, tmp, task_vars): if module_name == "ocutil": return dict(failed=True, result=problem) return dict(changed=False) @@ -135,3 +145,23 @@ def test_get_pods_for_component(pod_output, expect_pods, expect_error): {} ) assert_error(error, expect_error) + + +@pytest.mark.parametrize('name, pods, expected_pods', [ + ( + 'test single pod found, scheduled, but no containerStatuses field', + [plain_kibana_pod_no_containerstatus], + [plain_kibana_pod_no_containerstatus], + ), + ( + 'set of pods has at least one pod with containerStatuses (scheduled); should still fail', + [plain_kibana_pod_no_containerstatus, plain_kibana_pod], + [plain_kibana_pod_no_containerstatus], + ), + +], ids=lambda argvals: argvals[0]) +def test_get_not_running_pods_no_container_status(name, pods, expected_pods): + check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: '') + result = check.not_running_pods(pods) + + assert result == expected_pods diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py index 91eace512..1bb6371ae 100644 --- a/roles/openshift_health_checker/test/package_version_test.py +++ b/roles/openshift_health_checker/test/package_version_test.py @@ -72,36 +72,6 @@ def test_package_version(openshift_release): assert result is return_value -@pytest.mark.parametrize('deployment_type,openshift_release,expected_ovs_version', [ - ("openshift-enterprise", "3.5", "2.6"), - ("origin", "3.6", "2.6"), - ("openshift-enterprise", "3.4", "2.4"), - ("origin", "3.3", "2.4"), -]) -def test_ovs_package_version(deployment_type, openshift_release, expected_ovs_version): - task_vars = dict( - openshift=dict(common=dict(service_type='origin')), - openshift_release=openshift_release, - openshift_image_tag='v' + openshift_release, - openshift_deployment_type=deployment_type, - ) - return_value = object() - - def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): - assert module_name == 'aos_version' - assert 
"package_list" in module_args - - for pkg in module_args["package_list"]: - if pkg["name"] == "openvswitch": - assert pkg["version"] == expected_ovs_version - - return return_value - - check = PackageVersion(execute_module=execute_module) - result = check.run(tmp=None, task_vars=task_vars) - assert result is return_value - - @pytest.mark.parametrize('deployment_type,openshift_release,expected_docker_version', [ ("origin", "3.5", "1.12"), ("openshift-enterprise", "3.4", "1.12"), |