diff options
Diffstat (limited to 'roles/openshift_health_checker')
9 files changed, 645 insertions, 113 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 4588ed634..60aacf715 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,8 +1,25 @@ -# pylint: disable=missing-docstring +"""Check that required Docker images are available.""" + from openshift_checks import OpenShiftCheck, get_var +from openshift_checks.mixins import DockerHostMixin + +NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"] +DEPLOYMENT_IMAGE_INFO = { + "origin": { + "namespace": "openshift", + "name": "origin", + "registry_console_image": "cockpit/kubernetes", + }, + "openshift-enterprise": { + "namespace": "openshift3", + "name": "ose", + "registry_console_image": "registry.access.redhat.com/openshift3/registry-console", + }, +} -class DockerImageAvailability(OpenShiftCheck): + +class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Check that required Docker images are available. This check attempts to ensure that required docker images are @@ -12,43 +29,23 @@ class DockerImageAvailability(OpenShiftCheck): name = "docker_image_availability" tags = ["preflight"] - dependencies = ["skopeo", "python-docker-py"] - deployment_image_info = { - "origin": { - "namespace": "openshift", - "name": "origin", - }, - "openshift-enterprise": { - "namespace": "openshift3", - "name": "ose", - }, - } - @classmethod def is_active(cls, task_vars): """Skip hosts with unsupported deployment types.""" deployment_type = get_var(task_vars, "openshift_deployment_type") - has_valid_deployment_type = deployment_type in cls.deployment_image_info + has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type def run(self, tmp, task_vars): msg, failed, changed = self.ensure_dependencies(task_vars) - - # exit early if Skopeo update fails if failed: - if "No package matching" in msg: - msg = "Ensure that all required dependencies can be installed via `yum`.\n" return { "failed": True, "changed": changed, - "msg": ( - "Unable to update or install required dependency packages on this host;\n" - "These are required in order to check Docker image availability:" - "\n {deps}\n{msg}" - ).format(deps=',\n '.join(self.dependencies), msg=msg), + "msg": "Some dependencies are required in order to check Docker image availability.\n" + msg } required_images = self.required_images(task_vars) @@ -77,51 +74,55 @@ class DockerImageAvailability(OpenShiftCheck): return {"changed": changed} - def required_images(self, task_vars): - deployment_type = get_var(task_vars, "openshift_deployment_type") - image_info = self.deployment_image_info[deployment_type] - - openshift_release = get_var(task_vars, "openshift_release", default="latest") - openshift_image_tag = get_var(task_vars, "openshift_image_tag") - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - - images = set(self.required_docker_images( - image_info["namespace"], - image_info["name"], - ["registry-console"] if "enterprise" in deployment_type else [], # include enterprise-only image names - openshift_release, - is_containerized, - )) - - # append images with qualified image tags to our list of required images. - # these are images with a (v0.0.0.0) tag, rather than a standard release - # format tag (v0.0). We want to check this set in both containerized and - # non-containerized installations. - images.update( - self.required_qualified_docker_images( - image_info["namespace"], - image_info["name"], - openshift_image_tag, - ), - ) - - return images - - @staticmethod - def required_docker_images(namespace, name, additional_image_names, version, is_containerized): - if is_containerized: - return ["{}/{}:{}".format(namespace, name, version)] if name else [] - - # include additional non-containerized images specific to the current deployment type - return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names] - @staticmethod - def required_qualified_docker_images(namespace, name, version): - # pylint: disable=invalid-name - return [ - "{}/{}-{}:{}".format(namespace, name, suffix, version) - for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"] - ] + def required_images(task_vars): + """ + Determine which images we expect to need for this host. + Returns: a set of required images like 'openshift/origin:v3.6' + + The thorny issue of determining the image names from the variables is under consideration + via https://github.com/openshift/openshift-ansible/issues/4415 + + For now we operate as follows: + * For containerized components (master, node, ...) we look at the deployment type and + use openshift/origin or openshift3/ose as the base for those component images. The + version is openshift_image_tag as determined by the openshift_version role. + * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if + it is defined; otherwise we again use the base that depends on the deployment type. + Registry is not included in constructed images. It may be in oreg_url or etcd image. + """ + required = set() + deployment_type = get_var(task_vars, "openshift_deployment_type") + host_groups = get_var(task_vars, "group_names") + image_tag = get_var(task_vars, "openshift_image_tag") + image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] + if not image_info: + return required + + # template for images that run on top of OpenShift + image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") + image_url = get_var(task_vars, "oreg_url", default="") or image_url + if 'nodes' in host_groups: + for suffix in NODE_IMAGE_SUFFIXES: + required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) + # The registry-console is for some reason not prefixed with ose- like the other components. + # Nor is it versioned the same, so just look for latest. + # Also a completely different name is used for Origin. + required.add(image_info["registry_console_image"]) + + # images for containerized components + if get_var(task_vars, "openshift", "common", "is_containerized"): + components = set() + if 'nodes' in host_groups: + components.update(["node", "openvswitch"]) + if 'masters' in host_groups: # name is "origin" or "ose" + components.add(image_info["name"]) + for component in components: + required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag)) + if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise + required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag + + return required def local_images(self, images, task_vars): """Filter a list of images and return those available locally.""" @@ -131,7 +132,8 @@ class DockerImageAvailability(OpenShiftCheck): ] def is_image_local(self, image, task_vars): - result = self.module_executor("docker_image_facts", {"name": image}, task_vars) + """Check if image is already in local docker index.""" + result = self.execute_module("docker_image_facts", {"name": image}, task_vars=task_vars) if result.get("failed", False): return False @@ -139,6 +141,7 @@ class DockerImageAvailability(OpenShiftCheck): @staticmethod def known_docker_registries(task_vars): + """Build a list of docker registries available according to inventory vars.""" docker_facts = get_var(task_vars, "openshift", "docker") regs = set(docker_facts["additional_registries"]) @@ -154,26 +157,21 @@ class DockerImageAvailability(OpenShiftCheck): """Inspect existing images using Skopeo and return all images successfully inspected.""" return [ image for image in images - if any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries) + if self.is_available_skopeo_image(image, registries, task_vars) ] - def is_available_skopeo_image(self, image, registry, task_vars): - """Uses Skopeo to determine if required image exists in a given registry.""" - - cmd_str = "skopeo inspect docker://{registry}/{image}".format( - registry=registry, - image=image, - ) + def is_available_skopeo_image(self, image, registries, task_vars): + """Use Skopeo to determine if required image exists in known registry(s).""" - args = {"_raw_params": cmd_str} - result = self.module_executor("command", args, task_vars) - return not result.get("failed", False) and result.get("rc", 0) == 0 + # if image does already includes a registry, just use that + if image.count("/") > 1: + registry, image = image.split("/", 1) + registries = [registry] - # ensures that the skopeo and python-docker-py packages exist - # check is skipped on atomic installations - def ensure_dependencies(self, task_vars): - if get_var(task_vars, "openshift", "common", "is_atomic"): - return "", False, False + for registry in registries: + args = {"_raw_params": "skopeo inspect docker://{}/{}".format(registry, image)} + result = self.execute_module("command", args, task_vars=task_vars) + if result.get("rc", 0) == 0 and not result.get("failed"): + return True - result = self.module_executor("yum", {"name": self.dependencies, "state": "latest"}, task_vars) - return result.get("msg", ""), result.get("failed", False) or result.get("rc", 0) != 0, result.get("changed") + return False diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py new file mode 100644 index 000000000..2bd615457 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -0,0 +1,185 @@ +"""Check Docker storage driver and usage.""" +import json +import re +from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +from openshift_checks.mixins import DockerHostMixin + + +class DockerStorage(DockerHostMixin, OpenShiftCheck): + """Check Docker storage driver compatibility. + + This check ensures that Docker is using a supported storage driver, + and that loopback is not being used (if using devicemapper). + Also that storage usage is not above threshold. + """ + + name = "docker_storage" + tags = ["pre-install", "health", "preflight"] + + dependencies = ["python-docker-py"] + storage_drivers = ["devicemapper", "overlay2"] + max_thinpool_data_usage_percent = 90.0 + max_thinpool_meta_usage_percent = 90.0 + + # pylint: disable=too-many-return-statements + # Reason: permanent stylistic exception; + # it is clearer to return on failures and there are just many ways to fail here. + def run(self, tmp, task_vars): + msg, failed, changed = self.ensure_dependencies(task_vars) + if failed: + return { + "failed": True, + "changed": changed, + "msg": "Some dependencies are required in order to query docker storage on host:\n" + msg + } + + # attempt to get the docker info hash from the API + info = self.execute_module("docker_info", {}, task_vars=task_vars) + if info.get("failed"): + return {"failed": True, "changed": changed, + "msg": "Failed to query Docker API. Is docker running on this host?"} + if not info.get("info"): # this would be very strange + return {"failed": True, "changed": changed, + "msg": "Docker API query missing info:\n{}".format(json.dumps(info))} + info = info["info"] + + # check if the storage driver we saw is valid + driver = info.get("Driver", "[NONE]") + if driver not in self.storage_drivers: + msg = ( + "Detected unsupported Docker storage driver '{driver}'.\n" + "Supported storage drivers are: {drivers}" + ).format(driver=driver, drivers=', '.join(self.storage_drivers)) + return {"failed": True, "changed": changed, "msg": msg} + + # driver status info is a list of tuples; convert to dict and validate based on driver + driver_status = {item[0]: item[1] for item in info.get("DriverStatus", [])} + if driver == "devicemapper": + if driver_status.get("Data loop file"): + msg = ( + "Use of loopback devices with the Docker devicemapper storage driver\n" + "(the default storage configuration) is unsupported in production.\n" + "Please use docker-storage-setup to configure a backing storage volume.\n" + "See http://red.ht/2rNperO for further information." + ) + return {"failed": True, "changed": changed, "msg": msg} + result = self._check_dm_usage(driver_status, task_vars) + result['changed'] = result.get('changed', False) or changed + return result + + # TODO(lmeyer): determine how to check usage for overlay2 + + return {"changed": changed} + + def _check_dm_usage(self, driver_status, task_vars): + """ + Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool + implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures + devicemapper storage. The LV is "thin" because it does not use all available storage + from its VG, instead expanding as needed; so to determine available space, we gather + current usage as the Docker API reports for the driver as well as space available for + expansion in the pool's VG. + Usage within the LV is divided into pools allocated to data and metadata, either of which + could run out of space first; so we check both. + """ + vals = dict( + vg_free=self._get_vg_free(driver_status.get("Pool Name"), task_vars), + data_used=driver_status.get("Data Space Used"), + data_total=driver_status.get("Data Space Total"), + metadata_used=driver_status.get("Metadata Space Used"), + metadata_total=driver_status.get("Metadata Space Total"), + ) + + # convert all human-readable strings to bytes + for key, value in vals.copy().items(): + try: + vals[key + "_bytes"] = self._convert_to_bytes(value) + except ValueError as err: # unlikely to hit this from API info, but just to be safe + return { + "failed": True, + "values": vals, + "msg": "Could not interpret {} value '{}' as bytes: {}".format(key, value, str(err)) + } + + # determine the threshold percentages which usage should not exceed + for name, default in [("data", self.max_thinpool_data_usage_percent), + ("metadata", self.max_thinpool_meta_usage_percent)]: + percent = get_var(task_vars, "max_thinpool_" + name + "_usage_percent", default=default) + try: + vals[name + "_threshold"] = float(percent) + except ValueError: + return { + "failed": True, + "msg": "Specified thinpool {} usage limit '{}' is not a percentage".format(name, percent) + } + + # test whether the thresholds are exceeded + messages = [] + for name in ["data", "metadata"]: + vals[name + "_pct_used"] = 100 * vals[name + "_used_bytes"] / ( + vals[name + "_total_bytes"] + vals["vg_free_bytes"]) + if vals[name + "_pct_used"] > vals[name + "_threshold"]: + messages.append( + "Docker thinpool {name} usage percentage {pct:.1f} " + "is higher than threshold {thresh:.1f}.".format( + name=name, + pct=vals[name + "_pct_used"], + thresh=vals[name + "_threshold"], + )) + vals["failed"] = True + + vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."]) + return vals + + def _get_vg_free(self, pool, task_vars): + # Determine which VG to examine according to the pool name, the only indicator currently + # available from the Docker API driver info. We assume a name that looks like + # "vg--name-docker--pool"; vg and lv names with inner hyphens doubled, joined by a hyphen. + match = re.match(r'((?:[^-]|--)+)-(?!-)', pool) # matches up to the first single hyphen + if not match: # unlikely, but... be clear if we assumed wrong + raise OpenShiftCheckException( + "This host's Docker reports it is using a storage pool named '{}'.\n" + "However this name does not have the expected format of 'vgname-lvname'\n" + "so the available storage in the VG cannot be determined.".format(pool) + ) + vg_name = match.groups()[0].replace("--", "-") + vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name + # should return free space like " 12.00g" if the VG exists; empty if it does not + + ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars) + if ret.get("failed") or ret.get("rc", 0) != 0: + raise OpenShiftCheckException( + "Is LVM installed? Failed to run /sbin/vgs " + "to determine docker storage usage:\n" + ret.get("msg", "") + ) + size = ret.get("stdout", "").strip() + if not size: + raise OpenShiftCheckException( + "This host's Docker reports it is using a storage pool named '{pool}'.\n" + "which we expect to come from local VG '{vg}'.\n" + "However, /sbin/vgs did not find this VG. Is Docker for this host" + "running and using the storage on the host?".format(pool=pool, vg=vg_name) + ) + return size + + @staticmethod + def _convert_to_bytes(string): + units = dict( + b=1, + k=1024, + m=1024**2, + g=1024**3, + t=1024**4, + p=1024**5, + ) + string = string or "" + match = re.match(r'(\d+(?:\.\d+)?)\s*(\w)?', string) # float followed by optional unit + if not match: + raise ValueError("Cannot convert to a byte size: " + string) + + number, unit = match.groups() + multiplier = 1 if not unit else units.get(unit.lower()) + if not multiplier: + raise ValueError("Cannot convert to a byte size: " + string) + + return float(number) * multiplier diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 20d160eaf..2cb2e21aa 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-docstring,too-few-public-methods """ Mixin classes meant to be used with subclasses of OpenShiftCheck. """ @@ -8,8 +7,52 @@ from openshift_checks import get_var class NotContainerizedMixin(object): """Mixin for checks that are only active when not in containerized mode.""" + # permanent # pylint: disable=too-few-public-methods + # Reason: The mixin is not intended to stand on its own as a class. @classmethod def is_active(cls, task_vars): + """Only run on non-containerized hosts.""" is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") return super(NotContainerizedMixin, cls).is_active(task_vars) and not is_containerized + + +class DockerHostMixin(object): + """Mixin for checks that are only active on hosts that require Docker.""" + + dependencies = [] + + @classmethod + def is_active(cls, task_vars): + """Only run on hosts that depend on Docker.""" + is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") + is_node = "nodes" in get_var(task_vars, "group_names", default=[]) + return super(DockerHostMixin, cls).is_active(task_vars) and (is_containerized or is_node) + + def ensure_dependencies(self, task_vars): + """ + Ensure that docker-related packages exist, but not on atomic hosts + (which would not be able to install but should already have them). + Returns: msg, failed, changed + """ + if get_var(task_vars, "openshift", "common", "is_atomic"): + return "", False, False + + # NOTE: we would use the "package" module but it's actually an action plugin + # and it's not clear how to invoke one of those. This is about the same anyway: + result = self.execute_module( + get_var(task_vars, "ansible_pkg_mgr", default="yum"), + {"name": self.dependencies, "state": "present"}, + task_vars=task_vars, + ) + msg = result.get("msg", "") + if result.get("failed"): + if "No package matching" in msg: + msg = "Ensure that all required dependencies can be installed via `yum`.\n" + msg = ( + "Unable to install required packages on this host:\n" + " {deps}\n{msg}" + ).format(deps=',\n '.join(self.dependencies), msg=msg) + failed = result.get("failed", False) or result.get("rc", 0) != 0 + changed = result.get("changed", False) + return msg, failed, changed diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 1e45ae3af..2dd045f1f 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -43,7 +43,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): }, ], } - return self.execute_module("rpm_version", args, task_vars) + return self.execute_module("rpm_version", args, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version""" diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index a7eb720fd..e87567fe6 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -25,7 +25,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) @staticmethod def master_packages(rpm_prefix): @@ -36,7 +36,6 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): "bash-completion", "cockpit-bridge", "cockpit-docker", - "cockpit-kubernetes", "cockpit-shell", "cockpit-ws", "etcd", diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index fd0c0a755..f432380c6 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck): def run(self, tmp, task_vars): args = {"packages": []} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 2e737818b..6a76bb93d 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -71,7 +71,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): ], } - return self.execute_module("aos_version", args, tmp, task_vars) + return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version. diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 0379cafb5..0a7c0f8d3 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -3,19 +3,25 @@ import pytest from openshift_checks.docker_image_availability import DockerImageAvailability -@pytest.mark.parametrize('deployment_type,is_active', [ - ("origin", True), - ("openshift-enterprise", True), - ("enterprise", False), - ("online", False), - ("invalid", False), - ("", False), +@pytest.mark.parametrize('deployment_type, is_containerized, group_names, expect_active', [ + ("origin", True, [], True), + ("openshift-enterprise", True, [], True), + ("enterprise", True, [], False), + ("online", True, [], False), + ("invalid", True, [], False), + ("", True, [], False), + ("origin", False, [], False), + ("openshift-enterprise", False, [], False), + ("origin", False, ["nodes", "masters"], True), + ("openshift-enterprise", False, ["etcd"], False), ]) -def test_is_active(deployment_type, is_active): +def test_is_active(deployment_type, is_containerized, group_names, expect_active): task_vars = dict( + openshift=dict(common=dict(is_containerized=is_containerized)), openshift_deployment_type=deployment_type, + group_names=group_names, ) - assert DockerImageAvailability.is_active(task_vars=task_vars) == is_active + assert DockerImageAvailability.is_active(task_vars=task_vars) == expect_active @pytest.mark.parametrize("is_containerized,is_atomic", [ @@ -25,15 +31,15 @@ def test_is_active(deployment_type, is_active): (False, True), ]) def test_all_images_available_locally(is_containerized, is_atomic): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == "yum": return {"changed": True} assert module_name == "docker_image_facts" - assert 'name' in args - assert args['name'] + assert 'name' in module_args + assert module_args['name'] return { - 'images': [args['name']], + 'images': [module_args['name']], } result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( @@ -46,8 +52,8 @@ def test_all_images_available_locally(is_containerized, is_atomic): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type='origin', - openshift_release='v3.4', openshift_image_tag='3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -58,7 +64,7 @@ def test_all_images_available_locally(is_containerized, is_atomic): True, ]) def test_all_images_available_remotely(available_locally): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == 'docker_image_facts': return {'images': [], 'failed': available_locally} return {'changed': False} @@ -73,8 +79,8 @@ def test_all_images_available_remotely(available_locally): docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]), ), openshift_deployment_type='origin', - openshift_release='3.4', openshift_image_tag='v3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -102,8 +108,8 @@ def test_all_images_unavailable(): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release=None, - openshift_image_tag='latest' + openshift_image_tag='latest', + group_names=['nodes', 'masters'], )) assert actual['failed'] @@ -141,8 +147,8 @@ def test_skopeo_update_failure(message, extra_words): docker=dict(additional_registries=["unknown.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert actual["failed"] @@ -171,8 +177,85 @@ def test_registry_availability(deployment_type, registries): docker=dict(additional_registries=registries), ), openshift_deployment_type=deployment_type, - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert not actual.get("failed", False) + + +@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [ + ( # standard set of stuff required on nodes + "origin", False, ['nodes'], None, + set([ + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # origin version of registry-console + ]) + ), + ( # set a different URL for images + "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}', + set([ + 'foo.io/openshift/origin-pod:vtest', + 'foo.io/openshift/origin-deployer:vtest', + 'foo.io/openshift/origin-docker-registry:vtest', + 'foo.io/openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # AFAICS this is not built from the URL + ]) + ), + ( + "origin", True, ['nodes', 'masters', 'etcd'], None, + set([ + # images running on top of openshift + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', + # containerized component images + 'openshift/origin:vtest', + 'openshift/node:vtest', + 'openshift/openvswitch:vtest', + 'registry.access.redhat.com/rhel7/etcd', + ]) + ), + ( # enterprise images + "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'foo.io/openshift3/ose-pod:f13ac45', + 'foo.io/openshift3/ose-deployer:f13ac45', + 'foo.io/openshift3/ose-docker-registry:f13ac45', + 'foo.io/openshift3/ose-haproxy-router:f13ac45', + # registry-console is not constructed/versioned the same as the others. + 'registry.access.redhat.com/openshift3/registry-console', + # containerized images aren't built from oreg_url + 'openshift3/node:vtest', + 'openshift3/openvswitch:vtest', + ]) + ), + ( + "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'registry.access.redhat.com/rhel7/etcd', + # lb does not yet come in a containerized version + ]) + ), + +]) +def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected): + task_vars = dict( + openshift=dict( + common=dict( + is_containerized=is_containerized, + is_atomic=False, + ), + ), + openshift_deployment_type=deployment_type, + group_names=groups, + oreg_url=oreg_url, + openshift_image_tag='vtest', + ) + + assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py new file mode 100644 index 000000000..876614b1d --- /dev/null +++ b/roles/openshift_health_checker/test/docker_storage_test.py @@ -0,0 +1,224 @@ +import pytest + +from openshift_checks import OpenShiftCheckException +from openshift_checks.docker_storage import DockerStorage + + +def dummy_check(execute_module=None): + def dummy_exec(self, status, task_vars): + raise Exception("dummy executor called") + return DockerStorage(execute_module=execute_module or dummy_exec) + + +@pytest.mark.parametrize('is_containerized, group_names, is_active', [ + (False, ["masters", "etcd"], False), + (False, ["masters", "nodes"], True), + (True, ["etcd"], True), +]) +def test_is_active(is_containerized, group_names, is_active): + task_vars = dict( + openshift=dict(common=dict(is_containerized=is_containerized)), + group_names=group_names, + ) + assert DockerStorage.is_active(task_vars=task_vars) == is_active + + +non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} + + +@pytest.mark.parametrize('docker_info, failed, expect_msg', [ + ( + dict(failed=True, msg="Error connecting: Error while fetching server API version"), + True, + ["Is docker running on this host?"], + ), + ( + dict(msg="I have no info"), + True, + ["missing info"], + ), + ( + dict(info={ + "Driver": "devicemapper", + "DriverStatus": [("Pool Name", "docker-docker--pool")], + }), + False, + [], + ), + ( + dict(info={ + "Driver": "devicemapper", + "DriverStatus": [("Data loop file", "true")], + }), + True, + ["loopback devices with the Docker devicemapper storage driver"], + ), + ( + dict(info={ + "Driver": "overlay2", + "DriverStatus": [] + }), + False, + [], + ), + ( + dict(info={ + "Driver": "overlay", + }), + True, + ["unsupported Docker storage driver"], + ), + ( + dict(info={ + "Driver": "unsupported", + }), + True, + ["unsupported Docker storage driver"], + ), +]) +def test_check_storage_driver(docker_info, failed, expect_msg): + def execute_module(module_name, module_args, tmp=None, task_vars=None): + if module_name == "yum": + return {} + if module_name != "docker_info": + raise ValueError("not expecting module " + module_name) + return docker_info + + check = dummy_check(execute_module=execute_module) + check._check_dm_usage = lambda status, task_vars: dict() # stub out for this test + result = check.run(tmp=None, task_vars=non_atomic_task_vars) + + if failed: + assert result["failed"] + else: + assert not result.get("failed", False) + + for word in expect_msg: + assert word in result["msg"] + + +enough_space = { + "Pool Name": "docker--vg-docker--pool", + "Data Space Used": "19.92 MB", + "Data Space Total": "8.535 GB", + "Metadata Space Used": "40.96 kB", + "Metadata Space Total": "25.17 MB", +} + +not_enough_space = { + "Pool Name": "docker--vg-docker--pool", + "Data Space Used": "10 GB", + "Data Space Total": "10 GB", + "Metadata Space Used": "42 kB", + "Metadata Space Total": "43 kB", +} + + +@pytest.mark.parametrize('task_vars, driver_status, vg_free, success, expect_msg', [ + ( + {"max_thinpool_data_usage_percent": "not a float"}, + enough_space, + "12g", + False, + ["is not a percentage"], + ), + ( + {}, + {}, # empty values from driver status + "bogus", # also does not parse as bytes + False, + ["Could not interpret", "as bytes"], + ), + ( + {}, + enough_space, + "12.00g", + True, + [], + ), + ( + {}, + not_enough_space, + "0.00", + False, + ["data usage", "metadata usage", "higher than threshold"], + ), +]) +def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): + check = dummy_check() + check._get_vg_free = lambda pool, task_vars: vg_free + result = check._check_dm_usage(driver_status, task_vars) + result_success = not result.get("failed") + + assert result_success is success + for msg in expect_msg: + assert msg in result["msg"] + + +@pytest.mark.parametrize('pool, command_returns, raises, returns', [ + ( + "foo-bar", + { # vgs missing + "msg": "[Errno 2] No such file or directory", + "failed": True, + "cmd": "/sbin/vgs", + "rc": 2, + }, + "Failed to run /sbin/vgs", + None, + ), + ( + "foo", # no hyphen in name - should not happen + {}, + "name does not have the expected format", + None, + ), + ( + "foo-bar", + dict(stdout=" 4.00g\n"), + None, + "4.00g", + ), + ( + "foo-bar", + dict(stdout="\n"), # no matching VG + "vgs did not find this VG", + None, + ) +]) +def test_vg_free(pool, command_returns, raises, returns): + def execute_module(module_name, module_args, tmp=None, task_vars=None): + if module_name != "command": + raise ValueError("not expecting module " + module_name) + return command_returns + + check = dummy_check(execute_module=execute_module) + if raises: + with pytest.raises(OpenShiftCheckException) as err: + check._get_vg_free(pool, {}) + assert raises in str(err.value) + else: + ret = check._get_vg_free(pool, {}) + assert ret == returns + + +@pytest.mark.parametrize('string, expect_bytes', [ + ("12", 12.0), + ("12 k", 12.0 * 1024), + ("42.42 MB", 42.42 * 1024**2), + ("12g", 12.0 * 1024**3), +]) +def test_convert_to_bytes(string, expect_bytes): + got = DockerStorage._convert_to_bytes(string) + assert got == expect_bytes + + +@pytest.mark.parametrize('string', [ + "bork", + "42 Qs", +]) +def test_convert_to_bytes_error(string): + with pytest.raises(ValueError) as err: + DockerStorage._convert_to_bytes(string) + assert "Cannot convert" in str(err.value) + assert string in str(err.value) |