diff options
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks/docker_image_availability.py')
-rw-r--r-- | roles/openshift_health_checker/openshift_checks/docker_image_availability.py | 323 |
1 files changed, 177 insertions, 146 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index cce289b95..9c35f0f92 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,179 +1,210 @@ -# pylint: disable=missing-docstring -from openshift_checks import OpenShiftCheck, get_var +"""Check that required Docker images are available.""" +from openshift_checks import OpenShiftCheck +from openshift_checks.mixins import DockerHostMixin -class DockerImageAvailability(OpenShiftCheck): + +NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"] +DEPLOYMENT_IMAGE_INFO = { + "origin": { + "namespace": "openshift", + "name": "origin", + "registry_console_image": "cockpit/kubernetes", + }, + "openshift-enterprise": { + "namespace": "openshift3", + "name": "ose", + "registry_console_image": "registry.access.redhat.com/openshift3/registry-console", + }, +} + + +class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Check that required Docker images are available. - This check attempts to ensure that required docker images are - either present locally, or able to be pulled down from available - registries defined in a host machine. + Determine docker images that an install would require and check that they + are either present in the host's docker index, or available for the host to pull + with known registries as defined in our inventory file (or defaults). """ name = "docker_image_availability" tags = ["preflight"] + # we use python-docker-py to check local docker for images, and skopeo + # to look for images available remotely without waiting to pull them. + dependencies = ["python-docker-py", "skopeo"] + skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false docker://{registry}/{image}" - skopeo_image = "openshift/openshift-ansible" - - # FIXME(juanvallejo): we should consider other possible values of - # `deployment_type` (the key here). See - # https://github.com/openshift/openshift-ansible/blob/8e26f8c/roles/openshift_repos/vars/main.yml#L7 - docker_image_base = { - "origin": { - "repo": "openshift", - "image": "origin", - }, - "openshift-enterprise": { - "repo": "openshift3", - "image": "ose", - }, - } - - def run(self, tmp, task_vars): - required_images = self.required_images(task_vars) - missing_images = set(required_images) - set(self.local_images(required_images, task_vars)) + def __init__(self, *args, **kwargs): + super(DockerImageAvailability, self).__init__(*args, **kwargs) + # record whether we could reach a registry or not (and remember results) + self.reachable_registries = {} - # exit early if all images were found locally - if not missing_images: - return {"changed": False} + def is_active(self): + """Skip hosts with unsupported deployment types.""" + deployment_type = self.get_var("openshift_deployment_type") + has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO - msg, failed, changed = self.update_skopeo_image(task_vars) + return super(DockerImageAvailability, self).is_active() and has_valid_deployment_type - # exit early if Skopeo update fails + def run(self): + msg, failed = self.ensure_dependencies() if failed: return { "failed": True, - "changed": changed, - "msg": "Failed to update Skopeo image ({img_name}). {msg}".format(img_name=self.skopeo_image, msg=msg), - } - - registries = self.known_docker_registries(task_vars) - available_images = self.available_images(missing_images, registries, task_vars) - unavailable_images = set(missing_images) - set(available_images) - - if unavailable_images: - return { - "failed": True, - "msg": ( - "One or more required images are not available: {}.\n" - "Configured registries: {}" - ).format(", ".join(sorted(unavailable_images)), ", ".join(registries)), - "changed": changed, + "msg": "Some dependencies are required in order to check Docker image availability.\n" + msg } - return {"changed": changed} - - def required_images(self, task_vars): - deployment_type = get_var(task_vars, "deployment_type") - # FIXME(juanvallejo): we should handle gracefully with a proper error - # message when given an unexpected value for `deployment_type`. - image_base_name = self.docker_image_base[deployment_type] - - openshift_release = get_var(task_vars, "openshift_release") - # FIXME(juanvallejo): this variable is not required when the - # installation is non-containerized. The example inventories have it - # commented out. We should handle gracefully and with a proper error - # message when this variable is required and not set. - openshift_image_tag = get_var(task_vars, "openshift_image_tag") + required_images = self.required_images() + missing_images = set(required_images) - set(self.local_images(required_images)) - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - - if is_containerized: - images = set(self.containerized_docker_images(image_base_name, openshift_release)) - else: - images = set(self.rpm_docker_images(image_base_name, openshift_release)) + # exit early if all images were found locally + if not missing_images: + return {} - # append images with qualified image tags to our list of required images. - # these are images with a (v0.0.0.0) tag, rather than a standard release - # format tag (v0.0). We want to check this set in both containerized and - # non-containerized installations. - images.update( - self.qualified_docker_images(self.image_from_base_name(image_base_name), "v" + openshift_image_tag) - ) + registries = self.known_docker_registries() + if not registries: + return {"failed": True, "msg": "Unable to retrieve any docker registries."} - return images + available_images = self.available_images(missing_images, registries) + unavailable_images = set(missing_images) - set(available_images) - def local_images(self, images, task_vars): + if unavailable_images: + registries = [ + reg if self.reachable_registries.get(reg, True) else reg + " (unreachable)" + for reg in registries + ] + msg = ( + "One or more required Docker images are not available:\n {}\n" + "Configured registries: {}\n" + "Checked by: {}" + ).format( + ",\n ".join(sorted(unavailable_images)), + ", ".join(registries), + self.skopeo_img_check_command + ) + + return dict(failed=True, msg=msg) + + return {} + + def required_images(self): + """ + Determine which images we expect to need for this host. + Returns: a set of required images like 'openshift/origin:v3.6' + + The thorny issue of determining the image names from the variables is under consideration + via https://github.com/openshift/openshift-ansible/issues/4415 + + For now we operate as follows: + * For containerized components (master, node, ...) we look at the deployment type and + use openshift/origin or openshift3/ose as the base for those component images. The + version is openshift_image_tag as determined by the openshift_version role. + * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if + it is defined; otherwise we again use the base that depends on the deployment type. + Registry is not included in constructed images. It may be in oreg_url or etcd image. + """ + required = set() + deployment_type = self.get_var("openshift_deployment_type") + host_groups = self.get_var("group_names") + # containerized etcd may not have openshift_image_tag, see bz 1466622 + image_tag = self.get_var("openshift_image_tag", default="latest") + image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] + if not image_info: + return required + + # template for images that run on top of OpenShift + image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") + image_url = self.get_var("oreg_url", default="") or image_url + if 'nodes' in host_groups: + for suffix in NODE_IMAGE_SUFFIXES: + required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) + # The registry-console is for some reason not prefixed with ose- like the other components. + # Nor is it versioned the same, so just look for latest. + # Also a completely different name is used for Origin. + required.add(image_info["registry_console_image"]) + + # images for containerized components + if self.get_var("openshift", "common", "is_containerized"): + components = set() + if 'nodes' in host_groups: + components.update(["node", "openvswitch"]) + if 'masters' in host_groups: # name is "origin" or "ose" + components.add(image_info["name"]) + for component in components: + required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag)) + if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise + required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag + + return required + + def local_images(self, images): """Filter a list of images and return those available locally.""" + registries = self.known_docker_registries() + found_images = [] + for image in images: + # docker could have the image name as-is or prefixed with any registry + imglist = [image] + [reg + "/" + image for reg in registries] + if self.is_image_local(imglist): + found_images.append(image) + return found_images + + def is_image_local(self, image): + """Check if image is already in local docker index.""" + result = self.execute_module("docker_image_facts", {"name": image}) + return bool(result.get("images")) and not result.get("failed") + + def known_docker_registries(self): + """Build a list of docker registries available according to inventory vars.""" + regs = list(self.get_var("openshift.docker.additional_registries", default=[])) + + deployment_type = self.get_var("openshift_deployment_type") + if deployment_type == "origin" and "docker.io" not in regs: + regs.append("docker.io") + elif "enterprise" in deployment_type and "registry.access.redhat.com" not in regs: + regs.append("registry.access.redhat.com") + + return regs + + def available_images(self, images, default_registries): + """Search remotely for images. Returns: list of images found.""" return [ image for image in images - if self.is_image_local(image, task_vars) + if self.is_available_skopeo_image(image, default_registries) ] - def is_image_local(self, image, task_vars): - result = self.module_executor("docker_image_facts", {"name": image}, task_vars) - if result.get("failed", False): - return False - - return bool(result.get("images", [])) - - def known_docker_registries(self, task_vars): - result = self.module_executor("docker_info", {}, task_vars) + def is_available_skopeo_image(self, image, default_registries): + """Use Skopeo to determine if required image exists in known registry(s).""" + registries = default_registries - if result.get("failed", False): - return [] + # If image already includes a registry, only use that. + # NOTE: This logic would incorrectly identify images that do not use a namespace, e.g. + # registry.access.redhat.com/rhel7 as if the registry were a namespace. + # It's not clear that there's any way to distinguish them, but fortunately + # the current set of images all look like [registry/]namespace/name[:version]. + if image.count("/") > 1: + registry, image = image.split("/", 1) + registries = [registry] - # FIXME(juanvallejo): wrong default type, result["info"] is expected to - # contain a dictionary (see how we call `docker_info.get` below). - docker_info = result.get("info", "") - return [registry.get("Name", "") for registry in docker_info.get("Registries", {})] - - def available_images(self, images, registries, task_vars): - """Inspect existing images using Skopeo and return all images successfully inspected.""" - return [ - image for image in images - if self.is_image_available(image, registries, task_vars) - ] - - def is_image_available(self, image, registries, task_vars): for registry in registries: - if self.is_available_skopeo_image(image, registry, task_vars): + if registry not in self.reachable_registries: + self.reachable_registries[registry] = self.connect_to_registry(registry) + if not self.reachable_registries[registry]: + continue + + args = {"_raw_params": self.skopeo_img_check_command.format(registry=registry, image=image)} + result = self.execute_module_with_retries("command", args) + if result.get("rc", 0) == 0 and not result.get("failed"): return True + if result.get("rc") == 124: # RC 124 == timed out; mark unreachable + self.reachable_registries[registry] = False return False - def is_available_skopeo_image(self, image, registry, task_vars): - """Uses Skopeo to determine if required image exists in a given registry.""" - - cmd_str = "skopeo inspect docker://{registry}/{image}".format( - registry=registry, - image=image, - ) - - args = { - "name": "skopeo_inspect", - "image": self.skopeo_image, - "command": cmd_str, - "detach": False, - "cleanup": True, - } - result = self.module_executor("docker_container", args, task_vars) - return result.get("failed", False) - - def containerized_docker_images(self, base_name, version): - return [ - "{image}:{version}".format(image=self.image_from_base_name(base_name), version=version) - ] - - @staticmethod - def rpm_docker_images(base, version): - return [ - "{image_repo}/registry-console:{version}".format(image_repo=base["repo"], version=version) - ] - - @staticmethod - def qualified_docker_images(image_name, version): - return [ - "{}-{}:{}".format(image_name, component, version) - for component in "haproxy-router docker-registry deployer pod".split() - ] - - @staticmethod - def image_from_base_name(base): - return "".join([base["repo"], "/", base["image"]]) - - # ensures that the skopeo docker image exists, and updates it - # with latest if image was already present locally. - def update_skopeo_image(self, task_vars): - result = self.module_executor("docker_image", {"name": self.skopeo_image}, task_vars) - return result.get("msg", ""), result.get("failed", False), result.get("changed", False) + def connect_to_registry(self, registry): + """Use ansible wait_for module to test connectivity from host to registry. Returns bool.""" + # test a simple TCP connection + host, _, port = registry.partition(":") + port = port or 443 + args = dict(host=host, port=port, state="started", timeout=30) + result = self.execute_module("wait_for", args) + return result.get("rc", 0) == 0 and not result.get("failed") |