diff options
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks')
10 files changed, 104 insertions, 75 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index cdf56e959..7956559c6 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -15,31 +15,31 @@ class DiskAvailability(OpenShiftCheck): # https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements recommended_disk_space_bytes = { '/var': { - 'masters': 40 * 10**9, - 'nodes': 15 * 10**9, - 'etcd': 20 * 10**9, + 'oo_masters_to_config': 40 * 10**9, + 'oo_nodes_to_config': 15 * 10**9, + 'oo_etcd_to_config': 20 * 10**9, }, # Used to copy client binaries into, # see roles/openshift_cli/library/openshift_container_binary_sync.py. '/usr/local/bin': { - 'masters': 1 * 10**9, - 'nodes': 1 * 10**9, - 'etcd': 1 * 10**9, + 'oo_masters_to_config': 1 * 10**9, + 'oo_nodes_to_config': 1 * 10**9, + 'oo_etcd_to_config': 1 * 10**9, }, # Used as temporary storage in several cases. tempfile.gettempdir(): { - 'masters': 1 * 10**9, - 'nodes': 1 * 10**9, - 'etcd': 1 * 10**9, + 'oo_masters_to_config': 1 * 10**9, + 'oo_nodes_to_config': 1 * 10**9, + 'oo_etcd_to_config': 1 * 10**9, }, } # recommended disk space for each location under an upgrade context recommended_disk_upgrade_bytes = { '/var': { - 'masters': 10 * 10**9, - 'nodes': 5 * 10 ** 9, - 'etcd': 5 * 10 ** 9, + 'oo_masters_to_config': 10 * 10**9, + 'oo_nodes_to_config': 5 * 10 ** 9, + 'oo_etcd_to_config': 5 * 10 ** 9, }, } @@ -61,9 +61,9 @@ class DiskAvailability(OpenShiftCheck): number = float(user_config) user_config = { '/var': { - 'masters': number, - 'nodes': number, - 'etcd': number, + 'oo_masters_to_config': number, + 'oo_nodes_to_config': number, + 'oo_etcd_to_config': number, }, } except TypeError: diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index fa07c1dde..7c8ac78fe 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,5 +1,6 @@ """Check that required Docker images are available.""" +from pipes import quote from ansible.module_utils import six from openshift_checks import OpenShiftCheck from openshift_checks.mixins import DockerHostMixin @@ -33,10 +34,39 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): # we use python-docker-py to check local docker for images, and skopeo # to look for images available remotely without waiting to pull them. dependencies = ["python-docker-py", "skopeo"] - skopeo_img_check_command = "timeout 10 skopeo inspect --tls-verify=false docker://{registry}/{image}" + # command for checking if remote registries have an image, without docker pull + skopeo_command = "timeout 10 skopeo inspect --tls-verify={tls} {creds} docker://{registry}/{image}" + skopeo_example_command = "skopeo inspect [--tls-verify=false] [--creds=<user>:<pass>] docker://<registry>/<image>" def __init__(self, *args, **kwargs): super(DockerImageAvailability, self).__init__(*args, **kwargs) + + self.registries = dict( + # set of registries that need to be checked insecurely (note: not accounting for CIDR entries) + insecure=set(self.ensure_list("openshift_docker_insecure_registries")), + # set of registries that should never be queried even if given in the image + blocked=set(self.ensure_list("openshift_docker_blocked_registries")), + ) + + # ordered list of registries (according to inventory vars) that docker will try for unscoped images + regs = self.ensure_list("openshift_docker_additional_registries") + # currently one of these registries is added whether the user wants it or not. + deployment_type = self.get_var("openshift_deployment_type") + if deployment_type == "origin" and "docker.io" not in regs: + regs.append("docker.io") + elif deployment_type == 'openshift-enterprise' and "registry.access.redhat.com" not in regs: + regs.append("registry.access.redhat.com") + self.registries["configured"] = regs + + # for the oreg_url registry there may be credentials specified + components = self.get_var("oreg_url", default="").split('/') + self.registries["oreg"] = "" if len(components) < 3 else components[0] + self.skopeo_command_creds = "" + oreg_auth_user = self.get_var('oreg_auth_user', default='') + oreg_auth_password = self.get_var('oreg_auth_password', default='') + if oreg_auth_user != '' and oreg_auth_password != '': + self.skopeo_command_creds = "--creds={}:{}".format(quote(oreg_auth_user), quote(oreg_auth_password)) + # record whether we could reach a registry or not (and remember results) self.reachable_registries = {} @@ -62,26 +92,25 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): if not missing_images: return {} - registries = self.known_docker_registries() - if not registries: - return {"failed": True, "msg": "Unable to retrieve any docker registries."} - - available_images = self.available_images(missing_images, registries) + available_images = self.available_images(missing_images) unavailable_images = set(missing_images) - set(available_images) if unavailable_images: - registries = [ - reg if self.reachable_registries.get(reg, True) else reg + " (unreachable)" - for reg in registries - ] + unreachable = [reg for reg, reachable in self.reachable_registries.items() if not reachable] + unreachable_msg = "Failed connecting to: {}\n".format(", ".join(unreachable)) + blocked_msg = "Blocked registries: {}\n".format(", ".join(self.registries["blocked"])) msg = ( - "One or more required Docker images are not available:\n {}\n" - "Configured registries: {}\n" - "Checked by: {}" + "One or more required container images are not available:\n {missing}\n" + "Checked with: {cmd}\n" + "Default registries searched: {registries}\n" + "{blocked}" + "{unreachable}" ).format( - ",\n ".join(sorted(unavailable_images)), - ", ".join(registries), - self.skopeo_img_check_command + missing=",\n ".join(sorted(unavailable_images)), + cmd=self.skopeo_example_command, + registries=", ".join(self.registries["configured"]), + blocked=blocked_msg if self.registries["blocked"] else "", + unreachable=unreachable_msg if unreachable else "", ) return dict(failed=True, msg=msg) @@ -114,7 +143,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): # template for images that run on top of OpenShift image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") image_url = self.get_var("oreg_url", default="") or image_url - if 'nodes' in host_groups: + if 'oo_nodes_to_config' in host_groups: for suffix in NODE_IMAGE_SUFFIXES: required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) # The registry-console is for some reason not prefixed with ose- like the other components. @@ -125,24 +154,23 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): # images for containerized components if self.get_var("openshift", "common", "is_containerized"): components = set() - if 'nodes' in host_groups: + if 'oo_nodes_to_config' in host_groups: components.update(["node", "openvswitch"]) - if 'masters' in host_groups: # name is "origin" or "ose" + if 'oo_masters_to_config' in host_groups: # name is "origin" or "ose" components.add(image_info["name"]) for component in components: required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag)) - if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise + if 'oo_etcd_to_config' in host_groups: # special case, note it is the same for origin/enterprise required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag return required def local_images(self, images): """Filter a list of images and return those available locally.""" - registries = self.known_docker_registries() found_images = [] for image in images: # docker could have the image name as-is or prefixed with any registry - imglist = [image] + [reg + "/" + image for reg in registries] + imglist = [image] + [reg + "/" + image for reg in self.registries["configured"]] if self.is_image_local(imglist): found_images.append(image) return found_images @@ -152,37 +180,27 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): result = self.execute_module("docker_image_facts", {"name": image}) return bool(result.get("images")) and not result.get("failed") - def known_docker_registries(self): - """Build a list of docker registries available according to inventory vars.""" - regs = self.get_var("openshift_docker_additional_registries", default=[]) + def ensure_list(self, registry_param): + """Return the task var as a list.""" # https://bugzilla.redhat.com/show_bug.cgi?id=1497274 - # if the result was a string type, place it into a list. We must do this + # If the result was a string type, place it into a list. We must do this # as using list() on a string will split the string into its characters. - if isinstance(regs, six.string_types): - regs = [regs] - else: - # Otherwise cast to a list as was done previously - regs = list(regs) + # Otherwise cast to a list as was done previously. + registry = self.get_var(registry_param, default=[]) + if not isinstance(registry, six.string_types): + return list(registry) + return self.normalize(registry) - deployment_type = self.get_var("openshift_deployment_type") - if deployment_type == "origin" and "docker.io" not in regs: - regs.append("docker.io") - elif deployment_type == 'openshift-enterprise' and "registry.access.redhat.com" not in regs: - regs.append("registry.access.redhat.com") - - return regs - - def available_images(self, images, default_registries): + def available_images(self, images): """Search remotely for images. Returns: list of images found.""" return [ image for image in images - if self.is_available_skopeo_image(image, default_registries) + if self.is_available_skopeo_image(image) ] - def is_available_skopeo_image(self, image, default_registries): + def is_available_skopeo_image(self, image): """Use Skopeo to determine if required image exists in known registry(s).""" - registries = default_registries - + registries = self.registries["configured"] # If image already includes a registry, only use that. # NOTE: This logic would incorrectly identify images that do not use a namespace, e.g. # registry.access.redhat.com/rhel7 as if the registry were a namespace. @@ -193,13 +211,18 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): registries = [registry] for registry in registries: + if registry in self.registries["blocked"]: + continue # blocked will never be consulted if registry not in self.reachable_registries: self.reachable_registries[registry] = self.connect_to_registry(registry) if not self.reachable_registries[registry]: - continue + continue # do not keep trying unreachable registries + + args = dict(registry=registry, image=image) + args["tls"] = "false" if registry in self.registries["insecure"] else "true" + args["creds"] = self.skopeo_command_creds if registry == self.registries["oreg"] else "" - args = {"_raw_params": self.skopeo_img_check_command.format(registry=registry, image=image)} - result = self.execute_module_with_retries("command", args) + result = self.execute_module_with_retries("command", {"_raw_params": self.skopeo_command.format(**args)}) if result.get("rc", 0) == 0 and not result.get("failed"): return True if result.get("rc") == 124: # RC 124 == timed out; mark unreachable diff --git a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py index b4c8957e9..8b20ccb49 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py @@ -12,7 +12,7 @@ class EtcdTraffic(OpenShiftCheck): def is_active(self): """Skip hosts that do not have etcd in their group names.""" group_names = self.get_var("group_names", default=[]) - valid_group_names = "etcd" in group_names + valid_group_names = "oo_etcd_to_config" in group_names version = self.get_major_minor_version(self.get_var("openshift_image_tag")) valid_version = version in ((3, 4), (3, 5)) diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py index 79955cb2f..3d75da6f9 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py @@ -15,7 +15,11 @@ class EtcdVolume(OpenShiftCheck): etcd_mount_path = "/var/lib/etcd" def is_active(self): - etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or [] + etcd_hosts = ( + self.get_var("groups", "oo_etcd_to_config", default=[]) or + self.get_var("groups", "oo_masters_to_config", default=[]) or + [] + ) is_etcd_host = self.get_var("ansible_host") in etcd_hosts return super(EtcdVolume, self).is_active() and is_etcd_host diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py index d783e6760..e93cc9028 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py +++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd_config.py @@ -46,7 +46,7 @@ class FluentdConfig(LoggingCheck): # if check is running on a master, retrieve all running pods # and check any pod's container for the env var "USE_JOURNAL" group_names = self.get_var("group_names") - if "masters" in group_names: + if "oo_masters_to_config" in group_names: use_journald = self.check_fluentd_env_var() docker_info = self.execute_module("docker_info", {}) diff --git a/roles/openshift_health_checker/openshift_checks/memory_availability.py b/roles/openshift_health_checker/openshift_checks/memory_availability.py index 765ba072d..e7a8ec976 100644 --- a/roles/openshift_health_checker/openshift_checks/memory_availability.py +++ b/roles/openshift_health_checker/openshift_checks/memory_availability.py @@ -14,9 +14,9 @@ class MemoryAvailability(OpenShiftCheck): # Values taken from the official installation documentation: # https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements recommended_memory_bytes = { - "masters": 16 * GIB, - "nodes": 8 * GIB, - "etcd": 8 * GIB, + "oo_masters_to_config": 16 * GIB, + "oo_nodes_to_config": 8 * GIB, + "oo_etcd_to_config": 8 * GIB, } # https://access.redhat.com/solutions/3006511 physical RAM is partly reserved from memtotal memtotal_adjustment = 1 * GIB diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index b90ebf6dd..cfbdea303 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -21,9 +21,11 @@ class DockerHostMixin(object): def is_active(self): """Only run on hosts that depend on Docker.""" - is_containerized = self.get_var("openshift", "common", "is_containerized") - is_node = "nodes" in self.get_var("group_names", default=[]) - return super(DockerHostMixin, self).is_active() and (is_containerized or is_node) + group_names = set(self.get_var("group_names", default=[])) + needs_docker = set(["oo_nodes_to_config"]) + if self.get_var("openshift.common.is_containerized"): + needs_docker.update(["oo_masters_to_config", "oo_etcd_to_config"]) + return super(DockerHostMixin, self).is_active() and bool(group_names.intersection(needs_docker)) def ensure_dependencies(self): """ diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 363c12def..416805c4d 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -24,7 +24,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): def is_active(self): """Skip hosts that do not have package requirements.""" group_names = self.get_var("group_names", default=[]) - master_or_node = 'masters' in group_names or 'nodes' in group_names + master_or_node = 'oo_masters_to_config' in group_names or 'oo_nodes_to_config' in group_names return super(OvsVersion, self).is_active() and master_or_node def run(self): diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index 21355c2f0..090e438ff 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -20,9 +20,9 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages = set() - if "masters" in group_names: + if "oo_masters_to_config" in group_names: packages.update(self.master_packages(rpm_prefix)) - if "nodes" in group_names: + if "oo_nodes_to_config" in group_names: packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index d4aec3ed8..2f09b22fc 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -36,7 +36,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): def is_active(self): """Skip hosts that do not have package requirements.""" group_names = self.get_var("group_names", default=[]) - master_or_node = 'masters' in group_names or 'nodes' in group_names + master_or_node = 'oo_masters_to_config' in group_names or 'oo_nodes_to_config' in group_names return super(PackageVersion, self).is_active() and master_or_node def run(self): |