diff options
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks')
5 files changed, 93 insertions, 44 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index 962148cb8..e93e81efa 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -1,9 +1,12 @@ -# pylint: disable=missing-docstring +"""Check that there is enough disk space in predefined paths.""" + +import os.path +import tempfile + from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var -from openshift_checks.mixins import NotContainerizedMixin -class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): +class DiskAvailability(OpenShiftCheck): """Check that recommended disk space is available before a first-time install.""" name = "disk_availability" @@ -12,56 +15,101 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck): # Values taken from the official installation documentation: # https://docs.openshift.org/latest/install_config/install/prerequisites.html#system-requirements recommended_disk_space_bytes = { - "masters": 40 * 10**9, - "nodes": 15 * 10**9, - "etcd": 20 * 10**9, + '/var': { + 'masters': 40 * 10**9, + 'nodes': 15 * 10**9, + 'etcd': 20 * 10**9, + }, + # Used to copy client binaries into, + # see roles/openshift_cli/library/openshift_container_binary_sync.py. + '/usr/local/bin': { + 'masters': 1 * 10**9, + 'nodes': 1 * 10**9, + 'etcd': 1 * 10**9, + }, + # Used as temporary storage in several cases. + tempfile.gettempdir(): { + 'masters': 1 * 10**9, + 'nodes': 1 * 10**9, + 'etcd': 1 * 10**9, + }, } @classmethod def is_active(cls, task_vars): """Skip hosts that do not have recommended disk space requirements.""" group_names = get_var(task_vars, "group_names", default=[]) - has_disk_space_recommendation = bool(set(group_names).intersection(cls.recommended_disk_space_bytes)) + active_groups = set() + for recommendation in cls.recommended_disk_space_bytes.values(): + active_groups.update(recommendation.keys()) + has_disk_space_recommendation = bool(active_groups.intersection(group_names)) return super(DiskAvailability, cls).is_active(task_vars) and has_disk_space_recommendation def run(self, tmp, task_vars): group_names = get_var(task_vars, "group_names") ansible_mounts = get_var(task_vars, "ansible_mounts") - free_bytes = self.openshift_available_disk(ansible_mounts) - - recommended_min = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names) - configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9 - min_free_bytes = configured_min or recommended_min - - if free_bytes < min_free_bytes: - return { - 'failed': True, - 'msg': ( - 'Available disk space ({:.1f} GB) for the volume containing ' - '"/var" is below minimum recommended space ({:.1f} GB)' - ).format(float(free_bytes) / 10**9, float(min_free_bytes) / 10**9) + ansible_mounts = {mount['mount']: mount for mount in ansible_mounts} + + user_config = get_var(task_vars, "openshift_check_min_host_disk_gb", default={}) + try: + # For backwards-compatibility, if openshift_check_min_host_disk_gb + # is a number, then it overrides the required config for '/var'. + number = float(user_config) + user_config = { + '/var': { + 'masters': number, + 'nodes': number, + 'etcd': number, + }, } + except TypeError: + # If it is not a number, then it should be a nested dict. + pass + + # TODO: as suggested in + # https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021, + # maybe we could support checking disk availability in paths that are + # not part of the official recommendation but present in the user + # configuration. + for path, recommendation in self.recommended_disk_space_bytes.items(): + free_bytes = self.free_bytes(path, ansible_mounts) + recommended_bytes = max(recommendation.get(name, 0) for name in group_names) + + config = user_config.get(path, {}) + # NOTE: the user config is in GB, but we compare bytes, thus the + # conversion. + config_bytes = max(config.get(name, 0) for name in group_names) * 10**9 + recommended_bytes = config_bytes or recommended_bytes + + if free_bytes < recommended_bytes: + free_gb = float(free_bytes) / 10**9 + recommended_gb = float(recommended_bytes) / 10**9 + return { + 'failed': True, + 'msg': ( + 'Available disk space in "{}" ({:.1f} GB) ' + 'is below minimum recommended ({:.1f} GB)' + ).format(path, free_gb, recommended_gb) + } return {} @staticmethod - def openshift_available_disk(ansible_mounts): - """Determine the available disk space for an OpenShift installation. - - ansible_mounts should be a list of dicts like the 'setup' Ansible module - returns. - """ - # priority list in descending order - supported_mnt_paths = ["/var", "/"] - available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts} + def free_bytes(path, ansible_mounts): + """Return the size available in path based on ansible_mounts.""" + mount_point = path + # arbitry value to prevent an infinite loop, in the unlike case that '/' + # is not in ansible_mounts. + max_depth = 32 + while mount_point not in ansible_mounts and max_depth > 0: + mount_point = os.path.dirname(mount_point) + max_depth -= 1 try: - for path in supported_mnt_paths: - if path in available_mnts: - return available_mnts[path]["size_available"] + free_bytes = ansible_mounts[mount_point]['size_available'] except KeyError: - pass + known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(ansible_mounts)) or 'none' + msg = 'Unable to determine disk availability for "{}". Known mount points: {}.' + raise OpenShiftCheckException(msg.format(path, known_mounts)) - paths = ''.join(sorted(available_mnts)) or 'none' - msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths) - raise OpenShiftCheckException(msg) + return free_bytes diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 60aacf715..bde81ad2c 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -94,7 +94,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): required = set() deployment_type = get_var(task_vars, "openshift_deployment_type") host_groups = get_var(task_vars, "group_names") - image_tag = get_var(task_vars, "openshift_image_tag") + # containerized etcd may not have openshift_image_tag, see bz 1466622 + image_tag = get_var(task_vars, "openshift_image_tag", default="latest") image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] if not image_info: return required @@ -169,7 +170,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): registries = [registry] for registry in registries: - args = {"_raw_params": "skopeo inspect docker://{}/{}".format(registry, image)} + args = {"_raw_params": "skopeo inspect --tls-verify=false docker://{}/{}".format(registry, image)} result = self.execute_module("command", args, task_vars=task_vars) if result.get("rc", 0) == 0 and not result.get("failed"): return True diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index 2bd615457..e80691ef3 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -17,7 +17,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): tags = ["pre-install", "health", "preflight"] dependencies = ["python-docker-py"] - storage_drivers = ["devicemapper", "overlay2"] + storage_drivers = ["devicemapper", "overlay", "overlay2"] max_thinpool_data_usage_percent = 90.0 max_thinpool_meta_usage_percent = 90.0 @@ -143,7 +143,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): "so the available storage in the VG cannot be determined.".format(pool) ) vg_name = match.groups()[0].replace("--", "-") - vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name + vgs_cmd = "/sbin/vgs --noheadings -o vg_free --units g --select vg_name=" + vg_name # should return free space like " 12.00g" if the VG exists; empty if it does not ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py index 442f407b1..551e8dfa0 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/kibana.py +++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py @@ -62,7 +62,7 @@ class Kibana(LoggingCheck): # TODO(lmeyer): give users option to validate certs status_code=302, ) - result = self.execute_module('uri', args, task_vars) + result = self.execute_module('uri', args, None, task_vars) if result.get('failed'): return result['msg'] return None diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py index 05b4d300c..6e951e82c 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py @@ -54,12 +54,12 @@ class LoggingCheck(OpenShiftCheck): """Returns: list of pods not in a ready and running state""" return [ pod for pod in pods - if any( + if not pod.get("status", {}).get("containerStatuses") or any( container['ready'] is False for container in pod['status']['containerStatuses'] ) or not any( condition['type'] == 'Ready' and condition['status'] == 'True' - for condition in pod['status']['conditions'] + for condition in pod['status'].get('conditions', []) ) ] @@ -78,7 +78,7 @@ class LoggingCheck(OpenShiftCheck): "extra_args": list(extra_args) if extra_args else [], } - result = execute_module("ocutil", args, task_vars) + result = execute_module("ocutil", args, None, task_vars) if result.get("failed"): msg = ( 'Unexpected error using `oc` to validate the logging stack components.\n' |