diff options
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks/etcd_volume.py')
-rw-r--r-- | roles/openshift_health_checker/openshift_checks/etcd_volume.py | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py new file mode 100644 index 000000000..7452c9cc1 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py @@ -0,0 +1,58 @@ +"""A health check for OpenShift clusters.""" + +from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var + + +class EtcdVolume(OpenShiftCheck): + """Ensures etcd storage usage does not exceed a given threshold.""" + + name = "etcd_volume" + tags = ["etcd", "health"] + + # Default device usage threshold. Value should be in the range [0, 100]. + default_threshold_percent = 90 + # Where to find ectd data, higher priority first. + supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"] + + @classmethod + def is_active(cls, task_vars): + etcd_hosts = get_var(task_vars, "groups", "etcd", default=[]) or get_var(task_vars, "groups", "masters", + default=[]) or [] + is_etcd_host = get_var(task_vars, "ansible_ssh_host") in etcd_hosts + return super(EtcdVolume, cls).is_active(task_vars) and is_etcd_host + + def run(self, tmp, task_vars): + mount_info = self._etcd_mount_info(task_vars) + available = mount_info["size_available"] + total = mount_info["size_total"] + used = total - available + + threshold = get_var( + task_vars, + "etcd_device_usage_threshold_percent", + default=self.default_threshold_percent + ) + + used_percent = 100.0 * used / total + + if used_percent > threshold: + device = mount_info.get("device", "unknown") + mount = mount_info.get("mount", "unknown") + msg = "etcd storage usage ({:.1f}%) is above threshold ({:.1f}%). Device: {}, mount: {}.".format( + used_percent, threshold, device, mount + ) + return {"failed": True, "msg": msg} + + return {"changed": False} + + def _etcd_mount_info(self, task_vars): + ansible_mounts = get_var(task_vars, "ansible_mounts") + mounts = {mnt.get("mount"): mnt for mnt in ansible_mounts} + + for path in self.supported_mount_paths: + if path in mounts: + return mounts[path] + + paths = ', '.join(sorted(mounts)) or 'none' + msg = "Unable to find etcd storage mount point. Paths mounted: {}.".format(paths) + raise OpenShiftCheckException(msg) |