1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
"""A health check for OpenShift clusters."""
from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
class EtcdVolume(OpenShiftCheck):
    """Ensures etcd storage usage does not exceed a given threshold.

    The threshold is a percentage read from the
    ``etcd_device_usage_threshold_percent`` variable; when that variable is
    not set, ``default_etcd_device_usage_threshold_percent`` is used.
    """

    name = "etcd_volume"
    tags = ["etcd", "health"]

    # pylint: disable=invalid-name
    default_etcd_device_usage_threshold_percent = 90
    # where to find etcd data, higher priority first.
    supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]

    @classmethod
    def is_active(cls, task_vars):
        """Return whether this check should run; defers to the parent class."""
        # TODO: only execute this check on hosts in the 'etcd' group?
        # Maybe also 'masters' if there are no standalone etcd hosts?
        return super(EtcdVolume, cls).is_active(task_vars)

    def run(self, tmp, task_vars):
        """Compare etcd storage usage against the configured threshold.

        Args:
            tmp: unused here; kept for the check interface.
            task_vars: variables passed to ``get_var`` to look up
                ``ansible_mounts`` and the optional threshold override.

        Returns:
            ``{"failed": True, "msg": ...}`` when the used percentage is
            strictly above the threshold, otherwise ``{"changed": False}``.

        Raises:
            OpenShiftCheckException: if no supported mount point is found,
                the mount facts lack size information, the reported total
                size is not positive, or the threshold is not a number.
        """
        mount_info = self._etcd_mount_info(task_vars)
        device = mount_info.get("device", "unknown")
        mount = mount_info.get("mount", "unknown")

        # Report missing facts as a check error rather than a bare KeyError.
        missing = [key for key in ("size_available", "size_total") if key not in mount_info]
        if missing:
            msg = "Unable to determine etcd storage usage, missing mount facts: {}. Device: {}, mount: {}.".format(
                ", ".join(missing), device, mount
            )
            raise OpenShiftCheckException(msg)

        available = mount_info["size_available"]
        total = mount_info["size_total"]

        # A zero (or negative) total would make the percentage below
        # undefined and crash with ZeroDivisionError.
        if total <= 0:
            msg = "Unable to determine etcd storage usage, invalid total size ({}). Device: {}, mount: {}.".format(
                total, device, mount
            )
            raise OpenShiftCheckException(msg)

        used = total - available

        threshold = get_var(
            task_vars,
            "etcd_device_usage_threshold_percent",
            default=self.default_etcd_device_usage_threshold_percent
        )
        # The override may arrive as a string (presumably when set via
        # inventory or extra vars -- verify against callers); coerce it so
        # the comparison and the "{:.1f}" formatting below behave.
        try:
            threshold = float(threshold)
        except (TypeError, ValueError):
            msg = "Invalid value for etcd_device_usage_threshold_percent: {!r}.".format(threshold)
            raise OpenShiftCheckException(msg)

        used_percent = 100.0 * used / total

        if used_percent > threshold:
            msg = "etcd storage usage ({:.1f}%) is above threshold ({:.1f}%). Device: {}, mount: {}.".format(
                used_percent, threshold, device, mount
            )
            return {"failed": True, "msg": msg}

        return {"changed": False}

    def _etcd_mount_info(self, task_vars):
        """Return the mount entry that holds etcd data.

        Entries of ``ansible_mounts`` are indexed by their ``mount`` value and
        the first path from ``supported_mount_paths`` that is present wins.

        Raises:
            OpenShiftCheckException: if none of the supported paths is mounted.
        """
        ansible_mounts = get_var(task_vars, "ansible_mounts")
        mounts = {mnt.get("mount"): mnt for mnt in ansible_mounts}

        for path in self.supported_mount_paths:
            if path in mounts:
                return mounts[path]

        paths = ', '.join(sorted(mounts)) or 'none'
        msg = "Unable to find etcd storage mount point. Paths mounted: {}.".format(paths)
        raise OpenShiftCheckException(msg)
|