diff options
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks')
4 files changed, 80 insertions, 9 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index 28cb53cc5..ce05b44a4 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -13,6 +13,7 @@ from importlib import import_module from ansible.module_utils import six from ansible.module_utils.six.moves import reduce # pylint: disable=import-error,redefined-builtin +from ansible.module_utils.six import string_types from ansible.plugins.filter.core import to_bool as ansible_to_bool @@ -110,6 +111,11 @@ class OpenShiftCheck(object): """Returns true if this check applies to the ansible-playbook run.""" return True + def is_first_master(self): + """Determine if running on first master. Returns: bool""" + masters = self.get_var("groups", "oo_first_master", default=None) or [None] + return masters[0] == self.get_var("ansible_host") + @abstractmethod def run(self): """Executes a check against a host and returns a result hash similar to Ansible modules. @@ -283,6 +289,17 @@ class OpenShiftCheck(object): )) @staticmethod + def normalize(name_list): + """Return a clean list of names. + + The input may be a comma-separated string or a sequence. Leading and + trailing whitespace characters are removed. Empty items are discarded. + """ + if isinstance(name_list, string_types): + name_list = name_list.split(',') + return [name.strip() for name in name_list if name.strip()] + + @staticmethod def get_major_minor_version(openshift_image_tag): """Parse and return the deployed version of OpenShift as a tuple.""" if openshift_image_tag and openshift_image_tag[0] == 'v': diff --git a/roles/openshift_health_checker/openshift_checks/diagnostics.py b/roles/openshift_health_checker/openshift_checks/diagnostics.py new file mode 100644 index 000000000..1cfdc1129 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/diagnostics.py @@ -0,0 +1,62 @@ +""" +A check to run relevant diagnostics via `oc adm diagnostics`. +""" + +import os + +from openshift_checks import OpenShiftCheck, OpenShiftCheckException + + +DIAGNOSTIC_LIST = ( + "AggregatedLogging ClusterRegistry ClusterRoleBindings ClusterRoles " + "ClusterRouter DiagnosticPod NetworkCheck" +).split() + + +class DiagnosticCheck(OpenShiftCheck): + """A check to run relevant diagnostics via `oc adm diagnostics`.""" + + name = "diagnostics" + tags = ["health"] + + def is_active(self): + return super(DiagnosticCheck, self).is_active() and self.is_first_master() + + def run(self): + if self.exec_diagnostic("ConfigContexts"): + # only run the other diagnostics if that one succeeds (otherwise, all will fail) + diagnostics = self.get_var("openshift_check_diagnostics", default=DIAGNOSTIC_LIST) + for diagnostic in self.normalize(diagnostics): + self.exec_diagnostic(diagnostic) + return {} + + def exec_diagnostic(self, diagnostic): + """ + Execute an 'oc adm diagnostics' command on the remote host. + Raises OcNotFound or registers OcDiagFailed. + Returns True on success or False on failure (non-zero rc). + """ + config_base = self.get_var("openshift.common.config_base") + args = { + "config_file": os.path.join(config_base, "master", "admin.kubeconfig"), + "cmd": "adm diagnostics", + "extra_args": [diagnostic], + } + + result = self.execute_module("ocutil", args, save_as_name=diagnostic + ".failure.json") + self.register_file(diagnostic + ".txt", result['result']) + if result.get("failed"): + if result['result'] == '[Errno 2] No such file or directory': + raise OpenShiftCheckException( + "OcNotFound", + "This host is supposed to be a master but does not have the `oc` command where expected.\n" + "Has an installation been run on this host yet?" + ) + + self.register_failure(OpenShiftCheckException( + 'OcDiagFailed', + 'The {diag} diagnostic reported an error:\n' + '{error}'.format(diag=diagnostic, error=result['result']) + )) + return False + return True diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py index e5d93ff3f..79955cb2f 100644 --- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py +++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py @@ -16,7 +16,7 @@ class EtcdVolume(OpenShiftCheck): def is_active(self): etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or [] - is_etcd_host = self.get_var("ansible_ssh_host") in etcd_hosts + is_etcd_host = self.get_var("ansible_host") in etcd_hosts return super(EtcdVolume, self).is_active() and is_etcd_host def run(self): diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py index 06bdfebf6..05ba73ca1 100644 --- a/roles/openshift_health_checker/openshift_checks/logging/logging.py +++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py @@ -30,14 +30,6 @@ class LoggingCheck(OpenShiftCheck): logging_deployed = self.get_var("openshift_hosted_logging_deploy", convert=bool, default=False) return logging_deployed and super(LoggingCheck, self).is_active() and self.is_first_master() - def is_first_master(self): - """Determine if running on first master. Returns: bool""" - # Note: It would be nice to use membership in oo_first_master group, however for now it - # seems best to avoid requiring that setup and just check this is the first master. - hostname = self.get_var("ansible_ssh_host") or [None] - masters = self.get_var("groups", "masters", default=None) or [None] - return masters[0] == hostname - def run(self): return {} |