diff options
Diffstat (limited to 'roles/openshift_health_checker/openshift_checks/logging/kibana.py')
-rw-r--r-- | roles/openshift_health_checker/openshift_checks/logging/kibana.py | 226 |
1 files changed, 226 insertions, 0 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py new file mode 100644 index 000000000..3b1cf8baa --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py @@ -0,0 +1,226 @@ +""" +Module for performing checks on a Kibana logging deployment +""" + +import json +import ssl + +try: + from urllib2 import HTTPError, URLError + import urllib2 +except ImportError: + from urllib.error import HTTPError, URLError + import urllib.request as urllib2 + +from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException + + +class Kibana(LoggingCheck): + """Module that checks an integrated logging Kibana deployment""" + + name = "kibana" + tags = ["health", "logging"] + + def run(self): + """Check various things and gather errors. Returns: result as hash""" + + kibana_pods = self.get_pods_for_component("kibana") + self.check_kibana(kibana_pods) + self.check_kibana_route() + # TODO(lmeyer): run it all again for the ops cluster + + return {} + + def _verify_url_internal(self, url): + """ + Try to reach a URL from the host. + Returns: success (bool), reason (for failure) + """ + args = dict( + url=url, + follow_redirects='none', + validate_certs='no', # likely to be signed with internal CA + # TODO(lmeyer): give users option to validate certs + status_code=302, + ) + result = self.execute_module('uri', args) + if result.get('failed'): + return result['msg'] + return None + + @staticmethod + def _verify_url_external(url): + """ + Try to reach a URL from ansible control host. + Raise an OpenShiftCheckException if anything goes wrong. + """ + # This actually checks from the ansible control host, which may or may not + # really be "external" to the cluster. + + # Disable SSL cert validation to work around internally signed certs + ctx = ssl.create_default_context() + ctx.check_hostname = False # or setting CERT_NONE is refused + ctx.verify_mode = ssl.CERT_NONE + + # Verify that the url is returning a valid response + try: + # We only care if the url connects and responds + return_code = urllib2.urlopen(url, context=ctx).getcode() + except HTTPError as httperr: + return httperr.reason + except URLError as urlerr: + return str(urlerr) + + # there appears to be no way to prevent urlopen from following redirects + if return_code != 200: + return 'Expected success (200) but got return code {}'.format(int(return_code)) + + return None + + def check_kibana(self, pods): + """Check to see if Kibana is up and working. Raises OpenShiftCheckException if not.""" + + if not pods: + raise OpenShiftCheckException( + "MissingComponentPods", + "There are no Kibana pods deployed, so no access to the logging UI." + ) + + not_running = self.not_running_pods(pods) + if len(not_running) == len(pods): + raise OpenShiftCheckException( + "NoRunningPods", + "No Kibana pod is in a running state, so there is no access to the logging UI." + ) + elif not_running: + raise OpenShiftCheckException( + "PodNotRunning", + "The following Kibana pods are not currently in a running state:\n" + " {pods}\n" + "However at least one is, so service may not be impacted.".format( + pods="\n ".join(pod['metadata']['name'] for pod in not_running) + ) + ) + + def _get_kibana_url(self): + """ + Get kibana route or report error. + Returns: url + """ + + # Get logging url + get_route = self.exec_oc("get route logging-kibana -o json", []) + if not get_route: + raise OpenShiftCheckException( + 'no_route_exists', + 'No route is defined for Kibana in the logging namespace,\n' + 'so the logging stack is not accessible. Is logging deployed?\n' + 'Did something remove the logging-kibana route?' + ) + + try: + route = json.loads(get_route) + # check that the route has been accepted by a router + ingress = route["status"]["ingress"] + except (ValueError, KeyError): + raise OpenShiftCheckException( + 'get_route_failed', + '"oc get route" returned an unexpected response:\n' + get_route + ) + + # ingress can be null if there is no router, or empty if not routed + if not ingress or not ingress[0]: + raise OpenShiftCheckException( + 'route_not_accepted', + 'The logging-kibana route is not being routed by any router.\n' + 'Is the router deployed and working?' + ) + + host = route.get("spec", {}).get("host") + if not host: + raise OpenShiftCheckException( + 'route_missing_host', + 'The logging-kibana route has no hostname defined,\n' + 'which should never happen. Did something alter its definition?' + ) + + return 'https://{}/'.format(host) + + def check_kibana_route(self): + """ + Check to see if kibana route is up and working. + Raises exception if not. + """ + + kibana_url = self._get_kibana_url() + + # first, check that kibana is reachable from the master. + error = self._verify_url_internal(kibana_url) + if error: + if 'urlopen error [Errno 111] Connection refused' in error: + raise OpenShiftCheckException( + 'FailedToConnectInternal', + 'Failed to connect from this master to Kibana URL {url}\n' + 'Is kibana running, and is at least one router routing to it?'.format(url=kibana_url) + ) + elif 'urlopen error [Errno -2] Name or service not known' in error: + raise OpenShiftCheckException( + 'FailedToResolveInternal', + 'Failed to connect from this master to Kibana URL {url}\n' + 'because the hostname does not resolve.\n' + 'Is DNS configured for the Kibana hostname?'.format(url=kibana_url) + ) + elif 'Status code was not' in error: + raise OpenShiftCheckException( + 'WrongReturnCodeInternal', + 'A request from this master to the Kibana URL {url}\n' + 'did not return the correct status code (302).\n' + 'This could mean that Kibana is malfunctioning, the hostname is\n' + 'resolving incorrectly, or other network issues. The output was:\n' + ' {error}'.format(url=kibana_url, error=error) + ) + raise OpenShiftCheckException( + 'MiscRouteErrorInternal', + 'Error validating the logging Kibana route internally:\n' + error + ) + + # in production we would like the kibana route to work from outside the + # cluster too; but that may not be the case, so allow disabling just this part. + if self.get_var("openshift_check_efk_kibana_external", default="True").lower() != "true": + return + error = self._verify_url_external(kibana_url) + + if not error: + return + + error_fmt = ( + 'Error validating the logging Kibana route:\n{error}\n' + 'To disable external Kibana route validation, set the variable:\n' + ' openshift_check_efk_kibana_external=False' + ) + if 'urlopen error [Errno 111] Connection refused' in error: + msg = ( + 'Failed to connect from the Ansible control host to Kibana URL {url}\n' + 'Is the router for the Kibana hostname exposed externally?' + ).format(url=kibana_url) + raise OpenShiftCheckException('FailedToConnect', error_fmt.format(error=msg)) + elif 'urlopen error [Errno -2] Name or service not known' in error: + msg = ( + 'Failed to resolve the Kibana hostname in {url}\n' + 'from the Ansible control host.\n' + 'Is DNS configured to resolve this Kibana hostname externally?' + ).format(url=kibana_url) + raise OpenShiftCheckException('FailedToResolve', error_fmt.format(error=msg)) + elif 'Expected success (200)' in error: + msg = ( + 'A request to Kibana at {url}\n' + 'returned the wrong error code:\n' + ' {error}\n' + 'This could mean that Kibana is malfunctioning, the hostname is\n' + 'resolving incorrectly, or other network issues.' + ).format(url=kibana_url, error=error) + raise OpenShiftCheckException('WrongReturnCode', error_fmt.format(error=msg)) + raise OpenShiftCheckException( + 'MiscRouteError', + 'Error validating the logging Kibana route externally:\n' + error + ) |