Diffstat (limited to 'roles')
12 files changed, 695 insertions, 45 deletions
diff --git a/roles/ansible_service_broker/defaults/main.yml b/roles/ansible_service_broker/defaults/main.yml
index aa1c5022b..12929b354 100644
--- a/roles/ansible_service_broker/defaults/main.yml
+++ b/roles/ansible_service_broker/defaults/main.yml
@@ -4,6 +4,7 @@ ansible_service_broker_remove: false
 ansible_service_broker_log_level: info
 ansible_service_broker_output_request: false
 ansible_service_broker_recovery: true
+ansible_service_broker_bootstrap_on_startup: true
 # Recommended you do not enable this for now
 ansible_service_broker_dev_broker: false
 ansible_service_broker_launch_apb_on_bind: false
diff --git a/roles/ansible_service_broker/tasks/install.yml b/roles/ansible_service_broker/tasks/install.yml
index 65dffc89b..b3797ef96 100644
--- a/roles/ansible_service_broker/tasks/install.yml
+++ b/roles/ansible_service_broker/tasks/install.yml
@@ -42,6 +42,14 @@
     namespace: openshift-ansible-service-broker
     state: present
 
+- name: Set SA cluster-role
+  oc_adm_policy_user:
+    state: present
+    namespace: "openshift-ansible-service-broker"
+    resource_kind: cluster-role
+    resource_name: admin
+    user: "system:serviceaccount:openshift-ansible-service-broker:asb"
+
 - name: create ansible-service-broker service
   oc_service:
     name: asb
@@ -254,6 +262,7 @@
       launch_apb_on_bind: {{ ansible_service_broker_launch_apb_on_bind | bool | lower }}
       recovery: {{ ansible_service_broker_recovery | bool | lower }}
       output_request: {{ ansible_service_broker_output_request | bool | lower }}
+      bootstrap_on_startup: {{ ansible_service_broker_bootstrap_on_startup | bool | lower }}
 
 - name: Create the Broker resource in the catalog
   oc_obj:
diff --git a/roles/docker/tasks/package_docker.yml b/roles/docker/tasks/package_docker.yml
index 5f536005d..bc52ab60c 100644
--- a/roles/docker/tasks/package_docker.yml
+++ b/roles/docker/tasks/package_docker.yml
@@ -93,7 +93,7 @@
     dest: /etc/sysconfig/docker
     regexp: '^OPTIONS=.*$'
    line: "OPTIONS='\
-      {% if ansible_selinux.status | default(None) == '''enabled''' and docker_selinux_enabled | default(true) %} --selinux-enabled {% endif %}\
+      {% if ansible_selinux.status | default(None) == 'enabled' and docker_selinux_enabled | default(true) | bool %} --selinux-enabled {% endif %}\
       {% if docker_log_driver is defined %} --log-driver {{ docker_log_driver }}{% endif %}\
       {% if docker_log_options is defined %} {{ docker_log_options | oo_split() | oo_prepend_strings_in_list('--log-opt ') | join(' ')}}{% endif %}\
       {% if docker_options is defined %} {{ docker_options }}{% endif %}\
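Note: the quoting fix above matters because the Jinja expression lives inside an already double-quoted YAML scalar, and the added "| bool" coerces string-valued inventory variables. A minimal sketch (not part of the commit) of how the corrected SELinux clause renders; Ansible's "bool" filter is stubbed with a lambda here since it is not a vanilla Jinja2 filter:

    import jinja2

    env = jinja2.Environment()
    # stand-in for Ansible's "bool" filter (not a built-in Jinja2 filter)
    env.filters["bool"] = lambda v: str(v).lower() in ("1", "true", "yes", "on")

    template = env.from_string(
        "OPTIONS='"
        "{% if ansible_selinux.status | default(None) == 'enabled'"
        " and docker_selinux_enabled | default(true) | bool %} --selinux-enabled {% endif %}'"
    )

    # string values such as "false" from an inventory are now coerced correctly
    print(template.render(ansible_selinux={"status": "enabled"}, docker_selinux_enabled="false"))
    # OPTIONS=''
    print(template.render(ansible_selinux={"status": "enabled"}, docker_selinux_enabled="true"))
    # OPTIONS=' --selinux-enabled '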
diff --git a/roles/openshift_default_storage_class/defaults/main.yml b/roles/openshift_default_storage_class/defaults/main.yml
index bda83c933..4f371fd89 100644
--- a/roles/openshift_default_storage_class/defaults/main.yml
+++ b/roles/openshift_default_storage_class/defaults/main.yml
@@ -12,6 +12,7 @@ openshift_storageclass_defaults:
     provisioner: kubernetes.io/gce-pd
     type: pd-standard
 
+openshift_storageclass_default: "true"
 openshift_storageclass_name: "{{ openshift_storageclass_defaults[openshift_cloudprovider_kind]['name'] }}"
 openshift_storageclass_provisioner: "{{ openshift_storageclass_defaults[openshift_cloudprovider_kind]['provisioner'] }}"
 openshift_storageclass_parameters: "{{ openshift_storageclass_defaults[openshift_cloudprovider_kind]['parameters'] }}"
diff --git a/roles/openshift_default_storage_class/tasks/main.yml b/roles/openshift_default_storage_class/tasks/main.yml
index fd5e4fabe..82cab6746 100644
--- a/roles/openshift_default_storage_class/tasks/main.yml
+++ b/roles/openshift_default_storage_class/tasks/main.yml
@@ -2,9 +2,8 @@
 # Install default storage classes in GCE & AWS
 - name: Ensure storageclass object
   oc_storageclass:
-    kind: storageclass
     name: "{{ openshift_storageclass_name }}"
-    default_storage_class: "true"
+    default_storage_class: "{{ openshift_storageclass_default | default('true') | string}}"
     parameters:
       type: "{{ openshift_storageclass_parameters.type | default('gp2') }}"
       encrypted: "{{ openshift_storageclass_parameters.encrypted | default('false') | string }}"
+ """ + matched_regexp = [] + errors = [] + + for matcher in matchers: + try: + log_output = get_log_output(matcher) + except LogInputSubprocessError as err: + errors.append(str(err)) + continue + + try: + matched = find_matches(log_output, matcher, log_count_limit, timestamp_limit_seconds) + if matched: + matched_regexp.append(matcher.get("regexp", "")) + except InvalidMatcherRegexp as err: + errors.append(str(err)) + except InvalidLogEntry as err: + errors.append(str(err)) + + return matched_regexp, errors + + +def get_log_output(matcher): + """Return an iterator on the logs of a given matcher.""" + try: + cmd_output = subprocess.Popen(list([ + '/bin/journalctl', + '-ru', matcher.get("unit", ""), + '--output', 'json', + ]), stdout=subprocess.PIPE) + + return iter(cmd_output.stdout.readline, '') + + except subprocess.CalledProcessError as exc: + msg = "Could not obtain journalctl logs for the specified systemd unit: {}: {}" + raise LogInputSubprocessError(msg.format(matcher.get("unit", "<missing>"), str(exc))) + except OSError as exc: + raise LogInputSubprocessError(str(exc)) + + +def find_matches(log_output, matcher, log_count_limit, timestamp_limit_seconds): + """Return log messages matched in iterable log_output by a given matcher. + + Ignore any log_output items older than timestamp_limit_seconds. + """ + try: + regexp = re.compile(matcher.get("regexp", "")) + start_regexp = re.compile(matcher.get("start_regexp", "")) + except re.error as err: + msg = "A log matcher object was provided with an invalid regular expression: {}" + raise InvalidMatcherRegexp(msg.format(str(err))) + + matched = None + + for log_count, line in enumerate(log_output): + if log_count >= log_count_limit: + break + + try: + obj = json.loads(line) + + # don't need to look past the most recent service restart + if start_regexp.match(obj["MESSAGE"]): + break + + log_timestamp_seconds = float(obj["__REALTIME_TIMESTAMP"]) / 1000000 + if log_timestamp_seconds < timestamp_limit_seconds: + break + + if regexp.match(obj["MESSAGE"]): + matched = line + break + + except ValueError: + msg = "Log entry for systemd unit {} contained invalid json syntax: {}" + raise InvalidLogEntry(msg.format(matcher.get("unit"), line)) + + return matched + + +if __name__ == '__main__': + main() diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index e80691ef3..d2227d244 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -1,5 +1,6 @@ """Check Docker storage driver and usage.""" import json +import os.path import re from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var from openshift_checks.mixins import DockerHostMixin @@ -20,10 +21,27 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): storage_drivers = ["devicemapper", "overlay", "overlay2"] max_thinpool_data_usage_percent = 90.0 max_thinpool_meta_usage_percent = 90.0 + max_overlay_usage_percent = 90.0 + + # TODO(lmeyer): mention these in the output when check fails + configuration_variables = [ + ( + "max_thinpool_data_usage_percent", + "For 'devicemapper' storage driver, usage threshold percentage for data. " + "Format: float. Default: {:.1f}".format(max_thinpool_data_usage_percent), + ), + ( + "max_thinpool_meta_usage_percent", + "For 'devicemapper' storage driver, usage threshold percentage for metadata. " + "Format: float. 
Default: {:.1f}".format(max_thinpool_meta_usage_percent), + ), + ( + "max_overlay_usage_percent", + "For 'overlay' or 'overlay2' storage driver, usage threshold percentage. " + "Format: float. Default: {:.1f}".format(max_overlay_usage_percent), + ), + ] - # pylint: disable=too-many-return-statements - # Reason: permanent stylistic exception; - # it is clearer to return on failures and there are just many ways to fail here. def run(self, tmp, task_vars): msg, failed, changed = self.ensure_dependencies(task_vars) if failed: @@ -34,17 +52,17 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): } # attempt to get the docker info hash from the API - info = self.execute_module("docker_info", {}, task_vars=task_vars) - if info.get("failed"): + docker_info = self.execute_module("docker_info", {}, task_vars=task_vars) + if docker_info.get("failed"): return {"failed": True, "changed": changed, "msg": "Failed to query Docker API. Is docker running on this host?"} - if not info.get("info"): # this would be very strange + if not docker_info.get("info"): # this would be very strange return {"failed": True, "changed": changed, - "msg": "Docker API query missing info:\n{}".format(json.dumps(info))} - info = info["info"] + "msg": "Docker API query missing info:\n{}".format(json.dumps(docker_info))} + docker_info = docker_info["info"] # check if the storage driver we saw is valid - driver = info.get("Driver", "[NONE]") + driver = docker_info.get("Driver", "[NONE]") if driver not in self.storage_drivers: msg = ( "Detected unsupported Docker storage driver '{driver}'.\n" @@ -53,26 +71,34 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): return {"failed": True, "changed": changed, "msg": msg} # driver status info is a list of tuples; convert to dict and validate based on driver - driver_status = {item[0]: item[1] for item in info.get("DriverStatus", [])} + driver_status = {item[0]: item[1] for item in docker_info.get("DriverStatus", [])} + + result = {} + if driver == "devicemapper": - if driver_status.get("Data loop file"): - msg = ( - "Use of loopback devices with the Docker devicemapper storage driver\n" - "(the default storage configuration) is unsupported in production.\n" - "Please use docker-storage-setup to configure a backing storage volume.\n" - "See http://red.ht/2rNperO for further information." - ) - return {"failed": True, "changed": changed, "msg": msg} - result = self._check_dm_usage(driver_status, task_vars) - result['changed'] = result.get('changed', False) or changed - return result + result = self.check_devicemapper_support(driver_status, task_vars) - # TODO(lmeyer): determine how to check usage for overlay2 + if driver in ['overlay', 'overlay2']: + result = self.check_overlay_support(docker_info, driver_status, task_vars) - return {"changed": changed} + result['changed'] = result.get('changed', False) or changed + return result - def _check_dm_usage(self, driver_status, task_vars): - """ + def check_devicemapper_support(self, driver_status, task_vars): + """Check if dm storage driver is supported as configured. Return: result dict.""" + if driver_status.get("Data loop file"): + msg = ( + "Use of loopback devices with the Docker devicemapper storage driver\n" + "(the default storage configuration) is unsupported in production.\n" + "Please use docker-storage-setup to configure a backing storage volume.\n" + "See http://red.ht/2rNperO for further information." 
+ ) + return {"failed": True, "msg": msg} + result = self.check_dm_usage(driver_status, task_vars) + return result + + def check_dm_usage(self, driver_status, task_vars): + """Check usage thresholds for Docker dm storage driver. Return: result dict. Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures devicemapper storage. The LV is "thin" because it does not use all available storage @@ -83,7 +109,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): could run out of space first; so we check both. """ vals = dict( - vg_free=self._get_vg_free(driver_status.get("Pool Name"), task_vars), + vg_free=self.get_vg_free(driver_status.get("Pool Name"), task_vars), data_used=driver_status.get("Data Space Used"), data_total=driver_status.get("Data Space Total"), metadata_used=driver_status.get("Metadata Space Used"), @@ -93,7 +119,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): # convert all human-readable strings to bytes for key, value in vals.copy().items(): try: - vals[key + "_bytes"] = self._convert_to_bytes(value) + vals[key + "_bytes"] = self.convert_to_bytes(value) except ValueError as err: # unlikely to hit this from API info, but just to be safe return { "failed": True, @@ -131,10 +157,12 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."]) return vals - def _get_vg_free(self, pool, task_vars): - # Determine which VG to examine according to the pool name, the only indicator currently - # available from the Docker API driver info. We assume a name that looks like - # "vg--name-docker--pool"; vg and lv names with inner hyphens doubled, joined by a hyphen. + def get_vg_free(self, pool, task_vars): + """Determine which VG to examine according to the pool name. Return: size vgs reports. + Pool name is the only indicator currently available from the Docker API driver info. + We assume a name that looks like "vg--name-docker--pool"; + vg and lv names with inner hyphens doubled, joined by a hyphen. + """ match = re.match(r'((?:[^-]|--)+)-(?!-)', pool) # matches up to the first single hyphen if not match: # unlikely, but... be clear if we assumed wrong raise OpenShiftCheckException( @@ -163,7 +191,8 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): return size @staticmethod - def _convert_to_bytes(string): + def convert_to_bytes(string): + """Convert string like "10.3 G" to bytes (binary units assumed). Return: float bytes.""" units = dict( b=1, k=1024, @@ -183,3 +212,87 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): raise ValueError("Cannot convert to a byte size: " + string) return float(number) * multiplier + + def check_overlay_support(self, docker_info, driver_status, task_vars): + """Check if overlay storage driver is supported for this host. Return: result dict.""" + # check for xfs as backing store + backing_fs = driver_status.get("Backing Filesystem", "[NONE]") + if backing_fs != "xfs": + msg = ( + "Docker storage drivers 'overlay' and 'overlay2' are only supported with\n" + "'xfs' as the backing storage, but this host's storage is type '{fs}'." 
+ ).format(fs=backing_fs) + return {"failed": True, "msg": msg} + + # check support for OS and kernel version + o_s = docker_info.get("OperatingSystem", "[NONE]") + if "Red Hat Enterprise Linux" in o_s or "CentOS" in o_s: + # keep it simple, only check enterprise kernel versions; assume everyone else is good + kernel = docker_info.get("KernelVersion", "[NONE]") + kernel_arr = [int(num) for num in re.findall(r'\d+', kernel)] + if kernel_arr < [3, 10, 0, 514]: # rhel < 7.3 + msg = ( + "Docker storage drivers 'overlay' and 'overlay2' are only supported beginning with\n" + "kernel version 3.10.0-514; but Docker reports kernel version {version}." + ).format(version=kernel) + return {"failed": True, "msg": msg} + # NOTE: we could check for --selinux-enabled here but docker won't even start with + # that option until it's supported in the kernel so we don't need to. + + return self.check_overlay_usage(docker_info, task_vars) + + def check_overlay_usage(self, docker_info, task_vars): + """Check disk usage on OverlayFS backing store volume. Return: result dict.""" + path = docker_info.get("DockerRootDir", "/var/lib/docker") + "/" + docker_info["Driver"] + + threshold = get_var(task_vars, "max_overlay_usage_percent", default=self.max_overlay_usage_percent) + try: + threshold = float(threshold) + except ValueError: + return { + "failed": True, + "msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(threshold), + } + + mount = self.find_ansible_mount(path, get_var(task_vars, "ansible_mounts")) + try: + free_bytes = mount['size_available'] + total_bytes = mount['size_total'] + usage = 100.0 * (total_bytes - free_bytes) / total_bytes + except (KeyError, ZeroDivisionError): + return { + "failed": True, + "msg": "The ansible_mount found for path {} is invalid.\n" + "This is likely to be an Ansible bug. The record was:\n" + "{}".format(path, json.dumps(mount, indent=2)), + } + + if usage > threshold: + return { + "failed": True, + "msg": ( + "For Docker OverlayFS mount point {path},\n" + "usage percentage {pct:.1f} is higher than threshold {thresh:.1f}." + ).format(path=mount["mount"], pct=usage, thresh=threshold) + } + + return {} + + # TODO(lmeyer): migrate to base class + @staticmethod + def find_ansible_mount(path, ansible_mounts): + """Return the mount point for path from ansible_mounts.""" + + mount_for_path = {mount['mount']: mount for mount in ansible_mounts} + mount_point = path + while mount_point not in mount_for_path: + if mount_point in ["/", ""]: # "/" not in ansible_mounts??? + break + mount_point = os.path.dirname(mount_point) + + try: + return mount_for_path[mount_point] + except KeyError: + known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path)) or 'none' + msg = 'Unable to determine mount point for path "{}". Known mount points: {}.' 
+ raise OpenShiftCheckException(msg.format(path, known_mounts)) diff --git a/roles/openshift_health_checker/openshift_checks/etcd_traffic.py b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py new file mode 100644 index 000000000..40c87873d --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/etcd_traffic.py @@ -0,0 +1,47 @@ +"""Check that scans journalctl for messages caused as a symptom of increased etcd traffic.""" + +from openshift_checks import OpenShiftCheck, get_var + + +class EtcdTraffic(OpenShiftCheck): + """Check if host is being affected by an increase in etcd traffic.""" + + name = "etcd_traffic" + tags = ["health", "etcd"] + + @classmethod + def is_active(cls, task_vars): + """Skip hosts that do not have etcd in their group names.""" + group_names = get_var(task_vars, "group_names", default=[]) + valid_group_names = "etcd" in group_names + + version = get_var(task_vars, "openshift", "common", "short_version") + valid_version = version in ("3.4", "3.5", "1.4", "1.5") + + return super(EtcdTraffic, cls).is_active(task_vars) and valid_group_names and valid_version + + def run(self, tmp, task_vars): + is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") + unit = "etcd_container" if is_containerized else "etcd" + + log_matchers = [{ + "start_regexp": r"Starting Etcd Server", + "regexp": r"etcd: sync duration of [^,]+, expected less than 1s", + "unit": unit + }] + + match = self.execute_module("search_journalctl", { + "log_matchers": log_matchers, + }, task_vars) + + if match.get("matched"): + msg = ("Higher than normal etcd traffic detected.\n" + "OpenShift 3.4 introduced an increase in etcd traffic.\n" + "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n" + "Please refer to https://access.redhat.com/solutions/2916381 for more information.") + return {"failed": True, "msg": msg} + + if match.get("failed"): + return {"failed": True, "msg": "\n".join(match.get("errors"))} + + return {} diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py index bb25e3f66..99c529054 100644 --- a/roles/openshift_health_checker/test/docker_storage_test.py +++ b/roles/openshift_health_checker/test/docker_storage_test.py @@ -23,7 +23,8 @@ def test_is_active(is_containerized, group_names, is_active): assert DockerStorage.is_active(task_vars=task_vars) == is_active -non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} +def non_atomic_task_vars(): + return {"openshift": {"common": {"is_atomic": False}}} @pytest.mark.parametrize('docker_info, failed, expect_msg', [ @@ -56,7 +57,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} ( dict(info={ "Driver": "overlay2", - "DriverStatus": [] + "DriverStatus": [("Backing Filesystem", "xfs")], }), False, [], @@ -64,6 +65,27 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} ( dict(info={ "Driver": "overlay", + "DriverStatus": [("Backing Filesystem", "btrfs")], + }), + True, + ["storage is type 'btrfs'", "only supported with\n'xfs'"], + ), + ( + dict(info={ + "Driver": "overlay2", + "DriverStatus": [("Backing Filesystem", "xfs")], + "OperatingSystem": "Red Hat Enterprise Linux Server release 7.2 (Maipo)", + "KernelVersion": "3.10.0-327.22.2.el7.x86_64", + }), + True, + ["Docker reports kernel version 3.10.0-327"], + ), + ( + dict(info={ + "Driver": "overlay", + "DriverStatus": [("Backing Filesystem", "xfs")], + "OperatingSystem": "CentOS", + 
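Note: the pool-name parsing in get_vg_free above relies on LVM's convention of doubling any hyphen that occurs inside a VG or LV name. A small self-contained sketch (not part of the commit) of just that step:

    import re

    # dm pool for LV "docker-pool" in VG "vg-name", as reported by the Docker API
    pool = "vg--name-docker--pool"

    # group 1 greedily consumes non-hyphens and doubled hyphens,
    # stopping at the first single hyphen (the VG/LV separator)
    match = re.match(r'((?:[^-]|--)+)-(?!-)', pool)
    vg_name = match.group(1).replace("--", "-")
    print(vg_name)  # vg-name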
"KernelVersion": "3.10.0-514", }), False, [], @@ -85,8 +107,9 @@ def test_check_storage_driver(docker_info, failed, expect_msg): return docker_info check = dummy_check(execute_module=execute_module) - check._check_dm_usage = lambda status, task_vars: dict() # stub out for this test - result = check.run(tmp=None, task_vars=non_atomic_task_vars) + check.check_dm_usage = lambda status, task_vars: dict() # stub out for this test + check.check_overlay_usage = lambda info, task_vars: dict() # stub out for this test + result = check.run(tmp=None, task_vars=non_atomic_task_vars()) if failed: assert result["failed"] @@ -146,8 +169,8 @@ not_enough_space = { ]) def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): check = dummy_check() - check._get_vg_free = lambda pool, task_vars: vg_free - result = check._check_dm_usage(driver_status, task_vars) + check.get_vg_free = lambda pool, task_vars: vg_free + result = check.check_dm_usage(driver_status, task_vars) result_success = not result.get("failed") assert result_success is success @@ -195,10 +218,10 @@ def test_vg_free(pool, command_returns, raises, returns): check = dummy_check(execute_module=execute_module) if raises: with pytest.raises(OpenShiftCheckException) as err: - check._get_vg_free(pool, {}) + check.get_vg_free(pool, {}) assert raises in str(err.value) else: - ret = check._get_vg_free(pool, {}) + ret = check.get_vg_free(pool, {}) assert ret == returns @@ -209,7 +232,7 @@ def test_vg_free(pool, command_returns, raises, returns): ("12g", 12.0 * 1024**3), ]) def test_convert_to_bytes(string, expect_bytes): - got = DockerStorage._convert_to_bytes(string) + got = DockerStorage.convert_to_bytes(string) assert got == expect_bytes @@ -219,6 +242,70 @@ def test_convert_to_bytes(string, expect_bytes): ]) def test_convert_to_bytes_error(string): with pytest.raises(ValueError) as err: - DockerStorage._convert_to_bytes(string) + DockerStorage.convert_to_bytes(string) assert "Cannot convert" in str(err.value) assert string in str(err.value) + + +ansible_mounts_enough = [{ + 'mount': '/var/lib/docker', + 'size_available': 50 * 10**9, + 'size_total': 50 * 10**9, +}] +ansible_mounts_not_enough = [{ + 'mount': '/var/lib/docker', + 'size_available': 0, + 'size_total': 50 * 10**9, +}] +ansible_mounts_missing_fields = [dict(mount='/var/lib/docker')] +ansible_mounts_zero_size = [{ + 'mount': '/var/lib/docker', + 'size_available': 0, + 'size_total': 0, +}] + + +@pytest.mark.parametrize('ansible_mounts, threshold, expect_fail, expect_msg', [ + ( + ansible_mounts_enough, + None, + False, + [], + ), + ( + ansible_mounts_not_enough, + None, + True, + ["usage percentage", "higher than threshold"], + ), + ( + ansible_mounts_not_enough, + "bogus percent", + True, + ["is not a percentage"], + ), + ( + ansible_mounts_missing_fields, + None, + True, + ["Ansible bug"], + ), + ( + ansible_mounts_zero_size, + None, + True, + ["Ansible bug"], + ), +]) +def test_overlay_usage(ansible_mounts, threshold, expect_fail, expect_msg): + check = dummy_check() + task_vars = non_atomic_task_vars() + task_vars["ansible_mounts"] = ansible_mounts + if threshold is not None: + task_vars["max_overlay_usage_percent"] = threshold + docker_info = dict(DockerRootDir="/var/lib/docker", Driver="overlay") + result = check.check_overlay_usage(docker_info, task_vars) + + assert expect_fail == bool(result.get("failed")) + for msg in expect_msg: + assert msg in result["msg"] diff --git a/roles/openshift_health_checker/test/etcd_traffic_test.py 
diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py
index bb25e3f66..99c529054 100644
--- a/roles/openshift_health_checker/test/docker_storage_test.py
+++ b/roles/openshift_health_checker/test/docker_storage_test.py
@@ -23,7 +23,8 @@ def test_is_active(is_containerized, group_names, is_active):
     assert DockerStorage.is_active(task_vars=task_vars) == is_active
 
 
-non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
+def non_atomic_task_vars():
+    return {"openshift": {"common": {"is_atomic": False}}}
 
 
 @pytest.mark.parametrize('docker_info, failed, expect_msg', [
@@ -56,7 +57,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
     (
         dict(info={
             "Driver": "overlay2",
-            "DriverStatus": []
+            "DriverStatus": [("Backing Filesystem", "xfs")],
         }),
        False,
         [],
     ),
@@ -64,6 +65,27 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}}
     (
         dict(info={
             "Driver": "overlay",
+            "DriverStatus": [("Backing Filesystem", "btrfs")],
+        }),
+        True,
+        ["storage is type 'btrfs'", "only supported with\n'xfs'"],
+    ),
+    (
+        dict(info={
+            "Driver": "overlay2",
+            "DriverStatus": [("Backing Filesystem", "xfs")],
+            "OperatingSystem": "Red Hat Enterprise Linux Server release 7.2 (Maipo)",
+            "KernelVersion": "3.10.0-327.22.2.el7.x86_64",
+        }),
+        True,
+        ["Docker reports kernel version 3.10.0-327"],
+    ),
+    (
+        dict(info={
+            "Driver": "overlay",
+            "DriverStatus": [("Backing Filesystem", "xfs")],
+            "OperatingSystem": "CentOS",
+            "KernelVersion": "3.10.0-514",
         }),
         False,
         [],
@@ -85,8 +107,9 @@ def test_check_storage_driver(docker_info, failed, expect_msg):
         return docker_info
 
     check = dummy_check(execute_module=execute_module)
-    check._check_dm_usage = lambda status, task_vars: dict()  # stub out for this test
-    result = check.run(tmp=None, task_vars=non_atomic_task_vars)
+    check.check_dm_usage = lambda status, task_vars: dict()  # stub out for this test
+    check.check_overlay_usage = lambda info, task_vars: dict()  # stub out for this test
+    result = check.run(tmp=None, task_vars=non_atomic_task_vars())
 
     if failed:
         assert result["failed"]
@@ -146,8 +169,8 @@ not_enough_space = {
 ])
 def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg):
     check = dummy_check()
-    check._get_vg_free = lambda pool, task_vars: vg_free
-    result = check._check_dm_usage(driver_status, task_vars)
+    check.get_vg_free = lambda pool, task_vars: vg_free
+    result = check.check_dm_usage(driver_status, task_vars)
     result_success = not result.get("failed")
 
     assert result_success is success
@@ -195,10 +218,10 @@ def test_vg_free(pool, command_returns, raises, returns):
     check = dummy_check(execute_module=execute_module)
     if raises:
         with pytest.raises(OpenShiftCheckException) as err:
-            check._get_vg_free(pool, {})
+            check.get_vg_free(pool, {})
         assert raises in str(err.value)
     else:
-        ret = check._get_vg_free(pool, {})
+        ret = check.get_vg_free(pool, {})
         assert ret == returns
@@ -209,7 +232,7 @@ def test_vg_free(pool, command_returns, raises, returns):
     ("12g", 12.0 * 1024**3),
 ])
 def test_convert_to_bytes(string, expect_bytes):
-    got = DockerStorage._convert_to_bytes(string)
+    got = DockerStorage.convert_to_bytes(string)
     assert got == expect_bytes
@@ -219,6 +242,70 @@
 ])
 def test_convert_to_bytes_error(string):
     with pytest.raises(ValueError) as err:
-        DockerStorage._convert_to_bytes(string)
+        DockerStorage.convert_to_bytes(string)
     assert "Cannot convert" in str(err.value)
     assert string in str(err.value)
+
+
+ansible_mounts_enough = [{
+    'mount': '/var/lib/docker',
+    'size_available': 50 * 10**9,
+    'size_total': 50 * 10**9,
+}]
+ansible_mounts_not_enough = [{
+    'mount': '/var/lib/docker',
+    'size_available': 0,
+    'size_total': 50 * 10**9,
+}]
+ansible_mounts_missing_fields = [dict(mount='/var/lib/docker')]
+ansible_mounts_zero_size = [{
+    'mount': '/var/lib/docker',
+    'size_available': 0,
+    'size_total': 0,
+}]
+
+
+@pytest.mark.parametrize('ansible_mounts, threshold, expect_fail, expect_msg', [
+    (
+        ansible_mounts_enough,
+        None,
+        False,
+        [],
+    ),
+    (
+        ansible_mounts_not_enough,
+        None,
+        True,
+        ["usage percentage", "higher than threshold"],
+    ),
+    (
+        ansible_mounts_not_enough,
+        "bogus percent",
+        True,
+        ["is not a percentage"],
+    ),
+    (
+        ansible_mounts_missing_fields,
+        None,
+        True,
+        ["Ansible bug"],
+    ),
+    (
+        ansible_mounts_zero_size,
+        None,
+        True,
+        ["Ansible bug"],
+    ),
+])
+def test_overlay_usage(ansible_mounts, threshold, expect_fail, expect_msg):
+    check = dummy_check()
+    task_vars = non_atomic_task_vars()
+    task_vars["ansible_mounts"] = ansible_mounts
+    if threshold is not None:
+        task_vars["max_overlay_usage_percent"] = threshold
+    docker_info = dict(DockerRootDir="/var/lib/docker", Driver="overlay")
+    result = check.check_overlay_usage(docker_info, task_vars)
+
+    assert expect_fail == bool(result.get("failed"))
+    for msg in expect_msg:
+        assert msg in result["msg"]
log message"), + create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"), + ], + ["test log message"], + [], + ), + ( + 'test with invalid json in log input', + [ + { + "start_regexp": r"Sample Logs Beginning", + "regexp": r"test log message", + "unit": "test-unit", + }, + ], + [ + '{__REALTIME_TIMESTAMP: ' + str(get_timestamp_microseconds()) + ', "MESSAGE": "test log message"}', + ], + [], + [ + ["invalid json", "test-unit", "test log message"], + ], + ), + ( + 'test with invalid regexp', + [ + { + "start_regexp": r"Sample Logs Beginning", + "regexp": r"test [ log message", + "unit": "test", + }, + ], + [ + create_test_log_object(get_timestamp_microseconds(), "test log message"), + create_test_log_object(get_timestamp_microseconds(), "sample log message"), + create_test_log_object(get_timestamp_microseconds(), "fake log message"), + create_test_log_object(get_timestamp_microseconds(), "dummy log message"), + create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"), + ], + [], + [ + ["invalid regular expression"], + ], + ), +], ids=lambda argval: argval[0]) +def test_get_log_matches(name, matchers, log_input, expected_matches, expected_errors): + def get_log_output(matcher): + return log_input + + module = canned_search_journalctl(get_log_output) + matched_regexp, errors = module.get_log_matches(matchers, 500, 60 * 60) + + assert set(matched_regexp) == set(expected_matches) + assert len(expected_errors) == len(errors) + + for idx, partial_err_set in enumerate(expected_errors): + for partial_err_msg in partial_err_set: + assert partial_err_msg in errors[idx] + + +@pytest.mark.parametrize('name,matcher,log_count_lim,stamp_lim_seconds,log_input,expected_match', [ + ( + 'test with matching log message, but out of bounds of log_count_lim', + { + "start_regexp": r"Sample Logs Beginning", + "regexp": r"dummy log message", + "unit": "test", + }, + 3, + get_timestamp(-100 * 60 * 60), + [ + create_test_log_object(get_timestamp_microseconds(), "test log message"), + create_test_log_object(get_timestamp_microseconds(), "sample log message"), + create_test_log_object(get_timestamp_microseconds(), "fake log message"), + create_test_log_object(get_timestamp_microseconds(), "dummy log message"), + create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"), + ], + None, + ), + ( + 'test with matching log message, but with timestamp too old', + { + "start_regexp": r"Sample Logs Beginning", + "regexp": r"dummy log message", + "unit": "test", + }, + 100, + get_timestamp(-10), + [ + create_test_log_object(get_timestamp_microseconds(), "test log message"), + create_test_log_object(get_timestamp_microseconds(), "sample log message"), + create_test_log_object(get_timestamp_microseconds(), "fake log message"), + create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"), + create_test_log_object(get_timestamp_microseconds(-1000), "Sample Logs Beginning"), + ], + None, + ), + ( + 'test with matching log message, and timestamp within time limit', + { + "start_regexp": r"Sample Logs Beginning", + "regexp": r"dummy log message", + "unit": "test", + }, + 100, + get_timestamp(-1010), + [ + create_test_log_object(get_timestamp_microseconds(), "test log message"), + create_test_log_object(get_timestamp_microseconds(), "sample log message"), + create_test_log_object(get_timestamp_microseconds(), "fake log message"), + create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"), + 
diff --git a/roles/openshift_health_checker/test/search_journalctl_test.py b/roles/openshift_health_checker/test/search_journalctl_test.py
new file mode 100644
index 000000000..724928aa1
--- /dev/null
+++ b/roles/openshift_health_checker/test/search_journalctl_test.py
@@ -0,0 +1,157 @@
+import pytest
+import search_journalctl
+
+
+def canned_search_journalctl(get_log_output=None):
+    """Create a search_journalctl object with canned get_log_output method"""
+    module = search_journalctl
+    if get_log_output:
+        module.get_log_output = get_log_output
+    return module
+
+
+DEFAULT_TIMESTAMP = 1496341364
+
+
+def get_timestamp(modifier=0):
+    return DEFAULT_TIMESTAMP + modifier
+
+
+def get_timestamp_microseconds(modifier=0):
+    return get_timestamp(modifier) * 1000000
+
+
+def create_test_log_object(stamp, msg):
+    return '{{"__REALTIME_TIMESTAMP": "{}", "MESSAGE": "{}"}}'.format(stamp, msg)
+
+
+@pytest.mark.parametrize('name,matchers,log_input,expected_matches,expected_errors', [
+    (
+        'test with valid params',
+        [
+            {
+                "start_regexp": r"Sample Logs Beginning",
+                "regexp": r"test log message",
+                "unit": "test",
+            },
+        ],
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"),
+        ],
+        ["test log message"],
+        [],
+    ),
+    (
+        'test with invalid json in log input',
+        [
+            {
+                "start_regexp": r"Sample Logs Beginning",
+                "regexp": r"test log message",
+                "unit": "test-unit",
+            },
+        ],
+        [
+            '{__REALTIME_TIMESTAMP: ' + str(get_timestamp_microseconds()) + ', "MESSAGE": "test log message"}',
+        ],
+        [],
+        [
+            ["invalid json", "test-unit", "test log message"],
+        ],
+    ),
+    (
+        'test with invalid regexp',
+        [
+            {
+                "start_regexp": r"Sample Logs Beginning",
+                "regexp": r"test [ log message",
+                "unit": "test",
+            },
+        ],
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+            create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+            create_test_log_object(get_timestamp_microseconds(), "dummy log message"),
+            create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"),
+        ],
+        [],
+        [
+            ["invalid regular expression"],
+        ],
+    ),
+], ids=lambda argval: argval[0])
+def test_get_log_matches(name, matchers, log_input, expected_matches, expected_errors):
+    def get_log_output(matcher):
+        return log_input
+
+    module = canned_search_journalctl(get_log_output)
+    matched_regexp, errors = module.get_log_matches(matchers, 500, 60 * 60)
+
+    assert set(matched_regexp) == set(expected_matches)
+    assert len(expected_errors) == len(errors)
+
+    for idx, partial_err_set in enumerate(expected_errors):
+        for partial_err_msg in partial_err_set:
+            assert partial_err_msg in errors[idx]
+
+
+@pytest.mark.parametrize('name,matcher,log_count_lim,stamp_lim_seconds,log_input,expected_match', [
+    (
+        'test with matching log message, but out of bounds of log_count_lim',
+        {
+            "start_regexp": r"Sample Logs Beginning",
+            "regexp": r"dummy log message",
+            "unit": "test",
+        },
+        3,
+        get_timestamp(-100 * 60 * 60),
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+            create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+            create_test_log_object(get_timestamp_microseconds(), "dummy log message"),
+            create_test_log_object(get_timestamp_microseconds(), "Sample Logs Beginning"),
+        ],
+        None,
+    ),
+    (
+        'test with matching log message, but with timestamp too old',
+        {
+            "start_regexp": r"Sample Logs Beginning",
+            "regexp": r"dummy log message",
+            "unit": "test",
+        },
+        100,
+        get_timestamp(-10),
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+            create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+            create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"),
+            create_test_log_object(get_timestamp_microseconds(-1000), "Sample Logs Beginning"),
+        ],
+        None,
+    ),
+    (
+        'test with matching log message, and timestamp within time limit',
+        {
+            "start_regexp": r"Sample Logs Beginning",
+            "regexp": r"dummy log message",
+            "unit": "test",
+        },
+        100,
+        get_timestamp(-1010),
+        [
+            create_test_log_object(get_timestamp_microseconds(), "test log message"),
+            create_test_log_object(get_timestamp_microseconds(), "sample log message"),
+            create_test_log_object(get_timestamp_microseconds(), "fake log message"),
+            create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"),
+            create_test_log_object(get_timestamp_microseconds(-1000), "Sample Logs Beginning"),
+        ],
+        create_test_log_object(get_timestamp_microseconds(-1000), "dummy log message"),
+    ),
+], ids=lambda argval: argval[0])
+def test_find_matches_skips_logs(name, matcher, log_count_lim, stamp_lim_seconds, log_input, expected_match):
+    match = search_journalctl.find_matches(log_input, matcher, log_count_lim, stamp_lim_seconds)
+    assert match == expected_match
diff --git a/roles/openshift_service_catalog/files/kubeservicecatalog_roles_bindings.yml b/roles/openshift_service_catalog/files/kubeservicecatalog_roles_bindings.yml
index bcc7fb590..c30c15778 100644
--- a/roles/openshift_service_catalog/files/kubeservicecatalog_roles_bindings.yml
+++ b/roles/openshift_service_catalog/files/kubeservicecatalog_roles_bindings.yml
@@ -137,6 +137,12 @@ objects:
       - serviceclasses
       verbs:
       - create
+      - delete
+      - update
+      - patch
+      - get
+      - list
+      - watch
     - apiGroups:
       - settings.k8s.io
       resources: