diff options
author | OpenShift Bot <eparis+openshiftbot@redhat.com> | 2017-08-07 12:06:24 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-08-07 12:06:24 -0400 |
commit | 4c5906fe7309e82986803f87654df3381bfd1748 (patch) | |
tree | eaab0f75266e845648c5d1adc10eb7a9f979a03e | |
parent | e9eec293ab67f90c2a5e70f05e33cec65f88b42a (diff) | |
parent | fc905cf14bbfc75e25a0a58fdb3c92f602ce9450 (diff) | |
download | openshift-4c5906fe7309e82986803f87654df3381bfd1748.tar.gz openshift-4c5906fe7309e82986803f87654df3381bfd1748.tar.bz2 openshift-4c5906fe7309e82986803f87654df3381bfd1748.tar.xz openshift-4c5906fe7309e82986803f87654df3381bfd1748.zip |
Merge pull request #4960 from juanvallejo/jvallejo/verify-disk-memory-before-upgrade-no-flake
Merged by openshift-bot
4 files changed, 115 insertions, 5 deletions
diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml new file mode 100644 index 000000000..497709d25 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_health_checks.yml @@ -0,0 +1,13 @@ +--- +- name: Verify Host Requirements + hosts: oo_all_hosts + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: upgrade + post_tasks: + - action: openshift_health_check + args: + checks: + - disk_availability + - memory_availability diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml index 5b9ac9e8f..da4444867 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml @@ -70,6 +70,10 @@ # docker is configured and running. skip_docker_role: True +- include: ../pre/verify_health_checks.yml + tags: + - pre_upgrade + - include: ../pre/verify_control_plane_running.yml tags: - pre_upgrade diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index 283461294..39ac0e4ec 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -35,6 +35,15 @@ class DiskAvailability(OpenShiftCheck): }, } + # recommended disk space for each location under an upgrade context + recommended_disk_upgrade_bytes = { + '/var': { + 'masters': 10 * 10**9, + 'nodes': 5 * 10 ** 9, + 'etcd': 5 * 10 ** 9, + }, + } + def is_active(self): """Skip hosts that do not have recommended disk space requirements.""" group_names = self.get_var("group_names", default=[]) @@ -80,9 +89,34 @@ class DiskAvailability(OpenShiftCheck): config_bytes = max(config.get(name, 0) for name in group_names) * 10**9 recommended_bytes = config_bytes or recommended_bytes + # if an "upgrade" context is set, update the minimum disk requirement + # as this signifies an in-place upgrade - the node might have the + # required total disk space, but some of that space may already be + # in use by the existing OpenShift deployment. + context = self.get_var("r_openshift_health_checker_playbook_context", default="") + if context == "upgrade": + recommended_upgrade_paths = self.recommended_disk_upgrade_bytes.get(path, {}) + if recommended_upgrade_paths: + recommended_bytes = config_bytes or max(recommended_upgrade_paths.get(name, 0) + for name in group_names) + if free_bytes < recommended_bytes: free_gb = float(free_bytes) / 10**9 recommended_gb = float(recommended_bytes) / 10**9 + msg = ( + 'Available disk space in "{}" ({:.1f} GB) ' + 'is below minimum recommended ({:.1f} GB)' + ).format(path, free_gb, recommended_gb) + + # warn if check failed under an "upgrade" context + # due to limits imposed by the user config + if config_bytes and context == "upgrade": + msg += ('\n\nMake sure to account for decreased disk space during an upgrade\n' + 'due to an existing OpenShift deployment. Please check the value of\n' + ' openshift_check_min_host_disk_gb={}\n' + 'in your Ansible inventory, and lower the recommended disk space availability\n' + 'if necessary for this upgrade.').format(config_bytes) + return { 'failed': True, 'msg': ( diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index e98d02c58..5720eeacf 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -97,8 +97,9 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib assert not result.get('failed', False) -@pytest.mark.parametrize('group_names,configured_min,ansible_mounts,extra_words', [ +@pytest.mark.parametrize('name,group_names,configured_min,ansible_mounts,extra_words', [ ( + 'test with no space available', ['masters'], 0, [{ @@ -108,6 +109,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ['0.0 GB'], ), ( + 'test with a higher configured required value', ['masters'], 100, # set a higher threshold [{ @@ -117,6 +119,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ['100.0 GB'], ), ( + 'test with 1GB available, but "0" GB space requirement', ['nodes'], 0, [{ @@ -126,6 +129,7 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ['1.0 GB'], ), ( + 'test with no space available, but "0" GB space requirement', ['etcd'], 0, [{ @@ -135,16 +139,17 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib ['0.0 GB'], ), ( + 'test with enough space for a node, but not for a master', ['nodes', 'masters'], 0, [{ 'mount': '/', - # enough space for a node, not enough for a master 'size_available': 15 * 10**9 + 1, }], ['15.0 GB'], ), ( + 'test failure with enough space on "/", but not enough on "/var"', ['etcd'], 0, [{ @@ -158,8 +163,8 @@ def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansib }], ['0.0 GB'], ), -]) -def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible_mounts, extra_words): +], ids=lambda argval: argval[0]) +def test_fails_with_insufficient_disk_space(name, group_names, configured_min, ansible_mounts, extra_words): task_vars = dict( group_names=group_names, openshift_check_min_host_disk_gb=configured_min, @@ -170,7 +175,61 @@ def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible assert result['failed'] for word in 'below recommended'.split() + extra_words: - assert word in result['msg'] + assert word in result.get('msg', '') + + +@pytest.mark.parametrize('name,group_names,context,ansible_mounts,failed,extra_words', [ + ( + 'test without enough space for master under "upgrade" context', + ['nodes', 'masters'], + "upgrade", + [{ + 'mount': '/', + 'size_available': 1 * 10**9 + 1, + 'size_total': 21 * 10**9 + 1, + }], + True, + ["1.0 GB"], + ), + ( + 'test with enough space for master under "upgrade" context', + ['nodes', 'masters'], + "upgrade", + [{ + 'mount': '/', + 'size_available': 10 * 10**9 + 1, + 'size_total': 21 * 10**9 + 1, + }], + False, + [], + ), + ( + 'test with not enough space for master, and non-upgrade context', + ['nodes', 'masters'], + "health", + [{ + 'mount': '/', + # not enough space for a master, + # "health" context should not lower requirement + 'size_available': 20 * 10**9 + 1, + }], + True, + ["20.0 GB", "below minimum"], + ), +], ids=lambda argval: argval[0]) +def test_min_required_space_changes_with_upgrade_context(name, group_names, context, ansible_mounts, failed, extra_words): + task_vars = dict( + r_openshift_health_checker_playbook_context=context, + group_names=group_names, + ansible_mounts=ansible_mounts, + ) + + check = DiskAvailability(fake_execute_module, task_vars) + result = check.run() + + assert result.get("failed", False) == failed + for word in extra_words: + assert word in result.get('msg', '') def fake_execute_module(*args): |