9 files changed, 645 insertions, 113 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
index 4588ed634..60aacf715 100644
--- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py
@@ -1,8 +1,25 @@
-# pylint: disable=missing-docstring
+"""Check that required Docker images are available."""
+
 from openshift_checks import OpenShiftCheck, get_var
+from openshift_checks.mixins import DockerHostMixin
+
 
+NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"]
+DEPLOYMENT_IMAGE_INFO = {
+    "origin": {
+        "namespace": "openshift",
+        "name": "origin",
+        "registry_console_image": "cockpit/kubernetes",
+    },
+    "openshift-enterprise": {
+        "namespace": "openshift3",
+        "name": "ose",
+        "registry_console_image": "registry.access.redhat.com/openshift3/registry-console",
+    },
+}
 
-class DockerImageAvailability(OpenShiftCheck):
+
+class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
     """Check that required Docker images are available.
 
     This check attempts to ensure that required docker images are
@@ -12,43 +29,23 @@ class DockerImageAvailability(OpenShiftCheck):
 
     name = "docker_image_availability"
     tags = ["preflight"]
-
     dependencies = ["skopeo", "python-docker-py"]
 
-    deployment_image_info = {
-        "origin": {
-            "namespace": "openshift",
-            "name": "origin",
-        },
-        "openshift-enterprise": {
-            "namespace": "openshift3",
-            "name": "ose",
-        },
-    }
-
     @classmethod
     def is_active(cls, task_vars):
         """Skip hosts with unsupported deployment types."""
         deployment_type = get_var(task_vars, "openshift_deployment_type")
-        has_valid_deployment_type = deployment_type in cls.deployment_image_info
+        has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO
 
         return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type
 
     def run(self, tmp, task_vars):
         msg, failed, changed = self.ensure_dependencies(task_vars)
-
-        # exit early if Skopeo update fails
         if failed:
-            if "No package matching" in msg:
-                msg = "Ensure that all required dependencies can be installed via `yum`.\n"
             return {
                 "failed": True,
                 "changed": changed,
-                "msg": (
-                    "Unable to update or install required dependency packages on this host;\n"
-                    "These are required in order to check Docker image availability:"
-                    "\n    {deps}\n{msg}"
-                ).format(deps=',\n    '.join(self.dependencies), msg=msg),
+                "msg": "Some dependencies are required in order to check Docker image availability.\n" + msg
             }
 
         required_images = self.required_images(task_vars)
@@ -77,51 +74,55 @@ class DockerImageAvailability(OpenShiftCheck):
 
         return {"changed": changed}
 
-    def required_images(self, task_vars):
-        deployment_type = get_var(task_vars, "openshift_deployment_type")
-        image_info = self.deployment_image_info[deployment_type]
-
-        openshift_release = get_var(task_vars, "openshift_release", default="latest")
-        openshift_image_tag = get_var(task_vars, "openshift_image_tag")
-        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
-
-        images = set(self.required_docker_images(
-            image_info["namespace"],
-            image_info["name"],
-            ["registry-console"] if "enterprise" in deployment_type else [],  # include enterprise-only image names
-            openshift_release,
-            is_containerized,
-        ))
-
-        # append images with qualified image tags to our list of required images.
-        # these are images with a (v0.0.0.0) tag, rather than a standard release
-        # format tag (v0.0). We want to check this set in both containerized and
-        # non-containerized installations.
-        images.update(
-            self.required_qualified_docker_images(
-                image_info["namespace"],
-                image_info["name"],
-                openshift_image_tag,
-            ),
-        )
-
-        return images
-
-    @staticmethod
-    def required_docker_images(namespace, name, additional_image_names, version, is_containerized):
-        if is_containerized:
-            return ["{}/{}:{}".format(namespace, name, version)] if name else []
-
-        # include additional non-containerized images specific to the current deployment type
-        return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names]
-
     @staticmethod
-    def required_qualified_docker_images(namespace, name, version):
-        # pylint: disable=invalid-name
-        return [
-            "{}/{}-{}:{}".format(namespace, name, suffix, version)
-            for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"]
-        ]
+    def required_images(task_vars):
+        """
+        Determine which images we expect to need for this host.
+        Returns: a set of required images like 'openshift/origin:v3.6'
+
+        The thorny issue of determining the image names from the variables is under consideration
+        via https://github.com/openshift/openshift-ansible/issues/4415
+
+        For now we operate as follows:
+        * For containerized components (master, node, ...) we look at the deployment type and
+          use openshift/origin or openshift3/ose as the base for those component images. The
+          version is openshift_image_tag as determined by the openshift_version role.
+        * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if
+          it is defined; otherwise we again use the base that depends on the deployment type.
+        Registry is not included in constructed images. It may be in oreg_url or etcd image.
+        """
+        required = set()
+        deployment_type = get_var(task_vars, "openshift_deployment_type")
+        host_groups = get_var(task_vars, "group_names")
+        image_tag = get_var(task_vars, "openshift_image_tag")
+        image_info = DEPLOYMENT_IMAGE_INFO.get(deployment_type)  # None for unknown deployment types
+        if not image_info:
+            return required
+
+        # template for images that run on top of OpenShift
+        image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}")
+        image_url = get_var(task_vars, "oreg_url", default="") or image_url
+        if 'nodes' in host_groups:
+            for suffix in NODE_IMAGE_SUFFIXES:
+                required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag))
+            # The registry-console is for some reason not prefixed with ose- like the other components.
+            # Nor is it versioned the same, so just look for latest.
+            # Also a completely different name is used for Origin.
+            required.add(image_info["registry_console_image"])
+
+        # images for containerized components
+        if get_var(task_vars, "openshift", "common", "is_containerized"):
+            components = set()
+            if 'nodes' in host_groups:
+                components.update(["node", "openvswitch"])
+            if 'masters' in host_groups:  # name is "origin" or "ose"
+                components.add(image_info["name"])
+            for component in components:
+                required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag))
+            if 'etcd' in host_groups:  # special case, note it is the same for origin/enterprise
+                required.add("registry.access.redhat.com/rhel7/etcd")  # and no image tag
+
+        return required
 
     def local_images(self, images, task_vars):
         """Filter a list of images and return those available locally."""
@@ -131,7 +132,8 @@ class DockerImageAvailability(OpenShiftCheck):
         ]
 
     def is_image_local(self, image, task_vars):
-        result = self.module_executor("docker_image_facts", {"name": image}, task_vars)
+        """Check if image is already in local docker index."""
+        result = self.execute_module("docker_image_facts", {"name": image}, task_vars=task_vars)
         if result.get("failed", False):
             return False
 
@@ -139,6 +141,7 @@ class DockerImageAvailability(OpenShiftCheck):
 
     @staticmethod
     def known_docker_registries(task_vars):
+        """Build a list of docker registries available according to inventory vars."""
         docker_facts = get_var(task_vars, "openshift", "docker")
         regs = set(docker_facts["additional_registries"])
 
@@ -154,26 +157,21 @@ class DockerImageAvailability(OpenShiftCheck):
         """Inspect existing images using Skopeo and return all images successfully inspected."""
         return [
             image for image in images
-            if any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries)
+            if self.is_available_skopeo_image(image, registries, task_vars)
         ]
 
-    def is_available_skopeo_image(self, image, registry, task_vars):
-        """Uses Skopeo to determine if required image exists in a given registry."""
-
-        cmd_str = "skopeo inspect docker://{registry}/{image}".format(
-            registry=registry,
-            image=image,
-        )
+    def is_available_skopeo_image(self, image, registries, task_vars):
+        """Use Skopeo to determine if required image exists in known registry(s)."""
 
-        args = {"_raw_params": cmd_str}
-        result = self.module_executor("command", args, task_vars)
-        return not result.get("failed", False) and result.get("rc", 0) == 0
+        # if the image already includes a registry, just use that
+        if image.count("/") > 1:
+            registry, image = image.split("/", 1)
+            registries = [registry]
 
-    # ensures that the skopeo and python-docker-py packages exist
-    # check is skipped on atomic installations
-    def ensure_dependencies(self, task_vars):
-        if get_var(task_vars, "openshift", "common", "is_atomic"):
-            return "", False, False
+        for registry in registries:
+            args = {"_raw_params": "skopeo inspect docker://{}/{}".format(registry, image)}
+            result = self.execute_module("command", args, task_vars=task_vars)
+            if result.get("rc", 0) == 0 and not result.get("failed"):
+                return True
 
-        result = self.module_executor("yum", {"name": self.dependencies, "state": "latest"}, task_vars)
-        return result.get("msg", ""), result.get("failed", False) or result.get("rc", 0) != 0, result.get("changed")
+        return False
diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py
new file mode 100644
index 000000000..2bd615457
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py
@@ -0,0 +1,185 @@
+"""Check Docker storage driver and usage."""
+import json
+import re
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+from openshift_checks.mixins import DockerHostMixin
+
+
+class DockerStorage(DockerHostMixin, OpenShiftCheck):
+    """Check Docker storage driver compatibility.
+
+    This check ensures that Docker is using a supported storage driver,
+    and that loopback is not being used (if using devicemapper).
+    Also that storage usage is not above threshold.
+    """
+
+    name = "docker_storage"
+    tags = ["pre-install", "health", "preflight"]
+
+    dependencies = ["python-docker-py"]
+    storage_drivers = ["devicemapper", "overlay2"]
+    max_thinpool_data_usage_percent = 90.0
+    max_thinpool_meta_usage_percent = 90.0
+
+    # pylint: disable=too-many-return-statements
+    # Reason: permanent stylistic exception;
+    #         it is clearer to return on failures and there are just many ways to fail here.
+    def run(self, tmp, task_vars):
+        msg, failed, changed = self.ensure_dependencies(task_vars)
+        if failed:
+            return {
+                "failed": True,
+                "changed": changed,
+                "msg": "Some dependencies are required in order to query docker storage on host:\n" + msg
+            }
+
+        # attempt to get the docker info hash from the API
+        info = self.execute_module("docker_info", {}, task_vars=task_vars)
+        if info.get("failed"):
+            return {"failed": True, "changed": changed,
+                    "msg": "Failed to query Docker API. Is docker running on this host?"}
+        if not info.get("info"):  # this would be very strange
+            return {"failed": True, "changed": changed,
+                    "msg": "Docker API query missing info:\n{}".format(json.dumps(info))}
+        info = info["info"]
+
+        # check if the storage driver we saw is valid
+        driver = info.get("Driver", "[NONE]")
+        if driver not in self.storage_drivers:
+            msg = (
+                "Detected unsupported Docker storage driver '{driver}'.\n"
+                "Supported storage drivers are: {drivers}"
+            ).format(driver=driver, drivers=', '.join(self.storage_drivers))
+            return {"failed": True, "changed": changed, "msg": msg}
+
+        # driver status info is a list of tuples (or null); convert to dict and validate based on driver
+        driver_status = {item[0]: item[1] for item in info.get("DriverStatus") or []}
+        if driver == "devicemapper":
+            if driver_status.get("Data loop file"):
+                msg = (
+                    "Use of loopback devices with the Docker devicemapper storage driver\n"
+                    "(the default storage configuration) is unsupported in production.\n"
+                    "Please use docker-storage-setup to configure a backing storage volume.\n"
+                    "See http://red.ht/2rNperO for further information."
+                )
+                return {"failed": True, "changed": changed, "msg": msg}
+            result = self._check_dm_usage(driver_status, task_vars)
+            result['changed'] = result.get('changed', False) or changed
+            return result
+
+        # TODO(lmeyer): determine how to check usage for overlay2
+
+        return {"changed": changed}
+
+    def _check_dm_usage(self, driver_status, task_vars):
+        """
+        Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool
+        implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures
+        devicemapper storage. The LV is "thin" because it does not use all available storage
+        from its VG, instead expanding as needed; so to determine available space, we gather
+        current usage as the Docker API reports for the driver as well as space available for
+        expansion in the pool's VG.
+        Usage within the LV is divided into pools allocated to data and metadata, either of which
+        could run out of space first; so we check both.
+        """
+        vals = dict(
+            vg_free=self._get_vg_free(driver_status.get("Pool Name"), task_vars),
+            data_used=driver_status.get("Data Space Used"),
+            data_total=driver_status.get("Data Space Total"),
+            metadata_used=driver_status.get("Metadata Space Used"),
+            metadata_total=driver_status.get("Metadata Space Total"),
+        )
+
+        # convert all human-readable strings to bytes
+        for key, value in vals.copy().items():
+            try:
+                vals[key + "_bytes"] = self._convert_to_bytes(value)
+            except ValueError as err:  # unlikely to hit this from API info, but just to be safe
+                return {
+                    "failed": True,
+                    "values": vals,
+                    "msg": "Could not interpret {} value '{}' as bytes: {}".format(key, value, str(err))
+                }
+
+        # determine the threshold percentages which usage should not exceed
+        for name, default in [("data", self.max_thinpool_data_usage_percent),
+                              ("metadata", self.max_thinpool_meta_usage_percent)]:
+            percent = get_var(task_vars, "max_thinpool_" + name + "_usage_percent", default=default)
+            try:
+                vals[name + "_threshold"] = float(percent)
+            except (TypeError, ValueError):  # TypeError covers non-string, non-numeric values such as None
+                return {
+                    "failed": True,
+                    "msg": "Specified thinpool {} usage limit '{}' is not a percentage".format(name, percent)
+                }
+
+        # test whether the thresholds are exceeded
+        messages = []
+        for name in ["data", "metadata"]:
+            capacity = vals[name + "_total_bytes"] + vals["vg_free_bytes"]  # zero capacity counts as full
+            vals[name + "_pct_used"] = 100 * vals[name + "_used_bytes"] / capacity if capacity else 100.0
+            if vals[name + "_pct_used"] > vals[name + "_threshold"]:
+                messages.append(
+                    "Docker thinpool {name} usage percentage {pct:.1f} "
+                    "is higher than threshold {thresh:.1f}.".format(
+                        name=name,
+                        pct=vals[name + "_pct_used"],
+                        thresh=vals[name + "_threshold"],
+                    ))
+                vals["failed"] = True
+
+        vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."])
+        return vals
+
+    def _get_vg_free(self, pool, task_vars):
+        # Determine which VG to examine according to the pool name, the only indicator currently
+        # available from the Docker API driver info. We assume a name that looks like
+        # "vg--name-docker--pool"; vg and lv names with inner hyphens doubled, joined by a hyphen.
+        match = re.match(r'((?:[^-]|--)+)-(?!-)', pool or "")  # up to the first single hyphen; pool may be None
+        if not match:  # unlikely, but... be clear if we assumed wrong
+            raise OpenShiftCheckException(
+                "This host's Docker reports it is using a storage pool named '{}'.\n"
+                "However this name does not have the expected format of 'vgname-lvname'\n"
+                "so the available storage in the VG cannot be determined.".format(pool)
+            )
+        vg_name = match.groups()[0].replace("--", "-")
+        vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name
+        # should return free space like "  12.00g" if the VG exists; empty if it does not
+
+        ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars)
+        if ret.get("failed") or ret.get("rc", 0) != 0:
+            raise OpenShiftCheckException(
+                "Is LVM installed? Failed to run /sbin/vgs "
+                "to determine docker storage usage:\n" + ret.get("msg", "")
+            )
+        size = ret.get("stdout", "").strip()
+        if not size:
+            raise OpenShiftCheckException(
+                "This host's Docker reports it is using a storage pool named '{pool}',\n"
+                "which we expect to come from local VG '{vg}'.\n"
+                "However, /sbin/vgs did not find this VG. Is Docker for this host "
+                "running and using the storage on the host?".format(pool=pool, vg=vg_name)
+            )
+        return size
+
+    @staticmethod
+    def _convert_to_bytes(string):
+        """Convert a human-readable size such as "12.00g" or "1.5 MB" to a float number of bytes.
+
+        A missing unit means bytes. Raises ValueError if the value cannot be interpreted.
+        """
+        units = dict(
+            b=1, k=1024, m=1024**2,
+            g=1024**3, t=1024**4, p=1024**5,
+        )
+        string = string or ""
+        match = re.match(r'(\d+(?:\.\d+)?)\s*(\w)?', string)  # float followed by optional unit
+        if not match:
+            raise ValueError("Cannot convert to a byte size: " + string)
+
+        number, unit = match.groups()
+        multiplier = 1 if not unit else units.get(unit.lower())
+        if not multiplier:
+            raise ValueError("Cannot convert to a byte size: " + string)
+
+        return float(number) * multiplier
diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py
index 20d160eaf..2cb2e21aa 100644
--- a/roles/openshift_health_checker/openshift_checks/mixins.py
+++ b/roles/openshift_health_checker/openshift_checks/mixins.py
@@ -1,4 +1,3 @@
-# pylint: disable=missing-docstring,too-few-public-methods
 """
 Mixin classes meant to be used with subclasses of OpenShiftCheck.
 """
@@ -8,8 +7,52 @@ from openshift_checks import get_var
 
 class NotContainerizedMixin(object):
     """Mixin for checks that are only active when not in containerized mode."""
+    # permanent # pylint: disable=too-few-public-methods
+    # Reason: The mixin is not intended to stand on its own as a class.
 
     @classmethod
     def is_active(cls, task_vars):
+        """Only run on non-containerized hosts."""
         is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
         return super(NotContainerizedMixin, cls).is_active(task_vars) and not is_containerized
+
+
+class DockerHostMixin(object):
+    """Mixin for checks that are only active on hosts that require Docker."""
+
+    dependencies = []
+
+    @classmethod
+    def is_active(cls, task_vars):
+        """Only run on hosts that depend on Docker."""
+        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
+        is_node = "nodes" in get_var(task_vars, "group_names", default=[])
+        return super(DockerHostMixin, cls).is_active(task_vars) and (is_containerized or is_node)
+
+    def ensure_dependencies(self, task_vars):
+        """
+        Ensure that docker-related packages exist, but not on atomic hosts
+        (which would not be able to install but should already have them).
+        Returns: msg, failed, changed
+        """
+        if get_var(task_vars, "openshift", "common", "is_atomic"):
+            return "", False, False
+
+        # NOTE: we would use the "package" module but it's actually an action plugin
+        # and it's not clear how to invoke one of those. This is about the same anyway:
+        result = self.execute_module(
+            get_var(task_vars, "ansible_pkg_mgr", default="yum"),
+            {"name": self.dependencies, "state": "present"},
+            task_vars=task_vars,
+        )
+        msg = result.get("msg", "")
+        failed = result.get("failed", False) or result.get("rc", 0) != 0
+        if failed:  # explain every reported failure, including a non-zero rc without a "failed" flag
+            if "No package matching" in msg:
+                msg = "Ensure that all required dependencies can be installed via `yum`.\n"
+            msg = (
+                "Unable to install required packages on this host:\n"
+                "    {deps}\n{msg}"
+            ).format(deps=',\n    '.join(self.dependencies), msg=msg)
+        changed = result.get("changed", False)
+        return msg, failed, changed
diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py
index 1e45ae3af..2dd045f1f 100644
--- a/roles/openshift_health_checker/openshift_checks/ovs_version.py
+++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py
@@ -43,7 +43,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck):
                 },
             ],
         }
-        return self.execute_module("rpm_version", args, task_vars)
+        return self.execute_module("rpm_version", args, task_vars=task_vars)
 
     def get_required_ovs_version(self, task_vars):
         """Return the correct Open vSwitch version for the current OpenShift version"""
diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py
index a7eb720fd..e87567fe6 100644
--- a/roles/openshift_health_checker/openshift_checks/package_availability.py
+++ b/roles/openshift_health_checker/openshift_checks/package_availability.py
@@ -25,7 +25,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
             packages.update(self.node_packages(rpm_prefix))
 
         args = {"packages": sorted(set(packages))}
-        return self.execute_module("check_yum_update", args, tmp, task_vars)
+        return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars)
 
     @staticmethod
     def master_packages(rpm_prefix):
@@ -36,7 +36,6 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
             "bash-completion",
             "cockpit-bridge",
             "cockpit-docker",
-            "cockpit-kubernetes",
             "cockpit-shell",
             "cockpit-ws",
             "etcd",
diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py
index fd0c0a755..f432380c6 100644
--- a/roles/openshift_health_checker/openshift_checks/package_update.py
+++ b/roles/openshift_health_checker/openshift_checks/package_update.py
@@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck):
 
     def run(self, tmp, task_vars):
         args = {"packages": []}
-        return self.execute_module("check_yum_update", args, tmp, task_vars)
+        return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars)
diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py
index 2e737818b..6a76bb93d 100644
--- a/roles/openshift_health_checker/openshift_checks/package_version.py
+++ b/roles/openshift_health_checker/openshift_checks/package_version.py
@@ -71,7 +71,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
             ],
         }
 
-        return self.execute_module("aos_version", args, tmp, task_vars)
+        return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars)
 
     def get_required_ovs_version(self, task_vars):
         """Return the correct Open vSwitch version for the current OpenShift version.
diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py
index 0379cafb5..0a7c0f8d3 100644
--- a/roles/openshift_health_checker/test/docker_image_availability_test.py
+++ b/roles/openshift_health_checker/test/docker_image_availability_test.py
@@ -3,19 +3,25 @@ import pytest
 from openshift_checks.docker_image_availability import DockerImageAvailability
 
 
-@pytest.mark.parametrize('deployment_type,is_active', [
-    ("origin", True),
-    ("openshift-enterprise", True),
-    ("enterprise", False),
-    ("online", False),
-    ("invalid", False),
-    ("", False),
+@pytest.mark.parametrize('deployment_type, is_containerized, group_names, expect_active', [
+    ("origin", True, [], True),
+    ("openshift-enterprise", True, [], True),
+    ("enterprise", True, [], False),
+    ("online", True, [], False),
+    ("invalid", True, [], False),
+    ("", True, [], False),
+    ("origin", False, [], False),
+    ("openshift-enterprise", False, [], False),
+    ("origin", False, ["nodes", "masters"], True),
+    ("openshift-enterprise", False, ["etcd"], False),
 ])
-def test_is_active(deployment_type, is_active):
+def test_is_active(deployment_type, is_containerized, group_names, expect_active):
     task_vars = dict(
+        openshift=dict(common=dict(is_containerized=is_containerized)),
         openshift_deployment_type=deployment_type,
+        group_names=group_names,
     )
-    assert DockerImageAvailability.is_active(task_vars=task_vars) == is_active
+    assert DockerImageAvailability.is_active(task_vars=task_vars) == expect_active
 
 
 @pytest.mark.parametrize("is_containerized,is_atomic", [
@@ -25,15 +31,15 @@ def test_is_active(deployment_type, is_active):
     (False, True),
 ])
 def test_all_images_available_locally(is_containerized, is_atomic):
-    def execute_module(module_name, args, task_vars):
+    def execute_module(module_name, module_args, task_vars):
         if module_name == "yum":
             return {"changed": True}
 
         assert module_name == "docker_image_facts"
-        assert 'name' in args
-        assert args['name']
+        assert 'name' in module_args
+        assert module_args['name']
         return {
-            'images': [args['name']],
+            'images': [module_args['name']],
         }
 
     result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict(
@@ -46,8 +52,8 @@ def test_all_images_available_locally(is_containerized, is_atomic):
             docker=dict(additional_registries=["docker.io"]),
         ),
         openshift_deployment_type='origin',
-        openshift_release='v3.4',
         openshift_image_tag='3.4',
+        group_names=['nodes', 'masters'],
     ))
 
     assert not result.get('failed', False)
@@ -58,7 +64,7 @@ def test_all_images_available_locally(is_containerized, is_atomic):
     True,
 ])
 def test_all_images_available_remotely(available_locally):
-    def execute_module(module_name, args, task_vars):
+    def execute_module(module_name, module_args, task_vars):
         if module_name == 'docker_image_facts':
             return {'images': [], 'failed': available_locally}
         return {'changed': False}
@@ -73,8 +79,8 @@ def test_all_images_available_remotely(available_locally):
             docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]),
         ),
         openshift_deployment_type='origin',
-        openshift_release='3.4',
         openshift_image_tag='v3.4',
+        group_names=['nodes', 'masters'],
     ))
 
     assert not result.get('failed', False)
@@ -102,8 +108,8 @@ def test_all_images_unavailable():
             docker=dict(additional_registries=["docker.io"]),
         ),
         openshift_deployment_type="openshift-enterprise",
-        openshift_release=None,
-        openshift_image_tag='latest'
+        openshift_image_tag='latest',
+        group_names=['nodes', 'masters'],
     ))
 
     assert actual['failed']
@@ -141,8 +147,8 @@ def test_skopeo_update_failure(message, extra_words):
             docker=dict(additional_registries=["unknown.io"]),
         ),
         openshift_deployment_type="openshift-enterprise",
-        openshift_release='',
         openshift_image_tag='',
+        group_names=['nodes', 'masters'],
     ))
 
     assert actual["failed"]
@@ -171,8 +177,85 @@ def test_registry_availability(deployment_type, registries):
             docker=dict(additional_registries=registries),
         ),
         openshift_deployment_type=deployment_type,
-        openshift_release='',
         openshift_image_tag='',
+        group_names=['nodes', 'masters'],
     ))
 
     assert not actual.get("failed", False)
+
+
+@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [
+    (  # standard images required on nodes
+        "origin", False, ['nodes'], None,
+        {
+            'openshift/origin-pod:vtest',
+            'openshift/origin-deployer:vtest',
+            'openshift/origin-docker-registry:vtest',
+            'openshift/origin-haproxy-router:vtest',
+            'cockpit/kubernetes',  # origin version of registry-console
+        }
+    ),
+    (  # set a different URL for images
+        "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}',
+        {
+            'foo.io/openshift/origin-pod:vtest',
+            'foo.io/openshift/origin-deployer:vtest',
+            'foo.io/openshift/origin-docker-registry:vtest',
+            'foo.io/openshift/origin-haproxy-router:vtest',
+            'cockpit/kubernetes',  # as far as I can see, this is not built from oreg_url
+        }
+    ),
+    (  # containerized host that is a node, a master and an etcd member
+        "origin", True, ['nodes', 'masters', 'etcd'], None,
+        {
+            # images running on top of openshift
+            'openshift/origin-pod:vtest',
+            'openshift/origin-deployer:vtest',
+            'openshift/origin-docker-registry:vtest',
+            'openshift/origin-haproxy-router:vtest',
+            'cockpit/kubernetes',
+            # containerized component images
+            'openshift/origin:vtest',
+            'openshift/node:vtest',
+            'openshift/openvswitch:vtest',
+            'registry.access.redhat.com/rhel7/etcd',
+        }
+    ),
+    (  # enterprise images
+        "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45',
+        {
+            'foo.io/openshift3/ose-pod:f13ac45',
+            'foo.io/openshift3/ose-deployer:f13ac45',
+            'foo.io/openshift3/ose-docker-registry:f13ac45',
+            'foo.io/openshift3/ose-haproxy-router:f13ac45',
+            # registry-console is not constructed/versioned the same as the others.
+            'registry.access.redhat.com/openshift3/registry-console',
+            # containerized images aren't built from oreg_url
+            'openshift3/node:vtest',
+            'openshift3/openvswitch:vtest',
+        }
+    ),
+    (  # containerized enterprise host in the etcd and lb groups only
+        "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45',
+        {
+            'registry.access.redhat.com/rhel7/etcd',
+            # lb does not yet come in a containerized version
+        }
+    ),
+
+])
+def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected):
+    """Compare required_images() with the expected image set for each host layout."""
+    common_facts = {"is_containerized": is_containerized, "is_atomic": False}
+    host_vars = {
+        "openshift": {"common": common_facts},
+        "openshift_deployment_type": deployment_type,
+        "group_names": groups,
+        "oreg_url": oreg_url,
+        "openshift_image_tag": "vtest",
+    }
+    # "DUMMY" is a placeholder constructor argument
+    image_check = DockerImageAvailability("DUMMY")
+    computed = image_check.required_images(host_vars)
+
+    assert expected == computed
diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py
new file mode 100644
index 000000000..876614b1d
--- /dev/null
+++ b/roles/openshift_health_checker/test/docker_storage_test.py
@@ -0,0 +1,224 @@
+import pytest
+
+from openshift_checks import OpenShiftCheckException
+from openshift_checks.docker_storage import DockerStorage
+
+
+def dummy_check(execute_module=None):  # DockerStorage with the given executor, or a stub that always raises
+    def dummy_exec(module_name, module_args, tmp=None, task_vars=None):  # same signature as the other stubs here
+        raise Exception("dummy executor called")
+    return DockerStorage(execute_module=execute_module or dummy_exec)
+
+
+@pytest.mark.parametrize('is_containerized, group_names, is_active', [
+    (False, ["masters", "etcd"], False),
+    (False, ["masters", "nodes"], True),
+    (True, ["etcd"], True),
+])
+def test_is_active(is_containerized, group_names, is_active):
+    """Check DockerStorage.is_active against the expected value for each host setup."""
+    common_facts = {"is_containerized": is_containerized}
+    host_vars = {"openshift": {"common": common_facts}, "group_names": group_names}
+    activity = DockerStorage.is_active(task_vars=host_vars)
+    assert activity == is_active
+
+
+non_atomic_task_vars = dict(openshift=dict(common=dict(is_atomic=False)))  # task_vars with is_atomic off
+
+
+@pytest.mark.parametrize('docker_info, failed, expect_msg', [
+    (  # the docker_info module itself reports failure
+        dict(failed=True, msg="Error connecting: Error while fetching server API version"),
+        True,
+        ["Is docker running on this host?"],
+    ),
+    (  # result carries no "info" key
+        dict(msg="I have no info"),
+        True,
+        ["missing info"],
+    ),
+    (  # devicemapper with a named pool
+        dict(info={
+            "Driver": "devicemapper",
+            "DriverStatus": [("Pool Name", "docker-docker--pool")],
+        }),
+        False,
+        [],
+    ),
+    (  # devicemapper reporting a data loop file
+        dict(info={
+            "Driver": "devicemapper",
+            "DriverStatus": [("Data loop file", "true")],
+        }),
+        True,
+        ["loopback devices with the Docker devicemapper storage driver"],
+    ),
+    (  # overlay2 is expected to pass
+        dict(info={
+            "Driver": "overlay2",
+            "DriverStatus": []
+        }),
+        False,
+        [],
+    ),
+    (  # plain overlay is expected to fail
+        dict(info={
+            "Driver": "overlay",
+        }),
+        True,
+        ["unsupported Docker storage driver"],
+    ),
+    (  # any other driver name is expected to fail
+        dict(info={
+            "Driver": "unsupported",
+        }),
+        True,
+        ["unsupported Docker storage driver"],
+    ),
+])
+def test_check_storage_driver(docker_info, failed, expect_msg):
+    """Run the check against canned docker_info output and inspect failure flag and message."""
+    def execute_module(module_name, module_args, tmp=None, task_vars=None):
+        # yum gets an empty result; docker_info gets the parametrized payload
+        canned = {"yum": {}, "docker_info": docker_info}
+        if module_name not in canned:
+            raise ValueError("not expecting module " + module_name)
+        return canned[module_name]
+
+    storage_check = dummy_check(execute_module=execute_module)
+    storage_check._check_dm_usage = lambda status, task_vars: {}  # stub out for this test
+    outcome = storage_check.run(tmp=None, task_vars=non_atomic_task_vars)
+
+    # a failing case must set the flag; a passing case may omit it or leave it falsy
+    assert outcome["failed"] if failed else not outcome.get("failed", False)
+
+    # every expected fragment has to appear in the reported message
+    for fragment in expect_msg:
+        assert fragment in outcome["msg"]
+
+
+enough_space = {  # driver status fixture: used space is far below the totals
+    "Pool Name": "docker--vg-docker--pool",
+    "Data Space Used": "19.92 MB",
+    "Data Space Total": "8.535 GB",
+    "Metadata Space Used": "40.96 kB",
+    "Metadata Space Total": "25.17 MB",
+}
+
+not_enough_space = {  # driver status fixture: data is full, metadata is almost full
+    "Pool Name": "docker--vg-docker--pool",
+    "Data Space Used": "10 GB",
+    "Data Space Total": "10 GB",
+    "Metadata Space Used": "42 kB",
+    "Metadata Space Total": "43 kB",
+}
+
+
+@pytest.mark.parametrize('task_vars, driver_status, vg_free, success, expect_msg', [
+    (  # threshold setting that is not a number
+        {"max_thinpool_data_usage_percent": "not a float"},
+        enough_space,
+        "12g",
+        False,
+        ["is not a percentage"],
+    ),
+    (
+        {},
+        {},  # empty values from driver status
+        "bogus",  # also does not parse as bytes
+        False,
+        ["Could not interpret", "as bytes"],
+    ),
+    (  # plenty of room everywhere
+        {},
+        enough_space,
+        "12.00g",
+        True,
+        [],
+    ),
+    (  # pool is full and the VG has nothing free
+        {},
+        not_enough_space,
+        "0.00",
+        False,
+        ["data usage", "metadata usage", "higher than threshold"],
+    ),
+])
+def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg):
+    """Run _check_dm_usage with a canned VG free-space value and compare the outcome."""
+    storage_check = dummy_check()
+    storage_check._get_vg_free = lambda pool, task_vars: vg_free  # canned value instead of a real lookup
+    outcome = storage_check._check_dm_usage(driver_status, task_vars)
+
+    assert (not outcome.get("failed")) is success  # success == no truthy "failed" entry
+    for fragment in expect_msg:
+        assert fragment in outcome["msg"]
+
+
+@pytest.mark.parametrize('pool, command_returns, raises, returns', [
+    (
+        "foo-bar",
+        {  # vgs missing
+            "msg": "[Errno 2] No such file or directory",
+            "failed": True,
+            "cmd": "/sbin/vgs",
+            "rc": 2,
+        },
+        "Failed to run /sbin/vgs",
+        None,
+    ),
+    (
+        "foo",  # no hyphen in name - should not happen
+        {},
+        "name does not have the expected format",
+        None,
+    ),
+    (
+        "foo-bar",
+        dict(stdout="  4.00g\n"),  # surrounding whitespace is not part of the expected value
+        None,
+        "4.00g",
+    ),
+    (
+        "foo-bar",
+        dict(stdout="\n"),  # no matching VG
+        "vgs did not find this VG",
+        None,
+    )
+])
+def test_vg_free(pool, command_returns, raises, returns):
+    def execute_module(module_name, module_args, tmp=None, task_vars=None):
+        if module_name == "command":
+            return command_returns
+        raise ValueError("not expecting module " + module_name)
+
+    storage_check = dummy_check(execute_module=execute_module)
+    if not raises:  # success case: compare the returned value directly
+        assert storage_check._get_vg_free(pool, {}) == returns
+        return
+    # failure case: the lookup must raise and mention the expected text
+    with pytest.raises(OpenShiftCheckException) as excinfo:
+        storage_check._get_vg_free(pool, {})
+    assert raises in str(excinfo.value)
+
+
+@pytest.mark.parametrize('string, expect_bytes', [
+    ("12", 12.0),
+    ("12 k", 12.0 * 1024),
+    ("42.42 MB", 42.42 * 1024**2),
+    ("12g", 12.0 * 1024**3),
+])
+def test_convert_to_bytes(string, expect_bytes):
+    # expected values use multiples of 1024 for k, M and g
+    assert DockerStorage._convert_to_bytes(string) == expect_bytes
+
+
+@pytest.mark.parametrize('string', [
+    "bork",
+    "42 Qs",
+])
+def test_convert_to_bytes_error(string):
+    with pytest.raises(ValueError) as excinfo:
+        DockerStorage._convert_to_bytes(string)
+    for needle in ("Cannot convert", string):  # fixed prefix, then the offending input
+        assert needle in str(excinfo.value)