diff options
| -rwxr-xr-x | bin/ohi | 68 | ||||
| -rw-r--r-- | bin/openshift_ansible/awsutil.py | 38 | ||||
| -rwxr-xr-x | inventory/multi_inventory.py | 18 | ||||
| -rw-r--r-- | roles/openshift_master/tasks/main.yml | 6 | ||||
| -rw-r--r-- | roles/openshift_node/tasks/main.yml | 6 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_docker.yml | 10 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 8 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_zagg_server.yml | 16 | ||||
| -rw-r--r-- | roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 | 2 | 
9 files changed, 113 insertions, 59 deletions
@@ -1,14 +1,16 @@  #!/usr/bin/env python +''' +Ohi = Openshift Host Inventory + +This script provides an easy way to look at your host inventory. + +This depends on multi_inventory being setup correctly. +'''  # vim: expandtab:tabstop=4:shiftwidth=4  import argparse -import traceback  import sys  import os -import re -import tempfile -import time -import subprocess  import ConfigParser  from openshift_ansible import awsutil @@ -20,6 +22,9 @@ CONFIG_HOST_TYPE_ALIAS_SECTION = 'host_type_aliases'  class Ohi(object): +    ''' +        Class for managing openshift host inventory +    '''      def __init__(self):          self.host_type_aliases = {}          self.file_path = os.path.join(os.path.dirname(os.path.realpath(__file__))) @@ -35,6 +40,10 @@ class Ohi(object):          self.aws = awsutil.AwsUtil(self.host_type_aliases)      def run(self): +        ''' +            Call into awsutil and retrieve the desired hosts and environments +        ''' +          if self.args.list_host_types:              self.aws.print_host_types()              return 0 @@ -43,18 +52,24 @@ class Ohi(object):          if self.args.host_type is not None and \             self.args.env is not None:              # Both env and host-type specified -            hosts = self.aws.get_host_list(host_type=self.args.host_type, \ -                                           envs=self.args.env) +            hosts = self.aws.get_host_list(host_type=self.args.host_type, +                                           envs=self.args.env, +                                           version=self.args.openshift_version, +                                           cached=self.args.cache_only)          if self.args.host_type is None and \             self.args.env is not None:              # Only env specified -            hosts = self.aws.get_host_list(envs=self.args.env) +            hosts = self.aws.get_host_list(envs=self.args.env, +                                           version=self.args.openshift_version, +                                           cached=self.args.cache_only)          if self.args.host_type is not None and \             self.args.env is None:              # Only host-type specified -            hosts = self.aws.get_host_list(host_type=self.args.host_type) +            hosts = self.aws.get_host_list(host_type=self.args.host_type, +                                           version=self.args.openshift_version, +                                           cached=self.args.cache_only)          if hosts is None:              # We weren't able to determine what they wanted to do @@ -69,6 +84,9 @@ class Ohi(object):          return 0      def parse_config_file(self): +        ''' +            Parse the config file for ohi +        '''          if os.path.isfile(self.config_path):              config = ConfigParser.ConfigParser()              config.read(self.config_path) @@ -85,23 +103,27 @@ class Ohi(object):          parser = argparse.ArgumentParser(description='OpenShift Host Inventory') -        parser.add_argument('--list-host-types', default=False, action='store_true', -                       help='List all of the host types') +        parser.add_argument('--list-host-types', default=False, action='store_true', help='List all of the host types') -        parser.add_argument('-e', '--env', action="store", -                       help="Which environment to use") +        parser.add_argument('-e', '--env', action="store", help="Which environment to use") -        parser.add_argument('-t', '--host-type', action="store", -                       help="Which host type to use") +        parser.add_argument('-t', '--host-type', action="store", help="Which host type to use") -        parser.add_argument('-l', '--user', action='store', default=None, -                               help='username') +        parser.add_argument('-l', '--user', action='store', default=None, help='username') +        parser.add_argument('-c', '--cache-only', action='store_true', default=False, +                            help='Retrieve the host inventory by cache only. Default is false.') -        self.args = parser.parse_args() +        parser.add_argument('-o', '--openshift-version', action='store', default='2', +                            help='Specify the openshift version. Default is 2') -if __name__ == '__main__': +        self.args = parser.parse_args() + +def main(): +    ''' +    Ohi will do its work here +    '''      if len(sys.argv) == 1:          print "\nError: No options given. Use --help to see the available options\n"          sys.exit(0) @@ -110,5 +132,9 @@ if __name__ == '__main__':          ohi = Ohi()          exitcode = ohi.run()          sys.exit(exitcode) -    except ArgumentError as e: -        print "\nError: %s\n" % e.message +    except ArgumentError as err: +        print "\nError: %s\n" % err.message + +if __name__ == '__main__': +    main() + diff --git a/bin/openshift_ansible/awsutil.py b/bin/openshift_ansible/awsutil.py index ba32b4dbd..1ea2f914c 100644 --- a/bin/openshift_ansible/awsutil.py +++ b/bin/openshift_ansible/awsutil.py @@ -46,14 +46,17 @@ class AwsUtil(object):                  self.alias_lookup[value] = key      @staticmethod -    def get_inventory(args=None): +    def get_inventory(args=None, cached=False):          """Calls the inventory script and returns a dictionary containing the inventory."          Keyword arguments:          args -- optional arguments to pass to the inventory script          """          minv = multi_inventory.MultiInventory(args) -        minv.run() +        if cached: +            minv.get_inventory_from_cache() +        else: +            minv.run()          return minv.result      def get_environments(self): @@ -168,11 +171,12 @@ class AwsUtil(object):          host_type = self.resolve_host_type(host_type)          return "tag_env-host-type_%s-%s" % (env, host_type) -    def get_host_list(self, host_type=None, envs=None): +    def get_host_list(self, host_type=None, envs=None, version=None, cached=False):          """Get the list of hosts from the inventory using host-type and environment          """ +        retval = set([])          envs = envs or [] -        inv = self.get_inventory() +        inv = self.get_inventory(cached=cached)          # We prefer to deal with a list of environments          if issubclass(type(envs), basestring): @@ -183,29 +187,25 @@ class AwsUtil(object):          if host_type and envs:              # Both host type and environment were specified -            retval = []              for env in envs: -                env_host_type_tag = self.gen_env_host_type_tag(host_type, env) -                if env_host_type_tag in inv.keys(): -                    retval += inv[env_host_type_tag] -            return set(retval) +                retval.update(inv.get('tag_environment_%s' % env, [])) +            retval.intersection_update(inv.get(self.gen_host_type_tag(host_type), [])) -        if envs and not host_type: +        elif envs and not host_type:              # Just environment was specified -            retval = []              for env in envs:                  env_tag = AwsUtil.gen_env_tag(env)                  if env_tag in inv.keys(): -                    retval += inv[env_tag] -            return set(retval) +                    retval.update(inv.get(env_tag, [])) -        if host_type and not envs: +        elif host_type and not envs:              # Just host-type was specified -            retval = []              host_type_tag = self.gen_host_type_tag(host_type)              if host_type_tag in inv.keys(): -                retval = inv[host_type_tag] -            return set(retval) +                retval.update(inv.get(host_type_tag, [])) + +        # If version is specified then return only hosts in that version +        if version: +            retval.intersection_update(inv.get('oo_version_%s' % version, [])) -        # We should never reach here! -        raise ArgumentError("Invalid combination of parameters") +        return retval diff --git a/inventory/multi_inventory.py b/inventory/multi_inventory.py index 232f2402d..20fc48aa9 100755 --- a/inventory/multi_inventory.py +++ b/inventory/multi_inventory.py @@ -56,15 +56,6 @@ class MultiInventory(object):          else:              self.config_file = None # expect env vars - -    def run(self): -        '''This method checks to see if the local -           cache is valid for the inventory. - -           if the cache is valid; return cache -           else the credentials are loaded from multi_inventory.yaml or from the env -           and we attempt to get the inventory from the provider specified. -        '''          # load yaml          if self.config_file and os.path.isfile(self.config_file):              self.config = self.load_yaml_config() @@ -91,6 +82,15 @@ class MultiInventory(object):          if self.config.has_key('cache_location'):              self.cache_path = self.config['cache_location'] +    def run(self): +        '''This method checks to see if the local +           cache is valid for the inventory. + +           if the cache is valid; return cache +           else the credentials are loaded from multi_inventory.yaml or from the env +           and we attempt to get the inventory from the provider specified. +        ''' +          if self.args.get('refresh_cache', None):              self.get_inventory()              self.write_to_cache() diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index 8995863ec..43647cc49 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -228,7 +228,7 @@    register: start_result  - set_fact: -    master_service_status_changed = start_result | changed +    master_service_status_changed: start_result | changed    when: not openshift_master_ha | bool  - name: Start and enable master api @@ -237,7 +237,7 @@    register: start_result  - set_fact: -    master_api_service_status_changed = start_result | changed +    master_api_service_status_changed: start_result | changed    when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'  - name: Start and enable master controller @@ -246,7 +246,7 @@    register: start_result  - set_fact: -    master_controllers_service_status_changed = start_result | changed +    master_controllers_service_status_changed: start_result | changed    when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'  - name: Install cluster packages diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index eef7bec9a..38bffc2e5 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -85,11 +85,11 @@      docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries')                                        | oo_split() | union(['registry.access.redhat.com'])                                        | difference(['']) }}" -  when: openshift.common.deployment_type == 'enterprise' +  when: openshift.common.deployment_type in ['enterprise', 'openshift-enterprise', 'atomic-enterprise']  - set_fact:      docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries')                                        | oo_split() | difference(['']) }}" -  when: openshift.common.deployment_type != 'enterprise' +  when: openshift.common.deployment_type not in ['enterprise', 'openshift-enterprise', 'atomic-enterprise']  - name: Add personal registries    lineinfile: @@ -131,4 +131,4 @@    register: start_result  - set_fact: -    node_service_status_changed = start_result | changed +    node_service_status_changed: start_result | changed diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index bfabf50c5..91a2c400e 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -12,6 +12,11 @@ g_template_docker:      - Docker Daemon      value_type: int +  - key: docker.container.dns.resolution +    applications: +    - Docker Daemon +    value_type: int +    - key: docker.storage.is_loopback      applications:      - Docker Storage @@ -62,6 +67,11 @@ g_template_docker:      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc'      priority: high +  - name: 'docker.container.dns.resolution failed on {HOST.NAME}' +    expression: '{Template Docker:docker.container.dns.resolution.max(#3)}>0' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc' +    priority: high +    - name: 'Docker storage is using LOOPBACK on {HOST.NAME}'      expression: '{Template Docker:docker.storage.is_loopback.last()}<>0'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc' diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 514d6fd24..a0ba8d104 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -269,6 +269,14 @@ g_template_openshift_master:      - 'Openshift Master process not running on {HOST.NAME}'      priority: avg +  - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}' +    expression: '{Template Openshift Master:create_app.sum(1h)}>3' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}' +    description: The application create loop has failed 4 or more times in the last hour +    priority: avg +    - name: 'Openshift Master API health check is failing on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml index 0e8e53bb7..db5665993 100644 --- a/roles/os_zabbix/vars/template_zagg_server.yml +++ b/roles/os_zabbix/vars/template_zagg_server.yml @@ -7,7 +7,12 @@ g_template_zagg_server:      - Zagg Server      value_type: int -  - key: zagg.server.processor.errors +  - key: zagg.server.metrics.errors +    applications: +    - Zagg Server +    value_type: int + +  - key: zagg.server.heartbeat.errors      applications:      - Zagg Server      value_type: int @@ -18,8 +23,13 @@ g_template_zagg_server:      value_type: int    ztriggers: -  - name: 'Error sending metrics on {HOST.NAME}' -    expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0' +  - name: 'Error processing metrics on {HOST.NAME}' +    expression: '{Template Zagg Server:zagg.server.metrics.errors.min(#3)}>0' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc' +    priority: average + +  - name: 'Error processing heartbeats on {HOST.NAME}' +    expression: '{Template Zagg Server:zagg.server.heartbeat.errors.min(#3)}>0'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'      priority: average diff --git a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 index 978e40b88..bcc8a5e03 100644 --- a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 +++ b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 @@ -42,7 +42,7 @@ ExecStart=/usr/bin/docker run --name {{ osohm_zagg_client }}             -v /etc/localtime:/etc/localtime                                                  \             -v /run/pcp:/run/pcp                                                              \             -v /var/run/docker.sock:/var/run/docker.sock                                      \ -           -v /var/run/openvswitch/db.sock:/var/run/openvswitch/db.sock                      \ +           -v /var/run/openvswitch:/var/run/openvswitch                      \  {% if hostvars[inventory_hostname]['ec2_tag_host-type'] == 'master' %}             -v /etc/openshift/master/admin.kubeconfig:/etc/openshift/master/admin.kubeconfig  \             -v /etc/openshift/master/master.etcd-client.crt:/etc/openshift/master/master.etcd-client.crt \  | 
