From 5800ca35fd005957cb325165ac7d109d3892ea28 Mon Sep 17 00:00:00 2001 From: Kenny Woodson Date: Mon, 26 Oct 2015 16:49:43 -0400 Subject: Adding zabbix type and fixing zabbix agent vars --- roles/os_zabbix/vars/template_app_zabbix_agent.yml | 4 +- .../os_zabbix/vars/template_app_zabbix_server.yml | 60 +++++++++++----------- 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml index 6349b6384..d636d4822 100644 --- a/roles/os_zabbix/vars/template_app_zabbix_agent.yml +++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml @@ -6,14 +6,14 @@ g_template_app_zabbix_agent: applications: - Zabbix agent value_type: character - zabbix_type: 0 + zabbix_type: agent - key: agent.ping applications: - Zabbix agent description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check. value_type: int - zabbix_type: 0 + zabbix_type: agent ztriggers: - name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes' diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml index aeec16254..43517113b 100644 --- a/roles/os_zabbix/vars/template_app_zabbix_server.yml +++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml @@ -8,7 +8,7 @@ g_template_app_zabbix_server: description: A simple count of the number of partition creates output by the housekeeper script. units: '' value_type: int - zabbix_type: 5 + zabbix_type: internal - key: housekeeper_drops applications: @@ -16,7 +16,7 @@ g_template_app_zabbix_server: description: A simple count of the number of partition drops output by the housekeeper script. units: '' value_type: int - zabbix_type: 5 + zabbix_type: internal - key: housekeeper_errors applications: @@ -24,7 +24,7 @@ g_template_app_zabbix_server: description: A simple count of the number of errors output by the housekeeper script. units: '' value_type: int - zabbix_type: 5 + zabbix_type: internal - key: housekeeper_total applications: @@ -33,7 +33,7 @@ g_template_app_zabbix_server: script. units: '' value_type: int - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,alerter,avg,busy] applications: @@ -41,7 +41,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,configuration syncer,avg,busy] applications: @@ -49,7 +49,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,db watchdog,avg,busy] applications: @@ -57,7 +57,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,discoverer,avg,busy] applications: @@ -65,7 +65,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,escalator,avg,busy] applications: @@ -73,7 +73,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,history syncer,avg,busy] applications: @@ -81,7 +81,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,housekeeper,avg,busy] applications: @@ -89,7 +89,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,http poller,avg,busy] applications: @@ -97,7 +97,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,icmp pinger,avg,busy] applications: @@ -105,7 +105,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,ipmi poller,avg,busy] applications: @@ -113,7 +113,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,java poller,avg,busy] applications: @@ -121,7 +121,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,node watcher,avg,busy] applications: @@ -129,7 +129,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,poller,avg,busy] applications: @@ -137,7 +137,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,proxy poller,avg,busy] applications: @@ -145,7 +145,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,self-monitoring,avg,busy] applications: @@ -153,7 +153,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,snmp trapper,avg,busy] applications: @@ -161,7 +161,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,timer,avg,busy] applications: @@ -169,7 +169,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,trapper,avg,busy] applications: @@ -177,7 +177,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[process,unreachable poller,avg,busy] applications: @@ -185,7 +185,7 @@ g_template_app_zabbix_server: description: '' units: '%' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[queue,10m] applications: @@ -193,7 +193,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: int - zabbix_type: 5 + zabbix_type: internal interval: 600 - key: zabbix[queue] @@ -202,7 +202,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: int - zabbix_type: 5 + zabbix_type: internal interval: 600 - key: zabbix[rcache,buffer,pfree] @@ -211,7 +211,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[wcache,history,pfree] applications: @@ -219,7 +219,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[wcache,text,pfree] applications: @@ -227,7 +227,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[wcache,trend,pfree] applications: @@ -235,7 +235,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: 5 + zabbix_type: internal - key: zabbix[wcache,values] applications: @@ -243,7 +243,7 @@ g_template_app_zabbix_server: description: '' units: '' value_type: float - zabbix_type: 5 + zabbix_type: internal delta: 1 # speed per second ztriggers: -- cgit v1.2.3 From 70f1050d12f85932f3c1c8d22993d71b49b2c9d9 Mon Sep 17 00:00:00 2001 From: Joel Diaz Date: Wed, 28 Oct 2015 12:44:21 -0400 Subject: Start tracking docker info execution time --- roles/os_zabbix/vars/template_docker.yml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml index 395e054de..bfabf50c5 100644 --- a/roles/os_zabbix/vars/template_docker.yml +++ b/roles/os_zabbix/vars/template_docker.yml @@ -7,6 +7,11 @@ g_template_docker: - Docker Daemon value_type: int + - key: docker.info_elapsed_ms + applications: + - Docker Daemon + value_type: int + - key: docker.storage.is_loopback applications: - Docker Storage -- cgit v1.2.3 From 4eb8cd34bef062b3b9e0f86d221f85d9c69bfcd3 Mon Sep 17 00:00:00 2001 From: Matt Woodson Date: Mon, 2 Nov 2015 11:59:21 -0500 Subject: changed the cpu alert to only alert if cpu idle more than 5x. Change alert to warning --- roles/os_zabbix/vars/template_os_linux.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 69432273f..aeeec4b8d 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -246,15 +246,15 @@ g_template_os_linux: # CPU Utilization # - name: 'CPU idle less than 5% on {HOST.NAME}' - expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<5 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<5' + expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<5' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' - priority: high + priority: average description: 'CPU is less than 5% idle' - name: 'CPU idle less than 10% on {HOST.NAME}' - expression: '{Template OS Linux:kernel.all.cpu.idle.last()}<10 and {Template OS Linux:kernel.all.cpu.idle.last(#2)}<10' + expression: '{Template OS Linux:kernel.all.cpu.idle.max(#5)}<10' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_cpu_idle.asciidoc' - priority: warn + priority: average description: 'CPU is less than 10% idle' dependencies: - 'CPU idle less than 5% on {HOST.NAME}' -- cgit v1.2.3 From 44ea44a3738f961222d5656e965923ef847b1bec Mon Sep 17 00:00:00 2001 From: Joel Diaz Date: Mon, 2 Nov 2015 11:06:39 -0500 Subject: get zabbix ready to start tracking status of pcp --- roles/os_zabbix/tasks/main.yml | 9 +++++++++ roles/os_zabbix/vars/template_performance_copilot.yml | 14 ++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 roles/os_zabbix/vars/template_performance_copilot.yml (limited to 'roles/os_zabbix') diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml index a503b24d7..82bf78b57 100644 --- a/roles/os_zabbix/tasks/main.yml +++ b/roles/os_zabbix/tasks/main.yml @@ -15,6 +15,7 @@ - include_vars: template_ops_tools.yml - include_vars: template_app_zabbix_server.yml - include_vars: template_app_zabbix_agent.yml +- include_vars: template_performance_copilot.yml - name: Include Template Heartbeat include: ../../lib_zabbix/tasks/create_template.yml @@ -79,3 +80,11 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + +- name: Include Template Performance Copilot + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_performance_copilot }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" diff --git a/roles/os_zabbix/vars/template_performance_copilot.yml b/roles/os_zabbix/vars/template_performance_copilot.yml new file mode 100644 index 000000000..b62fa0228 --- /dev/null +++ b/roles/os_zabbix/vars/template_performance_copilot.yml @@ -0,0 +1,14 @@ +--- +g_template_performance_copilot: + name: Template Performance Copilot + zitems: + - key: pcp.ping + applications: + - Performance Copilot + value_type: int + + ztriggers: + - name: 'pcp.ping failed on {HOST.NAME}' + expression: '{Template Performance Copilot:pcp.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_pcp_ping.asciidoc' + priority: average -- cgit v1.2.3