# blob: 3173c79b2a1775915a561f1e1340e51293a07e0f (plain)
---
# Monitoring template for Linux hosts.
#
# NOTE(review): this looks like a Zabbix template definition -- the trigger
# expressions use the `{<template>:<item key>.last()}` form and the
# `{HOST.NAME}` macro -- and the item keys look like PCP metric names (the
# memory item descriptions are prefixed "PCP:"). Confirm against the consumer
# of this variable.
#
# Layout:
#   name      - template name; also referenced inside every trigger expression
#   zitems    - items collected by the template (key, applications, value_type,
#               and optionally description / multiplier / units)
#   ztriggers - alert definitions evaluated against the items above
g_template_os_linux:
  name: Template OS Linux

  zitems:
    # Kernel items
    - key: kernel.uname.sysname
      applications:
        - Kernel
      value_type: string
    - key: kernel.all.cpu.wait.total
      applications:
        - Kernel
      value_type: int
    - key: kernel.all.cpu.irq.hard
      applications:
        - Kernel
      value_type: int
    - key: kernel.all.cpu.idle
      applications:
        - Kernel
      value_type: int
    - key: kernel.uname.distro
      applications:
        - Kernel
      value_type: string
    - key: kernel.uname.nodename
      applications:
        - Kernel
      value_type: string
    - key: kernel.all.cpu.irq.soft
      applications:
        - Kernel
      value_type: int
    - key: kernel.all.load.15_minute
      applications:
        - Kernel
      value_type: float
    - key: kernel.all.cpu.sys
      applications:
        - Kernel
      value_type: int
    - key: kernel.all.load.5_minute
      applications:
        - Kernel
      value_type: float
    - key: kernel.all.cpu.nice
      applications:
        - Kernel
      value_type: int
    - key: kernel.all.load.1_minute
      applications:
        - Kernel
      value_type: float
    - key: kernel.uname.version
      applications:
        - Kernel
      value_type: string
    - key: kernel.all.uptime
      applications:
        - Kernel
      value_type: int
    - key: kernel.all.cpu.user
      applications:
        - Kernel
      value_type: int
    - key: kernel.uname.machine
      applications:
        - Kernel
      value_type: string
    - key: hinv.ncpu
      applications:
        - Kernel
      value_type: int
    - key: kernel.all.cpu.steal
      applications:
        - Kernel
      value_type: int
    - key: kernel.all.pswitch
      applications:
        - Kernel
      value_type: int
    - key: kernel.uname.release
      applications:
        - Kernel
      value_type: string
    # Total process count; used by the "Too many TOTAL processes" trigger.
    - key: proc.nprocs
      applications:
        - Kernel
      value_type: int

    # Memory items
    # Every memory item carries `multiplier: 1024` and `units: B`.
    # NOTE(review): presumably the raw values arrive in KB and are scaled to
    # bytes (the memory trigger description below says "30000 KB x 1024") --
    # confirm.
    - key: mem.freemem
      applications:
        - Memory
      value_type: int
      description: "PCP: free system memory metric from /proc/meminfo"
      multiplier: 1024
      units: B
    - key: mem.util.bufmem
      applications:
        - Memory
      value_type: int
      description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo"
      multiplier: 1024
      units: B
    - key: swap.used
      applications:
        - Memory
      value_type: int
      description: "PCP: swap used metric from /proc/meminfo"
      multiplier: 1024
      units: B
    - key: swap.length
      applications:
        - Memory
      value_type: int
      description: "PCP: total swap available metric from /proc/meminfo"
      multiplier: 1024
      units: B
    - key: mem.physmem
      applications:
        - Memory
      value_type: int
      description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM."
      multiplier: 1024
      units: B
    - key: swap.free
      applications:
        - Memory
      value_type: int
      description: "PCP: swap free metric from /proc/meminfo"
      multiplier: 1024
      units: B
    - key: mem.util.available
      applications:
        - Memory
      value_type: int
      description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo"
      multiplier: 1024
      units: B
    - key: mem.util.used
      applications:
        - Memory
      value_type: int
      description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo"
      multiplier: 1024
      units: B
    - key: mem.util.cached
      applications:
        - Memory
      value_type: int
      description: "PCP: Memory used by the page cache, including buffered file data. This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo"
      multiplier: 1024
      units: B

    # Disk items
    # The device names are hard-coded; the triggers below label xvda2 as "/"
    # and xvda3 as "/var".
    - key: filesys.full.xvda2
      applications:
        - Disk
      value_type: float
    - key: filesys.full.xvda3
      applications:
        - Disk
      value_type: float

  ztriggers:
    # Filesystem fullness: each filesystem gets a `warn` trigger when the last
    # value is above 90 and a `high` trigger when it is above 95.
    - name: 'Filesystem: / has less than 10% free on {HOST.NAME}'
      expression: '{Template OS Linux:filesys.full.xvda2.last()}>90'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: warn
    - name: 'Filesystem: / has less than 5% free on {HOST.NAME}'
      expression: '{Template OS Linux:filesys.full.xvda2.last()}>95'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: high
    - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}'
      expression: '{Template OS Linux:filesys.full.xvda3.last()}>90'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: warn
    - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}'
      expression: '{Template OS Linux:filesys.full.xvda3.last()}>95'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: high

    # Process count: warn when the last proc.nprocs value exceeds 5000.
    - name: 'Too many TOTAL processes on {HOST.NAME}'
      expression: '{Template OS Linux:proc.nprocs.last()}>5000'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
      priority: warn

    # Memory: warn when the last mem.freemem value drops below 30720000
    # (30000 x 1024, per the description).
    # NOTE(review): the trigger name says "available memory" but the expression
    # reads mem.freemem, not mem.util.available -- confirm this is intended.
    - name: 'Lack of available memory on {HOST.NAME}'
      expression: '{Template OS Linux:mem.freemem.last()}<30720000'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
      priority: warn
      description: 'Alert on less than 30MegaBytes. This is 30 Million Bytes. 30000 KB x 1024'
# End of blob contents.