blob: 36c890da941d37df3af6b2972390d8ee009f21b9 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
|
---
# Template definition for Linux hosts: monitored items, a filesystem
# discovery rule with its item/trigger prototypes, and static triggers.
# NOTE(review): the z* key names, {HOST.NAME} macro and trigger-expression
# syntax look like Zabbix; item keys look like PCP metric names -- confirm
# against the tooling that consumes this file.
g_template_os_linux:
  name: Template OS Linux

  zitems:
    # Kernel items
    - key: kernel.uname.sysname
      applications:
        - Kernel
      value_type: string

    - key: kernel.all.cpu.wait.total
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.cpu.irq.hard
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.cpu.idle
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.uname.distro
      applications:
        - Kernel
      value_type: string

    - key: kernel.uname.nodename
      applications:
        - Kernel
      value_type: string

    - key: kernel.all.cpu.irq.soft
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.load.15_minute
      applications:
        - Kernel
      value_type: float

    - key: kernel.all.cpu.sys
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.load.5_minute
      applications:
        - Kernel
      value_type: float

    - key: kernel.all.cpu.nice
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.load.1_minute
      applications:
        - Kernel
      value_type: float

    - key: kernel.uname.version
      applications:
        - Kernel
      value_type: string

    - key: kernel.all.uptime
      applications:
        - Kernel
      value_type: int

    - key: kernel.all.cpu.user
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.uname.machine
      applications:
        - Kernel
      value_type: string

    - key: hinv.ncpu
      applications:
        - Kernel
      value_type: int

    - key: kernel.all.cpu.steal
      applications:
        - Kernel
      value_type: float
      units: '%'

    - key: kernel.all.pswitch
      applications:
        - Kernel
      value_type: int

    - key: kernel.uname.release
      applications:
        - Kernel
      value_type: string

    - key: proc.nprocs
      applications:
        - Kernel
      value_type: int

    # Memory items
    # Every memory item pairs `multiplier: 1024` with `units: B`.
    # NOTE(review): presumably the raw metric is reported in KB and scaled
    # to bytes on ingestion -- confirm.
    - key: mem.freemem
      applications:
        - Memory
      value_type: int
      description: "PCP: free system memory metric from /proc/meminfo"
      multiplier: 1024
      units: B

    - key: mem.util.bufmem
      applications:
        - Memory
      value_type: int
      description: "PCP: Memory allocated for buffer_heads.; I/O buffers metric from /proc/meminfo"
      multiplier: 1024
      units: B

    - key: swap.used
      applications:
        - Memory
      value_type: int
      description: "PCP: swap used metric from /proc/meminfo"
      multiplier: 1024
      units: B

    - key: swap.length
      applications:
        - Memory
      value_type: int
      description: "PCP: total swap available metric from /proc/meminfo"
      multiplier: 1024
      units: B

    - key: mem.physmem
      applications:
        - Memory
      value_type: int
      description: "PCP: The value of this metric corresponds to the \"MemTotal\" field reported by /proc/meminfo. Note that this does not necessarily correspond to actual installed physical memory - there may be areas of the physical address space mapped as ROM in various peripheral devices and the bios may be mirroring certain ROMs in RAM."
      multiplier: 1024
      units: B

    - key: swap.free
      applications:
        - Memory
      value_type: int
      description: "PCP: swap free metric from /proc/meminfo"
      multiplier: 1024
      units: B

    - key: mem.util.available
      applications:
        - Memory
      value_type: int
      description: "PCP: The amount of memory that is available for a new workload, without pushing the system into swap. Estimated from MemFree, Active(file), Inactive(file), and SReclaimable, as well as the \"low\" watermarks from /proc/zoneinfo.; available memory from /proc/meminfo"
      multiplier: 1024
      units: B

    - key: mem.util.used
      applications:
        - Memory
      value_type: int
      description: "PCP: Used memory is the difference between mem.physmem and mem.freemem; used memory metric from /proc/meminfo"
      multiplier: 1024
      units: B

    - key: mem.util.cached
      applications:
        - Memory
      value_type: int
      description: "PCP: Memory used by the page cache, including buffered file data. This is in-memory cache for files read from the disk (the pagecache) but doesn't include SwapCached.; page cache metric from /proc/meminfo"
      multiplier: 1024
      units: B

    # Disk items
    # Static per-device items; the triggers further down label xvda2 as "/"
    # and xvda3 as "/var".
    # NOTE(review): these overlap with the disc.filesys discovery rule
    # defined in this template -- confirm both are still wanted.
    - key: filesys.full.xvda2
      applications:
        - Disk
      value_type: float

    - key: filesys.full.xvda3
      applications:
        - Disk
      value_type: float

  # Discovery rule whose {#OSO_FILESYS} macro feeds the prototypes below.
  zdiscoveryrules:
    - name: disc.filesys
      key: disc.filesys
      lifetime: 1
      template_name: Template OS Linux
      description: "Dynamically register the filesystems"

  # One percent-full item is created per discovered filesystem.
  zitemprototypes:
    - discoveryrule_key: disc.filesys
      template_name: Template OS Linux
      name: "disc.filesys.full.{#OSO_FILESYS}"
      key: "disc.filesys.full[{#OSO_FILESYS}]"
      value_type: float
      description: "PCP filesys.full option. This is the percent full returned from pcp filesys.full"
      applications:
        - Disk

  # Per-filesystem alerts: warn above 90% full, high above 95% full.
  ztriggerprototypes:
    - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}'
      expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: warn

    - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free on {HOST.NAME}'
      expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>95'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: high

  # Static triggers evaluated against the items declared under zitems.
  ztriggers:
    - name: 'Filesystem: / has less than 10% free on {HOST.NAME}'
      expression: '{Template OS Linux:filesys.full.xvda2.last()}>90'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: warn

    - name: 'Filesystem: / has less than 5% free on {HOST.NAME}'
      expression: '{Template OS Linux:filesys.full.xvda2.last()}>95'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: high

    - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}'
      expression: '{Template OS Linux:filesys.full.xvda3.last()}>90'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: warn

    - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}'
      expression: '{Template OS Linux:filesys.full.xvda3.last()}>95'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'
      priority: high

    - name: 'Too many TOTAL processes on {HOST.NAME}'
      expression: '{Template OS Linux:proc.nprocs.last()}>5000'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc'
      priority: warn

    # NOTE(review): the name says "available memory" but the expression
    # tests mem.freemem, not mem.util.available -- confirm which metric is
    # intended before changing the expression.
    - name: 'Lack of available memory on {HOST.NAME}'
      expression: '{Template OS Linux:mem.freemem.last()}<30720000'
      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_memory.asciidoc'
      priority: warn
      description: 'Alert when free memory drops below 30720000 bytes (30000 KB x 1024, roughly 30 MB).'
|