1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
|
rule_files:
- '*.rules'
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
scrape_configs:
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# Keep only the default/kubernetes service endpoints for the https port. This
# will add targets for each API server which Kubernetes adds an endpoint to
# the default/kubernetes service.
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
# Scrape config for controllers.
#
# Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
# the controllers.
#
- job_name: 'kubernetes-controllers'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: endpoints
# Keep only the default/kubernetes service endpoints for the https port, and then
# set the port to 8444. This is the default configuration for the controllers on OpenShift
# masters.
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
- source_labels: [__address__]
action: replace
target_label: __address__
regex: (.+)(?::\d+)
replacement: $1:8444
# Scrape config for nodes.
#
# Each node exposes a /metrics endpoint that contains operational metrics for
# the Kubelet and other components.
- job_name: 'kubernetes-nodes'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
# Drop a very high cardinality metric that is incorrect in 3.7. It will be
# fixed in 3.9.
metric_relabel_configs:
- source_labels: [__name__]
action: drop
regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
# Scrape config for cAdvisor.
#
# Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
# reports container metrics for each running pod. Scrape those by default.
- job_name: 'kubernetes-cadvisor'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
{% if kubernetes_version | float() >= 1.7 | float() %}
metrics_path: /metrics/cadvisor
{% else %}
metrics_path: /metrics
{% endif %}
kubernetes_sd_configs:
- role: node
# Exclude a set of high cardinality metrics that can contribute to significant
# memory use in large clusters. These can be selectively enabled as necessary
# for medium or small clusters.
metric_relabel_configs:
- source_labels: [__name__]
action: drop
regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
- job_name: 'kubernetes-service-endpoints'
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# TODO: this should be per target
insecure_skip_verify: true
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
# only scrape infrastructure components
- source_labels: [__meta_kubernetes_namespace]
action: keep
regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+'
# drop infrastructure components managed by other scrape targets
- source_labels: [__meta_kubernetes_service_name]
action: drop
regex: 'prometheus-node-exporter'
# only those that have requested scraping
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: (.+)(?::\d+);(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
# Scrape config for node-exporter, which is expected to be running on port 9100.
- job_name: 'kubernetes-nodes-exporter'
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
kubernetes_sd_configs:
- role: node
metric_relabel_configs:
- source_labels: [__name__]
action: drop
regex: 'node_cpu|node_(disk|scrape_collector)_.+'
# preserve a subset of the network, netstat, vmstat, and filesystem series
- source_labels: [__name__]
action: replace
regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))'
target_label: __name__
replacement: renamed_$1
- source_labels: [__name__]
action: drop
regex: 'node_(netstat|vmstat|filesystem|network)_.+'
- source_labels: [__name__]
action: replace
regex: 'renamed_(.+)'
target_label: __name__
replacement: $1
# drop any partial expensive series
- source_labels: [__name__, device]
action: drop
regex: 'node_network_.+;veth.+'
- source_labels: [__name__, mountpoint]
action: drop
regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)'
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9100'
target_label: __address__
- source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
target_label: __instance__
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
# Scrape config for the template service broker
- job_name: 'openshift-template-service-broker'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
server_name: apiserver.openshift-template-service-broker.svc
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: openshift-template-service-broker;apiserver;https
alerting:
alertmanagers:
- scheme: http
static_configs:
- targets:
- "localhost:9093"
|