使用prometheus来监控ingress-nginx
ingress-nginx配置了metrics
通过标签去查看ingress的pod
[root@master01 manifests]# kubectl get pods -n ingress-nginx -l app=ingress-nginx -owide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-ingress-controller-7rxlt 1/1 Running 0 6m30s 10.1.19.56 10.1.19.56
nginx-ingress-controller-cgsmn 1/1 Running 0 6m30s 10.1.19.55 10.1.19.55
nginx-ingress-controller-kxw97 1/1 Running 0 6m30s 10.1.19.54 10.1.19.54
#分别部署在 10.1.19.54、10.1.19.55、10.1.19.56 三台机器上，我们要监控这三个 pod 的 ingress-nginx
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
删了重建
---
# ServiceMonitor (Prometheus Operator custom resource) describing how the
# ingress-nginx metrics endpoint is scraped.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: ingress-nginx-monitor  # monitor name; per the original note, this is what the Prometheus UI shows
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
spec:
  # Namespaces in which this ServiceMonitor looks for targets.
  namespaceSelector:
    matchNames:
      - ingress-nginx  # only the ingress-nginx namespace
  # These labels must match the labels of the object being monitored.
  selector:
    matchLabels:
      app: ingress-nginx
  endpoints:
    - port: metrics   # name of the port on the target Service that exposes metrics
      path: /metrics  # HTTP path on the target that serves the metrics
      interval: 30s   # scrape frequency: once every 30s
  # jobLabel: app  # label used to group targets into one Prometheus job; with
  #                # `app`, all targets sharing that label form a single job.
[root@master01 manifests]# cat /data/aqsc_k8s_yaml/
ingress-nginx/ kube-prometheus/ nfs/ prometheus/
[root@master01 manifests]# cat /data/aqsc_k8s_yaml/ingress-nginx/ingress-nginx.yaml
# Namespace used by the namespaced ingress-nginx resources in this manifest.
apiVersion: v1
kind: Namespace
metadata:
  name: ingress-nginx
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
---
# ConfigMap passed to the controller via
# --configmap=$(POD_NAMESPACE)/nginx-configuration; declared without data.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-configuration
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
---
# ConfigMap passed to the controller via
# --tcp-services-configmap=$(POD_NAMESPACE)/tcp-services; declared without data.
apiVersion: v1
kind: ConfigMap
metadata:
  name: tcp-services
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
---
# ConfigMap passed to the controller via
# --udp-services-configmap=$(POD_NAMESPACE)/udp-services; declared without data.
apiVersion: v1
kind: ConfigMap
metadata:
  name: udp-services
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
---
# Identity the controller pods run as (referenced by serviceAccountName in the
# DaemonSet and by the RBAC bindings in this manifest).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nginx-ingress-serviceaccount
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
---
# Cluster-scoped permissions for the nginx ingress controller.
# Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API is deprecated and
# no longer served from Kubernetes 1.22 onward.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: nginx-ingress-clusterrole
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
rules:
  # List/watch the core objects listed below.
  - apiGroups:
      - ""
    resources:
      - configmaps
      - endpoints
      - nodes
      - pods
      - secrets
    verbs:
      - list
      - watch
  # Read individual Node objects.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
  # Read Services.
  - apiGroups:
      - ""
    resources:
      - services
    verbs:
      - get
      - list
      - watch
  # Read Ingress objects.
  # NOTE(review): only the legacy "extensions" group is granted here; a newer
  # controller that reads networking.k8s.io Ingresses would need that group too.
  - apiGroups:
      - "extensions"
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  # Emit Events.
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - create
      - patch
  # Update the status subresource of Ingress objects.
  - apiGroups:
      - "extensions"
    resources:
      - ingresses/status
    verbs:
      - update
---
# Namespaced permissions for the nginx ingress controller inside ingress-nginx.
# Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API is deprecated and
# no longer served from Kubernetes 1.22 onward.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: nginx-ingress-role
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
rules:
  - apiGroups:
      - ""
    resources:
      - configmaps
      - pods
      - secrets
      - namespaces
    verbs:
      - get
  - apiGroups:
      - ""
    resources:
      - configmaps
    resourceNames:
      # Defaults to "<election-id>-<ingress-class>"
      # Here: "<ingress-controller-leader>-<nginx>"
      # This has to be adapted if you change either parameter
      # when launching the nginx-ingress-controller.
      - "ingress-controller-leader-nginx"
    verbs:
      - get
      - update
  # Creation cannot be restricted by resourceNames, so it is granted separately.
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - create
  - apiGroups:
      - ""
    resources:
      - endpoints
    verbs:
      - get
---
# Binds Role nginx-ingress-role to the controller's ServiceAccount.
# Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API is deprecated and
# no longer served from Kubernetes 1.22 onward.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: nginx-ingress-role-nisa-binding
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: nginx-ingress-role
subjects:
  - kind: ServiceAccount
    name: nginx-ingress-serviceaccount
    namespace: ingress-nginx
---
# Binds ClusterRole nginx-ingress-clusterrole to the controller's ServiceAccount.
# Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API is deprecated and
# no longer served from Kubernetes 1.22 onward.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: nginx-ingress-clusterrole-nisa-binding
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: nginx-ingress-clusterrole
subjects:
  - kind: ServiceAccount
    name: nginx-ingress-serviceaccount
    namespace: ingress-nginx
---
# DaemonSet for the nginx ingress controller. Pods use the host network and
# are scheduled only on nodes carrying aqsc/ingress-controller-ready="true".
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nginx-ingress-controller
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
spec:
  selector:
    matchLabels:
      app: ingress-nginx
      app.kubernetes.io/name: ingress-nginx
      app.kubernetes.io/part-of: ingress-nginx
  template:
    metadata:
      labels:
        app: ingress-nginx
        app.kubernetes.io/name: ingress-nginx
        app.kubernetes.io/part-of: ingress-nginx
      annotations:
        # Scrape hints pointing at the controller's metrics port.
        prometheus.io/port: "10254"
        prometheus.io/scrape: "true"
    spec:
      serviceAccountName: nginx-ingress-serviceaccount
      hostNetwork: true
      # To make a node eligible:
      #   kubectl label nodes <node-name> aqsc/ingress-controller-ready="true"
      nodeSelector:
        aqsc/ingress-controller-ready: "true"
      containers:
        - name: nginx-ingress-controller
          # image: siriuszg/nginx-ingress-controller:0.20.0
          image: images.aqsc.com/siriuszg/nginx-ingress-controller:0.20.0
          args:
            - /nginx-ingress-controller
            - --configmap=$(POD_NAMESPACE)/nginx-configuration
            - --tcp-services-configmap=$(POD_NAMESPACE)/tcp-services
            - --udp-services-configmap=$(POD_NAMESPACE)/udp-services
            - --publish-service=$(POD_NAMESPACE)/ingress-nginx
            - --annotations-prefix=nginx.ingress.kubernetes.io
          # POD_NAME / POD_NAMESPACE are filled in from the pod's own metadata;
          # POD_NAMESPACE is expanded in the args above.
          env:
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          ports:
            - name: http
              containerPort: 80
            - name: https
              containerPort: 443
            - name: metrics
              containerPort: 10254
          securityContext:
            allowPrivilegeEscalation: true
            capabilities:
              drop:
                - ALL
              add:
                - NET_BIND_SERVICE
            # www-data -> 33
            runAsUser: 33
          # Both probes hit /healthz on port 10254, the same port as "metrics".
          livenessProbe:
            httpGet:
              path: /healthz
              port: 10254
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /healthz
              port: 10254
              scheme: HTTP
            periodSeconds: 10
            timeoutSeconds: 10
            successThreshold: 1
            failureThreshold: 3
---
# Service in front of the controller pods. The port named "metrics" is the one
# a ServiceMonitor endpoint with `port: metrics` refers to.
apiVersion: v1
kind: Service
metadata:
  name: ingress-nginx
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
spec:
  # type: NodePort
  selector:
    app: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
  ports:
    - name: metrics
      protocol: TCP
      port: 10254
      targetPort: 10254
    - name: http
      protocol: TCP
      port: 80
      targetPort: 80
    - name: https
      protocol: TCP
      port: 443
      targetPort: 443
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.
- 50.
- 51.
- 52.
- 53.
- 54.
- 55.
- 56.
- 57.
- 58.
- 59.
- 60.
- 61.
- 62.
- 63.
- 64.
- 65.
- 66.
- 67.
- 68.
- 69.
- 70.
- 71.
- 72.
- 73.
- 74.
- 75.
- 76.
- 77.
- 78.
- 79.
- 80.
- 81.
- 82.
- 83.
- 84.
- 85.
- 86.
- 87.
- 88.
- 89.
- 90.
- 91.
- 92.
- 93.
- 94.
- 95.
- 96.
- 97.
- 98.
- 99.
- 100.
- 101.
- 102.
- 103.
- 104.
- 105.
- 106.
- 107.
- 108.
- 109.
- 110.
- 111.
- 112.
- 113.
- 114.
- 115.
- 116.
- 117.
- 118.
- 119.
- 120.
- 121.
- 122.
- 123.
- 124.
- 125.
- 126.
- 127.
- 128.
- 129.
- 130.
- 131.
- 132.
- 133.
- 134.
- 135.
- 136.
- 137.
- 138.
- 139.
- 140.
- 141.
- 142.
- 143.
- 144.
- 145.
- 146.
- 147.
- 148.
- 149.
- 150.
- 151.
- 152.
- 153.
- 154.
- 155.
- 156.
- 157.
- 158.
- 159.
- 160.
- 161.
- 162.
- 163.
- 164.
- 165.
- 166.
- 167.
- 168.
- 169.
- 170.
- 171.
- 172.
- 173.
- 174.
- 175.
- 176.
- 177.
- 178.
- 179.
- 180.
- 181.
- 182.
- 183.
- 184.
- 185.
- 186.
- 187.
- 188.
- 189.
- 190.
- 191.
- 192.
- 193.
- 194.
- 195.
- 196.
- 197.
- 198.
- 199.
- 200.
- 201.
- 202.
- 203.
- 204.
- 205.
- 206.
- 207.
- 208.
- 209.
- 210.
- 211.
- 212.
- 213.
- 214.
- 215.
- 216.
- 217.
- 218.
- 219.
- 220.
- 221.
- 222.
- 223.
- 224.
- 225.
- 226.
- 227.
- 228.
- 229.
- 230.
- 231.
- 232.
- 233.
- 234.
- 235.
- 236.
- 237.
- 238.
- 239.
- 240.
- 241.
- 242.
- 243.
- 244.
- 245.
- 246.
- 247.
- 248.
- 249.
- 250.
- 251.
- 252.
- 253.
- 254.
- 255.
- 256.
- 257.
- 258.
- 259.
- 260.
- 261.
- 262.
- 263.
- 264.
- 265.
- 266.
- 267.
- 268.
- 269.
- 270.
- 271.
- 272.
- 273.
- 274.
- 275.
- 276.
- 277.
- 278.
- 279.
- 280.
- 281.
- 282.
- 283.
- 284.
- 285.
- 286.
- 287.
- 288.
- 289.
- 290.
- 291.
- 292.
- 293.
- 294.
- 295.
- 296.
- 297.
- 298.
- 299.
- 300.
- 301.
- 302.
- 303.
- 304.
- 305.
- 306.
- 307.
- 308.
- 309.
- 310.
- 311.
- 312.
- 313.
- 314.
- 315.
- 316.
- 317.
- 318.
- 319.
- 320.
- 321.
- 322.
- 323.
- 324.
- 325.
- 326.
- 327.
- 328.
- 329.
- 330.
- 331.
- 332.
- 333.
- 334.
过滤一下暴露的 metrics 端口号（上面的配置文件里已经暴露了）
[root@master01 manifests]# kubectl get pods -n ingress-nginx nginx-ingress-controller-7rxlt -o yaml | egrep -A2 "port|metrics"
prometheus.io/port: "10254"
prometheus.io/scrape: "true"
creationTimestamp: "2025-04-17T05:45:26Z"
--
f:prometheus.io/port: {}
f:prometheus.io/scrape: {}
f:generateName: {}
--
f:port: {}
f:scheme: {}
f:initialDelaySeconds: {}
--
f:ports:
.: {}
k:{"containerPort":80,"protocol":"TCP"}:
--
f:port: {}
f:scheme: {}
f:periodSeconds: {}
--
port: 10254
scheme: HTTP
initialDelaySeconds: 10
--
ports:
- containerPort: 80
hostPort: 80
--
port: 10254
scheme: HTTP
periodSeconds: 10
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
[root@master01 manifests]# kubectl get svc -n ingress-nginx ingress-nginx
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
ingress-nginx ClusterIP 10.101.95.4 <none> 80/TCP,443/TCP 14h
#只暴露了 443 和 80，没有 metrics 的端口号（10254）
[root@master01 manifests]# kubectl edit svc -n ingress-nginx ingress-nginx
# Port entry added under spec.ports of the ingress-nginx Service.
# The name must be "metrics": the ServiceMonitor endpoint selects the Service
# port by name (`port: metrics`), so any other name leaves it with no target.
- name: metrics
  port: 10254
  protocol: TCP
  targetPort: 10254
[root@master01 manifests]# kubectl get svc -n ingress-nginx
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
ingress-nginx ClusterIP 10.101.95.4 <none> 10254/TCP,80/TCP,443/TCP 16h
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
把这个端口也加到 ingress-nginx.yaml 的 Service 里面
看下metrics指标
[root@master01 manifests]# curl 10.1.19.54:10254/metrics | tail -3
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 11794 100 11794 0 0 5758k 0 --:--:-- --:--:-- --:--:-- 5758k
promhttp_metric_handler_requests_total{code="200"} 5
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0
[root@master01 manifests]# curl 10.1.19.55:10254/metrics | tail -3
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 11775 100 11775 0 0 5749k 0 --:--:-- --:--:-- --:--:-- 5749k
promhttp_metric_handler_requests_total{code="200"} 0
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0
[root@master01 manifests]# curl 10.1.19.56:10254/metrics | tail -3
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 11782 100 11782 0 0 5752k 0 --:--:-- --:--:-- --:--:-- 5752k
promhttp_metric_handler_requests_total{code="200"} 0
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
可以看到数据
创建个ServiceMonitor
ServiceMonitor
是Prometheus Operator提供的一种自定义资源(Custom Resource, CR),用于定义Prometheus监控服务发现的目标。它允许用户指定哪些Kubernetes服务(Service)和Pod的监控数据应该被Prometheus抓取,以及抓取数据的频率、路径等配置。
---
# ServiceMonitor (Prometheus Operator custom resource) describing how the
# ingress-nginx metrics endpoint is scraped.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: ingress-nginx-monitor  # monitor name; per the original note, this is what the Prometheus UI shows
  namespace: ingress-nginx
  labels:
    app: ingress-nginx
spec:
  # Namespaces in which this ServiceMonitor looks for targets.
  namespaceSelector:
    matchNames:
      - ingress-nginx  # only the ingress-nginx namespace
  # These labels must match the labels of the object being monitored.
  selector:
    matchLabels:
      app: ingress-nginx
  endpoints:
    - port: metrics   # name of the port on the target Service that exposes metrics
      path: /metrics  # HTTP path on the target that serves the metrics
      interval: 30s   # scrape frequency: once every 30s
  # jobLabel: app  # label used to group targets into one Prometheus job; with
  #                # `app`, all targets sharing that label form a single job.
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
root@guoguo-M5-Pro:/apps/k8s/prometheus/servicemonitor# kubectl get svc -n kube-system nginx-ingress-lb -oyaml
.....
.....
ports:
- name: http
port: 80
protocol: TCP
targetPort: 80
- name: https
port: 443
protocol: TCP
targetPort: 443
- name: metrics #这里就是给这个端口取个名字，供上面 ServiceMonitor 的 spec.endpoints[].port 引用
port: 10254
protocol: TCP
targetPort: 10254
selector:
app: ingress-nginx
sessionAffinity: None
type: ClusterIP
status:
loadBalancer: {}
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
新创建的prometheus 都会遇到一个权限的报错
现在 prometheus ui 页面是看不到监控项的
查看报错
root@guoguo-M5-Pro:/apps/k8s/prometheus/servicemonitor# kubectl -n monitoring logs prometheus-k8s-0 -c prometheus
......
......
level=error ts=2024-08-25T07:07:41.107Z caller=klog.go:96 component=k8s_client_runtime func=ErrorDepth msg="pkg/mod/k8s.io/client-go@v0.20.5/tools/cache/reflector.go:167: Failed to watch *v1.Endpoints: failed to list *v1.Endpoints: endpoints is forbidden: User \"system:serviceaccount:monitoring:prometheus-k8s\" cannot list resource \"endpoints\" in API group \"\" in the namespace \"ingress-nginx\""
# 当看到forbidden 就是权限问题
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
root@guoguo-M5-Pro:/apps/k8s/prometheus/servicemonitor# kubectl -n monitoring logs prometheus-k8s-1 -c prometheus
......
......
level=error ts=2024-08-25T07:09:33.891Z caller=klog.go:96 component=k8s_client_runtime func=ErrorDepth msg="pkg/mod/k8s.io/client-go@v0.20.5/tools/cache/reflector.go:167: Failed to watch *v1.Endpoints: failed to list *v1.Endpoints: endpoints is forbidden: User \"system:serviceaccount:monitoring:prometheus-k8s\" cannot list resource \"endpoints\" in API group \"\" in the namespace \"ingress-nginx\""
- 1.
- 2.
- 3.
- 4.
去修改prometheus 的集群角色clusterrole
root@guoguo-M5-Pro:/apps/k8s/prometheus/servicemonitor# kubectl edit clusterrole prometheus-k8s
......
...... #rules 部分改为下面 权限
# Replacement `rules` section for ClusterRole prometheus-k8s. The first rule
# grants get/list/watch on endpoints (plus nodes, services, pods, nodes/proxy),
# which is the permission the "endpoints is forbidden" log error reports missing.
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - services
      - endpoints
      - pods
      - nodes/proxy
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
      - nodes/metrics
    verbs:
      - get
  # Non-resource URL access to /metrics.
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
登录prometheus ui网站 就可以看到了
yaml配置文件也修改下
改为
# ClusterRole prometheus-k8s with read access to nodes, services, endpoints
# and pods, matching the rules applied with `kubectl edit`.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus-k8s
  labels:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: "2.26.0"
rules:
  # Read access to the core objects listed below, including endpoints.
  - apiGroups:
      - ""
    resources:
      - nodes
      - services
      - endpoints
      - pods
      - nodes/proxy
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
      - nodes/metrics
    verbs:
      - get
  # Non-resource URL access to /metrics.
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
这样就完成了