Create the namespace
kubectl create ns monitoring-loki
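If you want to verify the namespace before moving on (optional check):
kubectl get ns monitoring-loki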
Deploy Loki
Create the Loki PVC
cat >loki-pvc.yaml <<EOF
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: loki-pvc
  namespace: monitoring-loki
spec:
  accessModes: ["ReadWriteMany"]
  resources:
    requests:
      storage: 1Gi
  storageClassName: nfs    # replace with the dynamic StorageClass name used in your cluster
EOF
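The original jumps straight to the next manifest; a likely apply-and-verify step, assuming the nfs StorageClass can provision ReadWriteMany volumes, is:
kubectl apply -f loki-pvc.yaml
kubectl get pvc loki-pvc -n monitoring-loki    # STATUS should turn Bound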
Loki Deployment
cat loki-deployment.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-config
  namespace: monitoring-loki
data:
  loki.yaml: |
    auth_enabled: false
    server:
      http_listen_port: 3100
    ingester:
      lifecycler:
        address: 127.0.0.1
        ring:
          kvstore:
            store: inmemory
          replication_factor: 1
        final_sleep: 0s
      chunk_idle_period: 3m
      chunk_retain_period: 1m
      max_transfer_retries: 0
      wal:
        dir: /loki/wal
    schema_config:
      configs:
        - from: 2020-10-24
          store: boltdb-shipper
          object_store: filesystem
          schema: v11
          index:
            prefix: index_
            period: 24h
    storage_config:
      boltdb_shipper:
        active_index_directory: /loki/index
        cache_location: /loki/cache
        shared_store: filesystem
      filesystem:
        directory: /loki/chunks
    chunk_store_config:
      max_look_back_period: 0s
    table_manager:
      retention_deletes_enabled: false
      retention_period: 0s
    compactor:
      working_directory: /loki/compactor
      shared_store: filesystem
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: loki
  namespace: monitoring-loki
spec:
  replicas: 1
  selector:
    matchLabels:
      name: loki
  template:
    metadata:
      labels:
        name: loki
    spec:
      containers:
        - name: loki
          image: docker.1ms.run/grafana/loki:2.8.1
          args:
            - -config.file=/etc/loki/loki.yaml
          ports:
            - containerPort: 3100
          volumeMounts:
            - name: config
              mountPath: /etc/loki
            - name: storage
              mountPath: /loki
      volumes:
        - name: config
          configMap:
            name: loki-config
        - name: storage
          persistentVolumeClaim:
            claimName: loki-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: loki
  namespace: monitoring-loki
spec:
  ports:
    - port: 3100
      targetPort: 3100
  selector:
    name: loki
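Applying the manifest and checking that Loki comes up could look like the following sketch; the last command hits Loki's /ready endpoint from inside the cluster, and the curlimages/curl image is an assumption (any image with curl works):
kubectl apply -f loki-deployment.yaml
kubectl -n monitoring-loki rollout status deployment/loki
kubectl -n monitoring-loki run loki-ready-check --rm -it --restart=Never --image=curlimages/curl -- curl -s http://loki:3100/ready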
Deploy Promtail
Promtail PVC
cat >Promtail-pvc.yaml <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: promtail-pvc
  namespace: monitoring-loki
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
  storageClassName: nfs
EOF
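Apply it the same way as the Loki claim:
kubectl apply -f Promtail-pvc.yaml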
Create RBAC
apiVersion: v1
kind: ServiceAccount
metadata:
  name: promtail
  namespace: monitoring-loki
  labels:
    app: promtail
    component: log-collector
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: promtail
  labels:
    app: promtail
    component: log-collector
rules:
  - apiGroups: [""]
    resources:
      - nodes          # basic node information
      - nodes/proxy    # added: access to the Kubelet API (use with caution)
      - pods           # Pod discovery
      - pods/log       # reading logs (core permission)
      - services       # service discovery
      - endpoints      # added: endpoint monitoring
      - namespaces     # namespace metadata
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: promtail
  labels:
    app: promtail
    component: log-collector
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: promtail
subjects:
  - kind: ServiceAccount
    name: promtail
    namespace: monitoring-loki
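The RBAC manifest has no heredoc wrapper above; assuming it is saved as promtail-rbac.yaml (file name chosen here, not from the original), apply it with:
kubectl apply -f promtail-rbac.yaml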
Promtail ConfigMap
apiVersion: v1
kind: ConfigMap
metadata:
  name: promtail-config
  namespace: monitoring-loki
  labels:
    app.kubernetes.io/name: promtail    # keep consistent with the DaemonSet labels
data:
  promtail.yaml: |
    # ================= Global configuration =================
    server:
      http_listen_port: 3101    # Promtail's own HTTP listen port
      grpc_listen_port: 0       # 0 disables gRPC
      log_level: info           # Promtail's own log level
    client:
      # Backoff/retry policy when pushes to Loki fail
      backoff_config:
        max_period: 5m          # maximum retry interval
        max_retries: 10         # maximum number of retries
        min_period: 500ms       # initial retry interval
      batchsize: 1048576        # 1 MiB, maximum batch size per push
      batchwait: 1s             # wait at most 1 s before sending a batch
      external_labels: {}       # static labels attached to every log entry
      timeout: 10s              # timeout for a single push
      url: http://loki:3100/loki/api/v1/push    # Loki push endpoint
    positions:
      filename: /var/lib/promtail-positions/positions.yaml    # records read offsets so logs are not re-collected after a restart
    # ================= Scrape configuration =================
    scrape_configs:
      # ========== Docker container logs (legacy path, remove if not needed) ==========
      - job_name: docker-containers
        pipeline_stages:
          - docker: {}              # parse the Docker JSON log format
        static_configs:
          - targets: [localhost]    # placeholder, not actually used
            labels:
              job: docker           # tag these logs with job=docker
              __path__: /data/docker_storage/containers/*/*.log    # legacy Docker data path
              host: ${HOSTNAME}     # environment variable injected by the DaemonSet
      # ========== Main Kubernetes Pod log scrape ==========
      - job_name: kubernetes-pods
        kubernetes_sd_configs:
          - role: pod    # service discovery based on Pod objects
        pipeline_stages:
          - cri: {}      # parse the CRI/containerd log format
        relabel_configs:
          # Drop system namespaces: kube-system, kube-public, loki
          - action: drop
            regex: 'kube-system|kube-public|loki'
            source_labels: [__meta_kubernetes_namespace]
          # Build the final log file path
          # Path template: /var/log/pods/<namespace>_<pod_name>_<pod_uid>/<container_name>/*.log
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            separator: /
            target_label: __path__
            replacement: /var/log/pods/*$1/*.log
          # Map Pod labels to log labels
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)    # copy every Pod label
          # Fixed namespace, Pod and container labels
          - action: replace
            source_labels: [__meta_kubernetes_namespace]
            target_label: namespace
          - action: replace
            source_labels: [__meta_kubernetes_pod_name]
            target_label: pod
          - action: replace
            source_labels: [__meta_kubernetes_pod_container_name]
            target_label: container
          # Node name
          - action: replace
            source_labels: [__meta_kubernetes_node_name]
            target_label: node
          # Common application labels: app, release
          - action: replace
            source_labels: [__meta_kubernetes_pod_label_app]
            target_label: app
            regex: (.+)    # only set when the value is non-empty
          - action: replace
            source_labels: [__meta_kubernetes_pod_label_release]
            target_label: release
            regex: (.+)
      # ========== Controller-only log scrape (optional, for Deployment/StatefulSet pods only) ==========
      - job_name: kubernetes-controllers
        kubernetes_sd_configs:
          - role: pod
        pipeline_stages:
          - cri: {}
        relabel_configs:
          # Drop the same system namespaces
          - action: drop
            regex: 'kube-system|kube-public|loki'
            source_labels: [__meta_kubernetes_namespace]
          # Keep only Pods managed by a controller (name ends with -<hash>)
          - action: keep
            regex: '[0-9a-z-.]+-[0-9a-f]{8,10}'
            source_labels: [__meta_kubernetes_pod_controller_name]
          # Extract the controller name (strip the trailing hash)
          - action: replace
            regex: '([0-9a-z-.]+)-[0-9a-f]{8,10}'
            source_labels: [__meta_kubernetes_pod_controller_name]
            target_label: controller
          # Node name
          - action: replace
            source_labels: [__meta_kubernetes_node_name]
            target_label: node
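Assuming the ConfigMap is saved as promtail-config.yaml (again a file name chosen here), apply it before the DaemonSet so the config volume can be mounted:
kubectl apply -f promtail-config.yaml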
Promtail DaemonSet (promtail-pod.yaml)
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: promtail
  namespace: monitoring-loki
  labels:
    app.kubernetes.io/name: promtail
    app.kubernetes.io/version: "2.8.1"
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: promtail
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
  template:
    metadata:
      labels:
        app.kubernetes.io/name: promtail
    spec:
      serviceAccountName: promtail
      hostNetwork: false
      hostPID: false
      hostIPC: false
      securityContext:
        runAsUser: 0
        runAsGroup: 0
        fsGroup: 0
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: promtail
          image: docker.1ms.run/grafana/promtail:2.8.1
          imagePullPolicy: IfNotPresent
          args:
            - -config.file=/etc/promtail/promtail.yaml
            - -config.expand-env=true    # expand ${HOSTNAME} referenced in the ConfigMap
            - -client.url=http://loki:3100/loki/api/v1/push
          env:
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          ports:
            - containerPort: 3101
              name: http-metrics
              protocol: TCP
          volumeMounts:
            - name: config
              mountPath: /etc/promtail
            - name: pods-logs
              mountPath: /var/log/pods
              readOnly: true
            - name: container-logs
              mountPath: /var/log/containers
              readOnly: true
            - name: positions
              mountPath: /var/lib/promtail-positions
          securityContext:
            readOnlyRootFilesystem: true
            privileged: false
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
              add:
                - DAC_READ_SEARCH
          readinessProbe:
            httpGet:
              path: /ready
              port: http-metrics
            initialDelaySeconds: 10
            timeoutSeconds: 1
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - operator: Exists    # tolerate all other taints
      volumes:
        - name: config
          configMap:
            name: promtail-config
        - name: pods-logs
          hostPath:
            path: /var/log/pods
            type: Directory
        - name: container-logs
          hostPath:
            path: /var/log/containers
            type: Directory
        - name: positions
          persistentVolumeClaim:
            claimName: promtail-pvc
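After applying the DaemonSet, every schedulable node should run one promtail pod; a quick sanity check might look like this (replace <promtail-pod-name> with a real pod name; 3101 matches http_listen_port in the ConfigMap):
kubectl apply -f promtail-pod.yaml
kubectl -n monitoring-loki get pods -l app.kubernetes.io/name=promtail -o wide
kubectl -n monitoring-loki port-forward <promtail-pod-name> 3101:3101    # then open http://127.0.0.1:3101/targets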
Grafana
Create the Grafana PVC
cat >grafana-pvc.yaml <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana-pvc
  namespace: monitoring-loki
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
  storageClassName: nfs
EOF
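Apply the claim:
kubectl apply -f grafana-pvc.yaml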
Grafana Deployment
cat grafana-deployment.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-config
  namespace: monitoring-loki
data:
  grafana.ini: |
    [server]
    root_url = %(protocol)s://%(domain)s:%(http_port)s/grafana/
    serve_from_sub_path = true
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana
  namespace: monitoring-loki
spec:
  replicas: 1
  selector:
    matchLabels:
      name: grafana
  template:
    metadata:
      labels:
        name: grafana
    spec:
      containers:
        - name: grafana
          image: docker.1ms.run/grafana/grafana:9.5.2
          ports:
            - containerPort: 3000
          volumeMounts:
            - name: grafana-config
              mountPath: /etc/grafana
            - name: grafana-storage
              mountPath: /var/lib/grafana
          env:
            - name: GF_SECURITY_ADMIN_PASSWORD
              value: "admin"
      volumes:
        - name: grafana-config
          configMap:
            name: grafana-config
        - name: grafana-storage
          persistentVolumeClaim:
            claimName: grafana-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: monitoring-loki
spec:
  ports:
    - port: 3000
      targetPort: 3000
  selector:
    name: grafana
---
apiVersion: v1
kind: Service
metadata:
  name: grafana-nodeport
  namespace: monitoring-loki
spec:
  type: NodePort    # expose the Service outside the cluster via a node port
  ports:
    - port: 3000
      targetPort: 3000
      nodePort: 32000    # pick a port in the 30000-32767 range
  selector:
    name: grafana
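Apply the Grafana manifest and wait for the rollout:
kubectl apply -f grafana-deployment.yaml
kubectl -n monitoring-loki rollout status deployment/grafana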
View the logs
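The query steps are not spelled out above, so here is an assumed walkthrough based on the manifests: open Grafana through the NodePort at http://<node-ip>:32000/grafana/ and log in with admin / admin (as set by GF_SECURITY_ADMIN_PASSWORD), add a Loki data source with the URL http://loki:3100, then query logs in Explore with LogQL, for example:
{namespace="monitoring-loki"}    # the namespace label comes from the relabel_configs above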


