最终版的 Kubernetes 容器内存使用巡检脚本,特点如下:
- 基于 `crictl inspect` 和 `jq`,适配 containerd 运行时
- 排除 sandbox 容器(pause 容器)
- 安全提取容器名、Pod 名、Namespace(防止 null)
- 显示容器的 Page Cache(cache)、RSS 内存占用
- 计算 Page Cache 占节点总内存百分比
- 额外显示容器镜像名和内存限制
- 支持参数指定 Top N 容器,默认 10
脚本内容:top_container_memory_final.sh
#!/bin/bash
#
# top_container_memory_final.sh - list the containers on this node that hold
# the most page cache.
#
# Usage: top_container_memory_final.sh [TOP_N]
#   TOP_N  number of rows to print (non-negative integer, default 10)
#
# Requires: crictl, jq, and read access to /proc/meminfo and /sys/fs/cgroup
# (normally this means running as root on the node).
#
# Output: a tab-separated table on stdout, sorted by page cache descending:
#   CACHE(KB) RSS(KB) CACHE% CONTAINER_ID NAMESPACE POD CONTAINER IMAGE MEM_LIMIT
#
# Exit status: 0 on success, 1 on a missing dependency or unreadable data,
# 2 on an invalid TOP_N.

# No `set -e`: the per-container steps are best effort and are checked
# explicitly; a container that disappears mid-scan must not abort the report.
set -uo pipefail

TOP_N=${1:-10}

# Print a message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# jq_field JSON FILTER
# Print the raw (unquoted) result of the jq FILTER applied to JSON.
jq_field() {
  jq -r "$2" <<<"$1" 2>/dev/null
}

# read_stat_bytes FILE KEY...
# Print the value of the first KEY that is present in the memory.stat-style
# FILE, or 0 when none is. Several keys are accepted because cgroup v1 names
# the counters cache/rss while cgroup v2 names them file/anon.
read_stat_bytes() {
  local stat_file=$1
  shift
  awk -v keys="$*" '
    BEGIN { n = split(keys, want, " ") }
    { seen[$1] = $2 }
    END {
      for (i = 1; i <= n; i++) {
        if (want[i] in seen) {
          print seen[want[i]]
          exit
        }
      }
      print 0
    }
  ' "$stat_file" 2>/dev/null
}

# Reject anything head(1) would misinterpret (negative counts, words, ...).
if [[ ! "$TOP_N" =~ ^[0-9]+$ ]]; then
  printf 'Usage: %s [TOP_N]  (TOP_N must be a non-negative integer)\n' "${0##*/}" >&2
  exit 2
fi

printf 'Collecting container memory usage on node...\n\n'

# Check dependencies.
for cmd in crictl jq; do
  command -v "$cmd" >/dev/null 2>&1 || die "$cmd is required but not installed. Aborting."
done

# MemTotal in /proc/meminfo is expressed in kB.
TOTAL_MEM_KB=$(awk '$1 == "MemTotal:" { print $2; exit }' /proc/meminfo 2>/dev/null)
[[ "$TOTAL_MEM_KB" =~ ^[1-9][0-9]*$ ]] || die "Cannot detect total memory"

TMPFILE=$(mktemp) || die "Cannot create temporary file"
# Remove the scratch file on every exit path, including errors and Ctrl-C.
trap 'rm -f -- "$TMPFILE"' EXIT

CONTAINER_IDS=$(crictl ps -q) || die "crictl ps failed; is the container runtime reachable?"

while IFS= read -r cid; do
  [[ -n "$cid" ]] || continue

  INFO=$(crictl inspect "$cid" 2>/dev/null) || continue
  [[ -n "$INFO" ]] || continue

  # Skip sandbox (pause) containers.
  IS_SANDBOX=$(jq_field "$INFO" '.info.config.labels."io.kubernetes.container.name" // empty')
  if [[ "$IS_SANDBOX" == "POD" ]]; then
    continue
  fi

  # Extract fields with fallbacks so a missing key never yields "null".
  # NOTE(review): the exact JSON layout of `crictl inspect` varies between
  # runtime versions; the extra paths below are assumptions to confirm on the
  # target node. The kubelet labels are tried first for pod name/namespace
  # because the container metadata carries the container name, not the pod's.
  CONTAINER_NAME=$(jq_field "$INFO" '
    .info.name
    // .info.config.metadata.name
    // .status.metadata.name
    // .info.config.labels."io.kubernetes.container.name"
    // "unknown-container"')
  NAMESPACE=$(jq_field "$INFO" '
    .info.config.labels."io.kubernetes.pod.namespace"
    // .status.labels."io.kubernetes.pod.namespace"
    // .info.config.metadata.namespace
    // "unknown-namespace"')
  POD_NAME=$(jq_field "$INFO" '
    .info.config.labels."io.kubernetes.pod.name"
    // .status.labels."io.kubernetes.pod.name"
    // .info.config.metadata.name
    // "unknown-pod"')
  IMAGE=$(jq_field "$INFO" '
    .info.imageSpec.image
    // .info.config.image.image
    // .status.image.image
    // "unknown-image"')
  MEM_LIMIT=$(jq_field "$INFO" '
    .info.config.linux.resources.memoryLimitInBytes
    // .info.config.linux.resources.memory_limit_in_bytes
    // .info.runtimeSpec.linux.resources.memory.limit
    // "unlimited"')

  # jq prints nothing when INFO is not valid JSON; keep the columns populated.
  CONTAINER_NAME=${CONTAINER_NAME:-unknown-container}
  NAMESPACE=${NAMESPACE:-unknown-namespace}
  POD_NAME=${POD_NAME:-unknown-pod}
  IMAGE=${IMAGE:-unknown-image}
  MEM_LIMIT=${MEM_LIMIT:-unlimited}

  # Locate this container's memory.stat; -quit stops at the first match.
  CGROUP_PATH=$(find /sys/fs/cgroup -type f -name memory.stat -path "*${cid}*" -print -quit 2>/dev/null)
  [[ -f "$CGROUP_PATH" ]] || continue

  # memory.stat reports bytes. cgroup v1 exposes cache/rss, v2 file/anon.
  CACHE_BYTES=$(read_stat_bytes "$CGROUP_PATH" cache file)
  RSS_BYTES=$(read_stat_bytes "$CGROUP_PATH" rss anon)
  [[ "$CACHE_BYTES" =~ ^[0-9]+$ ]] || CACHE_BYTES=0
  [[ "$RSS_BYTES" =~ ^[0-9]+$ ]] || RSS_BYTES=0

  # Convert to KB so the values match the column headers and MemTotal's unit.
  # 10# forces base 10 so a value with leading zeros is not parsed as octal.
  CACHE_KB=$(( 10#$CACHE_BYTES / 1024 ))
  RSS_KB=$(( 10#$RSS_BYTES / 1024 ))

  # Page cache as a percentage of the node's total memory.
  CACHE_PERCENT=$(awk -v cache="$CACHE_KB" -v total="$TOTAL_MEM_KB" \
    'BEGIN { printf "%.2f", (cache / total) * 100 }')

  # Render the limit in MB for readability. A limit of 0 means "not set";
  # non-numeric values (e.g. "unlimited") are shown as they are, without the
  # MB suffix.
  if [[ "$MEM_LIMIT" =~ ^0+$ ]]; then
    MEM_LIMIT_TEXT="unlimited"
  elif [[ "$MEM_LIMIT" =~ ^[0-9]+$ ]]; then
    MEM_LIMIT_TEXT=$(awk -v bytes="$MEM_LIMIT" 'BEGIN { printf "%.2fMB", bytes / 1024 / 1024 }')
  else
    MEM_LIMIT_TEXT=$MEM_LIMIT
  fi

  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    "$CACHE_KB" "$RSS_KB" "$CACHE_PERCENT" "$cid" \
    "$NAMESPACE" "$POD_NAME" "$CONTAINER_NAME" "$IMAGE" "$MEM_LIMIT_TEXT" >> "$TMPFILE"
done <<<"$CONTAINER_IDS"

# Table header.
printf 'CACHE(KB)\tRSS(KB)\tCACHE%%\tCONTAINER_ID\tNAMESPACE\tPOD\tCONTAINER\tIMAGE\tMEM_LIMIT\n'
echo "-----------------------------------------------------------------------------------------------------------------------"

# Largest page cache first; LC_ALL=C keeps the ordering locale-independent.
LC_ALL=C sort -t $'\t' -k1,1nr "$TMPFILE" | head -n "$TOP_N"
使用示例
bash top_container_memory_final.sh 10
输出示例
CACHE(KB) RSS(KB) CACHE% CONTAINER_ID NAMESPACE POD CONTAINER IMAGE MEM_LIMIT
-----------------------------------------------------------------------------------------------------------------------
10381238 2833723 65.88 4222c3ab... prometheus prometheus prometheus prom/prometheus:v2.42.0 2048.00MB
1359884 828960 14.50 b9b8b7b4... database postgres-db postgres postgres:13.3-alpine unlimited
...

492

被折叠的 条评论
为什么被折叠?



