ClickHouse 健康巡检与故障快排（适配 22.3.20.29）

最新推荐文章于 2025-11-23 01:52:40 发布

原创最新推荐文章于 2025-11-23 01:52:40 发布 · 1.2k 阅读

13 ·

CC 4.0 BY-SA版权

文章标签：

#clickhouse #数据库 #mysql

DBA 专栏收录该内容

1067 篇文章

订阅专栏

目录标题

ClickHouse 健康巡检与故障快排（适配 22.3.20.29）

下面给你一份ClickHouse 22.3.20.29 健康巡检与故障快排完整报告（Markdown 版）。已按你当前版本适配（22.3 不包含 system.background_fetches，统一改用 system.replicated_fetches），并把所有查询、关键字段解释、判读阈值、定位→处置建议都放好了。直接复制执行即可。

ClickHouse 健康巡检与故障快排（适配 22.3.20.29）

使用建议

执行工具：clickhouse-client -q
输出风格：多数数值转 String，便于导出/拼接；统一增加 ORDER BY 以获得稳定结果
若你的 system.parts 使用 partition_id，将文中 partition 替换为 partition_id

0. 版本与可用 system 表确认（一次性）

/* 确认可用的复制/合并相关 system 表（22.3 应显示下列 3 个） */
SELECT name
FROM system.tables
WHERE database = 'system'
  AND name IN ('replicated_fetches','replication_queue','merges')
ORDER BY name;

期望：应返回 merges / replicated_fetches / replication_queue 三项。

1. 集群关键项 5 分钟快速体检

复制/合并是否在“跑”（卡顿常见入口）

/* 正在进行的合并与拉取（22.3 版） */
SELECT
    'merge' AS kind, database, table,
    toString(count())                         AS running,
    toString(coalesce(maxOrNull(elapsed), 0)) AS max_elapsed_seconds
FROM system.merges
GROUP BY database, table
UNION ALL
SELECT
    'fetch' AS kind, database, table,
    toString(count())                         AS running,
    toString(coalesce(maxOrNull(elapsed), 0)) AS max_elapsed_seconds
FROM system.replicated_fetches
GROUP BY database, table
ORDER BY kind, database, table;

看点

running：>0 代表当前确有合并/拉取
max_elapsed_seconds：持续升高多分钟 ⇒ 合并/拉取卡顿（磁盘/网络/源副本）

复制队列是否“堵”（没有在跑 ≠ 没有堆积）

/* 副本队列长度与最久等待时长 */
SELECT
  database, table,
  toString(count()) AS queued_tasks,
  toString( max( toInt64( now() - create_time ) ) ) AS max_wait_seconds,
  arrayStringConcat( groupArrayDistinct(type), ',' ) AS types
FROM system.replication_queue
GROUP BY database, table
ORDER BY toInt64(queued_tasks) DESC, database, table;

看点

queued_tasks：>0 代表有待处理任务
max_wait_seconds：>300s 基本可判定异常（网络/源端/磁盘/并发资源）

副本会话/只读/队列老化

/* 副本状态与队列老化 */
SELECT
  database, table,
  toString(is_session_expired) AS is_session_expired,   -- 1=Keeper 会话过期
  toString(is_readonly)        AS is_readonly,          -- 1=只读，不会合并/拉取
  toString(queue_size)         AS queue_size,           -- 队列长度
  toString(inserts_in_queue)   AS inserts_in_queue,
  toString(merges_in_queue)    AS merges_in_queue,
  toString( if(queue_oldest_time = toDateTime(0), 0, toInt64(now() - queue_oldest_time)) ) AS queue_oldest_seconds
FROM system.replicas
ORDER BY database, table;

看点

is_session_expired=1：副本“掉线”状态（需优先处理 Keeper/网络）
is_readonly=1：副本不会执行后台任务（配置/权限/磁盘问题）
queue_oldest_seconds：队列最老任务等待秒数（配合上一条的 max_wait_seconds 观察）

磁盘空间

SELECT
  name,
  toString(free_space)  AS free_space,
  toString(total_space) AS total_space
FROM system.disks
ORDER BY name;

表部件/分区规模（碎片与资源占用）

/* 每表（按 active）部件与资源占用 */
SELECT
  database, table,
  toString(active)                                     AS active,
  toString(uniqExact(partition))                       AS partitions,       -- 如为 partition_id，请替换
  toString(count())                                    AS parts,
  toString(sum(bytes))                                 AS bytes,
  toString(sum(data_uncompressed_bytes))               AS uncompressed_bytes,
  toString(sum(rows))                                  AS rows,
  toString(sum(bytes_on_disk))                         AS metric_DiskDataBytes,
  toString(sum(primary_key_bytes_in_memory_allocated)) AS metric_MemoryPrimaryKeyBytesAllocated
FROM system.parts
GROUP BY active, database, table
ORDER BY database, table, active;

2. 深入定位（Top N 视角）

Top N 慢拉取

SELECT
  database, table,
  toString(elapsed) AS elapsed_seconds,
  toString(total_size_bytes_compressed) AS total_bytes_compressed,
  toString(bytes_read_compressed)       AS read_bytes_compressed,
  toString( round( 100 * bytes_read_compressed / nullIf(total_size_bytes_compressed,0), 2) ) AS progress_pct
FROM system.replicated_fetches
ORDER BY elapsed DESC
LIMIT 10;

Top N 慢合并

SELECT
  database, table,
  toString(elapsed)    AS elapsed_seconds,
  toString(num_parts)  AS num_parts_in_merge
FROM system.merges
ORDER BY elapsed DESC
LIMIT 10;

未完成 Mutations

SELECT
  database, table,
  count()          AS mutations,
  sum(parts_to_do) AS parts_to_do
FROM system.mutations
WHERE is_done = 0
GROUP BY database, table
ORDER BY database, table;

Detached Parts 汇总（含原因归一）

SELECT
  count() AS detached_parts,
  database, table, disk,
  if(coalesce(reason,'unknown')='', 'detached_by_user', coalesce(reason,'unknown')) AS detach_reason
FROM system.detached_parts
GROUP BY
  database, table, disk,
  if(coalesce(reason,'unknown')='', 'detached_by_user', coalesce(reason,'unknown'))
ORDER BY database, table, disk, detach_reason;

3. 运行态指标总览（监控友好格式）

/* 异步指标 + 即时指标 + 事件累计 + 字典内存 + 最长查询 + 变更设置哈希 */
SELECT
  concat('metric.', metric) AS metric,
  toString(value)           AS value,
  ''                        AS description,
  'gauge'                   AS type
FROM system.asynchronous_metrics

UNION ALL
SELECT
  concat('metric.', metric) AS metric,
  toString(value)           AS value,
  ''                        AS description,
  'gauge'                   AS type
FROM system.metrics

UNION ALL
SELECT
  concat('event.', event) AS metric,
  toString(value)         AS value,
  ''                      AS description,
  'counter'               AS type
FROM system.events

UNION ALL
SELECT
  'metric.MemoryDictionaryBytesAllocated'  AS metric,
  toString(sum(bytes_allocated))           AS value,
  'Memory size allocated for dictionaries' AS description,
  'gauge'                                  AS type
FROM system.dictionaries

UNION ALL
SELECT
  'metric.LongestRunningQuery'               AS metric,
  toString(coalesce(maxOrNull(elapsed), 0))  AS value,
  'Longest running query time'               AS description,
  'gauge'                                    AS type
FROM system.processes
WHERE is_initial_query = 1

UNION ALL
SELECT
  'metric.ChangedSettingsHash'                   AS metric,
  toString(groupBitXor(cityHash64(name, value))) AS value,
  'Control sum for changed settings'             AS description,
  'gauge'                                        AS type
FROM system.settings
WHERE changed

ORDER BY metric;

如需列出有哪些设置被改动（便于追溯配置漂移）：

SELECT name, value, changed
FROM system.settings
WHERE changed
ORDER BY name;

4. 判读阈值与告警建议（可做首版基线）

复制队列：queued_tasks > 0 且 max_wait_seconds > 300（5 分钟） → 告警 Warning；> 900s → Critical
慢拉取/合并：Top1 elapsed_seconds > 300 且进度停滞（progress_pct 静止或 num_parts_in_merge 大且不下降） → Warning
副本会话：is_session_expired=1 或 is_readonly=1 → Critical
Detached parts：同一表 detached_parts 连续两轮上升 → Warning（常见：磁盘错误/合并失败/手工操作）
磁盘：free_space / total_space < 10% -> Warning；< 5% -> Critical
主键内存：metric_MemoryPrimaryKeyBytesAllocated 异常突增 + OOM 迹象 → 标记重点表，评估 TTL/分区策略/索引列

5. 常见异常 → 定位 → 处置（Playbook）

复制队列持续堆积（queued_tasks 高、max_wait_seconds 升高）
- 定位：看 types 中是否以 GET_PART/MERGE_PARTS 为主；结合慢拉取/合并 TopN
- 快处置：
  - 检查目标/源副本网络（RTT/丢包/带宽限速）
  - 检查目标机 I/O（iostat/blkstat；是否有高 iowait）
  - 避免在高峰期执行重负载 Mutations/大范围 OPTIMIZE
  - 分时段“温和”合并：按表执行小粒度 OPTIMIZE TABLE <db>.<tbl> PARTITION <id>（生产慎用，先低峰/从库）
慢拉取（replicated_fetches TopN 持续高）
- 定位：progress_pct 长时间极低/极高；对应表在源副本是否也繁忙
- 快处置：
  - 检查源副本磁盘与网络；必要时切换拉取来源（若多源可选）
  - 排查目标副本磁盘拥堵（合并与拉取竞争 I/O）
慢合并/碎片多（system.merges 持续高、单表 parts 很多）
- 定位：num_parts_in_merge 偏大；system.parts 显示部件数量/分区过多
- 快处置：
  - 调整写入批量/分区策略，降低碎片生成
  - 低峰期逐分区 OPTIMIZE；必要时缩小并发，避免“雪崩”
  - 确认后台池并发是否足够（合并/拉取线程，与你的磁盘数量/类型匹配）
副本只读/会话过期
- 定位：is_readonly=1 或 is_session_expired=1
- 快处置：
  - 检查 Keeper（连接数/超时/会话数）、网络与 DNS
  - 检查磁盘只读挂载/配额/权限，确保实例可写
Detached parts 异常
- 定位：查看 detach_reason（常见 No space left on device、broken part、detached_by_user）
- 快处置：
  - 空间不足先清理/扩容，再评估是否可安全丢弃（业务允许情况下）
  - broken part 结合 server 日志排查文件损坏/磁盘异常
Mutations 长时间未完成
- 定位：parts_to_do 持续高、对应表 I/O 紧张
- 快处置：
  - 避免高峰执行大范围 UPDATE/DELETE
  - 必要时分批、加条件拆小；低峰重试

6. Kubernetes 环境中的快速采集示例（可选）

# 在 Pod 内快速跑一段 SQL（示例，替换 <ns> 和 <pod>）
kubectl -n <ns> exec -it <pod> -- \
  clickhouse-client -q "
    SELECT database,table,count() AS queued_tasks
    FROM system.replication_queue GROUP BY database,table
    ORDER BY queued_tasks DESC LIMIT 20"

7. 附录：一键巡检 SQL （含注释，可保存为 `ck_health_223.sql`）

可按模块单独执行，也可整段执行（多数查询彼此独立）

/* 1) 正在进行的合并与拉取 */
SELECT
    'merge' AS kind, database, table,
    toString(count())                         AS running,
    toString(coalesce(maxOrNull(elapsed), 0)) AS max_elapsed_seconds
FROM system.merges
GROUP BY database, table
UNION ALL
SELECT
    'fetch' AS kind, database, table,
    toString(count())                         AS running,
    toString(coalesce(maxOrNull(elapsed), 0)) AS max_elapsed_seconds
FROM system.replicated_fetches
GROUP BY database, table
ORDER BY kind, database, table;

/* 2) 复制队列堆积与最久等待 */
SELECT
  database, table,
  toString(count()) AS queued_tasks,
  toString( max( toInt64( now() - create_time ) ) ) AS max_wait_seconds,
  arrayStringConcat( groupArrayDistinct(type), ',' ) AS types
FROM system.replication_queue
GROUP BY database, table
ORDER BY toInt64(queued_tasks) DESC, database, table;

/* 3) 副本会话/只读/队列老化 */
SELECT
  database, table,
  toString(is_session_expired) AS is_session_expired,
  toString(is_readonly)        AS is_readonly,
  toString(queue_size)         AS queue_size,
  toString(inserts_in_queue)   AS inserts_in_queue,
  toString(merges_in_queue)    AS merges_in_queue,
  toString( if(queue_oldest_time = toDateTime(0), 0, toInt64(now() - queue_oldest_time)) ) AS queue_oldest_seconds
FROM system.replicas
ORDER BY database, table;

/* 4) 表部件/分区/资源占用 */
SELECT
  database, table,
  toString(active)                                     AS active,
  toString(uniqExact(partition))                       AS partitions,
  toString(count())                                    AS parts,
  toString(sum(bytes))                                 AS bytes,
  toString(sum(data_uncompressed_bytes))               AS uncompressed_bytes,
  toString(sum(rows))                                  AS rows,
  toString(sum(bytes_on_disk))                         AS metric_DiskDataBytes,
  toString(sum(primary_key_bytes_in_memory_allocated)) AS metric_MemoryPrimaryKeyBytesAllocated
FROM system.parts
GROUP BY active, database, table
ORDER BY database, table, active;

/* 5) 未完成 Mutations */
SELECT
  database, table,
  count()          AS mutations,
  sum(parts_to_do) AS parts_to_do
FROM system.mutations
WHERE is_done = 0
GROUP BY database, table
ORDER BY database, table;

/* 6) 磁盘空间 */
SELECT
  name,
  toString(free_space)  AS free_space,
  toString(total_space) AS total_space
FROM system.disks
ORDER BY name;

/* 7) Detached parts 汇总（含原因归一） */
SELECT
  count() AS detached_parts,
  database, table, disk,
  if(coalesce(reason,'unknown')='', 'detached_by_user', coalesce(reason,'unknown')) AS detach_reason
FROM system.detached_parts
GROUP BY
  database, table, disk,
  if(coalesce(reason,'unknown')='', 'detached_by_user', coalesce(reason,'unknown'))
ORDER BY database, table, disk, detach_reason;

/* 8) Top N 慢拉取 */
SELECT
  database, table,
  toString(elapsed) AS elapsed_seconds,
  toString(total_size_bytes_compressed) AS total_bytes_compressed,
  toString(bytes_read_compressed)       AS read_bytes_compressed,
  toString( round( 100 * bytes_read_compressed / nullIf(total_size_bytes_compressed,0), 2) ) AS progress_pct
FROM system.replicated_fetches
ORDER BY elapsed DESC
LIMIT 10;

/* 9) Top N 慢合并 */
SELECT
  database, table,
  toString(elapsed)    AS elapsed_seconds,
  toString(num_parts)  AS num_parts_in_merge
FROM system.merges
ORDER BY elapsed DESC
LIMIT 10;

/* 10) 运行期指标聚合（监控导出） */
SELECT
  concat('metric.', metric) AS metric,
  toString(value)           AS value,
  ''                        AS description,
  'gauge'                   AS type
FROM system.asynchronous_metrics
UNION ALL
SELECT
  concat('metric.', metric) AS metric,
  toString(value)           AS value,
  ''                        AS description,
  'gauge'                   AS type
FROM system.metrics
UNION ALL
SELECT
  concat('event.', event) AS metric,
  toString(value)         AS value,
  ''                      AS description,
  'counter'               AS type
FROM system.events
UNION ALL
SELECT
  'metric.MemoryDictionaryBytesAllocated'  AS metric,
  toString(sum(bytes_allocated))           AS value,
  'Memory size allocated for dictionaries' AS description,
  'gauge'                                  AS type
FROM system.dictionaries
UNION ALL
SELECT
  'metric.LongestRunningQuery'               AS metric,
  toString(coalesce(maxOrNull(elapsed), 0))  AS value,
  'Longest running query time'               AS description,
  'gauge'                                    AS type
FROM system.processes
WHERE is_initial_query = 1
UNION ALL
SELECT
  'metric.ChangedSettingsHash'                   AS metric,
  toString(groupBitXor(cityHash64(name, value))) AS value,
  'Control sum for changed settings'             AS description,
  'gauge'                                        AS type
FROM system.settings
WHERE changed
ORDER BY metric;

/* 11) 被改动的设置（用于配置漂移排查） */
SELECT name, value, changed
FROM system.settings
WHERE changed
ORDER BY name;