#!/bin/bash
#set -x
source /etc/profile

# Kerberos authentication using the smoke-user keytab.
# A failure is only reported (not fatal) so that an already valid ticket
# cache can still be used by the requests made later in this script.
kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ocdp-goertekvn@GOERTEK.VN \
  || echo "警告: Kerberos认证失败,后续请求可能被拒绝" >&2

# Primary and standby nodes, and the port used to build the request URLs.
# Plain ASCII quotes are required here: typographic quotes would become part
# of the values and corrupt every URL built from them.
PRIMARY_NODE="vn-ns-bpit-dp-nn-17-6"
SECONDARY_NODE="vn-ns-bpit-dp-nn-17-7"
PORT="8088"
#######################################
# Check whether a node answers on its cluster-info endpoint.
# Globals:   PORT (read)
# Arguments: $1 - host name of the node to probe
# Outputs:   one status line on stdout
# Returns:   0 if the request to /ws/v1/cluster/info succeeds, 1 otherwise
#######################################
check_node_availability() {
  local node=$1

  # -f makes curl exit non-zero on HTTP error responses (>= 400), so a node
  # that answers with an error page is not reported as available.
  if curl --negotiate -u : --connect-timeout 10 --max-time 30 -sf \
    "http://${node}:${PORT}/ws/v1/cluster/info" > /dev/null 2>&1; then
    echo "节点 ${node} 可用"
    return 0
  else
    echo "节点 ${node} 不可用"
    return 1
  fi
}
#######################################
# Fetch the RUNNING applications from a node and kill the TEZ and SPARK
# applications whose elapsed time has reached the threshold.
# Globals:   PORT (read)
# Arguments: $1 - host name of the node to query
# Outputs:   progress messages on stdout
# Returns:   0 when the application list was fetched and parsed (individual
#            kill failures are reported but do not change the status),
#            1 when the request fails or the response cannot be parsed
#######################################
process_applications() {
  local node=$1
  # Threshold compared against each application's elapsedTime field
  # (3600000 ms = 1 hour).
  local -r min_elapsed_ms=3600000
  local apps_json app_ids app_id
  local count=0

  echo "正在从节点 ${node} 获取应用信息…"

  # Fetch the running applications. -f turns HTTP error responses into a
  # non-zero exit status instead of handing an error body to the parser.
  if ! apps_json=$(curl --negotiate -u : --connect-timeout 30 --max-time 60 -sf \
    "http://${node}:${PORT}/ws/v1/cluster/apps?states=RUNNING") \
    || [ -z "$apps_json" ]; then
    echo "从节点 ${node} 获取应用信息失败"
    return 1
  fi

  # Reject anything that is not valid JSON.
  if ! jq -e . <<< "$apps_json" > /dev/null 2>&1; then
    echo "从节点 ${node} 获取的JSON响应无效"
    return 1
  fi

  # Extract the ids of the TEZ and SPARK applications to terminate. A jq
  # failure is reported instead of being mistaken for "nothing to kill".
  if ! app_ids=$(jq -r --argjson min_ms "$min_elapsed_ms" \
    '.apps.app // [] | map(select((.applicationType == "TEZ" or .applicationType == "SPARK") and .elapsedTime >= $min_ms)) | .[].id' \
    <<< "$apps_json"); then
    echo "从节点 ${node} 解析应用信息失败"
    return 1
  fi

  if [ -z "$app_ids" ] || [ "$app_ids" = "null" ]; then
    echo "未找到需要终止的TEZ或SPARK应用"
    return 0
  fi

  # Kill the applications one by one, counting the successful kills.
  while IFS= read -r app_id; do
    if [ -n "$app_id" ]; then
      echo "正在终止应用: $app_id"
      if yarn application -kill "$app_id" > /dev/null 2>&1; then
        echo "成功终止应用: $app_id"
        count=$((count + 1))
      else
        echo "终止应用失败: $app_id"
      fi
    fi
  done <<< "$app_ids"

  echo "总共终止了 $count 个TEZ和SPARK应用"
  return 0
}
# Main execution logic: try the primary node first, fall back to the standby
# node, and exit 0 as soon as one node has been processed successfully.
echo "开始检查节点可用性…"

# Prefer the primary node.
if check_node_availability "$PRIMARY_NODE"; then
  echo "使用主节点: $PRIMARY_NODE"
  if process_applications "$PRIMARY_NODE"; then
    echo "任务执行完成"
    exit 0
  else
    echo "主节点处理失败,尝试备节点…"
  fi
else
  echo "主节点不可用,尝试备节点…"
fi

# Fall back to the standby node.
if check_node_availability "$SECONDARY_NODE"; then
  echo "使用备节点: $SECONDARY_NODE"
  if process_applications "$SECONDARY_NODE"; then
    echo "任务执行完成"
    exit 0
  else
    echo "备节点处理也失败"
  fi
else
  echo "备节点也不可用"
fi

# Reached only when neither node could be processed.
echo "错误: 所有节点都不可用,任务执行失败"
exit 1
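# AI-written Hive high-availability script (title text carried over with the paste).
#
# "被折叠的 条评论" / "为什么被折叠?" — presumably web-page comment-section
# residue from the copy-paste; not part of the script.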



