1 #!/bin/bash2
3 source /etc/profile4
5 runlog='/tmp/zewei/check_schema_log'
6 hive_database_schema=/tmp/hive_database_schema/hive/
7 mysql_database_schema=/tmp/hive_database_schema/mysql/
8
9 >$runlog10 #每天下午一点删除现有的表结构缓存文件.11 #防止hive有变动.12 if [ `date +%k` -eq 13]13 then
14 rm -rf /tmp/hive_database_schema/hive/*
15 fi16
17 while read DB; do18 HiveDB=`echo $DB |awk '{print $1}'`19 MysqlDB=`echo $DB |awk '{print $2}'`20 MysqlHost=`echo $DB |awk '{print $3}'`21 MysqlPort=`echo $DB |awk '{print $4}'`22 PartTag=`echo $DB |awk '{print $5}'`23
24 connect_mysql="mysql -u TableSchemaCheck -pSchemacheck666 -h $MysqlHost -P $MysqlPort $MysqlDB -BNe"25 #缓存文件夹不在就创建26 ls $mysql_database_schema/$MysqlDB > /dev/null || mkdir -p $mysql_database_schema/$MysqlDB27 ls $hive_database_schema/$HiveDB > /dev/null || mkdir -p $hive_database_schema/$HiveDB28
29 #通过part标记检查是否为分区库30 if [ "$PartTag" == 'part' ]31 then32 table_list=`hive -S -e "use $HiveDB; show tables;" | grep "_part$"`33 else34 table_list=`hive -S -e "use $HiveDB; show tables;"`35 fi36
37 #对获取到的tables进行循环检查38 for table in $table_list39 do40 if [ "$PartTag" == 'part' ]41 then42 hive_table_name=$table43 mysql_table_name=`echo $table | sed 's/_part$//'`44 else45 hive_table_name=$table46 mysql_table_name=$table47 fi48
49 #获取mysql的表结构50 $connect_mysql "desc $mysql_table_name;" > /tmp/mysql_column51 if [ $? -ne 0 ]52 then53 continue54 else55 #把大写转换为小写.因为hive里面的列名没有大小写之分,所以转换一下56 cat /tmp/mysql_column | awk '{print $1}' | tr '[A-Z]' '[a-z]' > $mysql_database_schema/$MysqlDB/$mysql_table_name57 fi58
59 #如果没有hive的表结构缓存文件就去hive取...然后对分区字段进行删除,因为这对于MySQL的表结构来说是多余的60 if ! ls $hive_database_schema/$HiveDB/$table > /dev/null61 then62 hive -S -e "use $HiveDB; desc ${hive_table_name};" | awk '{print $1 }' > /tmp/hive_column63 part_column=`sed -n '/\#/,$p' /tmp/hive_column | egrep -v '#|^$'`64 for i in $part_column65 do66 sed -i "/\b${i}\b/d" /tmp/hive_column67 done68 egrep -v "#|^$" /tmp/hive_column | tee $hive_database_schema/$HiveDB/$table69 fi70
71 #获取MD572 md5ForMysql=`md5sum $mysql_database_schema/$MysqlDB/$mysql_table_name | awk '{print $1}'`73 md5ForHive=`md5sum $hive_database_schema/$HiveDB/$hive_table_name | awk '{print $1}'`74
75 #MD5不同就记录日志76 if [ ! "$md5ForMysql"x == "$md5ForHive"x ]; then77 echo -e "HiveDB:\t$HiveDB\t\ttable:\t$hive_table_name\t\tnot equal to MysqlDB table:\t$mysql_table_name" >> $runlog78 fi79 done80 done < /root/script/DBlist81
82 #统计并报警.报警阈值在监控里面设置83 err_line=`wc -l $runlog | awk '{print $1}'`84 [报警]