#! /bin/bash
# sh /data/disk1/workspace/group_sjzt/bigdata_zdev/cxp/check/CheckData_test.sh CK_ZDEV.CLAIM_CALCULATE_DETAIL_nocar CK_ZDEV.CLAIM_CALCULATE_DETAIL -k calculate_detail_no calculate_id -d Etl_Upd_Dttm $current_date &
Usage(){
echo "脚本名称: $sh_name"
echo "脚本功能: eg: 全部字段比对(只针对有主键的表)"
echo "脚本调用: sh $sh_name <table1 table2 path(若全字段则传 1) 日期字段 -k key> "
echo "参数说明: startdate 开始时间;enddate 结束时间"
echo "使用示例: sh $sh_name table1 table2 path(若全字段则传 1) 日期字段 -k key "
# sh CheckData_test.sh CH_ZDEV.dr_provider_agent_agreement CH_ZDEV.dr_provider_agent_agreement -k agreement_no agreement_name -d etl_inst_dttm 2020-01-01
exit 1
}
if [ $# -lt 6 ] #判断输入参数个数
then
echo "调用${sh_name}错误!"
echo "请参考如下用法"
Usage
fi
########################################################################
#source ~/mis/.default.cfg
#source ${SRC_BIN_PATH}/Proc_Function.sh
group_home="/data/disk1/workspace/group_sjzt"
source ${group_home}/public/bin/Function.sh
source ${group_home}/public/bin/Proc_Function.sh
source ${group_home}/public/bin/date_deal_fun.sh
# mkdir -p ${SRC_MIS_LOG_PATH}/${tbdate}/${sh_name%%.*}/
#logfile=${SRC_MIS_LOG_PATH}/${tbdate}/${sh_name%%.*}/${sh_name%%.*}.$$.log
########################################################################
table_old=$1
table_new=$2
# path=$3
date_f=$4
r_who="bigdata_zdev" ##该脚本执行用户,根据实际情况修改!!!
owner="CH_ZDEV" ##目标表所属database名,根据项目应用规范修改,必改
target_table="CheckData_test" ##目标表名,根据实际修改,必改
current_date=`date "+%Y%m%d"` ##当前时间
current_time=$(date "+%Y-%m-%d_%H:%M:%S")
#tgt_table="${owner}.${target_table}" ##目标表database+目标表tabllename
log_dir="${group_home}/public/log/${owner}/${current_date}"
##创建日志文件目录及当前脚本日志文件
if [ ! -d ${log_dir} ]
then
mkdir -p ${log_dir}
if [ $? -ne 0 ]
then
echo echo "["`date "+%Y-%m-%d %H:%M:%S"`"]" "[ERROR ] 日志目录${log_dir}创建失败!"
exit 1
fi
fi
logfile=${log_dir}/CheckData_${table_old}_$current_date.log
date_now=`date +%Y%m%d`
v_year_first=`date +%Y-01-01`
#v_beg_date='2021-10-15'
v_beg_date=`date +%Y-%m-%d`
array=(${table_old//./ })
# tmp_table=${array[1]}"_$date_now"
# tgt_table="CH_ZDEV.${array[1]}"
tgt_table=$owner.${array[1]}"_$date_now"
cur_dir=$(pwd)
echo $cur_dir
dname=$(dirname "$PWD")
echo $dname
########################################################################
shift 2
while [ $# -gt 0 ]
do
if [ "$1" = "-k" ]
then
shift 1
key="on a.$1 = b.$1"
echo $key
else
if [ "$1" = "-d" ]
then
shift 1
#date_f="a.$1"
date_f="$1"
echo 'date_f :'$date_f
shift 1
data=$1
#date_f= $date_f=\'${data}\' # echo ''\'\\u0005\'''
else
key="$key and a.$1 = b.$1"
echo $key
fi
fi
shift 1
done
#########################################################################
res=`impala-shell -i phadoopvip:21001 -q "desc $table_old"`
query_str1=`echo "$res" | awk '{print $0}' | grep -iE " string " | sed "s/ //g"`
query_val1=`echo "$res" | awk '{print $0}' | grep -iE " double | int | decimal " | sed "s/ //g"`
query_sum_cl=`echo "$res" | awk '{print $0}' | grep -iE " string | double | int | decimal " | sed "s/ //g"`
query_sum=`echo "$query_sum_cl" | awk -F "[|]" '{print "sum("$2") as "$2","}'`
query_sum="$query_sum 0 as add_such "
echo $query_sum
create_table_sum="
drop table if exists ${tgt_table}_sum ;
create table ${tgt_table}_sum as
select $query_sum
from ${tgt_table}
;
"
echo $create_table_sum >> query_sum.sql
echo "select * from ${tgt_table}_sum;" >> query_sum.sql
mian_key() {
query_str=`echo "$query_str1" | awk -F "[|]" '{print "a."$2" as "$2"_s, \n b."$2" as "$2"_z , \n case when nvl(a."$2", \"\") = nvl(b."$2", \"\") then 0 else 1 end as "$2","}'`
#query_val=`echo "$query_val1" | awk -F "[|]" '{print "a."$2" as "$2"_s, \n b."$2" as "$2"_z , \n nvl(a."$2", 0) - nvl(b."$2", 0) as "$2","}'`
if [ ! $query_val1 ]; then
query_val=''
else
query_val=`echo "$query_val1" | awk -F "[|]" '{print "a."$2" as "$2"_s, \n b."$2" as "$2"_z , \n case when abs(nvl(a."$2", 0)-nvl(b."$2", 0))<=0.0001 then 0 else 1 end as "$2","}'`
fi
query="
SELECT
$query_str
$query_val
0 as add_such
FROM $table_old A
JOIN $table_new B
$key WHERE TO_DATE(A.${date_f})='${data}'
--WHERE TO_DATE(A.${date_f}) >= \"${v_year_first}\"
--AND TO_DATE(A.${date_f}) <= \"${v_beg_date}\"
--AND TO_DATE(B.${date_f}) >= \"${v_year_first}\"
--AND TO_DATE(B.${date_f}) <= \"${v_beg_date}\"
;
"
echo "$query"
}
# create
tab_str1=`echo "$query_str1" | awk -F "[|]" 'NR==1{print $2"_s string ,"$2"_z string ,"$2" int"}'`
tab_str2=`echo "$query_str1" | awk -F "[|]" 'NR>1{print ","$2"_s string ,"$2"_z string ,"$2" int"}'`
tab_val=`echo "$query_val1" | awk -F "[|]" '{print ","$2"_s double ,"$2"_z double, "$2" int"}'`
if [ ! $query_val1 ]; then
tab_val=''
else
tab_val=`echo "$query_val1" | awk -F "[|]" '{print ","$2"_s double ,"$2"_z double, "$2" int"}'`
fi
# query_end1=`echo "$query_str1" | awk -F "[|]" 'NR==1{print $2"_s ,"$2"_z ,"$2}'`
# query_end2=`echo "$query_str1" | awk -F "[|]" 'NR>1{print ","$2"_s ,"$2"_z ,"$2}'`
# query_val=`echo "$query_val1" | awk -F "[|]" '{print ","$2"_s ,"$2"_z ,"$2}'`
create_table="
drop table if exists $tgt_table ;
create table if not exists $tgt_table (
$tab_str1
$tab_str2
$tab_val
,add_such int
) --partitioned by (reportdate string)
stored as parquet;
"
query=`mian_key`
#query=`echo "$query" | sed "s/'//g"`
query=`echo "$query" | sed "s/\"/\'/g"`
#sql_Text=`echo $sql_Text | tr "[uU][pP][sS][eE][rR][tT]" "\n\n--upsert替换成insert\nINSERT "`
# query=`echo "$query" | tr "ck_" "CK_"`
query=`echo "$query" | tr a-z A-Z`
# echo 'query' | tr "ck_" "CK_"
# echo 'hello world'|tr a-z A-Z
# echo 'HELLO WORLD'|tr A-Z a-z
# echo 'HELLO WORLD'|tr [:upper:] [:lower:]
# echo 'hello world'|tr [:lower:] [:upper:]
query=`echo "$query" | sed "s/CK_/CG_/g"`
query_pa="
--ALTER TABLE ${tgt_table} ADD IF NOT EXISTS PARTITION(REPORTDATE='$v_beg_date')
INSERT OVERWRITE TABLE ${tgt_table} --PARTITION(REPORTDATE='$v_beg_date')
$query
"
kinit -kt ${group_home}/${r_who}/${r_who}.keytab ${r_who}@CDH.CCIC
#kinit -kt $WORK_HOME/bigdata_zdev.keytab bigdata_zdev@CDH.CCIC
Shlog "INFO" "sql_Text=$create_table" ${logfile}
impala-shell -i phadoopvip:21001 -q "$create_table"
#Insert_Partition "${query_pa}" "${logfile}"
CONF_SPARK_PARA="--driver-memory 2G --num-executors 60 --executor-cores 2 --executor-memory 8G "
# r_who="bigdata_zdev"
principal_para="${r_who}@CDH.CCIC"
keytab_para="${group_home}/${r_who}/${r_who}.keytab"
Shlog "INFO" "sql_Text=$query_pa" ${logfile}
Insert_Partition "${query_pa}" "${logfile}" "${target_table}.$current_date.$PID" "${CONF_SPARK_PARA}" "${principal_para}" "${keytab_para}"
impala-shell -i phadoopvip:21001 -q "invalidate metadata $tgt_table ; refresh $tgt_table "
#"data_compare" "mis_source.cfg"
Shlog "INFO" "sql_Text=$create_table_sum" ${logfile}
kinit -kt ${group_home}/${r_who}/${r_who}.keytab ${r_who}@CDH.CCIC
Insert_Partition "${create_table_sum}" "${logfile}" "${target_table}.$current_date.$PID" "${CONF_SPARK_PARA}" "${principal_para}" "${keytab_para}"
echo "select * from ${tgt_table}_sum;" >> $cur_dir/result.sql
kinit -kt ${group_home}/${r_who}/${r_who}.keytab ${r_who}@CDH.CCIC
impala-shell -i phadoopvip:21001 -q "invalidate metadata ${tgt_table}_sum ; refresh ${tgt_table}_sum"
impala-shell -i phadoopvip:21001 -q "select * from ${tgt_table}_sum;" >>$cur_dir/result.sql
这是一个用于对比两张表数据的Shell脚本,通过传入参数进行全字段比对。脚本中定义了参数用法、检查输入参数、创建日志目录、构建SQL查询语句,并调用impala-shell执行SQL,将结果写入日志和文件。主要用于数据校验和全量字段比对。
6891

被折叠的 条评论
为什么被折叠?



