对比两个文件:第一列(订单号)相同、但其他列数据不一致时,输出该订单号
# Report the order numbers (column 1) that occur in both files but whose
# column 2 differs. The first file is loaded into a[col1]=col2 and the second
# file is then checked against that table.
# NOTE: NR==FNR only identifies "the first file" while that file is non-empty;
# with an empty first file the second file's lines would be loaded instead.
awk 'NR==FNR{a[$1]=$2;next;}{if($1 in a && $2!=a[$1]){print $1}}' tmp_20181025_hive tmp_20181025_mysql
# Print every line of b.txt whose column 1 also occurs in column 1 of a.txt.
# Fixed: the condition used to be `NR=FNR`, which assigns FNR to NR and is
# always true, so every line of both files hit `next` and nothing was printed.
awk 'NR==FNR{a[$1]=$2;next;}{if($1 in a){print $0}}' a.txt b.txt
# Count the mismatching order numbers.
awk 'NR==FNR{a[$1]=$2;next;}{if($1 in a && $2!=a[$1]){print $1}}' tmp_20181025_hive tmp_20181025_mysql|wc -l
# Save the mismatching order numbers to diff_ordr and inspect the result.
awk 'NR==FNR{a[$1]=$2;next;}{if($1 in a && $2!=a[$1]){print $1}}' tmp_20181025_hive tmp_20181025_mysql >diff_ordr
cat diff_ordr
ls
# Same comparison, but also emit both values (first file's, then second
# file's) next to the order number. This overwrites the diff_ordr written above.
awk 'NR==FNR{a[$1]=$2;next;}{if($1 in a && $2!=a[$1]){print $1,a[$1],$2}}' tmp_20181025_hive tmp_20181025_mysql >diff_ordr
-----对比两个文件的差异(找出第一列的值只出现在其中一个文件里的记录)
# Peek at the first lines of the order-number list.
head pay_order_no.txt
# Search the shell history for earlier commands mentioning "output".
history|grep output
# List files sorted by modification time with human-readable sizes
# (ll is typically an alias for `ls -l`).
ll -ht
# Delete the first line of output1.txt in place.
sed -i '1d' output1.txt
# NOTE(review): the original line here was the truncated fragment
# `//g' output1.txt` (unbalanced quote, not runnable, and it broke parsing of
# the lines after it). Presumably it was a sed substitution whose pattern was a
# literal carriage return; restored as a CR-stripping command — confirm that
# this matches the intended cleanup.
sed -i 's/\r//g' output1.txt
# Keep only the lines containing QUNAR_ALREADY_PAY or CTRIP_COMPLETE.
grep -E 'QUNAR_ALREADY_PAY|CTRIP_COMPLETE' output1.txt >output11.txt
wc -l output11.txt
head output11.txt
history|grep output
# Column-1 values of pay_order_no.txt that never occur in column 1 of
# output11.txt. The first file only populates the lookup table; the second
# file is filtered against it.
awk 'NR==FNR { seen[$1]; next } !($1 in seen) { print $1 }' output11.txt pay_order_no.txt >pay_notin_out11
# The reverse direction: column-1 values of output11.txt missing from
# pay_order_no.txt.
awk 'NR==FNR { seen[$1]; next } !($1 in seen) { print $1 }' pay_order_no.txt output11.txt >out_notin_pay11
ll -ht
# Transfer the result with sz (ZMODEM send), then count its lines.
sz pay_notin_out11
wc -l pay_notin_out11
学习路径:
https://www.cnblogs.com/zwgblog/p/6031256.html
# For every Hive table listed below, look up the table's HDFS location from
# `show create table`, list that directory, take the last line of the listing
# and strip everything up to and including "dt=". The remainder is appended to
# ./result as one line per table. When the last listed entry has no "dt="
# component, the whole listing line is recorded unchanged.
#
# For a quick trial run, shorten the list, e.g. to:
#   "promotion_help_package" "abtest"
#
# The list previously started with a second copy of "promotion_help_package"
# (it is also present in its alphabetical position), which queried the table
# twice and wrote a duplicate line; the extra copy has been removed.
tables=(
  "abtest"
  "agent_ad_report"
  "agent_alipay_no"
  "agent_alipay_no_test_cn"
  "agent_bid_operate_record"
  "alipay_info_record"
  "append_order_payment"
  "change_order"
  "compensate_offline_detail"
  "grab_order_train_no"
  "group_info"
  "group_member"
  "group_pull_user"
  "hcs_apply_transaction"
  "hcs_coupon_stat"
  "hcs_grab_help_activity"
  "hcs_purchase_card"
  "hcs_refund_action"
  "help_package"
  "help_package_order"
  "help_package_share"
  "insurance_prod"
  "ivr_node_monitor"
  "multi_pay_item"
  "new_pay_payment"
  "occupy_ticket"
  "occupy_ticket_fail_15"
  "open_order_map"
  "order_account_mapping_15"
  "order_log"
  "order_transport"
  "order_vip_experience"
  "package_purchase"
  "predict_info_201805"
  "promotion_help_package"
  "r_product_data"
  "r_product_data_new"
  "rdb_orders"
  "rob_card_order"
  "rob_card_use_record"
  "rob_change_ticket_info"
  "rob_change_train_info"
  "rob_modify_order"
  "rob_orders"
  "share_record"
  "star_card_order"
  "ticket"
  "ticket_change_info"
  "traffic_order"
  "train_qunar_12306_account_mapping_3"
  "train_user_12306_account"
  "user_12306_account_binding"
  "user_history_orders"
  "uv_by_date_old"
  "vip_card_order"
)

for tablename in "${tables[@]}"; do
  # grep -o keeps only the URI itself (up to the closing quote), so no
  # quote-stripping and no reliance on word splitting to trim whitespace is
  # needed. Only the first match is used.
  path=$(sudo -utraindev hive -e "show create table ${tablename}" \
    | grep -o "hdfs://qunarcluster/[^']*" \
    | head -n 1)

  if [ -z "${path}" ]; then
    # Without a path, `-ls` would list the default directory and an unrelated
    # line would be recorded; warn and record an empty value instead.
    printf 'warning: no hdfs://qunarcluster/ location found for table %s\n' "${tablename}" >&2
    lastday=
  else
    # `hadoop fs` replaces the deprecated `hadoop dfs` entry point.
    lastday=$(sudo -utraindev hadoop fs -ls "${path}" | sed 's/.*dt=//g' | tail -1)
  fi

  printf '表名:%s,最后同步数据:%s\n' "${tablename}" "${lastday}" >>result
done
---过滤结果文件 result(l-hdfs3.t.cn1 /home/q/tmp/check_file)
# Page through the lines of result that contain none of the listed dates or
# permission-string fragments; each command varies the excluded fragment.
grep -vE '2018-09-10|2018-09-09|-rwxr-xr-x' result | less
grep -vE '2018-09-10|2018-09-09|drwxr' result | less
grep -vE '2018-09-10|2018-09-09|rwxr' result | less
# Same kind of filter (dates 2018-09-10 and 2018-09-11), saved to RESULT.
grep -vE '2018-09-10|2018-09-11|rwxr' result >RESULT
# Emit a tab-separated header row (No. / Name / Course / Score in Chinese),
# then copy every line of the file `marks` through unchanged.
awk 'BEGIN { print "序号\t名字\t课程\t分数" } { print $0 }' marks