Shell script:
#!/bin/bash
# NOTE: bash (not plain sh) is required — the script relies on
# ${var:offset:length} substring expansion below, which POSIX sh lacks.
source ExitCodeCheck.sh   # provides exitCodeCheck, called after each step
# Capture all command-line options (e.g. -inc_start=20171025) as one
# space-joined string for getparam to scan.
opts="$*"
# getparam NAME — print the value of option "-NAME=value" found in $opts.
# Arguments: $1 - option name (without the leading dash)
# Outputs:   the text after the first '=' of the matching option, or nothing.
getparam(){
arg=$1
# xargs -n1 splits $opts one token per line; cut strips the leading '-'.
# Pass the key via `awk -v` instead of splicing it into the program text:
# the original '"'"$arg"'"' interpolation breaks (or injects awk code) if
# the argument contains quotes or awk metacharacters.
echo "$opts" | xargs -n1 | cut -b 2- | awk -F'=' -v key="$arg" '$1 == key {print $2}'
}
# Read the named command-line parameters (modern $( ) instead of backticks).
IncStart=$(getparam inc_start)       # window start date, yyyymmdd
IncEnd=$(getparam inc_end)           # window end date, yyyymmdd
pg_connection=$(getparam jdbc_str)   # PostgreSQL JDBC connection string
pg_username=$(getparam db_user)
pg_password=$(getparam db_psw)       # NOTE(review): password on argv is visible in `ps`
dataName=$(getparam db_sid)
queueName=$(getparam hdp_queue)      # Hadoop scheduler queue
hdfshostname=$(getparam hdfs_host)
# Split the yyyymmdd dates into parts and build the formatted variants.
# Direct ${var:off:len} assignments replace the original `echo` subshells,
# which spawned a process per variable for no benefit.
IncStartYear=${IncStart:0:4}
IncStartMonth=${IncStart:4:2}
IncStartDay=${IncStart:6:2}
IncStartAll="${IncStartYear}-${IncStartMonth}-${IncStartDay} 00:00:00.0"
IncStartAllFormat="${IncStartYear}-${IncStartMonth}-${IncStartDay}"
IncEndYear=${IncEnd:0:4}
IncEndMonth=${IncEnd:4:2}
IncEndDay=${IncEnd:6:2}
IncEndAll="${IncEndYear}-${IncEndMonth}-${IncEndDay} 00:00:00.0"
# Date two days before IncStart, yyyymmdd (GNU date relative syntax).
twoDayAgo=$(date -d "$IncStart 2 days ago" +%Y%m%d)
twoDayAgoYear=${twoDayAgo:0:4}
twoDayAgoMonth=${twoDayAgo:4:2}
twoDayAgoDay=${twoDayAgo:6:2}
twoDayAgoAll="${twoDayAgoYear}-${twoDayAgoMonth}-${twoDayAgoDay} 00:00:00.0"
twoDayAgoAllFormat="${twoDayAgoYear}-${twoDayAgoMonth}-${twoDayAgoDay}"
job_name=$0   # script path, used to label the MapReduce jobs
# Target PostgreSQL table for the export.
export_table_name=n_par_life_hrm_org_excelude_camp
# Name of the temporary HDFS staging data for the export.
sqoop_export_data_filename=${export_table_name}
# Columns of the target PostgreSQL table.
export_table_columns=statis_date,um_num,um_name,group_id,group_name,department_id,department_name,center_id,center_name
# HDFS directory that will hold the staged export data.
sqoop_export_data_dir=/apps-data/hduser0101/an_pafc_safe/export/${sqoop_export_data_filename}
# Remove any previous staging data so the job can be re-run cleanly.
# ("hadoop dfs -rmr" is deprecated; "hadoop fs -rm -r" is the current form.)
hadoop fs -rm -r "${sqoop_export_data_dir}"
# Stage the data to export: a Hive INSERT OVERWRITE writes the selected rows
# into the HDFS staging directory (default ^A field delimiter, \N for NULL —
# matching the sqoop export options used further below).
hive -v -e "set mapred.job.queue.name=${queueName};
set mapred.job.name=${job_name}_1;
use an_pafc_safe;
insert overwrite directory '${sqoop_export_data_dir}'
select
'${IncStartAllFormat}' as statis_date,
um_num,
um_name,
group_id,
group_name,
department_id,
department_name,
center_id,
center_name
from sx_360_safe.n_par_life_hrm_org_excelude_camp ;";
# Abort (helper sourced from ExitCodeCheck.sh) if the Hive job failed.
exitCodeCheck $?
# Delete the target table's rows for the date two days before the run, so a
# re-run does not leave duplicates. All expansions are quoted: the connection
# string and password may contain shell-special characters.
# NOTE(review): alias "A" vs "a.statis_date" only works because PostgreSQL
# folds unquoted identifiers to lower case — confirm before porting to
# another database.
sqoop eval -Dmapred.job.queue.name="${queueName}" \
  --connect "${pg_connection}" \
  --username "${pg_username}" \
  --password "${pg_password}" \
  --verbose \
  --query "delete from ${export_table_name} A where a.statis_date=date('${twoDayAgoAllFormat}')"
exitCodeCheck $?
# Delete any rows already present for this run's date so the export can be
# executed a second time without duplicating data. Expansions are quoted:
# the connection string and password may contain shell-special characters.
sqoop eval -Dmapred.job.queue.name="${queueName}" \
  --connect "${pg_connection}" \
  --username "${pg_username}" \
  --password "${pg_password}" \
  --verbose \
  --query "delete from ${export_table_name} A where a.statis_date=date('${IncStartAllFormat}')"
exitCodeCheck $?
# Export the staged HDFS data into the PostgreSQL table.
# One option per line (the original crammed all -D flags onto one line).
# NOTE(review): single mapper and max attempts = 1 — presumably so a retry
# after a partial export cannot insert duplicate rows; confirm with the job
# owner before changing.
sqoop export \
  -D mapred.job.name="${job_name}_2" \
  -D sqoop.export.statements.per.transaction=4500 \
  -D mapreduce.map.tasks=1 \
  -D mapred.map.max.attempts=1 \
  -D mapred.reduce.max.attempts=1 \
  -D mapreduce.map.maxattempts=1 \
  -D mapreduce.reduce.maxattempts=1 \
  -D mapred.job.queue.name="${queueName}" \
  --connect "${pg_connection}" \
  --username "${pg_username}" \
  --password "${pg_password}" \
  --export-dir "${sqoop_export_data_dir}" \
  --verbose \
  --num-mappers 1 \
  --table "${export_table_name}" \
  --columns "${export_table_columns}" \
  --input-fields-terminated-by '\001' \
  --input-lines-terminated-by '\n' \
  --input-null-string '\\N' \
  --input-null-non-string '\\N'
exitCodeCheck $?
Example invocation:
(the -jdbc_str argument is the PostgreSQL JDBC connection string)
./sqoop_export_of_drawer.sh -inc_start=20171025 -inc_end=20171026 -jdbc_str=jdbc:postgresql://10.20.130.25:7434/crsp -db_user=crspsqp -db_psw=paic1234 -db_sid=LUBJ0 -hdp_queue=queue_0101_01 -hdfs_host=hdfs://hdp-hdfs01