方式一(推荐):
sh /data/dfp/etl/etl.sh jdbc:postgresql://192.168.0.36:5432/postgres test 'test' /user/hive/warehouse/s66_postgres.db/test_miaodazhuang1/dt=20210107 "SELECT CAST(EXTRACT(EPOCH FROM COALESCE(update_time,create_time)::TIMESTAMP WITH TIME ZONE)* 1000 AS int8)||'|9' AS event_timestamp,CAST('INSERT' AS VARCHAR) AS event_type,tt.* FROM test_miaodazhuang1 AS tt WHERE ((create_time >= '2021-01-07 00:00:00' and create_time<= '2021-01-08') OR (update_time >= '2021-01-07 00:00:00' and update_time<= '2021-01-08'))" create_time test_miaodazhuang1 root.etl
etl.sh 详细代码如下:
#!/bin/bash
# etl.sh - backfill job: import rows from PostgreSQL into HDFS through Sqoop.
#
# Usage:
#   etl.sh <connect> <username> <password> <hdfs_path> <query> <split_col> <tablename> <queue>
#
# Positional arguments:
#   $1 connect          JDBC URL; "?connectTimeout=300" is appended to it
#   $2 username         database user
#   $3 password         database password (handed to sqoop via --password)
#   $4 hdfs_path        final HDFS directory; the import itself is written to
#                       "<hdfs_path>_tmp"
#   $5 query_condition  free-form SELECT statement; " AND $CONDITIONS" is
#                       appended, so it has to end inside a WHERE clause
#   $6 alias_time       column passed to --split-by
#   $7 tablename        table name (only stored here, not used by the import)
#   $8 queue            value for mapred.job.queue.name
#
# Original author's note: "-- --schema <name>" (presumably extra connector
# arguments for selecting a schema; it is not wired into the command below).
#
# The exit status of the sqoop call is left in $? for the code that follows.

# The original exported the misspelled name "LNAG", which never set the locale.
export LANG=zh_CN.UTF-8

connect=$1
username=$2
password=$3
hdfs_path=$4
query_condition=$5
alias_time=$6
tablename=$7
queue=$8
hdfs_path_tmp="${hdfs_path}_tmp"

echo "query_condition==> ${query_condition}"

# Build the command as an argument array instead of a string fed to eval:
#  * every value stays one argument, whatever quotes, spaces, '*' or '$' it
#    contains (unquoted eval glob-expanded things like "SELECT * FROM ...");
#  * the queue name can no longer fuse with "--connect" (the original string
#    had no space before its first line continuation).
sqoop_args=(
  import
  -D "mapred.job.queue.name=${queue}"
  --connect "${connect}?connectTimeout=300"
  --username "${username}"
  --password "${password}"
  --target-dir "${hdfs_path_tmp}"
  --fields-terminated-by '\t'
  --hive-delims-replacement ' '
  # sqoop receives the two-backslash form \\N, exactly as the eval version did
  --null-string '\\N'
  --null-non-string '\\N'
  # $CONDITIONS must reach sqoop literally; it substitutes the split predicate
  --query "${query_condition} AND \$CONDITIONS"
  --split-by "${alias_time}"
)

# Shell-quoted rendering of the command, for the log only.
printf -v sqoop_sql '%q ' sqoop "${sqoop_args[@]}"
echo "执行的sqoop脚本==> ${sqoop_sql}"

# Must stay the last command of this section: the next statement reads $?.
sqoop "${sqoop_args[@]}"
if [ $? -eq 0 ] ;then
echo "`date "+%Y-%m-%d %H:%M:%S"` ==> 提交抽数任务到 hdfs ${hdfs_path_tmp} 成功"
echo "==>准备删除_SUCCESS文件"
rm_sucess_file=" hdfs dfs -rm ${hdfs_path_tmp}/_SUCCESS "
echo "rm_sucess_file == ${rm_sucess_file}"
eval $rm_sucess_file
if [ $? -eq 0 ] ;then
echo "删除${rm_sucess_file}文件成功"
else
echo "删除${rm_sucess_file}文件失败"
fi
echo "==>判断${hdfs_path}路径是否存在"
hdfs dfs -test -e ${hdfs_path}
if [ $? -eq 0 ] ;then
echo "${hdfs_path} exist"
else
echo "${hdfs_path} is not exist! create it"
hdfs dfs -mkdir -p ${hdfs_path}
fi
echo "==>数据从临时路径${hdfs_path_tmp} 移动到${hdfs_path}"
files=`hadoop fs -ls ${hdfs_path_tmp} | awk -F " " '{print $8}'`
for file in ${files}
do
time_stamp=`date +%s`
filename=`echo ${file} | awk -F "part-m-" '{print $2}'`
mv_sql=" hdfs dfs -mv ${file}

最低0.47元/天 解锁文章
454

被折叠的 条评论
为什么被折叠?



