1. Full Synchronization
This method overwrites the previous data on every run (--delete-target-dir removes the old files before importing).
Full-sync import script (it sources the shared mysql_test_db.sh settings listed under Other Dependencies; the original labeled this script with the name of its own dependency, which cannot be right since a script sourcing itself would recurse):
#!/bin/sh
# Pull connection settings (jdbc, username, password) from the shared config.
source /home/hadoop/bigdata/mysql_test_db.sh
/home/hadoop/sqoop-1.4.7/bin/sqoop import \
--connect ${jdbc} \
--username ${username} \
--password ${password} \
--target-dir /user/hive/warehouse/ods_test.db/ods_person_external \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\u0001" \
--table person_count \
--where "id>2"
The corresponding Hive table DDL:
create external table ods_person_external (
area_code string comment 'area code',
area_name string comment 'area name',
peo_sum string comment 'area population count'
)
row format delimited fields terminated by '\u0001'
stored as textfile
location '/user/hive/warehouse/ods_test.db/ods_person_external';
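After a run, a quick sanity check from the shell confirms the external table picks up the newly imported files. This is a minimal sketch using the hive CLI against the table defined above; the queries themselves are not part of the original scripts:
# Sanity check after the full sync: row count plus a small sample.
hive -e "select count(*) from ods_test.ods_person_external;"
hive -e "select * from ods_test.ods_person_external limit 10;"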
2. Free-Form Query Import
This script lives at /home/hadoop/bigdata/test_db on the Linux host:
#!/bin/sh
source /home/hadoop/bigdata/mysql_test_db.sh
/home/hadoop/sqoop-1.4.7/bin/sqoop import \
--connect ${jdbc} \
--username ${username} \
--password ${password} \
--target-dir '/user/hive/warehouse/ods_test.db/ods_person_count_d' \
--query 'select area_code,area_name,peo_sum from person_count where id>20 and $CONDITIONS' \
-m 1
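Note that --query must be wrapped in single quotes so the shell does not expand $CONDITIONS; Sqoop substitutes its own split predicate there at runtime. When --query runs with more than one mapper, Sqoop additionally requires --split-by to know how to partition the query. A sketch of a parallel variant follows; the 4-mapper setting, the --delete-target-dir flag, and the assumption that id is an evenly distributed numeric key are mine, not from the original:
# Parallel free-form query import (assumed variant): --split-by id partitions
# the rows across 4 mappers; --delete-target-dir makes reruns idempotent.
/home/hadoop/sqoop-1.4.7/bin/sqoop import \
--connect ${jdbc} \
--username ${username} \
--password ${password} \
--target-dir '/user/hive/warehouse/ods_test.db/ods_person_count_d' \
--delete-target-dir \
--query 'select area_code,area_name,peo_sum from person_count where id>20 and $CONDITIONS' \
--split-by id \
-m 4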
3. Incremental Import
The ods_js_test_d.sh script:
#!/bin/sh
source /home/hadoop/bigdata/mysql_test_db.sh
# Append-only import: pull rows whose sync_time is greater than ${yesterday}.
/home/hadoop/sqoop-1.4.7/bin/sqoop import \
--connect ${jdbc} \
--username ${username} \
--password ${password} \
--table ods_js_test_d \
--target-dir '/user/hive/warehouse/ods_test.db/ods_js_test_d' \
--columns "id,real_name,sync_time,sum_qty" \
--fields-terminated-by "," \
--check-column "sync_time" \
--incremental "append" \
--last-value "${yesterday}"
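Recomputing --last-value from yesterday's date only works if the job never misses a day. An alternative sketch (assumed, not part of the original scripts) is a saved Sqoop job, which stores the high-water mark in Sqoop's local metastore and updates it after every successful run. Note that a saved job prompts for the database password at exec time unless a password file is configured:
# Create the saved job once; Sqoop tracks --last-value internally afterwards.
/home/hadoop/sqoop-1.4.7/bin/sqoop job --create ods_js_test_d_job -- import \
--connect ${jdbc} \
--username ${username} \
--table ods_js_test_d \
--target-dir '/user/hive/warehouse/ods_test.db/ods_js_test_d' \
--columns "id,real_name,sync_time,sum_qty" \
--fields-terminated-by "," \
--check-column sync_time \
--incremental append \
--last-value "${yesterday}"
# Each execution imports only rows newer than the stored watermark.
/home/hadoop/sqoop-1.4.7/bin/sqoop job --exec ods_js_test_d_job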
Other Dependencies
mysql_test_db.sh
#!/bin/sh
jdbc=jdbc:mysql://192.168.100.1:3306/test_db
username=root
password=12345678
table=mysql_table_name
# Hive database holding the ODS tables (shell variable names cannot contain hyphens).
hive_database=ods_test
# Yesterday's date, used as the incremental watermark for sync_time.
yesterday=$(date -d "-1 day" +'%Y-%m-%d')
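To run the incremental load daily, one possible crontab entry is shown below; the 02:00 schedule and the log path are assumptions, not from the original:
# m h dom mon dow command
0 2 * * * /bin/sh /home/hadoop/bigdata/ods_js_test_d.sh >> /home/hadoop/bigdata/logs/ods_js_test_d.log 2>&1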