# Sqoop Installation Guide
## Download, Extract, and Build
wget http://mirrors.cnnic.cn/apache/sqoop/1.4.6/sqoop-1.4.6.tar.gz
tar -xzvf sqoop-1.4.6.tar.gz
cd sqoop-1.4.6
ant package
## Configuration
Copy conf/sqoop-env-template.sh to conf/sqoop-env.sh (Sqoop reads sqoop-env.sh, not the template), then set:
#Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=/data/bigdata/app/hadoop
#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=$HADOOP_HOME
#set the path to where bin/hbase is available
#export HBASE_HOME=$HBASE_HOME
#Set the path to where bin/hive is available
export HIVE_HOME=$HIVE_HOME
#Set the path for where zookeeper config dir is
export ZOOCFGDIR=$ZOOKEEPER_HOME
This assumes that environment variables such as $HADOOP_HOME, $HIVE_HOME, and $ZOOKEEPER_HOME have already been set in ~/.bash_profile.
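For reference, a minimal sketch of those ~/.bash_profile entries (the /data/bigdata/app paths are illustrative, following the HADOOP_COMMON_HOME value above; adjust them to your own layout):

# example entries in ~/.bash_profile; the paths below are assumptions
export HADOOP_HOME=/data/bigdata/app/hadoop
export HIVE_HOME=/data/bigdata/app/hive
export ZOOKEEPER_HOME=/data/bigdata/app/zookeeper
export PATH=$PATH:$HADOOP_HOME/bin:$HIVE_HOME/bin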
## Usage
### Option 1: import to HDFS, then load into Hive
- Step 1: import the data into HDFS
bin/sqoop import --connect jdbc:mysql://url/db --username root --password root --table tablename --target-dir /tmp/tablename --delete-target-dir --fields-terminated-by '\t' --lines-terminated-by '\n'
- Step 2: create the target table in Hive
CREATE TABLE `tablename` (
`id` int ,
`is_valid` int ,
`name` int ,
`level` int)
partitioned by(dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
- Step 3: load the data into the partition with LOAD DATA INPATH
load data inpath '/tmp/tablename' into table db.tablename partition(dt='2016-07-07');
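A quick sanity check after these three steps (a sketch; db, tablename, and the dt value follow the examples above):

# the partition should now be registered and queryable
hive -e "show partitions db.tablename"
hive -e "select count(*) from db.tablename where dt='2016-07-07'"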
### Option 2: direct import into Hive
Complete the import in a single step:
bin/sqoop import --connect jdbc:mysql://url/db --username root --password root --table tablename --target-dir /tmp/tablename --delete-target-dir --fields-terminated-by '\t' --lines-terminated-by '\n' --hive-import --hive-table db.tablename
The drawback of this approach: if your Hive server uses a remote database as its metastore, the data can currently only be imported into the default database, because Sqoop 1.4.6 only supports HSQLDB for now.
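If you also want the dt partition used in Option 1, the one-step import can write directly into a Hive partition with Sqoop's --hive-partition-key/--hive-partition-value options; a sketch, reusing the connection details above:

bin/sqoop import --connect jdbc:mysql://url/db --username root --password root --table tablename --target-dir /tmp/tablename --delete-target-dir --fields-terminated-by '\t' --lines-terminated-by '\n' --hive-import --hive-table db.tablename --hive-partition-key dt --hive-partition-value '2016-07-07'

As a side note, passing -P instead of --password makes Sqoop prompt for the password rather than exposing it in the shell history.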
## Common Issues
- Error: Could not find or load main class org.apache.sqoop.Sqoop
- Cause: the Sqoop source was never compiled with ant. Fix: run ant, or download sqoop-1.4.6.jar and place it in the $HADOOP_HOME/lib directory.
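A sketch of those two fixes, run from inside the extracted sqoop-1.4.6 directory:

# fix A: compile from source so the Sqoop jar is produced
ant
# fix B: place a prebuilt jar where Hadoop's classpath picks it up
cp sqoop-1.4.6.jar $HADOOP_HOME/lib/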