I. Planning
Cluster plan: Hive only needs to be installed on a single node (hadoop2, per the table below); for installing Hadoop itself, see the hadoop2.2.0 cluster installation series.
Hostname   IP              Installed software                 Running processes
hadoop1    192.168.31.10   jdk, hadoop                        NameNode, DFSZKFailoverController
hadoop2    192.168.31.20   jdk, hadoop, hive, mysql, hbase    NameNode, DFSZKFailoverController, HMaster, hive, mysql
hadoop3    192.168.31.30   jdk, hadoop, hbase                 ResourceManager, HMaster
hadoop4    192.168.31.40   jdk, hadoop, zookeeper, hbase      DataNode, NodeManager, JournalNode, QuorumPeerMain, HRegionServer
hadoop5    192.168.31.50   jdk, hadoop, zookeeper, hbase      DataNode, NodeManager, JournalNode, QuorumPeerMain, HRegionServer
hadoop6    192.168.31.60   jdk, hadoop, zookeeper, hbase      DataNode, NodeManager, JournalNode, QuorumPeerMain, HRegionServer
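The nodes refer to each other by hostname (for example, the metastore JDBC URL below uses hadoop2), so each machine's /etc/hosts needs the mappings from the plan above. A minimal sketch:
# append to /etc/hosts on every node
192.168.31.10 hadoop1
192.168.31.20 hadoop2
192.168.31.30 hadoop3
192.168.31.40 hadoop4
192.168.31.50 hadoop5
192.168.31.60 hadoop6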
II. Configure MySQL. For the detailed installation steps you can also refer to: sqoop-1.4.4 importing and exporting MySQL data to a hadoop2.2.0 HDFS cluster
# Configure the MySQL metastore (switch to the root user)
# Check for pre-installed MySQL packages
rpm -qa | grep mysql
# Remove the conflicting bundled library (--nodeps skips the dependency check)
rpm -e mysql-libs-5.1.66-2.el6_3.i686 --nodeps
# Install the MySQL server and client RPMs
rpm -ivh MySQL-server-5.1.73-1.glibc23.i386.rpm
rpm -ivh MySQL-client-5.1.73-1.glibc23.i386.rpm
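The server has to be running before the hardening script below can connect; the MySQL-server RPM installs an init script named mysql:
service mysql start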
# Set the MySQL root password
/usr/bin/mysql_secure_installation
# (Note: remove the anonymous users and allow remote connections)
# Log in to MySQL
mysql -uroot -p
# If Hive later hits a permissions error, run the grant below on the MySQL host
# (*.* = every table in every database; % = connections from any IP or hostname).
# The password must match javax.jdo.option.ConnectionPassword in hive-site.xml below.
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'hadoop' WITH GRANT OPTION;
FLUSH PRIVILEGES;
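To confirm the grant took effect, try a remote login from any other node in the plan (-e runs one statement and exits):
mysql -h hadoop2 -uroot -phadoop -e "show databases;"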
III. Install Hive
1. Download Hive: http://archive.apache.org/dist/hive/
2. Upload the Hive tarball to the Linux box (hadoop2) and unpack it
tar -zxvf hive-0.13.0.tar.gz -C /usr/cloud/hive
3. Configure environment variables
vim /etc/profile
export JAVA_HOME=/usr/cloud/java/jdk1.6.0_24
export HADOOP_HOME=/usr/cloud/hadoop/hadoop-2.2.0
export HIVE_HOME=/usr/cloud/hive/hive-0.13.0
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin
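Reload the profile so the new variables take effect in the current shell, and confirm the hive binary resolves:
source /etc/profile
# should print /usr/cloud/hive/hive-0.13.0/bin/hive
which hive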
4. Configure Hive
cd $HIVE_HOME/conf
cp hive-default.xml.template hive-site.xml
# Edit hive-site.xml: delete the template's contents, keeping only an empty <configuration> element,
# then add the following properties inside it:
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://hadoop2:3306/hive?createDatabaseIfNotExist=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
  <description>Driver class name for a JDBC metastore</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
  <description>username to use against metastore database</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>hadoop</value>
  <description>password to use against metastore database</description>
</property>
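5. Put the MySQL JDBC driver on Hive's classpath. The com.mysql.jdbc.Driver class configured above ships in the MySQL Connector/J jar, which Hive does not bundle; the jar version below is only an example, use whichever you downloaded:
cp mysql-connector-java-5.1.28-bin.jar $HIVE_HOME/lib/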
6. Create tables (tables are internal, i.e. managed, by default); enter the Hive CLI
$HIVE_HOME/bin/hive
create table count_detail(id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t';
# Create a partitioned table
create table tabled_part(id bigint, account string, income double, expenses double, time string) partitioned by (logdate string) row format delimited fields terminated by '\t';
# Create an external table backed by /td_ext on HDFS
create external table tabled_ext(id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t' location '/td_ext';
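An external table only points at its location, so any correctly formatted file dropped into /td_ext is immediately queryable. A sketch, assuming a tab-separated file data.txt with the same five columns (the filename is hypothetical):
hadoop fs -mkdir /td_ext    # skip if the table DDL already created the directory
hadoop fs -put data.txt /td_ext
# back in the hive CLI:
select * from tabled_ext;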
# Check that the tables were created:
show tables;
Check on HDFS that the directories backing the tables exist:
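Managed tables live under Hive's default warehouse directory, /user/hive/warehouse (governed by hive.metastore.warehouse.dir); the external table lives at the location given in its DDL:
hadoop fs -ls /user/hive/warehouse
hadoop fs -ls /td_ext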
Check the table metadata recorded in MySQL:
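The metastore keeps its bookkeeping in the hive database that the JDBC URL above auto-created; TBLS lists every table Hive knows about. A quick sketch:
mysql -uroot -p
mysql> use hive;
mysql> select TBL_NAME, TBL_TYPE from TBLS;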
7. Create partitions
# Regular vs. partitioned tables: when a table keeps receiving large volumes of new data, use a partitioned table
create table person(id bigint, name string) partitioned by (pubdate string) row format delimited fields terminated by '\t';
# Load data into a specific partition
load data local inpath './person.txt' overwrite into table person partition (pubdate='2010-08-22');
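Each partition is recorded in the metastore and materialized as its own subdirectory (pubdate=2010-08-22) under the table's HDFS directory; filtering on the partition column reads only that directory:
show partitions person;
select * from person where pubdate='2010-08-22';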