Hive Installation
Upload the Hive tarball and the MySQL connector jar to /usr/local/soft/, then extract:
tar -zxvf apache-hive-1.2.1-bin.tar.gz
mv apache-hive-1.2.1-bin hive-1.2.1
Enter the conf directory and create the configuration files from the templates:
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
Configure hive-env.sh
Add the following (fill in the paths according to your environment):
HADOOP_HOME=/usr/local/soft/hadoop-2.7.6
JAVA_HOME=/usr/local/soft/jdk1.8.0_171
HIVE_HOME=/usr/local/soft/hive-1.2.1
Configure hive-site.xml
Modify the following properties. In vi, type /javax.jdo.option.ConnectionURL to jump straight to the section that needs editing. Note that & must be escaped as &amp; inside XML values.
<property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://master:3306/hive?characterEncoding=UTF-8&amp;createDatabaseIfNotExist=true&amp;useSSL=false</value>
</property>
<property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
</property>
<property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
</property>
<property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
</property>
<property>
    <name>hive.scratch.dir.permission</name>
    <value>777</value>
    <description>The permission for the user specific scratch directories that get created.</description>
</property>
Then add the following inside the <configuration> element at the very top of the file:
<property>
    <name>system:java.io.tmpdir</name>
    <value>/tmp/hive/java</value>
</property>
<property>
    <name>system:user.name</name>
    <value>${user.name}</value>
</property>
Configure MySQL
cd /usr/local/soft/
cp mysql-connector-java-5.1.49.jar hive-1.2.1/lib/
Modify the MySQL parameters
Edit the config file: vim /etc/my.cnf
Under the existing [mysqld] section, add:
character-set-server=utf8
Then add a [client] section:
[client]
default-character-set=utf8
Restart MySQL
systemctl restart mysqld
Log in to MySQL
mysql -uroot -p123456
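Once logged in, the utf8 change from my.cnf can be verified with a standard MySQL statement:
show variables like 'character%';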
Create the hive database
create database hive;
Configuration complete; exit MySQL
quit;
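When debugging the JDBC URL configured earlier, the same credentials and database can also be checked non-interactively (standard mysql client flags):
mysql -uroot -p123456 -e "show databases like 'hive';"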
Replace the jline jar
1. Delete: rm -rf /usr/local/soft/hadoop-2.7.6/share/hadoop/yarn/lib/jline-2.12.jar
2. Copy: cp /usr/local/soft/hive-1.2.1/lib/jline-2.12.jar /usr/local/soft/hadoop-2.7.6/share/hadoop/yarn/lib/
Add environment variables
vim /etc/profile

# HIVE_HOME
export HIVE_HOME=/usr/local/soft/hive-1.2.1
export PATH=$PATH:$HIVE_HOME/bin

# Make the changes take effect
source /etc/profile
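A quick way to confirm the new PATH is active (hive --version is a stock Hive CLI flag):
hive --version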
Start Hive
hive
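Once the CLI prompt comes up, a quick sanity check with a standard HiveQL statement:
show databases;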
Creating Databases in Hive
Create a database
create database learn1;
Create a table
create table test(id string,name string);
Insert data
INSERT INTO TABLE learn1.student_partition2 PARTITION(clazz='理科六班',gender='女') VALUES ("1500100003",'单乐蕊',22);
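Note that this insert targets a partitioned table that is never created above; a minimal sketch of a DDL it could match (the column layout is an assumption inferred from the VALUES and PARTITION clauses):

CREATE TABLE learn1.student_partition2(
    id string,
    name string,
    age int
)
PARTITIONED BY (clazz string, gender string);

Each distinct (clazz, gender) pair then becomes its own subdirectory under the table's warehouse location.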
Hive Storage Formats
TEXTFILE
hive> CREATE TABLE learn1.student1(
    id string,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

# Upload the data file
cd /usr/local/soft/data
[root@master data]# hdfs dfs -put ./students.txt /user/hive/warehouse/learn1.db/student1/
Characteristics:
① The data can be viewed directly in HDFS
② The data is not compressed in any way, so it is relatively large
③ Fields in the text are separated by a delimiter
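Point ① can be checked from the shell: since the table is plain text, the uploaded file is readable as-is (path follows the warehouse layout used above):
hdfs dfs -cat /user/hive/warehouse/learn1.db/student1/students.txt | head -5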
RCFile
CREATE TABLE learn1.student2(
    id string,
    name string,
    age int,
    gender string,
    clazz string
)
STORED AS RCFile;

INSERT INTO TABLE learn1.student2 SELECT * FROM learn1.student1;
ORCFile
CREATE TABLE learn1.student3(
    id string,
    name string,
    age int,
    gender string,
    clazz string
)
STORED AS ORCFile;

INSERT INTO TABLE learn1.student3 SELECT * FROM learn1.student1;
Compared with RCFile, the compression ratio is much higher while the write speed is about the same.
In production, ORCFile is a common choice for storing data.
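ORC also lets the compression codec be chosen per table; a minimal sketch (the table name and the SNAPPY codec are illustrative, not part of the original setup):

CREATE TABLE learn1.student3_snappy(
    id string,
    name string,
    age int,
    gender string,
    clazz string
)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="SNAPPY");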
Parquet
CREATE TABLE learn1.student4(
    id string,
    name string,
    age int,
    gender string,
    clazz string
)
STORED AS Parquet;

INSERT INTO TABLE learn1.student4 SELECT * FROM learn1.student1;
After compression the table is 3.01 MB, which falls between RCFile and ORCFile.
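The on-disk sizes of all four tables can be compared directly from their warehouse directories (standard HDFS command, paths as above):
hdfs dfs -du -h /user/hive/warehouse/learn1.db/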
JDBC Connection
# Start commands (to run hiveserver2 in the background)
(1) hive --service hiveserver2
    (keeps hiveserver2 attached to the terminal; stop it by pressing Ctrl+C)
(2) hive --service hiveserver2 &
    (runs hiveserver2 as a background process; press Enter to get the Linux shell back. Stop it with kill -9 <pid>, e.g. if the hiveserver2 pid is 30314, run kill -9 30314)
(3) nohup hive --service hiveserver2 &
    (if something goes wrong, check nohup.out for the log)

# Connect
beeline -u jdbc:hive2://master:10000 -n root

# Check the port
netstat -nplt | grep 10000

# Check the processes; a RunJar entry should appear
jps

# Exit beeline
!quit
!exit
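beeline can also run a single query non-interactively with -e, which is convenient for scripts (the query itself is illustrative):
beeline -u jdbc:hive2://master:10000 -n root -e "SELECT * FROM learn1.student1 LIMIT 5;"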