Hive metadata is stored in MySQL; the corresponding tables are:
TBLS: details of each Hive table  TBL_ID(2) SD_ID(2)
COLUMNS_V2: column information for each Hive table  CD_ID(2)
SDS: the HDFS location where each Hive table's data is stored  SD_ID(2)
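To see how these metastore tables relate, you can query the MySQL metastore database directly. A minimal sketch, assuming the metastore database is named hive and using trade_detail as the table of interest (both names are assumptions):
use hive;
select t.TBL_NAME, s.LOCATION, c.COLUMN_NAME, c.TYPE_NAME
from TBLS t
join SDS s on t.SD_ID = s.SD_ID
join COLUMNS_V2 c on s.CD_ID = c.CD_ID
where t.TBL_NAME = 'trade_detail';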
---------------------------------------------------------------------------------------------------
Export Hive data into a MySQL table (sqoop export):
./sqoop export --connect jdbc:mysql://192.168.8.114:3306/djk --username root --password 123456 --export-dir '/user/hive/warehouse/sqoop_result' --table sqoop_result -m 2 --input-fields-terminated-by '\t';
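sqoop export requires the target MySQL table to exist beforehand. A minimal sketch of that DDL, run against the djk database; the column list below is hypothetical (loosely modeled on the result table built later in these notes), not taken from the notes themselves:
-- hypothetical column list, adjust to the actual Hive output
create table sqoop_result (
  account varchar(50),
  name varchar(50),
  income double,
  expenses double,
  surplus double
);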
-------------------------------------------------------------------------------------------------------------------
1. Upload the Hive installation package
2. Extract it
=============
./hive
=====================
3. Configuration
3.1 Install MySQL
Check for previously installed MySQL packages:
rpm -qa | grep mysql
Force-remove that package (ignoring dependencies):
rpm -e mysql-libs-5.1.66-2.el6_3.i686 --nodeps
rpm -ivh MySQL-server-5.1.73-1.glibc23.i386.rpm
rpm -ivh MySQL-client-5.1.73-1.glibc23.i386.rpm
Run the setup script to configure MySQL:
/usr/bin/mysql_secure_installation
Add Hive to the PATH environment variable.
GRANT ALL PRIVILEGES ON hive.* TO 'root'@'%' IDENTIFIED BY '123' WITH GRANT OPTION;
FLUSH PRIVILEGES;
Create two tables in Hive:
create table trade_detail (id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t';
create table user_info (id bigint, account string, name string, age int) row format delimited fields terminated by '\t';
Import data from MySQL directly into Hive (sqoop import):
=============================================
hadoop fs -rmr /user/hive
[root@hadoop03 conf]# mv hive-default.xml.template hive-site.xml
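After renaming the template, the MySQL metastore connection is configured in hive-site.xml. A minimal sketch; the host, metastore database name, and password are assumptions (the password matches the GRANT statement above):
<!-- assumed host/database/password; adjust to your MySQL setup -->
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://hadoop03:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>123</value>
</property>
The MySQL JDBC driver jar also needs to be copied into Hive's lib directory.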
==================================================
sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table trade_detail --hive-import --hive-overwrite --hive-table trade_detail --fields-terminated-by '\t'
sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table user_info --hive-import --hive-overwrite --hive-table user_info --fields-terminated-by '\t'
Create a result table to hold the result of the previous query:
create table result row format delimited fields terminated by '\t' as select t2.account, t2.name, t1.income, t1.expenses, t1.surplus from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on (t1.account = t2.account);
create table user (id int, name string) row format delimited fields terminated by '\t';
Load data from the local file system into Hive:
load data local inpath '/root/user.txt' into table user;
=========================================================
load data local inpath '/root/student.txt' into table student;
create table teacher3 (id bigint,name string) row format delimited fields terminated by '\t' ;
load data local inpath '/root/student.txt' into table teacher3;
=====================================================
Create an external table:
create external table stubak (id int, name string) row format delimited fields terminated by '\t' location '/stubak';
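Dropping an external table only removes the metadata; the files under /stubak stay on HDFS. A quick check, shown only to illustrate (skip it if you want to keep the table registered):
drop table stubak;
dfs -ls /stubak;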
Create a partitioned table
Difference between a regular table and a partitioned table: when large amounts of data keep being added, use a partitioned table.
create table book (id bigint, name string) partitioned by (pubdate string) row format delimited fields terminated by '\t';
Load data into a partition of the table:
load data local inpath './book.txt' overwrite into table book partition (pubdate='2010-08-22');
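After loading, each pubdate value gets its own directory under the table, and a query that filters on the partition column only scans that directory:
show partitions book;
select * from book where pubdate='2010-08-22';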
// Exercise:
create table trade_detail(id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t';
create table user_info (id bigint, account string, name string, age int) row format delimited fields terminated by '\t';
==========================================================
create table student (id bigint, name string) row format delimited fields terminated by '\t';
load data local inpath '/root/student' into table student;
==============================================
load data local inpath '/home/hadoop/data/trade_detail' overwrite into table trade_detail;
load data local inpath '/home/hadoop/data/user_info' overwrite into table user_info;
create table result row format delimited fields terminated by '\t' as select t1.account, t1.income, t1.expenses, t1.surplus, t2.name from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on(t1.account = t2.account);
Create an external table:
create external table t_detail(id bigint, account string, income double, expenses double, time string) partitioned by (logdate string) row format delimited fields terminated by '\t' location '/hive/td_partition';
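For this external partitioned table, data placed under /hive/td_partition still has to be registered partition by partition. A sketch, assuming a directory named logdate=2013-05-31 already exists there (the date is only an example):
-- the logdate value below is just an example
alter table t_detail add partition (logdate='2013-05-31') location '/hive/td_partition/logdate=2013-05-31';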
============================================================
Create an external table (the data exists first, the table is created afterwards): just point the table's location at the existing data directory.
Running HDFS commands from inside the Hive CLI:
dfs -mkdir /data;
dfs -put /root/student /data/a.txt;
hadoop fs -mkdir /data;
hadoop fs -put /root/student /data/a.txt
create external table ext_student(id bigint,name string) row format delimited fields terminated by '\t' location '/data';
// Partitioned external table:
create external table ext_people(id bigint,name string) partitioned by (nation string) row format delimited fields terminated by '\t' location '/people' ;
hadoop fs -put /root/student.txt /people
load data local inpath '/root/student.txt' into table ext_people partition (nation='China');
dfs -mkdir /people/nation=japan;
dfs -put /root/student.txt /people/nation=japan;
// Manually add the partition metadata to the Hive table (run in the Hive CLI)
alter table ext_people add partition (nation='japan') location "/people/nation=japan";
select * from ext_people where nation='japan';
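If the directories under /people all follow the nation=... naming convention, the missing partitions can also be picked up in one go instead of one alter table per directory:
msck repair table ext_people;
show partitions ext_people;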
=======================================================
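Before registering the UDF below, the jar containing cn.itcast.hive.udf.AreaUDF has to be added to the session; the jar path here is only a placeholder:
-- placeholder path, point it at the real jar
add jar /root/AreaUDF.jar;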
create temporary function AreaUDF as 'cn.itcast.hive.udf.AreaUDF';
load data inpath '/apache_cleaned/2013-05-31/part-r-00000' into table hmbbs partition (logdate='2013-05-31');
// PV (page views):
create table pv_2013_05_31 row format delimited fields terminated by '\t' as select count(*) from hmbbs where logdate='2013-05-31';
select count(distinct ip) from hmbbs where logdate='2013-05-31';
sqoop export --connect jdbc:mysql://192.168.8.103:3306/hmbbs --username root --password hadoop --export-dir '/user/hive/warehouse/pv_2013_05_31/000000_0' --table pv
SELECT COUNT(1) FROM (SELECT ip, COUNT(1) FROM hmbbs WHERE logdate='2013-05-31' GROUP BY ip HAVING COUNT(1)>50) t;
select t.ip, t.c from (SELECT ip, COUNT(1) as c FROM hmbbs WHERE logdate='2013-05-31' GROUP BY ip HAVING c>50) t order by t.c desc limit 10
SELECT COUNT(1) FROM hmbbs WHERE logdate='140421' AND instr(url, 'member.php?mod=register')>0;
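The daily figures above (PV, UV, registrations) can also be collected into a single summary row. A sketch; the table name hmbbs_stat_2013_05_31 and its column set are made up for illustration:
-- hypothetical summary table, one row per day
create table hmbbs_stat_2013_05_31 row format delimited fields terminated by '\t' as
select '2013-05-31' as logdate,
       count(*) as pv,
       count(distinct ip) as uv,
       sum(case when instr(url, 'member.php?mod=register') > 0 then 1 else 0 end) as reguser
from hmbbs
where logdate='2013-05-31';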
===========================================
1 Import MySQL data into Hive tables (using Sqoop)
1.1 Create the Hive tables
create table trade_detail (id bigint, account string, income double, expenses double, times string) row format delimited fields terminated by '\t';
create table user_info (id bigint, account string, name string, age int) row format delimited fields terminated by '\t';
1.2 Import the MySQL data into the Hive tables (using Sqoop)
Import data from MySQL directly into Hive:
sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table trade_detail --hive-import --hive-overwrite --hive-table trade_detail --fields-terminated-by '\t'
1.3 Add Hive to the PATH environment variable
/itcast/apache-hive-0.13.0-bin/bin
[root@hadoop03 bin]# vim /etc/profile
[root@hadoop03 bin]# source /etc/profile
[root@hadoop03 bin]# which hive
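The lines added to /etc/profile are roughly these (a sketch, using the install path above):
export HIVE_HOME=/itcast/apache-hive-0.13.0-bin
export PATH=$PATH:$HIVE_HOME/bin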
2 Hive queries
Create a result table to hold the result of the previous query:
create table result row format delimited fields terminated by '\t' as select t2.account, t2.name, t1.income, t1.expenses, t1.surplus from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on (t1.account = t2.account);
==============================================================================================================================================
create table result row format delimited fields terminated by '\t' as select t1.account, t1.name, t2.income, t2.expenses, t2.surplus from user_info t1 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t2 on (t1.account = t2.account);
load data inpath '/out15/p*' into table hmbbs partition (logdate='2013-05-31');
select '2013-05-31', count (distinct ip) from hmbbs where logdate='2013-05-31';
SELECT COUNT(1) FROM hmbbs WHERE logdate='140314' AND instr(url, 'member.php?mod=register')>0;
select '140421', ip, count(ip) as cnum from hmbbs where logdate='140421' group by ip having cnum > 50 order by cnum desc limit 20;