Hive metadata is stored in MySQL; the corresponding tables are:
TBLS: details of each Hive table  TBL_ID(2) SD_ID(2)
COLUMNS_V2: column information for each Hive table  CD_ID(2)
SDS: the HDFS location where each Hive table's data is stored  SD_ID(2)
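To see how these metastore tables relate, you can query the MySQL metastore database directly. A minimal sketch, assuming the metastore database is named hive and using trade_detail as the table of interest (both names are assumptions):
use hive;
select t.TBL_NAME, s.LOCATION, c.COLUMN_NAME, c.TYPE_NAME
from TBLS t
join SDS s on t.SD_ID = s.SD_ID
join COLUMNS_V2 c on s.CD_ID = c.CD_ID
where t.TBL_NAME = 'trade_detail';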
---------------------------------------------------------------------------------------------------
Export Hive data into a MySQL table (sqoop export):
./sqoop export --connect jdbc:mysql://192.168.8.114:3306/djk --username root --password 123456 --export-dir '/user/hive/warehouse/sqoop_result' --table sqoop_result -m 2 --input-fields-terminated-by '\t';
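sqoop export requires the target MySQL table to exist beforehand. A minimal sketch of that DDL, run against the djk database; the column list below is hypothetical (loosely modeled on the result table built later in these notes), not taken from the notes themselves:
-- hypothetical column list, adjust to the actual Hive output
create table sqoop_result (
  account varchar(50),
  name varchar(50),
  income double,
  expenses double,
  surplus double
);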
-------------------------------------------------------------------------------------------------------------------
1. Upload the Hive installation package
2. Extract it
=============
./hive
=====================
3. Configuration
3.1 Install MySQL
Check for previously installed MySQL packages:
rpm -qa | grep mysql
Force-remove that package (ignoring dependencies):
rpm -e mysql-libs-5.1.66-2.el6_3.i686 --nodeps
rpm -ivh MySQL-server-5.1.73-1.glibc23.i386.rpm
rpm -ivh MySQL-client-5.1.73-1.glibc23.i386.rpm
Run the setup script to configure MySQL:
/usr/bin/mysql_secure_installation
Add Hive to the PATH environment variable.
GRANT ALL PRIVILEGES ON hive.* TO 'root'@'%' IDENTIFIED BY '123' WITH GRANT OPTION;
FLUSH PRIVILEGES;
Create two tables in Hive:
create table trade_detail (id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t';
create table user_info (id bigint, account string, name string, age int) row format delimited fields terminated by '\t';
Import data from MySQL directly into Hive (sqoop import):
=============================================
hadoop fs -rmr /user/hive
[root@hadoop03 conf]# mv hive-default.xml.template hive-site.xml
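After renaming the template, the MySQL metastore connection is configured in hive-site.xml. A minimal sketch; the host, metastore database name, and password are assumptions (the password matches the GRANT statement above):
<!-- assumed host/database/password; adjust to your MySQL setup -->
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://hadoop03:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>123</value>
</property>
The MySQL JDBC driver jar also needs to be copied into Hive's lib directory.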
==================================================
sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table trade_detail --hive-import --hive-overwrite --hive-table trade_detail --fields-terminated-by '\t'
sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table user_info --hive-import --hive-overwrite --hive-table user_info --fields-terminated-by '\t'
Create a result table to hold the result of the previous query:
create table result row format delimited fields terminated by '\t' as select t2.account, t2.name, t1.income, t1.expenses, t1.surplus from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on (t1.account = t2.account);
create table user (id int, name string) row format delimited fields terminated by '\t';
Load data from the local file system into Hive:
load data local inpath '/root/user.txt' into table user;
=========================================================
load data local inpath '/root/student.txt' into table student;
create table teacher3 (id bigint,name string) row format delimited fields terminated by '\t' ;
load data local inpath '/root/student.txt' into table teacher3;
=====================================================
Create an external table:
create external table stubak (id int, name string) row format delimited fields terminated by '\t' location '/stubak';
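Dropping an external table only removes the metadata; the files under /stubak stay on HDFS. A quick check, shown only to illustrate (skip it if you want to keep the table registered):
drop table stubak;
dfs -ls /stubak;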
Create a partitioned table
Difference between a regular table and a partitioned table: when large amounts of data keep being added, use a partitioned table.
create table book (id bigint, name string) partitioned by (pubdate string) row format delimited fields terminated by '\t';
Load data into a partition of the table:
load data local inpath './book.txt' overwrite into table book partition (pubdate='2010-08-22');
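After loading, each pubdate value gets its own directory under the table, and a query that filters on the partition column only scans that directory:
show partitions book;
select * from book where pubdate='2010-08-22';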
// Exercise:
create table trade_detail(id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t';
create table user_info (id bigint, account string, name string, age int) row format delimited fields terminated by '\t';
==========================================================
create table student (id bigint, name string) row format delimited fields terminated by '\t';
load data local inpath '/root/student' into table student;
==============================================
load data local inpath '/home/hadoop/data/trade_detail' overwrite into table trade_detail;
load data local inpath '/home/hadoop/data/user_info' overwrite into table user_info;
create table result row format delimited fields terminated by '\t' as select t1.account, t1.income, t1.expenses, t1.surplus, t2.name from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on(t1.account = t2.account);
Create an external table:
create external table t_detail(id bigint, account string, income double, expenses double, time string) partitioned by (logdate string) row format delimited fields terminated by '\t' location '/hive/td_partition';
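For this external partitioned table, data placed under /hive/td_partition still has to be registered partition by partition. A sketch, assuming a directory named logdate=2013-05-31 already exists there (the date is only an example):
-- the logdate value below is just an example
alter table t_detail add partition (logdate='2013-05-31') location '/hive/td_partition/logdate=2013-05-31';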
============================================================
Create an external table (the data exists first, the table is created afterwards): just point the table's location at the existing data directory.
Running HDFS commands from inside the Hive CLI:
dfs -mkdir /data;
dfs -put /root/student /data/a.txt;
hadoop fs -mkdir /data;
hadoop fs -put /root/student /data/a.txt
create external table ext_student(id bigint,name string) row format delimited fields terminated by '\t' location '/data';
// Partitioned external table:
create external table ext_people(id bigint,name string) partitioned by (nation string) row format delimited fields terminated by '\t' location '/people' ;
hadoop fs -put /root/student.txt /people
load data local inpath '/root/student.txt' into table ext_people partition (nation='China');
dfs -mkdir /people/nation=japan;
dfs -put /root/student.txt /people/nation=japan;
// Manually add the partition metadata to the Hive table (run in the Hive CLI)
alter table ext_people add partition (nation='japan') location "/people/nation=japan";
select * from ext_people where nation='japan';
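If the directories under /people all follow the nation=... naming convention, the missing partitions can also be picked up in one go instead of one alter table per directory:
msck repair table ext_people;
show partitions ext_people;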
=======================================================
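Before registering the UDF below, the jar containing cn.itcast.hive.udf.AreaUDF has to be added to the session; the jar path here is only a placeholder:
-- placeholder path, point it at the real jar
add jar /root/AreaUDF.jar;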
create temporary function AreaUDF as 'cn.itcast.hive.udf.AreaUDF';
load data inpath '/apache_cleaned/2013-05-31/part-r-00000' into table hmbbs partition (logdate='2013-05-31');
// PV (page views):
create table pv_2013_05_31 row format delimited fields terminated by '\t' as select count(*) from hmbbs where logdate='2013-05-31';
select count(distinct ip) from hmbbs where logdate='2013-05-31';
sqoop export --connect jdbc:mysql://192.168.8.103:3306/hmbbs --username root --password hadoop --export-dir '/user/hive/warehouse/pv_2013_05_31/000000_0' --table pv
SELECT COUNT(1) FROM (SELECT ip, COUNT(1) FROM hmbbs WHERE logdate='2013-05-31' GROUP BY ip HAVING COUNT(1)>50) t;
select t.ip, t.c from (SELECT ip, COUNT(1) as c FROM hmbbs WHERE logdate='2013-05-31' GROUP BY ip HAVING c>50) t order by t.c desc limit 10
SELECT COUNT(1) FROM hmbbs WHERE logdate='140421' AND instr(url, 'member.php?mod=register')>0;
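The daily figures above (PV, UV, registrations) can also be collected into a single summary row. A sketch; the table name hmbbs_stat_2013_05_31 and its column set are made up for illustration:
-- hypothetical summary table, one row per day
create table hmbbs_stat_2013_05_31 row format delimited fields terminated by '\t' as
select '2013-05-31' as logdate,
       count(*) as pv,
       count(distinct ip) as uv,
       sum(case when instr(url, 'member.php?mod=register') > 0 then 1 else 0 end) as reguser
from hmbbs
where logdate='2013-05-31';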
===========================================
1 Import MySQL data into Hive tables (using Sqoop)
1.1 Create the Hive tables
create table trade_detail (id bigint, account string, income double, expenses double, times string) row format delimited fields terminated by '\t';
create table user_info (id bigint, account string, name string, age int) row format delimited fields terminated by '\t';
1.2 Import the MySQL data into the Hive tables (using Sqoop)
Import data from MySQL directly into Hive:
sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table trade_detail --hive-import --hive-overwrite --hive-table trade_detail --fields-terminated-by '\t'
1.3 Add Hive to the PATH environment variable
/itcast/apache-hive-0.13.0-bin/bin
[root@hadoop03 bin]# vim /etc/profile
[root@hadoop03 bin]# source /etc/profile
[root@hadoop03 bin]# which hive
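The lines added to /etc/profile are roughly these (a sketch, using the install path above):
export HIVE_HOME=/itcast/apache-hive-0.13.0-bin
export PATH=$PATH:$HIVE_HOME/bin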
2 Hive queries
Create a result table to hold the result of the previous query:
create table result row format delimited fields terminated by '\t' as select t2.account, t2.name, t1.income, t1.expenses, t1.surplus from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on (t1.account = t2.account);
==============================================================================================================================================
create table result row format delimited fields terminated by '\t' as select t1.account, t1.name, t2.income, t2.expenses, t2.surplus from user_info t1 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t2 on (t1.account = t2.account);
load data inpath '/out15/p*' into table hmbbs partition (logdate='2013-05-31');
select '2013-05-31', count (distinct ip) from hmbbs where logdate='2013-05-31';
SELECT COUNT(1) FROM hmbbs WHERE logdate='140314' AND instr(url, 'member.php?mod=register')>0;
select '140421', ip, count(ip) as cnum from hmbbs where logdate='140421' group by ip having cnum > 50 order by cnum desc limit 20;