Hive notes

Hive metadata is stored in MySQL; the relevant metastore tables:

TBLS: details of each Hive table    TBL_ID(2)   SD_ID(2)

COLUMNS_V2: column information for each Hive table   CD_ID(2)

SDS: the HDFS location where each Hive table's data is stored   SD_ID(2)
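These metastore tables can be queried directly in MySQL. A minimal sketch, assuming the metastore database is named hive (the database the GRANT below refers to):

use hive;
-- table name plus the HDFS directory that backs it
select t.TBL_NAME, s.LOCATION from TBLS t join SDS s on t.SD_ID = s.SD_ID;
-- columns of the table whose CD_ID is 2 (the example id noted above)
select COLUMN_NAME, TYPE_NAME from COLUMNS_V2 where CD_ID = 2;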

---------------------------------------------------------------------------------------------------

Exporting Hive data into a MySQL table:

./sqoop export --connect jdbc:mysql://192.168.8.114:3306/djk --username root --password 123456  --export-dir '/user/hive/warehouse/sqoop_result' --table sqoop_result -m 2 --input-fields-terminated-by '\t';
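Note that Sqoop export does not create the target table; sqoop_result must already exist in MySQL with columns matching the exported files. A hypothetical sketch (the column list is purely illustrative):

create table sqoop_result (account varchar(50), income double, expenses double, surplus double);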

-------------------------------------------------------------------------------------------------------------------

1. Upload the Hive installation package



2. Extract the package
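A minimal extraction sketch, assuming the package apache-hive-0.13.0-bin.tar.gz was uploaded to /root and is installed under /itcast (the install path used later in these notes):

tar -zxvf /root/apache-hive-0.13.0-bin.tar.gz -C /itcast/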
   =============
   ./hive            # start the Hive CLI from the extracted bin directory
   
   =====================
3. Configuration
3.1 Install MySQL
List any previously installed MySQL packages
rpm -qa | grep mysql
Force-remove that package (ignoring dependencies)
rpm -e mysql-libs-5.1.66-2.el6_3.i686 --nodeps

rpm -ivh MySQL-server-5.1.73-1.glibc23.i386.rpm 
rpm -ivh MySQL-client-5.1.73-1.glibc23.i386.rpm
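The MySQL server must be running before the setup script below; for this RPM install the init script is normally named mysql (an assumption, verify with chkconfig --list):

service mysql start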

Run the setup script to configure and secure MySQL
/usr/bin/mysql_secure_installation

Add Hive to the environment variables (PATH)

GRANT ALL PRIVILEGES ON hive.* TO 'root'@'%' IDENTIFIED BY '123' WITH GRANT OPTION;
FLUSH PRIVILEGES;
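Both statements are run inside the MySQL client on the metastore host; log in first and paste them at the mysql> prompt:

mysql -u root -p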
Create two tables in Hive
create table trade_detail (id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t';
create table user_info (id bigint, account string, name  string, age int) row format delimited fields terminated by '\t';

Import the data from MySQL directly into Hive
=============================================
hadoop fs -rmr /user/hive
[root@hadoop03 conf]# mv hive-default.xml.template hive-site.xml
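After the rename, point the metastore at MySQL in hive-site.xml. A minimal sketch of the four JDBC properties, assuming the metastore database is named hive, runs on this host (hadoop03), and uses the root/'123' account granted above (adjust host and password to your setup):

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://hadoop03:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>123</value>
</property>

The MySQL JDBC driver jar (mysql-connector-java) also has to be copied into $HIVE_HOME/lib.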




==================================================
sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table trade_detail --hive-import --hive-overwrite --hive-table trade_detail --fields-terminated-by '\t'
sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table user_info --hive-import --hive-overwrite --hive-table user_info --fields-terminated-by '\t'


Create a result table to store the output of the previous query (CTAS)
create table result row format delimited fields terminated by '\t' as select t2.account, t2.name, t1.income, t1.expenses, t1.surplus from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on (t1.account = t2.account);

create table user (id int, name string) row format delimited fields terminated by '\t';
Load data from the local file system into Hive
load data local inpath '/root/user.txt' into table user;
=========================================================
load data local inpath '/root/student.txt' into table student;
create table teacher3 (id bigint,name string) row format delimited fields terminated by '\t' ;
load data local inpath '/root/student.txt' into table teacher3;
=====================================================
Create an external table
create external table stubak (id int, name string) row format delimited fields terminated by '\t' location '/stubak';

Create a partitioned table
Difference between a regular table and a partitioned table: when data keeps growing in large volumes, use a partitioned table so queries can prune by partition.
create table book (id bigint, name string) partitioned by (pubdate string) row format delimited fields terminated by '\t'; 


Load data into the partitioned table

load data local inpath './book.txt' overwrite into table book partition (pubdate='2010-08-22');
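Each partition ends up as its own subdirectory under the table's warehouse directory; to list the partitions that were created:

show partitions book;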


// Exercise:

create table trade_detail(id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t';
create table user_info (id bigint, account string, name string, age int) row format delimited fields terminated by '\t'; 
==========================================================
create table student (id bigint, name string) row format delimited fields terminated by '\t'; 
load data local inpath '/root/student'  into table student;
==============================================
load data local inpath '/home/hadoop/data/trade_detail' overwrite into table trade_detail;
load data local inpath '/home/hadoop/data/user_info' overwrite into table user_info;    


create table result row format delimited fields terminated by '\t' as select t1.account, t1.income, t1.expenses, t1.surplus, t2.name from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on(t1.account = t2.account);


Create an external table
create external table t_detail(id bigint, account string, income double, expenses double, time string) partitioned by (logdate string) row format delimited fields terminated by '\t' location '/hive/td_partition';
============================================================
Create an external table when the data already exists and the table is created afterwards: just point the table's location at the data directory.
Running HDFS commands from inside the Hive CLI:
dfs -mkdir /data;
dfs -put /root/student /data/a.txt;
hadoop fs -mkdir  /data;
hadoop fs -put /root/student /data/a.txt


create external table ext_student(id bigint,name string)  row format delimited fields terminated by '\t' location '/data';
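Because the location /data already contains a.txt, the table is queryable immediately, without any load step:

select * from ext_student;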


// Partitioned external table:
create external table ext_people(id bigint,name string) partitioned by (nation string) row format delimited fields terminated by '\t' location '/people' ;


hadoop fs -put /root/student.txt /people
load data local inpath '/root/student.txt' into table ext_people partition (nation='China');


dfs -mkdir  /people/nation=japan;
dfs -put /root/student.txt /people/nation=japan;
// Manually register the new partition in the Hive metadata (run in the Hive CLI)
alter table ext_people add partition (nation='japan') location "/people/nation=japan";
select * from ext_people where nation='japan';


=======================================================
create temporary function AreaUDF as 'cn.itcast.hive.udf.AreaUDF';
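The class comes from a jar that has to be added to the session before the function is registered; a usage sketch, with the jar path an assumption and the ip argument only illustrative:

add jar /root/udf.jar;      -- jar containing cn.itcast.hive.udf.AreaUDF (path is hypothetical)
-- after the create temporary function above, call it like a built-in:
select AreaUDF(ip) from hmbbs limit 10;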




load data inpath '/apache_cleaned/2013-05-31/part-r-00000'  into table hmbbs partition (logdate='2013-05-31');


PV (page views)


create table pv_2013_05_31 row format delimited fields terminated by '\t' as select count(*) from hmbbs where logdate='2013-05-31';


select count(distinct ip) from hmbbs where logdate='2013-05-31';


sqoop export --connect jdbc:mysql://192.168.8.103:3306/hmbbs --username root --password hadoop --export-dir '/user/hive/warehouse/pv_2013_05_31/000000_0' --table pv


 SELECT COUNT(1) FROM (SELECT ip, COUNT(1) FROM hmbbs WHERE logdate='2013-05-31' GROUP BY ip HAVING COUNT(1)>50) t;
 
select t.ip, t.c from (SELECT ip, COUNT(1) as c FROM hmbbs WHERE logdate='2013-05-31' GROUP BY ip HAVING c>50) t order by t.c desc limit 10;




SELECT COUNT(1) FROM hmbbs WHERE logdate='140421' AND instr(url, 'member.php?mod=register')>0;


===========================================
1. Import MySQL data into Hive tables (with Sqoop)
1) Create the Hive tables
create table trade_detail (id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t';


create table user_info (id bigint, account string, name string, age int) row format delimited fields terminated by '\t';


2) Import the MySQL data into Hive (with Sqoop)
 Import the data directly from MySQL into Hive
  sqoop import --connect jdbc:mysql://192.168.1.10:3306/itcast --username root --password 123 --table trade_detail --hive-import --hive-overwrite --hive-table trade_detail --fields-terminated-by '\t'
3) Add Hive to the environment variables
/itcast/apache-hive-0.13.0-bin/bin
[root@hadoop03 bin]# vim /etc/profile
[root@hadoop03 bin]# source /etc/profile
[root@hadoop03 bin]# which hive
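The lines added to /etc/profile look roughly like this (a sketch based on the bin path above):

export HIVE_HOME=/itcast/apache-hive-0.13.0-bin
export PATH=$PATH:$HIVE_HOME/bin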

2. Hive queries
Create a result table to store the output of the previous query
create table result row format delimited fields terminated by '\t' as select t2.account, t2.name, t1.income, t1.expenses, t1.surplus from user_info t2 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t1 on (t1.account = t2.account);


==============================================================================================================================================


create table result row format delimited fields terminated by '\t' as select t1.account, t1.name, t2.income, t2.expenses, t2.surplus from user_info t1 join (select account, sum(income) as income, sum(expenses) as expenses, sum(income-expenses) as surplus from trade_detail group by account) t2 on (t1.account = t2.account);




load data inpath '/out15/p*' into table hmbbs partition (logdate='2013-05-31');


select '2013-05-31', count (distinct ip) from hmbbs where logdate='2013-05-31';




SELECT COUNT(1) FROM hmbbs WHERE logdate='140314' AND instr(url, 'member.php?mod=register')>0;






select '140421', ip, count(ip) as cnum from hmbbs where logdate='140421' group by ip having cnum > 50 order by cnum desc limit 20;




