数据类型:
分基本数据类型和复杂数据类型
复杂:
array:col array<基本类型>,下标从0开始,越界不报错,以NULL来代替
map:col map<string,string>
struct:col struct<property:string[,property:string]>
array示例:
gaoyuanyuan 87,98,96,60
jiajingwen 78,85,94,98,100
create table if not exists arr1(
name string,
score array<string>
)
row format delimited
fields terminated by '\t' --字段之间是用tab分隔
;
加载数据:load data local inpath '/hivedata/arr.txt' into table arr1;

只当做一个元素,一个引号内。一个元素是因为只是简单的做了数据的分隔,但是元数内没有告诉用什么分隔
--顺序是先截字段,再截字段内部的数
create table if not exists arr2(
name string,
score array<string>
)
row format delimited
fields terminated by '\t'
collection items terminated by ',' --元数内用逗号分隔
;
加载数据:load data local inpath '/hivedata/arr.txt' into table arr2;
查询每个学生第5个成绩:
select name,score[4]
from arr2
;
1、查询每一个学生的总成绩
select name,score[0] + score[1] --但是不能确定到底要几个score,所以需要用到下面的展开来做
2、explode展开:(用展开来做)
select explode(score) from arr2;

3、lateral view 虚拟表:(现在要结合这个进行查询)
--sc 表示虚拟表,相当于一个子查询
select name,mscore
from arr2 lateral view explode(score) sc as mscore
;

每行的都有出来了
create table if not exists arr3(
name string,
score array<string>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
;
将数据写入到array表
create table if not exists arr_tmp
as
select name,mscore
from arr2 lateral view explode(score) sc as mscore
;
collect_set: 去重
collect_list: 不去重
array:
name math english chinese nature
zhangsan 78 89 100 96
上面这个就可以用array了
--写的语句
insert into arr3
select
name,
collect_list(cast(mscore as string))
from arr_tmp
group by name
;

map:(是key-value的键值对)
先创建点数据
marong lazhu:2,pibian:1,shoukao:1,huanghong:2
songjiji lazhu:3,pilian:2,huashengmi:2,huixiangdou:2
---先创建一个表
create table if not exists map1(
name string,
item map<string,int>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','--项与项之间逗号分隔
map keys terminated by ':'--键值对之间用冒号分隔
;
将数据保存到某个文件中,然后通过load加载进来
![]()
检查一下数据是否正确
查找买了手铐的用户的皮鞭的购买量(查找的是皮鞭的购买量,但是过滤条件是买手铐的用户)
select
name,
item['pibian']
from map1
where item['shoukao'] is not null
;
求每个商品的销售数量
explode展开
select explode(item) from map1;
lateral view 虚拟表:--将explode展开的内容作为虚拟表,和原表做join操作
select itemname,sum(num)
from (
select
name
,itemname
,num
from map1 lateral view explode(item) it as itemname,num
) a
group by itemname
;
将数据写入到map字段
str_to_map:
map:
lazhu:2,pibian:1,shoukao:1,huanghong:2
str_to_map:
marong lazhu 2
marong pibian 1
marong shoukao 1
marong huanghong 2
songjiji lazhu 3
songjiji pilian 2
songjiji huashengmi 2
songjiji huixiangdou 2
create table if not exists map_tmp
as
select
name
,itemname
,num
from map1 lateral view explode(item) it as itemname,num
;
--将上面表的数据导到下面的表里面,现在是需要如何导的问题
create table if not exists map2(
name string,
item map<string,string>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':'
;
lazhu:2,pibian:1,shoukao:1,huanghong:2
str_to_map:
marong lazhu 2
marong pibian 1
marong shoukao 1
marong huanghong 2
songjiji lazhu 3
songjiji pilian 2
songjiji huashengmi 2
songjiji huixiangdou 2
insert into map2
select name,str_to_map(concat_ws(",",collect_list(concat(itemname,":",num))),",",":")
from map_tmp
group by name
;
cls japan,dongjing,xiasnqi18hao
wangxuejia japan,dongjing,gebi
Hive数据类型与操作详解
本文详细介绍了Hive中的数据类型,包括基本数据类型和复杂数据类型如array、map和struct。深入探讨了array和map的使用方法,数据加载、查询及数据转换技巧。通过实例展示了如何使用explode和lateral view进行数据展开,以及如何利用collect_list和str_to_map进行数据聚合。
6676

被折叠的 条评论
为什么被折叠?



