一般情况下,数仓存储的数据类型都是整型(BIGINT),字符串型(STRING),浮点型(DOUBLE),但是有些时候也需要用到一些复合数据结构来存储数据,常用的复合数据结构有Map,Array,Struct,本文将主要针对这三种数据类型做一个介绍
1、定义
类型 | 定义方法 | 描述 |
---|---|---|
Array | ARRAY < data_type > | Array数组,主要是是以数组形式存储可以通过字段名[0]进行访问,其中0表示的是数组中的第一位,以此类推 |
Map | MAP < primitive_type, data_type > | Map数据类型,主要是以K:V形式进行存储可以通过字段名[‘key’]进行访问,将返回这个key对应的Value |
Struct | STRUCT < col_name : data_type [COMMENT col_comment], …> | Struct结构体,主要以结构体形式进行存储可以通过字段名.列名进行访问,其中列名就是定义中的col_name |
2、举例
2.1 Map举例
# 创建数据库表
create table score
(
name string
, score map<string,int>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';
# 要入库的数据
biansutao '数学':80,'语文':89,'英语':95
jobs '语文':60,'数学':80,'英语':99
# 入库数据
LOAD DATA LOCAL INPATH '/home/hadoop/score.txt' OVERWRITE INTO TABLE score;
# 查询
hive> select * from score;
biansutao {"数学":80,"语文":89,"英语":95}
jobs {"语文":60,"数学":80,"英语":99}
Time taken: 0.665 seconds
hive> select name from score;
jobs
biansutao
Time taken: 19.778 seconds
hive> select t.score from score t;
{"语文":60,"数学":80,"英语":99}
{"数学":80,"语文":89,"英语":95}
Time taken: 19.353 seconds
hive> select t.score['语文'] from score t;
60
89
Time taken: 13.054 seconds
hive> select t.score['英语'] from score t;
99
95
Time taken: 13.769 seconds
2.2 Array举例
# 创建数据库表,以array作为数据类型
create table person(
name string
,work_locations array<string>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ',';
# 数据
biansutao beijing,shanghai,tianjin,hangzhou
linan changchu,chengdu,wuhan
# 入库数据
LOAD DATA LOCAL INPATH '/home/hadoop/person.txt' OVERWRITE INTO TABLE person;
# 查询
hive> select * from person;
biansutao ["beijing","shanghai","tianjin","hangzhou"]
linan ["changchu","chengdu","wuhan"]
Time taken: 0.355 seconds
hive> select name from person;
linan
biansutao
Time taken: 12.397 seconds
hive> select work_locations[0] from person;
changchu
beijing
Time taken: 13.214 seconds
hive> select work_locations from person;
["changchu","chengdu","wuhan"]
["beijing","shanghai","tianjin","hangzhou"]
Time taken: 13.755 seconds
hive> select work_locations[3] from person;
NULL
hangzhou
Time taken: 12.722 seconds
hive> select work_locations[4] from person;
NULL
NULL
Time taken: 15.958 seconds
2.3 Struct举例
# 创建数据表
CREATE TABLE test(
id int
,course struct<course:string,score:int>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ',';
# 数据
1 english,80
2 math,89
3 chinese,95
# 入库
LOAD DATA LOCAL INPATH '/home/hadoop/test.txt' OVERWRITE INTO TABLE test;
# 查询
hive> select * from test;
OK
1 {"course":"english","score":80}
2 {"course":"math","score":89}
3 {"course":"chinese","score":95}
Time taken: 0.275 seconds
hive> select course from test;
{"course":"english","score":80}
{"course":"math","score":89}
{"course":"chinese","score":95}
Time taken: 44.968 seconds
select t.course.course from test t;
english
math
chinese
Time taken: 15.827 seconds
hive> select t.course.score from test t;
80
89
95
Time taken: 13.235 seconds