Hive's complex data types (a quick inline sketch follows this list):
- array(1,2,3,4): every element must have the same type
- map('a',1,'b',2): every key must have the same type, and every value the same type
- struct('a',1,2,34,4): the fields may have completely different types
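As a quick way to experiment with these constructors, the statement below (a minimal sketch, not from the original session) builds one value of each type inline using Hive's built-in array(), map(), and named_struct() functions:

select array(1,2,3,4),
       map('a',1,'b',2),
       named_struct('name','zhangsan','age',30);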
The Array data type
1. Loading data
Create a table with an array<string> column (fields are tab-separated, array elements comma-separated):
[hadoop@hadoop001 data]$ cat hive_array.txt
zhangsan beijing,shanghai,tianjin,hangzhou
lisi changchu,chengdu,wuhan,beijing
[hadoop@hadoop001 data]$
0: jdbc:hive2://hadoop001:10000/default> create table hive_array(name string,work_locations array<string>)
0: jdbc:hive2://hadoop001:10000/default> row format delimited fields terminated by '\t'
0: jdbc:hive2://hadoop001:10000/default> collection items terminated by ','; -- create the hive_array table
INFO : Compiling command(queryId=hadoop_20190721172424_0b897d7d-2785-48a1-9067-b7e7e322da25): create table hive_array(name string,work_locations array<string>)
row format delimited fields terminated by '\t'
collection items terminated by ','
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
INFO : Completed compiling command(queryId=hadoop_20190721172424_0b897d7d-2785-48a1-9067-b7e7e322da25); Time taken: 0.006 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing command(queryId=hadoop_20190721172424_0b897d7d-2785-48a1-9067-b7e7e322da25): create table hive_array(name string,work_locations array<string>)
row format delimited fields terminated by '\t'
collection items terminated by ','
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing command(queryId=hadoop_20190721172424_0b897d7d-2785-48a1-9067-b7e7e322da25); Time taken: 0.303 seconds
INFO : OK
No rows affected (0.315 seconds)
0: jdbc:hive2://hadoop001:10000/default> load data local inpath '/home/hadoop/data/hive_array.txt' overwrite into table hive_array; -- load the array data into the table
INFO : Compiling command(queryId=hadoop_20190721172525_eb920e8d-3099-4ddd-9b0f-81615557122c): load data local inpath '/home/hadoop/data/hive_array.txt' overwrite into table hive_array
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
INFO : Completed compiling command(queryId=hadoop_20190721172525_eb920e8d-3099-4ddd-9b0f-81615557122c); Time taken: 0.015 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing command(queryId=hadoop_20190721172525_eb920e8d-3099-4ddd-9b0f-81615557122c): load data local inpath '/home/hadoop/data/hive_array.txt' overwrite into table hive_array
INFO : Starting task [Stage-0:MOVE] in serial mode
INFO : Loading data to table default.hive_array from file:/home/hadoop/data/hive_array.txt
INFO : Starting task [Stage-1:STATS] in serial mode
INFO : Table default.hive_array stats: [numFiles=1, numRows=0, totalSize=81, rawDataSize=0]
INFO : Completed executing command(queryId=hadoop_20190721172525_eb920e8d-3099-4ddd-9b0f-81615557122c); Time taken: 0.35 seconds
INFO : OK
No rows affected (0.371 seconds)
0: jdbc:hive2://hadoop001:10000/default> select * from hive_array; -- verify the data loaded correctly
INFO : Compiling command(queryId=hadoop_20190721172525_5e5fb5c2-fe06-4655-ba16-efdddf11a535): select * from hive_array
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:hive_array.name, type:string, comment:null), FieldSchema(name:hive_array.work_locations, type:array<string>, comment:null)], properties:null)
INFO : Completed compiling command(queryId=hadoop_20190721172525_5e5fb5c2-fe06-4655-ba16-efdddf11a535); Time taken: 0.034 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing command(queryId=hadoop_20190721172525_5e5fb5c2-fe06-4655-ba16-efdddf11a535): select * from hive_array
INFO : Completed executing command(queryId=hadoop_20190721172525_5e5fb5c2-fe06-4655-ba16-efdddf11a535); Time taken: 0.0 seconds
INFO : OK
+------------------+----------------------------------------------+--+
| hive_array.name | hive_array.work_locations |
+------------------+----------------------------------------------+--+
| zhangsan | ["beijing","shanghai","tianjin","hangzhou"] |
| lisi | ["changchu","chengdu","wuhan","beijing"] |
+------------------+----------------------------------------------+--+
2 rows selected (0.052 seconds)
2. Querying
1. Query the first element of the array (indexing starts at 0):
0: jdbc:hive2://hadoop001:10000/default> select name,work_locations[0] from hive_array;
INFO : Compiling command(queryId=hadoop_20190721173131_2eafd3be-b9f2-4627-b784-c931a899ee4e): select name,work_locations[0] from hive_array
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:name, type:string, comment:null), FieldSchema(name:_c1, type:string, comment:null)], properties:null)
INFO : Completed compiling command(queryId=hadoop_20190721173131_2eafd3be-b9f2-4627-b784-c931a899ee4e); Time taken: 0.053 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing command(queryId=hadoop_20190721173131_2eafd3be-b9f2-4627-b784-c931a899ee4e): select name,work_locations[0] from hive_array
INFO : Completed executing command(queryId=hadoop_20190721173131_2eafd3be-b9f2-4627-b784-c931a899ee4e); Time taken: 0.0 seconds
INFO : OK
+-----------+-----------+--+
| name | _c1 |
+-----------+-----------+--+
| zhangsan | beijing |
| lisi | changchu |
+-----------+-----------+--+
2 rows selected (0.065 seconds)
0: jdbc:hive2://hadoop001:10000/default>
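One detail the transcript does not show: indexing past the end of an array does not raise an error in Hive; the expression simply evaluates to NULL. A hypothetical probe against the same table:

select name, work_locations[9] from hive_array;
-- both rows hold only 4 elements, so the second column comes back NULL for both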
2. How many work locations does each person have? (using the size() function)
0: jdbc:hive2://hadoop001:10000/default> select name,size(work_locations) from hive_array;
INFO : Compiling command(queryId=hadoop_20190721173636_d1aabbe0-bb8b-4116-89bc-6c3b23081e2e): select name,size(work_locations) from hive_array
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:name, type:string, comment:null), FieldSchema(name:_c1, type:int, comment:null)], properties:null)
INFO : Completed compiling command(queryId=hadoop_20190721173636_d1aabbe0-bb8b-4116-89bc-6c3b23081e2e); Time taken: 0.027 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing command(queryId=hadoop_20190721173636_d1aabbe0-bb8b-4116-89bc-6c3b23081e2e): select name,size(work_locations) from hive_array
INFO : Completed executing command(queryId=hadoop_20190721173636_d1aabbe0-bb8b-4116-89bc-6c3b23081e2e); Time taken: 0.0 seconds
INFO : OK
+-----------+------+--+
| name | _c1 |
+-----------+------+--+
| zhangsan | 4 |
| lisi | 4 |
+-----------+------+--+
2 rows selected (0.053 seconds)
3. Who works in Beijing? (using the array_contains() function)
0: jdbc:hive2://hadoop001:10000/default> select * from hive_array where array_contains(work_locations,'beijing');
INFO : Compiling command(queryId=hadoop_20190721174343_4c574fc3-cab5-4b1d-8e28-c81782af3044): select * from hive_array where array_contains(work_locations,'beijing')
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:hive_array.name, type:string, comment:null), FieldSchema(name:hive_array.work_locations, type:array<string>, comment:null)], properties:null)
INFO : Completed compiling command(queryId=hadoop_20190721174343_4c574fc3-cab5-4b1d-8e28-c81782af3044); Time taken: 0.036 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing command(queryId=hadoop_20190721174343_4c574fc3-cab5-4b1d-8e28-c81782af3044): select * from hive_array where array_contains(work_locations,'beijing')
INFO : Completed executing command(queryId=hadoop_20190721174343_4c574fc3-cab5-4b1d-8e28-c81782af3044); Time taken: 0.0 seconds
INFO : OK
+------------------+----------------------------------------------+--+
| hive_array.name | hive_array.work_locations |
+------------------+----------------------------------------------+--+
| zhangsan | ["beijing","shanghai","tianjin","hangzhou"] |
| lisi | ["changchu","chengdu","wuhan","beijing"] |
+------------------+----------------------------------------------+--+
2 rows selected (0.054 seconds)
0: jdbc:hive2://hadoop001:10000/default>
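A common next step when working with array columns (not covered in the original session) is flattening the array into one row per element with explode() and LATERAL VIEW; a minimal sketch against the same table:

select name, location
from hive_array
lateral view explode(work_locations) t as location;
-- produces one (name, location) row per array element, e.g.
-- zhangsan  beijing
-- zhangsan  shanghai
-- ...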
The Map data type
1. Loading data
0: jdbc:hive2://hadoop001:10000/default>
0: jdbc:hive2://hadoop001:10000/default> create table hive_map(id int,name string,members map<string,string>,age int)
0: jdbc:hive2://hadoop001:10000/default> row format delimited fields terminated by ','
0: jdbc:hive2://hadoop001:10000/default> collection items terminated by '#'
0: jdbc:hive2://hadoop001:10000/default> map keys terminated by ':'; -- create a table with a map column in the default database
No rows affected (0.622 seconds)
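The source file is not shown in the transcript, but given the delimiters declared above (',' between fields, '#' between map entries, ':' between each key and value) and the query output below, /home/hadoop/data/hive_map.txt would look like this (reconstructed):

1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26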
0: jdbc:hive2://hadoop001:10000/default> load data local inpath '/home/hadoop/data/hive_map.txt' overwrite into table hive_map; -- load data from the local filesystem into the table
No rows affected (0.387 seconds)
0: jdbc:hive2://hadoop001:10000/default> select * from hive_map; -- verify the load
+--------------+----------------+----------------------------------------------------------------+---------------+--+
| hive_map.id | hive_map.name | hive_map.members | hive_map.age |
+--------------+----------------+----------------------------------------------------------------+---------------+--+
| 1 | zhangsan | {"father":"xiaoming","mother":"xiaohuang","brother":"xiaoxu"} | 28 |
| 2 | lisi | {"father":"mayun","mother":"huangyi","brother":"guanyu"} | 22 |
| 3 | wangwu | {"father":"wangjianlin","mother":"ruhua","sister":"jingtian"} | 29 |
| 4 | mayun | {"father":"mayongzhen","mother":"angelababy"} | 26 |
+--------------+----------------+----------------------------------------------------------------+---------------+--+
4 rows selected (0.051 seconds)
0: jdbc:hive2://hadoop001:10000/default>
2. Querying
1. Retrieve the value of a single key. A lookup on a key that is missing from a row's map returns NULL, as the second query below shows:
0: jdbc:hive2://hadoop001:10000/default> select id,name,members["father"] from hive_map;
+-----+-----------+--------------+--+
| id | name | _c2 |
+-----+-----------+--------------+--+
| 1 | zhangsan | xiaoming |
| 2 | lisi | mayun |
| 3 | wangwu | wangjianlin |
| 4 | mayun | mayongzhen |
+-----+-----------+--------------+--+
4 rows selected (0.045 seconds)
0: jdbc:hive2://hadoop001:10000/default>
0: jdbc:hive2://hadoop001:10000/default> select id,name,members["brother"] from hive_map;
+-----+-----------+---------+--+
| id | name | _c2 |
+-----+-----------+---------+--+
| 1 | zhangsan | xiaoxu |
| 2 | lisi | guanyu |
| 3 | wangwu | NULL |
| 4 | mayun | NULL |
+-----+-----------+---------+--+
4 rows selected (0.044 seconds)
0: jdbc:hive2://hadoop001:10000/default>
2. Which family member roles does each person have? (using the map_keys() function)
0: jdbc:hive2://hadoop001:10000/default> select id,name,map_keys(members) as map_keys from hive_map;
+-----+-----------+--------------------------------+--+
| id | name | map_keys |
+-----+-----------+--------------------------------+--+
| 1 | zhangsan | ["father","mother","brother"] |
| 2 | lisi | ["father","mother","brother"] |
| 3 | wangwu | ["father","mother","sister"] |
| 4 | mayun | ["father","mother"] |
+-----+-----------+--------------------------------+--+
4 rows selected (0.04 seconds)
0: jdbc:hive2://hadoop001:10000/default>
3. Retrieve only the family members' names (using the map_values() function)
0: jdbc:hive2://hadoop001:10000/default> select id,name,map_values(members) as map_values from hive_map;
+-----+-----------+-------------------------------------+--+
| id | name | map_values |
+-----+-----------+-------------------------------------+--+
| 1 | zhangsan | ["xiaoming","xiaohuang","xiaoxu"] |
| 2 | lisi | ["mayun","huangyi","guanyu"] |
| 3 | wangwu | ["wangjianlin","ruhua","jingtian"] |
| 4 | mayun | ["mayongzhen","angelababy"] |
+-----+-----------+-------------------------------------+--+
4 rows selected (0.039 seconds)
0: jdbc:hive2://hadoop001:10000/default>
4. Count each person's family members (size() works on maps as well as arrays):
0: jdbc:hive2://hadoop001:10000/default> select id,name,size(members) as members_sum from hive_map;
+-----+-----------+--------------+--+
| id | name | members_sum |
+-----+-----------+--------------+--+
| 1 | zhangsan | 3 |
| 2 | lisi | 3 |
| 3 | wangwu | 3 |
| 4 | mayun | 2 |
+-----+-----------+--------------+--+
4 rows selected (0.04 seconds)
0: jdbc:hive2://hadoop001:10000/default>
5. Show only each person's father, skipping any row whose map lacks a 'father' key (combining map_keys() with array_contains()):
0: jdbc:hive2://hadoop001:10000/default> select id,name,members["father"] as father from hive_map where array_contains(map_keys(members),"father");
+-----+-----------+--------------+--+
| id | name | father |
+-----+-----------+--------------+--+
| 1 | zhangsan | xiaoming |
| 2 | lisi | mayun |
| 3 | wangwu | wangjianlin |
| 4 | mayun | mayongzhen |
+-----+-----------+--------------+--+
4 rows selected (0.035 seconds)
0: jdbc:hive2://hadoop001:10000/default>
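A map lookup can also appear in a WHERE clause just like an ordinary column; a small sketch (not from the original session) finding everyone whose father is mayun:

select id, name from hive_map where members['father'] = 'mayun';
-- returns the single row for lisi (id 2)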
The Struct data type
1. Loading data
0: jdbc:hive2://hadoop001:10000/default> create table hive_struct(ip string,userinfo struct<name:string,age:int>)
0: jdbc:hive2://hadoop001:10000/default> row format delimited fields terminated by '#'
0: jdbc:hive2://hadoop001:10000/default> collection items terminated by ':'; -- create a table with a struct column in the default database
No rows affected (0.392 seconds)
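Again the source file is not shown; given the delimiters ('#' between fields, ':' between struct members) and the output below, /home/hadoop/data/hive_struct.txt would look like this (reconstructed):

192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70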
0: jdbc:hive2://hadoop001:10000/default> load data local inpath '/home/hadoop/data/hive_struct.txt' overwrite into table hive_struct; -- load data from the local filesystem into the table
No rows affected (0.831 seconds)
0: jdbc:hive2://hadoop001:10000/default> select * from hive_struct; -- verify the load
+-----------------+-------------------------------+--+
| hive_struct.ip | hive_struct.userinfo |
+-----------------+-------------------------------+--+
| 192.168.1.1 | {"name":"zhangsan","age":40} |
| 192.168.1.2 | {"name":"lisi","age":50} |
| 192.168.1.3 | {"name":"wangwu","age":60} |
| 192.168.1.4 | {"name":"zhaoliu","age":70} |
+-----------------+-------------------------------+--+
4 rows selected (0.032 seconds)
0: jdbc:hive2://hadoop001:10000/default>
2. Querying
To read a field out of a struct column, use dot notation: userinfo.<field>
1. Query the user name and age for each IP:
0: jdbc:hive2://hadoop001:10000/default> select userinfo.name,userinfo.age from hive_struct;
+-----------+------+--+
| name | age |
+-----------+------+--+
| zhangsan | 40 |
| lisi | 50 |
| wangwu | 60 |
| zhaoliu | 70 |
+-----------+------+--+
4 rows selected (0.045 seconds)
0: jdbc:hive2://hadoop001:10000/default>
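Struct fields can be used anywhere an ordinary column can, including filters; a minimal sketch (not from the original session) selecting on the nested age field:

select ip, userinfo.name from hive_struct where userinfo.age >= 50;
-- returns the rows for lisi, wangwu, and zhaoliu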