单词统计:
数据:
hello hello word
hello wc wc wc
bye bye
hello word
hello nihao chaina
创建数据表:(只有一列,不需要指定 row format)
-- Raw input table: each row holds one unparsed line of text.
create table wc (line string);
放入数据:
-- Load the file at local path /root/table/wc into table wc.
load data local inpath '/root/table/wc'
into table wc;
分析:使用 ' '(空格)将每行切割成数组,再嵌套 explode() 进行拆分,数组中的每个元素成为一行;
-- Split each line on a single space and emit one row per array element.
-- The delimiter literal must use plain ASCII single quotes; a typographic
-- quote (’) makes the statement fail to parse.
select explode(split(line, ' ')) as word
from wc;
创建结果表:(word用来存放单词,ct用来存放出现的个数)
-- Result table for the word count.
create table wcjg (
    word string,  -- the word
    ct   bigint   -- number of occurrences; bigint matches the return type of count()
);
放入数据:按单词 group by 分组,使用聚合函数 count 统计每个单词出现的次数
-- Break every line of wc into individual words, then append one row per
-- distinct word, together with its occurrence count, to wcjg.
insert into table wcjg
select
    tmp.word,
    count(tmp.word) as ct
from (
    select explode(split(line, ' ')) as word
    from wc
) tmp
group by tmp.word;
查询数据:
-- Show the stored word counts.
select
    word,
    ct
from wcjg;
基站掉话率:
数据表:
create table cell_monitor(
record_time string,
imei string,
cell string,
ph_num int,
call_num int,
drop_num int,
duration int,
drop_rate double,
net_type string