相关函数说明
- OVER():指定分析函数工作的数据窗口大小,这个数据窗口大小可能会随着行的变化而变化
- CURRENT ROW:当前行
- n PRECEDING:往前n行数据
- n FOLLOWING:往后n行数据
- UNBOUNDED:无边界,即窗口延伸到分区的起点或终点,
- UNBOUNDED PRECEDING 表示从前面的起点,
- UNBOUNDED FOLLOWING 表示到后面的终点。
- LAG(col,n):往前第n行数据
- LEAD(col,n):往后第n行数据
- NTILE(n):把有序分区中的行分发到指定数量的组中,各个组有编号,编号从1开始,对于每一行,NTILE返回此行所属的组的编号。注意:n必须为int类型。
- 数据准备
jack,2017-01-01,10
tony,2017-01-02,15
jack,2017-02-03,23
tony,2017-01-04,29
jack,2017-01-05,46
jack,2017-04-06,42
tony,2017-01-07,50
jack,2017-01-08,55
mart,2017-04-08,62
- 建表并导入数据
-- Sample table: customer name, order date (kept as a 'yyyy-MM-dd' string)
-- and order cost, read from comma-delimited text.
CREATE TABLE business (
    name      STRING,
    orderdate STRING,
    cost      INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- Load the local sample file into the table (no OVERWRITE, so rows are added).
LOAD DATA LOCAL INPATH "/opt/module/datas/business.txt"
INTO TABLE business;
- 查询在2017年4月份购买过的顾客及总人数
-- Customers who ordered in April 2017, with the number of such customers on
-- every row: GROUP BY leaves one row per name, and the empty OVER () makes
-- COUNT(*) count all of those grouped rows.
SELECT
    name,
    COUNT(*) OVER ()
FROM business
WHERE SUBSTRING(orderdate, 1, 7) = '2017-04'
GROUP BY name;
# 查询结果
name c1
mart 2
jack 2
- 查询顾客的购买明细及月购买总额
-- Every order row together with the total cost of its calendar month.
-- The partition key is the 'yyyy-MM' prefix of orderdate rather than
-- month(orderdate): month() only yields 1-12, so the same month of
-- different years would be summed into a single partition.
SELECT
    name,
    orderdate,
    cost,
    SUM(cost) OVER (PARTITION BY SUBSTRING(orderdate, 1, 7))
FROM business;
或者
-- Same query written with DISTRIBUTE BY, which the window specification
-- accepts in place of PARTITION BY.
SELECT
    name,
    orderdate,
    cost,
    SUM(cost) OVER (DISTRIBUTE BY SUBSTRING(orderdate, 1, 7))
FROM business;
# 查询结果
business.name business.orderdate business.cost c1
jack 2017-01-08 55 205
tony 2017-01-07 50 205
jack 2017-01-05 46 205
tony 2017-01-04 29 205
tony 2017-01-02 15 205
jack 2017-01-01 10 205
jack 2017-02-03 23 23
mart 2017-04-08 62 104
jack 2017-04-06 42 104
- 将cost按照日期进行累加
-- Running total of cost in orderdate order: for each row the frame spans
-- from the first row of the window up to and including the current row.
SELECT
    *,
    SUM(cost) OVER (
        SORT BY orderdate
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )
FROM business;
# 查询结果
business.name business.orderdate business.cost c1
jack 2017-01-01 10 10
tony 2017-01-02 15 25
tony 2017-01-04 29 54
jack 2017-01-05 46 100
tony 2017-01-07 50 150
jack 2017-01-08 55 205
jack 2017-02-03 23 228
jack 2017-04-06 42 270
mart 2017-04-08 62 332
- 查看顾客上次的购买时间
-- Each order with the same customer's previous order date. Rows are grouped
-- per name and sorted by orderdate; LAG(..., 1) looks one row back, so a
-- customer's first order has no predecessor and yields NULL.
SELECT
    *,
    LAG(orderdate, 1) OVER (
        DISTRIBUTE BY name
        SORT BY orderdate
    )
FROM business;
# 查询结果
business.name business.orderdate business.cost c1
jack 2017-01-01 10 NULL
jack 2017-01-05 46 2017-01-01
jack 2017-01-08 55 2017-01-05
jack 2017-02-03 23 2017-01-08
jack 2017-04-06 42 2017-02-03
mart 2017-04-08 62 NULL
tony 2017-01-02 15 NULL
tony 2017-01-04 29 2017-01-02
tony 2017-01-07 50 2017-01-04
- 查询前20%时间的订单信息
# 用NTILE(n)函数给查询结果分组
-- Tag every order with a bucket number from 1 to 5: rows are sorted by
-- orderdate and NTILE(5) splits them into five numbered groups.
SELECT
    *,
    NTILE(5) OVER (SORT BY orderdate)
FROM business;
# 分组结果
business.name business.orderdate business.cost c1
jack 2017-01-01 10 1
tony 2017-01-02 15 1
tony 2017-01-04 29 2
jack 2017-01-05 46 2
tony 2017-01-07 50 3
jack 2017-01-08 55 3
jack 2017-02-03 23 4
jack 2017-04-06 42 4
mart 2017-04-08 62 5
# 将上一步查询到的结果作为子查询来实现需求
-- Orders in the earliest 20% by date: the inner query assigns each row an
-- NTILE(5) bucket (gid) in orderdate order, and the outer query keeps only
-- the first bucket.
SELECT *
FROM (
    SELECT
        name,
        orderdate,
        cost,
        NTILE(5) OVER (SORT BY orderdate) gid
    FROM business
) t
WHERE gid = 1;
# 查询结果
t.name t.orderdate t.cost t.gid
jack 2017-01-01 10 1
tony 2017-01-02 15 1