连续N天销售记录的店铺案例
数据:
店铺名,销售日期,销售金额
a,2017-02-05,200
a,2017-02-06,300
a,2017-02-07,200
a,2017-02-08,400
a,2017-02-10,600
b,2017-02-05,200
b,2017-02-06,300
b,2017-02-08,200
b,2017-02-09,400
b,2017-02-10,600
c,2017-01-31,200
c,2017-02-01,300
c,2017-02-02,200
c,2017-02-03,400
c,2017-02-10,600
a,2017-03-01,200
a,2017-03-02,300
a,2017-03-03,200
a,2017-03-04,400
a,2017-03-05,600
建表,导入数据
create table if not exists tb_shop (
name string ,
dt string ,
money double
)
row format delimited fields terminated by "," ;
load data local inpath "/hive/data/shop.txt" into table tb_shop ;
查询结果:
+---------------+-------------+----------------+
| tb_shop.name | tb_shop.dt | tb_shop.money |
+---------------+-------------+----------------+
| a | 2017-02-05 | 200.0 |
| a | 2017-02-06 | 300.0 |
| a | 2017-02-07 | 200.0 |
| a | 2017-02-08 | 400.0 |
| a | 2017-02-10 | 600.0 |
| b | 2017-02-05 | 200.0 |
| b | 2017-02-06 | 300.0 |
| b | 2017-02-08 | 200.0 |
| b | 2017-02-09 | 400.0 |
| b | 2017-02-10 | 600.0 |
| c | 2017-01-31 | 200.0 |
| c | 2017-02-01 | 300.0 |
| c | 2017-02-02 | 200.0 |
| c | 2017-02-03 | 400.0 |
| c | 2017-02-10 | 600.0 |
| a | 2017-03-01 | 200.0 |
| a | 2017-03-02 | 300.0 |
| a | 2017-03-03 | 200.0 |
| a | 2017-03-04 | 400.0 |
| a | 2017-03-05 | 600.0 |
+---------------+-------------+----------------+\
- 店铺分组 按照日期排序
select
* ,
row_number() over(partition by name order by dt) n
from
tb_shop ;
结果如下:
+---------------+-------------+----------------+-----+
| tb_shop.name | tb_shop.dt | tb_shop.money | n |
+---------------+-------------+----------------+-----+
| a | 2017-02-05 | 200.0 | 1 |
| a | 2017-02-06 | 300.0 | 2 |
| a | 2017-02-07 | 200.0 | 3 |
| a | 2017-02-08 | 400.0 | 4 |
| a | 2017-02-10 | 600.0 | 5 |
| a | 2017-03-01 | 200.0 | 6 |
| a | 2017-03-02 | 300.0 | 7 |
| a | 2017-03-03 | 200.0 | 8 |
| a | 2017-03-04 | 400.0 | 9 |
| a | 2017-03-05 | 600.0 | 10 |
| b | 2017-02-05 | 200.0 | 1 |
| b | 2017-02-06 | 300.0 | 2 |
| b | 2017-02-08 | 200.0 | 3 |
| b | 2017-02-09 | 400.0 | 4 |
| b | 2017-02-10 | 600.0 | 5 |
| c | 2017-01-31 | 200.0 | 1 |
| c | 2017-02-01 | 300.0 | 2 |
| c | 2017-02-02 | 200.0 | 3 |
| c | 2017-02-03 | 400.0 | 4 |
| c | 2017-02-10 | 600.0 | 5 |
+---------------+-------------+----------------+-----+
- 日期和编号求差值
select
name ,
dt ,
money ,
n ,
date_sub(dt , n) diff
from
(select
* ,
row_number() over(partition by name order by dt) n
from
tb_shop) t ;
结果如下:
+-------+-------------+--------+-----+-------------+
| name | dt | money | n | diff |
+-------+-------------+--------+-----+-------------+
| a | 2017-02-05 | 200.0 | 1 | 2017-02-04 |
| a | 2017-02-06 | 300.0 | 2 | 2017-02-04 |
| a | 2017-02-07 | 200.0 | 3 | 2017-02-04 |
| a | 2017-02-08 | 400.0 | 4 | 2017-02-04 |
| a | 2017-02-10 | 600.0 | 5 | 2017-02-05 |
| a | 2017-03-01 | 200.0 | 6 | 2017-02-23 |
| a | 2017-03-02 | 300.0 | 7 | 2017-02-23 |
| a | 2017-03-03 | 200.0 | 8 | 2017-02-23 |
| a | 2017-03-04 | 400.0 | 9 | 2017-02-23 |
| a | 2017-03-05 | 600.0 | 10 | 2017-02-23 |
| b | 2017-02-05 | 200.0 | 1 | 2017-02-04 |
| b | 2017-02-06 | 300.0 | 2 | 2017-02-04 |
| b | 2017-02-08 | 200.0 | 3 | 2017-02-05 |
| b | 2017-02-09 | 400.0 | 4 | 2017-02-05 |
| b | 2017-02-10 | 600.0 | 5 | 2017-02-05 |
| c | 2017-01-31 | 200.0 | 1 | 2017-01-30 |
| c | 2017-02-01 | 300.0 | 2 | 2017-01-30 |
| c | 2017-02-02 | 200.0 | 3 | 2017-01-30 |
| c | 2017-02-03 | 400.0 | 4 | 2017-01-30 |
| c | 2017-02-10 | 600.0 | 5 | 2017-02-05 |
+-------+-------------+--------+-----+-------------+
- 名字 差值分组聚合 统计次数 过滤掉小于3的
select
name ,
diff ,
count(*) cnt
from
(select
name ,
dt ,
money ,
n ,
date_sub(dt , n) diff
from
(select
* ,
row_number() over(partition by name order by dt) n
from
tb_shop)t) t2
group by name , diff
having cnt >= 3 ;
结果如下:
+-------+-------------+------+
| name | diff | cnt |
+-------+-------------+------+
| a | 2017-02-04 | 4 |
| a | 2017-02-23 | 5 |
| b | 2017-02-05 | 3 |
| c | 2017-01-30 | 4 |
+-------+-------------+------+
4)去重重复名字的店铺
select
distinct name
from
(select
name ,
diff ,
count(*) cnt
from
(select
name ,
dt ,
money ,
n ,
date_sub(dt , n) diff
from
(select
* ,
row_number() over(partition by name order by dt) n
from
tb_shop)t) t2
group by name , diff
having cnt >= 3) t3;
结果如下:
+-------+
| name |
+-------+
| a |
| b |
| c |
+-------+