连续问题 - 等差数列法
题目
数据
1001 2021-12-13 45
1001 2021-12-13 23
1002 2021-12-14 45
1001 2021-12-14 230
1002 2021-12-15 45
1001 2021-12-15 23
建表
create table if not exists continuous(
id int,
dt date,
lowcarbon int
)row format delimited fields terminated by '\t';
加载数据
load data local inpath '/opt/module/data/hive-interviews/continuous' into table continuous;
解题步骤
步骤一 构造等差数列
思路:两个步长相同的等差数列相减,得到的值全部相同
sql语句
select id,dt,sum(lowcarbon) lowcarbon
from continuous
group by id,dt
having lowcarbon>=100
运行结果
id dt lowcarbon
1001 2021-12-12 123
1001 2021-12-13 111
1001 2021-12-14 230
步骤二 加序号
sql语句
select id,
dt,
lowcarbon,
rank() over(partition by id order by dt) rk
from(
select id,dt,sum(lowcarbon) lowcarbon
from continuous
group by id,dt
having lowcarbon>=100
)t1
运行结果
id dt lowcarbon rk
1001 2021-12-12 123 1
1001 2021-12-13 111 2
1001 2021-12-14 230 3
步骤三 日期减去序号
sql语句
select
id,dt,lowcarbon,date_sub(dt,rk) flag
from(
select id,
dt,
lowcarbon,
rank() over(partition by id order by dt) rk
from(
select id,dt,sum(lowcarbon) lowcarbon
from continuous
group by id,dt
having lowcarbon>=100
)t1
)t2
运行结果
id dt lowcarbon flag
1001 2021-12-12 123 2021-12-11
1001 2021-12-13 111 2021-12-11
1001 2021-12-14 230 2021-12-11
步骤四 按照id 及flag 分组
sql语句
select id
from(
select
id,dt,lowcarbon,date_sub(dt,rk) flag
from(
select id,
dt,
lowcarbon,
rank() over(partition by id order by dt) rk
from(
select id,dt,sum(lowcarbon) lowcarbon
from continuous
group by id,dt
having lowcarbon>=100
)t1
)t2
)t3
group by id,flag
having count(id)>=3
运行结果
id
1001