背景
日常的数仓开发中,我们会遇到各种各样的业务用SQL去解决,有一些问题的复杂SQL处理可以抽象为固定的范式,如果抽取为常用的范式,会大大提高我们的开发效率
案例
-- 人工接管里程(有人工接管且由自动驾驶模式切换为了手动驾驶模式,然后又切回自动,该时段对应里程)
-- 我们如果想实现这个指标要有个前提,就是需要按照车辆分区时间排序后,车辆每切换一次驾驶模式,分组伪列内容都不能一样
--先看终极SQL
select carno,
nvl(round(sum(manual), 2), 0) manual_inv
from (
select carno,
s_time,
e_time,
manual,
drivingmode,
intervention,
lead(drivingmode, 1) over (partition by carno order by s_time) lead_dri,
lag(drivingmode, 1) over (partition by carno order by s_time) lag_dri
from (
select carno,
drivingmode,
intervention,
count(car_time) * 0.5 / 3600 manual_t,
min(car_time) s_time,
max(car_time) e_time,
sum(speed * 0.5) / 1000 manual
from (
select car_time,
carno,
intervention,
drivingmode,
speed,
sum(flag2) over (partition by carno order by car_time) flag3 --伪列flag3
from (
select car_time,
carno,
intervention,
drivingmode,
speed,
lag(flag1, 1) over (partition by carno order by car_time) flag2 --伪列flag2
from (
select l1.car_time car_time,
l1.carno carno,
intervention,
drivingmode,
case
when drivingmode !=
lead(drivingmode, 1) over (partition by l1.carno order by l1.car_time)
then 1
else 0 end flag1,-- 给分界点标记为1
speed
from (
select car_time,
carno,
intervention --人工介入次数
from cowa_dwd.dwd_dcloud_mqtt_now_static_1d_d
where dt = date_add(`current_date`(), -1)
) l1
left join(
select carno,
car_time,
case when drivingmode is null then 'MANUAL' else drivingmode end drivingmode,--驾驶模式
speed --速度
from cowa_dwd.dwd_dcloud_mqtt_drive_1d_d
where dt = date_add(`current_date`(), -1)
) l2
on l1.carno = l2.carno and l1.car_time = l2.car_time
where drivingmode != 'REMOTE_DRIVE'
) l3
) l4
) g1
group by carno, drivingmode, `intervention`, flag3
) g2
) g3
where lead_dri = 'AUTO_DRIVE'
and lag_dri = 'AUTO_DRIVE'
and drivingmode = 'MANUAL'
and intervention >= 1
group by carno;
- 接下来我们拆解一下来理解,拿某车辆举例来看:
select l1.car_time car_time,
l1.carno carno,
intervention,
drivingmode,
case
when drivingmode !=
lead(drivingmode, 1) over (partition by l1.carno order by l1.car_time)
then 1
else 0 end flag1,-- 给分界点标记为1
speed
from (
select car_time,
carno,
intervention
from cowa_dwd.dwd_dcloud_mqtt_now_static_1d_d
where dt = date_add(`current_date`(), -1)
and carno = 10044
) l1
left join(
select carno,
car_time,
case when drivingmode is null then 'MANUAL' else drivingmode end drivingmode,
speed
from cowa_dwd.dwd_dcloud_mqtt_drive_1d_d
where dt = date_add(`current_date`(), -1)
) l2
on l1.carno = l2.carno and l1.car_time = l2.car_time
where drivingmode != 'REMOTE_DRIVE'
- 运行结果拉到下面这条记录,可以看到在MANUAL模式和AUTO_DRIVE模式切换时,已经给这个切换的分界打上标记1
接下来我们将flag1整体上提一行:
select car_time,
carno,
intervention,
drivingmode,
speed,
lag(flag1, 1) over (partition by carno order by car_time) flag2 --伪列flag2
from (
select l1.car_time car_time,
l1.carno carno,
intervention,
drivingmode,
case
when drivingmode !=
lead(drivingmode, 1) over (partition by l1.carno order by l1.car_time)
then 1
else 0 end flag1,-- 给分界点标记为1
speed
from (
select car_time,
carno,
intervention
from cowa_dwd.dwd_dcloud_mqtt_now_static_1d_d
where dt = date_add(`current_date`(), -1)
and carno = 10044
) l1
left join(
select carno,
car_time,
case when drivingmode is null then 'MANUAL' else drivingmode end drivingmode,
speed
from cowa_dwd.dwd_dcloud_mqtt_drive_1d_d
where dt = date_add(`current_date`(), -1)
) l2
on l1.carno = l2.carno and l1.car_time = l2.car_time
where drivingmode != 'REMOTE_DRIVE'
) l3
- 效果如下:
- 这里可以看到车辆从每切换驾驶模式,我们的标记1就打在切换后的第一条,这也为后面的
sum()over()
分组伪列埋下伏笔 - 继续拆解查看分组伪列flag2 :
select car_time,
carno,
intervention,
drivingmode,
speed,
sum(flag2) over (partition by carno order by car_time) flag3 --伪列flag3
from (
select car_time,
carno,
intervention,
drivingmode,
speed,
lag(flag1, 1) over (partition by carno order by car_time) flag2 --伪列flag2
from (
select l1.car_time car_time,
l1.carno carno,
intervention,
drivingmode,
case
when drivingmode !=
lead(drivingmode, 1) over (partition by l1.carno order by l1.car_time)
then 1
else 0 end flag1,-- 给分界点标记为1
speed
from (
select car_time,
carno,
intervention
from cowa_dwd.dwd_dcloud_mqtt_now_static_1d_d
where dt = date_add(`current_date`(), -1)
and carno = 10044
) l1
left join(
select carno,
car_time,
case when drivingmode is null then 'MANUAL' else drivingmode end drivingmode,
speed
from cowa_dwd.dwd_dcloud_mqtt_drive_1d_d
where dt = date_add(`current_date`(), -1)
) l2
on l1.carno = l2.carno and l1.car_time = l2.car_time
where drivingmode != 'REMOTE_DRIVE'
) l3
) l4
- 效果如下:
- 我们可以看到每切换驾驶模式flag3内容都不一样,至此再看文章最开始的SQL大家应该能理解了
小结
这个SQL中我抽取这类问题的范式为:
- 分组分界点标记flag1
- 标记flag1的字段整体下移一行为flag2
- 最后
sum()over()
开窗实现分组的目标伪列
这类问题HIVE官方也总结过 称之为Gaps and Islands