【SQL逻辑】groupings sets场景应用

最新推荐文章于 2025-03-31 10:17:57 发布

一条咸鱼的记录

最新推荐文章于 2025-03-31 10:17:57 发布

阅读量384

点赞数

分类专栏： SQL逻辑文章标签： sql 数据库

本文链接：https://blog.youkuaiyun.com/weixin_44505713/article/details/120174757

版权

SQL逻辑专栏收录该内容

1 篇文章

订阅专栏

背景

今天遇到一个SQL场景有如下表table，每个订单对应买了多个品类（品类数枚举值只有（A、B、C））。

order_id	cate
001	A
001	B
001	C
002	A
002	B
003	A
004	B
004	D

需要提前统计，各个类型品类组合下的下单量(对订单去重)，结果如下

cate	order_qty
A	3
B	3
C	1
D	1
AB	4
AC	3
AD	4
BC	3
BD	3
CD	2
ABC	4
ACD	4
ACD	4
BCD	3
ABCD	4

方案

建表写数据

create table tmp (
	order_id String
	, cate String
)comment '测试表'
row format delimited fields terminated by ',' stored as textfile

方案一
一开始想到的，这种场景，union all一定能成，所以简单粗暴。如下，虽然代码冗余，但是确实很直白。

select cate,count(distinct order_id) from tmp group by cate
union all
select 'AB',count(distinct order_id) from tmp where cate in ('A','B')
union all
select 'AC',count(distinct order_id) from tmp where cate in ('A','C')
union all
select 'AD',count(distinct order_id) from tmp where cate in ('A','D')
union all
select 'BC',count(distinct order_id) from tmp where cate in ('B','C')
union all
select 'BD',count(distinct order_id) from tmp where cate in ('B','D')
union all
select 'CD',count(distinct order_id) from tmp where cate in ('C','D')
union all
select 'ABC',count(distinct order_id) from tmp where cate in ('A','B','C')
union all
select 'ABD',count(distinct order_id) from tmp where cate in ('A','B','D')
union all
select 'BCD',count(distinct order_id) from tmp where cate in ('B','C','D')
union all
select 'ABCD',count(distinct order_id) from tmp where cate in ('A','B','C','D')

方案二
后面灵机一动，可以用case when+grouping sets

with tmp as (
  select order_id,cate
  ,case when cate = 'A' then '1' else '0' end as is_A
  ,case when cate = 'B' then '1' else '0' end as is_B
  ,case when cate = 'C' then '1' else '0' end as is_C
  ,case when cate = 'D' then '1' else '0' end as is_D
   from tmp
), base as (
  select is_A,is_B,is_C,is_D,grouping__id,count(distinct order_id) as qty from tmp
  group by is_A,is_B,is_C,is_D
  grouping sets(
    (is_A,is_B,is_C), -- D的聚合
    (is_A,is_B,is_D), -- C的聚合
    (is_A,is_C,is_D), -- B的聚合
    (is_B,is_C,is_D), -- A的聚合
    (is_A,is_B), -- CD的聚合
    (is_A,is_C), -- BD的聚合
    (is_A,is_D), -- BC的聚合
    (is_B,is_C), -- AD的聚合
    (is_B,is_D), -- AC的聚合
    (is_C,is_D), -- AB的聚合
    (is_A), -- BCD的聚合
    (is_B), -- ACD的聚合
    (is_C), -- ABD的聚合
    (is_D), -- ABC的聚合
    ()      -- ABCD的聚合
  )
)

select
case when grouping__id = 15 then 'ABCD'
     when grouping__id = 14 then 'ABC'
     when grouping__id = 13 then 'ABD'
     when grouping__id = 12 then 'AB'
     when grouping__id = 11 then 'ACD'
     when grouping__id = 10 then 'AC'
     when grouping__id = 9 then 'AD'
     when grouping__id = 8 then 'A'
     when grouping__id = 7 then 'BCD'
     when grouping__id = 6 then 'BC'
     when grouping__id = 5 then 'BD'
     when grouping__id = 4 then 'B'
     when grouping__id = 3 then 'CD'
     when grouping__id = 2 then 'C'
     when grouping__id = 1 then 'D'
     else null end as val
,qty
from base
where 
-- 过滤带1的都不是我们需要的结果
concat(coaLesce(is_A,''),Coalesce(is_B,''),coalesce(is_C,''),coalesce(is_D,'')) = '0000'
or concat(coaLesce(is_A,''),Coalesce(is_B,''),coalesce(is_C,''),coalesce(is_D,'')) = '000'
or concat(coaLesce(is_A,''),Coalesce(is_B,''),coalesce(is_C,''),coalesce(is_D,'')) = '00'
or concat(coaLesce(is_A,''),Coalesce(is_B,''),coalesce(is_C,''),coalesce(is_D,'')) = '0'
or concat(coaLesce(is_A,''),Coalesce(is_B,''),coalesce(is_C,''),coalesce(is_D,'')) = ''
order by val