1 三个分组排序函数
1 row_number() over([partition by col1] [order by col2])
1
2
3
2 rank() over([partition col1][order by col2])
1
1
3
3 dense_rank() over([partition by col1][order by col2])
1
1
2
解决场景:
1 做CT检查患者住院排序TOP3
select
full_name,exam_class,admission_number,od
from
(
select
*,row_number() over(partition by exam_class order by admission_number desc) as od
from nfyy_cdm_20220210_1_incr.exam_master
) t
where t.exam_class = 'CT'
limit 3;
思路:
1 row_number() over(partition by .. order by ..)
2 全数据集分类排序,设定排序规则desc/asc,形成子查询
3 外层topN的过滤语句
2 语文成绩是80分的排名
1 涉及的列:
语文 sub
成绩 score
2 限定条件:
成绩80分
3 结果列:
排名是多少
select od
(
select *,row_number()(partititon by sub order by score desc) as od from t
) tt where tt.sub='语文' and tt.score = '80';
3 分页
select * from
(
select *, row_number() over () as rn from t
) t1 where rn between 1 and 5;
2 行列转换
1 列转行
concat_ws(',',collect_list(colName)) ... group by user_id
1 列转行:
concat_ws(',',collect_list(..)) group by ..
2 注意项:
1 搭配 group by ,实现同类别的 多列转一行
2 where 限定类别,放置在group by 前面
select
user_id,
concat_ws(',',collect_list(order_id)) as order_value
from order_user
group by user_id
limit 10;
select exam_class,concat_ws(',',collect_list(full_name)) as names
from
nfyy_cdm_20220226_1_incr.exam_master
where exam_class = 'CT'
group by exam_class
//结果
user_id order_value
10111 1223232,1223232,1958233,1223238
总结:
列传行函数:
collect_list 不去重
collect_set 去重
列类型要求:string
2 行转列
lateral view explode(split(order_value,','))
select
user_id,order_value,order_id
from lie_col
lateral view explode(split(order_value,',')) num as order_id
limit 10;
select
exam_class,names_value,name
from
z_xubing.tmp_exam_master
lateral view explode(split(names_value,',')) nn as name
//结果
user_id order_value order_id
1022567 123,124,125 123
1022567 123,124,125 124
1022567 123,124,125 125