–1 需求
– 编写sql语句实现每班前三名,分数一样并列,
– 同时求出前三名按名次排序的分差
–2 建表语句
create table student(
sid string,–学号
cid string,–班级号
score string – 分数
);
– 说明:
– 数据具有通用性,适合重复数据的情况
insert overwrite table student values
(“1”,“1901”,“90”),
(“2”,“1901”,“90”),
(“3”,“1901”,“80”),
(“4”,“1901”,“80”),
(“5”,“1901”,“78”),
(“6”,“1901”,“78”),
(“7”,“1902”,“66”),
(“8”,“1902”,“66”),
(“9”,“1902”,“99”),
(“10”,“1902”,“87”),
(“11”,“1902”,“87”);
SELECT * from student;
–sql 解答
create table tmp01 AS
select
cid,
score,
rk
from
(select
cid,
score,
dense_rank() over(partition by cid order by score desc) rk
from
student)t1
where t1.rk<=3;
create table tmp02 AS
SELECT
cid,
score,
rk,
lag(score) over(partition by cid order by score desc) lag_score
from tmp01;
SELECT
cid,
score,
rk,
ABS(COALESCE(score-lag_score,0)) diff_score
from tmp02;
–最终sql
SELECT cid
,score
,rk
,ABS(COALESCE(score-lag_score,0)) diff_score
FROM (
SELECT cid
,score
,rk
,lag(score) OVER(PARTITION BY cid ORDER BY score DESC) lag_score
FROM (
SELECT cid
,score
,rk
FROM (
SELECT cid
,score
,dense_rank() OVER(PARTITION BY cid ORDER BY score DESC) rk
FROM student
) t1
WHERE t1.rk <= 3
) tmp01
) tmp02
;
----但是以上解决方法有bug 具体的看下运行情况一目了然。
– #补充
– lag函数根据排序规则默认取上一行,比如数据排序是[90,80,80…],那么第二个80取到的还是80,
– 导致计算差值时结果变成了[0,-10,0],显然这不是我们想要的结果。
– 解决办法【序号和差值分开做,再关联】:先做排序,再补充分数差a表。
– 分数差的做法,做一个去重排序差值,得到有序名词、分数、差值的b表,再用a表和b表通过排序序号关联补充分数差。
–group by 后select的字段可以跟开窗函数
select
cid,
score,
abs(coalesce(score-lag(score) over(partition by cid order by score desc),0)) diff_score
from
student
group by cid,score;
–等价于 上来按照班级和成绩分组过滤掉然后这个样班级里面就没有重复的成绩了,成绩排名也就唯一了 因为学生可能是多人成绩一样,最后我们用班级和成绩去关联拿出学号
create table abs_table_02 AS
SELECT
cid ,
score,
RANK() over(partition by cid order by score desc) rk,
abs(COALESCE(score-LAG(score) OVER(PARTITION BY cid ORDER BY score DESC ),0)) diff_score
from student
group by
cid ,
score ; —这就叫绝对的班级分组,绝对的分数 绝对的排名 绝对的分差
SELECT s.cid
,s.sid
,s.score
,at.diff_score
FROM student s
JOIN (
SELECT *
FROM abs_table_02
WHERE rk <= 3
) at
ON s.cid = at.cid
AND s.score = at.score
;