– dense_rank()over() 使用dense_rank() 排序
在zltest.csv_IDFA20210521 表中:
answersheetRecorddate 为学员回答问题的时间
userid 为客户的唯一编号
学员每答题一次就会产生一条数据
需求是 按年份展示该年份的答题中位数
select answeryear,avg(num) median_num---为序号最大为奇数时只有一个 平均值为中间值,为序号最大为偶数时 取两中间值的平均数为中位数
from(
select d.answeryear,d.num,d.dense_num,b.max_dense_num,
case when b.max_dense_num%2=1 then (b.max_dense_num+1)/2
else 0
end as median1,
case when b.max_dense_num%2=0 then b.max_dense_num/2+1
else 0
end as median2,
case when b.max_dense_num%2=0 then b.max_dense_num/2
else 0
end as median3 ---分奇偶性 获取中间值的序号
from(
select answeryear,
num ,
dense_rank()over(partition by answeryear
order by num) dense_num --得到年内排序后的 序号 操作行为次数
from (
select year(answersheetRecorddate) answeryear,userid,count(1) num--- 这里计算一年内同一个客户进行了几次行为操作
from zltest.csv_IDFA20210521
where userid <> 'userid'---这里只是为了过滤表头
group by year(answersheetRecorddate),userid
)as a
) as d
left join (
select answeryear,max(dense_num)max_dense_num --得到年内排序后的 序号最大值
from(
select answeryear,
dense_rank()over(partition by answeryear
order by num) dense_num ---按年内 客户的操作行为次数进行 排序
from (
select year(answersheetRecorddate)answeryear,userid,count(1) num
from zltest.csv_IDFA20210521
where userid <> 'userid'
group by year(answersheetRecorddate),userid
)as d
) as d group by answeryear
)as b
on d.answeryear=b.answeryear
group by d.answeryear,d.num,d.dense_num,b.max_dense_num,
case when b.max_dense_num%2=1 then (b.max_dense_num+1)/2
else 0
end ,
case when b.max_dense_num%2=0 then b.max_dense_num/2+1
else 0
end ,
case when b.max_dense_num%2=0 then b.max_dense_num/2
else 0
end
) as d
where dense_num=median1 or median2=dense_num or median3=dense_num ---这里过滤掉了 所有非中间值 数据的序号
group by answeryear;