源数据
cuid event_day 1 2020-04-01 2 2020-04-01 3 2020-04-01 1 2020-04-02 2 2020-04-02 5 2020-04-02 1 2020-04-08 2 2020-04-08 3 2020-04-08 3 2020-04-09
题目
求次日、七日的用户留存率
给的数据中是没有重复数据的,但是当做这种题的时候我的第一反应就是要先去重,因为一个用户一天可能会登录多次,但是只能计数一次,因此我又自己给他加上了一条数据,这样可以将题目更加的完善。
1 2020-04-01
在做的时候我是分三步完成的:
1、去重 保证一天内没有重复登录的用户 2、选出 rn=1 即一天内只记录用户登录一次,在计算当天登录的用户、次日登录的用户、七日登录的用户 3、计算当天人数、次日人数、七日人数并求解
1、去重
select cuid, event_day, row_number() over(partition by cuid,event_day order by event_day) as rn1 from baidu
2、求解
select cuid, event_day, case when event_day = '2020-04-01' then 1 end one , case when (cuid,event_day) in (select cuid,date_sub(event_day,interval 1 day) from baidu) then 2 end two , case when (cuid,event_day) in (select cuid,date_sub(event_day,interval 7 day) from baidu) then 7 end seven from t1 where rn1 = 1 and event_day = '2020-04-01'
3、计算
select event_day, round(count(two) / count(one),3) as ciri,round(count(seven) / count(one),3) as qiri from t2 group by event_day
完整代码
with t1 as(select cuid, event_day, row_number() over(partition by cuid,event_day order by event_day) as rn1 from baidu) ,t2 as(select cuid, event_day, case when event_day = '2020-04-01' then 1 end one , case when (cuid,event_day) in (select cuid,date_sub(event_day,interval 1 day) from baidu) then 2 end two , case when (cuid,event_day) in (select cuid,date_sub(event_day,interval 7 day) from baidu) then 7 end seven from t1 where rn1 = 1 and event_day = '2020-04-01') select event_day, round(count(two) / count(one),3) as ciri,round(count(seven) / count(one),3) as qiri from t2 group by event_day;