Postgresql版本是我工作在用的,写的留存是给我们公司后台用的,所以确保可用
Mysql版本是我回忆Mysql语法改的,八九不离十,稍微改改问题也不大。这个也是可以扩展的,比如我们的需求是渠道筛选的留存, 拿在从用户表里取用户的时候可以通过where来进行渠道筛选
postgresql版本
---- 1. 获取需要查看留存的用户uid,create_time
SELECT id, to_char(created_at, 'YYYY-MM-DD') create_time
FROM users
WHERE to_char(users.created_at , 'YYYY-MM-DD') >= '2021-03-01'
AND to_char(users.created_at , 'YYYY-MM-DD') <= '2021-03-10'
运行结果
---- 2. 获取用户在1-30天内活跃状况
SELECT uid,
create_time,
to_char(access_at, 'YYYY-MM-DD') use_time
FROM (SELECT id, to_char(created_at, 'YYYY-MM-DD') create_time
FROM users
WHERE to_char(users.created_at , 'YYYY-MM-DD') >= '2021-03-01'
AND to_char(users.created_at, 'YYYY-MM-DD') <= '2021-03-10') usersdb
LEFT JOIN daus ON uid = usersdb.id
WHERE to_char(access_at , 'YYYY-MM-DD') >= '2021-03-01'
AND to_char(access_at , 'YYYY-MM-DD') <= '2021-04-01'
AND to_char(access_at , 'YYYY-MM-DD') > create_time
GROUP BY create_time, use_time, uid
运行结果
---- 3. 获取活跃时间与注册时间差by_day
SELECT uid,
create_time,
to_char(access_at, 'YYYY-MM-DD') use_time,
EXTRACT(epoch FROM age(to_char(access_at, 'YYYY-MM-DD')::date, create_time::date)) /86400 by_day
FROM (SELECT id, to_char(created_at , 'YYYY-MM-DD') create_time
FROM users
WHERE to_char(users.created_at, 'YYYY-MM-DD') >= '2021-03-01'
AND to_char(users.created_at, 'YYYY-MM-DD') <= '2021-03-10') usersdb
LEFT JOIN daus ON uid = usersdb.id
WHERE to_char(access_at', 'YYYY-MM-DD') >= '2021-03-01'
AND to_char(access_at, 'YYYY-MM-DD') <= '2021-04-01'
AND to_char(access_at , 'YYYY-MM-DD') > create_time
GROUP BY create_time, use_time, uid
运行结果
---- 4. 通过sum(case when by_day = 1 then 1 else 0 end) day_1 算出多天的留存人数,并联查出总人数,可以方便的算出留存率(可以通过with来提取公共的查询,但是效率不高,只是sql语句看着简单了,不推荐)
SELECT retention.*, users
FROM (SELECT create_time date,
sum(case when by_day = 1 then 1 else 0 end) day1,
sum(case when by_day = 2 then 1 else 0 end) day2,
sum(case when by_day = 3 then 1 else 0 end) day3,
sum(case when by_day = 4 then 1 else 0 end) day4,
sum(case when by_day = 5 then 1 else 0 end) day5,
sum(case when by_day = 6 then 1 else 0 end) day6,
sum(case when by_day = 7 then 1 else 0 end) day7,
sum(case when by_day = 14 then 1 else 0 end) day14,
sum(case when by_day = 30 then 1 else 0 end) day30
FROM (
SELECT uid,
create_time,
to_char(access_at + interval '8 hour', 'YYYY-MM-DD') use_time,
EXTRACT(epoch FROM
age(to_char(access_at + interval '8 hour', 'YYYY-MM-DD')::date, create_time::date)) /
86400 by_day
FROM ((SELECT id, to_char(users.created_at + interval '8 hour', 'YYYY-MM-DD') create_time
FROM users
LEFT JOIN channels ON users.channel = channels.identifier
WHERE to_char(users.created_at + interval '8 hour', 'YYYY-MM-DD') >= '2021-03-01'
AND to_char(users.created_at + interval '8 hour', 'YYYY-MM-DD') <= '2021-03-10')) usersdb
LEFT JOIN daus ON uid = usersdb.id
WHERE to_char(access_at + interval '8 hour', 'YYYY-MM-DD') >= '2021-03-01'
AND to_char(access_at + interval '8 hour', 'YYYY-MM-DD') <= '2021-03-10'
AND to_char(access_at + interval '8 hour', 'YYYY-MM-DD') > create_time
GROUP BY create_time, use_time, uid) tmp
GROUP BY date) retention
LEFT JOIN (SELECT create_time date, count(*) users
FROM ((SELECT id, to_char(users.created_at + interval '8 hour', 'YYYY-MM-DD') create_time
FROM users
LEFT JOIN channels ON users.channel = channels.identifier
WHERE to_char(users.created_at + interval '8 hour', 'YYYY-MM-DD') >= '2021-03-01'
AND to_char(users.created_at + interval '8 hour', 'YYYY-MM-DD') <= '2021-03-10')) usersdb
GROUP BY create_time) newusers
ON newusers.date = retention.date
ORDER BY date desc;
运行结果
mysql版本
SELECT create_time date,
sum(case when by_day = 1 then 1 else 0 end) day_1,
sum(case when by_day = 2 then 1 else 0 end) day_2,
sum(case when by_day = 3 then 1 else 0 end) day_3,
sum(case when by_day = 4 then 1 else 0 end) day_4,
sum(case when by_day = 5 then 1 else 0 end) day_5,
sum(case when by_day = 6 then 1 else 0 end) day_6,
sum(case when by_day = 7 then 1 else 0 end) day_7,
sum(case when by_day = 14 then 1 else 0 end) day_14,
sum(case when by_day = 30 then 1 else 0 end) day_30
FROM (
SELECT uid,
create_time,
date_format(access_at, 'YYYY-MM-DD') use_time,
DATEDIFF(create_time,date_format(access_at, 'YYYY-MM-DD')) by_day
FROM (SELECT id, date_format(create_at, 'YYYY-MM-DD') create_time
FROM users
WHERE date_format(create_at, 'YYYY-MM-DD') >= '2021-03-01'
AND date_format(create_at, 'YYYY-MM-DD') <= '2021-03-10') usersdb
LEFT JOIN daus ON uid = usersdb.id
WHERE date_format(access_at, 'YYYY-MM-DD') >= '2021-03-01'
AND date_format(access_at, 'YYYY-MM-DD') <= '2021-04-01'
AND date_format(access_at, 'YYYY-MM-DD') > create_time
GROUP BY create_time, use_time, uid
ORDER BY uid) tmp
GROUP BY date;