新用户数和日活跃数
题目链接:
https://www.nowcoder.com/practice/dbbc9b03794a48f6b34f1131b1a903eb?tpId=268&tqId=2285346&ru=/exam/oj&qru=/ta/sql-factory-interview/question-ranking&sourceUrl=%2Fexam%2Foj%3Ftab%3DSQL%25E7%25AF%2587%26topicId%3D268
描述
用户行为日志表tb_user_log
id | uid | artical_id | in_time | out_time | sign_cin |
---|---|---|---|---|---|
1 | 101 | 9001 | 2021-10-31 10:00:00 | 2021-10-31 10:00:09 | 0 |
2 | 102 | 9001 | 2021-10-31 10:00:00 | 2021-10-31 10:00:09 | 0 |
3 | 101 | 0 | 2021-11-01 10:00:00 | 2021-11-01 10:00:42 | 1 |
4 | 102 | 9001 | 2021-11-01 10:00:00 | 2021-11-01 10:00:09 | 0 |
5 | 108 | 9001 | 2021-11-01 10:00:01 | 2021-11-01 10:00:50 | 0 |
6 | 108 | 9001 | 2021-11-02 10:00:01 | 2021-11-02 10:00:50 | 0 |
7 | 104 | 9001 | 2021-11-02 10:00:28 | 2021-11-02 10:00:50 | 0 |
8 | 106 | 9001 | 2021-11-02 10:00:28 | 2021-11-02 10:00:50 | 0 |
9 | 108 | 9001 | 2021-11-03 10:00:01 | 2021-11-03 10:00:50 | 0 |
10 | 109 | 9002 | 2021-11-03 11:00:55 | 2021-11-03 11:00:59 | 0 |
11 | 104 | 9003 | 2021-11-03 11:00:45 | 2021-11-03 11:00:55 | 0 |
12 | 105 | 9003 | 2021-11-03 11:00:53 | 2021-11-03 11:00:59 | 0 |
13 | 106 | 9003 | 2021-11-03 11:00:45 | 2021-11-03 11:00:55 | 0 |
(uid-用户ID, artical_id-文章ID, in_time-进入时间, out_time-离开时间, sign_in-是否签到)
问题:统计每天的日活数及新用户占比
注:
- 新用户占比=当天的新用户数÷当天活跃用户数(日活数)。
- 如果in_time-进入时间和out_time-离开时间跨天了,在两天里都记为该用户活跃过。
- 新用户占比保留2位小数,结果按日期升序排序。
输出示例:
示例数据的输出结果如下
dt | dau | uv_new_ratio |
---|---|---|
2021-10-30 | 2 | 1.00 |
2021-11-01 | 3 | 0.33 |
2021-11-02 | 3 | 0.67 |
2021-11-03 | 5 | 0.40 |
解释:
2021年10月31日有2个用户活跃,都为新用户,新用户占比1.00;
2021年11月1日有3个用户活跃,其中1个新用户,新用户占比0.33;
示例1
输入:
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(101, 9001, '2021-10-31 10:00:00', '2021-10-31 10:00:09', 0),
(102, 9001, '2021-10-31 10:00:00', '2021-10-31 10:00:09', 0),
(101, 0, '2021-11-01 10:00:00', '2021-11-01 10:00:42', 1),
(102, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:09', 0),
(108, 9001, '2021-11-01 10:00:01', '2021-11-01 10:01:50', 0),
(108, 9001, '2021-11-02 10:00:01', '2021-11-02 10:01:50', 0),
(104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(106, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(108, 9001, '2021-11-03 10:00:01', '2021-11-03 10:01:50', 0),
(109, 9002, '2021-11-03 11:00:55', '2021-11-03 11:00:59', 0),
(104, 9003, '2021-11-03 11:00:45', '2021-11-03 11:00:55', 0),
(105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0),
(106, 9003, '2021-11-03 11:00:45', '2021-11-03 11:00:55', 0);
复制
输出:
2021-10-31|2|1.00
2021-11-01|3|0.33
2021-11-02|3|0.67
2021-11-03|5|0.40
分析
SELECT * FROM tb_user_log
# 分析:tb_user_log 里的数据里 用户 u_id 每天(in_time)可以进入多次,所以 u_id 可以有多条记录。
SELECT count(DISTINCT uid) 人数 FROM tb_user_log
# union : 合并相同的列,每列不允许重复
# union all : 合并相同的列每列允许重复
# 查找每天的新用户数 : 新用户:首次使用该软件或者首次进入
SELECT uid, DATE(min(in_time)) 进入时间 FROM tb_user_log GROUP BY uid # 每个用户首次进入的时间
-- 每天的新用户数:
DROP VIEW IF EXISTS V_NewUser;
CREATE VIEW V_NewUser
AS
SELECT dt, count(uid) NewUser
FROM (SELECT uid, DATE(min(in_time)) dt FROM tb_user_log GROUP BY uid) a
GROUP BY dt
SELECT * FROM V_NewUser
-- 每日活跃用户数(DAU): 日期分组 用户计数
DROP VIEW IF EXISTS v_dau;
CREATE VIEW v_dau
as
SELECT dt, COUNT(uid) dau
FROM
(select uid, date(in_time) dt from tb_user_log
union
select uid, date(out_time) dt from tb_user_log) b
GROUP BY b.dt
SELECT * FROM v_dau
-- 先统计新用户,再统计日活数(DAU),再计算新用户占比。
SELECT a.dt,
b.dau,
IFNULL(ROUND(a.newuser/b.dau, 2) ,0) uv_new_ratio
FROM V_dau b
LEFT JOIN V_NewUser a
ON a.dt = b.dt
ORDER BY a.dt
SELECT * FROM V_NewUser;
SELECT * FROM V_dau;
答案
SELECT b.dt,
b.dau,
IFNULL(ROUND(a.newuser/b.dau, 2) ,0) uv_new_ratio
FROM
(SELECT dt, count(uid) NewUser
FROM (SELECT uid, DATE(min(in_time)) dt FROM tb_user_log GROUP BY uid) a
GROUP BY dt) a -- 新用户
Right JOIN
(SELECT dt, COUNT(uid) dau
FROM
(select uid, date(in_time) dt from tb_user_log
union
select uid, date(out_time) dt from tb_user_log) b
GROUP BY b.dt) b -- 日活跃
ON a.dt = b.dt
ORDER BY b.dt
使用窗口函数解答
WITH t1
AS
(
SELECT
*, row_number() over (PARTITION BY uid ORDER BY date) AS times
FROM
(
SELECT uid, LEFT (in_time, 10) AS date FROM tb_user_log
UNION
SELECT uid, LEFT (out_time, 10) AS date FROM tb_user_log
) tmp
)
SELECT
date,
count(uid) AS dau,
round(
sum(IF(times = 1, 1, 0)) / count(uid),
2
) AS uv_new_ratio
FROM t1
GROUP BY
date
ORDER BY
date;
解释:
1. WITH 语句
WITH 表名
AS
()
SELECT * FROM 表名;
with语句相当于把一个中间查询语句用一个表名表示,方便后续在该表的基础上进行查询。
2. 窗口函数
基本语法:
窗口函数 OVER (PARTITION BY 字段 ORDER BY 字段)