Platform address:vue-sqleditor
题目需求:
从订单信息表(order_info)中查询首次下单后第二天仍然下单的用户占所有下单用户的比例,结果保留一位小数,使用百分数显示,
期望结果如下:
percentage
<string>
70.0%
需要用到的表:
订单信息表:order_info
order_id (订单id) user_id (用户id) create_date (下单日期) total_amount (订单金额)
1 101 2021-09-30 29000.00
10 103 2020-10-02 28000.00
建表语句:
create table order_info(
order_id BIGINT,
user_id BIGINT,
create_date STRING,
total_amount DOUBLE)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
插入样例数据:
Insert into order_info
Select 1,101,'2021-09-27',29000.00
Union all
Select 2,103,'2020-10-02',28000.00
Union all
Select 3,101,'2021-09-28',29000.00
Union all
Select 4,101,'2021-09-30',29000.00
Union all
Select 5,103,'2020-10-03',28000.00
Union all
Select 6,103,'2020-10-04',28000.00
Union all
Select 7,104,'2020-10-02',28000.00
Union all
Select 8,104,'2020-10-02',28000.00
Union all
Select 9,104,'2020-10-03',28000.00
Union all
Select 10,104,'2020-10-04',28000.00;
需求分析:
排除用户一天购买多次的情况
select user_id,create_date from order_info group by user_id,create_date
取出每个用户按天排序的序号
select user_id,create_date,row_number() over(partition by user_id order by create_date) as rnk from (select user_id,create_date from order_info group by user_id,create_date)s1
取出每个用户下单日期与按天排序的序号之间的差值(若差值即首次下单日期的前一天的个数大于等于2,则说明此用户有首次下单后第二天仍然下单的情况)
select user_id,create_date,rnk,date_sub(create_date,rnk) as date_diff from (select user_id,create_date,row_number() over(partition by user_id order by create_date) as rnk from (select user_id,create_date from order_info group by user_id,create_date)s1)s2
取出连续两天下单的用户
select user_id,date_diff,count(1) as cnt from (select user_id,create_date,rnk,date_sub(create_date,rnk) as date_diff from (select user_id,create_date,row_number() over(partition by user_id order by create_date) as rnk from (select user_id,create_date from order_info group by user_id,create_date)s1)s2)s3 group by user_id,date_diff having cnt>=2
排除用户有多次连续两天下单的可能
select user_id from (select user_id,date_diff,count(1) as cnt from (select user_id,create_date,rnk,date_sub(create_date,rnk) as date_diff from (select user_id,create_date,row_number() over(partition by user_id order by create_date) as rnk from (select user_id,create_date from order_info group by user_id,create_date)s1)s2)s3 group by user_id,date_diff having cnt>=2)s3 group by user_id
left join获取到首次下单后第二天仍然下单的用户占所有下单用户的比例
select concat(100*sum(if(s5.user_id is not null,1,0))/count(1),'%') as percentage from
(select user_id from order_info group by user_id)s4
left join
(select user_id from (select user_id,date_diff,count(1) as cnt from (select user_id,create_date,rnk,date_sub(create_date,rnk) as date_diff from (select user_id,create_date,row_number() over(partition by user_id order by create_date) as rnk from (select user_id,create_date from order_info group by user_id,create_date)s1)s2)s3 group by user_id,date_diff having cnt>=2)s3 group by user_id)s5
on s4.user_id=s5.user_id
最终SQL:
select
concat(
100 * sum(if(s5.user_id is not null, 1, 0)) / count(1),
'%'
) as percentage
from
(
select
user_id
from
order_info
group by
user_id
) s4
left join (
select
user_id
from
(
select
user_id,
date_diff,
count(1) as cnt
from
(
select
user_id,
create_date,
rnk,
date_sub(create_date, rnk) as date_diff
from
(
select
user_id,
create_date,
row_number() over(
partition by user_id
order by
create_date
) as rnk
from
(
select
user_id,
create_date
from
order_info
group by
user_id,
create_date
) s1
) s2
) s3
group by
user_id,
date_diff
having
cnt >= 2
) s3
group by
user_id
) s5 on s4.user_id = s5.user_id
AC时间:
2022年12月08日22:56:25

本文通过SQL查询,分析了订单信息表中首次下单后第二天继续下单的用户比例,并详细展示了如何处理用户重复下单及连续下单的数据逻辑。
&spm=1001.2101.3001.5002&articleId=128247149&d=1&t=3&u=591182253dd14c0dbadc730e49210603)
492

被折叠的 条评论
为什么被折叠?



