一、视频观看用户偏好
1、新增用户问题
2020 年 1 月 4 日、1 月 6 日的新增会员分别为?
按用户分组对时间最小的记录打标签,然后对标签统计
if(时间=min(时间)over(partition by 用户 ),1,0)
select v_date,sum(tag) as '新增会员数'
from
(select *,if(v_date = min(v_date)over(partition by usr_id),1,0) as tag
from bilibili_t1
where m_flg = 1) a
group by v_date
select v_date,count(1) as '新增会员数'
from
(select usr_id, min(v_date)
from bilibili_t1
where m_flg = 1
group by usr_id) a
group by v_date
2,哪个小时内的用户数最多,用户观看最高峰
直接对时间小时分组,求count(1),最后排序
select hour(v_time),count(1)
from bilibili_t2
group by hour(v_time)
order by 2 desc
3,鬼畜区用户里,有多少用户看过汽车,番剧区用户里,有多少用户看过放映厅?
呃。。发现sql对我来说,首要的一大难题就是读题理解题
4,哪一类用户的观看视频个数最多?(以每个用户观看的视频个数平均数衡量)
计算不同用户不同类型视频 -对应的观看个数
再按视频类型分组求均值 ----sum(观看个数)/sum(用户个数)
select v_tpe,avg(gs) as '平均观看个数'
from(
select b.v_typ,a.usr_id,count(1) as gs
from bilibili_t2 a left join bilibili_t3 a
on a.v_id = b.v_id
group by b.v_typ,a.usr_id ) t
group by v_tpe ;
5、不同类型中播放记录最多的剧名分别是?
当天最受欢迎的放映厅、番剧分别是?
按不同类型不同剧名分组求记录数—加上排序列(窗口函数)–求最值、第一,可以最后再嵌套一下然后筛选排序为1
求最值,简单的:order 排个序算了
select *
from(
select *,dense_rank()over(partition by v_tpy order by gs desc) as rnk
from(
select b.v_typ,b.v_nm,count(1) as gs
from bilibili_t2 a left join bilibili_t3 a
on a.v_id = b.v_id
group by b.v_typ,b.v_nm )t
) s
where rnk = 1
二、热门目的地
1、哪个地址的用车人数最多?-简单
select start_loc,count(distinct cust_uid) as gs
from didi_sht_rcd
group by start_loc
order by 2 desc
2、以’前海湾休闲会’所为目的地的订单高峰期是几点?–轻轻松松
select hour(start_tm),count(distinct cust_uid)
from didi_sht_rcd
where end_loc like '海湾休闲%'
group by hour(start_tm)
3、用车人次最多的住宅、酒吧分别是?
和上面的第五题不是一模一样嘛
select *,dense_rank()over(partition by loc_ctg order by gs desc) as rnk
from(
select b.loc_ctg,a.start_loc,count(1) as gs
from didi_sht_rcd a left join loc_nm_ctg b
on a.start_loc = b.loc_nm
group by b.loc_ctg,a.start_loc ) a
4、从机场到酒店,单量最高的车型为?-老套路
筛选条件多一些,分组-求个数-排个序
select a.car_cls,count(distinct cust_uid) as gs
from didi_sht_rcd a
join loc_nm_ctg b1 on a.start_loc = b1.loc_nm
join loc_nm_ctg b2 on a.end_loc = b2.loc_nm
where b1.loc_ctg='机场' and b2.loc_ctg='酒店'
group by a.car_cls
order by 2