SELECT t2.userid,
t2.visitmonth,
subtotal_visit_cnt,
sum(subtotal_visit_cnt) over (partition BY userid
ORDER BY visitmonth) AS total_visit_cnt
FROM
(SELECT userid,
visitmonth,
sum(visitcount) AS subtotal_visit_cnt
FROM
(SELECT userid,
date_format(regexp_replace(visitdate,'/','-'),'yyyy-MM') AS visitmonth,
visitcount
FROM test_sql.test1) t1
GROUP BY userid,
visitmonth)t2
ORDER BY t2.userid,
t2.visitmonth
第二题
需求
有50W个京东店铺,每个顾客访客访问任何一个店铺的任何一个商品时都会产生一条访问日志,
访问日志存储的表名为Visit,访客的用户id为user_id,被访问的店铺名称为shop,数据如下:
u1 a
u2 b
u1 b
u1 a
u3 c
u4 b
u1 a
u2 c
u5 b
u4 b
u6 c
u2 c
u1 b
u2 a
u2 a
u3 a
u5 a
u5 a
u5 a
请统计:
(1)每个店铺的UV(访客数)
(2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
(1)方式1:
SELECT shop,
count(DISTINCT user_id)
FROM test_sql.test2
GROUP BY shop
方式2:
SELECT t.shop,
count(*)
FROM
(SELECT user_id,
shop
FROM test_sql.test2
GROUP BY user_id,
shop) t
GROUP BY t.shop
(2)
SELECT t2.shop,
t2.user_id,
t2.cnt
FROM
(SELECT t1.*,
row_number() over(partition BY t1.shop
ORDER BY t1.cnt DESC) rank
FROM
(SELECT user_id,
shop,
count(*) AS cnt
FROM test_sql.test2
GROUP BY user_id,
shop) t1)t2
WHERE rank <= 3
第九题:请写出SQL语句,查询充值日志表2019年01月02号每个区组下充值额最大的账号,要求结果:
区组id,账号,金额,充值时间
[code=sql]
select
l.dist_id,
l.account,
l.`money`,
l.creat_time
from(
select
dist_id,
account,
sum_money
from(
select
dist_id,
account,
sum_money,
rank() over(partition by dist_id order by sum_money desc) rk
from(
select
dist_id,
account,
sum(`money`) sum_money
from
test_sql.test9
where
substr(creat_time,1,10)='2019-01-02'
group by dist_id,account
) a
) b where b.rk=1
) c join credit_log l on c.dist_id=l.dist_id and c.account=l.account;
1 22 110000.00 2019-01-02 13:00:02
2 18 100800.00 2019-01-02 13:00:11
[/code]
第四题:根据年龄段观看电影的次数进行排序。[code=sql]
select
b.name_range `年龄段`,b.watch_times `观看电影的次数`
from(
select
a.`年龄段` name_range,count(a.user_id) watch_times
from(
select
user_id,case
when age>0 and age<=10 then '0-10'
when age>10 and age<=20 then '10-20'
when age>20 and age<=30 then '20-30'
when age>30 and age<=40 then '30-40'
when age>40 and age<=50 then '40-50'
when age>50 and age<=60 then '50-60'
when age>60 and age<=70 then '60-70'
else '70以上' end as `年龄段`
from test4user
) a join test4log l on a.user_id=l.user_id
group by a.`年龄段`
) b
order by `观看电影的次数` desc;
[/code]
(2)
SELECT count(user_id)
FROM test_sql.test3
GROUP BY user_id
HAVING date_format(min(dt),'yyyy-MM')='2017-11';[code=sql]
-------
count(user_id)是分完组这个用户有几条数据的总数,不是新增用户的总数
SELECT COUNT(*) FROM(
SELECT user_id
FROM test_sql.test3
GROUP BY user_id
HAVING date_format(min(dt),'yyyy-MM')='2017-11')
[/code]