HQL笔试题
其他场景题目(含思路和步骤):Hive场景题
1. 求:找出所有科目成绩都大于某一学科平均成绩的学生
uid,subject_id,score
1001 01 90
1001 02 90
1001 03 90
1002 01 85
1002 02 85
1002 03 70
1003 01 70
1003 02 70
1003 03 85
-- 创建表语句
create table score(
uid string,
subject_id string,
score int)
row format delimited fields terminated by '\t';
-- 1) 求出每个学科的平均成绩
select uid,score,avg(score) over(parition by subject_id) avg_score
from score;
-- 2) 根据是否大于平均成绩记录 flag,大于则记为 0 否则记为 1
select uid,if(score>avg_score,0,1) flag
from (
select uid,score,avg(score) over(parition by subject_id) avg_score
from score
) t1;
-- 3) 根据根据学生 id 进行分组统计 flag 的和,和为 0 则是所有学科都大于平均成绩
select uid from
(select uid,if(score>avg_score,0,1) flag from
(select uid,score,avg(score) over(parition by subject_id) avg_score
from score) t1) t2
group by uid
having sum(flag)=0;
2. 要求使用 SQL 统计出每个用户的累积访问次数。
userId,visitDate,visitCount
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
u02 2017/1/23 6
u01 2017/2/22 4
-- 创建表语句
create table action(
userId string,
visitsDate string,
visitCount int)
row format delimited fields terminated by '\t';
regexp_replace函数:regexp_replace(s1,s2[,s3])
使用s3字符串替换出现在s1字符串中的所有s2字符串,并返回替换后的字符串;
-- 1) 修改数据格式
select userId,date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') dt,visitCount from action
-- 2) 计算每人单月访问量
select userId,dt,sum(visitCount) dt_count from
(select userId,date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') dt,visitCount from action) t1
group by userId,dt;
-- 3) 按月累计访问量
select userId,dt,dt_count
,sum(dt_count) over(partition by userId order by dt)
from
(select userId,dt,sum(visitCount) dt_count from
(select userId,date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') dt,visitCount from action) t1
group by userId,dt) t2;
3. 有个visit表,完成下面需求
user_id,shop
u1 a
u2 b
u1 b
u1 a
u3 c
u4 b
u1 a
u2 c
u5 b
u4 b
u6 c
u2 c
u1 b
u2 a
u2 a
u3 a
u5 a
u5 a
u5 a
-- 创建表语句
create table visit(
user_id,string,
shop string)
row format delimited fileds terminated by '\t';
1. 每个店铺的 UV(访客数)
select shop,count(distinct user_id) cnt from visit
group by shop;
2. 每个店铺访问次数 top3 的访客信息。输出店铺名称、访客 id、访问次数
-- 1) 查询每个店铺被每个用户访问次数
select shop,user_id,count(*) cnt from visit
group by shop,user_id;
-- 2) 计算每个店铺被用户访问次数排名
select shop,user_id,cnt,rank() over(partition by shop order by cnt) rk from
(select shop,user_id,count(*) cnt from visit
group by shop,user_id) t1;
-- 3) 取每个店铺排名前 3 的
select shop,user_id,cnt from
(select shop,user_id,cnt,rank() over(partition by shop order by cnt) rk from
(select shop,user_id,count(*) cnt from visit
group by shop,user_id) t1) t2;
4. 已知一个表order_table
dt,order_id,user_id,amount
2017-01-01,10029028,1000003251,33.57
...
create table order_tab(
dt string,
order_id string,
user_id string,
amount decimal(10,2))
row format delimited fields terminated by ',';
1. 给出 2017 年每个月的订单数、用户数、总成交金额。
select date_format(dt,'yyyy-MM')
,count(order_id)
,count