Hive简单查询练习50题

本文详细介绍了一个使用Hive SQL进行数据查询和分析的实际案例,涵盖了复杂查询、数据汇总、排名、统计分析等多个方面,通过具体的问题解决展示了Hive SQL的强大功能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

表数据

student表
s_id s_name s_birth s_sex
01 赵雷 1990-01-01 男
02 钱电 1990-12-21 男
03 孙风 1990-05-20 男
04 李云 1990-08-06 男
05 周梅 1991-12-01 女
06 吴兰 1992-03-01 女
07 郑竹 1989-07-01 女
08 王菊 1990-01-20 女

course表
c_id c_course t_id
01 语文 02
02 数学 01
03 英语 03

teacher表
t_id t_name
01 张三
02 李四
03 王五

score表
s_id c_id s_score
01 01 80
01 02 90
01 03 99
02 01 70
02 02 60
02 03 80
03 01 80
03 02 80
03 03 80
04 01 50
04 02 30
04 03 20
05 01 76
05 02 87
06 01 31
06 03 34
07 02 89
07 03 98

表名和字段

  • student表
    s_id、s_name、s_birth、s_sex –-学生编号,学生姓名, 出生年月,学生性别

  • course表
    c_id、c_course、t_id –-课程编号, 课程名称, 教师编号

  • teacher表
    t_id、t_name --教师编号,教师姓名

  • score表
    s_id、c_id、s_score --学生编号,课程编号,分数

--student表
create external table if not exists student(
s_id string,
s_name string,
s_birth date,
s_sex string
)
row format delimited fields terminated by '\t'
stored as textfile;
--course表
create external table if not exists course(
c_id string,
c_course string,
t_id string
)
row format delimited fields terminated by '\t'
stored as textfile;
--teacher表
create external table if not exists teacher(
t_id string,
t_name string
)
row format delimited fields terminated by '\t'
stored as textfile;
--score表
create external table if not exists score(
s_id string,
c_id string,
s_score int
)
row format delimited fields terminated by '\t'
stored as textfile;

数据导入

ssh连接到安装hive的节点

  1. 新建一个hive节点的ssh连接

  2. 创建一个文件夹放本地数据
    mkdir ./data
    cd ./data

  3. 创建普通文件
    vi ./student
    vi ./course
    vi ./teacher
    vi ./score

  4. 将对应的数据粘贴到对应文件里 —>esc :wq 保存并退出

  5. 加载数据

    #切换到hive选项卡
    load data local inpath '/root/data/student' into table student;
    load data local inpath '/root/data/course' into table course;
    load data local inpath '/root/data/teacher' into table teacher;
    load data local inpath '/root/data/score' into table score;
    

题目和答案

--1、查询"01"课程比"02"课程成绩高的学生的信息及课程分数:
select stu.s_id,stu.s_name,stu.s_birth,stu.s_sex,sc.s_score 
from student stu 
right join 
(select s1.s_id,s1.s_score from score s1,score s2 where s1.c_id='01' and s2.c_id='02' and s1.s_id=s2.s_id and s1.s_score>s2.s_score) sc 
on sc.s_id=stu.s_id;


--2、查询"01"课程比"02"课程成绩低的学生的信息及课程分数:
select stu.s_id,stu.s_name,stu.s_birth,stu.s_sex,sc.s_score from student stu 
right join 
(select s1.s_id,s1.s_score from score s1,score s2 where s1.c_id='01' and s2.c_id='02' and s1.s_id=s2.s_id and s1.s_score<s2.s_score) sc 
on sc.s_id=stu.s_id;


--3、查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩:
select s_id,avg(s_score) from score group by s_id having avg(s_score)>=60

select sc.s_id,sc.avgsc,stu.s_name
from
(select s_id,avg(s_score) avgsc from score group by s_id having avg(s_score)>=60) sc
left join student stu
on stu.s_id=sc.s_id;


--4、查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:(包括有成绩的和无成绩的)
select sc.s_id,stu.s_name,avg(sc.s_score) as avgsc
from student stu
right join score sc on sc.s_id=stu.s_id
group by sc.s_id,stu.s_name
having avgsc<60
union 
select stu.s_id,stu.s_name,NULL
from score sc
right join student stu on stu.s_id=sc.s_id
where sc.s_score is null;


--5、查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩:
select stu.s_id,stu.s_name,count(sc.c_id),sum(sc.s_score)
from student stu
left join score sc on sc.s_id=stu.s_id
group by stu.s_id,stu.s_name


--6、查询"李"姓老师的数量:
select count(t_name) from teacher where t_name like '李%';


--7、查询学过"张三"老师授课的同学的信息:
select stu.* from teacher t
left join course c on c.t_id=t.t_id
left join score sc on sc.c_id=c.c_id
left join student stu on stu.s_id=sc.s_id
where t.t_name='张三'

--8、查询没学过"张三"老师授课的同学的信息:
select stu.* from student stu 
left join teacher t on t.t_name='张三'
left join course c on c.t_id=t.t_id
left join score sc on sc.s_id=stu.s_id and sc.c_id=c.c_id
where sc.s_score is null;

select stu2.* from (select stu.* from teacher t
left join course c on c.t_id=t.t_id
left join score sc on sc.c_id=c.c_id
left join student stu on stu.s_id=sc.s_id
where t.t_name='张三') stu1
right join student stu2 on stu2.s_id=stu1.s_id
where stu1.s_id is null;


--9、查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息:
select stu.* from score sc
left join score sc1 on sc1.s_id=sc.s_id and sc1.c_id='01'
left join student stu on stu.s_id=sc.s_id
where sc.c_id=02 and sc.s_score is not null and sc1.s_score is not null;

--10、查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息:
select stu.* from score sc1
left join score sc2 on sc2.s_id=sc1.s_id and sc2.c_id=02
left join student stu on stu.s_id=sc1.s_id
where sc1.c_id=01 and sc1.s_score is not null and sc2.s_score is null;

--11、查询没有学全所有课程的同学的信息:
--–先查询出课程的总数量–再查询所需结果
select stu.* from course cs
left join score sc on sc.c_id=cs.c_id
left join student stu on stu.s_id=sc.s_id

select count(c_id) from course;
select stu.* from student stu
left join score sc on sc.s_id=stu.s_id
left join (select count(*) as sumc from course) cs


--12、查询至少有一门课与学号为"01"的同学所学相同的同学的信息:
select distinct stu.* from student stu
left join score s1 on s1.s_id='01'
left join score s2 on s2.s_id=stu.s_id and s2.c_id=s1.c_id
where s2.s_score is not null

--13、查询和"01"号的同学学习的课程完全相同的其他同学的信息:
select stu1.* from student stu1
left join (select distinct stu.s_id from student stu
left join course cs
left join score s1 on s1.s_id='01' and s1.c_id=cs.c_id
left join score sc on sc.c_id=s1.c_id and sc.s_id=stu.s_id
where sc.s_score is null) stu2 on stu1.s_id=stu2.s_id
where stu1.s_id<>'01' and stu2.s_id is null;

--14、查询没学过"张三"老师讲授的任一门课程的学生姓名:
select stu.s_name from student stu 
left join teacher t on t.t_name='张三'
left join course cs on cs.t_id=t.t_id
left join score sc on sc.c_id=cs.c_id and sc.s_id=stu.s_id
where sc.s_score is null--15、查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩:
select stu.s_id,stu.s_name,avg(sc.s_score) from student stu
left join score sc on sc.s_id=stu.s_id
left join score s1 on s1.s_id=sc.s_id
where s1.c_id<>sc.c_id and sc.s_score<60 and s1.s_score<60
group by stu.s_id,stu.s_name

--16、检索"01"课程分数小于60,按分数降序排列的学生信息:
select * from student stu
left join score sc on stu.s_id=sc.s_id and sc.c_id='01'
where sc.s_score<60
order by sc.s_score desc

--17、按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩:
select sc.*,s1.avgsc from score sc
left join (select s_id,avg(s_score) as avgsc from score  group by s_id) s1 on sc.s_id=s1.s_id  
order by s1.avgsc desc;

--18.查询各科成绩最高分、最低分和平均分:以如下形式显示:课程ID,课程name,最高分,最低分,平均分,及格率:
select cs.c_id,cs.c_course,sc.maxsc,sc.minsc,sc.avgsc,sc.jigelv from course cs
left join 
(select c_id,
max(s_score) maxsc,
min(s_score) minsc,
round(avg(s_score),2) avgsc,
concat(round(sum(if(s_score>60,1,0))/sum(if(s_score is not null,1,0))*100,2),'%') jigelv
from score group by c_id) sc on sc.c_id=cs.c_id;

--19、按各科成绩进行排序,并显示排名:– row_number() over()分组排序功能
select cs.c_id,cs.c_course,stu.s_name,sc.s_id,sc.s_score,row_number()over(partition by sc.c_id order by sc.s_score desc) as rank
from course cs
left join score sc on sc.c_id=cs.c_id
left join student stu on stu.s_id=sc.s_id;

--20、查询学生的总成绩并进行排名:
select sc.s_id,stu.s_name,sum(sc.s_score),row_number() over(order by sum(sc.s_score) desc)
from score sc
left join student stu on stu.s_id=sc.s_id
group by sc.s_id,stu.s_name;

--21、查询不同老师所教不同课程平均分从高到低显示:
select c_id,avg(s_score) as avgsc from score group by c_id;

select cs.*,t.t_name,sc.avgsc from course cs
left join 
teacher t on t.t_id=cs.t_id
left join 
(select c_id,avg(s_score) as avgsc from score group by c_id) sc
on sc.c_id=cs.c_id
order by avgsc desc;

--22、查询所有课程的成绩第2名到第3名的学生信息及该课程成绩:
--因为row_number()over()的执行晚于 where 、group by、  order by 的执行
select 
s_id,
s_score,
c_id,
row_number() over(partition bt c_id order by s_score desc) as rk
from score;

select stu.*,sc.s_score,cs.c_course,sc.rk from student stu
right join 
(select 
s_id,
s_score,
c_id,
row_number() over(partition by c_id order by s_score desc) as rk
from score) as sc on sc.s_id=stu.s_id
left join course cs on cs.c_id=sc.c_id
where rk between 2 and 3;
--23、统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[0-60]及所占百分比
select 
c_id,
sum(if(s_score>=85 and s_score<100,1,0)) as `[100-85]`,
sum(if(s_score>=75 and s_score<85,1,0)) as `[84-70]`,
sum(if(s_score>=60 and s_score<75,1,0)) as `[69-69]`,
sum(case when s_score<60 then 1 else 0 end) as `[60-0]`
from score
group by c_id;

select cs.c_id,cs.c_course,sc.*
from course cs
left join 
(select 
c_id,
sum(if(s_score>=85 and s_score<100,1,0)) as `[100-85]`,
sum(if(s_score>=75 and s_score<85,1,0)) as `[84-70]`,
sum(if(s_score>=60 and s_score<75,1,0)) as `[69-69]`,
sum(case when s_score<60 then 1 else 0 end) as `[60-0]`
from score
group by c_id) sc on sc.c_id=cs.c_id;

--24、查询学生平均成绩及其名次:
select s_id,
round(avg(s_score),2),
row_number()over(order by avg(s_score) desc) as rk 
from score group by s_id;

--25、查询各科成绩前三名的记录   三个语句
select s_id,
c_id,
row_number()over(partition by c_id order by s_score desc) as rk
from score;

select sc.*,cs.c_course,stu.s_name
from (select s_id,
c_id,
row_number()over(partition by c_id order by s_score desc) as rk
from score) sc
left join course cs on cs.c_id=sc.c_id
left join student stu on stu.s_id=sc.s_id
where rk between 1 and 3;

--26、查询每门课程被选修的学生数:
select sc.c_id,count(sc.s_id) from score sc group by sc.c_id;

--27、查询出只有两门课程的全部学生的学号和姓名:
select stu.s_id,stu.s_name 
from (select s_id from score group by s_id having count(c_id)=2) sc
left join student stu on stu.s_id=sc.s_id;

--28、查询男生、女生人数:
select stu.s_sex,count(s_sex) from student stu group by s_sex;

--29、查询名字中含有"风"字的学生信息:
select stu.* from student stu where stu.s_name like '%风%';

--30、查询同名同性学生名单,并统计同名人数:
select stu.s_name,count(1) from student stu group by stu.s_name;

--31、查询1990年出生的学生名单:
select stu.* from student stu where year(s_birth)=1990;

--32、查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列:
select c_id,avg(s_score) avgsc from score group by c_id order by avgsc desc,c_id asc;

--33、查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩:
select s_id,avg(s_score) avgsc from score group by s_id;

select sc.s_id,stu.s_name,sc.avgsc from (select s_id,avg(s_score) avgsc from score group by s_id) sc
left join student stu on stu.s_id=sc.s_id
where avgsc>=85;

--34、查询课程名称为"数学",且分数低于60的学生姓名和分数:
select stu.s_name,sc.s_score from course cs
left join score sc on sc.c_id=cs.c_id
left join student stu on stu.s_id=sc.s_id
where cs.c_course='数学' and sc.s_score<60;

--35、查询所有学生的课程及分数情况:
select stu.*,sc.*,cs.c_course from student stu
left join score sc on sc.s_id=stu.s_id
left join course cs on cs.c_id=sc.c_id;

--36、查询任何一门课程成绩在70分以上的学生姓名、课程名称和分数:
select distinct stu.s_name,cs.c_course,sc.s_score from course cs
left join score sc on sc.c_id=cs.c_id
left join student stu on stu.s_id=sc.s_id
where s_score>70;

--37、查询课程不及格的学生:
select distinct stu.s_name from score sc
left join student stu on stu.s_id=sc.s_id
where sc.s_score<60;

--38、查询课程编号为01且课程成绩在80分以上的学生的学号和姓名:
select sc.s_id,stu.s_name from score sc
left join student stu on stu.s_id=sc.s_id
where sc.c_id='01' and sc.s_score>80;

--39、求每门课程的学生人数:
select c_id,count(1) from score sc group by c_id;

--40、查询选修"张三"老师所授课程的学生中,成绩最高的学生信息及其成绩:
select stu.*,sc.s_score
from score sc
left join course cs on cs.c_id=sc.c_id
left join teacher t on t.t_name='张三'
left join student stu on stu.s_id=sc.s_id
order by sc.s_score desc
limit 1;

--41、查询不同课程成绩相同的学生的学生编号、课程编号、学生成绩:
select distinct s1.s_id,s1.c_id,s1.s_score
from score s1
left join score s2 on s1.s_score=s2.s_score
where s1.c_id<>s2.c_id;

--42、查询每门课程成绩最好的前三名:
select *,
row_number()over(partition by c_id order by s_score) rk
from score;

select sc.*,stu.s_name from (select *,
row_number()over(partition by c_id order by s_score desc) rk
from score) sc
left join student stu on stu.s_id=sc.s_id
where rk between 1 and 3;

--43、统计每门课程的学生选修人数(超过5人的课程才统计):
-- 要求输出课程号和选修人数,查询结果按人数降序排列,若人数相同,按课程号升序排列
select c_id,count(1) as sumcs
from score 
group by c_id 
having count(1)>5 
order by sumcs desc,c_id asc;

--44、检索至少选修两门课程的学生学号:
select sc.s_id
from 
(select s_id,count(*)over(partition by s_id) sumcs from score) sc
where sc.sumcs>=2;

--45、查询选修了全部课程的学生信息:
select s_id,count(*)over(partition by s_id) sumcs from score

select distinct stu.*
from 
(select s_id,count(*)over(partition by s_id) sumcs from score) sc
left join (select count(*) sumcs from course) cs
left join student stu on stu.s_id=sc.s_id
where sc.sumcs=cs.sumcs;

--46、查询各学生的年龄(周岁):
-- 按照出生日期来算,当前月日 < 出生年月的月日则,年龄减一
select s_id,s_name,
(case when  year(s_birth)<year(current_date()) and month(s_birth)>month(current_date())
then year(current_date())-year(s_birth)-1 
when  year(s_birth)<year(current_date()) and month(s_birth)=month(current_date()) and day(s_birth)>day(current_date()) 
then year(current_date())-year(s_birth)-1 
else  year(current_date())-year(s_birth) end) as age
from student

--47、查询本周过生日的学生:
select * from student stu where weekofyear(s_birth)=weekofyear(current_date());

--48、查询下周过生日的学生:
SELECT * FROM student WHERE weekofyear(s_birth)=weekofyear(current_date()) + 1;

--49、查询本月过生日的学生:
select * from student where month(current_date())=month(s_birth);

--50、查询12月份过生日的学生:
SELECT * FROM student WHERE 12=MONTH(s_birth);
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值