大数据学习之 Hive Sql篇

1. 准备数据:

score.csv数据如下:

01,01,80
01,02,90
01,03,99
02,01,70
02,02,60
02,03,80
03,01,80
03,02,80
03,03,80
04,01,50
04,02,30
04,03,20
05,01,76
05,02,87
06,01,31
06,03,34
07,02,89
07,03,98

student.csv 数据如下:

01,赵雷,1990-01-01,男
02,钱电,1990-12-21,男
03,孙风,1990-05-20,男
04,李云,1990-08-06,男
05,周梅,1991-12-01,女
06,吴兰,1992-03-01,女
07,郑竹,1989-07-01,女
08,王菊,1990-01-20,女

teacher.csv 数据如下:

01,张三
02,李四
03,王五

course.csv 数据如下:

01,语文,02
02,数学,01
03,英语,03

2.将数据导入hive中:

package com.spark.self

import org.apache.spark.sql.SparkSession

object buildRegionIntoHive {
  def main(args: Array[String]): Unit = {

    val warehouseLocation = "spark-warehouse路劲"

    val spark = SparkSession
      .builder()
      .appName("Spark Hive Example")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()

    import spark.sql

    sql("drop table student")
    sql("create table if not exists student(s_id string,s_name string,s_birth string,s_sex string)row format delimited fields terminated by ','")
    sql("load data local inpath 'in/student.csv' into table student")
    sql("SELECT * FROM student").show()

    sql("drop table course")
    sql("create table if not exists course(c_id string,c_name string,t_id string)row format delimited fields terminated by ','")
    sql("load data local inpath 'in/course.csv' into table course")
    sql("SELECT * FROM course").show()

    sql("drop table teacher")
    sql("create table if not exists teacher(t_id string,t_name string)row format delimited fields terminated by ','")
    sql("load data local inpath 'in/teacher.csv' into table teacher")
    sql("SELECT * FROM teacher").show()

    sql("drop table score")
    sql("create table if not exists score(s_id string,c_id string,s_score int)row format delimited fields terminated by ','")
    sql("load data local inpath 'in/score.csv' into table score")
    sql("SELECT * FROM score").show()

  }
}

3.进行hive sql的编写:代码如下:

package com.spark.self

import org.apache.spark.sql.SparkSession

/**
  * student : s_id ,s_name ,s_birth ,s_sex
  * course  : c_id ,c_name ,t_id
  * teacher : t_id ,t_name
  * score   : s_id ,c_id ,s_score
  */
object HiveSqlTest {
  def main(args: Array[String]): Unit = {
    val warehouseLocation = "spark-warehouse路劲"

    val spark = SparkSession
      .builder()
      .appName("Spark Hive Example")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()
    import spark.sql
    //查询"01"课程比"02"课程成绩高的学生的信息及课程分数
    sql("select student.*,a.s_score as score01,b.s_score as score02 from student " +
      "join score a on student.s_id = a.s_id and a.c_id='01' " +
      "join score b on student.s_id = b.s_id and b.c_id='02' " +
      "where a.s_score>b.s_score").show()

    //查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩
    sql("select student.s_id,student.s_name,score_avg.avgScore from student " +
      "join (select s_id,avg(s_score) as avgScore from score group by s_id) as score_avg " +
      "on score_avg.s_id = student.s_id " +
      "where score_avg.avgScore>=60").show()

    //查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩
    sql("select student.s_id,student.s_name,score1.count,score1.sum from student " +
      "join (select s_id,count(s_id) as count,sum(s_score) as sum from score group by s_id) as score1 " +
      "where score1.s_id=student.s_id").show()

    //查询"李"姓老师的数量
    //    sql("select count(*) from teacher where t_name like '李%'").show()

    //查询学过"张三"老师授课的同学的信息
    sql("select student.* from student " +
      "join score on student.s_id =score.s_id " +
      "join  course on course.c_id=score.c_id " +
      "join  teacher on course.t_id=teacher.t_id and t_name='张三'").show()

    //查询没学过"张三"老师授课的同学的信息
    sql("select student.* from student " +
      "left join (select s_id from score " +
      "join  course on course.c_id=score.c_id " +
      "join  teacher on course.t_id=teacher.t_id and t_name='张三')tmp " +
      "on  student.s_id =tmp.s_id " +
      "where tmp.s_id is null").show()

    //第一步:得到所有学过张三老师课的同学
    sql("select score.s_id,score.c_id,c.c_name,c.t_id,t.t_name from score " +
      "join course c on c.c_id = score.c_id " +
      "join teacher t on t.t_id = c.t_id and t_name='张三'").show()

    //得到结果如下:
    /**
      * +----+----+------+----+------+
      * |s_id|c_id|c_name|t_id|t_name|
      * +----+----+------+----+------+
      * |  01|  02|    数学|  01|    张三|
      * |  02|  02|    数学|  01|    张三|
      * |  03|  02|    数学|  01|    张三|
      * |  04|  02|    数学|  01|    张三|
      * |  05|  02|    数学|  01|    张三|
      * |  07|  02|    数学|  01|    张三|
      * +----+----+------+----+------+
      */
    //第二步:使用学生表与 第一步查询出的结果做left join
    sql("select student.*,tmp.* from student " +
      "left join (select score.s_id,score.c_id,c.c_name,c.t_id,t.t_name from score " +
      "join course c on c.c_id = score.c_id " +
      "join teacher t on t.t_id = c.t_id and t_name='张三') tmp " +
      "on tmp.s_id = student.s_id").show()
    //得到结果如下:
    /**
      * +----+------+----------+-----+----+----+------+----+------+
      * |s_id|s_name|   s_birth|s_sex|s_id|c_id|c_name|t_id|t_name|
      * +----+------+----------+-----+----+----+------+----+------+
      * |  01|    赵雷|1990-01-01|    男|  01|  02|    数学|  01|    张三|
      * |  02|    钱电|1990-12-21|    男|  02|  02|    数学|  01|    张三|
      * |  03|    孙风|1990-05-20|    男|  03|  02|    数学|  01|    张三|
      * |  04|    李云|1990-08-06|    男|  04|  02|    数学|  01|    张三|
      * |  05|    周梅|1991-12-01|    女|  05|  02|    数学|  01|    张三|
      * |  06|    吴兰|1992-03-01|    女|null|null|  null|null|  null|
      * |  07|    郑竹|1989-07-01|    女|  07|  02|    数学|  01|    张三|
      * |  08|    王菊|1990-01-20|    女|null|null|  null|null|  null|
      * +----+------+----------+-----+----+----+------+----+------+
      */
    //第三步;
    sql("select student.* from student " +
      "left join (select score.s_id,score.c_id,c.c_name,c.t_id,t.t_name from score " +
      "join course c on c.c_id = score.c_id " +
      "join teacher t on t.t_id = c.t_id and t_name='张三') tmp " +
      "on tmp.s_id = student.s_id " +
      "where tmp.s_id is null").show()
    //得到结果如下:
    /**
      * +----+------+----------+-----+
      * |s_id|s_name|   s_birth|s_sex|
      * +----+------+----------+-----+
      * |  06|    吴兰|1992-03-01|    女|
      * |  08|    王菊|1990-01-20|    女|
      * +----+------+----------+-----+
      */

    //查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息
    //方法一:同时学过01和02的课程找出来,计算数量
    sql("select student.*,tmp.* from student " +
      "left join (select s_id,count(*) as number from score where c_id='01' or c_id='02' group by s_id) tmp " +
      "on student.s_id=tmp.s_id " +
      "where tmp.number=2").show()
    //方法二:分别查出01课程和02课程然后做join
    sql(" select student.* from student " +
      "join (select s_id from score where c_id ='01' )tmp1 " +
      "on student.s_id=tmp1.s_id " +
      "join (select s_id from score where c_id ='02' )tmp2 " +
      "on student.s_id=tmp2.s_id"
    ).show()


    //查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息:
    sql("select * from score where c_id='01'").show()
    sql("select * from score where c_id='02'").show()
    sql("select student.*,tmp1.c_id as c_id_01 ,tmp2.c_id as c_id_02 from student " +
      "left join (select * from score where c_id='01') tmp1 " +
      "on student.s_id=tmp1.s_id " +
      "left join (select * from score where c_id='02') tmp2 " +
      "on student.s_id=tmp2.s_id " +
      "where tmp1.c_id='01' and tmp2.c_id is null")
      .show()

    //查询没有学全所有课程的同学的信息
    //1. 先查询有多少们课程
    sql("select count(*) from course").show()
    //2. 查询课程数量不等于课程总数的学生
    sql("select s_id,count(*) from score group by s_id having count(c_id)=(select count(*) from course)").show()
    //3.查询学生信息
    sql("select student.* from student " +
      "join (select s_id,count(*) from score group by s_id having count(c_id)=(select count(*) from course)) tmp " +
      "on tmp.s_id=student.s_id").show()

    //查询和"01"号的同学学习的课程完全相同的其他同学的信息 concat_ws
    sql("select concat_ws('|',collect_set(c_id)) as c_id_con from score where s_id='01'").show()
    sql("select s_id,concat_ws('|',collect_set(c_id)) as c_id_con from score group by s_id having c_id_con=(select concat_ws('|',collect_set(c_id)) as c_id_con from score where s_id='01')").show()
    sql("select student.* from student " +
      "join (select s_id,concat_ws('|',collect_set(c_id)) as c_id_con from score group by s_id having c_id_con=(select concat_ws('|',collect_set(c_id)) as c_id_con from score where s_id='01')) tmp " +
      "on tmp.s_id=student.s_id").show()
  //    按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩:
    sql("select a.s_id,tmp1.s_score as chinese,tmp2.s_score as math,tmp3.s_score as english,round(avg (a.s_score),2) as avgScore from score a " +
      "left join (select s_id,s_score  from score s1 where  c_id='01')tmp1 on  tmp1.s_id=a.s_id " +
      "left join (select s_id,s_score  from score s2 where  c_id='02')tmp2 on  tmp2.s_id=a.s_id " +
      "left join (select s_id,s_score  from score s3 where  c_id='03')tmp3 on  tmp3.s_id=a.s_id " +
      "group by a.s_id,tmp1.s_score,tmp2.s_score,tmp3.s_score order by avgScore desc").show()


    //查询本月过生日得学生
    sql("select * from student where MONTH(CURRENT_DATE)= MONTH(s_birth)").show()

    //查询12月份过生日得学生 substring(),截取字符串,(第一个参数:需要截取得字段,从第几位开始截取,截取几位)
    sql("select s_name,s_sex,s_birth from student where substring(s_birth,6,2)='12'").show()
    sql("select student.*,substring(s_birth,1,4) as birthday from student where substring(s_birth,6,2)='12'").show()
 
  }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值