头歌 (EduCoder) SparkSQL Data Sources

Level 1: Loading and Saving with SparkSQL

Programming requirements
Complete the code in the editor on the right: load the people.json file and save it to the people path in overwrite mode; then load the people1.json file and save it to the same people path in append mode; finally, display the first 20 rows of the resulting people Dataset in tabular form.

The contents of people.json and people1.json are, respectively:

{"age":21,"name":"张三", "salary":"3000"}
{"age":22,"name":"李四", "salary":"4500"}
{"age":23,"name":"王五", "salary":"7500"}
{"name":"Michael", "salary":"6000"}
{"name":"Andy", "age":30 , "salary":"9000"}
{"name":"Justin", "age":19 , "salary":"6900"}

Implementation

package com.educoder.bigData.sparksql2;

import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;


public class Test1 {

	public static void main(String[] args) throws AnalysisException {
		SparkSession spark = SparkSession
				.builder()
				.appName("test1")
				.master("local")
				.getOrCreate();
		/********* Begin *********/
		// Save people.json to the "people" path, replacing any existing data (the task asks for overwrite mode).
		spark.read().format("json").load("people.json").write().mode(SaveMode.Overwrite).save("people");
		// Append people1.json to the same path.
		spark.read().format("json").load("people1.json").write().mode(SaveMode.Append).save("people");
		// save() defaults to Parquet, so a plain load() reads it back; show() prints the first 20 rows.
		spark.read().load("people").show();
		/********* End *********/
	}
}
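
A note on save modes: save() defaults to the Parquet format and to SaveMode.ErrorIfExists, which throws if the target path already exists, so the first write must use Overwrite. A minimal sketch of the four modes, where df stands for any loaded Dataset&lt;Row&gt; (the class and method names are illustrative, not part of the graded solution):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;

public class SaveModeDemo {
	// df is a previously loaded Dataset<Row>, e.g. spark.read().json("people.json")
	static void demo(Dataset<Row> df) {
		df.write().mode(SaveMode.Overwrite).save("people");      // replace existing data at the path
		df.write().mode(SaveMode.Append).save("people");         // add new files alongside existing ones
		df.write().mode(SaveMode.Ignore).save("people");         // do nothing if the path already exists
		df.write().mode(SaveMode.ErrorIfExists).save("people");  // the default: fail if the path exists
	}
}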

Level 2: Introduction to Parquet Files

Programming requirements
Complete the code in the editor on the right: save the people.json and people1.json files under the people path, partitioned as id=1 and id=2, and display the first 20 rows in tabular form.

The contents of people.json and people1.json are, respectively:

{"age":21,"name":"张三", "salary":"3000"}
{"age":22,"name":"李四", "salary":"4500"}
{"age":23,"name":"王五", "salary":"7500"}
{"name":"Michael", "salary":"6000"}
{"name":"Andy", "age":30 , "salary":"9000"}
{"name":"Justin", "age":19 , "salary":"6900"}

Implementation

package com.educoder.bigData.sparksql2;

import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.SparkSession;


public class Test2 {

	public static void main(String[] args) throws AnalysisException {
		SparkSession spark = SparkSession
				.builder()
				.appName("test1")
				.master("local")
				.getOrCreate();
		/********* Begin *********/
		// Write each file as Parquet into its own partition directory under "people".
		spark.read().format("json").load("people.json").write().parquet("people/id=1");
		spark.read().format("json").load("people1.json").write().parquet("people/id=2");
		// Partition discovery parses the id=... directory names and adds an "id" column to the result.
		spark.read().load("people").show();
		/********* End *********/
	}
}
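
Writing to people/id=1 and people/id=2 builds the Hive-style partition layout by hand; when the people path is read back, Spark's partition discovery parses the directory names and surfaces id as an extra column. A more idiomatic way to produce the same layout is DataFrameWriter.partitionBy — a sketch assuming Spark 2.3+ (for unionByName); the added id column and the class name are illustrative:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import static org.apache.spark.sql.functions.lit;

public class PartitionByDemo {
	static void demo(SparkSession spark) {
		// Tag each source file with an id column, then let Spark create the id=... directories.
		Dataset<Row> p1 = spark.read().json("people.json").withColumn("id", lit(1));
		Dataset<Row> p2 = spark.read().json("people1.json").withColumn("id", lit(2));
		p1.unionByName(p2).write().partitionBy("id").mode(SaveMode.Overwrite).parquet("people");
	}
}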

Level 3: Introduction to JSON Files

Programming requirements
Complete the code in the editor on the right: compute the average salary across the people and people1 files.

The contents of people.json and people1.json are, respectively:

{"age":21,"name":"张三", "salary":"3000"}
{"age":22,"name":"李四", "salary":"4500"}
{"age":23,"name":"王五", "salary":"7500"}
{"name":"Michael", "salary":"6000"}
{"name":"Andy", "age":30 , "salary":"9000"}
{"name":"Justin", "age":19 , "salary":"6900"}

Implementation

package com.educoder.bigData.sparksql2;

import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.SparkSession;


public class Test3 {

	public static void main(String[] args) throws AnalysisException {
		SparkSession spark = SparkSession
				.builder()
				.appName("test1")
				.master("local")
				.getOrCreate();
		/********* Begin *********/
		// Register each JSON file as a temporary view so it can be queried with SQL.
		spark.read().format("json").load("people.json").createOrReplaceTempView("people");
		spark.read().format("json").load("people1.json").createOrReplaceTempView("people1");
		// Union the two views and average the salary column (implicitly cast from string to double).
		spark.sql("select avg(salary) from (select salary from people union all select salary from people1) a").show();
		/********* End *********/
	}
}
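
The same aggregation can be written with the Dataset API instead of SQL over temp views; note that salary is stored as a JSON string, and avg casts it to a numeric type implicitly in both forms. A sketch assuming Spark 2.3+ (for unionByName); the class name is illustrative:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import static org.apache.spark.sql.functions.avg;

public class AvgSalaryDemo {
	static void demo(SparkSession spark) {
		// Union the two files by column name, then aggregate without registering temp views.
		Dataset<Row> all = spark.read().json("people.json")
				.unionByName(spark.read().json("people1.json"));
		all.agg(avg("salary")).show();
	}
}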

Level 4: Reading Data Sources via JDBC

Programming requirements
Complete the code in the editor on the right: write the people and people1 files into the people table in MySQL, then read the table back and display its first 20 rows in tabular form.

The contents of people.json and people1.json are, respectively:

{"age":21,"name":"张三", "salary":"3000"}
{"age":22,"name":"李四", "salary":"4500"}
{"age":23,"name":"王五", "salary":"7500"}
{"name":"Michael", "salary":"6000"}
{"name":"Andy", "age":30 , "salary":"9000"}
{"name":"Justin", "age":19 , "salary":"6900"}

Implementation

package com.educoder.bigData.sparksql2;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;


public class Test4 {

	public static void case4(SparkSession spark) {

		/********* Begin *********/
		// Write people.json to the MySQL table "people", replacing any existing rows.
		Dataset<Row> load = spark.read().format("json").load("people.json");
		load.write()
			.format("jdbc")
			.option("url", "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8")
			.option("dbtable", "people")
			.option("user", "root")
			.option("password", "123123")
			.mode(SaveMode.Overwrite)
			.save();
		// Append people1.json to the same table.
		Dataset<Row> load1 = spark.read().format("json").load("people1.json");
		load1.write()
			.format("jdbc")
			.option("url", "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8")
			.option("dbtable", "people")
			.option("user", "root")
			.option("password", "123123")
			.mode(SaveMode.Append)
			.save();
		// Read the table back over JDBC and show the first 20 rows.
		Dataset<Row> load2 = spark.read()
			.format("jdbc")
			.option("url", "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8")
			.option("dbtable", "people")
			.option("user", "root")
			.option("password", "123123")
			.load();
		load2.show();
		/********* End *********/
	}
}
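
The option-based calls above can also be expressed with the jdbc shorthand on DataFrameReader/DataFrameWriter, passing credentials through a java.util.Properties object. A sketch under the same assumptions as the solution (local MySQL database test, user root, and the MySQL JDBC driver on the classpath); the class name is illustrative:

import java.util.Properties;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class JdbcDemo {
	static void demo(SparkSession spark) {
		String url = "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8";
		Properties props = new Properties();
		props.setProperty("user", "root");
		props.setProperty("password", "123123");
		// Write: equivalent to format("jdbc").option(...).save()
		spark.read().json("people.json").write().mode(SaveMode.Overwrite).jdbc(url, "people", props);
		spark.read().json("people1.json").write().mode(SaveMode.Append).jdbc(url, "people", props);
		// Read the table back and show the first 20 rows.
		spark.read().jdbc(url, "people", props).show();
	}
}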
