Level 1: SparkSQL Load and Save
Programming Requirements
Complete the code in the editor on the right: load the people.json file and save it to the people path in overwrite mode, then load the people1.json file and save it to the same path in append mode. Finally, display the first 20 rows of the resulting Dataset as a table.
The contents of people.json and people1.json are as follows:
{"age":21,"name":"张三", "salary":"3000"}
{"age":22,"name":"李四", "salary":"4500"}
{"age":23,"name":"王五", "salary":"7500"}
{"name":"Michael", "salary":"6000"}
{"name":"Andy", "age":30 , "salary":"9000"}
{"name":"Justin", "age":19 , "salary":"6900"}
Code Implementation
package com.educoder.bigData.sparksql2;

import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class Test1 {

    public static void main(String[] args) throws AnalysisException {
        SparkSession spark = SparkSession
                .builder()
                .appName("test1")
                .master("local")
                .getOrCreate();
        /********* Begin *********/
        // Load people.json and save it to the people path, overwriting any existing data
        spark.read().format("json").load("people.json").write().mode(SaveMode.Overwrite).save("people");
        // Load people1.json and append it to the same path
        spark.read().format("json").load("people1.json").write().mode(SaveMode.Append).save("people");
        // Read the combined data back (the default format is Parquet) and show the first 20 rows
        spark.read().load("people").show();
        /********* End *********/
    }
}
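For reference, a minimal sketch of the other SaveMode values the generic save API accepts. This fragment assumes it runs inside the same main method with the spark session above (with org.apache.spark.sql.Dataset and org.apache.spark.sql.Row also imported); people_copy is just an illustrative path, not part of the exercise.

    Dataset<Row> df = spark.read().format("json").load("people.json");
    df.write().mode(SaveMode.ErrorIfExists).save("people_copy"); // default: fail if the path already exists
    df.write().mode(SaveMode.Ignore).save("people_copy");        // do nothing if the path already exists
    df.write().mode(SaveMode.Overwrite).save("people_copy");     // replace whatever is at the path
    df.write().mode(SaveMode.Append).save("people_copy");        // add rows to the existing data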
Level 2: Introduction to Parquet Files
Programming Requirements
Complete the code in the editor on the right: store the people and people1 files under the people path, partitioned as id=1 and id=2, then display the first 20 rows as a table.
The contents of people.json and people1.json are as follows:
{"age":21,"name":"张三", "salary":"3000"}
{"age":22,"name":"李四", "salary":"4500"}
{"age":23,"name":"王五", "salary":"7500"}
{"name":"Michael", "salary":"6000"}
{"name":"Andy", "age":30 , "salary":"9000"}
{"name":"Justin", "age":19 , "salary":"6900"}
Code Implementation
package com.educoder.bigData.sparksql2;

import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.SparkSession;

public class Test2 {

    public static void main(String[] args) throws AnalysisException {
        SparkSession spark = SparkSession
                .builder()
                .appName("test2")
                .master("local")
                .getOrCreate();
        /********* Begin *********/
        // Write each file as Parquet into its own partition directory under people
        spark.read().format("json").load("people.json").write().parquet("people/id=1");
        spark.read().format("json").load("people1.json").write().parquet("people/id=2");
        // Reading the parent path picks up both partitions; id is inferred as a column from the directory names
        spark.read().load("people").show();
        /********* End *********/
    }
}
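Because id is declared through the directory names, Spark treats it as a partition column, and a filter on it prunes whole directories instead of scanning every file. A minimal sketch, again assuming the same spark session as above:

    spark.read().load("people").where("id = 1").show(); // partition pruning: only people/id=1 is read
    spark.read().parquet("people/id=1").show();         // loading one partition directory directly omits the id column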
Level 3: Introduction to JSON Files
Programming Requirements
Complete the code in the editor on the right: compute the average salary across the people and people1 files.
The contents of people.json and people1.json are as follows:
{"age":21,"name":"张三", "salary":"3000"}
{"age":22,"name":"李四", "salary":"4500"}
{"age":23,"name":"王五", "salary":"7500"}
{"name":"Michael", "salary":"6000"}
{"name":"Andy", "age":30 , "salary":"9000"}
{"name":"Justin", "age":19 , "salary":"6900"}
Code Implementation
package com.educoder.bigData.sparksql2;

import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.SparkSession;

public class Test3 {

    public static void main(String[] args) throws AnalysisException {
        SparkSession spark = SparkSession
                .builder()
                .appName("test3")
                .master("local")
                .getOrCreate();
        /********* Begin *********/
        // Register each JSON file as a temporary view so it can be queried with SQL
        spark.read().format("json").load("people.json").createOrReplaceTempView("people");
        spark.read().format("json").load("people1.json").createOrReplaceTempView("people1");
        // UNION ALL keeps all six rows; avg() then computes the overall average salary
        spark.sql("select avg(salary) from (select salary from people union all select salary from people1) a").show();
        /********* End *********/
    }
}
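The same average can also be computed with the Dataset API instead of SQL. A minimal sketch assuming the same spark session, imports of Dataset and Row, and a static import of org.apache.spark.sql.functions.avg; union behaves like UNION ALL (duplicates kept):

    Dataset<Row> p = spark.read().format("json").load("people.json");
    Dataset<Row> p1 = spark.read().format("json").load("people1.json");
    // avg() implicitly casts the string salary column to a numeric type, matching the SQL version
    p.select("salary").union(p1.select("salary")).agg(avg("salary")).show();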
Level 4: Reading a JDBC Data Source
Programming Requirements
Complete the code in the editor on the right: write the people and people1 files into the people table in MySQL, then read that table back and display the first 20 rows as a table.
The contents of people.json and people1.json are as follows:
{"age":21,"name":"张三", "salary":"3000"}
{"age":22,"name":"李四", "salary":"4500"}
{"age":23,"name":"王五", "salary":"7500"}
{"name":"Michael", "salary":"6000"}
{"name":"Andy", "age":30 , "salary":"9000"}
{"name":"Justin", "age":19 , "salary":"6900"}
Code Implementation
package com.educoder.bigData.sparksql2;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class Test4 {

    public static void case4(SparkSession spark) {
        /********* Begin *********/
        // Write people.json to the MySQL table people, replacing any existing contents
        Dataset<Row> load = spark.read().format("json").load("people.json");
        load.write()
                .format("jdbc")
                .option("url", "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8")
                .option("dbtable", "people")
                .option("user", "root")
                .option("password", "123123")
                .mode(SaveMode.Overwrite)
                .save();
        // Append people1.json to the same table
        Dataset<Row> load1 = spark.read().format("json").load("people1.json");
        load1.write()
                .format("jdbc")
                .option("url", "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8")
                .option("dbtable", "people")
                .option("user", "root")
                .option("password", "123123")
                .mode(SaveMode.Append)
                .save();
        // Read the table back over JDBC and show the first 20 rows
        Dataset<Row> load2 = spark.read()
                .format("jdbc")
                .option("url", "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8")
                .option("dbtable", "people")
                .option("user", "root")
                .option("password", "123123")
                .load();
        load2.show();
        /********* End *********/
    }
}
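To avoid repeating the connection options, both DataFrameReader and DataFrameWriter also expose a jdbc() shorthand that takes a java.util.Properties object. A minimal sketch with the same URL and credentials as above, runnable inside case4:

    Properties props = new Properties(); // requires import java.util.Properties
    props.put("user", "root");
    props.put("password", "123123");
    String url = "jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8";
    spark.read().jdbc(url, "people", props).show();                   // equivalent read
    load.write().mode(SaveMode.Overwrite).jdbc(url, "people", props); // equivalent write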