示例一
示例数据:
{"name":"zhangsan","age":20}
{"name":"lisi"}
{"name":"wangwu","age":18}
{"name":"a","age":21}
{"name":"zhangsan1","age":20}
{"name":"lisi1"}
{"name":"wangwu1","age":18}
{"name":"a1","age":21}
{"name":"zhangsan2","age":20}
{"name":"lisi2"}
{"name":"wangwu2","age":18}
{"name":"a2","age":21}
{"name":"zhangsan3","age":20}
{"name":"lisi3"}
{"name":"wangwu3","age":18}
{"name":"a3","age":21}
{"name":"zhangsan4","age":20}
{"name":"lisi4"}
{"name":"wangwu4","age":18}
{"name":"a4","age":21}
{"name":"zhangsan5","age":20}
{"name":"lisi5"}
{"name":"wangwu5","age":18}
{"name":"a5","age":21}
代码块:
package com.lw.scalaspark.sql.examples
import org.apache.spark.sql.types.IntegerType
import org.apache.spark.sql.{DataFrame, SparkSession}
object readJsonFile {
  /**
   * Builds a single-column DataFrame of raw JSON strings, then extracts
   * typed fields from each string with `get_json_object`.
   *
   * Demonstrates parsing JSON that lives *inside* a string column, as
   * opposed to reading a JSON file directly with `spark.read.json`.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("readJsonFile")
      .master("local")
      .getOrCreate()

    import org.apache.spark.sql.functions._
    import spark.implicits._

    // Each element is one complete JSON document held as a plain string.
    val list = List[String](
      """{"name":"zhangsan","age":20}""",
      """{"name":"lisi","age":21}""",
      """{"name":"wangwu","age":22}""",
      """{"name":"zhaoliu","age":23}"""
    )

    // toDF turns the List[String] into a one-column DataFrame named "infos".
    val frame: DataFrame = list.toDF("infos")
    frame.show(100, truncate = false)
    frame.printSchema()

    // get_json_object extracts a value via a JsonPath expression; it always
    // returns a string, so "age" is cast to IntegerType explicitly.
    val result: DataFrame = frame.select(
      get_json_object($"infos", "$.name").as("name"),
      get_json_object($"infos", "$.age").cast(IntegerType).as("age")
    )
    result.show(100)
    result.printSchema()

    // Release the local Spark context and its resources before exiting.
    spark.stop()
  }
}
示例二
示例数据:
{"name":"zhangsan","age":18,"scores":[{"yuwen":98,"shuxue":90,"yingyu":100},{"dili":98,"shengwu":78,"huaxue":100}]}
{"name":"lisi","age":19,"scores":[{"yuwen":58,"shuxue":50,"yingyu":78},{"dili":56,"shengwu":76,"huaxue":13}]}
{"name":"wangwu","age":17,"scores":[{"yuwen":18,"shuxue":90,"yingyu":45},{"dili":76,"shengwu":42,"huaxue":45}]}
{"name":"zhaoliu","age":20,"scores":[{"yuwen":68,"shuxue":23,"yingyu":63},{"dili":23,"shengwu":45,"huaxue":87}]}
{"name":"tianqi","age":22,"scores":[{"yuwen":88,"shuxue":91,"yingyu":41},{"dili":56,"shengwu":79,"huaxue":45}]}
代码块:
package com.bjsxt.scalaspark.sql.examples
import org.apache.spark.sql.{DataFrame, SparkSession}
object readJsonArray {
  /**
   * Reads JSON records whose "scores" field is an array of structs and
   * flattens them: `explode` emits one row per array element, then each
   * struct member is promoted to its own top-level column.
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local")
      .appName("readJsonArray")
      .getOrCreate()

    import org.apache.spark.sql.functions._
    import spark.implicits._

    val raw: DataFrame = spark.read.json("./data/jsonArrayFile")
    raw.show(false)
    raw.printSchema()

    // One output row per element of the "scores" array; rename the
    // generated column to "allScores".
    val flattened: DataFrame = raw
      .select($"name", $"age", explode($"scores"))
      .toDF("name", "age", "allScores")
    flattened.show(100, false)
    flattened.printSchema()

    // Promote every subject inside the struct to a column of the same name.
    val subjects = Seq("yuwen", "shuxue", "yingyu", "dili", "shengwu", "huaxue")
    val columns = Seq($"name", $"age") ++ subjects.map(s => $"allScores.$s".as(s))
    val scores: DataFrame = flattened.select(columns: _*)
    scores.show(100)
  }
}
示例三
示例数据:
{"name":"zhangsan","score":100,"infos":{"age":20,"gender":"man"}}
{"name":"lisi","score":70,"infos":{"age":21,"gender":"femal"}}
{"name":"wangwu","score":80,"infos":{"age":23,"gender":"man"}}
{"name":"maliu","score":50,"infos":{"age":16,"gender":"femal"}}
{"name":"tianqi","score":90,"infos":{"age":19,"gender":"man"}}
代码块:
package com.bjsxt.scalaspark.sql.examples
import org.apache.spark.sql.{DataFrame, SparkSession}
object readNestJsonFile {
  /**
   * Loads JSON records that contain a nested "infos" struct and queries the
   * nested fields with dot notation through a Spark SQL temp view.
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local")
      .appName("readNestJsonFile")
      .getOrCreate()

    // format("json").load(...) is the explicit form of spark.read.json(...).
    val people: DataFrame = spark.read.format("json").load("./data/NestJsonFile")
    people.printSchema()
    people.show(100)

    // Register a view so nested struct fields are reachable as infos.age etc.
    people.createOrReplaceTempView("infosView")
    val query = "select name,infos.age,score,infos.gender from infosView"
    spark.sql(query).show(100)
  }
}