package com.spark.sql

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql._
object Rdd2DataFrame {

  // Windows only: point Hadoop at a local winutils install before Spark starts.
  System.setProperty("hadoop.home.dir", "d://soft/hadoop/hadoop-2.9.2")

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("rdd2dataframe").getOrCreate()
    val lineRdd = spark.sparkContext.textFile("G:\\people.txt")

    // RDD -> DataFrame, then back again.
    val df = rdd2DataFrame(lineRdd, spark)
    dataFrame2Rdd(df)

    // RDD -> Dataset, then back again.
    val ds = rdd2Dataset(lineRdd, spark)
    dataset2Rdd(ds)

    // DataFrame <-> Dataset.
    val ds1 = dataFrame2Dataset(df)
    dataset2DataFrame(ds1)

    spark.close()
  }
  /** RDD[String] -> DataFrame: map each line to a tuple, then call toDF with column names. */
  def rdd2DataFrame(lineRdd: RDD[String], spark: SparkSession): DataFrame = {
    import spark.implicits._
    // Split each line once instead of once per field.
    val personRDD = lineRdd.map { line =>
      val fields = line.split(" ")
      (fields(0), fields(1).toInt)
    }
    val personDF = personRDD.toDF("name", "age")
    personDF.show()
    personDF
  }
  /** DataFrame -> RDD: df.rdd yields an RDD[Row]; the static column types are erased. */
  def dataFrame2Rdd(df: DataFrame): Unit = {
    val rdd = df.rdd
    rdd.foreach(println)
  }
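
  // A minimal sketch (added here, not part of the original walkthrough): because df.rdd
  // returns untyped Row objects, recovering the field types takes an explicit getAs per
  // column. The column names "name" and "age" match those set in rdd2DataFrame above.
  def dataFrame2TypedRdd(df: DataFrame): RDD[(String, Int)] = {
    df.rdd.map(row => (row.getAs[String]("name"), row.getAs[Int]("age")))
  }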
  /** RDD[String] -> Dataset[Person]: map to the case class, then toDS for a typed Dataset. */
  def rdd2Dataset(lineRdd: RDD[String], spark: SparkSession): Dataset[Person] = {
    import spark.implicits._
    val personRdd = lineRdd.map { line =>
      val fields = line.split(" ")
      Person(fields(0), fields(1).toInt)
    }
    val personDS = personRdd.toDS()
    // Typed transformations compile against the case class fields.
    personDS.filter(p => p.age > 20).show()
    personDS.show()
    personDS
  }
  /** Dataset[Person] -> RDD[Person]: ds.rdd keeps the element type, unlike df.rdd. */
  def dataset2Rdd(ds: Dataset[Person]): Unit = {
    val rdd = ds.rdd
    rdd.foreach(println)
  }
  /** DataFrame -> Dataset[Person]: as[Person] needs an Encoder[Person] in implicit scope. */
  def dataFrame2Dataset(df: DataFrame): Dataset[Person] = {
    implicit val personEncoder: Encoder[Person] = ExpressionEncoder()
    val personDS = df.as[Person]
    personDS.show()
    personDS
  }
  /** Dataset[Person] -> DataFrame: toDF drops the static type, optionally renaming columns. */
  def dataset2DataFrame(ds: Dataset[Person]): Unit = {
    val df = ds.toDF("name", "age")
    df.show()
  }
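
  // A minimal sketch (not part of the original walkthrough) of the explicit-schema route,
  // which is what the StructType/StructField/StringType/IntegerType imports above are for:
  // build an RDD[Row] and pass a hand-written schema to createDataFrame. Useful when no
  // case class exists or the schema is only known at runtime.
  // Usage (hypothetical): rdd2DataFrameWithSchema(lineRdd, spark).show()
  def rdd2DataFrameWithSchema(lineRdd: RDD[String], spark: SparkSession): DataFrame = {
    val rowRdd = lineRdd.map { line =>
      val fields = line.split(" ")
      Row(fields(0), fields(1).toInt)
    }
    val schema = StructType(Seq(
      StructField("name", StringType, nullable = true),
      StructField("age", IntegerType, nullable = true)
    ))
    spark.createDataFrame(rowRdd, schema)
  }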
}
// Row type shared by the Dataset conversions above.
case class Person(name: String, age: Int)