package com.javartisan.demo
import org.apache.spark.sql.SparkSession
object SparkLocal {
  def main(args: Array[String]): Unit = {
    // Local SparkSession for quick experiments; uses all available cores.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("spark test")
      .getOrCreate()
    import spark.implicits._
    val sc = spark.sparkContext

    // Two tiny datasets that differ only in the first field, which is used as the join key (1 vs 2).
    val a: Int = 1
    val b: Int = 1
    val c: String = "1"
    val d: String = "1"
    val arr1 = Array[(Int, Int, String, String)]((a, b, c, d))
    val arr2 = Array[(Int, Int, String, String)]((2, b, c, d))
    val rdd1 = sc.parallelize[(Int, Int, String, String)](arr1)
    val rdd2 = sc.parallelize[(Int, Int, String, String)](arr2)

    // Distinct column names on each side so the join condition is unambiguous.
    val df1 = rdd1.toDF("a", "b", "c", "d")
    val df2 = rdd2.toDF("a1", "b1", "c1", "d1")
    df1.printSchema()
    df2.printSchema()

    // Full outer join: rows without a match on the other side are kept and padded with nulls.
    val full = df1.join(df2, $"a" === $"a1", "full")

    // Rows obtained from DataFrame.rdd are GenericRowWithSchema instances.
    val newFull = full.rdd.map(row => {
      println("row class " + row.getClass)
      row
    })

    // Keys 1 and 2 never match, so the full outer join yields one row per side.
    println(newFull.count())
    full.show(false)
    println(df1.count())
    println(df2.count())

    spark.stop()
  }
}