import org.apache.spark.SparkContext
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
 * One person together with their hobbies, as a pair of raw strings.
 *
 * @param name    the person's name
 * @param hobbies the person's hobbies, kept as a single unparsed string
 */
case class Hobbies(
  name: String,
  hobbies: String
)
/**
 * Demonstrates two ways of applying a user-defined function (UDF) with Spark SQL:
 * registering it under a name so it can be called from SQL text, and wrapping it
 * as a [[UserDefinedFunction]] so it can be applied through the DataFrame column API.
 */
object UDF {

  /**
   * Reads `in/hobbies.txt`, builds a DataFrame of [[Hobbies]] rows from it and prints
   * the rows together with a hobby count computed by a UDF, once via SQL and once via
   * `withColumn`.
   *
   * Each input line is split on a single space; the first field becomes `name` and the
   * second becomes `hobbies`. Any further fields are ignored. Lines with fewer than two
   * fields (for example blank lines) are skipped instead of failing the job.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val sparkSession: SparkSession =
      SparkSession.builder().appName("UDF").master("local[*]").getOrCreate()

    // Stop the session in `finally` so it is released even if a step below throws.
    try {
      val sc: SparkContext = sparkSession.sparkContext
      import sparkSession.implicits._

      // Single definition of the counting logic, shared by both UDF flavours below:
      // the number of comma-separated pieces in the hobbies string.
      val countHobbies: String => Int = _.split(",").length

      // `collect` with a partial function is the RDD transformation (filter + map),
      // not the action that pulls data to the driver: rows whose split result has
      // fewer than two elements do not match the pattern and are dropped.
      val hobbyDF: DataFrame = sc
        .textFile("in/hobbies.txt")
        .map(_.split(" "))
        .collect { case Array(name, hobbies, _*) => Hobbies(name, hobbies) }
        .toDF()
      hobbyDF.printSchema()
      hobbyDF.show(false)

      // Variant 1: register the function by name and call it from SQL text.
      hobbyDF.createOrReplaceTempView("hobby")
      sparkSession.udf.register("hobby_num", countHobbies)
      sparkSession
        .sql("select name,hobbies,hobby_num(hobbies)as hobbyNum from hobby")
        .show(false)

      println("----------------------------------")

      // Variant 2: wrap the same function as a column-level UDF and apply it
      // through the DataFrame API.
      import org.apache.spark.sql.functions
      val hobby_num: UserDefinedFunction = functions.udf(countHobbies)
      val newHobbyDF: DataFrame = hobbyDF.withColumn("hobbyNum", hobby_num($"hobbies"))
      newHobbyDF.printSchema()
      newHobbyDF.show(false)
    } finally {
      sparkSession.stop()
    }
  }
}