import org.apache.avro.generic.GenericData.StringType
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.spark_project.dmg.pmml.True
/**
 * Demo: builds a DataFrame from a text file by mapping every line to a [[Row]]
 * and attaching an explicitly declared [[StructType]] schema, then registers it
 * as the temporary view `people` and prints the result of a SQL query over it.
 *
 * Each input line is split on `,`; the first piece is used as the name and the
 * second piece (trimmed) is parsed as an integer age, e.g. `Michael, 29`.
 */
object TestDataFrame2 {

  /** Input file used when no path is given on the command line. */
  private val DefaultInputPath: String =
    "D:\\Users\\shashahu\\Desktop\\work\\spark-2.4.4\\examples\\src\\main\\resources\\people.txt"

  /**
   * Runs the demo.
   *
   * @param args optional; `args(0)` overrides the input file path. When absent,
   *             [[DefaultInputPath]] is read.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TestDataFrame2").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // Lower-case name so the value does not shadow the SQLContext class.
      val sqlContext = new SQLContext(sc)
      val inputPath = args.headOption.getOrElse(DefaultInputPath)

      // Load the text file and turn every line into a two-column Row (name, age).
      val rowRDD: RDD[Row] = sc.textFile(inputPath).map { line =>
        val fields = line.split(",")
        Row(fields(0), fields(1).trim.toInt)
      }

      // The schema must describe every value placed in the Row, in order:
      // (field name, field type, nullable).
      // Spark's StringType is referenced by its full name because the simple
      // name `StringType` is bound by this file's imports to Avro's
      // GenericData.StringType, which is not a Spark DataType.
      val structType: StructType = StructType(
        StructField("name", org.apache.spark.sql.types.StringType, nullable = true) ::
          StructField("age", IntegerType, nullable = true) :: Nil
      )

      // Overload used here: createDataFrame(rowRDD: RDD[Row], schema: StructType).
      val df: DataFrame = sqlContext.createDataFrame(rowRDD, structType)
      df.createOrReplaceTempView("people")
      sqlContext.sql("select * from people").show()
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }
}
// 说明:本示例使用 StructType 显式定义表结构(schema),由 RDD[Row] 创建 DataFrame 并注册为临时表后进行查询。