package com.shufang.spark_sql
import com.shufang.utils.SparkUtils
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
/**
 * Example: build a DataFrame from an `RDD[Row]` with an explicitly
 * specified schema.
 *
 * Reads a whitespace-delimited text file (two tokens per line: name and
 * age), converts each line to a [[Row]], and applies a programmatically
 * constructed [[org.apache.spark.sql.types.StructType]] where every column
 * is typed as String.
 *
 * Usage: an optional first CLI argument overrides the input file path.
 */
object DataFrameWithSchema {

  def main(args: Array[String]): Unit = {
    val spark = SparkUtils.getSS("local[*]", "schema")
    // Reuse the session's context instead of creating a second
    // SparkContext via SparkUtils.getSC — only one context may exist per JVM.
    val sc = spark.sparkContext

    // Input path can be overridden from the command line; falls back to the
    // original hard-coded sample file.
    val inputPath =
      if (args.nonEmpty) args(0)
      else "/Users/shufang/idea_project/spark244/src/main/testdata/helloworld.txt"

    // Space-separated field names making up the schema.
    val schemaString: String = "name age"

    // NOTE: be careful to import the Spark SQL types
    // (StructField / StructType), not identically-named classes elsewhere.
    import org.apache.spark.sql.types._
    val fields = schemaString.split(" ")
      .map(fieldName => StructField(fieldName, StringType, nullable = true))
    val schema = StructType(fields)

    // Split each line exactly once and keep the first two tokens as the
    // (name, age) columns. The original split the same line twice per record.
    val rowRDD: RDD[Row] = sc.textFile(inputPath).map { line =>
      val tokens = line.split(" ")
      Row(tokens(0), tokens(1))
    }

    val df = spark.createDataFrame(rowRDD, schema)
    df.show()
    df.printSchema()

    // Release the session's resources before the JVM exits.
    spark.stop()
  }
}
// Program output (df.show / df.printSchema):
// +--------+---+
// |    name|age|
// +--------+---+
// |zhangsan|  1|
// |    lisi|  2|
// |  wangwu|  3|
// | zhaoliu|  4|
// +--------+---+
//
// root
//  |-- name: string (nullable = true)
//  |-- age: string (nullable = true)
// Creating a DataFrame with an explicitly specified schema:
// this example walks through the full process in Spark — reading a text
// file into an RDD, building a StructType schema programmatically,
// creating the DataFrame from Row records, and finally displaying both
// the data and the inferred schema.