总结
- RDD 转换成 DataFrame使用SQLContext的方法createDataFrame:
def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame
- DataFrame转换成RDD使用DataFrame的字段 rdd:
dataFrame.rdd
举例程序
package a
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.rdd.RDD
object Test {
  /**
   * Demonstrates converting an RDD[Emp] to a DataFrame via
   * SQLContext.createDataFrame(rdd, beanClass), registering it as a
   * temp table, and querying it with SQL.
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("adaa")
    val sc = new SparkContext(sparkConf)
    val sqlContext = new SQLContext(sc)
    // Parse "empno ename sal deptno" text lines into Emp beans.
    // Each input line yields exactly one record, so `map` is the right
    // operator (the original used flatMap over a one-element Array).
    val rdd: RDD[Emp] = sc.parallelize(Array(
      "1 zhangsan 3000 20",
      "2 lisi 4000 10",
      "3 wangwu 8000 20",
      "4 zhaoliu 1000 20"
    )).map { line =>
      val fields = line.split("\\s+")
      Emp(fields(0).toInt, fields(1), fields(2).toInt, fields(3).toInt)
    }
    // classOf[Emp] is resolved at compile time; Class.forName("a.Emp")
    // would only fail at runtime if the name were wrong.
    val dataframe = sqlContext.createDataFrame(rdd, classOf[Emp])
    dataframe.registerTempTable("emp")
    val newDataFrame = sqlContext.sql("select empno from emp")
    newDataFrame.show()
  }
}
rdd中数据整合时所用到的类
package a
// "1 zhangsan 3000 20",
// One employee record parsed from a line like "1 zhangsan 3000 20".
// Kept as a JavaBean-style class (zero-arg constructor + getXxx getters)
// because SQLContext.createDataFrame(rdd, beanClass) discovers the
// DataFrame columns via bean reflection on the getters.
class Emp(empno: Int, ename: String, sal: Int, deptno: Int) {

  /** Zero-arg constructor required by the JavaBean convention. */
  def this() {
    this(0, null, 0, 0)
  }

  // Explicit return types on public members, per Scala style.
  def getEmpno: Int = empno
  def getEname: String = ename
  def getSal: Int = sal
  def getDeptno: Int = deptno
}

object Emp {

  /** Builds the default record (zero ids/amounts, null name). */
  def apply(): Emp = new Emp()

  /** Builds a fully-populated record. */
  def apply(empno: Int, ename: String, sal: Int, deptno: Int): Emp =
    new Emp(empno, ename, sal, deptno)
}