In Spark code, the difference between map and mapPartitions is similar to the difference between foreach and foreachPartition: the former processes data one record at a time, while the latter processes data one partition at a time.
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
  * Created by root on 2016/11/14.
  */
object t {
  def main(args: Array[String]): Unit = {
    val aa: SparkContext = new SparkContext(new SparkConf().setAppName("t").setMaster("local"))
    val bb: RDD[Int] = aa.parallelize(1 to 10)

    // foreach: the function is applied to each element individually
    bb.foreach(println(_))

    // foreachPartition: the function receives one iterator per partition
    bb.foreachPartition(
      it => while (it.hasNext) {
        println(it.next())
      }
    )

    // mapPartitions: process a whole partition at once; here each partition
    // is reduced to a single element holding the sum of its values
    val xx: RDD[Int] = bb.mapPartitions { x =>
      var i = 0
      while (x.hasNext) {
        i += x.next()
      }
      Iterator(i)
    }

    // cache() marks the RDD for persistence; unpersist() immediately removes
    // that marking again, so the pair has no lasting effect here
    xx.cache()
    xx.unpersist()

    xx.foreach(println(_))

    aa.stop()
  }
}
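Since the example above only exercises mapPartitions, below is a minimal sketch contrasting map with mapPartitions on the same data; the object name MapVsMapPartitions and the variable names sc and nums are illustrative assumptions, not part of the original code. With map the supplied function is called once per element, while with mapPartitions it is called once per partition and receives that partition's Iterator, which is why mapPartitions is usually preferred when each call needs expensive setup (for example opening a connection).

import org.apache.spark.{SparkConf, SparkContext}

object MapVsMapPartitions {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("MapVsMapPartitions").setMaster("local"))
    val nums = sc.parallelize(1 to 10, 2) // two partitions

    // map: the function runs once for every element (10 calls here)
    val doubled = nums.map(n => n * 2)
    println(doubled.collect().mkString(","))

    // mapPartitions: the function runs once per partition (2 calls here),
    // transforming the whole partition's Iterator in one call
    val doubledByPartition = nums.mapPartitions(it => it.map(_ * 2))
    println(doubledByPartition.collect().mkString(","))

    sc.stop()
  }
}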