import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates Spark's RDD checkpoint fault-tolerance mechanism by adding a
 * checkpoint to a simple WordCount job.
 *
 * Created by MC on 2018/6/11.
 */
object CheckPointTest {

  /**
   * Runs a WordCount over the input file, groups words by their count,
   * checkpoints the resulting RDD, and returns the first five entries
   * rendered as a string.
   *
   * @param sc the active [[SparkContext]] (checkpoint dir must already be set)
   * @return the first five (count, words) pairs, joined with "; "
   */
  def WordCount(sc: SparkContext): String = {
    val file = sc.textFile("D://B/a.txt")
    // WordCount: split lines into words, count occurrences, then group the
    // (word, count) pairs by count and sort the groups by count.
    val rdd1 = file.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).groupBy(_._2).sortByKey()
    // Cache BEFORE checkpointing: materializing a checkpoint re-runs the
    // lineage from the beginning, so caching avoids recomputing the job.
    // (Fix: the original called checkpoint() twice — once uncached, once
    // cached; a single cache() + checkpoint() is the correct pattern.)
    rdd1.cache()
    rdd1.checkpoint()
    // Fix: the original bound `rdd2` to the Unit result of foreach, so the
    // method returned "()". Also, collect().take(5) pulled the whole RDD to
    // the driver first — take(5) fetches only what is needed.
    val topFive = rdd1.take(5)
    topFive.foreach(println(_))
    topFive.mkString("; ")
  }

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("CheckPointTest")
    val sc = new SparkContext(conf)
    // Directory on disk where checkpointed RDD data is persisted.
    // NOTE(review): this is the same directory that holds the input file —
    // a dedicated checkpoint path would be safer; confirm intent.
    sc.setCheckpointDir("D://B")
    val result = WordCount(sc)
    println(result)
    sc.stop()
  }
}
Spark-CheckPoint
最新推荐文章于 2022-10-31 17:40:06 发布