概要
WordCount程序示例
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.SparkConf
/**
 * Minimal Spark word-count example.
 *
 * Builds an RDD from three hard-coded sentences, splits every sentence on a
 * single space, and prints each distinct word together with the number of
 * times it occurs.
 */
object WordCount {

  /**
   * Program entry point: runs the word count and prints every `(word, count)` pair.
   *
   * @param args command-line arguments; not used by this example
   */
  def main(args: Array[String]): Unit = {
    // SparkContext refuses to start without an application name. setIfMissing
    // supplies a default but never overrides a name given by the launcher.
    // The master URL is deliberately not set here; it is presumably supplied
    // externally (e.g. via spark-submit) — confirm for your deployment.
    val conf = new SparkConf().setIfMissing("spark.app.name", "WordCount")
    val sc = new SparkContext(conf)
    try {
      val lines = sc.parallelize(List("hello spark", "hi spark", "wow scala"))
      val words = lines.flatMap(_.split(" "))
      val pairs = words.map(word => (word, 1))
      // Sum the 1s per word to obtain the occurrence count of each word.
      val wordCount = pairs.reduceByKey(_ + _)
      // collect() materialises the result as a local array before printing.
      wordCount.collect().foreach(println)
    } finally {
      // Always release the context, even when the job above throws.
      sc.stop()
    }
  }
}
WordCount的逻辑执行
WordCount的物理执行
Spark中的shuffle