// spark05: eleven ways to implement word count in Spark
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object wordcount {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("wordcount")
    val sc: SparkContext = new SparkContext(conf)
    Wordcount1(sc) // call whichever implementation below you want to test
    sc.stop()
  }
  // Way 1: groupBy groups identical words together, then counts each group's size
  def Wordcount1(sc: SparkContext): Unit = {
    val rdd: RDD[String] = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    val words: RDD[String] = rdd.flatMap(_.split(" "))
    val group: RDD[(String, Iterable[String])] = words.groupBy(word => word)
    val wordCount: RDD[(String, Int)] = group.mapValues(iter => iter.size)
    wordCount.collect().foreach(println)
  }
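  // For the sample input ("Hello Scala", "Hello Spark"), each implementation
  // should print the following pairs (ordering after a shuffle may vary):
  //   (Hello,2)
  //   (Scala,1)
  //   (Spark,1)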
  // Way 2: groupByKey shuffles the (word, 1) pairs by key, then sums each group
  def Wordcount2(sc: SparkContext): Unit = {
    val rdd: RDD[String] = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    val words: RDD[String] = rdd.flatMap(_.split(" "))
    val wordOne: RDD[(String, Int)] = words.map((_, 1))
    val grouped: RDD[(String, Iterable[Int])] = wordOne.groupByKey()
    val wordCount: RDD[(String, Int)] = grouped.mapValues(iter => iter.sum)
    wordCount.collect().foreach(println)
  }
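  // The article's remaining implementations are not shown in this excerpt.
  // As an illustration only (a sketch, not necessarily the author's actual
  // Wordcount3): reduceByKey is the usual next step after groupByKey, since
  // it combines counts map-side before the shuffle instead of moving every
  // individual (word, 1) pair across the network.
  def Wordcount3(sc: SparkContext): Unit = {
    val words: RDD[String] = sc.makeRDD(List("Hello Scala", "Hello Spark")).flatMap(_.split(" "))
    // reduceByKey merges all values for a key with the given function (_ + _)
    val wordCount: RDD[(String, Int)] = words.map((_, 1)).reduceByKey(_ + _)
    wordCount.collect().foreach(println)
  }
}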