Spark Word Count
Data:
I love you and I will give you a perfect home with dogs and cats that you always love
Running locally:
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Word count
 */
object WordCount {
  // Needed on Windows so Spark can find the local Hadoop binaries (winutils)
  System.setProperty("hadoop.home.dir", "D:\\soft\\hadoop\\hadoop-2.9.2")

  def main(args: Array[String]): Unit = {
    // 1. Create the SparkContext, the entry point to Spark Core
    val conf = new SparkConf().setMaster("local").setAppName("wordcount")
    val sc = new SparkContext(conf)

    // 2. Word count
    sc.textFile(args(0))                  // input path, passed as args(0)
      .flatMap(line => line.split(" "))   // split each line into words
      .map(word => (word, 1))             // pair each word with a count of 1
      .reduceByKey((x, y) => x + y)       // sum the counts per word
      //.saveAsTextFile(args(1))          // or: write the result to args(1)
      .foreach(println)                   // print the result to the console

    // 3. Shut down the environment
    sc.stop()
  }
}
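To try the pipeline without preparing an input file, a minimal sketch like the following builds the RDD from an in-memory collection with sc.parallelize instead of sc.textFile (the object name WordCountInMemory and the inlined sample sentence are illustrative, not part of the original program):

import org.apache.spark.{SparkConf, SparkContext}

object WordCountInMemory {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("wordcount-inmemory")
    val sc = new SparkContext(conf)

    // Build the RDD from an in-memory collection instead of a file
    val lines = sc.parallelize(Seq(
      "I love you and I will give you a perfect home with dogs and cats that you always love"))

    lines.flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .collect()          // the result set is tiny, so gathering it on the driver is safe
      .foreach(println)

    sc.stop()
  }
}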
Running as a jar (the setMaster and hadoop.home.dir settings are commented out, since the master is supplied at submit time):
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Word count
 */
object WordCount {
  // Local-run setting, not needed when submitting a jar
  //System.setProperty("hadoop.home.dir", "D:\\soft\\hadoop\\hadoop-2.9.2")

  def main(args: Array[String]): Unit = {
    // 1. Create the SparkContext, the entry point to Spark Core
    //    (the master is not hard-coded here; spark-submit supplies it)
    val conf = new SparkConf()/*.setMaster("local")*/.setAppName("wordcount")
    val sc = new SparkContext(conf)

    // 2. Word count
    sc.textFile(args(0))                  // input path, passed as args(0)
      .flatMap(line => line.split(" "))   // split each line into words
      .map(word => (word, 1))             // pair each word with a count of 1
      .reduceByKey((x, y) => x + y)       // sum the counts per word
      .saveAsTextFile(args(1))            // write the result to args(1)
      // .foreach(println)                // console output (local mode only)

    // 3. Shut down the environment
    sc.stop()
  }
}
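The packaged jar is then launched through spark-submit. A sketch of the command, assuming a standalone cluster; the jar name, master URL, and HDFS paths below are placeholders to replace with your own:

spark-submit \
  --class WordCount \
  --master spark://master:7077 \
  wordcount.jar \
  hdfs:///input/words.txt \
  hdfs:///output/wordcount

Note that saveAsTextFile fails if the output directory already exists, so use a fresh path for each run.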
Result:
(cats,1)
(you,3)
(that,1)
(a,1)
(love,2)
(perfect,1)
(home,1)
(with,1)
(will,1)
(I,2)
(dogs,1)
(always,1)
(and,2)
(give,1)
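The pairs come out in hash-partition order, not sorted. If sorted output is preferred, a sketch of a small extension (reusing the same sc and args(0) as above) adds a sortBy on the count before printing:

sc.textFile(args(0))
  .flatMap(_.split(" "))
  .map((_, 1))
  .reduceByKey(_ + _)
  .sortBy(_._2, ascending = false)  // highest counts first
  .collect()                        // small result, safe to gather on the driver
  .foreach(println)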