源数据:
hello word
hello java
hello python
hello waad
hello word
hello java
hello python
hello waad
hello word
hello java
hello python
hello waad
hello word
hello java
hello python
hello waad
hello word
hello java
hello python
hello waad
dfsadh
dafhads sadfksadj sadfksadh
dsafhkjdsahf sdahfksad
sdfkjahlksadf fdshjslkad
ghghfhjkdhf
Scala代码:
import scala.tools.scalap.Main
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
 * Word-count example: reads the text file `./words`, splits every line on a
 * single space, and prints each distinct token together with how many times
 * it occurs.
 */
object Regression {

  /**
   * Runs the word count on a local Spark master and prints one
   * `(word, count)` tuple per line to standard output.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("local").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val wordCounts = sc.textFile("./words")
        .flatMap(_.split(" "))
        .map(word => (word, 1))
        .reduceByKey(_ + _)

      // Collect to the driver before printing: a bare RDD.foreach(println)
      // executes on the executors, so on a non-local master the output would
      // land in executor logs instead of this process's console.
      wordCounts.collect().foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}