Point 1: Top N
package com.scala.top
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
 * Prints the three lines of `./data/top.txt` that have the largest integer values.
 *
 * Each non-blank line is parsed as an `Int` after trimming surrounding whitespace;
 * a line that is not a valid integer still fails with `NumberFormatException`.
 * The job runs with the `local` master configured below.
 */
object TopN {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TopN").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val file = sc.textFile("./data/top.txt")
      // Drop blank lines (e.g. a trailing newline) so they do not break toInt.
      val keyed = file
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(t => (t.toInt, t))
      // top(3) returns the three largest elements in descending order without
      // sorting the whole RDD; the ordering compares the integer key only.
      val result = keyed
        .top(3)(Ordering.by[(Int, String), Int](_._1))
        .map(_._2)
      result.foreach(x => println(x))
    } finally {
      // Always release the Spark context, even if the job fails.
      sc.stop()
    }
  }
}
Point 2: Grouped Top N (分组取Top N)
package com.scala.top
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import java.util.ArrayList
/**
 * Prints, for every key in `./data/score.txt`, that key's two highest integer scores.
 *
 * Each input line is expected to hold a key followed by a score, separated by
 * whitespace. Lines with fewer than two fields (such as blank lines) are skipped;
 * a score that is not a valid integer still fails with `NumberFormatException`.
 * The job runs with the `local` master configured below.
 */
object GroupTopN {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GroupTopN").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val file = sc.textFile("./data/score.txt")
      // Split each line once; skip lines that cannot provide both key and score.
      val grouped = file
        .map(_.trim.split("\\s+"))
        .filter(_.length >= 2)
        .map(fields => (fields(0), fields(1).toInt))
        .groupByKey()
      // Sort each key's scores in descending order and keep the best two.
      // A reversed ordering is used instead of negation, which overflows for Int.MinValue.
      val topPerKey = grouped.mapValues { scores =>
        scores.toList.sorted(Ordering[Int].reverse).take(2)
      }
      topPerKey.foreach(println(_))
    } finally {
      // Always release the Spark context, even if the job fails.
      sc.stop()
    }
  }
}