package com.spark.core
import org.apache.spark.{SparkConf, SparkContext}
//orderid,userid,money,productid
/** Reads order records ("orderid,userid,money,productid") from the path in
  * args(0) and prints the top-5 `money` values with a 1-based rank, one per
  * line as "rank\tmoney".
  */
object TopN {
  // Windows-only workaround so Spark can find winutils.exe; no effect on the
  // job logic. NOTE(review): consider externalizing this hard-coded path.
  System.setProperty("hadoop.home.dir", "D:\\soft\\hadoop\\hadoop-2.7.3")

  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "usage: TopN <input-path>")

    val conf = new SparkConf().setMaster("local[*]").setAppName("topn")
    val sc = new SparkContext(conf)
    try {
      val lines = sc.textFile(args(0))

      // Approach 1 (kept for reference): sort by key descending, then take 5.
      // lines.filter(x => x.trim.nonEmpty && x.split(",").length == 4)
      //   .map(line => line.split(",")(2))
      //   .map(x => (x.toInt, ""))
      //   .sortByKey(false)
      //   .map(_._1)
      //   .take(5)

      // Approach 2 (kept for reference): sortBy descending, then take 5.
      // lines.filter(x => x.trim.nonEmpty && x.split(",").length == 4)
      //   .map(line => line.split(",")(2))
      //   .sortBy(_.toInt, false)
      //   .take(5)

      // Approach 3: RDD.top(5) returns the 5 largest values (descending) as a
      // local Array on the driver — no full cluster-wide sort needed.
      lines
        .filter(x => x.trim.nonEmpty && x.split(",").length == 4) // drop blank/malformed rows
        .map(line => line.split(",")(2).toInt)                    // money column
        .top(5)
        .zipWithIndex // rank without a mutable counter (the old `var idx` pattern breaks on distributed foreach)
        .foreach { case (money, i) => println(s"${i + 1}\t$money") }
    } finally {
      sc.stop() // always release the SparkContext, even on failure
    }
  }
}
// Spark TopN operation — originally published as a blog post (latest related article: 2024-06-19 15:39:06).