Custom Sorting
The examples below use a bean type to hold the data.
Approach 1: the bean is a regular class that implements Comparable and overrides its compareTo method to define the sort order. Because the bean is defined on the Driver side and has to be shipped to the executors, it must also implement Serializable.
package cn.spark.customSort

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object CustomSort1 {
  def main(args: Array[String]): Unit = {
    // whether to run locally
    val isLocal: Boolean = args(0).toBoolean
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
    if (isLocal) {
      conf.setMaster("local[*]")
    }
    // create the SparkContext
    val sc: SparkContext = new SparkContext(conf)
    // create an RDD with some sample data
    val stair: RDD[String] = sc.makeRDD(List("xiaocang,22,99.99", "xiaobo,23,999.99", "xiaosan,24,989.99", "xiaoxiang,25,980.99"))
    // split each line and store the fields in the bean
    val stairGirl: RDD[StairGirl1] = stair.map(line => {
      // split on ","
      val fields = line.split(",")
      val name = fields(0)
      val age = fields(1).toInt
      val fv = fields(2).toDouble
      new StairGirl1(name, age, fv)
    })
    // the bean's compareTo defines the order
    val res = stairGirl.sortBy(s => s)
    // print the result
    println(res.collect().toBuffer)
    // release resources
    sc.stop()
  }
}
package cn.spark.customSort

class StairGirl1(var name: String, var age: Int, var fv: Double) extends Comparable[StairGirl1] with Serializable {
  // override compareTo: fv descending, age ascending on ties
  override def compareTo(o: StairGirl1): Int = {
    // if fv is equal, sort by age
    if (this.fv == o.fv) {
      this.age - o.age
    } else {
      // (o.fv - this.fv).toInt would truncate any difference below 1.0 to 0,
      // so compare the doubles directly
      java.lang.Double.compare(o.fv, this.fv)
    }
  }

  override def toString = s"StairGirl1($name, $age, $fv)"
}
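With the sample data above the order is deterministic, so a local run should print something like:

ArrayBuffer(StairGirl1(xiaobo, 23, 999.99), StairGirl1(xiaosan, 24, 989.99), StairGirl1(xiaoxiang, 25, 980.99), StairGirl1(xiaocang, 22, 99.99))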
Approach 2: the bean is a case class that extends Ordered and implements its compare method. A case class already comes with Serializable and a toString implementation, so neither has to be added by hand.
package cn.spark.customSort

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object CustomSort2 {
  def main(args: Array[String]): Unit = {
    // whether to run locally
    val isLocal: Boolean = args(0).toBoolean
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
    if (isLocal) {
      conf.setMaster("local[*]")
    }
    // create the SparkContext
    val sc: SparkContext = new SparkContext(conf)
    // create an RDD with some sample data
    val stair: RDD[String] = sc.makeRDD(List("xiaocang,22,99.99", "xiaobo,23,999.99", "xiaosan,24,989.99", "xiaoxiang,25,980.99"))
    // split each line and store the fields in the case class
    val stairGirl: RDD[StairGirl] = stair.map(line => {
      // split on ","
      val fields = line.split(",")
      val name = fields(0)
      val age = fields(1).toInt
      val fv = fields(2).toDouble
      StairGirl(name, age, fv)
    })
    // the case class's compare defines the order
    val res = stairGirl.sortBy(s => s)
    // print the result
    println(res.collect().toBuffer)
    // release resources
    sc.stop()
  }
}
package cn.spark.customSort

/**
 * Sorts via the Ordered trait's compare method.
 * A case class already provides Serializable and toString.
 */
case class StairGirl(
    var name: String,
    var age: Int,
    var fv: Double)
  extends Ordered[StairGirl] {

  override def compare(that: StairGirl): Int = {
    // if fv is equal, sort by age
    if (this.fv == that.fv) {
      this.age - that.age
    } else {
      // compare the doubles directly instead of truncating the difference with toInt
      java.lang.Double.compare(that.fv, this.fv)
    }
  }
}
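A side effect of extending Ordered[StairGirl] is that the bean also sorts in plain Scala collections, because Ordered supplies an implicit Ordering. A minimal sketch, no Spark needed:

val girls = List(StairGirl("xiaocang", 22, 99.99), StairGirl("xiaobo", 23, 999.99))
// sorted picks up the compare method defined on the case class
println(girls.sorted) // List(StairGirl(xiaobo,23,999.99), StairGirl(xiaocang,22,99.99))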
Approach 3: the bean is a case class that extends no ordering trait at all. Instead, sortBy receives the ordering as an implicit parameter, so we define an implicit Ordering separately and bring it into scope.
package cn.spark.customSort

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object CustomSort3 {
  def main(args: Array[String]): Unit = {
    // whether to run locally
    val isLocal: Boolean = args(0).toBoolean
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
    if (isLocal) {
      conf.setMaster("local[*]")
    }
    // create the SparkContext
    val sc: SparkContext = new SparkContext(conf)
    // create an RDD with some sample data
    val stair: RDD[String] = sc.makeRDD(List("xiaocang,22,99.99", "xiaobo,23,999.99", "xiaosan,24,989.99", "xiaoxiang,25,980.99"))
    // split each line and store the fields in the case class
    val stairGirl: RDD[StairGirl2] = stair.map(line => {
      // split on ","
      val fields = line.split(",")
      val name = fields(0)
      val age = fields(1).toInt
      val fv = fields(2).toDouble
      StairGirl2(name, age, fv)
    })
    // bring the implicit Ordering into scope so sortBy can pick it up
    import MyContext.OrderGirl
    val res: RDD[StairGirl2] = stairGirl.sortBy(s => s)
    // print the result
    println(res.collect().toBuffer)
    // release resources
    sc.stop()
  }
}
package cn.spark.customSort

object MyContext {
  // implicit Ordering for StairGirl2: fv descending, age ascending on ties
  implicit object OrderGirl extends Ordering[StairGirl2] {
    override def compare(x: StairGirl2, y: StairGirl2): Int = {
      // if fv is equal, sort by age
      if (x.fv == y.fv) {
        x.age - y.age
      } else {
        // compare the doubles directly instead of truncating the difference with toInt
        java.lang.Double.compare(y.fv, x.fv)
      }
    }
  }
}
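If you would rather not hand-write compare, the standard library's Ordering.by builds an equivalent Ordering from a key function. A minimal sketch (MyContext2 is a hypothetical name, not part of the code above):

package cn.spark.customSort

object MyContext2 {
  // negating fv makes it descending; age stays ascending
  implicit val orderGirl: Ordering[StairGirl2] = Ordering.by(g => (-g.fv, g.age))
}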
package cn.spark.customSort

case class StairGirl2(
    var name: String,
    var age: Int,
    var fv: Double)
Approach 4: skip the bean entirely and rely on the built-in ordering of tuples.
package cn.spark.customSort

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object CustomSort4 {
  def main(args: Array[String]): Unit = {
    // whether to run locally
    val isLocal: Boolean = args(0).toBoolean
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
    if (isLocal) {
      conf.setMaster("local[*]")
    }
    // create the SparkContext
    val sc: SparkContext = new SparkContext(conf)
    // create an RDD with some sample data
    val stair: RDD[String] = sc.makeRDD(List("xiaocang,22,99.99", "xiaobo,23,999.99", "xiaosan,24,989.99", "xiaoxiang,25,980.99"))
    // split each line and store the fields in a tuple
    val stairGirl: RDD[(String, Int, Double)] = stair.map(line => {
      // split on ","
      val fields = line.split(",")
      val name = fields(0)
      val age = fields(1).toInt
      val fv = fields(2).toDouble
      (name, age, fv)
    })
    // negating fv sorts it descending; age breaks ties ascending
    val res = stairGirl.sortBy(t => (-t._3, t._2))
    // print the result
    println(res.collect().toBuffer)
    // release resources
    sc.stop()
  }
}
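Negating the key only works for numeric fields. An alternative sketch (CustomSort5 is a hypothetical name, and it assumes the standard library's implicit Ordering[Double] as in Scala 2.12): pass an explicit tuple Ordering so fv sorts descending without negation.

package cn.spark.customSort

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object CustomSort5 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[*]")
    val sc = new SparkContext(conf)
    val girls: RDD[(String, Int, Double)] =
      sc.makeRDD(List(("xiaocang", 22, 99.99), ("xiaobo", 23, 999.99), ("xiaosan", 24, 989.99)))
    // explicit Ordering on the sort key: fv descending, age ascending;
    // the local implicit takes precedence over the default tuple ordering
    implicit val byFvDescAgeAsc: Ordering[(Double, Int)] =
      Ordering.Tuple2(Ordering[Double].reverse, Ordering[Int])
    val res: RDD[(String, Int, Double)] = girls.sortBy(t => (t._3, t._2))
    println(res.collect().toBuffer)
    sc.stop()
  }
}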