Input data format
- Four tab-separated columns per line: year, month, day, temperature (a made-up sample is shown below).
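A few illustrative input lines in this format (the values are invented for the example; columns are separated by tabs):

2019	1	1	10
2019	1	2	5
2019	2	1	8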

Expected result
For each year-month, the program prints one record: the year-month key paired with that month's temperatures in ascending order, joined by tabs.
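With the illustrative input above, the output would look roughly like the following; temperatures within a year-month appear in ascending order, while the ordering of the records themselves is not guaranteed by reduceByKey:

(2019-1,5	10)
(2019-2,8)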
Implementation code
package com.jackie.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

// Composite key for the secondary sort: ascending by `first`, then ascending by `second`.
class UDFSort(val first: Int, val second: Int) extends Ordered[UDFSort] with Serializable {
  override def compare(that: UDFSort): Int = {
    if (this.first - that.first != 0) {
      this.first - that.first
    } else {
      this.second - that.second
    }
  }
}

object SecondSort {
  def main(args: Array[String]): Unit = {
    val config: SparkConf = new SparkConf().setMaster("local[*]").setAppName("SecondSort")
    val sc = new SparkContext(config)

    val inputPath = "D:\\IDEA2018\\Scala\\spark-study\\src\\test.txt"
    // Reserved for persisting the result; not used in the code below.
    val outputPath = "D:\\IDEA2018\\Scala\\spark-study\\src\\test1.txt"

    val input: RDD[String] = sc.textFile(inputPath)

    // Each line has four tab-separated fields: year, month, day, temperature.
    // Key: composite (month, temperature); value: ("year-month", temperature).
    val mapRDD: RDD[(UDFSort, (String, String))] = input.map(line => {
      val fields: Array[String] = line.split("\t")
      (new UDFSort(fields(1).toInt, fields(3).toInt), (fields(0) + "-" + fields(1), fields(3)))
    })

    // Sort globally by the composite key in a single partition, keep only the values,
    // then concatenate the temperatures that share a year-month with tabs.
    val sortRDD: RDD[(String, String)] = mapRDD.sortByKey(true, 1).map(x => {
      (x._2._1, x._2._2)
    }).reduceByKey(_ + "\t" + _)

    sortRDD.foreach(println(_))

    sc.stop()
  }
}
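outputPath is declared but never used. If the result should also be written to disk rather than only printed, one simple option would be to save the RDD before stopping the context; note that Spark writes a directory of part files at that path, not a single text file:

// Persist the grouped result; place this before sc.stop().
// Spark creates a directory of part-xxxxx files at outputPath.
sortRDD.saveAsTextFile(outputPath)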
Other
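As a side note, Scala already provides an Ordering for tuples, so the same (month, temperature) secondary sort can be sketched without a custom key class. The object name SecondSortTupleKey is made up for illustration, the path and tab-separated format are the same assumptions as above, and this is only an illustrative variant, not part of the original program:

package com.jackie.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

// Illustrative variant: secondary sort on (month, temperature) using the
// built-in tuple Ordering instead of a custom Ordered key class.
object SecondSortTupleKey {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("SecondSortTupleKey"))

    val input: RDD[String] = sc.textFile("D:\\IDEA2018\\Scala\\spark-study\\src\\test.txt")

    val result: RDD[(String, String)] = input
      .map { line =>
        val fields = line.split("\t")
        // Key: (month, temperature); value: ("year-month", temperature).
        ((fields(1).toInt, fields(3).toInt), (fields(0) + "-" + fields(1), fields(3)))
      }
      .sortByKey(ascending = true, numPartitions = 1) // tuple keys sort lexicographically
      .map(_._2)
      .reduceByKey(_ + "\t" + _)

    result.foreach(println(_))
    sc.stop()
  }
}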