Spark Java 二次排序

最新推荐文章于 2021-04-25 21:06:34 发布

生命不息丶折腾不止

最新推荐文章于 2021-04-25 21:06:34 发布

阅读量724

点赞数

CC 4.0 BY-SA版权

分类专栏： spark 文章标签： spark java

本文链接：https://blog.youkuaiyun.com/leen0304/article/details/78280282

spark 专栏收录该内容

58 篇文章

订阅专栏

本文介绍了如何在 Spark Java 环境中，通过自定义实现 Ordered 接口的排序键（即文中所说的“比较器”）并配合 sortByKey 来实现数据的二次排序，内容包括输入数据、排序键的定义与使用，以及最终的结果展示。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

1.输入

2.代码实现

2.1自定义比较器

import scala.math.Ordered;

import java.io.Serializable;

/**
 * Composite key used to perform a secondary sort in Spark.
 *
 * <p>Keys are ordered by {@code first}; ties are broken by {@code second}. Both components
 * are compared in ascending numeric order.
 *
 * <p>How the key is put together:
 * <ol>
 *   <li>It implements {@code Ordered<T>} and {@link Serializable}.</li>
 *   <li>It overrides {@code equals} and {@code hashCode}.</li>
 *   <li>It implements {@code $less}, {@code $greater}, {@code $less$eq} and
 *       {@code $greater$eq} (the Java-visible names of Scala's {@code <}, {@code >},
 *       {@code <=} and {@code >=}).</li>
 *   <li>It implements {@code compare} and {@code compareTo}.</li>
 *   <li>It provides constructors.</li>
 * </ol>
 *
 * <p>Every ordering method delegates to {@link #compare(SecondarySortKey)} so that all of
 * them are guaranteed to agree with each other.
 *
 * <p>Original author: leen, 2017/10/18.
 */

public class SecondarySortKey implements Ordered<SecondarySortKey>, Serializable {

    int first;
    int second;

    /**
     * Creates a key whose two components are left at the default value {@code 0}.
     */
    public SecondarySortKey() {
    }

    /**
     * Creates a key from its two components.
     *
     * @param first  primary sort component
     * @param second secondary sort component, consulted only when {@code first} is equal
     */
    public SecondarySortKey(int first, int second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Scala {@code <}.
     *
     * @param that key to compare against
     * @return {@code true} if this key sorts strictly before {@code that}
     */
    public boolean $less(SecondarySortKey that) {
        return compare(that) < 0;
    }

    /**
     * Scala {@code >}.
     *
     * @param that key to compare against
     * @return {@code true} if this key sorts strictly after {@code that}
     */
    public boolean $greater(SecondarySortKey that) {
        return compare(that) > 0;
    }

    /**
     * Scala {@code <=}.
     *
     * @param that key to compare against
     * @return {@code true} if this key sorts before {@code that} or both components are equal
     */
    public boolean $less$eq(SecondarySortKey that) {
        return compare(that) <= 0;
    }

    /**
     * Scala {@code >=}.
     *
     * @param that key to compare against
     * @return {@code true} if this key sorts after {@code that} or both components are equal
     */
    public boolean $greater$eq(SecondarySortKey that) {
        return compare(that) >= 0;
    }

    /**
     * Compares this key with {@code that}, first by {@code first} and then by {@code second}.
     *
     * <p>Uses {@link Integer#compare(int, int)} instead of subtraction: a difference such as
     * {@code Integer.MIN_VALUE - 1} overflows and would report the wrong sign.
     *
     * @param that key to compare against
     * @return a negative value, zero, or a positive value if this key is respectively
     *         less than, equal to, or greater than {@code that}
     */
    public int compare(SecondarySortKey that) {
        int byFirst = Integer.compare(this.first, that.getFirst());
        if (byFirst != 0) {
            return byFirst;
        }
        return Integer.compare(this.second, that.getSecond());
    }

    /**
     * Same ordering as {@link #compare(SecondarySortKey)}.
     *
     * @param that key to compare against
     * @return a negative value, zero, or a positive value if this key is respectively
     *         less than, equal to, or greater than {@code that}
     */
    public int compareTo(SecondarySortKey that) {
        return compare(that);
    }

    /**
     * @return the primary sort component
     */
    public int getFirst() {
        return first;
    }

    /**
     * @param first the new primary sort component
     */
    public void setFirst(int first) {
        this.first = first;
    }

    /**
     * @return the secondary sort component
     */
    public int getSecond() {
        return second;
    }

    /**
     * @param second the new secondary sort component
     */
    public void setSecond(int second) {
        this.second = second;
    }

    /**
     * Two keys are equal when they are of the same class and both components match.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        SecondarySortKey that = (SecondarySortKey) o;

        return first == that.first && second == that.second;
    }

    /**
     * Hash derived from both components as {@code 31 * first + second}.
     */
    @Override
    public int hashCode() {
        int result = first;
        result = 31 * result + second;
        return result;
    }
}

2.2调用比较器实现二次排序

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

/**
 * Demo driver that sorts the lines of a text file with a secondary sort.
 *
 * <p>Each input line is expected to start with two whitespace-separated integers. They are
 * wrapped in a {@code SecondarySortKey}, the pairs are sorted with {@code sortByKey()}, and
 * every sorted line is printed together with the hash code of its key.
 *
 * <p>Original author: leen, 2017/10/18.
 */
public class SecondarySort {
    /**
     * Runs the job on a local Spark master.
     *
     * @param args optional; {@code args[0]} overrides the input file path. When absent the
     *             original hard-coded path is used.
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0)
                ? args[0]
                : "C:\\Users\\leen\\Desktop\\sort.txt";

        SparkConf conf = new SparkConf().setAppName("SecondarySort").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lines = sc.textFile(inputPath);

            // Build (key, original line) pairs; the key carries the two leading integers.
            JavaPairRDD<SecondarySortKey, String> pairs = lines.mapToPair(new PairFunction<String, SecondarySortKey, String>() {
                public Tuple2<SecondarySortKey, String> call(String line) throws Exception {
                    // Tolerate leading/trailing blanks and runs of spaces or tabs between fields.
                    String[] lineSplits = line.trim().split("\\s+");
                    // parseInt yields primitives directly, avoiding a box/unbox round trip.
                    SecondarySortKey key = new SecondarySortKey(Integer.parseInt(lineSplits[0]), Integer.parseInt(lineSplits[1]));
                    return new Tuple2<SecondarySortKey, String>(key, line);
                }
            });

            // Ordering comes from SecondarySortKey itself.
            JavaPairRDD<SecondarySortKey, String> sortedPairs = pairs.sortByKey();
            sortedPairs.foreach(new VoidFunction<Tuple2<SecondarySortKey, String>>() {
                public void call(Tuple2<SecondarySortKey, String> res) throws Exception {
                    // Prints the key's hash code (not the key fields) followed by the raw line.
                    System.out.println(res._1().hashCode()+" : "+ res._2());
                }
            });
        } finally {
            // Always release the Spark context, even if the job fails.
            sc.stop();
        }
    }
}

3.结果输出