Hadoop Secondary Sort Explained

1. The idea behind secondary sort

The goal is to sort records by the first number and, within each group of equal first numbers, by the second number. MapReduce only sorts map output keys, so the recipe has three parts: pack both numbers into a composite key (IntPair), partition on the first number only, and group reduce input on the first number only.

Sample data:
100 12
100 23
100 9
101 32
101 30 
99 23
99 20

2. Define the composite key type IntPair: the first number of each line is first, the second is second.

public static class IntPair implements WritableComparable<IntPair>
    {
        int first;
        int second;

        /** Nullary constructor; Hadoop needs it to create keys by reflection. */
        public IntPair() {
        }
        /**
         * @param first
         * @param second
         */
        public IntPair(int first, int second) {
            super();
            this.first = first;
            this.second = second;
        }


        @Override
        public void write(DataOutput out) throws IOException {
            // Serialize the two ints in a fixed order.
            out.writeInt(first);
            out.writeInt(second);
        }


        @Override
        public void readFields(DataInput in) throws IOException {
            // Deserialize in the same order write() used.
            first = in.readInt();
            second = in.readInt();
        }

        // Core of the sort: order by first ascending, then by second ascending.
        @Override
        public int compareTo(IntPair pair) {
            if (first != pair.first) {
                return first < pair.first ? -1 : 1;
            } else if (second != pair.second) {
                return second < pair.second ? -1 : 1;
            } else {
                return 0;
            }
        }


        @Override
        public int hashCode() {
            // Mix both fields; 157 is just an arbitrary odd multiplier.
            return first * 157 + second;
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == null) {
                return false;
            }
            if (obj == this) {
                return true;
            }
            if (obj instanceof IntPair) {
                IntPair intPair = (IntPair) obj;
                return intPair.first == first && intPair.second == second;
            } else {
                return false;
            }
        }



    }
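
To sanity-check the ordering, here is a minimal standalone snippet (a sketch; the class name IntPairSortCheck is hypothetical, and it assumes the IntPair class above is on the classpath together with the Hadoop client jars) that sorts the sample keys with this compareTo:

import java.util.Arrays;

public class IntPairSortCheck {
    public static void main(String[] args) {
        // The sample keys from section 1, in their original input order.
        IntPair[] pairs = {
                new IntPair(100, 12), new IntPair(100, 23), new IntPair(100, 9),
                new IntPair(101, 32), new IntPair(101, 30),
                new IntPair(99, 23), new IntPair(99, 20)
        };
        // Arrays.sort uses compareTo: first ascending, then second ascending.
        Arrays.sort(pairs);
        for (IntPair p : pairs) {
            System.out.println(p.first + " " + p.second);
        }
        // Prints: 99 20, 99 23, 100 9, 100 12, 100 23, 101 30, 101 32
    }
}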

3. Partitioning. Because IntPair is now the map output key, the default hash partitioner could scatter records that share the same original key across different reducers; a custom partitioner that hashes only on first keeps them together.

public static class FirstPartitioner extends Partitioner<IntPair, IntWritable>
    {


        @Override
        public int getPartition(IntPair key, IntWritable value, int numPartitions) {
            // Partition on first only, so equal first values meet in one reducer.
            // Masking the sign bit keeps the result non-negative even if
            // first * 127 overflows (Math.abs(Integer.MIN_VALUE) is still negative).
            return (key.first * 127 & Integer.MAX_VALUE) % numPartitions;
        }

    }
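
To see concretely that the partition depends only on first, a quick sketch (the PartitionCheck class is hypothetical; plain Java, no cluster needed) evaluates the same formula for the three sample keys with numPartitions = 2:

public class PartitionCheck {
    public static void main(String[] args) {
        int numPartitions = 2;
        for (int first : new int[]{99, 100, 101}) {
            // Same formula as FirstPartitioner: it ignores second entirely,
            // so every record with the same first reaches the same reducer.
            int partition = (first * 127 & Integer.MAX_VALUE) % numPartitions;
            System.out.println("first=" + first + " -> partition " + partition);
        }
    }
}

All records with first = 100 land in one partition, and all records with first = 99 or 101 in the other, regardless of their second values.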

4. Grouping. To ensure that records with the same original key fall into the same reduce group (one reduce() call per distinct first), define a custom grouping comparator.

public static class GroupingComparator extends WritableComparator{

        public GroupingComparator() {
            // true: let WritableComparator allocate IntPair instances for deserialization.
            super(IntPair.class, true);
        }
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            // Compare first only: keys that differ just in second join the same group.
            IntPair intPair = (IntPair) a;
            IntPair intPair1 = (IntPair) b;
            if (intPair.first != intPair1.first) {
                return intPair.first < intPair1.first ? -1 : 1;
            } else {
                return 0;
            }
        }
        }

    }
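
To make the grouping effect concrete, a small sketch (the GroupingCheck class is hypothetical; it assumes IntPair and GroupingComparator above are on the classpath with the Hadoop client jars) walks the keys in shuffle-sorted order and starts a new reduce group whenever the comparator reports a difference:

public class GroupingCheck {
    public static void main(String[] args) {
        // Keys in the order the shuffle sort produces (see compareTo above).
        IntPair[] sorted = {
                new IntPair(99, 20), new IntPair(99, 23),
                new IntPair(100, 9), new IntPair(100, 12), new IntPair(100, 23),
                new IntPair(101, 30), new IntPair(101, 32)
        };
        GroupingComparator grouping = new GroupingComparator();
        for (int i = 0; i < sorted.length; i++) {
            // A new reduce() call begins whenever the comparator sees a new first.
            if (i == 0 || grouping.compare(sorted[i - 1], sorted[i]) != 0) {
                System.out.println("-- new reduce group, first=" + sorted[i].first);
            }
            System.out.println("   value " + sorted[i].second);
        }
    }
}

This prints three groups (99, 100, 101), each group's values already in ascending order, which is exactly what the reducer in the complete code below will see.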

5. The key point to understand

The mapper emits the whole pair as the key, and the second number again as the value:

 context.write(intPair, new IntWritable(second));

Because both numbers live in the composite key, the shuffle's sort on IntPair.compareTo performs the secondary sort for us. After sorting, the key / value pairs are:

99 20 / 20
99 23 / 23
100 9 / 9
100 12 / 12
100 23 / 23
101 30 / 30
101 32 / 32
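
Since the reducer emits (key.first, value) once per value in a group, the final job output on the sample data (a sketch assuming a single reducer, so everything lands in one part file; TextOutputFormat separates key and value with a tab) is:

99	20
99	23
100	9
100	12
100	23
101	30
101	32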

6. Complete code

/**
 * @author DELL_pc
 * @date June 27, 2017
 */
package com.beifeng.test;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class GroupDemo {
    public static class IntPair implements WritableComparable<IntPair>
    {
        int first;
        int second;

        /** Nullary constructor; Hadoop needs it to create keys by reflection. */
        public IntPair() {
        }
        /**
         * @param first
         * @param second
         */
        public IntPair(int first, int second) {
            super();
            this.first = first;
            this.second = second;
        }


        @Override
        public void write(DataOutput out) throws IOException {
            // Serialize the two ints in a fixed order.
            out.writeInt(first);
            out.writeInt(second);
        }


        @Override
        public void readFields(DataInput in) throws IOException {
            // Deserialize in the same order write() used.
            first = in.readInt();
            second = in.readInt();
        }


        // Core of the sort: order by first ascending, then by second ascending.
        @Override
        public int compareTo(IntPair pair) {
            if (first != pair.first) {
                return first < pair.first ? -1 : 1;
            } else if (second != pair.second) {
                return second < pair.second ? -1 : 1;
            } else {
                return 0;
            }
        }


        @Override
        public int hashCode() {
            // Mix both fields; 157 is just an arbitrary odd multiplier.
            return first * 157 + second;
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == null) {
                return false;
            }
            if (obj == this) {
                return true;
            }
            if (obj instanceof IntPair) {
                IntPair intPair = (IntPair) obj;
                return intPair.first == first && intPair.second == second;
            } else {
                return false;
            }
        }



    }
    public static class FirstPartitioner extends Partitioner<IntPair, IntWritable>
    {


        @Override
        public int getPartition(IntPair key, IntWritable value, int numPartitions) {
            // Partition on first only, so equal first values meet in one reducer.
            // Masking the sign bit keeps the result non-negative even if
            // first * 127 overflows (Math.abs(Integer.MIN_VALUE) is still negative).
            return (key.first * 127 & Integer.MAX_VALUE) % numPartitions;
        }

    }
    public static class GroupingComparator extends WritableComparator{

        public GroupingComparator() {
            // true: let WritableComparator allocate IntPair instances for deserialization.
            super(IntPair.class, true);
        }
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            // Compare first only: keys that differ just in second join the same group.
            IntPair intPair = (IntPair) a;
            IntPair intPair1 = (IntPair) b;
            if (intPair.first != intPair1.first) {
                return intPair.first < intPair1.first ? -1 : 1;
            } else {
                return 0;
            }
        }

    }
    public static class WordCountMap extends
            Mapper<LongWritable, Text, IntPair, IntWritable> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Parse "first second" from the line, wrap both numbers in the
            // composite key, and repeat second as the value for the reducer.
            String line = value.toString();
            StringTokenizer token = new StringTokenizer(line);
            int first = Integer.valueOf(token.nextToken());
            int second = Integer.valueOf(token.nextToken());
            IntPair intPair = new IntPair(first, second);
            context.write(intPair, new IntWritable(second));
        }
    }

    public static class WordCountReduce extends
            Reducer<IntPair, IntWritable, IntWritable, IntWritable> {
        public void reduce(IntPair key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            // One call per distinct first (thanks to the grouping comparator);
            // the values arrive already sorted by second.
            for (IntWritable val : values) {
                context.write(new IntWritable(key.first), val);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf, "GroupDemo");
        job.setJarByClass(GroupDemo.class);
        job.setMapOutputKeyClass(IntPair.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        job.setPartitionerClass(FirstPartitioner.class);
        job.setGroupingComparatorClass(GroupingComparator.class);
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/data/demo.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/data/out"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
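
To try the job end to end (a sketch; groupdemo.jar is a placeholder name): package the class into a jar, upload the sample file to /data/demo.txt in HDFS, and run hadoop jar groupdemo.jar com.beifeng.test.GroupDemo. The sorted result is written under /data/out.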