- 二次排序思路分析
试验数据
100 12
100 23
100 9
101 32
101 30
99 23
99 20
2.定义数据类型IntPair,第一个数字为first,第二个数字为second
/**
 * Composite key holding two ints; sorts ascending by {@code first},
 * then ascending by {@code second} (the core of the secondary sort).
 */
public static class IntPair implements WritableComparable<IntPair>
{
    int first;   // original key of the input record
    int second;  // original value of the input record

    /** No-arg constructor required by Hadoop's Writable deserialization. */
    public IntPair() {
    }

    /**
     * @param first  the original key
     * @param second the original value
     */
    public IntPair(int first, int second) {
        this.first = first;
        this.second = second;
    }

    /** Serializes both fields in declaration order. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(first);
        out.writeInt(second);
    }

    /** Deserializes in the same order as {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        first = in.readInt();
        second = in.readInt();
    }

    /**
     * Core sort function: ascending by first, then by second.
     * Integer.compare avoids the subtraction-overflow pitfall.
     */
    @Override
    public int compareTo(IntPair pair) {
        int cmp = Integer.compare(first, pair.first);
        return cmp != 0 ? cmp : Integer.compare(second, pair.second);
    }

    @Override
    public int hashCode() {
        // 157 is an arbitrary odd prime used to mix the two fields.
        return first * 157 + second;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof IntPair)) {
            return false; // also covers obj == null
        }
        IntPair other = (IntPair) obj;
        return other.first == first && other.second == second;
    }
}
3.分区处理,这里我们是以IntPair为key,所以为了保证之前原始数据的key能分配到相同的reduce中,要自定义分区函数
/**
 * Partitions on {@code first} only, so all pairs sharing the original key
 * are routed to the same reducer.
 */
public static class FirstPartitioner extends Partitioner<IntPair, IntWritable>
{
    @Override
    public int getPartition(IntPair key, IntWritable value, int numPartitions) {
        // Mask the sign bit instead of Math.abs: when first*127 overflows to
        // Integer.MIN_VALUE, Math.abs returns a negative number, which would
        // yield an invalid negative partition index.
        return (key.first * 127 & Integer.MAX_VALUE) % numPartitions;
    }
}
4.为了保证原始数据中key相同的能在同一组中,自定义分组函数
/**
 * Groups reducer input by {@code first} only, so every value belonging to one
 * original key arrives in a single reduce() call (already sorted by second).
 */
public static class GroupingComparator extends WritableComparator {
    public GroupingComparator() {
        // true: let WritableComparator instantiate IntPair objects for us.
        super(IntPair.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        IntPair left = (IntPair) a;
        IntPair right = (IntPair) b;
        // Compare only the first field; second is deliberately ignored so
        // that all (first, *) keys fall into the same reduce group.
        return Integer.compare(left.first, right.first);
    }
}
5.重点理解内容
context.write(intPair, new IntWritable(second));
经过排序后应为
key /value
99 20 / 20
99 23 / 23
100 9 / 9
100 12 / 12
100 23 / 23
101 30 / 30
101 32 / 32
6.完整代码
/**
* @author DELL_pc
* @date 2017年6月27日
*
*/
package com.beifeng.test;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
 * Secondary-sort demo: input lines of "first second" are sorted ascending by
 * first then second; records sharing the same first value are partitioned and
 * grouped together so each reduce() sees its values in sorted order.
 */
public class GroupDemo {
    /**
     * Composite key holding two ints; sorts ascending by {@code first},
     * then ascending by {@code second}.
     */
    public static class IntPair implements WritableComparable<IntPair> {
        int first;   // original key of the input record
        int second;  // original value of the input record

        /** No-arg constructor required by Hadoop's Writable deserialization. */
        public IntPair() {
        }

        /**
         * @param first  the original key
         * @param second the original value
         */
        public IntPair(int first, int second) {
            this.first = first;
            this.second = second;
        }

        /** Serializes both fields in declaration order. */
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeInt(first);
            out.writeInt(second);
        }

        /** Deserializes in the same order as {@link #write}. */
        @Override
        public void readFields(DataInput in) throws IOException {
            first = in.readInt();
            second = in.readInt();
        }

        /**
         * Core sort function: ascending by first, then by second.
         * Integer.compare avoids the subtraction-overflow pitfall.
         */
        @Override
        public int compareTo(IntPair pair) {
            int cmp = Integer.compare(first, pair.first);
            return cmp != 0 ? cmp : Integer.compare(second, pair.second);
        }

        @Override
        public int hashCode() {
            // 157 is an arbitrary odd prime used to mix the two fields.
            return first * 157 + second;
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (!(obj instanceof IntPair)) {
                return false; // also covers obj == null
            }
            IntPair other = (IntPair) obj;
            return other.first == first && other.second == second;
        }
    }

    /**
     * Partitions on {@code first} only, so all pairs sharing the original key
     * are routed to the same reducer.
     */
    public static class FirstPartitioner extends Partitioner<IntPair, IntWritable> {
        @Override
        public int getPartition(IntPair key, IntWritable value, int numPartitions) {
            // Mask the sign bit instead of Math.abs: when first*127 overflows to
            // Integer.MIN_VALUE, Math.abs returns a negative number, which would
            // yield an invalid negative partition index.
            return (key.first * 127 & Integer.MAX_VALUE) % numPartitions;
        }
    }

    /**
     * Groups reducer input by {@code first} only, so every value belonging to
     * one original key arrives in a single reduce() call.
     */
    public static class GroupingComparator extends WritableComparator {
        public GroupingComparator() {
            // true: let WritableComparator instantiate IntPair objects for us.
            super(IntPair.class, true);
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            IntPair left = (IntPair) a;
            IntPair right = (IntPair) b;
            // Compare only the first field; second is deliberately ignored.
            return Integer.compare(left.first, right.first);
        }
    }

    /** Parses "first second" lines and emits (IntPair(first, second), second). */
    public static class WordCountMap extends
            Mapper<LongWritable, Text, IntPair, IntWritable> {
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer token = new StringTokenizer(value.toString());
            // Locals instead of instance fields: parseInt avoids needless boxing.
            int first = Integer.parseInt(token.nextToken());
            int second = Integer.parseInt(token.nextToken());
            context.write(new IntPair(first, second), new IntWritable(second));
        }
    }

    /** Emits (first, value) for every value in the group; values arrive sorted. */
    public static class WordCountReduce extends
            Reducer<IntPair, IntWritable, IntWritable, IntWritable> {
        public void reduce(IntPair key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            IntWritable outKey = new IntWritable(key.first);
            for (IntWritable val : values) {
                context.write(outKey, val);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated new Job(conf) constructor.
        Job job = Job.getInstance(conf, "GroupDemo");
        job.setJarByClass(GroupDemo.class);
        job.setMapOutputKeyClass(IntPair.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        job.setPartitionerClass(FirstPartitioner.class);
        job.setGroupingComparatorClass(GroupingComparator.class);
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path("/data/demo.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/data/out"));
        // Propagate success/failure to the shell instead of always exiting 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}