SortMapper:
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class SortMapper extends Mapper<LongWritable, Text, PairSort, Text> {
    /**
     * The custom map phase: wrap each record in our custom key2 (PairSort) so the
     * framework sorts on key2 during the shuffle. Sample input lines look like:
     *   a 1
     *   a 9
     *   b 3
     *   ...
     * @param key     byte offset of the line within the input split
     * @param value   one raw line of input text
     * @param context used to emit (key2, value2) pairs and to access counters
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // The context gives us access to our counters.
        Counter counter = context.getCounter("MAP_COUNTER", "MAP_INPUT_RECORDS");
        // Count how many records the map phase received.
        counter.increment(1L);
        PairSort pairSort = new PairSort();
        String[] split = value.toString().split("\t");
        pairSort.setFirst(split[0]);
        pairSort.setSecond(Integer.parseInt(split[1]));
        // Our declared generics say key2/value2 are PairSort/Text.
        context.write(pairSort, value);
    }
}
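To make the mapping concrete, the sketch below (MapperEmitDemo is a hypothetical demo class, not part of the job) shows what a single input line turns into: key2 is a PairSort built from the two tab-separated fields, and value2 is the untouched line.
public class MapperEmitDemo {
    public static void main(String[] args) {
        String line = "a\t1";                       // one record from sort.txt
        String[] split = line.split("\t");
        PairSort key2 = new PairSort();
        key2.setFirst(split[0]);                    // "a"
        key2.setSecond(Integer.parseInt(split[1])); // 1
        // The real job would call context.write(key2, value); here we just print both.
        System.out.println(key2 + " | " + line);
    }
}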
PairSort:
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class PairSort implements WritableComparable<PairSort> {
    private String first;
    private Integer second;

    // e.g. two records that compare equal: (a, 9) and (a, 9)
    @Override
    public String toString() {
        return first + "\t" + second;
    }

    public String getFirst() {
        return first;
    }

    public void setFirst(String first) {
        this.first = first;
    }

    public Integer getSecond() {
        return second;
    }

    public void setSecond(Integer second) {
        this.second = second;
    }

    /**
     * This method implements our comparator.
     * @param o the other key to compare against
     * @return negative, zero, or positive, as usual for compareTo
     */
    @Override
    public int compareTo(PairSort o) {
        // Compare the first column.
        int i = this.first.compareTo(o.first);
        if (i != 0) {
            // The first columns differ (e.g. "a" vs "b"), so they decide the order:
            // returning the result sorts the data by the first column, ascending.
            return i;
        } else {
            // The first columns are equal (e.g. "a" vs "a"),
            // so fall back to comparing the second column.
            int i1 = this.second.compareTo(o.second);
            // compareTo gives ascending order by default; negate it for descending.
            return -i1;
        }
    }

    /**
     * Serialization.
     * @param out
     * @throws IOException
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(first);
        out.writeInt(second);
    }

    /**
     * Deserialization: must read the fields in the same order they were written.
     * @param in
     * @throws IOException
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.first = in.readUTF();
        this.second = in.readInt();
    }
}
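As a quick sanity check of compareTo, sorting a handful of keys in plain Java (PairSortOrderDemo is a hypothetical demo, assuming the PairSort class above is on the classpath) reproduces the ordering seen in the expected output: first column ascending, second column descending.
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class PairSortOrderDemo {
    private static PairSort of(String first, int second) {
        PairSort p = new PairSort();
        p.setFirst(first);
        p.setSecond(second);
        return p;
    }
    public static void main(String[] args) {
        List<PairSort> keys = new ArrayList<>();
        keys.add(of("a", 1));
        keys.add(of("b", 3));
        keys.add(of("a", 9));
        Collections.sort(keys); // uses PairSort.compareTo
        for (PairSort p : keys) {
            System.out.println(p); // prints: a 9, then a 1, then b 3
        }
    }
}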
MyCombiner:
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Note: for a custom combiner, both the input types and the output types must be
 * the map output types (key2, value2). A combiner can cut down the number of
 * records shipped to the reducers; this one, however, is a pure pass-through and
 * only demonstrates where the combine step runs.
 */
public class MyCombiner extends Reducer<PairSort, Text, PairSort, Text> {
    @Override
    protected void reduce(PairSort key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // The combine logic goes here; we simply forward every record unchanged.
        for (Text value : values) {
            context.write(key, value);
        }
    }
}
SortReducer:
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class SortReducer extends Reducer<PairSort, Text, PairSort, NullWritable> {
    public static enum Counter {
        REDUCE_INPUT_KEY_TOTAL,
        REDUCE_INPUT_VALUE_TOTAL
    }

    /*
     * Keys that compare equal are grouped: the two "a 9" records arrive as a single
     * key with two values, i.e. (pairSort(a,9), <"a\t9", "a\t9">), so the key is
     * written once per value.
     */
    @Override
    protected void reduce(PairSort key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        context.getCounter(Counter.REDUCE_INPUT_KEY_TOTAL).increment(1L);
        for (Text value : values) {
            context.getCounter(Counter.REDUCE_INPUT_VALUE_TOTAL).increment(1L);
            context.write(key, NullWritable.get());
        }
    }
}
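Because grouping on the reduce side also goes through PairSort.compareTo, records whose keys compare equal arrive in one reduce call. The local simulation below (ShuffleGroupDemo is a hypothetical demo; a TreeMap stands in for the shuffle) shows the two "a 9" records landing under a single key, which is why REDUCE_INPUT_KEY_TOTAL should end up at 7 for the 8 sample records.
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
public class ShuffleGroupDemo {
    public static void main(String[] args) {
        String[] lines = {"a\t1", "a\t9", "b\t3", "a\t7", "b\t8", "b\t10", "a\t5", "a\t9"};
        // A TreeMap keyed by PairSort groups and orders exactly as compareTo dictates
        // (it never touches equals/hashCode, just like the MapReduce sort).
        Map<PairSort, List<String>> groups = new TreeMap<>();
        for (String line : lines) {
            String[] split = line.split("\t");
            PairSort key = new PairSort();
            key.setFirst(split[0]);
            key.setSecond(Integer.parseInt(split[1]));
            groups.computeIfAbsent(key, k -> new ArrayList<>()).add(line);
        }
        for (Map.Entry<PairSort, List<String>> e : groups.entrySet()) {
            System.out.println(e.getKey() + " -> " + e.getValue().size() + " value(s)");
        }
        // Prints 7 groups in sorted order; "a 9" maps to 2 values.
    }
}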
SortMain:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class SortMain extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        // Get the job object.
        Job job = Job.getInstance(super.getConf(), "pairSort");
        job.setJarByClass(SortMain.class);
        // Step 1: read the files and parse them into key/value pairs.
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("file:///F:\\排序\\input"));
        // Step 2: set our mapper class.
        job.setMapperClass(SortMapper.class);
        // Declare the key2/value2 output types.
        job.setMapOutputKeyClass(PairSort.class);
        job.setMapOutputValueClass(Text.class);
        // Steps 3 to 6 are left at their defaults, except step 5, the combine:
        job.setCombinerClass(MyCombiner.class);
        // Step 7: the reduce phase.
        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(PairSort.class);
        job.setOutputValueClass(NullWritable.class);
        // Step 8: write the output.
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("file:///F:\\排序\\outSort"));
        // Submit the job.
        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int run = ToolRunner.run(new Configuration(), new SortMain(), args);
        System.exit(run);
    }
}
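If the counter totals are wanted programmatically rather than from the console summary that waitForCompletion(true) prints, a few lines like these could be added inside run() after the job finishes (a sketch using the standard Counters API; fully qualified to avoid an extra import):
// Inside run(), after job.waitForCompletion(true):
org.apache.hadoop.mapreduce.Counters counters = job.getCounters();
long mapInputs = counters.findCounter("MAP_COUNTER", "MAP_INPUT_RECORDS").getValue();
long reduceKeys = counters.findCounter(SortReducer.Counter.REDUCE_INPUT_KEY_TOTAL).getValue();
System.out.println("map input records = " + mapInputs + ", reduce key groups = " + reduceKeys);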
Sample input: sort.txt (fields are tab-separated)
a 1
a 9
b 3
a 7
b 8
b 10
a 5
a 9
Expected output
a 9
a 9
a 7
a 5
a 1
b 10
b 8
b 3