// ===================== TopNMapper.java =====================
package mr.topN;
import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/*
* TreeMap save key-value
*/
/**
 * Mapper that keeps the local top-N largest integers seen in its split.
 * Input lines contain space-separated integers; output is (NullWritable, value)
 * pairs emitted in {@link #cleanup} so every mapper contributes at most N values.
 */
public class TopNMapper extends Mapper<LongWritable, Text, NullWritable, IntWritable> {

    /** Number of top values each mapper retains. */
    private static final int TOP_N = 5;

    // Natural (ascending) order TreeMap: firstKey() is the smallest retained
    // value, so evicting it on overflow keeps the TOP_N largest seen so far.
    // NOTE: the number itself is the key, so duplicate values collapse to one
    // entry (same as the original behavior).
    private final TreeMap<Integer, String> repToRecordMap = new TreeMap<Integer, String>();

    /**
     * Parses each input line and folds its integers into the running top-N set.
     *
     * @param key     byte offset of the line (unused)
     * @param value   the input line of space-separated integers
     * @param context task context (unused here; output happens in cleanup)
     */
    @Override
    public void map(LongWritable key, Text value, Context context) {
        String line = value.toString();
        String[] nums = line.split(" ");
        for (String num : nums) {
            String token = num.trim();
            if (token.isEmpty()) {
                // Repeated spaces produce empty tokens; skip them instead of
                // letting Integer.parseInt throw NumberFormatException.
                continue;
            }
            repToRecordMap.put(Integer.parseInt(token), " ");
            // Over capacity: drop the smallest key to keep only the TOP_N largest.
            if (repToRecordMap.size() > TOP_N) {
                repToRecordMap.remove(repToRecordMap.firstKey());
            }
        }
    }

    /**
     * Emits the retained top values once all input records are mapped.
     * Declares the checked exceptions (as the framework's cleanup contract
     * allows) instead of swallowing them with printStackTrace, so a failed
     * write correctly fails the task.
     *
     * @throws IOException          if the write fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Integer i : repToRecordMap.keySet()) {
            context.write(NullWritable.get(), new IntWritable(i));
        }
    }
}
// ===================== TopNReducer.java =====================
package mr.topN;
import java.io.IOException;
import java.util.Comparator;
import java.util.TreeMap;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
public class TopNReducer extends Reducer<NullWritable,IntWritable,NullWritable,IntWritable> {
//create TreeMap
private TreeMap<Integer,String>repToRecordMap=new TreeMap<Integer,String>(new Comparator<Integer>() {
//return plural :a<b
//return 0 :a=b
//return positive number :a>b
public int compare(Integer a, Integer b) {
return b-a;
}
});
public void reduce(NullWritable key,
Iterable<IntWritable>vaules,Context context)throws IOException,InterruptedException{
for (IntWritable value:vaules){
repToRecordMap.put(value.get(),"");
if (repToRecordMap.size()>5){
repToRecordMap.remove(repToRecordMap.lastKey());
}
}
for (Integer i:repToRecordMap.keySet()){
context.write(NullWritable.get(),new IntWritable(i));
}
}
}
// ===================== TopNDriver.java =====================
package mr.topN;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
/**
 * Driver that configures and submits the top-N MapReduce job.
 * Input/output paths may be supplied as the first two CLI arguments; when
 * absent, the original hard-coded defaults are used (backward compatible).
 */
public class TopNDriver {

    // NOTE(review): "intput" looks like a typo of "input", but it may be the
    // literal directory name on disk — left unchanged to preserve behavior.
    private static final String DEFAULT_INPUT = "D:\\mr\\intput";
    private static final String DEFAULT_OUTPUT = "D:\\mr\\output";

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(TopNDriver.class);
        job.setMapperClass(TopNMapper.class);
        job.setReducerClass(TopNReducer.class);
        // A single reducer is required: all mapper output shares one
        // NullWritable key, and the global top-N must be computed in one place.
        job.setNumReduceTasks(1);

        // Map output types.
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Reduce (final) output types.
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Prefer CLI-supplied paths; fall back to the historical defaults.
        String input = args.length >= 2 ? args[0] : DEFAULT_INPUT;
        String output = args.length >= 2 ? args[1] : DEFAULT_OUTPUT;
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}