This article sorts numbers mainly by relying on the mechanism built into MapReduce: the framework sorts map output keys during the shuffle phase, so emitting each input number as the map output key is enough to obtain a globally sorted result with a single reducer (or with a range partitioner spread over several reducers).
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Sort {
    // The map turns each input value into an IntWritable and emits it as the
    // output key; the framework then sorts these keys during the shuffle.
    public static class Map extends Mapper<Object, Text, IntWritable, IntWritable> {
        private static IntWritable data = new IntWritable();

        // map function: parse one number per line and emit (number, 1)
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            data.set(Integer.parseInt(line));
            context.write(data, new IntWritable(1));
        }
    }
    // The reduce copies the input key to the output value and emits it once per
    // element of the value list, so duplicate numbers are preserved; the global
    // linenum counter supplies the rank of each key in the sorted order.
    public static class Reduce extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private static IntWritable linenum = new IntWritable(1);

        // reduce function: emit (rank, number) for every occurrence of the key
        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            for (IntWritable val : values) {
                context.write(linenum, key);
                linenum = new IntWritable(linenum.get() + 1);
            }
        }
    }
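    // Custom partitioner: routes keys to reduce tasks by value range (hard-coded
    // buckets for keys 1-61, everything else to partition 0), so that each
    // reducer's output file covers a contiguous range of values. It only takes
    // effect if it is registered on the job (see the commented-out lines in main).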
    public static class MyPartition extends Partitioner<IntWritable, IntWritable> {
        @Override
        public int getPartition(IntWritable key, IntWritable value, int numPartitions) {
            if (key.get() >= 1 && key.get() <= 11) {
                return 1;
            }
            if (key.get() >= 12 && key.get() <= 21) {
                return 2;
            }
            if (key.get() >= 22 && key.get() <= 31) {
                return 3;
            }
            if (key.get() >= 32 && key.get() <= 41) {
                return 4;
            }
            if (key.get() >= 42 && key.get() <= 51) {
                return 5;
            }
            if (key.get() >= 52 && key.get() <= 61) {
                return 6;
            }
            return 0;
            /*
            System.out.print(numPartitions);
            int MaxNumber = 100;
            int bound = MaxNumber / numPartitions + 1;
            int keynumber = key.get();
            for (int i = 0; i < numPartitions; i++) {
                if (keynumber < bound * i && keynumber >= bound * (i - 1))
                    return i - 1;
            }
            return 0;
            */
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        //conf.setBoolean("mapred.compress.map.output", true);
        //conf.set("mapred.compress.map.output", "true"); // compress map output to reduce shuffle network traffic
        //conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
        Job job = Job.getInstance(conf, "Data Sort");
        job.setJarByClass(Sort.class);
        // set the Mapper and Reducer classes
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        // job.setPartitionerClass(MyPartition.class);
        // job.setNumReduceTasks(10);
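        // To enable the range partitioner, uncomment both lines above together;
        // MyPartition returns partition indices up to 6, so at least 7 reduce
        // tasks are needed for every returned index to map to a real reducer.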
        // set the output key/value types
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        // set the input and output paths
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        //FileOutputFormat.setCompressOutput(job, true); // compress the final job output
        //FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
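A minimal usage sketch (the jar name and HDFS paths below are placeholders, not from the original article): package the class into sort.jar and submit it with

hadoop jar sort.jar Sort /user/hadoop/sort_in /user/hadoop/sort_out

If each input file holds one integer per line, for example 2, 32, 654, 32, 15, the default single-reducer run produces one tab-separated output file whose key is the rank and whose value is the number, in ascending order:

1   2
2   15
3   32
4   32
5   654

Duplicates are preserved because the reducer writes the key once for every element of its value list.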