This article sorts numbers mainly by relying on the mechanism built into MapReduce: the framework sorts map output keys during the shuffle phase, so emitting each input number as the map output key is enough to obtain a globally sorted result with a single reducer (or with a range partitioner spread over several reducers).
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Sort {
    // The map turns each input value into an IntWritable and emits it as the
    // output key; the framework then sorts these keys during the shuffle.
    public static class Map extends Mapper<Object, Text, IntWritable, IntWritable> {
        private static IntWritable data = new IntWritable();

        // map function: parse one number per line and emit (number, 1)
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            data.set(Integer.parseInt(line));
            context.write(data, new IntWritable(1));
        }
    }
    // The reduce copies the input key to the output value and emits it once per
    // element of the value list, so duplicate numbers are preserved; the global
    // linenum counter supplies the rank of each key in the sorted order.
    public static class Reduce extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private static IntWritable linenum = new IntWritable(1);

        // reduce function: emit (rank, number) for every occurrence of the key
        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            for (IntWritable val : values) {
                context.write(linenum, key);
                linenum = new IntWritable(linenum.get() + 1);
            }
        }
    }
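    // Custom partitioner: routes keys to reduce tasks by value range (hard-coded
    // buckets for keys 1-61, everything else to partition 0), so that each
    // reducer's output file covers a contiguous range of values. It only takes
    // effect if it is registered on the job (see the commented-out lines in main).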
    public static class MyPartition extends Partitioner<IntWritable, IntWritable> {
        @Override
        public int getPartition(IntWritable key, IntWritable value, int numPartitions) {
            if (key.get() >= 1 && key.get() <= 11) {
                return 1;
            }
            if (key.get() >= 12 && key.get() <= 21) {
                return 2;
            }
            if (key.get() >= 22 && key.get() <= 31) {
                return 3;
            }
            if (key.get() >= 32 && key.get() <= 41) {
                return 4;
            }
            if (key.get() >= 42 && key.get() <= 51) {
                return 5;
            }
            if (key.get() >= 52 && key.get() <= 61) {
                return 6;
            }
            return 0;
            /*
            System.out.print(numPartitions);
            int MaxNumber = 100;
            int bound = MaxNumber / numPartitions + 1;
            int keynumber = key.get();
            for (int i = 0; i < numPartitions; i++) {
                if (keynumber < bound * i && keynumber >= bound * (i - 1))
                    return i - 1;
            }
            return 0;
            */
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        //conf.setBoolean("mapred.compress.map.output", true);
        //conf.set("mapred.compress.map.output", "true"); // compress map output to reduce shuffle network traffic
        //conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
        Job job = Job.getInstance(conf, "Data Sort");
        job.setJarByClass(Sort.class);
        // set the Mapper and Reducer classes
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        // job.setPartitionerClass(MyPartition.class);
        // job.setNumReduceTasks(10);
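        // To enable the range partitioner, uncomment both lines above together;
        // MyPartition returns partition indices up to 6, so at least 7 reduce
        // tasks are needed for every returned index to map to a real reducer.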
        // set the output key/value types
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        // set the input and output paths
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        //FileOutputFormat.setCompressOutput(job, true); // compress the final job output
        //FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
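A minimal usage sketch (the jar name and HDFS paths below are placeholders, not from the original article): package the class into sort.jar and submit it with

hadoop jar sort.jar Sort /user/hadoop/sort_in /user/hadoop/sort_out

If each input file holds one integer per line, for example 2, 32, 654, 32, 15, the default single-reducer run produces one tab-separated output file whose key is the rank and whose value is the number, in ascending order:

1   2
2   15
3   32
4   32
5   654

Duplicates are preserved because the reducer writes the key once for every element of its value list.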