MapReduce Secondary Sort Example

This article presents a secondary sort example implemented with Hadoop MapReduce. The whole input line serves as the map output key; a custom partitioner routes records by the first field, and a custom sort comparator orders keys by the first field and then the second. This pattern suits scenarios where the output must be precisely ordered on more than one column.

Example code:

package com.xfyan.three;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

/**
 * Secondary sort.
 *
 * Input data:
 * 4 3
 * 4 2
 * 4 1
 * 2 3
 * 2 7
 *
 * Sorted output:
 * 2 3
 * 2 7
 * 4 1
 * 4 2
 * 4 3
 */
public class SecondarySort {
	/** Emits the whole input line as the key and NullWritable as the value. */
	public static class SecondaryMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			context.write(value, NullWritable.get());
		}
	}
	
	/** Partitions on the first field only, so rows sharing a first column reach the same reducer. */
	public static class KeyPartitioner extends HashPartitioner<Text, NullWritable> {
		@Override
		public int getPartition(Text key, NullWritable value, int numReduceTasks) {
			return (key.toString().split(" ")[0].hashCode() & Integer.MAX_VALUE) % numReduceTasks;
		}
	}

	/** Sorts keys numerically by the first field, breaking ties on the second. */
	public static class SortComparator extends WritableComparator {
		protected SortComparator() {
			super(Text.class, true);
		}

		@Override
		public int compare(WritableComparable a, WritableComparable b) {
			String[] left = a.toString().split(" ");
			String[] right = b.toString().split(" ");
			int byFirstField = Integer.compare(Integer.parseInt(left[0]), Integer.parseInt(right[0]));
			if (byFirstField != 0) {
				return byFirstField;
			}
			return Integer.compare(Integer.parseInt(left[1]), Integer.parseInt(right[1]));
		}
	}

	/** Emits each sorted key once, moving it to the value position for clean output. */
	public static class SecondaryReducer extends Reducer<Text, NullWritable, NullWritable, Text> {
		@Override
		protected void reduce(Text key, Iterable<NullWritable> values, Context context)
				throws IOException, InterruptedException {
			context.write(NullWritable.get(), key);
		}
	}
	
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();

		// Job.getInstance replaces the deprecated new Job(conf, ...) constructor.
		Job job = Job.getInstance(conf, "secondary sort");
		job.setJarByClass(SecondarySort.class);
		job.setMapperClass(SecondaryMapper.class);
		job.setReducerClass(SecondaryReducer.class);
		job.setPartitionerClass(KeyPartitioner.class);
		job.setSortComparatorClass(SortComparator.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(NullWritable.class);
		job.setOutputKeyClass(NullWritable.class);
		job.setOutputValueClass(Text.class);

		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);

		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// A single reducer produces one globally sorted file; KeyPartitioner
		// only takes effect when more than one reducer is configured.
		job.setNumReduceTasks(1);
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
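
To run the example, package it into a jar and submit it with the standard launcher, e.g. hadoop jar secondarysort.jar com.xfyan.three.SecondarySort <input> <output> (the jar name and the HDFS paths here are placeholders). The sketch below is a quick local sanity check of the comparator's ordering; it is not part of the original program, and ComparatorCheck is a hypothetical helper placed in the same package so the protected constructor is visible:

package com.xfyan.three;

import org.apache.hadoop.io.Text;

// Hypothetical helper class, not part of the original job.
public class ComparatorCheck {
	public static void main(String[] args) {
		SecondarySort.SortComparator cmp = new SecondarySort.SortComparator();
		// First column decides: "2 7" sorts before "4 1".
		System.out.println(cmp.compare(new Text("2 7"), new Text("4 1")) < 0); // true
		// Second column breaks the tie: "4 1" sorts before "4 3".
		System.out.println(cmp.compare(new Text("4 1"), new Text("4 3")) < 0); // true
	}
}

Using the whole line as the key with a NullWritable value is one way to implement secondary sort; the more common composite-key approach also sets a grouping comparator, but that is unnecessary here because every distinct line forms its own reduce group.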

