案例四
使用MapReduce程序读取三个文件中的数据,进行排序(利用shuffle阶段的排序)
注意:shuffle阶段只会按key进行排序,因此map函数需要把待排序的数值作为key输出
map函数
/**
 * Mapper for the sort job.
 *
 * <p>Parses each input line as a {@code long} and emits it as the output key,
 * paired with the constant value 1. The number is placed in the key because the
 * shuffle phase sorts by key.
 */
public class MyMapper extends Mapper<LongWritable, Text, LongWritable, LongWritable> {

    /** Constant value written alongside every key. */
    private static final LongWritable ONE = new LongWritable(1);

    /** Output key reused across calls so no new Writable is allocated per record. */
    private final LongWritable outKey = new LongWritable();

    /**
     * Emits {@code (number, 1)} for one input line.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of input text, expected to hold a single integer
     * @param context sink for the output pair
     * @throws IOException           if writing the output pair fails
     * @throws InterruptedException  if the task is interrupted while writing
     * @throws NumberFormatException if a non-blank line is not a valid long
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Trim first: surrounding whitespace would make Long.parseLong throw.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // A blank line (e.g. a trailing newline in the file) carries no number.
            return;
        }
        outKey.set(Long.parseLong(line));
        context.write(outKey, ONE);
    }
}
reduce函数
/**
 * Reducer for the sort job.
 *
 * <p>Writes the incoming key once for every value grouped under it, so a number
 * that occurs several times in the input also occurs several times in the output.
 */
public class MyReducer extends Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {

    /**
     * Re-emits {@code (key, value)} for each value in the group.
     *
     * @param key     the number shared by this group
     * @param vs      all values that were emitted with {@code key}
     * @param context sink for the output pairs
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(LongWritable key, Iterable<LongWritable> vs, Context context)
            throws IOException, InterruptedException {
        java.util.Iterator<LongWritable> remaining = vs.iterator();
        while (remaining.hasNext()) {
            LongWritable current = remaining.next();
            context.write(key, current);
        }
    }
}
驱动类
/**
 * Driver for the sort job: removes any previous output directory, wires up
 * {@code MyMapper} and {@code MyReducer}, and runs the job to completion.
 */
public class MyDriver {

    /**
     * Configures and submits the job, then exits the JVM with status 0 if the
     * job succeeded and 1 otherwise.
     *
     * @param args command-line arguments; not used
     * @throws Exception if file-system access or job submission/execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path outputPath = new Path("E:/data/sort/output");

        // The output directory is deleted up front so the job can be re-run.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            // Explicit recursive flag; the one-argument delete(Path) is deprecated.
            fs.delete(outputPath, true);
        }

        // Pass conf so the job and the file-system check share one configuration.
        Job job = Job.getInstance(conf, "sort");
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path("E:/data/sort/input/f*"));
        FileOutputFormat.setOutputPath(job, outputPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
运行结果
对上述案例的运行结果进行去重(value使用NullWritable,reduce中每个key只输出一次)
map函数
/**
 * Mapper for the de-duplication job.
 *
 * <p>Parses each input line as a {@code long} and emits it as the output key with
 * a {@link NullWritable} value, so only the number itself is carried forward.
 */
public class MyMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {

    /** Output key reused across calls so no new Writable is allocated per record. */
    private final LongWritable outKey = new LongWritable();

    /**
     * Emits {@code (number, NullWritable)} for one input line.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of input text, expected to hold a single integer
     * @param context sink for the output pair
     * @throws IOException           if writing the output pair fails
     * @throws InterruptedException  if the task is interrupted while writing
     * @throws NumberFormatException if a non-blank line is not a valid long
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Trim first: surrounding whitespace would make Long.parseLong throw.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // A blank line (e.g. a trailing newline in the file) carries no number.
            return;
        }
        outKey.set(Long.parseLong(line));
        context.write(outKey, NullWritable.get());
    }
}
reduce函数
/**
 * Reducer for the de-duplication job.
 *
 * <p>Writes each key exactly once per {@code reduce} call and never reads the
 * grouped values, so repeated occurrences of a number collapse into one output row.
 */
public class MyReducer extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {

    /**
     * Emits the key a single time with a {@link NullWritable} value.
     *
     * @param key     the number shared by this group
     * @param vs      the grouped values; intentionally not iterated
     * @param context sink for the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(LongWritable key, Iterable<NullWritable> vs, Context context)
            throws IOException, InterruptedException {
        final NullWritable placeholder = NullWritable.get();
        context.write(key, placeholder);
    }
}
驱动类
/**
 * Driver for the de-duplication job: removes any previous output directory, wires
 * up {@code MyMapper} and {@code MyReducer}, and runs the job to completion.
 */
public class MyDriver {

    /**
     * Configures and submits the job, then exits the JVM with status 0 if the
     * job succeeded and 1 otherwise.
     *
     * @param args command-line arguments; not used
     * @throws Exception if file-system access or job submission/execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path outputPath = new Path("E:/data/sort/output1");

        // The output directory is deleted up front so the job can be re-run.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            // Explicit recursive flag; the one-argument delete(Path) is deprecated.
            fs.delete(outputPath, true);
        }

        // Pass conf so the job and the file-system check share one configuration.
        Job job = Job.getInstance(conf, "sort");
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.addInputPath(job, new Path("E:/data/sort/input/f*"));
        FileOutputFormat.setOutputPath(job, outputPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
运行结果