package com.bigdata.hadoop.mapred;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Data in hello3:
 * 3 2
 * 3 3
 * 3 1
 * 2 1
 * 2 2
 * 1 1
 * Expected output after sorting — K3 ascending, and V3 ascending as well:
 * 1 1
 * 2 1
 * 2 2
 * 3 1
 * 3 2
 * 3 3
 * The map phase only sorts by K2 and never sorts V, so we define a custom K2
 * that wraps the original K2 and V2 together into one composite key.
 *
 * @author yinhao
 *
 */
public class SortApp {
    private static final String INPUT_PATH = "hdfs://hadoop1:9000/dir1/hello3";
    private static final String OUTPUT_PATH = "hdfs://hadoop1:9000/dir1/sort_out";

    /**
     * Configures and submits the sort job: reads tab-separated (first, second)
     * pairs and emits them wrapped in a composite key so the shuffle sorts on
     * both fields.
     *
     * @param args unused
     * @throws Exception if HDFS access or job submission fails
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // Job.getInstance(...) replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(configuration, SortApp.class.getSimpleName());
        final FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH),
                configuration);
        // Delete any previous output so the job does not fail on an existing path.
        fileSystem.delete(new Path(OUTPUT_PATH), true);
        job.setJarByClass(SortApp.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(NewK2.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(NewK2.class);
        job.setOutputValueClass(NullWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        // Propagate job success/failure to the caller's exit status instead of
        // silently discarding the waitForCompletion result.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Parses each tab-separated input line into a composite {@link NewK2} key;
     * the value carries no information, so {@link NullWritable} is emitted.
     */
    public static class MyMapper extends
            Mapper<LongWritable, Text, NewK2, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, NewK2, NullWritable>.Context context)
                throws IOException, InterruptedException {
            final String line = value.toString();
            final String[] splited = line.split("\t");
            // Skip blank/malformed records instead of throwing
            // ArrayIndexOutOfBoundsException and failing the task.
            if (splited.length < 2) {
                return;
            }
            context.write(
                    new NewK2(Long.parseLong(splited[0]), Long
                            .parseLong(splited[1])), NullWritable.get());
        }
    }

    /**
     * Identity reducer: the composite keys arrive already sorted on both
     * fields, so each key is written out unchanged.
     */
    public static class MyReducer extends
            Reducer<NewK2, NullWritable, NewK2, NullWritable> {
        @Override
        protected void reduce(
                NewK2 key,
                Iterable<NullWritable> values,
                Reducer<NewK2, NullWritable, NewK2, NullWritable>.Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Composite key wrapping (first, second); ordered ascending by first, then
     * by second. Defines {@code hashCode}/{@code equals} consistent with
     * {@code compareTo} so partitioning and grouping on this key work
     * correctly when more than one reducer is used.
     */
    public static class NewK2 implements WritableComparable<NewK2> {
        long first;
        long second;

        /** No-arg constructor required by Hadoop's Writable deserialization. */
        public NewK2() {
        }

        public NewK2(long first, long second) {
            this.first = first;
            this.second = second;
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeLong(first);
            out.writeLong(second);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            this.first = in.readLong();
            this.second = in.readLong();
        }

        @Override
        public int compareTo(NewK2 o) {
            // Use Long.compare instead of casting a long difference to int:
            // (int) (a - b) overflows/truncates for large gaps and can report
            // the wrong order.
            int cmp = Long.compare(this.first, o.first);
            if (cmp != 0) {
                return cmp;
            }
            return Long.compare(this.second, o.second);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof NewK2)) {
                return false;
            }
            NewK2 other = (NewK2) obj;
            return this.first == other.first && this.second == other.second;
        }

        @Override
        public int hashCode() {
            // Required: the default HashPartitioner partitions by hashCode, so
            // identity hashing would scatter equal keys across reducers.
            return 31 * Long.hashCode(first) + Long.hashCode(second);
        }

        @Override
        public String toString() {
            return this.first + "\t" + this.second;
        }
    }
}
// MapReduce custom secondary sort example.