Input data:
[root@baolibin hadoop]# hadoop fs -text /input/haha
Warning: $HADOOP_HOME is deprecated.
2 1
3 2
1 3
Code:
package hadoop_2_6_0;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class SortText {
    public static class NewkWritable implements WritableComparable<NewkWritable> {
        long first;
        long second;

        public NewkWritable() {
        }

        public NewkWritable(long first, long second) {
            this.set(first, second);
        }

        public void set(long first, long second) {
            this.first = first;
            this.second = second;
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            this.first = in.readLong();
            this.second = in.readLong();
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeLong(first);
            out.writeLong(second);
        }

        @Override
        public int compareTo(NewkWritable o) {
            //return (int) ((this.first+this.second)-(o.first+o.second)); // ascending by sum
            // sort in descending order by the sum of the two fields
            return (int) ((o.first + o.second) - (this.first + this.second));
        }

        @Override
        public String toString() {
            return first + "";
        }
    }
    public static class MyMapper extends Mapper<LongWritable, Text, NewkWritable, LongWritable> {
        NewkWritable k2 = new NewkWritable();
        LongWritable v2 = new LongWritable();

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, NewkWritable, LongWritable>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] splited = line.split("\t");
            k2.set(Long.parseLong(splited[0]), Long.parseLong(splited[1]));
            v2.set(Long.parseLong(splited[1]));
            // k2's toString only prints first, so the emitted key column looks normal.
            context.write(k2, v2);
        }
    }
    public static class MyReducer extends Reducer<NewkWritable, LongWritable, NewkWritable, LongWritable> {
        @Override
        protected void reduce(NewkWritable k2, Iterable<LongWritable> v2s,
                Reducer<NewkWritable, LongWritable, NewkWritable, LongWritable>.Context context)
                throws IOException, InterruptedException {
            Iterator<LongWritable> iterator = v2s.iterator();
            // emit the first (and, for this input, only) value of each key
            if (iterator.hasNext()) {
                LongWritable v2 = iterator.next();
                context.write(k2, v2);
            }
        }
    }
    public static void main(String[] args) throws Exception {
        //String INPUT_PATH = args[0];
        //String OUTPUT_PATH = args[1];
        String INPUT_PATH = "/input/haha";
        String OUTPUT_PATH = "/sort_out2";
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, SortText.class.getSimpleName());
        job.setJarByClass(SortText.class);
        // 1.1 input path and input format
        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.1.100:9000" + INPUT_PATH));
        job.setInputFormatClass(TextInputFormat.class);
        // 1.2 mapper and its output types
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(NewkWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        // 1.3 partition, 1.4 sort/group, 1.5 combine: framework defaults
        // 2.2 reducer and job output types
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(NewkWritable.class);
        job.setOutputValueClass(LongWritable.class);
        // 2.3 output path and output format
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.100:9000" + OUTPUT_PATH));
        job.setOutputFormatClass(TextOutputFormat.class);
        job.waitForCompletion(true);
    }
}
The result:
[root@baolibin hadoop]# hadoop fs -text /sort_out2/part-r*
Warning: $HADOOP_HOME is deprecated.
3 2
1 3
2 1
Analysis:
The output is sorted in descending order by the sum of the two numbers on each line: 3+2=5, then 1+3=4, then 2+1=3.
We define a custom key type that implements the WritableComparable interface and supplies the comparison method.
Writing the comparison like this is not safe:
return (int) ((o.first + o.second) - (this.first + this.second));
The long difference can overflow when truncated to int; it works here only because this example sorts a small amount of data with small values. A safer variant is sketched right after this.
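As an aside (my addition, not from the original code), a minimal overflow-safe compareTo can delegate to Long.compare; reversing the argument order preserves the descending sort:

@Override
public int compareTo(NewkWritable o) {
    // Long.compare avoids the unsafe int cast; arguments reversed for descending order
    return Long.compare(o.first + o.second, this.first + this.second);
}

For extreme inputs the sums themselves could still overflow, but for data in this range Long.compare removes the truncation problem.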
The map method splits each line on the tab character and picks out the fields to compare:
k2.set(Long.parseLong(splited[0]), Long.parseLong(splited[1]));
v2.set(Long.parseLong(splited[1]));
context.write(k2, v2);
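To see the ordering concretely, here is a small standalone check (a hypothetical driver of my own, assuming SortText is on the classpath) comparing the three keys the mapper emits for the sample input:

package hadoop_2_6_0;

public class CompareToDemo {
    public static void main(String[] args) {
        SortText.NewkWritable a = new SortText.NewkWritable(2, 1); // sum 3
        SortText.NewkWritable b = new SortText.NewkWritable(3, 2); // sum 5
        SortText.NewkWritable c = new SortText.NewkWritable(1, 3); // sum 4
        // descending by sum: b (5) sorts before c (4), which sorts before a (3)
        System.out.println(b.compareTo(c) < 0); // true
        System.out.println(c.compareTo(a) < 0); // true
    }
}

This matches the job output order: 3 2, then 1 3, then 2 1.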
The reduce method only needs to take one step of the iterator and emit the value:
Iterator<LongWritable> iterator = v2s.iterator();
if (iterator.hasNext()) {
    LongWritable v2 = iterator.next();
    context.write(k2, v2);
}
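One caveat (my observation, not from the original post): the grouping comparator defaults to the key's compareTo, so two different lines whose sums are equal would land in the same reduce call, and the single next() above would drop every value after the first. Looping over the iterable handles that case; a minimal sketch:

@Override
protected void reduce(NewkWritable k2, Iterable<LongWritable> v2s,
        Reducer<NewkWritable, LongWritable, NewkWritable, LongWritable>.Context context)
        throws IOException, InterruptedException {
    // write out every value grouped under this key, not just the first
    for (LongWritable v2 : v2s) {
        context.write(k2, v2);
    }
}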