Hadoop虽然 已经实现了一些非常有用的Writable,如Text 、IntWritable、NullWritable等,但有时候需要构造一些更加复杂的结果存入context中,使用这些方法可能就不是那么方便。
自定义对象分为Key和Value二种情况
如果需要自定义value对象,则实现Writable接口(注意:Writable不是泛型接口),实现的接口为:public class FlowBean implements Writable
如果需要将自定义key对象,则实现WritableComparable接口,因为mapreduce框架的shuffle过程会根据key对数据进行排序,此时,自定义对象实现的接口应该是:public class FlowBean implements WritableComparable<FlowBean>
WritableComparable源码public interface WritableComparable<T> extends Writable, Comparable<T> {}
例如,我需要解析一个维度的list,将各维度值之间用','拼接,并且在前面加上维度的名称。实现Writable并不难,主要是一定要实现toString()、compareTo()、write()、readFields()这些方法,并且反序列化时,从流中读取到的各个字段的顺序应该与序列化时写出去的顺序保持一致。
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * A Hadoop key/value type holding a single comma-joined dimension string:
 * the dimension name followed by each dimension value, e.g. "dim,v1,v2".
 *
 * <p>Implements {@link WritableComparable} so it can be used as a MapReduce
 * key: the shuffle sorts by {@link #compareTo}, and partitioning relies on
 * {@link #hashCode}, so equals/hashCode are overridden consistently with
 * the comparison order.
 */
public class AuroraWritable implements WritableComparable<AuroraWritable> {

    /** The serialized payload: dimension name plus values joined with ','. */
    private String dimValue;

    /** Required no-arg constructor for Hadoop's reflective instantiation. */
    public AuroraWritable() {
    }

    /**
     * Populates this writable from a dimension name, the ordered list of
     * dimension field names, and the map of field name to field value.
     *
     * @param dim     the dimension name placed at the front of the string
     * @param dimName ordered field names to look up in {@code values}
     * @param values  field name to field value; missing keys render as "null"
     */
    public void set(String dim, List<String> dimName, Map<String, String> values) {
        this.dimValue = parseDimension(dim, dimName, values);
    }

    public String getDimValue() {
        return dimValue;
    }

    public void setDimValue(String dimValue) {
        this.dimValue = dimValue;
    }

    /**
     * Builds "dim,v1,v2,..." by looking up each name of {@code dimName} in
     * {@code values}. Uses a StringBuilder instead of repeated String
     * concatenation (the original was O(n^2) in the number of fields).
     *
     * <p>For an empty {@code dimName} this returns {@code dim} rather than
     * {@code null} (the original returned null here, which would later
     * throw a NullPointerException inside {@link #write}).
     *
     * @return the joined dimension string, never {@code null}
     */
    public String parseDimension(String dim, List<String> dimName, Map<String, String> values) {
        StringBuilder sb = new StringBuilder(dim);
        for (String name : dimName) {
            // Map.get may return null for a missing key; String.valueOf
            // renders it as "null", matching the original concatenation.
            sb.append(',').append(values.get(name));
        }
        return sb.toString();
    }

    @Override
    public String toString() {
        return dimValue;
    }

    /**
     * Orders by the underlying string; null sorts before any non-null value.
     *
     * <p>The original compared against {@code String.valueOf(other)}, which
     * detoured through {@link #toString()} and silently converted a null
     * field on the other side into the literal string "null". This compares
     * the fields directly and handles null explicitly.
     */
    @Override
    public int compareTo(AuroraWritable other) {
        if (this.dimValue == null) {
            return other.dimValue == null ? 0 : -1;
        }
        if (other.dimValue == null) {
            return 1;
        }
        return this.dimValue.compareTo(other.dimValue);
    }

    /**
     * Consistent with {@link #compareTo}: two instances are equal exactly
     * when their dimValue strings are equal (or both null).
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof AuroraWritable)) {
            return false;
        }
        AuroraWritable other = (AuroraWritable) o;
        return dimValue == null ? other.dimValue == null : dimValue.equals(other.dimValue);
    }

    /**
     * Required alongside equals: Hadoop's default HashPartitioner routes
     * records by the key's hashCode, so equal keys must hash identically
     * to land in the same reducer partition.
     */
    @Override
    public int hashCode() {
        return dimValue == null ? 0 : dimValue.hashCode();
    }

    /**
     * Serializes the field. DataOutput.writeUTF rejects null, so an unset
     * value is written as the empty string instead of crashing the task.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(dimValue == null ? "" : dimValue);
    }

    /**
     * Deserializes the field; the read order matches the write order in
     * {@link #write}, as the Writable contract requires.
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.dimValue = dataInput.readUTF();
    }
}