Hadoop: Output data to multiple directories

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

/**
 * Reducer that writes every value of a key group through
 * {@link MultipleOutputs}, using a base output path derived from the key so
 * that each key's records end up under their own directory.
 *
 * <p>For a key {@code k} the base output path is {@code k + "/part"}. Each
 * record is written with a {@link NullWritable} key and the original value.
 * Nothing is written through the regular reducer {@code Context}.
 */
public class ImportDataFromMongoReducer extends Reducer<Text, Text, Text, Text> {

	private static final Log LOG = LogFactory
			.getLog(ImportDataFromMongoReducer.class);

	/** Suffix appended to the key to form the base output path. */
	private static final String PART_SUFFIX = "/part";

	/**
	 * Writer used for all output of this reducer. Typed with the key/value
	 * classes that are actually written so that calls to {@code write} are
	 * checked by the compiler.
	 */
	private MultipleOutputs<NullWritable, Text> out;

	/**
	 * Creates the {@link MultipleOutputs} instance bound to this task.
	 *
	 * @param context the reducer context of the running task
	 */
	// The reducer is declared with Text output keys while records are written
	// with NullWritable keys, so the constructor is invoked as a raw type;
	// this is the only unchecked spot in the class.
	@Override
	@SuppressWarnings({ "unchecked", "rawtypes" })
	public void setup(Context context) {
		out = new MultipleOutputs(context);
	}

	/**
	 * Builds the base output path for the given key.
	 *
	 * @param k the current reduce key
	 * @return the key's string form followed by {@code "/part"}
	 */
	private String generateFileName(Text k) {
		return k.toString() + PART_SUFFIX;
	}

	/**
	 * Writes every value of the group to the output path derived from the key.
	 *
	 * @param pKey     the key of the current group
	 * @param pValues  the values associated with the key
	 * @param pContext the reducer context (not written to directly)
	 * @throws IOException          if writing a record fails
	 * @throws InterruptedException if the write is interrupted
	 */
	@Override
	public void reduce(final Text pKey, final Iterable<Text> pValues,
			final Context pContext) throws IOException, InterruptedException {

		for (final Text value : pValues) {
			// NOTE(review): the path is deliberately computed per record and
			// not hoisted out of the loop - Hadoop may update the key object
			// while the values are iterated (e.g. with a custom grouping
			// comparator); confirm before changing.
			out.write(NullWritable.get(), value, generateFileName(pKey));
		}
	}

	/**
	 * Closes the {@link MultipleOutputs} instance so that all opened record
	 * writers are flushed and released.
	 *
	 * @param context the reducer context of the running task
	 * @throws IOException          if closing a writer fails
	 * @throws InterruptedException if closing is interrupted
	 */
	@Override
	protected void cleanup(Context context) throws IOException,
			InterruptedException {
		// Guard against cleanup being invoked without a prior setup
		// (for example from a test harness).
		if (out != null) {
			out.close();
		}
	}
}

 

 

 

 

 

 

 

 

 

 

 

References

http://hadoop.apache.org/docs/current/api/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.html

http://www.infoq.com/articles/HadoopOutputFormat

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值