编写 MapReduce 程序从 HBase 读取数据，然后存储到 HDFS
/**
- 描述:
*/
public class HBaseDataToHDFSMR {
public static final String ZK_CONNECT = “hadoop02:2181,hadoop03:2181,hadoop04:2181”;
public static final String ZK_CONNECT_KEY = “hbase.zookeeper.quorum”;
public static final String HDFS_CONNECT = “hdfs://myha01/”;
public static final String HDFS_CONNECT_KEY = “fs.defaultFS”;
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
conf.set(ZK_CONNECT_KEY, ZK_CONNECT);
conf.set(HDFS_CONNECT_KEY, HDFS_CONNECT);
System.setProperty(“HADOOP_USER_NAME”, “hadoop”);
Job job = Job.getInstance(conf);
// 输入数据来源于 hbase 的 user_info 表
Scan scan = new Scan();
TableMapReduceUtil.initTableMapperJob(“user_info”, scan,
Stay hungry Stay foolish --马中华-- http://blog.youkuaiyun.com/zhongqi2513
HBaseDataToHDFSMRMapper.class, Text.class, NullWritable.class, job);
// RecordReader — TableRecordReader
// InputFormat ----- TextInputFormat
// 数据输出到 hdfs
FileOutputFormat.setOutputPath(job, new Path("/hbase2hdfs/output2"));
boolean waitForCompletion = job.waitForCompletion(true);
System.exit(waitForCompletion ? 0 : 1);
}
static class HBaseDataToHDFSMRMapper extends TableMapper<Text, NullWritable> {
/**
* keyType: LongWritable -- ImmutableBytesWritable:rowkey
* ValueType: Text -- Result:hbase 表中某一个 rowkey 查询出来的所有的 key-value 对
*/
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context)
throws IOException, InterruptedException {
// byte[] rowkey = Bytes.copy(key, 0, key.getLength());
String rowkey = Bytes.toString(key.copyBytes());
List<Cell> listCells = value.listCells();
Text text = new Text();
// 最后输出格式是: rowkey, base_info:name-huangbo, base-info:age-34
for (Cell cell : listCells) {
String family = new String(CellUtil.cloneFamily(cell));
String qualifier = new String(CellUtil.cloneQualifier(cell));
String v = new String(CellUtil.cloneValue(cell));
long ts = cell.getTimestamp();
text.set(rowkey + "\t" + family + "\t" + qualifier + "\t" + v + "\t" + ts);
context.write(text, NullWritable.get());
}
}
}
}