A small example program demonstrating how to write a MapReduce job against HBase:
Data already present in HBase:
hbase(main):009:0> scan 'test'
ROW COLUMN+CELL
row1 column=li:a, timestamp=1386254343222, value=liang
row1 column=li:b, timestamp=1386253305942, value=\x00\x00\x00\x03
row1 column=li:c, timestamp=1386256955011, value=liang
row2 column=li:a, timestamp=1386255985261, value=liang
row3 column=li:a, timestamp=1386256003938, value=lei
row4 column=li:a, timestamp=1386256057937, value=lei
row5 column=li:a, timestamp=1386256064945, value=lei
row6 column=li:a, timestamp=1386256226767, value=lei
row7 column=li:a, timestamp=1386256230868, value=lei
row8 column=li:a, timestamp=1386256234817, value=lei
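For reference, a table like this can be created and populated from the HBase shell. The commands below are only a sketch of how the data shown above could have been loaded (only the first few rows are repeated here):

hbase(main):001:0> create 'test', 'li'
hbase(main):002:0> put 'test', 'row1', 'li:a', 'liang'
hbase(main):003:0> put 'test', 'row1', 'li:c', 'liang'
hbase(main):004:0> put 'test', 'row2', 'li:a', 'liang'
hbase(main):005:0> put 'test', 'row3', 'li:a', 'lei'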
The test code is as follows:
package com.hbase.create;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * This class contains the mapper and the job setup; it was written in preparation for a
 * crawler program whose goal is to store image files in HDFS.
 * @author lloppo
 */
public class MapReduce_Hbase_DownLoad extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new MapReduce_Hbase_DownLoad(), args);
    }

    static class MyMapper extends TableMapper<Text, LongWritable> {

        @Override
        public void map(ImmutableBytesWritable row, Result value, Context context)
                throws InterruptedException, IOException {
            // Print every cell of the current row (row key and cell value).
            for (KeyValue kv : value.raw()) {
                System.out.println("row: " + Bytes.toString(kv.getRow())
                        + " value: " + Bytes.toString(kv.getValue()));
            }
            // Read the li:a column directly; it is null if the row has no li:a cell.
            byte[] a = value.getValue(Bytes.toBytes("li"), Bytes.toBytes("a"));
            if (a != null) {
                System.out.println("value : " + Bytes.toString(a));
            }
        }
    }
    public int run(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        try {
            Job job = new Job(config, "ExampleReadWrite");
            job.setJarByClass(MapReduce_Hbase_DownLoad.class);  // class that contains the mapper

            Scan scan = new Scan();
            scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
            scan.setCacheBlocks(false);  // don't fill the block cache with a one-off full scan

            TableMapReduceUtil.initTableMapperJob(
                    "test",              // input HBase table name
                    scan,                // Scan instance to control CF and attribute selection
                    MyMapper.class,      // mapper
                    null,                // mapper output key
                    null,                // mapper output value
                    job);
            job.setOutputFormatClass(NullOutputFormat.class);  // because we aren't emitting anything from the mapper

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return 0;
    }
}
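The mapper above declares Text/LongWritable output types but never emits anything, which is why the job uses NullOutputFormat. If you instead want the job to write results back into HBase, the usual pattern is to pair the mapper with a TableReducer and wire it in with TableMapReduceUtil.initTableReducerJob. The following is only a sketch, and it assumes an output table named 'test_out' with column family 'li' already exists (both names are hypothetical); it counts how often each li:a value occurs and writes the totals back:

// Sketch only. Extra imports needed:
//   import org.apache.hadoop.hbase.client.Put;
//   import org.apache.hadoop.hbase.mapreduce.TableReducer;
static class CountMapper extends TableMapper<Text, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1);

    @Override
    public void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
        byte[] a = value.getValue(Bytes.toBytes("li"), Bytes.toBytes("a"));
        if (a != null) {
            context.write(new Text(Bytes.toString(a)), ONE);  // emit (value, 1)
        }
    }
}

static class CountReducer extends TableReducer<Text, LongWritable, ImmutableBytesWritable> {
    @Override
    public void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable v : values) {
            sum += v.get();
        }
        // Row key = the counted value; column li:count holds the total.
        Put put = new Put(Bytes.toBytes(key.toString()));
        put.add(Bytes.toBytes("li"), Bytes.toBytes("count"), Bytes.toBytes(sum));
        context.write(null, put);  // the row key is taken from the Put itself
    }
}

In run(), the job wiring would then change to:

TableMapReduceUtil.initTableMapperJob("test", scan, CountMapper.class,
        Text.class, LongWritable.class, job);
TableMapReduceUtil.initTableReducerJob("test_out", CountReducer.class, job);
job.setNumReduceTasks(1);
// and drop job.setOutputFormatClass(NullOutputFormat.class);

Either version is submitted like any other MapReduce job; the HBase jars just have to be on the classpath, for example HADOOP_CLASSPATH=$(hbase classpath) hadoop jar your-job.jar com.hbase.create.MapReduce_Hbase_DownLoad (the jar name here is a placeholder).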