package com.founder.hbase.mapreduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class TestMapReducerJob extends Configured implements Tool {
    public static Configuration conf = null;
    static {
        conf = HBaseConfiguration.create();
        conf.set("hbase.master", "192.168.136.131");
        conf.set("hbase.zookeeper.quorum", "ubuntu");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hadoop.job.ugi", "hadoop,Tardis");
    }
    public static class Mapper extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
        @Override
        protected void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
            ImmutableBytesWritable value = null;
            String[] tags = null;
            // Pull the author's nickname and the article's tag list out of the scanned row.
            for (KeyValue kv : values.list()) {
                if ("author".equals(Bytes.toString(kv.getFamily()))
                        && "nickname".equals(Bytes.toString(kv.getQualifier()))) {
                    value = new ImmutableBytesWritable(kv.getValue());
                }
                if ("article".equals(Bytes.toString(kv.getFamily()))
                        && "tags".equals(Bytes.toString(kv.getQualifier()))) {
                    tags = Bytes.toString(kv.getValue()).split(",");
                }
            }
            // Skip rows that are missing either column to avoid a NullPointerException.
            if (value == null || tags == null) {
                return;
            }
            // Emit one (tag, nickname) pair per tag so the reducer can group nicknames by tag.
            for (int i = 0; i < tags.length; i++) {
                ImmutableBytesWritable key = new ImmutableBytesWritable(Bytes.toBytes(tags[i].toLowerCase()));
                try {
                    context.write(key, value);
                } catch (InterruptedException e) {
                    throw new IOException(e);
                }
            }
        }
    }
    public static class Reducer extends TableReducer<ImmutableBytesWritable, ImmutableBytesWritable,
            ImmutableBytesWritable> {
        @Override
        protected void reduce(ImmutableBytesWritable key, Iterable<ImmutableBytesWritable> values,
                Context context) throws IOException, InterruptedException {
            // Join all nicknames that share this tag into one comma-separated string.
            StringBuilder friends = new StringBuilder();
            for (ImmutableBytesWritable val : values) {
                if (friends.length() > 0) {
                    friends.append(",");
                }
                friends.append(Bytes.toString(val.get()));
            }
            // Write the aggregated list to the person:nicknames column, keyed by tag.
            Put put = new Put(key.get());
            put.add(Bytes.toBytes("person"), Bytes.toBytes("nicknames"),
                    Bytes.toBytes(friends.toString()));
            context.write(key, put);
        }
    }
    @Override
    public int run(String[] args) throws Exception {
        Job job = new Job(conf, "HBase_FindFriend"); // job name
        job.setJarByClass(TestMapReducerJob.class);  // jar containing the mapper/reducer classes
        // Only scan the two columns the mapper actually reads.
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"));
        scan.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tags"));
        // Read from the 'blog' table and write the results to 'tag_friend'.
        TableMapReduceUtil.initTableMapperJob("blog", scan,
                Mapper.class, ImmutableBytesWritable.class,
                ImmutableBytesWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("tag_friend",
                Reducer.class, job);
        return job.waitForCompletion(true) ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(conf, new TestMapReducerJob(), args);
        System.exit(res);
    }
}
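
To make the data flow concrete, consider a hypothetical 'blog' row whose author:nickname cell holds "rain" and whose article:tags cell holds "Hadoop,HBase". The mapper emits the pairs (hadoop, rain) and (hbase, rain); after the shuffle, the reducer for the key "hadoop" receives every nickname that used that tag and writes them as a single comma-separated person:nicknames cell in 'tag_friend', with the tag as the row key.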

This article presents a concrete example of data processing with HBase MapReduce. The example reads data from the HBase table 'blog', uses a MapReduce job to associate author nicknames with article tags, and writes the results back to another HBase table, 'tag_friend'.
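
For the job to run, both tables must already exist with the column families the code expects. Below is a minimal sketch of that setup using the same old-style (0.9x-era) HBase client API as the job above; the table and family names come from the code, but the helper class itself and all schema details are assumptions, not part of the original post. On a live cluster the same schema could equally be created from the HBase shell.

package com.founder.hbase.mapreduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

// Hypothetical setup helper; not part of the original post.
public class CreateJobTables {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        // Source table: the mapper scans author:nickname and article:tags.
        HTableDescriptor blog = new HTableDescriptor("blog");
        blog.addFamily(new HColumnDescriptor("author"));
        blog.addFamily(new HColumnDescriptor("article"));
        admin.createTable(blog);
        // Destination table: the reducer writes person:nicknames.
        HTableDescriptor tagFriend = new HTableDescriptor("tag_friend");
        tagFriend.addFamily(new HColumnDescriptor("person"));
        admin.createTable(tagFriend);
        admin.close();
    }
}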