// Source: a simple program involving MapReduce, the HBase Bytes utility, and HBase tables
// ---------------------------------------------------------------------------------------
package com.inspur.hadoop;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
public class CharCount {
private static Log log=LogFactory.getLog(CharCount.class);
static class CharMap extends TableMapper<Text, IntWritable>{
private static final IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(ImmutableBytesWritable row, Result value, Context context){
try{
String content=new String(value.getValue(Bytes.toBytes("doccontent"), Bytes.toBytes("dealedcontent")));
String title=new String(value.getValue(Bytes.toBytes("docattr"), Bytes.toBytes("dealedtitle")));
content+=title;
for(int i=0;i<content.length();i++){
word.set(String.valueOf(content.charAt(i)));
context.write(word, one);
context.progress();
}
}catch(Exception e){
log.error("----------wrong--------------");
}
}
}
static class CharReduce extends TableReducer<Text,IntWritable, ImmutableBytesWritable>{
public void reduce(Text key, Iterable<IntWritable> values, Context context){
try{
int sum=0;
for(IntWritable i:values){
sum=sum+i.get();
}
Put put = new Put(Bytes.toBytes(key.toString()));
put.add(Bytes.toBytes("charattr"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(sum)));
context.write(null, put);
}catch(Exception e){
e.printStackTrace();
}
}
}
public static void main(String[] args) throws Exception {
Job job = Job.getInstance(HBaseConfiguration.create());
job.setJarByClass(CharCount.class);
Scan scan = new Scan();
scan.setCaching(500);
scan.setCacheBlocks(false);
TableMapReduceUtil.initTableMapperJob("documents_crawl", scan, CharMap.class, Text.class, IntWritable.class, job);
TableMapReduceUtil.initTableReducerJob("globaldocchar_test", CharReduce.class, job);
job.setNumReduceTasks(20);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}