The main purpose is to read multiple versions of data stored in HBase within a given time range and run a MapReduce computation over them. The required jars are the HBase jars (and their dependencies), in addition to the Hadoop MapReduce libraries.
The MapReduce job class:
package org.dragon.hbase.mapreduce;
import java.io.IOException;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
public class TypeStatisticMapReduce {

    /**
     * Mapper Class
     */
    public static class TypeStatisticMapper extends
            TableMapper<Text, IntWritable> {

        private final IntWritable ONE = new IntWritable(1);
        private Text keycid = new Text();

        @Override
        public void map(ImmutableBytesWritable key, Result values,
                Context context) throws IOException, InterruptedException {
            // values.list() returns every version of every cell in this row
            for (KeyValue kv : values.list()) {
                String qualifier = Bytes.toString(kv.getQualifier());
                String val = Bytes.toString(kv.getValue());
                // the map output key is the cell value concatenated with the qualifier
                keycid.set(val + qualifier);
                // counter used only for debugging: shows each key emitted by the mapper
                Counter countPrint1 = context.getCounter("Map++++keycid", keycid.toString());
                countPrint1.increment(1L);
                context.write(keycid, ONE);
            }
        }
    }
    /**
     * Reducer Class
     */
    public static class TypeStatisticReducer extends
            TableReducer<Text, IntWritable, ImmutableBytesWritable> {

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            // sum the values that share the same key
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            String keyString = key.toString();
            // counter used only for debugging: shows each key seen by the reducer
            Counter countPrint1 = context.getCounter("reduce++++++keycid", keyString);
            countPrint1.increment(1L);
            // write the total into content:count of the output table, row key = keyString
            Put put = new Put(Bytes.toBytes(keyString));
            put.add(Bytes.toBytes("content"), Bytes.toBytes("count"),
                    Bytes.toBytes(String.valueOf(sum)));
            // TableOutputFormat ignores the output key, so null is fine here
            context.write(null, put);
        }
    }
    public static void main(String[] args) throws Exception {
        // scan window: from 3 days ago up to now
        long maxStamp = new Date().getTime();
        long minStamp = maxStamp - 1000 * 60 * 60 * 24 * 3;

        Configuration config = HBaseConfiguration.create();
        // disable speculative execution for map tasks
        config.setBoolean("mapreduce.map.speculative", false);
        // disable speculative execution for reduce tasks
        config.setBoolean("mapreduce.reduce.speculative", false);

        Job job = new Job(config, "TypeStatistic");
        job.setJarByClass(TypeStatisticMapReduce.class); // class that contains mapper and reducer

        Scan scan = new Scan();
        scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        // set other scan attrs
        //scan.addColumn(Bytes.toBytes("information"), Bytes.toBytes("cid"));
        // number of versions to read per cell
        scan.setMaxVersions(10);
        // restrict the scan to the time range computed above
        scan.setTimeRange(minStamp, maxStamp);

        TableMapReduceUtil.initTableMapperJob("TheUserCommodityRecord", // input table
                scan, // Scan instance to control CF and attribute selection
                TypeStatisticMapper.class, // mapper class
                Text.class, // mapper output key
                IntWritable.class, // mapper output value
                job);
        TableMapReduceUtil.initTableReducerJob("TypeStatistic", // output table
                TypeStatisticReducer.class, // reducer class
                job);
        job.setNumReduceTasks(1); // at least one, adjust as required

        boolean b = job.waitForCompletion(true);
        if (!b) {
            throw new IOException("error with job!");
        }
    }
}
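After the job completes, each composite key (cell value + qualifier) ends up as a row in the TypeStatistic table, with its total stored as a string in the content:count column. Below is a minimal sketch for reading the results back, assuming the same old-style HTable client API that the job above uses; the class name TypeStatisticReader is just for illustration, the table and column names come from the job code, and error handling is omitted:
package org.dragon.hbase.mapreduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
public class TypeStatisticReader {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        // open the output table written by the reducer
        HTable table = new HTable(config, "TypeStatistic");
        ResultScanner scanner = table.getScanner(new Scan());
        for (Result result : scanner) {
            String rowKey = Bytes.toString(result.getRow());
            // the reducer stored the count as a string in content:count
            String count = Bytes.toString(result.getValue(
                    Bytes.toBytes("content"), Bytes.toBytes("count")));
            System.out.println(rowKey + " = " + count);
        }
        scanner.close();
        table.close();
    }
}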
A few reminders: it is best to package the finished program into a jar and run it with hadoop jar. Running it directly from Eclipse frequently fails with missing-class errors; I have tried every approach I could think of so far and none of them solved this. Even when it does run from Eclipse, you still have to package the program into a jar and export it in hadoop-env.sh, and every time you change the code in Eclipse the jar has to be rebuilt as well, which is even more trouble. To debug the program, you can use counters to expose variables from map and reduce and inspect their values.
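On counter-based debugging: because waitForCompletion(true) is called with verbose output, the counter totals are printed to the console when the job ends, and they can also be read programmatically once the job has finished. A small sketch of that, using the standard org.apache.hadoop.mapreduce counter API (Counters and CounterGroup would need to be imported alongside the Counter already used above); it could be placed right after job.waitForCompletion(true) returns in main():
        // dump every counter group and counter after the job has finished
        Counters counters = job.getCounters();
        for (CounterGroup group : counters) {
            for (Counter counter : group) {
                System.out.println(group.getDisplayName() + "\t"
                        + counter.getDisplayName() + " = " + counter.getValue());
            }
        }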