package com.zz.hbase.ccrc;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import com.zz.hive.Constants;
import com.zz.util.DateUtil;
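
/**
 * MapReduce job that exports rows from the HBase "Fetch" table into
 * per-table delimited text files laid out for loading into Hive. The mapper
 * flattens each row's "aaaa" column family into one line keyed by the
 * logical table name (taken from the "event" qualifier); the reducer writes
 * each table's lines to its own subdirectory via MultipleOutputs.
 *
 * Typical invocation (jar name and output path are illustrative):
 *   hadoop jar hbase-to-hive.jar com.zz.hbase.ccrc.HBaseToHiveJob /tmp/hbase2hive
 */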
public class HBaseToHiveJob {
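	/**
	 * Emits one (tableName, delimitedRow) pair per scanned row. Only rows whose
	 * "event" qualifier is "Summary" are rendered into columns; rows of other
	 * tables pass through with an empty value.
	 */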
	public static class HBaseToHiveMapper extends TableMapper<Text, Text> {

		@Override
		protected void map(ImmutableBytesWritable key, Result value, Context context)
				throws IOException, InterruptedException {
			StringBuilder sb = new StringBuilder();
			Map<String, String> columns = new HashMap<String, String>();
			String placeholder = "-";
			String tableName = "";
			// Flatten the "aaaa" column family: the "event" qualifier carries the
			// logical table name; every other qualifier holds a column value.
			for (Entry<byte[], byte[]> entry : value.getFamilyMap(Bytes.toBytes("aaaa")).entrySet()) {
				if (entry.getValue() == null) {
					continue;
				}
				String qualifier = Bytes.toString(entry.getKey());
				String cellValue = Bytes.toString(entry.getValue());
				if ("event".equals(qualifier)) {
					tableName = cellValue;
				} else {
					columns.put(qualifier, cellValue);
				}
			}
			sb = sort(tableName, columns, sb, placeholder);
			context.write(new Text(tableName), new Text(sb.toString()));
		}
		// Dispatches on the logical table name. For "Summary" rows the columns
		// are rendered in SUMMARY_COLUMN order; unknown tables leave sb empty.
		private StringBuilder sort(String tableName, Map<String, String> map, StringBuilder sb, String sp) {
			if ("Summary".equals(tableName)) {
				return handler(com.zz.option.Constants.SUMMARY_COLUMN, map, sb, sp);
			}
			return sb;
		}
		// Appends each expected column's value in order, substituting the
		// placeholder for columns the row lacks, separated by Constants.BLANK_CHAR.
		private StringBuilder handler(List<String> columns, Map<String, String> m, StringBuilder sb, String sp) {
			for (String column : columns) {
				String mapValue = m.get(column);
				sb.append(mapValue != null ? mapValue : sp);
				sb.append(Constants.BLANK_CHAR);
			}
			// Trim the trailing separator; guard against an empty column list.
			if (sb.length() > 0) {
				sb.deleteCharAt(sb.length() - 1);
			}
			return sb;
		}
	}
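	/**
	 * Groups mapper output by table name and, via MultipleOutputs, writes each
	 * table's rows under its own subdirectory of the job output path so they
	 * can be loaded into the matching Hive table.
	 */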
	public static class HBaseToHiveReducer extends Reducer<Text, Text, NullWritable, Text> {
		private MultipleOutputs<NullWritable, Text> out;

		@Override
		protected void setup(Context context) {
			out = new MultipleOutputs<NullWritable, Text>(context);
		}

		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			String tableName = key.toString();
			// Base output path "<table>/<table>" yields files such as
			// Summary/Summary-r-00000 under the job output directory.
			String path = tableName + "/" + tableName;
			for (Text val : values) {
				out.write(NullWritable.get(), val, path);
			}
		}

		@Override
		protected void cleanup(Context context) throws IOException, InterruptedException {
			// MultipleOutputs must be closed explicitly or buffered records are lost.
			out.close();
		}
	}
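	/**
	 * Expects one argument: the HDFS output directory (args[0]). Any existing
	 * data at that path is deleted before the job runs.
	 */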
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = HBaseConfiguration.create();
		Job job = Job.getInstance(conf, "Fetch HBase2Hive" + DateUtil.getNowTime());
		job.setJarByClass(HBaseToHiveJob.class);

		// Scan only the "aaaa" family. Caching 500 rows per RPC cuts round
		// trips; block caching is off so this full scan does not evict the
		// region servers' hot data.
		Scan scan = new Scan();
		scan.setCaching(500);
		scan.setCacheBlocks(false);
		scan.addFamily(Bytes.toBytes("aaaa"));

		// Clear any output left by a previous run, otherwise the job fails.
		Path output = new Path(args[0]);
		final FileSystem fileSystem = output.getFileSystem(conf);
		fileSystem.delete(output, true);

		TableMapReduceUtil.initTableMapperJob("Fetch", scan, HBaseToHiveMapper.class, Text.class, Text.class, job, false);
		job.setReducerClass(HBaseToHiveReducer.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		TextOutputFormat.setOutputPath(job, output);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(NullWritable.class);
		job.setOutputValueClass(Text.class);
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}