import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
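// Driver for writing HFiles that can be bulk-loaded into an HBase table.
// Expected input: plain text files where each line is "rowkey<TAB>value";
// every line becomes one KeyValue in column family "C1", qualifier "c".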
public class BulkLoadDriver extends Configured implements Tool {

    public static void main(String[] args) {
        try {
            int response = ToolRunner.run(HBaseConfiguration.create(), new BulkLoadDriver(), args);
            if (response == 0) {
                System.out.println("Job completed successfully.");
            } else {
                System.out.println("Job failed.");
            }
        } catch (Exception exception) {
            exception.printStackTrace();
        }
    }
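    // Note on main() above: ToolRunner parses generic Hadoop options
    // (-D key=value, -conf, etc.) and merges them into the Configuration
    // before run() is invoked.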
    // Must be static: the MapReduce framework instantiates the mapper via
    // reflection and cannot construct a non-static inner class.
    public static class BulkLoadMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {

        @Override
        public void map(LongWritable key, Text value, Context context) {
            try {
                String[] values = value.toString().split("\t");
                byte[] row = Bytes.toBytes(values[0]);
                byte[] hbaseVal = Bytes.toBytes(values[1]);
                ImmutableBytesWritable rowKey = new ImmutableBytesWritable(row);
                KeyValue kv = new KeyValue(row, Bytes.toBytes("C1"), Bytes.toBytes("c"), hbaseVal);
                context.write(rowKey, kv);
            } catch (Exception exception) {
                exception.printStackTrace();
            }
        }
    }
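    // Note: HFileOutputFormat2 requires the KeyValues in total order of row
    // key. configureIncrementalLoad() (called in run() below) handles this by
    // installing a TotalOrderPartitioner and a KeyValue-sorting reducer, so
    // the mapper can emit records in any order.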
    public int run(String[] args) throws Exception {
        String inputPath = args[0];
        String outputPath = args[1];
        String tablename = args[2];

        // Set up the job parameters.
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", ""); // fill in your ZooKeeper quorum hosts
        configuration.set("hbase.zookeeper.property.clientPort", "2181");

        Job job = Job.getInstance(configuration, "HFile_for_HBase_Table:" + tablename);
        job.setJarByClass(BulkLoadDriver.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class); // map output key class
        job.setMapOutputValueClass(KeyValue.class);             // map output value class
        job.setMapperClass(BulkLoadMapper.class);               // map function
        FileInputFormat.addInputPaths(job, inputPath);          // input path

        FileSystem fs = FileSystem.get(configuration);
        Path output = new Path(outputPath);
        if (fs.exists(output)) {
            fs.delete(output, true); // delete the output path if it already exists
        }
        FileOutputFormat.setOutputPath(job, output); // output path

        HTable table = new HTable(configuration, tablename);
        HFileOutputFormat2.configureIncrementalLoad(job, table);

        boolean res = job.waitForCompletion(true);
        return res ? 0 : 1;
    }
}
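The job above only writes HFiles into the output directory; a separate bulk-load step is still needed to hand them over to the region servers. Below is a minimal sketch of that step, assuming the classic LoadIncrementalHFiles API from the same HBase 1.x era as the HTable constructor used above; the class name BulkLoadRunner and the argument order are illustrative, not part of the original code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class BulkLoadRunner {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // args[0] = HFile output directory produced by BulkLoadDriver,
        // args[1] = target table name (both hypothetical; match your job's arguments).
        HTable table = new HTable(conf, args[1]);
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        loader.doBulkLoad(new Path(args[0]), table); // moves the HFiles into the table's regions
        table.close();
    }
}

The same step can also be run from the command line with the bundled tool: hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles <hfile-dir> <tablename>.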