Requirement 1: count the rowkeys of the HBase table yangmi
1) Export the environment variables
export HBASE_HOME=/root/hd/hbase-1.3.0
export HADOOP_HOME=/root/hd/hadoop-2.8.5
export HADOOP_CLASSPATH=$(${HBASE_HOME}/bin/hbase mapredcp)
2) Run the HBase MapReduce job
The hbase-server-1.3.0.jar that ships in HBase's lib directory can be used directly (run the command from $HBASE_HOME so the relative lib/ path resolves).
/root/hd/hadoop-2.8.5/bin/yarn jar lib/hbase-server-1.3.0.jar rowcounter yangmi
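As an optional cross-check, the hbase shell's built-in count command gives the same number via a client-side scan (slower than the MapReduce job, but handy for small tables):
bin/hbase shell
count 'yangmi'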
Requirement 2: import local data into HBase
Approach: HBase stores its data on HDFS, so first upload the data to HDFS,
create a matching table in HBase,
then use MapReduce (importtsv) to load the data into HBase.
Local data:
e1.tsv
001 tony henshuai
002 Dlireba beautiful
003 Yangmi good
004 isme perfect
Create a table in HBase:
bin/hbase shell
create 'hdfs_hbase','info'
Upload the local data to HDFS:
hdfs dfs -put e1.tsv /hbaseexample1/
Run the following command:
/root/hd/hadoop-2.8.5/bin/yarn jar lib/hbase-server-1.3.0.jar importtsv \
-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:description \
hdfs_hbase hdfs://bigdata121:9000/hbaseexample1/
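If the job completes successfully, the imported rows can be checked with a quick scan from the hbase shell:
bin/hbase shell
scan 'hdfs_hbase'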
Requirement 3: filter specified columns from the hdfs_hbase table and insert them into the lovemr table
1) Build a Mapper class that reads the data from the hdfs_hbase table
2) Build a Reducer class that writes the data into the lovemr table
3) Build the driver class
4) Package the job and run it on the cluster
Create the Mapper class ReadMapper:
package com.itstaredu.hbasemr;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class ReadMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // 1. Read the data: each call hands us the Result for one rowkey
        Put put = new Put(key.get());
        // 2. Iterate over the cells of this row
        for (Cell c : value.rawCells()) {
            // 3. Only cells from the "info" column family go into the lovemr table
            if ("info".equals(Bytes.toString(CellUtil.cloneFamily(c)))) {
                // 4. Keep only the "name" column
                if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(c)))) {
                    put.add(c);
                }
            }
        }
        context.write(key, put);
    }
}
Create the Reducer class WriteReducer:
package com.itstaredu.hbasemr;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class WriteReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Write every Put received for this rowkey to the output table
        for (Put p : values) {
            context.write(NullWritable.get(), p);
        }
    }
}
Create the driver class LoveDriver:
package com.itstaredu.hbasemr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class LoveDriver implements Tool {
    private Configuration conf;

    public int run(String[] strings) throws Exception {
        // 1. Create the job
        Job job = Job.getInstance(conf);
        // 2. Set the main class so the jar can be located
        job.setJarByClass(LoveDriver.class);
        // 3. Configure the job: the source table is read with a Scan
        Scan scan = new Scan();
        // 4. Set the mapper and the source table
        TableMapReduceUtil.initTableMapperJob("hdfs_hbase",
                scan,
                ReadMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job);
        // 5. Set the reducer and the target table
        TableMapReduceUtil.initTableReducerJob("lovemr",
                WriteReducer.class,
                job);
        // Set the number of reduce tasks
        job.setNumReduceTasks(1);
        boolean rs = job.waitForCompletion(true);
        return rs ? 0 : 1;
    }

    // Wrap the incoming configuration with the HBase settings
    public void setConf(Configuration configuration) {
        this.conf = HBaseConfiguration.create(configuration);
    }

    public Configuration getConf() {
        return conf;
    }

    public static void main(String[] args) throws Exception {
        int status = ToolRunner.run(new LoveDriver(), args);
        System.out.println(status);
    }
}
Create the lovemr table in HBase (see the command below).
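A minimal way to do that from the hbase shell, assuming the same 'info' column family that ReadMapper copies (Put.add keeps the cell's original family, so the target table must have it):
bin/hbase shell
create 'lovemr','info'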
Then package the project into a jar; by default this produces HbaseTest-1.0-SNAPSHOT.jar.
Run:
/root/hd/hadoop-2.8.5/bin/yarn jar HbaseTest-1.0-SNAPSHOT.jar com.itstaredu.hbasemr.LoveDriver
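Since ReadMapper copies only the info:name column, the result can be checked with a scan from the hbase shell; each row should show exactly that one column:
bin/hbase shell
scan 'lovemr'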
Requirement 4: use a custom Mapper to read data from HDFS and insert it into an HBase table
Approach:
1) Build a Mapper that reads the data from HDFS
2) Build a Reducer
3) Build the driver class
4) Package and run the job
5) Test
Create the Mapper class ReadLoveFromHDFSMapper:
package com.itstaredu.com.itstaredu.hbasemr1;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class ReadLoveFromHDFSMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Read one line of input
        String line = value.toString();
        // 2. Split the line on tabs
        String[] fields = line.split("\t");
        // 3. Convert the fields to bytes
        byte[] rowkey = Bytes.toBytes(fields[0]);
        byte[] name = Bytes.toBytes(fields[1]);
        byte[] desc = Bytes.toBytes(fields[2]);
        // 4. Wrap them in a Put object
        Put put = new Put(rowkey);
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), name);
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("desc"), desc);
        // 5. Emit the rowkey and the Put
        context.write(new ImmutableBytesWritable(rowkey), put);
    }
}
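The mapper expects each input line to contain three tab-separated fields: rowkey, name and description. A hypothetical love.tsv in that layout (the same format as e1.tsv from Requirement 2) could look like:
001 tony henshuai
002 Dlireba beautiful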
Create the Reducer class WriteLoveReducer:
package com.itstaredu.com.itstaredu.hbasemr1;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class WriteLoveReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Write every Put received for this rowkey to the output table
        for (Put p : values) {
            context.write(NullWritable.get(), p);
        }
    }
}
Create the driver class LoveDriver:
package com.itstaredu.com.itstaredu.hbasemr1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class LoveDriver implements Tool {
    private Configuration conf = null;

    // Wrap the incoming configuration with the HBase settings
    public void setConf(Configuration conf) {
        this.conf = HBaseConfiguration.create(conf);
    }

    public Configuration getConf() {
        return conf;
    }

    public int run(String[] strings) throws Exception {
        // 1. Create the job
        Job job = Job.getInstance(conf);
        job.setJarByClass(LoveDriver.class);
        // 2. Configure the mapper
        job.setMapperClass(ReadLoveFromHDFSMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        // 3. Configure the reducer and the target table
        TableMapReduceUtil.initTableReducerJob("lovehdfs", WriteLoveReducer.class, job);
        // 4. Configure the input path on HDFS
        FileInputFormat.addInputPath(job, new Path("/lovehbase"));
        // 5. Submit the job and return the status
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int status = ToolRunner.run(new LoveDriver(), args);
        System.out.println(status);
    }
}
Then package the project into a jar.
Create a directory on HDFS to hold the input data:
hdfs dfs -mkdir /lovehbase
Upload the data into that directory:
hdfs dfs -put love.tsv /lovehbase/love.tsv
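Before running the job, the target table has to exist in HBase. A minimal sketch from the hbase shell, assuming an 'info' column family to match the info:name and info:desc columns the mapper writes:
bin/hbase shell
create 'lovehdfs','info'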
Finally, run:
/root/hd/hadoop-2.8.5/bin/yarn jar /root/HbaseTest-1.0-SNAPSHOT.jar com.itstaredu.com.itstaredu.hbasemr1.LoveDriver
