Requirement 1: count the rowkeys of the HBase table yangmi
1) Export the environment variables
export HBASE_HOME=/root/hd/hbase-1.3.0
export HADOOP_HOME=/root/hd/hadoop-2.8.5
export HADOOP_CLASSPATH=$(${HBASE_HOME}/bin/hbase mapredcp)
2) Run the HBase MapReduce job
The hbase-server-1.3.0.jar that ships in HBase's lib directory can be used directly (run the command from $HBASE_HOME so the relative lib/ path resolves).
/root/hd/hadoop-2.8.5/bin/yarn jar lib/hbase-server-1.3.0.jar rowcounter yangmi
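As an optional cross-check, the hbase shell's built-in count command gives the same number via a client-side scan (slower than the MapReduce job, but handy for small tables):
bin/hbase shell
count 'yangmi'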
Requirement 2: import local data into HBase
Approach: HBase stores its data on HDFS, so first upload the data to HDFS,
create a matching table in HBase,
then use MapReduce (importtsv) to load the data into HBase.
Local data:
e1.tsv
001 tony henshuai
002 Dlireba beautiful
003 Yangmi good
004 isme perfect
Create a table in HBase:
bin/hbase shell
create 'hdfs_hbase','info'
Upload the local data to HDFS:
hdfs dfs -put e1.tsv /hbaseexample1/
Run the following command:
/root/hd/hadoop-2.8.5/bin/yarn jar lib/hbase-server-1.3.0.jar importtsv \
-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:description \
hdfs_hbase hdfs://bigdata121:9000/hbaseexample1/
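If the job completes successfully, the imported rows can be checked with a quick scan from the hbase shell:
bin/hbase shell
scan 'hdfs_hbase'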
Requirement 3: filter specified columns from the hdfs_hbase table and insert them into the lovemr table
1) Build a Mapper class that reads the data from the hdfs_hbase table
2) Build a Reducer class that writes the data into the lovemr table
3) Build the driver class
4) Package the job and run it on the cluster
Create the Mapper class ReadMapper:
package com.itstaredu.hbasemr;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class ReadMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // 1. Read the data: each call hands us the Result for one rowkey
        Put put = new Put(key.get());
        // 2. Iterate over the cells of this row
        for (Cell c : value.rawCells()) {
            // 3. Only cells from the "info" column family go into the lovemr table
            if ("info".equals(Bytes.toString(CellUtil.cloneFamily(c)))) {
                // 4. Keep only the "name" column
                if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(c)))) {
                    put.add(c);
                }
            }
        }
        context.write(key, put);
    }
}
Create the Reducer class WriteReducer:
package com.itstaredu.hbasemr;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class WriteReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Write every Put received for this rowkey to the output table
        for (Put p : values) {
            context.write(NullWritable.get(), p);
        }
    }
}
Create the driver class LoveDriver:
package com.itstaredu.hbasemr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class LoveDriver implements Tool {
    private Configuration conf;

    public int run(String[] strings) throws Exception {
        // 1. Create the job
        Job job = Job.getInstance(conf);
        // 2. Set the main class so the jar can be located
        job.setJarByClass(LoveDriver.class);
        // 3. Configure the job: the source table is read with a Scan
        Scan scan = new Scan();
        // 4. Set the mapper and the source table
        TableMapReduceUtil.initTableMapperJob("hdfs_hbase",
                scan,
                ReadMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job);
        // 5. Set the reducer and the target table
        TableMapReduceUtil.initTableReducerJob("lovemr",
                WriteReducer.class,
                job);
        // Set the number of reduce tasks
        job.setNumReduceTasks(1);
        boolean rs = job.waitForCompletion(true);
        return rs ? 0 : 1;
    }

    // Wrap the incoming configuration with the HBase settings
    public void setConf(Configuration configuration) {
        this.conf = HBaseConfiguration.create(configuration);
    }

    public Configuration getConf() {
        return conf;
    }

    public static void main(String[] args) throws Exception {
        int status = ToolRunner.run(new LoveDriver(), args);
        System.out.println(status);
    }
}
Create the lovemr table in HBase (see the command below).
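A minimal way to do that from the hbase shell, assuming the same 'info' column family that ReadMapper copies (Put.add keeps the cell's original family, so the target table must have it):
bin/hbase shell
create 'lovemr','info'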
Then package the project into a jar; by default this produces HbaseTest-1.0-SNAPSHOT.jar.
Run:
/root/hd/hadoop-2.8.5/bin/yarn jar HbaseTest-1.0-SNAPSHOT.jar com.itstaredu.hbasemr.LoveDriver
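Since ReadMapper copies only the info:name column, the result can be checked with a scan from the hbase shell; each row should show exactly that one column:
bin/hbase shell
scan 'lovemr'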
Requirement 4: use a custom Mapper to read data from HDFS and insert it into an HBase table
Approach:
1) Build a Mapper that reads the data from HDFS
2) Build a Reducer
3) Build the driver class
4) Package and run the job
5) Test
Create the Mapper class ReadLoveFromHDFSMapper:
package com.itstaredu.com.itstaredu.hbasemr1;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class ReadLoveFromHDFSMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Read one line of input
        String line = value.toString();
        // 2. Split the line on tabs
        String[] fields = line.split("\t");
        // 3. Convert the fields to bytes
        byte[] rowkey = Bytes.toBytes(fields[0]);
        byte[] name = Bytes.toBytes(fields[1]);
        byte[] desc = Bytes.toBytes(fields[2]);
        // 4. Wrap them in a Put object
        Put put = new Put(rowkey);
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), name);
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("desc"), desc);
        // 5. Emit the rowkey and the Put
        context.write(new ImmutableBytesWritable(rowkey), put);
    }
}
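The mapper expects each input line to contain three tab-separated fields: rowkey, name and description. A hypothetical love.tsv in that layout (the same format as e1.tsv from Requirement 2) could look like:
001 tony henshuai
002 Dlireba beautiful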
Create the Reducer class WriteLoveReducer:
package com.itstaredu.com.itstaredu.hbasemr1;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class WriteLoveReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Write every Put received for this rowkey to the output table
        for (Put p : values) {
            context.write(NullWritable.get(), p);
        }
    }
}
Create the driver class LoveDriver:
package com.itstaredu.com.itstaredu.hbasemr1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class LoveDriver implements Tool {
    private Configuration conf = null;

    // Wrap the incoming configuration with the HBase settings
    public void setConf(Configuration conf) {
        this.conf = HBaseConfiguration.create(conf);
    }

    public Configuration getConf() {
        return conf;
    }

    public int run(String[] strings) throws Exception {
        // 1. Create the job
        Job job = Job.getInstance(conf);
        job.setJarByClass(LoveDriver.class);
        // 2. Configure the mapper
        job.setMapperClass(ReadLoveFromHDFSMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        // 3. Configure the reducer and the target table
        TableMapReduceUtil.initTableReducerJob("lovehdfs", WriteLoveReducer.class, job);
        // 4. Configure the input path on HDFS
        FileInputFormat.addInputPath(job, new Path("/lovehbase"));
        // 5. Submit the job and return the status
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int status = ToolRunner.run(new LoveDriver(), args);
        System.out.println(status);
    }
}
Then package the project into a jar.
Create a directory on HDFS to hold the input data:
hdfs dfs -mkdir /lovehbase
Upload the data into that directory:
hdfs dfs -put love.tsv /lovehbase/love.tsv
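Before running the job, the target table has to exist in HBase. A minimal sketch from the hbase shell, assuming an 'info' column family to match the info:name and info:desc columns the mapper writes:
bin/hbase shell
create 'lovehdfs','info'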
Finally, run:
/root/hd/hadoop-2.8.5/bin/yarn jar /root/HbaseTest-1.0-SNAPSHOT.jar com.itstaredu.com.itstaredu.hbasemr1.LoveDriver
