1. Versions
HBase: 1.3.1
Hadoop: 3.1.3
2. Official examples
1. Check which jars are needed to run HBase's MapReduce jobs
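HBase ships a command that prints this classpath; a quick way to list the jars (run from /opt/module/hbase):
bin/hbase mapredcp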
2. Set the environment variable
To make it take effect permanently:
add the following line at the end of hadoop-env.sh
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
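Alternatively, for the current shell session only, the output of mapredcp can be exported directly (a sketch, assuming the same install paths as above):
export HADOOP_CLASSPATH=$(/opt/module/hbase/bin/hbase mapredcp)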
3. Run the official MapReduce jobs
(1) Case 1: count the number of rows in the student table (reading data)
Run the following from the /opt/module/hbase directory:
/opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar rowcounter student
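When the job finishes, the row count is reported in the job counters printed at the end of the output (look for the ROWS counter).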
(2) Case 2: use MapReduce to import local data into HBase (writing data)
1) Create a tsv file locally: fruit.tsv
1001	Apple	Red
1002	Pear	Yellow
1003	Pineapple	Yellow
(the fields are separated by a single tab; importtsv expects tab-delimited input by default)
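The importtsv job below reads the file from HDFS, so upload it first to the root path referenced in the command:
hadoop fs -put fruit.tsv /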
2) Create the HBase table
hbase(main):001:0> create 'fruit','info'
3) Run the MapReduce job to import the data into the HBase fruit table
/opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv \
-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \
hdfs://hadoop102:8020/fruit.tsv
The trailing \ at the end of each line is a line continuation, used here for readability; you can also drop the \ and put the whole command on one line. The last argument is the input path on HDFS.
4) Use the scan command to check the imported data
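For example, in the HBase shell:
hbase(main):002:0> scan 'fruit'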
3. Custom example 1
Goal: write data from HDFS into an HBase table.
1. Build the fruitMapper class
package com.atguigu.mr1;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

// Pass-through mapper: each input line from HDFS is forwarded unchanged;
// the parsing and writing to HBase happens in the reducer.
public class fruitMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        context.write(key, value);
    }
}
2. Build the fruitReducer class
package com.atguigu.mr1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class fruitReducer extends TableReducer<LongWritable, Text, NullWritable> {

//    // The column family could also be passed in via the Configuration instead of being hard-coded:
//    String cf1 = null;
//
//    @Override
//    protected void setup(Context context) throws IOException, InterruptedException {
//        Configuration configuration = context.getConfiguration();
//        cf1 = configuration.get("cf1");
//    }

    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // 1. Iterate over the values, e.g. "1001\tApple\tRed"
        for (Text value : values) {
            // 2. Split each line on tabs
            String[] fields = value.toString().split("\t");
            // 3. Build a Put object keyed by the first field (the row key)
            Put put = new Put(Bytes.toBytes(fields[0]));
            // 4. Populate the Put object
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(fields[2]));
            // 5. Write out
            context.write(NullWritable.get(), put);
        }
    }
}
3. Build the fruitDriver class
package com.atguigu.mr1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class fruitDriver implements Tool {

    // Configuration injected by ToolRunner
    private Configuration configuration = null;

    @Override
    public int run(String[] strings) throws Exception {
        // 1. Get the Job object
        Job job = Job.getInstance(configuration);
        // 2. Set the driver class
        job.setJarByClass(fruitDriver.class);
        // 3. Set the Mapper and its output key/value types
        job.setMapperClass(fruitMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // 4. Set the Reducer
        //    strings[0]: input path on HDFS
        //    strings[1]: target HBase table name
        TableMapReduceUtil.initTableReducerJob(strings[1],
                fruitReducer.class,
                job);
        // 5. Set the input path
        FileInputFormat.setInputPaths(job, new Path(strings[0]));
        // 6. Submit the job
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    @Override
    public void setConf(Configuration conf) {
        configuration = conf;
    }

    @Override
    public Configuration getConf() {
        return configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            int run = ToolRunner.run(configuration, new fruitDriver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
4. Package the project into a jar
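Assuming a standard Maven project (the SNAPSHOT jar name below suggests one), packaging can be as simple as:
mvn clean package -DskipTests
Then copy the resulting hbase-demo-1.0-SNAPSHOT.jar to the cluster.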
5. Run the job
Create the fruit1 table first (see the shell snippet below), then run the command that follows; the first argument, /fruit.tsv, is the input data and the second, fruit1, is the target table name.
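In the HBase shell, assuming the same info column family as before:
hbase(main):001:0> create 'fruit1','info'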
[atguigu@hadoop102 hbase]$ yarn jar hbase-demo-1.0-SNAPSHOT.jar com.atguigu.mr1.fruitDriver /fruit.tsv fruit1
6. Use the scan command to check the imported data
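For example:
hbase(main):002:0> scan 'fruit1'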
4. Custom example 2
Goal: read data from HBase and write it back to HBase, migrating the data in the fruit1 table into the fruit2 table with MapReduce.
1. Build the Fruit2Mapper class
package com.atguigu.mr2;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // Build a Put object keyed by the current row key
        Put put = new Put(key.get());
        // 1. Walk through every cell in the row
        for (Cell cell : value.rawCells()) {
            // 2. Keep only the "name" column
            if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                // 3. Copy the cell into the Put (put.addColumn() would also work, see below)
                put.add(cell);
            }
        }
        // 4. Write out
        context.write(key, put);
    }
}
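The put.add(cell) call copies the original cell as-is, timestamp included. If you prefer put.addColumn(), an equivalent sketch (a hypothetical helper, not part of the original code) looks like this; note the write gets a fresh timestamp instead of the original one:

    // Roughly equivalent to put.add(cell), minus the original timestamp
    private static void copyCell(Put put, Cell cell) {
        put.addColumn(CellUtil.cloneFamily(cell),
                CellUtil.cloneQualifier(cell),
                CellUtil.cloneValue(cell));
    }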
2. Build the Fruit2Reducer class
package com.atguigu.mr2;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class Fruit2Reducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Write out every Put received from the mapper
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
3. Build the Fruit2Driver class
package com.atguigu.mr2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Fruit2Driver implements Tool {

    private Configuration configuration = null;

    @Override
    public int run(String[] strings) throws Exception {
        Job job = Job.getInstance(configuration);
        job.setJarByClass(Fruit2Driver.class);
        // Read from the source table fruit1 (populated in custom example 1)
        TableMapReduceUtil.initTableMapperJob("fruit1",
                new Scan(),
                Fruit2Mapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job);
        // Write to the target table fruit2
        TableMapReduceUtil.initTableReducerJob("fruit2",
                Fruit2Reducer.class,
                job);
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    @Override
    public void setConf(Configuration conf) {
        configuration = conf;
    }

    @Override
    public Configuration getConf() {
        return configuration;
    }

    public static void main(String[] args) {
        try {
            // Use HBaseConfiguration.create() instead of new Configuration()
            // so that the hbase-site.xml on the classpath is picked up
            Configuration configuration = HBaseConfiguration.create();
            ToolRunner.run(configuration, new Fruit2Driver(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
4. Write the configuration file
Create a new file under resources and copy into it the hbase-site.xml found at /opt/module/hbase/conf on hadoop102; the file name must stay exactly the same (hbase-site.xml).
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://hadoop102:8020/HBase</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <!-- New since 0.98: earlier versions had no .port property and used port 60000 by default -->
    <property>
        <name>hbase.master.port</name>
        <value>16000</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>hadoop102,hadoop103,hadoop104</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/opt/module/zookeeper-3.5.7/zkData</value>
    </property>
</configuration>
5. Run it directly on the local machine (run Fruit2Driver's main method in the IDE; HBaseConfiguration.create() picks up the hbase-site.xml above from the classpath, so the job connects to the cluster)
6. Use the scan command to check the migrated data
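For example:
hbase(main):003:0> scan 'fruit2'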