构建Maven工程:
Pom文件添加依赖:
pom.xml
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>2.6.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.5</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.3.2</version>
</dependency>
</dependencies>
修改完成后右下角event log显示自动加载依赖库,点击手动或自动,第一次加载需要10分钟左右,耐心等待
判断表是否存在、建表及插入数据的程序如下:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Minimal HBase client demo: checks whether a table exists, creates a table,
 * and inserts single cells.
 *
 * <p>Every operation opens its own {@link Connection} and closes it (together
 * with any {@link Admin} / {@link Table} obtained from it) before returning,
 * using try-with-resources so nothing leaks when an HBase call throws.
 */
public class HbaseTest {
    /** HBase configuration, created once in the static initializer. */
    public static Configuration conf;
    /**
     * Connection most recently opened by one of the methods below. It is
     * already closed once that method has returned; kept only for
     * compatibility with code that reads this field.
     */
    public static Connection conn;
    /**
     * Admin handle most recently obtained by {@link #isexist} or
     * {@link #createTables}; already closed once that method has returned.
     */
    public static Admin admin;

    static {
        System.out.println("------2------");
        conf = HBaseConfiguration.create();
        System.out.println("==>" + conf);
    }

    /**
     * Reports whether the named table exists.
     *
     * @param tbname table name
     * @return {@code true} if {@code Admin.tableExists} reports the table exists
     * @throws IOException if connecting or the existence check fails
     */
    public static boolean isexist(String tbname) throws IOException {
        System.out.println("------3------");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin hbaseAdmin = connection.getAdmin()) {
            conn = connection;
            // Admin is the handle used for table management operations.
            admin = hbaseAdmin;
            System.out.println("conn==>" + admin);
            return hbaseAdmin.tableExists(TableName.valueOf(tbname));
        }
    }

    /**
     * Creates a table with the given column families.
     *
     * <p>An {@link IOException} from connecting or from the create call is
     * printed to stderr and not rethrown, as before; the difference is that a
     * failed connection no longer leads to a call on a null or stale
     * {@code admin}.
     *
     * @param tbname       table name
     * @param colunmFamily names of the column families to add
     */
    public static void createTables(String tbname, String... colunmFamily) {
        // Build the table descriptor and attach one column family per name.
        HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tbname));
        for (String family : colunmFamily) {
            htd.addFamily(new HColumnDescriptor(family));
        }
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin hbaseAdmin = connection.getAdmin()) {
            conn = connection;
            admin = hbaseAdmin;
            // Create the table.
            hbaseAdmin.createTable(htd);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a single cell.
     *
     * @param tbname table name
     * @param rowKey row key
     * @param cf     column family
     * @param column column qualifier
     * @param value  cell value
     * @throws Exception if connecting, opening the table, or the put fails
     *                   (previously a failed connection was printed and then
     *                   surfaced as a NullPointerException)
     */
    public static void addRow(String tbname, String rowKey, String cf,
                              String column, String value) throws Exception {
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(cf),
                Bytes.toBytes(column),
                Bytes.toBytes(value));
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf(tbname))) {
            conn = connection;
            table.put(put);
        }
    }

    /**
     * Demo entry point: prints whether table "Hello" exists, then inserts
     * three cells into "scjy_address".
     */
    public static void main(String[] args) throws Exception {
        System.out.println("-----1-------");
        try {
            System.out.println(isexist("Hello"));
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Enable on the first run if "scjy_address" has not been created yet:
        //createTables("scjy_address","info");
        addRow("scjy_address", "100002", "info", "address", "shanghai");
        addRow("scjy_address", "100003", "info", "name", "Amy");
        addRow("scjy_address", "100004", "info", "age", "3");
        System.out.println("插入成功...");
    }
}
添加配置文件到工程中
hdfs-site.xml hbase-site.xml core-site.xml到resources
采用hbase执行MR程序
加载依赖库:export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
yarn jar /opt/hbase-1.3.1/lib/hbase-server-1.3.1.jar rowcounter scjy_address
结果显示:
将指定文件插入到hbase中
命令:
yarn jar /opt/hbase-1.3.1/lib/hbase-server-1.3.1.jar importtsv -Dimporttsv.columns=HBASE_ROW_KEY,info:id,info:name hbaseDemo hdfs://Master:9000/hbasetest
结果显示:
预分区
create 'user_demo','info','partition',SPLITS=>['110','111','112','113']
自动生成预分区
create 'user_demo2','info','partition',{NUMREGIONS=>10,SPLITALGO=>'HexStringSplit'}
设置文件预分区
create 'user_demo3','info','partition',SPLITS_FILE=>'split.txt' #文件要放在hbase目录下
内存优化
1. hadoop-env.sh => export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
2. 最大文件数
hdfs-site.xml => dfs.datanode.max.transfer.threads 默认为4096 设置大于4096
3. 优化延迟高的数据操作等待时间
dfs.image.transfer.timeout 默认为60000毫秒
4. 数据写入效率 压缩
mapreduce.map.output.compress ==> true;mapreduce.map.output.compress.codec ==> org.apache.hadoop.io.compress.GzipCodec
5.优化Hstore的大小
hbase.hregion.max.filesize默认为10G,调小好
问题:
org.apache.hadoop.hbase.client.RetriesExhaustedException: Failed after attempts=36, exceptions:
Tue Jul 19 16:36:05 CST 2016, null, java.net.SocketTimeoutException: callTimeout=60000, callDuration=79721: row 'testtable,,' on table 'hbase:meta' at region=hbase:meta,,1.1588230740, hostname=ubuntu,16020,1468916750524, seqNum=0
解决方案:
其中在windows10 下修改 C:\Windows\System32\drivers\etc\hosts,添加集群映射
192.168.8.xxx Master
192.168.8.xxx Slave1
192.168.8.xxx Slave2