import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
import java.io.IOException;
/**
* Created by similarface on 16/8/23.
*/
public class ScanDataUseCache {

    // Lazily-created Table handle, cached so every scan reuses one Connection.
    private static Table table = null;

    /**
     * Returns the cached {@code Table} handle for "testtable", creating it on
     * first use. The original code returned the new handle without assigning
     * it to {@code table}, so the cache never took effect and every call
     * leaked a fresh {@code Connection}.
     *
     * @return the Table, or {@code null} if the connection could not be opened
     */
    public static Table getTable() {
        if (table == null) {
            try {
                Configuration configuration = HBaseConfiguration.create();
                Connection connection = ConnectionFactory.createConnection(configuration);
                // Cache the handle so subsequent calls reuse this connection.
                table = connection.getTable(TableName.valueOf("testtable"));
            } catch (IOException e) {
                // Report instead of silently swallowing; caller must handle null.
                System.out.println(e);
            }
        }
        return table;
    }

    /**
     * Runs one full-table scan and prints the result count and RPC count.
     *
     * @param caching rows fetched per RPC (scanner caching, default 1); a larger
     *                cache reduces round-trips but makes each transfer bigger
     * @param batch   max columns returned per {@code Result}; wide rows are
     *                split across several Results when set
     * @param small   whether to run as a "small" scan (metrics report 0 RPCs)
     */
    private static void scan(int caching, int batch, boolean small) {
        int count = 0;
        Scan scan = new Scan()
                .setCaching(caching)
                .setBatch(batch)
                .setSmall(small)
                .setScanMetricsEnabled(true);
        Table t = getTable();
        if (t == null) {
            // Connection setup failed earlier; avoid the NPE the original hit here.
            System.out.println("Error");
            return;
        }
        ResultScanner scanner = null;
        try {
            scanner = t.getScanner(scan);
        } catch (IOException e) {
            System.out.println(e);
        }
        if (scanner != null) {
            for (Result result : scanner) {
                count++;
            }
            scanner.close();
            ScanMetrics metrics = scan.getScanMetrics();
            System.out.println("Caching: " + caching + ", Batch: " + batch
                    + ", Small: " + small + ", Results: " + count
                    + ", RPCs: " + metrics.countOfRPCcalls);
        } else {
            System.out.println("Error");
        }
    }

    public static void main(String[] args) throws IOException {
        // Observed metrics for these combinations are listed in the block
        // comment following this class; the per-call inline figures previously
        // kept here disagreed with that record and have been removed.
        scan(1, 1, false);
        scan(1, 0, false);
        scan(1, 0, true);
        scan(200, 1, false);
        scan(200, 0, false);
        scan(200, 0, true);
        scan(2000, 100, false);
        scan(2, 100, false);
        scan(2, 10, false);
        scan(5, 100, false);
        scan(5, 20, false);
        scan(10, 10, false);
    }
}
/**
Caching: 1, Batch: 0, Small: false, Results: 5, RPCs: 8
Caching: 1, Batch: 0, Small: true, Results: 5, RPCs: 0
Caching: 200, Batch: 1, Small: false, Results: 1009, RPCs: 8
Caching: 200, Batch: 0, Small: false, Results: 5, RPCs: 3
Caching: 200, Batch: 0, Small: true, Results: 5, RPCs: 0
Caching: 2000, Batch: 100, Small: false, Results: 14, RPCs: 3
Caching: 2, Batch: 100, Small: false, Results: 14, RPCs: 10
Caching: 2, Batch: 10, Small: false, Results: 104, RPCs: 55
Caching: 5, Batch: 100, Small: false, Results: 14, RPCs: 5
Caching: 5, Batch: 20, Small: false, Results: 54, RPCs: 13
Caching: 10, Batch: 10, Small: false, Results: 104, RPCs: 13
**/
/*
 * Example: for a table of 9 rows, each containing several columns,
 * a scanner configured with caching = 6 and batch = 3 needs 3 RPCs:
 * 3 columns are packed into each Result instance, and
 * 6 Results fill the cache, together making up one RPC.
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
import java.io.IOException;
/**
* Created by similarface on 16/8/24.
*/
public class ScanWithOffsetAndLimit {

    // Lazily-created Table handle, cached so every scan reuses one Connection.
    private static Table table = null;

    /**
     * Returns the cached {@code Table} handle for "testtable", creating it on
     * first use. The original code returned the new handle without assigning
     * it to {@code table}, so the cache never took effect and every call
     * leaked a fresh {@code Connection}.
     *
     * @return the Table, or {@code null} if the connection could not be opened
     */
    public static Table getTable() {
        if (table == null) {
            try {
                Configuration configuration = HBaseConfiguration.create();
                Connection connection = ConnectionFactory.createConnection(configuration);
                // Cache the handle so subsequent calls reuse this connection.
                table = connection.getTable(TableName.valueOf("testtable"));
            } catch (IOException e) {
                // Report instead of silently swallowing; caller must handle null.
                System.out.println(e);
            }
        }
        return table;
    }

    /**
     * Scans the table once and prints the observed result and RPC counts.
     *
     * @param num           sequence number, used only for progress output
     * @param caching       rows fetched per RPC (scanner caching)
     * @param batch         max columns per {@code Result}; non-positive leaves it unlimited
     * @param offset        columns to skip per row, per column family
     * @param maxResults    max columns returned per row, per column family
     * @param maxResultSize max bytes fetched per call; -1 for unlimited
     * @param dump          whether to print every {@code Result}
     * @throws IOException if the table is unavailable or the scanner cannot be opened
     */
    private static void scan(int num, int caching, int batch, int offset,
                             int maxResults, int maxResultSize, boolean dump) throws IOException {
        int count = 0;
        Scan scan = new Scan()
                .setCaching(caching)
                .setBatch(batch)
                .setRowOffsetPerColumnFamily(offset)
                .setMaxResultsPerColumnFamily(maxResults)
                .setMaxResultSize(maxResultSize)
                .setScanMetricsEnabled(true);
        Table t = getTable();
        if (t == null) {
            // Surface the connection failure instead of the NPE the original threw.
            throw new IOException("table 'testtable' is unavailable");
        }
        ResultScanner scanner = t.getScanner(scan);
        System.out.println("Scan #" + num + " running...");
        for (Result result : scanner) {
            count++;
            if (dump) {
                System.out.println("Result [" + count + "]:" + result);
            }
        }
        scanner.close();
        ScanMetrics metrics = scan.getScanMetrics();
        System.out.println("Caching: " + caching + ", Batch: " + batch
                + ", Offset: " + offset + ", maxResults: " + maxResults
                + ", maxSize: " + maxResultSize + ", Results: " + count
                + ", RPCs: " + metrics.countOfRPCcalls);
    }

    public static void main(String[] args) throws IOException {
        // Offset 0, at most 2 cells per family: returns columns 1 and 2.
        scan(1, 11, 0, 0, 2, -1, true);
        // Offset 4, at most 2 cells per family: returns columns 5 and 6.
        scan(2, 11, 0, 4, 2, -1, true);
        scan(3, 5, 0, 0, 2, -1, false);
        scan(4, 11, 2, 0, 5, -1, true);
        scan(5, 11, -1, -1, -1, 1, false);
        scan(6, 11, -1, -1, -1, 10000, false);
    }
}
/**
Caching: 11, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 458
Caching: 11, Batch: 0, Offset: 4, maxResults: 2, maxSize: -1, Results: 1, RPCs: 3
Caching: 5, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 1004
Caching: 11, Batch: 2, Offset: 0, maxResults: 5, maxSize: -1, Results: 5009, RPCs: 458
Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 1, Results: 5005, RPCs: 11012
Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 10000, Results: 5005, RPCs: 469
**/