HBase JAVA API

最新推荐文章于 2023-04-25 16:04:00 发布

转载最新推荐文章于 2023-04-25 16:04:00 发布 · 396 阅读

【HBase】专栏收录该内容

10 篇文章

订阅专栏

本文介绍如何使用 HBase 的 Java API 进行表的增删查改操作，并演示了多种 Filter 的使用方法，包括组合 Filter 来实现复杂查询。

转载自：
https://blog.youkuaiyun.com/kongxx/article/details/79245829
https://blog.youkuaiyun.com/kongxx/article/details/79223790
https://blog.youkuaiyun.com/kongxx/article/details/79234928

　　使用的client版本是org.apache.hbase:hbase-client:1.3.0。
　　直接看示例：

    // Shared HBase connection: assigned in main() and read by the helper methods of this class.
    static Connection connect = null;

    /**
     * Demo entry point: connects to HBase through ZooKeeper at 127.0.0.1:2182, creates the
     * "test1" table, lists all tables, writes one cell and reads it back.
     *
     * @param args unused
     * @throws Exception if connecting or any HBase operation fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.property.clientPort", "2182");
        conf.set("hbase.zookeeper.quorum", "127.0.0.1");
        connect = ConnectionFactory.createConnection(conf);

        // Release the connection even when one of the demo steps fails.
        try {
            createTable("test1", null);
            getAllTable();
            // NOTE(review): the same cell is written twice; presumably to show that a
            // repeated put on an existing cell is accepted - confirm.
            putRow("test1", "001", "base_info", "name", "zero");
            putRow("test1", "001", "base_info", "name", "zero");
            getRow("test1", "001");
            getAllRows("test1");
        } finally {
            connect.close();
        }
    }

    /**
     * Creates {@code tableName} with the column families "base_info" and "ext_info", unless a
     * table with that name already exists.
     *
     * @param tableName name of the table to create
     * @param column not used by this method; kept so the signature stays unchanged
     * @throws Exception if the existence check or the table creation fails
     */
    private static void createTable(String tableName, String column) throws Exception {
        HTableDescriptor table = new HTableDescriptor(TableName.valueOf(tableName));

        // try-with-resources closes the Admin handle even when an HBase call throws.
        try (Admin admin = connect.getAdmin()) {
            if (admin.tableExists(table.getTableName())) {
                System.out.println("表已经存在！");
            } else {
                table.addFamily(new HColumnDescriptor("base_info")); // create the column families
                table.addFamily(new HColumnDescriptor("ext_info"));

                admin.createTable(table);
                System.out.println(tableName + "创建成功！");
            }
        }

        // Alternative way to describe the table using the builder-style API:
        // TableDescriptor tableDesc = TableDescriptorBuilder.newBuilder(TableName.valueOf(TABLE_NAME))
        //         .addColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(COLUMN_FAMILY_BASE)).build())
        //         .addColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(COLUMN_FAMILY_ADDRESS)).build())
        //         .build();
    }

    /**
     * Writes one cell ({@code family:qualifier = value}) into row {@code rowkey} of
     * {@code tableName} and prints whether the write succeeded.
     *
     * <p>An {@link IOException} raised by the put itself is printed and not rethrown.
     *
     * @param tableName table to write to
     * @param rowkey row key of the target row
     * @param family column family of the cell
     * @param qualifier column qualifier of the cell
     * @param value cell value
     * @throws Exception if the table cannot be obtained or closed
     */
    private static void putRow(String tableName, String rowkey,
            String family, String qualifier, String value) throws Exception {
        final Table target = connect.getTable(TableName.valueOf(tableName));

        final Put mutation = new Put(Bytes.toBytes(rowkey));
        final byte[] familyBytes = Bytes.toBytes(family);
        final byte[] qualifierBytes = Bytes.toBytes(qualifier);
        final byte[] valueBytes = Bytes.toBytes(value);
        mutation.addColumn(familyBytes, qualifierBytes, valueBytes);

        try {
            target.put(mutation);
            System.out.println("行键" + rowkey + " 内容插入成功！");
        } catch (IOException writeFailure) {
            // Best-effort demo: report the failure and carry on.
            writeFailure.printStackTrace();
            System.out.println("行键 " + rowkey + " 插入内容失败！");
        } finally {
            target.close();
        }
    }

    /**
     * Fetches row {@code rowkey} from {@code tableName} and prints every cell as
     * {@code row<TAB>family: qualifier -> value}, or a notice when the row has no cells.
     *
     * <p>An {@link IOException} raised by the get is printed and not rethrown.
     *
     * @param tableName table to read from
     * @param rowkey row key to look up
     * @throws Exception if the table cannot be obtained or closed
     */
    private static void getRow(String tableName, String rowkey) throws Exception {
        final Table source = connect.getTable(TableName.valueOf(tableName));
        final Get lookup = new Get(Bytes.toBytes(rowkey));
        try {
            final Cell[] cells = source.get(lookup).rawCells();
            if (cells.length == 0) {
                System.out.println("行键" + rowkey + "内容为空！");
            }
            // The loop is a no-op for an empty row, so no else branch is needed.
            for (int i = 0; i < cells.length; i++) {
                final Cell current = cells[i];

                // Decode each component straight from the cell's backing array.
                // The CellUtil.cloneRow/cloneFamily/cloneQualifier/cloneValue helpers
                // could be used here instead.
                final String row = Bytes.toString(
                        current.getRowArray(), current.getRowOffset(), current.getRowLength());
                final String family = Bytes.toString(
                        current.getFamilyArray(), current.getFamilyOffset(), current.getFamilyLength());
                final String column = Bytes.toString(
                        current.getQualifierArray(), current.getQualifierOffset(), current.getQualifierLength());
                final String value = Bytes.toString(
                        current.getValueArray(), current.getValueOffset(), current.getValueLength());

                System.out.println(row + "\t" + family + ": " + column + " -> " + value);
            }
        } catch (IOException readFailure) {
            readFailure.printStackTrace();
        } finally {
            source.close();
        }
    }

    /**
     * Scans the whole of {@code tableName} and prints every cell as
     * {@code row<TAB>family: qualifier -> value}.
     *
     * @param tableName table to scan
     * @throws Exception if the table cannot be obtained or the scan fails
     */
    private static void getAllRows(String tableName) throws Exception {
        Scan scan = new Scan();
        // Both the table and the scanner are Closeable; try-with-resources closes the
        // scanner first, then the table, even if the scan throws part-way through.
        try (Table table = connect.getTable(TableName.valueOf(tableName));
                ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                Cell[] cells = result.rawCells();
                if (cells.length == 0) {
                    System.out.println(tableName + "  为空");
                } else {
                    for (Cell cell : cells) {
                        String row = Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
                        String family = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
                        String column = Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
                        String value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
                        System.out.println(row + "\t" + family + ": " + column + " -> " + value);
                    }
                }
            }
        }
    }

    /**
     * Disables and then deletes {@code tableName} if it exists; does nothing when it does not.
     *
     * <p>An {@link IOException} raised while checking, disabling or deleting is printed and
     * not rethrown.
     *
     * @param tableName table to delete
     * @throws Exception if the admin handle cannot be obtained or closed
     */
    private static void deleteTable(String tableName) throws Exception {
        final Admin hbaseAdmin = connect.getAdmin();
        try {
            final TableName target = TableName.valueOf(tableName);
            if (!hbaseAdmin.tableExists(target)) {
                return; // nothing to delete; the finally block still closes the admin
            }
            // The table is disabled before it is deleted.
            hbaseAdmin.disableTable(target);
            hbaseAdmin.deleteTable(target);
            System.out.println("表:" + tableName + " 删除成功");
        } catch (IOException failure) {
            failure.printStackTrace();
            System.out.println("表" + tableName + "删除失败");
        } finally {
            hbaseAdmin.close();
        }
    }

    /**
     * Deletes the whole row {@code rowKey} from {@code tableName} and prints the outcome.
     *
     * <p>An {@link IOException} raised by the delete is printed and not rethrown.
     *
     * @param tableName table that holds the row
     * @param rowKey row key of the row to remove
     * @throws Exception if the table cannot be obtained or closed
     */
    public static void deleteRow(String tableName, String rowKey) throws Exception {
        final Table target = connect.getTable(TableName.valueOf(tableName));
        final byte[] keyBytes = Bytes.toBytes(rowKey);
        final Delete removal = new Delete(keyBytes);
        try {
            target.delete(removal);
            System.out.println(rowKey + "记录删除成功！");
        } catch (IOException failure) {
            failure.printStackTrace();
            System.out.println(rowKey + "记录删除失败！");
        } finally {
            target.close();
        }
    }

    /**
     * Lists all tables, printing each name to standard output.
     *
     * @return the table names in the order the admin API returned them; empty if there are none
     * @throws Exception if the admin handle cannot be obtained or the listing fails
     */
    private static List<String> getAllTable() throws Exception {
        List<String> tables = new ArrayList<>();
        // try-with-resources closes the Admin handle even when listTables() throws.
        try (Admin admin = connect.getAdmin()) {
            for (HTableDescriptor tableDesc : admin.listTables()) {
                String name = tableDesc.getNameAsString();
                tables.add(name);
                System.out.println(name);
            }
        }
        return tables;
    }

　　在使用HBase的API查询数据的时候，经常需要设置一些过滤条件来查询数据，这个时候就需要使用Filter来实现这一功能。在HBase API中使用过滤器需要创建一个Filter实例，然后使用Scan.setFilter()或者Get.setFilter()来使用 Filter。

// Scan the table, keeping only the row whose key equals "row_2".
// try-with-resources closes the scanner and then the table, even if the scan throws.
try (Table table = connection.getTable(TableName.valueOf(TABLE_NAME))) {
    Scan scan = new Scan();

    Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("row_2")));
    scan.setFilter(filter);

    try (ResultScanner resultScanner = table.getScanner(scan)) {
        for (Result result : resultScanner) {
            // ...
        }
    }
}

　　在 HBase API 中提供了大量的 Filter 实现，比如一些常见的 Filter：

RowFilter: 过滤指定的行记录
FamilyFilter: 过滤指定的列族，其它列族返回null
QualifierFilter: 过滤指定的列，其它列返回null
ValueFilter: 过滤指定的值，其它列返回null
SingleColumnValueFilter: 单列值过滤器
SingleColumnValueExcludeFilter: 单列值排除过滤器，被排除的列返回null
PageFilter: 分页过滤器
ColumnPaginationFilter: 列分页过滤器
…

　　设置SingleColumnValueFilter时候注意的地方：要过滤的列必须存在，如果不存在，那么这些列不存在的数据也会返回。如果不想让这些数据返回，做如下设置即可。filter.setFilterIfMissing(true);

　　在HBase API中提供了一些常用比较运算符，这些比较运算符可以用来比较过滤器中的值，如：

CompareOp.LESS
CompareOp.LESS_OR_EQUAL
CompareOp.EQUAL
CompareOp.NOT_EQUAL
CompareOp.GREATER
CompareOp.GREATER_OR_EQUAL
CompareOp.NO_OP

　　在HBase API中还提供了一些常用比较器，这些比较器可以用来比较过滤器中的值，如：

BinaryComparator
RegexStringComparator
NullComparator
SubstringComparator
…

HBase 的分页过滤器
　　在HBase中分页过滤是通过PageFilter来实现的，在创建这个过滤器的时候需要设置一个pageSize参数，通过这个参数来控制每页返回的行数，并且在每次查询时需要指定本次查询的起始行。
　　这里有一点需要注意，HBase中行键的排序是按字典顺序排列的，因此返回的结果也是按此顺序排列。

// Page through the table 10 rows at a time, resuming each page after the last row seen.
Filter filter = new PageFilter(10);

// try-with-resources closes the table (and each page's scanner) even if a scan throws.
try (Table table = connection.getTable(TableName.valueOf(TABLE_NAME))) {
    byte[] lastRow = null;
    while (true) {
        Scan scan = new Scan();
        scan.setFilter(filter);
        if (lastRow != null) {
            // false = exclusive: start after the last row of the previous page.
            scan.withStartRow(lastRow, false);
        }

        int count = 0;
        try (ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                printRow(result);
                lastRow = result.getRow();
                count++;
            }
        }

        // An empty page means the previous page was the last one.
        if (count == 0) {
            break;
        }
    }
}

首先需要创建一个PageFilter对象，并设置每页10行。
然后通过Scan.withStartRow()来设置起始行，对于第一次查询，可以不用设置。其中第二个参数是用来标识是否需要包括指定起始行。
执行查询，对于每次查询设置了一个计数，当计数为 0 时，表示本次查询没有返回结果，说明查询遍历完成，此时跳出循环。

　　上面介绍了怎样使用单个Filter来过滤数据，但是很多情况下我们需要做一下组合过滤，比如有逻辑与和逻辑或的查询，此时我们可以使用FilterList来实现了。
　　FilterList也是实现了Filter接口，因此可以通过多个过滤器组合来实现某些效果。看下面的例子，我们创建了两个filter，第一个是过滤 username=user_0，第二个是过滤 password=password_0，然后我们将这两个filter组合到一个FilterList对象中，并且指定组合操作符为MUST_PASS_ALL，意思是过滤满足这两个条件的记录。然后就可以像使用普通过滤器一样来扫描记录了。

// Two single-column conditions on the "base" family: username == user_0 and password == password_0.
Filter filter1 = new SingleColumnValueFilter(Bytes.toBytes("base"), Bytes.toBytes("username"),
        CompareOperator.EQUAL, new BinaryComparator(Bytes.toBytes("user_0")));
Filter filter2 = new SingleColumnValueFilter(Bytes.toBytes("base"), Bytes.toBytes("password"),
        CompareOperator.EQUAL, new BinaryComparator(Bytes.toBytes("password_0")));

// MUST_PASS_ALL combines the filters with logical AND.
FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filter1, filter2);

// try-with-resources closes the scanner and then the table, even if the scan throws.
try (Table table = connection.getTable(TableName.valueOf(TABLE_NAME))) {
    Scan scan = new Scan();
    scan.setFilter(filterList);

    try (ResultScanner resultScanner = table.getScanner(scan)) {
        for (Result result : resultScanner) {
            printRow(result);
        }
    }
}

　　在HBase中如果更新（添加/修改/删除）记录，是按行一条一条更新的，这种方法在处理大量更新操作时，性能比较差，HBase中提供了以Batch方式来批量更新数据表的方法。

        // Write 10000 rows in a single batch call instead of one put per row.
        // try-with-resources closes the table even if building or sending the batch throws.
        try (Table table = connection.getTable(TableName.valueOf(TABLE_NAME))) {
            List<Row> actions = new ArrayList<Row>(10000);
            for (int i = 0; i < 10000; i++) {
                Put put = new Put(Bytes.toBytes("row_" + i));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("c1"), Bytes.toBytes("user_" + i));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("c2"), Bytes.toBytes("password_" + i));
                actions.add(put);
            }
            // One result slot per action, passed to batch() alongside the actions.
            Object[] results = new Object[actions.size()];

            try {
                table.batch(actions, results);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can still observe the interruption.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }
        }