HBase专用过滤器

本文详细介绍了HBase中的过滤器使用,包括RowFilter、FamilyFilter、QualifierFilter、ValueFilter、SingleColumnValueFilter、SingleColumnValueExcludeFilter、PrefixFilter、PageFilter等多种过滤器的实例,展示了如何通过这些过滤器进行数据查询和筛选,帮助理解HBase数据过滤机制。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;

/**
 * JUnit demos of the HBase comparison filters against the {@code stu} table.
 *
 * <p>Each test builds one filter, scans the table with it and prints the matching
 * rows to standard output. The tests need a reachable HBase cluster (see
 * {@link #init()}); they make no assertions.
 *
 * <p>All String-to-byte conversions go through {@link Bytes#toBytes(String)}, which
 * always encodes as UTF-8. {@code String.getBytes()} would use the JVM's default
 * charset and encode the Chinese literals differently on a non-UTF-8 platform.
 */
public class Demo04Filter {
    /** Column family that holds the student attributes read by these demos. */
    private static final byte[] INFO = Bytes.toBytes("info");

    Connection conn;
    Admin admin;
    TableName stuName;
    Table stu;

    /**
     * Opens a full-table scan on {@code stu} restricted by the given filter.
     *
     * @param filter the filter to apply to the scan
     * @return an open scanner; the caller must close it (the print helpers in this class do so)
     * @throws IOException if the scanner cannot be opened
     */
    public ResultScanner getScanner(Filter filter) throws IOException {
        Scan scan = new Scan();
        scan.setFilter(filter);
        return stu.getScanner(scan);
    }

    /**
     * Prints every row as {@code id,name,age,gender,clazz} and then closes the scanner.
     *
     * <p>Use this when every row has the same set of columns. A column that is absent
     * from a row is printed as {@code null}.
     *
     * @param rss the scanner to drain; it is closed before this method returns
     */
    public void printScanner(ResultScanner rss) {
        // Closing releases the scanner's server-side resources even if printing fails.
        try (ResultScanner scanner = rss) {
            for (Result rs : scanner) {
                String id = Bytes.toString(rs.getRow());
                String name = column(rs, "name");
                String age = column(rs, "age");
                String gender = column(rs, "gender");
                String clazz = column(rs, "clazz");

                System.out.println(id + "," + name + "," + age + "," + gender + "," + clazz);
            }
        }
    }

    /**
     * Prints every returned cell as {@code rowKey,family,qualifier,value} and then closes
     * the scanner.
     *
     * <p>Use this when rows do not share a fixed column layout, e.g. when a filter keeps
     * only some of the cells of each row.
     *
     * @param rss the scanner to drain; it is closed before this method returns
     */
    public void printScannerWithCellUnit(ResultScanner rss) {
        try (ResultScanner scanner = rss) {
            for (Result rs : scanner) {
                if (rs.isEmpty()) {
                    // listCells() returns null for an empty Result.
                    continue;
                }
                String rk = Bytes.toString(rs.getRow());
                for (Cell cell : rs.listCells()) {
                    String cf = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qua = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(rk + "," + cf + "," + qua + "," + value);
                }
            }
        }
    }

    /** Reads {@code info:<qualifier>} from a row as a String, or {@code null} if absent. */
    private static String column(Result rs, String qualifier) {
        return Bytes.toString(rs.getValue(INFO, Bytes.toBytes(qualifier)));
    }

    /**
     * Connects to the cluster and opens the {@code stu} table before each test.
     *
     * @throws IOException if the connection, admin handle or table cannot be obtained
     */
    @Before
    public void init() throws IOException {
        // Build the HBase client configuration.
        Configuration conf = HBaseConfiguration.create();
        // Address of the ZooKeeper quorum the HBase cluster registers with.
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");

        // Establish the connection.
        conn = ConnectionFactory.createConnection(conf);
        // Admin handle for metadata operations; none of the demos below use it.
        admin = conn.getAdmin();
        stuName = TableName.valueOf("stu");
        stu = conn.getTable(stuName);
    }

    /**
     * Row-key filter: prints all rows whose key sorts before {@code 1500100010}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void RowFilterWithBinaryComparator() throws IOException {
        // An ordering comparison needs a byte-wise comparator.
        BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("1500100010"));

        // "less than", hence CompareOp.LESS.
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.LESS, binaryComparator);

        printScanner(getScanner(rowFilter));
    }

    /**
     * Column-family filter: prints all cells whose family name contains {@code "f"}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void FamilyFilterWithSubstringComparator() throws IOException {
        SubstringComparator substringComparator = new SubstringComparator("f");

        FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, substringComparator);

        printScannerWithCellUnit(getScanner(familyFilter));
    }

    /**
     * Qualifier filter: prints all cells whose column name matches {@code .*[mld].*},
     * i.e. contains {@code m}, {@code l} or {@code d}.
     *
     * <p>Despite the method name, the comparator used is a {@link RegexStringComparator}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void QualifierFilterWithSubstringComparator() throws IOException {
        RegexStringComparator regexStringComparator = new RegexStringComparator(".*[mld].*");

        QualifierFilter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, regexStringComparator);

        printScannerWithCellUnit(getScanner(qualifierFilter));
    }

    /**
     * Value filter: prints every cell whose value starts with "张".
     *
     * <p>The comparison is not tied to one column; it is applied to the value of every cell.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ValueFilterWithBinaryPrefixComparator() throws IOException {
        BinaryPrefixComparator binaryPrefixComparator = new BinaryPrefixComparator(Bytes.toBytes("张"));

        ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, binaryPrefixComparator);

        printScannerWithCellUnit(getScanner(valueFilter));
    }

    /**
     * Single-column value filter: prints the complete row of every student whose
     * {@code info:clazz} starts with "文科".
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueFilter() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                INFO,
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        // By default a row that lacks the tested column passes the filter; exclude such rows.
        singleColumnValueFilter.setFilterIfMissing(true);

        printScanner(getScanner(singleColumnValueFilter));
    }

    /**
     * Single-column value exclude filter: same row selection as
     * {@link #SingleColumnValueFilter()}, but the tested column ({@code info:clazz}) is
     * left out of the returned rows, so it prints as {@code null}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueExcludeFilter() throws IOException {
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(
                INFO,
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        // By default a row that lacks the tested column passes the filter; exclude such rows.
        singleColumnValueExcludeFilter.setFilterIfMissing(true);

        printScanner(getScanner(singleColumnValueExcludeFilter));
    }

    /**
     * Releases the table, the admin handle and the connection after each test.
     *
     * <p>Each handle is closed even if {@link #init()} failed partway or an earlier
     * close throws.
     *
     * @throws IOException if closing any of the handles fails
     */
    @After
    public void close() throws IOException {
        try {
            if (stu != null) {
                stu.close();
            }
        } finally {
            try {
                if (admin != null) {
                    admin.close();
                }
            } finally {
                if (conn != null) {
                    conn.close();
                }
            }
        }
    }
}
package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
 * JUnit demos of the HBase filters (comparison filters, {@code PrefixFilter},
 * {@code PageFilter}, {@code FilterList}) against the {@code stu} table.
 *
 * <p>Each test builds a scan, runs it and prints the matching rows to standard output.
 * The tests need a reachable HBase cluster (see {@link #init()}); they make no assertions.
 *
 * <p>All String-to-byte conversions go through {@link Bytes#toBytes(String)}, which
 * always encodes as UTF-8. {@code String.getBytes()} would use the JVM's default
 * charset and encode the Chinese literals differently on a non-UTF-8 platform.
 */
public class Demo04Filter {
    /** Column family that holds the student attributes read by these demos. */
    private static final byte[] INFO = Bytes.toBytes("info");

    Connection conn;
    Admin admin;
    TableName stuName;
    Table stu;

    /**
     * Opens a full-table scan on {@code stu} restricted by the given filter.
     *
     * @param filter the filter to apply to the scan
     * @return an open scanner; the caller must close it (the print helpers in this class do so)
     * @throws IOException if the scanner cannot be opened
     */
    public ResultScanner getScanner(Filter filter) throws IOException {
        Scan scan = new Scan();
        scan.setFilter(filter);
        return stu.getScanner(scan);
    }

    /**
     * Prints every row as {@code id,name,age,gender,clazz} and then closes the scanner.
     *
     * <p>Use this when every row has the same set of columns. A column that is absent
     * from a row is printed as {@code null}.
     *
     * @param rss the scanner to drain; it is closed before this method returns
     */
    public void printScanner(ResultScanner rss) {
        // Closing releases the scanner's server-side resources even if printing fails.
        try (ResultScanner scanner = rss) {
            for (Result rs : scanner) {
                String id = Bytes.toString(rs.getRow());
                String name = column(rs, "name");
                String age = column(rs, "age");
                String gender = column(rs, "gender");
                String clazz = column(rs, "clazz");

                System.out.println(id + "," + name + "," + age + "," + gender + "," + clazz);
            }
        }
    }

    /**
     * Prints every returned cell as {@code rowKey,family,qualifier,value} and then closes
     * the scanner.
     *
     * <p>Use this when rows do not share a fixed column layout, e.g. when a filter keeps
     * only some of the cells of each row.
     *
     * @param rss the scanner to drain; it is closed before this method returns
     */
    public void printScannerWithCellUnit(ResultScanner rss) {
        try (ResultScanner scanner = rss) {
            for (Result rs : scanner) {
                if (rs.isEmpty()) {
                    // listCells() returns null for an empty Result.
                    continue;
                }
                String rk = Bytes.toString(rs.getRow());
                for (Cell cell : rs.listCells()) {
                    String cf = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qua = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(rk + "," + cf + "," + qua + "," + value);
                }
            }
        }
    }

    /** Reads {@code info:<qualifier>} from a row as a String, or {@code null} if absent. */
    private static String column(Result rs, String qualifier) {
        return Bytes.toString(rs.getValue(INFO, Bytes.toBytes(qualifier)));
    }

    /**
     * Connects to the cluster and opens the {@code stu} table before each test.
     *
     * @throws IOException if the connection, admin handle or table cannot be obtained
     */
    @Before
    public void init() throws IOException {
        // Build the HBase client configuration.
        Configuration conf = HBaseConfiguration.create();
        // Address of the ZooKeeper quorum the HBase cluster registers with.
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");

        // Establish the connection.
        conn = ConnectionFactory.createConnection(conf);
        // Admin handle for metadata operations; none of the demos below use it.
        admin = conn.getAdmin();
        stuName = TableName.valueOf("stu");
        stu = conn.getTable(stuName);
    }

    /**
     * Row-key filter: prints all rows whose key sorts before {@code 1500100010}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void RowFilterWithBinaryComparator() throws IOException {
        // An ordering comparison needs a byte-wise comparator.
        BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("1500100010"));

        // "less than", hence CompareOp.LESS.
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.LESS, binaryComparator);

        printScanner(getScanner(rowFilter));
    }

    /**
     * Column-family filter: prints all cells whose family name contains {@code "f"}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void FamilyFilterWithSubstringComparator() throws IOException {
        SubstringComparator substringComparator = new SubstringComparator("f");

        FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, substringComparator);

        printScannerWithCellUnit(getScanner(familyFilter));
    }

    /**
     * Qualifier filter: prints all cells whose column name matches {@code .*[mld].*},
     * i.e. contains {@code m}, {@code l} or {@code d}.
     *
     * <p>Despite the method name, the comparator used is a {@link RegexStringComparator}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void QualifierFilterWithSubstringComparator() throws IOException {
        RegexStringComparator regexStringComparator = new RegexStringComparator(".*[mld].*");

        QualifierFilter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, regexStringComparator);

        printScannerWithCellUnit(getScanner(qualifierFilter));
    }

    /**
     * Value filter: prints every cell whose value starts with "张".
     *
     * <p>The comparison is not tied to one column; it is applied to the value of every cell.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void ValueFilterWithBinaryPrefixComparator() throws IOException {
        BinaryPrefixComparator binaryPrefixComparator = new BinaryPrefixComparator(Bytes.toBytes("张"));

        ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, binaryPrefixComparator);

        printScannerWithCellUnit(getScanner(valueFilter));
    }

    /**
     * Single-column value filter: prints the complete row of every student whose
     * {@code info:clazz} starts with "文科".
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueFilter() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                INFO,
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        // By default a row that lacks the tested column passes the filter; exclude such rows.
        singleColumnValueFilter.setFilterIfMissing(true);

        printScanner(getScanner(singleColumnValueFilter));
    }

    /**
     * Single-column value exclude filter: same row selection as
     * {@link #SingleColumnValueFilter()}, but the tested column ({@code info:clazz}) is
     * left out of the returned rows, so it prints as {@code null}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void SingleColumnValueExcludeFilter() throws IOException {
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(
                INFO,
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科")));
        // By default a row that lacks the tested column passes the filter; exclude such rows.
        singleColumnValueExcludeFilter.setFilterIfMissing(true);

        printScanner(getScanner(singleColumnValueExcludeFilter));
    }

    /**
     * Row-key prefix filter: prints all rows whose key starts with {@code 150010008}.
     *
     * <p>The row selection is the same as a {@code RowFilter} combined with a
     * {@code BinaryPrefixComparator}.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void PrefixFilter() throws IOException {
        PrefixFilter prefixFilter = new PrefixFilter(Bytes.toBytes("150010008"));

        printScanner(getScanner(prefixFilter));
    }

    /**
     * Paging with {@code PageFilter}: prints page 3 at 10 rows per page.
     *
     * <p>A first scan walks the rows that precede the page to find the row key the page
     * starts at; a second scan then reads {@code pageSize} rows from that key. Nothing is
     * printed when the table has too few rows for the requested page to exist.
     *
     * @throws IOException if either scan fails
     */
    @Test
    public void PageFilter() throws IOException {
        int pageNum = 3;
        int pageSize = 10;
        // Number of rows up to and including the first row of the requested page.
        int rowsToPageStart = (pageNum - 1) * pageSize + 1;

        Scan locateScan = new Scan();
        locateScan.setFilter(new PageFilter(rowsToPageStart));
        // PageFilter is applied separately on each region server, so on a multi-region
        // table it can let more than rowsToPageStart rows through. The client-side
        // limit makes the scan stop at exactly the row we are looking for.
        locateScan.setLimit(rowsToPageStart);

        byte[] pageStartRow = null;
        int rowsSeen = 0;
        try (ResultScanner rss = stu.getScanner(locateScan)) {
            for (Result rs : rss) {
                pageStartRow = rs.getRow();
                rowsSeen++;
            }
        }
        if (rowsSeen < rowsToPageStart) {
            // The table ends before the requested page begins.
            return;
        }

        Scan pageScan = new Scan();
        pageScan.withStartRow(pageStartRow);
        pageScan.setLimit(pageSize);
        printScanner(stu.getScanner(pageScan));
    }

    /**
     * Paging by row-key arithmetic: prints page 7 at 10 rows per page.
     *
     * <p>{@link #PageFilter()} has to walk every row before the page, so paging is
     * usually built into the row-key design instead. Here the start and stop keys are
     * computed directly from the page number.
     *
     * <p>NOTE(review): this assumes row keys are the consecutive decimal strings
     * {@code 1500100001, 1500100002, ...} with no gaps; confirm against the table data.
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void pageWithRk() throws IOException {
        int rkBase = 1500100000;
        int pageNum = 7;
        int pageSize = 10;

        // First key of the page (inclusive) and first key of the next page (exclusive).
        String startRow = String.valueOf(rkBase + (pageNum - 1) * pageSize + 1);
        String endRow = String.valueOf(rkBase + pageNum * pageSize + 1);

        Scan scan = new Scan();
        scan.withStartRow(Bytes.toBytes(startRow));
        scan.withStopRow(Bytes.toBytes(endRow));

        printScanner(stu.getScanner(scan));
    }

    /**
     * Multi-condition query: prints students whose {@code info:clazz} starts with
     * "文科一班", whose {@code info:gender} starts with "男", and whose row key contains "8".
     *
     * @throws IOException if the scan fails
     */
    @Test
    public void FilterList() throws IOException {
        SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
                INFO,
                Bytes.toBytes("clazz"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("文科一班")));
        // By default a row that lacks the tested column passes the filter; exclude such rows.
        filter1.setFilterIfMissing(true);

        SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
                INFO,
                Bytes.toBytes("gender"),
                CompareFilter.CompareOp.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("男")));
        filter2.setFilterIfMissing(true);

        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("8"));

        // Operator.MUST_PASS_ALL combines the filters with AND (it is also the default);
        // Operator.MUST_PASS_ONE would combine them with OR.
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        filterList.addFilter(filter1);
        filterList.addFilter(filter2);
        filterList.addFilter(rowFilter);

        printScanner(getScanner(filterList));
    }

    /**
     * Releases the table, the admin handle and the connection after each test.
     *
     * <p>Each handle is closed even if {@link #init()} failed partway or an earlier
     * close throws.
     *
     * @throws IOException if closing any of the handles fails
     */
    @After
    public void close() throws IOException {
        try {
            if (stu != null) {
                stu.close();
            }
        } finally {
            try {
                if (admin != null) {
                    admin.close();
                }
            } finally {
                if (conn != null) {
                    conn.close();
                }
            }
        }
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值