HBase过滤器

1.作用

  • 过滤器的作用是在服务端判断数据是否满足条件,然后只将满足条件的数据返回给客户端。
  • 过滤器可以分为两大类:
    (1)比较过滤器:可以应用于rowkey、列簇、列、列值过滤器
    (2)专用过滤器:只适用于特定的场景,例如 SingleColumnValueFilter、PrefixFilter、PageFilter

2.比较过滤器

比较运算符
  • LESS <
  • LESS_OR_EQUAL <=
  • EQUAL =
  • NOT_EQUAL <>
  • GREATER_OR_EQUAL >=
  • GREATER >
  • NO_OP 排除所有
常见六大比较器(Comparator)
  • BinaryComparator:按字节索引顺序比较指定字节数组,采用Bytes.compareTo(byte[])
  • BinaryPrefixComparator:同BinaryComparator,只是比较左端前缀的数据是否相同。
  • NullComparator:判断给定的值是否为空。
  • BitComparator:按位比较。
  • RegexStringComparator:提供一个正则比较器,仅支持 EQUAL 和 NOT_EQUAL。
  • SubstringComparator:判断提供的子串是否出现在value中,并且不区分大小写。包含子串返回0,不包含返回1,仅支持 EQUAL 和 NOT_EQUAL。
示例代码
rowkey过滤器:RowFilter
@Test
    // Uses a RowFilter with a BinaryComparator to print every row whose rowkey
    // is strictly less than 1500100010 (byte-wise comparison).
    public void BinaryComparatorFilter() throws IOException {
        // Bytes.toBytes encodes as UTF-8 instead of relying on the platform default charset.
        BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("1500100010"));
        RowFilter rowFilter = new RowFilter(CompareOp.LESS, binaryComparator);
        Scan scan = new Scan();
        scan.setFilter(rowFilter);
        // The scanner holds server-side resources, so close it when the loop ends.
        try (org.apache.hadoop.hbase.client.ResultScanner scanner = students.getScanner(scan)) {
            for (Result rs : scanner) {
                String rowkey = Bytes.toString(rs.getRow());
                System.out.print(rowkey + " ");
                List<Cell> cells = rs.listCells();
                for (Cell cell : cells) {
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.print(value + " ");
                }
                System.out.println();
            }
        }
    }
列簇过滤器:FamilyFilter
    @Test
    // Uses a FamilyFilter with a SubstringComparator to print the data of every
    // column family whose name contains "c".
    public void SubstringComparatorFilter() throws IOException {
        SubstringComparator comparator = new SubstringComparator("c");
        FamilyFilter familyFilter = new FamilyFilter(CompareOp.EQUAL, comparator);
        Scan scan = new Scan();
        scan.setFilter(familyFilter);
        // The scanner holds server-side resources, so close it when the loop ends.
        try (org.apache.hadoop.hbase.client.ResultScanner scanner = students.getScanner(scan)) {
            for (Result rs : scanner) {
                String rowkey = Bytes.toString(rs.getRow());
                System.out.print(rowkey + " ");
                List<Cell> cells = rs.listCells();
                for (Cell cell : cells) {
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.print(value + " ");
                }
                System.out.println();
            }
        }
    }
列过滤器:QualifierFilter
 @Test
    // Uses a QualifierFilter with a SubstringComparator to print the values of
    // every column whose qualifier (column name) contains "a".
    public void QualifiterFilter() throws IOException {
        SubstringComparator comparator = new SubstringComparator("a");
        QualifierFilter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, comparator);
        Scan scan = new Scan();
        scan.setFilter(qualifierFilter);
        // The scanner holds server-side resources, so close it when the loop ends.
        try (org.apache.hadoop.hbase.client.ResultScanner scanner = students.getScanner(scan)) {
            for (Result rs : scanner) {
                String rowkey = Bytes.toString(rs.getRow());
                System.out.print(rowkey + " ");
                List<Cell> cells = rs.listCells();
                for (Cell cell : cells) {
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.print(value + " ");
                }
                System.out.println();
            }
        }
    }
列值过滤器:ValueFilter
@Test
    // Uses a ValueFilter to find the cells whose value equals "文科一班".
    // A ValueFilter is evaluated against each cell, so only the matching cells
    // are kept in the result (not the rest of the row).
    public void ValueFilter() throws IOException {
        // Bytes.toBytes encodes as UTF-8; String.getBytes() would use the platform
        // default charset, which may not match how the non-ASCII value was stored.
        BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("文科一班"));
        ValueFilter valueFilter = new ValueFilter(CompareOp.EQUAL, binaryComparator);
        Scan scan = new Scan();
        scan.setFilter(valueFilter);
        // The scanner holds server-side resources, so close it when the loop ends.
        try (org.apache.hadoop.hbase.client.ResultScanner scanner = students.getScanner(scan)) {
            for (Result rs : scanner) {
                String rowkey = Bytes.toString(rs.getRow());
                System.out.print(rowkey + " ");
                List<Cell> cells = rs.listCells();
                for (Cell cell : cells) {
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.print(value + " ");
                }
                System.out.println();
            }
        }
    }

3.完整代码

public class Demo4Filter {

    Connection conn = null;
    Admin admin = null;
    Table students = null;
    TableName name = TableName.valueOf("students");

    public void useFilterAndPrint(Filter filter) throws IOException {
        Scan scan = new Scan();
        scan.setFilter(filter);
        for (Result rs : students.getScanner(scan)) {
            String rowkey = Bytes.toString(rs.getRow());
            System.out.print(rowkey+" ");
            List<Cell> cells = rs.listCells();
            for (Cell cell : cells) {
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.print(value+" ");
            }
            System.out.println();
        }

    }

    @Before
    public void init()  {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","master:2181,node1:2181,node2:2181");
        try {
            conn = ConnectionFactory.createConnection(conf);
            admin = conn.getAdmin();
            students = conn.getTable(name);
        } catch (IOException e) {
            e.printStackTrace();
        }

    }
    @Test
    //通过RowFilter过滤器比rowkey 1500100009 小的所有值
    public void BinaryComparatorFilter() throws IOException {
        Scan scan = new Scan();

        BinaryComparator binaryComparator = new BinaryComparator("1500100010".getBytes());
        RowFilter rowFilter = new RowFilter(CompareOp.LESS, binaryComparator);
        useFilterAndPrint(rowFilter);

    }

    @Test
    //通过FamilyFilter查询列簇名包含c的所有列簇下面的数据
    public void SubstringComparatorFilter() throws IOException {
        Scan scan = new Scan();
        SubstringComparator comparator = new SubstringComparator("c");
        FamilyFilter familyFilter = new FamilyFilter(CompareOp.EQUAL, comparator);
//        scan.setFilter(familyFilter);
//        for (Result rs : students.getScanner(scan)) {
//            String rowkey = Bytes.toString(rs.gcetRow());
//            System.out.print(rowkey+" ");
//            List<Cell> cells = rs.listCells();
//            for (Cell cell : cells) {
//                String value = Bytes.toString(CellUtil.cloneValue(cell));
//                System.out.print(value+" ");
//            }
//            System.out.println();
//        }
        useFilterAndPrint(familyFilter);

    }

    @Test
    //查询列名包含a的列 下面的所有的值
    //QualifiterFilter  SubstringComparator
    public void QualifiterFilter() throws IOException {
        SubstringComparator comparator = new SubstringComparator("a");
        QualifierFilter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, comparator);
        useFilterAndPrint(qualifierFilter);
    }

    @Test
    //过滤文科一班的学生
    //ValueFilter
    //ValueFilter是作用在每一个cell上,只有符合条件的cell才会保留
    public void ValueFilter() throws IOException {
        BinaryComparator binaryComparator = new BinaryComparator("文科一班".getBytes());
        ValueFilter valueFilter = new ValueFilter(CompareOp.EQUAL, binaryComparator);
        useFilterAndPrint(valueFilter);

    }

    @Test
    //过滤文科一班的学生,并且返回学生的所有信息
    //SingleColumnValueFilter
    //会完整的返回整条数据
    //在比较的时候需要指定列簇,如果数据中存在没有所指定的列簇的数据  在会保留并返回
    //在比较的时候需要指定 列名,如果数据中存在没有指定的所有列名的数据  则也会保留并返回
    public void SingleColumnValueFilter() throws IOException {
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("info".getBytes()
                , "clazz".getBytes()
                , CompareOp.EQUAL
                , "文科一班".getBytes()

        );
        useFilterAndPrint(singleColumnValueFilter);
    }

    @Test
    //相对于SingleColumnValueFilter会将用于过滤的列值去除
    public void SingleColumnValueExcludeFilter() throws IOException {
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter("info".getBytes()
                , "clazz".getBytes()
                , CompareOp.EQUAL
                , "文科一班".getBytes()

        );
        useFilterAndPrint(singleColumnValueExcludeFilter);
    }

    @Test
    //通过PrefixFilterFilter查询以150010008开头的所有前缀的rowkey
    //相当于BinaryComparator 加上RowFilter
    public void PrefixFilterFilter() throws IOException {
        PrefixFilter prefixFilter = new PrefixFilter("150010008".getBytes());
        useFilterAndPrint(prefixFilter);

    }

    @Test
    //通过PageFilter查询第三页的数据,每页10条
    public void PageFilter() throws IOException {
        int page = 3;
        int pageSize = 10;

        //计算出当前页面的第一条数据的位置(相对于整个数据的第一条)
        int current_page_first_size = (page - 1) * pageSize + 1;   //21
//        System.out.println(current_page_first_size);
        PageFilter pageFilter = new PageFilter(current_page_first_size);
        Scan scan = new Scan();
        scan.setFilter(pageFilter);
        byte[] rk = null;
        for (Result result : students.getScanner(scan)) {
            //取出当前页面的第一条数据的rowkey
            rk = result.getRow();


        }
        PageFilter pageFilter1 = new PageFilter(pageSize);
        Scan scan1 = new Scan();
        scan1.withStartRow(rk);
        scan1.setFilter(pageFilter1);
        for (Result rs : students.getScanner(scan1)) {
            String rowkey = Bytes.toString(rs.getRow());
            System.out.print(rowkey+" ");
            List<Cell> cells = rs.listCells();
            for (Cell cell : cells) {
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.print(value+" ");
            }
            System.out.println();
        }


    }

    @Test
    //通过合理的设计rowkey查询第五页的数据,每10条
    //students表种的rowkey是从1500100001依次递增至1500100000
    //所以第五页数据的rowkey范围可以直接计算出来
    //1500100047~1500100050
    public void PageFilterWithRowkey() throws IOException {
        String rowkeyPrefix = "1500100000";
        //五页,每页10条数据
        int page = 5;
        int pageSize = 10;

        int current_page_start_row_offset = (page - 1) * pageSize + 1;

        int i = Integer.parseInt(rowkeyPrefix);
        int start_rowkey = i+current_page_start_row_offset;
        int end_rowkey = start_rowkey+pageSize;
        String start_rowkey_str = start_rowkey+" ";
        String end_rowkey_str  = end_rowkey+"";

        Scan scan = new Scan();

        scan.withStartRow(start_rowkey_str.getBytes());
        scan.withStopRow(end_rowkey_str.getBytes());

        for (Result rs : students.getScanner(scan)) {
            String rowkey = Bytes.toString(rs.getRow());
            System.out.print(rowkey+" ");
            List<Cell> cells = rs.listCells();
            for (Cell cell : cells) {
                String value = Bytes.toString(CellUtil.cloneValue(cell));
                System.out.print(value+" ");
            }
            System.out.println();
        }

    }

    @Test
    // 查询文科班中的学生中学号以150010008开头并且年龄小于23的学生信息
    public void FilterListFilter() throws IOException {

        PrefixFilter prefixFilter = new PrefixFilter("150010008".getBytes());
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("info".getBytes(), "age".getBytes(), CompareOp.LESS,"23".getBytes());
        SingleColumnValueFilter singleColumnValueFilter1 = new SingleColumnValueFilter("info".getBytes(), "clazz".getBytes(), CompareOp.EQUAL,"文科一班".getBytes());
        //通过FilterList将多个过滤器组装起来
        FilterList filterList = new FilterList();
        filterList.addFilter(prefixFilter);
        filterList.addFilter(singleColumnValueFilter);
        useFilterAndPrint(filterList);
    }

    @After
    public void close(){
        try {
            admin.close();
            conn.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值