1.作用
- 过滤器的作用是在服务端判断数据是否满足条件,然后只将满足条件的数据返回给客户端。
- 过滤器可以分为两大类:
(1)比较过滤器:可以应用于rowkey、列簇、列、列值过滤器
(2)专用过滤器:只适用于特定场景的过滤器(例如 SingleColumnValueFilter、PrefixFilter、PageFilter)
2.比较过滤器
比较运算符
- LESS <
- LESS_OR_EQUAL <=
- EQUAL =
- NOT_EQUAL <>
- GREATER_OR_EQUAL >=
- GREATER >
- NO_OP 排除所有
常见六大比较器(与上面的比较运算符配合,传给比较过滤器使用)
- BinaryComparator:按字节索引顺序比较指定字节数组,采用Bytes.compareTo(byte[])
- BinaryPrefixComparator:同BinaryComparator,只是比较左端前缀的数据是否相同。
- NullComparator:判断给定的是否为空。
- BitComparator:按位比较。
- RegexStringComparator:提供一个正则比较器,仅支持 EQUAL 和 NOT_EQUAL。
- SubstringComparator:判断提供的子串是否出现在value中,并且不区分大小写。包含子串返回0,不包含返回1,仅支持 EQUAL 和 NOT_EQUAL。
示例代码
rowkey过滤器:RowFilter
/**
 * Uses a RowFilter with a BinaryComparator to return every row whose rowkey
 * compares (byte-wise) as less than "1500100010", i.e. rowkeys up to 1500100009.
 *
 * @throws IOException if the scan fails
 */
@Test
public void BinaryComparatorFilter() throws IOException {
    // Bytes.toBytes encodes as UTF-8, matching Bytes.toString used when printing.
    BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("1500100010"));
    RowFilter rowFilter = new RowFilter(CompareOp.LESS, binaryComparator);
    Scan scan = new Scan();
    // Attach the row filter built above so it is evaluated on the server side.
    scan.setFilter(rowFilter);
    for (Result rs : students.getScanner(scan)) {
        // Print the rowkey followed by every cell value of the row on one line.
        String rowkey = Bytes.toString(rs.getRow());
        System.out.print(rowkey+" ");
        List<Cell> cells = rs.listCells();
        for (Cell cell : cells) {
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.print(value+" ");
        }
        System.out.println();
    }
}
列簇过滤器:FamilyFilter
/**
 * Uses a FamilyFilter with a SubstringComparator to return the data of every
 * column family whose name contains "c".
 *
 * @throws IOException if the scan fails
 */
@Test
public void SubstringComparatorFilter() throws IOException {
    SubstringComparator comparator = new SubstringComparator("c");
    FamilyFilter familyFilter = new FamilyFilter(CompareOp.EQUAL, comparator);
    // A single Scan is declared; declaring it twice in one scope does not compile.
    Scan scan = new Scan();
    scan.setFilter(familyFilter);
    for (Result rs : students.getScanner(scan)) {
        // Print the rowkey followed by every cell value of the row on one line.
        String rowkey = Bytes.toString(rs.getRow());
        System.out.print(rowkey+" ");
        List<Cell> cells = rs.listCells();
        for (Cell cell : cells) {
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.print(value+" ");
        }
        System.out.println();
    }
}
列过滤器:QualifierFilter
/**
 * Uses a QualifierFilter with a SubstringComparator to return the values of
 * every column whose qualifier (column name) contains "a".
 *
 * @throws IOException if the scan fails
 */
@Test
public void QualifiterFilter() throws IOException {
    SubstringComparator comparator = new SubstringComparator("a");
    QualifierFilter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, comparator);
    Scan scan = new Scan();
    // Attach the qualifier filter built above.
    scan.setFilter(qualifierFilter);
    for (Result rs : students.getScanner(scan)) {
        // Print the rowkey followed by every retained cell value on one line.
        String rowkey = Bytes.toString(rs.getRow());
        System.out.print(rowkey+" ");
        List<Cell> cells = rs.listCells();
        for (Cell cell : cells) {
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.print(value+" ");
        }
        System.out.println();
    }
}
列值过滤器:ValueFilter
/**
 * Uses a ValueFilter to find the students of class "文科一班".
 *
 * ValueFilter is evaluated against each cell individually; only the cells
 * whose value matches are kept, so the other columns of a matching student
 * are not part of the result.
 *
 * @throws IOException if the scan fails
 */
@Test
public void ValueFilter() throws IOException {
    // Encode explicitly as UTF-8 (Bytes.toBytes) so the comparison does not
    // depend on the JVM's platform default charset.
    BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("文科一班"));
    ValueFilter valueFilter = new ValueFilter(CompareOp.EQUAL, binaryComparator);
    Scan scan = new Scan();
    // Attach the value filter built above.
    scan.setFilter(valueFilter);
    for (Result rs : students.getScanner(scan)) {
        // Print the rowkey followed by every retained cell value on one line.
        String rowkey = Bytes.toString(rs.getRow());
        System.out.print(rowkey+" ");
        List<Cell> cells = rs.listCells();
        for (Cell cell : cells) {
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.print(value+" ");
        }
        System.out.println();
    }
}
3.完整代码
public class Demo4Filter {
Connection conn = null;
Admin admin = null;
Table students = null;
TableName name = TableName.valueOf("students");
public void useFilterAndPrint(Filter filter) throws IOException {
Scan scan = new Scan();
scan.setFilter(filter);
for (Result rs : students.getScanner(scan)) {
String rowkey = Bytes.toString(rs.getRow());
System.out.print(rowkey+" ");
List<Cell> cells = rs.listCells();
for (Cell cell : cells) {
String value = Bytes.toString(CellUtil.cloneValue(cell));
System.out.print(value+" ");
}
System.out.println();
}
}
@Before
public void init() {
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum","master:2181,node1:2181,node2:2181");
try {
conn = ConnectionFactory.createConnection(conf);
admin = conn.getAdmin();
students = conn.getTable(name);
} catch (IOException e) {
e.printStackTrace();
}
}
@Test
//通过RowFilter过滤器比rowkey 1500100009 小的所有值
public void BinaryComparatorFilter() throws IOException {
Scan scan = new Scan();
BinaryComparator binaryComparator = new BinaryComparator("1500100010".getBytes());
RowFilter rowFilter = new RowFilter(CompareOp.LESS, binaryComparator);
useFilterAndPrint(rowFilter);
}
@Test
//通过FamilyFilter查询列簇名包含c的所有列簇下面的数据
public void SubstringComparatorFilter() throws IOException {
Scan scan = new Scan();
SubstringComparator comparator = new SubstringComparator("c");
FamilyFilter familyFilter = new FamilyFilter(CompareOp.EQUAL, comparator);
// scan.setFilter(familyFilter);
// for (Result rs : students.getScanner(scan)) {
// String rowkey = Bytes.toString(rs.gcetRow());
// System.out.print(rowkey+" ");
// List<Cell> cells = rs.listCells();
// for (Cell cell : cells) {
// String value = Bytes.toString(CellUtil.cloneValue(cell));
// System.out.print(value+" ");
// }
// System.out.println();
// }
useFilterAndPrint(familyFilter);
}
@Test
//查询列名包含a的列 下面的所有的值
//QualifiterFilter SubstringComparator
public void QualifiterFilter() throws IOException {
SubstringComparator comparator = new SubstringComparator("a");
QualifierFilter qualifierFilter = new QualifierFilter(CompareOp.EQUAL, comparator);
useFilterAndPrint(qualifierFilter);
}
@Test
//过滤文科一班的学生
//ValueFilter
//ValueFilter是作用在每一个cell上,只有符合条件的cell才会保留
public void ValueFilter() throws IOException {
BinaryComparator binaryComparator = new BinaryComparator("文科一班".getBytes());
ValueFilter valueFilter = new ValueFilter(CompareOp.EQUAL, binaryComparator);
useFilterAndPrint(valueFilter);
}
@Test
//过滤文科一班的学生,并且返回学生的所有信息
//SingleColumnValueFilter
//会完整的返回整条数据
//在比较的时候需要指定列簇,如果数据中存在没有所指定的列簇的数据 在会保留并返回
//在比较的时候需要指定 列名,如果数据中存在没有指定的所有列名的数据 则也会保留并返回
public void SingleColumnValueFilter() throws IOException {
SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("info".getBytes()
, "clazz".getBytes()
, CompareOp.EQUAL
, "文科一班".getBytes()
);
useFilterAndPrint(singleColumnValueFilter);
}
@Test
//相对于SingleColumnValueFilter会将用于过滤的列值去除
public void SingleColumnValueExcludeFilter() throws IOException {
SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter("info".getBytes()
, "clazz".getBytes()
, CompareOp.EQUAL
, "文科一班".getBytes()
);
useFilterAndPrint(singleColumnValueExcludeFilter);
}
@Test
//通过PrefixFilterFilter查询以150010008开头的所有前缀的rowkey
//相当于BinaryComparator 加上RowFilter
public void PrefixFilterFilter() throws IOException {
PrefixFilter prefixFilter = new PrefixFilter("150010008".getBytes());
useFilterAndPrint(prefixFilter);
}
@Test
//通过PageFilter查询第三页的数据,每页10条
public void PageFilter() throws IOException {
int page = 3;
int pageSize = 10;
//计算出当前页面的第一条数据的位置(相对于整个数据的第一条)
int current_page_first_size = (page - 1) * pageSize + 1; //21
// System.out.println(current_page_first_size);
PageFilter pageFilter = new PageFilter(current_page_first_size);
Scan scan = new Scan();
scan.setFilter(pageFilter);
byte[] rk = null;
for (Result result : students.getScanner(scan)) {
//取出当前页面的第一条数据的rowkey
rk = result.getRow();
}
PageFilter pageFilter1 = new PageFilter(pageSize);
Scan scan1 = new Scan();
scan1.withStartRow(rk);
scan1.setFilter(pageFilter1);
for (Result rs : students.getScanner(scan1)) {
String rowkey = Bytes.toString(rs.getRow());
System.out.print(rowkey+" ");
List<Cell> cells = rs.listCells();
for (Cell cell : cells) {
String value = Bytes.toString(CellUtil.cloneValue(cell));
System.out.print(value+" ");
}
System.out.println();
}
}
@Test
//通过合理的设计rowkey查询第五页的数据,每10条
//students表种的rowkey是从1500100001依次递增至1500100000
//所以第五页数据的rowkey范围可以直接计算出来
//1500100047~1500100050
public void PageFilterWithRowkey() throws IOException {
String rowkeyPrefix = "1500100000";
//五页,每页10条数据
int page = 5;
int pageSize = 10;
int current_page_start_row_offset = (page - 1) * pageSize + 1;
int i = Integer.parseInt(rowkeyPrefix);
int start_rowkey = i+current_page_start_row_offset;
int end_rowkey = start_rowkey+pageSize;
String start_rowkey_str = start_rowkey+" ";
String end_rowkey_str = end_rowkey+"";
Scan scan = new Scan();
scan.withStartRow(start_rowkey_str.getBytes());
scan.withStopRow(end_rowkey_str.getBytes());
for (Result rs : students.getScanner(scan)) {
String rowkey = Bytes.toString(rs.getRow());
System.out.print(rowkey+" ");
List<Cell> cells = rs.listCells();
for (Cell cell : cells) {
String value = Bytes.toString(CellUtil.cloneValue(cell));
System.out.print(value+" ");
}
System.out.println();
}
}
@Test
// 查询文科班中的学生中学号以150010008开头并且年龄小于23的学生信息
public void FilterListFilter() throws IOException {
PrefixFilter prefixFilter = new PrefixFilter("150010008".getBytes());
SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter("info".getBytes(), "age".getBytes(), CompareOp.LESS,"23".getBytes());
SingleColumnValueFilter singleColumnValueFilter1 = new SingleColumnValueFilter("info".getBytes(), "clazz".getBytes(), CompareOp.EQUAL,"文科一班".getBytes());
//通过FilterList将多个过滤器组装起来
FilterList filterList = new FilterList();
filterList.addFilter(prefixFilter);
filterList.addFilter(singleColumnValueFilter);
useFilterAndPrint(filterList);
}
@After
public void close(){
try {
admin.close();
conn.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}