- 本博客介绍在 HBase 中使用 Filter 进行快速高效查询的方法,内容我会慢慢补齐
几大Filters
1、Comparison Filters
1.1 RowFilter
1.2 FamilyFilter
1.3 QualifierFilter
1.4 ValueFilter
1.5 DependentColumnFilter
2、Dedicated Filters
2.1 SingleColumnValueFilter
2.2 SingleColumnValueExcludeFilter
2.3 PrefixFilter
2.4 PageFilter
2.5 KeyOnlyFilter
2.6 FirstKeyOnlyFilter
2.7 TimestampsFilter
2.8 RandomRowFilter
3、Decorating Filters
3.1 SkipFilter
3.2 WhileMatchFilter
一个简单的示例 SingleColumnValueFilter
- publicstaticvoidselectByFilter(Stringtablename,List<String>arr)throwsIOException{
- HTabletable=newHTable(hbaseConfig,tablename);
- FilterListfilterList=newFilterList();
- Scans1=newScan();
- for(Stringv:arr){//各个条件之间是“与”的关系
- String[]s=v.split(",");
- filterList.addFilter(newSingleColumnValueFilter(Bytes.toBytes(s[0]),
- Bytes.toBytes(s[1]),
- CompareOp.EQUAL,Bytes.toBytes(s[2])
- )
- );
- //添加下面这一行后,则只返回指定的cell,同一行中的其他cell不返回
- //s1.addColumn(Bytes.toBytes(s[0]),Bytes.toBytes(s[1]));
- }
- s1.setFilter(filterList);
- ResultScannerResultScannerFilterList=table.getScanner(s1);
- for(Resultrr=ResultScannerFilterList.next();rr!=null;rr=ResultScannerFilterList.next()){
- for(KeyValuekv:rr.list()){
- System.out.println("row:"+newString(kv.getRow()));
- System.out.println("column:"+newString(kv.getColumn()));
- System.out.println("value:"+newString(kv.getValue()));
- }
- }
- }
MultipleColumnPrefixFilter
api上介绍如下
- This filter is used for selecting only those keys with columns that match a particular prefix. For example, if prefix is 'an', it will pass keys with columns like 'and', 'anti' but not keys with columns like 'ball', 'act'.
构造方法如下
- public MultipleColumnPrefixFilter(byte[][] prefixes)
传入多个prefix
源码里说明如下
/**
 * Creates a filter from the given column prefixes.
 *
 * <p>Each prefix is added to {@code sortedPrefixes}; if {@code add} reports that a prefix
 * was not inserted, the prefixes are not distinct and construction fails. A {@code null}
 * array adds nothing.
 *
 * @param prefixes column prefixes to match; may be {@code null}
 * @throws IllegalArgumentException if the same prefix is supplied more than once
 */
public MultipleColumnPrefixFilter(final byte[][] prefixes) {
    if (prefixes != null) {
        for (byte[] prefix : prefixes) {
            if (!sortedPrefixes.add(prefix)) {
                throw new IllegalArgumentException("prefixes must be distinct");
            }
        }
    }
}
示例代码如下:是我从网上找的,看了,没啥难理解的,
- +publicclassTestMultipleColumnPrefixFilter{
- +
- +privatefinalstaticHBaseTestingUtilityTEST_UTIL=new
- +HBaseTestingUtility();
- +
- +@Test
- +publicvoidtestMultipleColumnPrefixFilter()throwsIOException{
- +Stringfamily="Family";
- +HTableDescriptorhtd=newHTableDescriptor("TestMultipleColumnPrefixFilter");
- +htd.addFamily(newHColumnDescriptor(family));
- +//HRegionInfoinfo=newHRegionInfo(htd,null,null,false);
- +HRegionInfoinfo=newHRegionInfo(htd.getName(),null,null,false);
- +HRegionregion=HRegion.createHRegion(info,HBaseTestingUtility.
- +getTestDir(),TEST_UTIL.getConfiguration(),htd);
- +
- +List<String>rows=generateRandomWords(100,"row");
- +List<String>columns=generateRandomWords(10000,"column");
- +longmaxTimestamp=2;
- +
- +List<KeyValue>kvList=newArrayList<KeyValue>();
- +
- +Map<String,List<KeyValue>>prefixMap=newHashMap<String,
- +List<KeyValue>>();
- +
- +prefixMap.put("p",newArrayList<KeyValue>());
- +prefixMap.put("q",newArrayList<KeyValue>());
- +prefixMap.put("s",newArrayList<KeyValue>());
- +
- +StringvalueString="ValueString";
- +
- +for(Stringrow:rows){
- +Putp=newPut(Bytes.toBytes(row));
- +for(Stringcolumn:columns){
- +for(longtimestamp=1;timestamp<=maxTimestamp;timestamp++){
- +KeyValuekv=KeyValueTestUtil.create(row,family,column,timestamp,
- +valueString);
- +p.add(kv);
- +kvList.add(kv);
- +for(Strings:prefixMap.keySet()){
- +if(column.startsWith(s)){
- +prefixMap.get(s).add(kv);
- +}
- +}
- +}
- +}
- +region.put(p);
- +}
- +
- +MultipleColumnPrefixFilterfilter;
- +Scanscan=newScan();
- +scan.setMaxVersions();
- +byte[][]filter_prefix=newbyte[2][];
- +filter_prefix[0]=newbyte[]{'p'};
- +filter_prefix[1]=newbyte[]{'q'};
- +
- +filter=newMultipleColumnPrefixFilter(filter_prefix);
- +scan.setFilter(filter);
- +List<KeyValue>results=newArrayList<KeyValue>();
- +InternalScannerscanner=region.getScanner(scan);
- +while(scanner.next(results));
- +assertEquals(prefixMap.get("p").size()+prefixMap.get("q").size(),results.size());
- +}
- +
- +@Test
- +publicvoidtestMultipleColumnPrefixFilterWithManyFamilies()throwsIOException{
- +Stringfamily1="Family1";
- +Stringfamily2="Family2";
- +HTableDescriptorhtd=newHTableDescriptor("TestMultipleColumnPrefixFilter");
- +htd.addFamily(newHColumnDescriptor(family1));
- +htd.addFamily(newHColumnDescriptor(family2));
- +HRegionInfoinfo=newHRegionInfo(htd.getName(),null,null,false);
- +HRegionregion=HRegion.createHRegion(info,HBaseTestingUtility.
- +getTestDir(),TEST_UTIL.getConfiguration(),htd);
- +
- +List<String>rows=generateRandomWords(100,"row");
- +List<String>columns=generateRandomWords(10000,"column");
- +longmaxTimestamp=3;
- +
- +List<KeyValue>kvList=newArrayList<KeyValue>();
- +
- +Map<String,List<KeyValue>>prefixMap=newHashMap<String,
- +List<KeyValue>>();
- +
- +prefixMap.put("p",newArrayList<KeyValue>());
- +prefixMap.put("q",newArrayList<KeyValue>());
- +prefixMap.put("s",newArrayList<KeyValue>());
- +
- +StringvalueString="ValueString";
- +
- +for(Stringrow:rows){
- +Putp=newPut(Bytes.toBytes(row));
- +for(Stringcolumn:columns){
- +for(longtimestamp=1;timestamp<=maxTimestamp;timestamp++){
- +doublerand=Math.random();
- +KeyValuekv;
- +if(rand<0.5)
- +kv=KeyValueTestUtil.create(row,family1,column,timestamp,
- +valueString);
- +else
- +kv=KeyValueTestUtil.create(row,family2,column,timestamp,
- +valueString);
- +p.add(kv);
- +kvList.add(kv);
- +for(Strings:prefixMap.keySet()){
- +if(column.startsWith(s)){
- +prefixMap.get(s).add(kv);
- +}
- +}
- +}
- +}
- +region.put(p);
- +}
- +
- +MultipleColumnPrefixFilterfilter;
- +Scanscan=newScan();
- +scan.setMaxVersions();
- +byte[][]filter_prefix=newbyte[2][];
- +filter_prefix[0]=newbyte[]{'p'};
- +filter_prefix[1]=newbyte[]{'q'};
- +
- +filter=newMultipleColumnPrefixFilter(filter_prefix);
- +scan.setFilter(filter);
- +List<KeyValue>results=newArrayList<KeyValue>();
- +InternalScannerscanner=region.getScanner(scan);
- +while(scanner.next(results));
- +assertEquals(prefixMap.get("p").size()+prefixMap.get("q").size(),results.size());
- +}
- +
- +@Test
- +publicvoidtestMultipleColumnPrefixFilterWithColumnPrefixFilter()throwsIOException{
- +Stringfamily="Family";
- +HTableDescriptorhtd=newHTableDescriptor("TestMultipleColumnPrefixFilter");
- +htd.addFamily(newHColumnDescriptor(family));
- +HRegionInfoinfo=newHRegionInfo(htd.getName(),null,null,false);
- +HRegionregion=HRegion.createHRegion(info,HBaseTestingUtility.
- +getTestDir(),TEST_UTIL.getConfiguration(),htd);
- +
- +List<String>rows=generateRandomWords(100,"row");
- +List<String>columns=generateRandomWords(10000,"column");
- +longmaxTimestamp=2;
- +
- +StringvalueString="ValueString";
- +
- +for(Stringrow:rows){
- +Putp=newPut(Bytes.toBytes(row));
- +for(Stringcolumn:columns){
- +for(longtimestamp=1;timestamp<=maxTimestamp;timestamp++){
- +KeyValuekv=KeyValueTestUtil.create(row,family,column,timestamp,
- +valueString);
- +p.add(kv);
- +}
- +}
- +region.put(p);
- +}
- +
- +MultipleColumnPrefixFiltermultiplePrefixFilter;
- +Scanscan1=newScan();
- +scan1.setMaxVersions();
- +byte[][]filter_prefix=newbyte[1][];
- +filter_prefix[0]=newbyte[]{'p'};
- +
- +multiplePrefixFilter=newMultipleColumnPrefixFilter(filter_prefix);
- +scan1.setFilter(multiplePrefixFilter);
- +List<KeyValue>results1=newArrayList<KeyValue>();
- +InternalScannerscanner1=region.getScanner(scan1);
- +while(scanner1.next(results1));
- +
- +ColumnPrefixFiltersinglePrefixFilter;
- +Scanscan2=newScan();
- +scan2.setMaxVersions();
- +singlePrefixFilter=newColumnPrefixFilter(Bytes.toBytes("p"));
- +
- +scan2.setFilter(singlePrefixFilter);
- +List<KeyValue>results2=newArrayList<KeyValue>();
- +InternalScannerscanner2=region.getScanner(scan1);
- +while(scanner2.next(results2));
- +
- +assertEquals(results1.size(),results2.size());
- +}
- +
- +List<String>generateRandomWords(intnumberOfWords,Stringsuffix){
- +Set<String>wordSet=newHashSet<String>();
- +for(inti=0;i<numberOfWords;i++){
- +intlengthOfWords=(int)(Math.random()*2)+1;
- +char[]wordChar=newchar[lengthOfWords];
- +for(intj=0;j<wordChar.length;j++){
- +wordChar[j]=(char)(Math.random()*26+97);
- +}
- +Stringword;
- +if(suffix==null){
- +word=newString(wordChar);
- +}else{
- +word=newString(wordChar)+suffix;
- +}
- +wordSet.add(word);
- +}
- +List<String>wordList=newArrayList<String>(wordSet);
- +returnwordList;
- +}
- +}
- +
- .
ColumnPrefixFilter
- public class ColumnPrefixFilter extends FilterBase —— This filter is used for selecting only those keys with columns that match a particular prefix. For example, if prefix is 'an', it will pass keys with columns like 'and', 'anti' but not keys with columns like 'ball', 'act'.
上面是类的说明
只有一个有参构造 ColumnPrefixFilter(byte[] prefix)
这个类用法很简单,就是匹配列名(column qualifier)前缀是 prefix 的列,注意它匹配的不是 rowkey。我一开始把它当成 rowkey 前缀过滤来用,结果不起作用;如果有大牛用它做 rowkey 前缀匹配成功了,请告诉我一下。
无奈之下,只好选择PrefixFilter
PrefixFilter
类说明 :
Pass results that have same row prefix.
构造方法的形式跟ColumnPrefixFilter一样(只传入一个 byte[] prefix),调用方式也相同,区别在于它是按 rowkey 的前缀过滤。
基本上几个Filter就是这些了,慢慢的我再更新这个文章
上段代码,我自己写的,使用中的代码
- publicstaticStringgetKeywordTableRowkeyUseFilter(StringfilterString1,StringfilterString2){
- FilterListfilterList=newFilterList();
- StringrowkeyValue="";
- Scans1=newScan();
- String[]sf1=filterString1.split(",");
- filterList.addFilter(newSingleColumnValueFilter(Bytes.toBytes(sf1[0]),
- Bytes.toBytes(sf1[1]),
- CompareOp.EQUAL,Bytes.toBytes(sf1[2])
- ));
- String[]sf2=filterString2.split(",");
- filterList.addFilter(newSingleColumnValueFilter(Bytes.toBytes(sf2[0]),
- Bytes.toBytes(sf2[1]),
- CompareOp.EQUAL,Bytes.toBytes(sf2[2])
- ));
- filterList.addFilter(newColumnPrefixFilter(Bytes.toBytes("3274980668:")));
- filterList.addFilter(newPrefixFilter(Bytes.toBytes("3274980668:")));
- s1.setFilter(filterList);
- ResultScannerResultScannerFilterList;
- try{
- ResultScannerFilterList=tableKeyword.getScanner(s1);
- for(Resultrr=ResultScannerFilterList.next();rr!=null;rr=ResultScannerFilterList.next()){
- StringrowkeyValueTmp=newString(rr.getRow());
- rowkeyValue=rowkeyValue+"##"+rowkeyValueTmp;
- }
- }catch(IOExceptione){
- //TODOAuto-generatedcatchblock
- e.printStackTrace();
- }
- log.warn("rowkeyValue"+rowkeyValue);
- returnrowkeyValue;
- }
PrefixFilter和ColumnPrefixFilter的调用方式几乎一样,但过滤对象不同:前者按 rowkey 前缀过滤,后者按列名前缀过滤。在开发中,如果要按 rowkey 前缀查询,建议使用PrefixFilter