import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
/**
 * Custom {@link org.apache.lucene.analysis.Analyzer} that tokenizes on whitespace
 * and keeps only tokens within a configured length range.
 * Created by kangz on 2016/12/16.
 */
public class MyAnalzerlen extends Analyzer {

    /** Default upper bound on token length, matching the previously hard-coded value. */
    private static final int DEFAULT_MAX_LEN = 6;

    private int len;    // minimum accepted token length (inclusive)
    private int maxLen; // maximum accepted token length (inclusive)

    /**
     * Creates an analyzer that keeps tokens whose length is between
     * {@code len} and 6, both inclusive.
     *
     * @param len minimum token length to keep
     */
    public MyAnalzerlen(int len) {
        this(len, DEFAULT_MAX_LEN);
    }

    /**
     * Creates an analyzer that keeps tokens whose length is between
     * {@code len} and {@code maxLen}, both inclusive.
     *
     * @param len    minimum token length to keep
     * @param maxLen maximum token length to keep
     */
    public MyAnalzerlen(int len, int maxLen) {
        this.len = len;
        this.maxLen = maxLen;
    }

    public int getLen() {
        return len;
    }

    public void setLen(int len) {
        this.len = len;
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new WhitespaceTokenizer();
        // LengthFilter keeps tokens whose length lies in [len, maxLen] — both
        // bounds are INCLUSIVE (the original comment claiming an open lower
        // bound was incorrect per the Lucene LengthFilter contract).
        TokenStream tokenStream = new LengthFilter(source, len, maxLen);
        return new TokenStreamComponents(source, tokenStream);
    }

    public static void main(String[] args) throws Exception {
        // Keep tokens of length 2..6; shorter ("I", "a", "!") are dropped.
        Analyzer analyzer = new MyAnalzerlen(2);
        String words = "I am a java coder ! jjjuje !";
        // try-with-resources guarantees the TokenStream is closed even when
        // incrementToken() throws (the original leaked it on failure).
        try (TokenStream stream = analyzer.tokenStream("myfield", words)) {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(termAtt.toString());
            }
            stream.end();
        } catch (IOException e) {
            // Report the failure instead of silently swallowing it.
            e.printStackTrace();
        }
    }
}
// NOTE(review): everything below is leftover text from the blog page this file
// was copied from (a duplicated code line plus a promotional footer). It is not
// part of the program; it is preserved inside a comment so the file compiles.
/*
TokenStream tokenStream = new LengthFilter(source, len, 6);// 分词器 最小长度开区间 最大长度闭区间 重点的一句话
下面是小编的微信转帐二维码,小编再次谢谢读者的支持,小编会更努力的
----请看下方↓↓↓↓↓↓↓
百度搜索 Drools从入门到精通:可下载开源全套Drools教程
深度Drools教程不段更新中:
更多Drools实战陆续发布中………
扫描下方二维码关注公众号 ↓↓↓↓↓↓↓↓↓↓
*/