import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Created by kangz on 2016/12/16.
 *
 * <p>Demonstrates a custom Lucene {@link TokenFilter}: a whitespace-tokenized
 * sentence is passed through {@link CourtesyTitleFilter}, which replaces
 * courtesy-title abbreviations with their expanded form, and the resulting
 * tokens are printed to standard output.
 */
public class AnalyzersTokenFilter {

    /**
     * Runs the sample sentence through a {@link WhitespaceAnalyzer} wrapped by
     * {@link CourtesyTitleFilter} and prints every resulting term followed by a
     * single space.
     *
     * <p>The stream is consumed using the full TokenStream workflow
     * ({@code reset()}, {@code incrementToken()} until it returns {@code false},
     * {@code end()}, {@code close()}), and the analyzer is closed afterwards.
     *
     * @throws IOException if the underlying token stream fails while being read
     */
    @Test
    public void test() throws IOException {
        String text = "Hi, Dr Wang, Mr Liu asks if you stay with Mrs Liu yesterday!";

        // try-with-resources closes the filter first (which also closes the wrapped
        // stream) and then the analyzer, even if tokenization throws.
        try (Analyzer analyzer = new WhitespaceAnalyzer();
             TokenStream filter = new CourtesyTitleFilter(analyzer.tokenStream("text", text))) {
            // The attribute instance is shared along the filter chain, so this view
            // reflects whatever term the filter produced for the current token.
            CharTermAttribute charTermAttribute = filter.addAttribute(CharTermAttribute.class);

            filter.reset();
            while (filter.incrementToken()) {
                System.out.print(charTermAttribute + " ");
            }
            // Signals end-of-stream so final state (e.g. offsets) is settled before close().
            filter.end();
        }
    }

    /**
     * Custom term-expansion filter: a token whose text exactly equals a key of
     * {@link #courtesyTitleMap} is replaced by the mapped value; every other
     * token passes through unchanged.
     *
     * <p>Matching is exact and case-sensitive on the whole token text, so a title
     * with punctuation attached (for example {@code "Dr,"}) is not expanded.
     */
    class CourtesyTitleFilter extends TokenFilter {

        /** Abbreviation to expansion lookup, populated in the constructor. */
        Map<String, String> courtesyTitleMap = new HashMap<>();

        /** Term text of the current token. */
        private final CharTermAttribute termAttribute;

        /**
         * Constructs a filter over the given input token stream.
         *
         * @param input the token stream whose terms should be expanded
         */
        protected CourtesyTitleFilter(TokenStream input) {
            super(input);
            termAttribute = addAttribute(CharTermAttribute.class);
            courtesyTitleMap.put("Dr", "doctor");
            courtesyTitleMap.put("Mr", "mister");
            // NOTE(review): "Mrs" is mapped to "miss" here; "missus" may have been
            // intended. Left as-is to keep the demo output unchanged; confirm.
            courtesyTitleMap.put("Mrs", "miss");
        }

        /**
         * Advances the wrapped stream by one token and, if that token is a known
         * courtesy title, overwrites its term text with the expansion.
         *
         * @return {@code true} if a token is available, {@code false} once the
         *         wrapped stream is exhausted
         * @throws IOException if the wrapped stream fails
         */
        @Override
        public final boolean incrementToken() throws IOException {
            if (!input.incrementToken()) {
                return false;
            }
            // Single lookup: a null result means the token is not a known title.
            String expansion = courtesyTitleMap.get(termAttribute.toString());
            if (expansion != null) {
                termAttribute.setEmpty().append(expansion);
            }
            return true;
        }
    }
}
// This code was adapted from examples in other blog posts; the code above is self-explanatory, so no further explanation is given here.
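// Below is the author's WeChat payment QR code. Thank you again to all readers for your support; the author will keep working hard.
// ---- See below ↓↓↓↓↓↓↓
// Search Baidu for "Drools从入门到精通" (Drools: From Beginner to Mastery) to download the complete open-source Drools tutorial series.
// The in-depth Drools tutorials are being continuously updated.
// More hands-on Drools articles will be published over time.
// Scan the QR code below to follow the official account ↓↓↓↓↓↓↓↓↓↓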