[size=xx-large][color=orange][b]Lucene_demo02_分词[/b][/color][/size]
/**
 * Demonstrates how several analyzers split English and Chinese text into tokens.
 *
 * <p>Each test creates one analyzer, hands it a sample string and prints the resulting
 * tokens to standard output, one per line. Nothing is asserted; the output is meant to be
 * inspected by eye.
 */
public class AnalyzerTest {

    /**
     * English tokenization using {@code StandardAnalyzer} (ships with Lucene).
     *
     * @throws Exception if the analyzer fails while tokenizing
     */
    @Test
    public void testEN() throws Exception {
        String text = "Creates a searcher searching the index in the named directory";
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        this.testAnalyzer(analyzer, text);
    }

    /**
     * Chinese tokenization using {@code ChineseAnalyzer}, the single-character tokenizer
     * (ships with Lucene).
     *
     * @throws Exception if the analyzer fails while tokenizing
     */
    @Test
    public void testCH1() throws Exception {
        String text = "LBJ和韦德能带领热火在2013赛季拿到NBA总冠军吗?";
        Analyzer analyzer = new ChineseAnalyzer();
        this.testAnalyzer(analyzer, text);
    }

    /**
     * Chinese tokenization using {@code CJKAnalyzer}, the bigram (two-character) tokenizer
     * (ships with Lucene).
     *
     * @throws Exception if the analyzer fails while tokenizing
     */
    @Test
    public void testCH2() throws Exception {
        String text = "LBJ和韦德能带领热火在2013赛季拿到NBA总冠军吗";
        Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
        this.testAnalyzer(analyzer, text);
    }

    /**
     * Chinese tokenization using {@code IKAnalyzer}. IK Analyzer is a third-party library,
     * not part of the Lucene distribution.
     *
     * <p>NOTE(review): the sample text is a short Latin placeholder, so this test does not
     * currently exercise Chinese word segmentation; consider reusing the sentence from the
     * other Chinese tests.
     *
     * @throws Exception if the analyzer fails while tokenizing
     */
    @Test
    public void testCH3() throws Exception {
        String text = "fasd";
        Analyzer analyzer = new IKAnalyzer();
        this.testAnalyzer(analyzer, text);
    }

    /**
     * Runs {@code text} through {@code analyzer} and prints every produced term on its own
     * line.
     *
     * <p>The stream is consumed following the TokenStream consumer workflow: reset, iterate
     * with {@code incrementToken()}, signal end-of-stream with {@code end()}, and always
     * {@code close()} so the underlying reader is released even if tokenizing throws.
     *
     * @param analyzer the analyzer under demonstration
     * @param text the text to tokenize
     * @throws Exception if the token stream cannot be read or closed
     */
    private void testAnalyzer(Analyzer analyzer, String text) throws Exception {
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(text));
        try {
            // addAttribute registers the attribute if needed and returns the shared instance;
            // the stream updates that same instance on every incrementToken(), so one lookup
            // before the loop is enough.
            TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(termAttribute.term());
            }
            tokenStream.end();
        } finally {
            tokenStream.close();
        }
    }
}