影响分析器选择的一个原因是语种，另一个原因是被分析的文本所属的域。
import java.io.*;
/**
 * Source of synonyms for individual terms.
 *
 * <p>Implementations decide where the synonyms come from; the signature
 * permits a lookup that performs I/O.
 */
public interface SynonymEngine {

    /**
     * Looks up the synonyms of a single term.
     *
     * @param s the term to look up
     * @return the synonyms of {@code s}; what is returned for a term with no
     *         synonyms is left to the implementation
     * @throws IOException if the lookup fails
     */
    String[] getSynonyms(String s) throws IOException;
}
import java.util.*;
public class TestSynonymEngine implements SynonymEngine{
private static HashMap<String,String[]> map =
new HashMap<String,String[]>();
static{
map.put("quick", new String[] {"fast","speeedy"});
map.put("jumps", new String[]{"leaps","hops"});
map.put("over", new String[] {"above"});
map.put("lazy", new String[] {"apathetic","sluggish"});
map.put("dog", new String[] {"canine","pooch"});
}
public String[] getSynonyms(String s){
return map.get(s);
}
}
import java.io.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.*;
import org.apache.lucene.analysis.standard.*;
public class SynonymAnalyzer extends Analyzer{
private SynonymEngine engine;
public SynonymAnalyzer(SynonymEngine engine){
this.engine = engine;
}
protected TokenStreamComponents createComponents(String text) {
Tokenizer tokenizer = new StandardTokenizer();
TokenStream tokenStream = new SynonymFilter(tokenizer, engine);
tokenStream = new LowerCaseFilter(tokenStream);
tokenStream = new StopFilter(tokenStream,StopAnalyzer.ENGLISH_STOP_WORDS_SET);
return new TokenStreamComponents(tokenizer, tokenStream);
}
}
实现 createComponents 方法，创建分析器链。
Tokenizer 通过 Reader 读取字符并创建语汇单元，而 TokenFilter 则负责处理输入的语汇单元。
import org.apache.lucene.analysis.*;
/**
 * Small driver that feeds a sample sentence through {@code SynonymAnalyzer}
 * and hands the result to {@code AnalyzerDome.displayTokens}.
 */
public class SynonymAnalyzerTest {

    /**
     * Analyzes a fixed sample sentence with a {@code SynonymAnalyzer} backed by
     * {@code TestSynonymEngine}.
     *
     * @param args unused
     * @throws Exception propagated from the analysis/display call
     */
    public static void main(String[] args) throws Exception {
        String text = "The quick brown fox jumps over the lazy dog";
        // Analyzer is Closeable; try-with-resources releases it even if
        // displayTokens throws (it was never closed before).
        try (Analyzer analyzer = new SynonymAnalyzer(new TestSynonymEngine())) {
            AnalyzerDome.displayTokens(analyzer, text);
        }
    }
}