package test;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Tests the IK tokenizer ({@code IKAnalyzer}) on Chinese sample text.
 *
 * @author Administrator
 */
public class TestAnalyzer {

    /**
     * Entry point: builds a longer input by repeating a fixed Chinese sample
     * sentence ten times and prints the tokens an {@link IKAnalyzer} produces for it.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if tokenizing the text fails
     */
    public static void main(String[] args) throws Exception {
        Analyzer ik = new IKAnalyzer();
        String text2 = "我们是中国人举行了2008年8月8日北京奥林匹克运动会我们是中国人举行了2008年8月8日北京奥林匹克运动会";
        // To tokenize the contents of a file instead, replace the sample text, e.g.:
        // text2 = new TestAnalyzer().readTxt("D:\\note.txt");

        // Repeat the sample text ten times to get a longer input.
        StringBuilder sb = new StringBuilder(text2.length() * 10);
        for (int i = 0; i < 10; i++) {
            sb.append(text2);
        }

        // Tokenize and print the result.
        testAanlyzer(ik, sb.toString());
    }

    /**
     * Tokenizes {@code text} with {@code analyzer}, prints every token on its own
     * line, and finally prints the token count and the elapsed wall-clock time in
     * milliseconds. The measured time covers stream creation, tokenizing and the
     * console output of each token.
     *
     * @param analyzer the analyzer used to create the token stream
     * @param text     the text to tokenize
     * @throws Exception if creating or consuming the token stream fails
     */
    public static void testAanlyzer(Analyzer analyzer, String text) throws Exception {
        long start = System.currentTimeMillis();
        TokenStream ts = analyzer.tokenStream("content", new StringReader(text));
        int count = 0;
        try {
            // addAttribute returns the existing attribute instance when the stream
            // already has one, and registers it otherwise.
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            System.out.println("分词效果如下:");
            // The TokenStream consumer workflow requires reset() before the first
            // incrementToken() call.
            ts.reset();
            while (ts.incrementToken()) {
                count++;
                System.out.println(term.toString());
            }
            // Signals that the end of the stream was reached (end-of-stream bookkeeping).
            ts.end();
        } finally {
            // Always release the stream's resources, even if tokenizing failed.
            ts.close();
        }
        long usetime = System.currentTimeMillis() - start;
        System.out.println("共分词=" + count + ",共耗时=" + usetime + "毫秒。");
    }

    /**
     * Reads a text file line by line and returns its content with {@code "\n"}
     * appended after every line (so the result ends with a newline whenever at
     * least one line was read).
     *
     * <p>This is best-effort: if the file cannot be opened or read, the stack
     * trace is printed and whatever was read up to that point (possibly the
     * empty string) is returned.
     *
     * <p>NOTE(review): {@link FileReader} is used without an explicit charset, so
     * decoding depends on the JVM's default charset - confirm this matches the
     * encoding of the input files.
     *
     * @param path path of the file to read
     * @return the text read from the file; never {@code null}
     */
    public String readTxt(String path) {
        StringBuilder text = new StringBuilder();
        BufferedReader read = null;
        try {
            read = new BufferedReader(new FileReader(path));
            String line;
            while ((line = read.readLine()) != null) {
                text.append(line).append('\n');
            }
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so this also covers a missing file.
            e.printStackTrace();
        } finally {
            if (read != null) {
                try {
                    read.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return text.toString();
    }
}