/**
 * Segments {@code param} with the Paoding Chinese analyzer and returns the
 * distinct words it contains, ordered from most to least frequent.
 *
 * <p>Only tokens whose type is {@code "word"} and whose length is at least two
 * characters are counted. Words with the same count appear in no guaranteed
 * relative order (they come out of a {@code HashMap}).
 *
 * @param param the text to segment; {@code null} or empty yields an empty list
 * @return a new list of the distinct words ({@code String}s), sorted by
 *         descending occurrence count
 * @throws IOException if the token stream fails while being read or closed
 */
public List getname(String param) throws IOException {
    List<String> keywords = new ArrayList<String>();
    // Nothing to segment: skip building the analyzer altogether.
    if (param == null || param.length() == 0) {
        return keywords;
    }

    // Segment the text (Paoding analyzer) and collect the usable words.
    Analyzer analyzer = new PaodingAnalyzer();
    List<String> words = new ArrayList<String>();
    TokenStream ts = null;
    try {
        Reader reader = new StringReader(param);
        ts = analyzer.tokenStream("TestField", reader);
        TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
        TypeAttribute typeAtt = (TypeAttribute) ts.getAttribute(TypeAttribute.class);
        while (ts.incrementToken()) {
            if ("word".equals(typeAtt.type())) {
                String word = termAtt.term();
                // Single-character tokens are ignored.
                if (word.length() >= 2) {
                    words.add(word);
                }
            }
        }
    } finally {
        // An IOException from the loop above is deliberately not caught here:
        // the method declares it, so the caller decides how to handle it
        // instead of receiving a silently truncated result.
        if (ts != null) {
            ts.close();
        }
    }

    // Count how many times each word occurs.
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (String word : words) {
        Integer seen = counts.get(word);
        counts.put(word, seen == null ? 1 : seen + 1);
    }

    // Sort the entries by descending count.
    List<Map.Entry<String, Integer>> ranked =
            new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
    Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
        public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
            return o2.getValue().compareTo(o1.getValue());
        }
    });

    // Emit the words in ranked order. The key is read directly from the entry
    // rather than parsed back out of Entry.toString(), which would truncate
    // any word containing '='.
    for (Map.Entry<String, Integer> entry : ranked) {
        keywords.add(entry.getKey());
    }
    return keywords;
}
lucene Analyzer 庖丁解牛 中文分词
最新推荐文章于 2025-08-10 14:29:07 发布