下载 ansj jar(Maven 依赖):
<!-- Maven coordinates of the ansj_seg artifact (version 0.9); place inside the <dependencies> section of pom.xml. -->
<dependency>
<groupId>org.ansj</groupId>
<artifactId>ansj_seg</artifactId>
<version>0.9</version>
</dependency>
Java 版本:
// Segment the sample sentence with NlpAnalysis, then run NatureRecognition over the terms.
List<Term> parse = NlpAnalysis.parse("山东威海隧道发生交通事故 造成9死1伤3人失踪");
new NatureRecognition(parse).recognition(); // part-of-speech tagging

// Dictionary handed to FilterModifWord: every line of the stop-word file is mapped to "_stop"
// (presumably the marker FilterModifWord uses to drop a term -- confirm against the ansj docs).
HashMap<String, String> strHashMap = new HashMap<String, String>();

// Stop-word file: read line by line, decoded as GBK.
String stopWordTable = "F://360downloads/StopWordTable.txt";
File f = new File(stopWordTable);
try {
    FileInputStream fileInputStream = new FileInputStream(f);
    try {
        BufferedReader stopWordFileBr =
                new BufferedReader(new InputStreamReader(fileInputStream, "GBK"));
        String stopWord;
        while ((stopWord = stopWordFileBr.readLine()) != null) {
            strHashMap.put(stopWord, "_stop");
        }
    } finally {
        // Closing the underlying stream releases the file handle even when reading fails.
        fileInputStream.close();
    }

    FilterModifWord.setUpdateDic(strHashMap);
    List<Term> term = FilterModifWord.modifResult(parse);

    // Business rule based on the nature tag (the part before ':' in getNatrue().toString()):
    //   - a "d" term directly followed by an "a" term is emitted as one merged word;
    //   - a "d" term not followed by an "a" term is dropped;
    //   - an "a" term directly preceded by a "d" term is skipped (already merged above);
    //   - every other term is emitted unchanged.
    // NOTE(review): "d"/"a" are presumably the adverb/adjective tags of the ansj tag set -- confirm.
    List<String> list = new ArrayList<String>();
    for (int i = 0; i < term.size(); i++) {
        String word = term.get(i).getName();
        String nature = term.get(i).getNatrue().toString().split(":")[0];
        if (nature.equals("d")) {
            // Only look ahead when a next term exists; the unguarded get(i + 1) used to throw
            // IndexOutOfBoundsException when the last term was tagged "d".
            boolean hasNext = i + 1 < term.size();
            if (hasNext && term.get(i + 1).getNatrue().toString().split(":")[0].equals("a")) {
                list.add(word + term.get(i + 1).getName());
            }
        } else {
            boolean followsD =
                    i > 0 && term.get(i - 1).getNatrue().toString().split(":")[0].equals("d");
            if (followsD && nature.equals("a")) {
                System.out.println("移除这一条数据");
            } else {
                list.add(word);
            }
        }
    }

    for (String entry : list) {
        System.out.println(entry);
    }
} catch (Exception e) {
    // Best-effort demo: any failure (missing file, bad encoding, analysis error) is reported
    // on stderr and the remaining steps are skipped.
    e.printStackTrace();
}
Scala 版本:
// Segment the sample sentence with ToAnalysis.
val parse = ToAnalysis.parse("山东威海隧道发生交通事故 造成9死1伤3人失踪")
// Part-of-speech tagging, left disabled as in the original snippet:
// new NatureRecognition(parse).recognition()

// Dictionary handed to FilterModifWord: every line of the stop-word file is mapped to "_stop"
// (presumably the marker FilterModifWord uses to drop a term -- confirm against the ansj docs).
val strHashMap = new util.HashMap[String, String]

// Stop-word file, decoded as GBK; the Source is closed even if reading fails.
val file = Source.fromFile("F://360downloads/StopWordTable.txt", "GBK")
try {
  for (line <- file.getLines()) {
    strHashMap.put(line, "_stop")
  }
} finally {
  file.close()
}

FilterModifWord.setUpdateDic(strHashMap)
val term = FilterModifWord.modifResult(parse)
System.out.println(term)