package demo;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Date;
import java.util.Enumeration;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
/**
 * Finds sensitive words with a regular expression and masks them.
 *
 * <p>The word list is the union of the property names found in the classpath
 * resource {@code word.txt} and a small built-in list. Call
 * {@link #initPattern()} once before using {@link #doFilter(String)}; until
 * then {@code doFilter} returns its input unchanged.
 */
public class KeyWordFilter {
    /** Classpath resource whose property names are treated as sensitive words. */
    private static final String WORD_RESOURCE = "word.txt";

    /** Words that are always filtered, even when the resource cannot be read. */
    private static final String[] BUILT_IN_WORDS = {"你好", "死吧"};

    /** Replacement text written in place of every matched word. */
    private static final String MASK = "****";

    /** Compiled alternation of all sensitive words; null until initPattern() succeeds. */
    private static Pattern pattern = null;

    /**
     * Builds the case-insensitive pattern that matches any sensitive word.
     *
     * <p>Words come from the property names of {@code word.txt} (if that
     * resource exists and can be read) followed by the built-in words. Each
     * word is quoted so that regex metacharacters inside a word are matched
     * literally and cannot break or alter the pattern. A missing or unreadable
     * resource is not fatal: the built-in words are still installed.
     *
     * <p>Note: {@link Properties#load(InputStream)} decodes the stream as
     * ISO-8859-1, so non-Latin words in the resource must be written as
     * Unicode escapes to be read correctly.
     */
    public static void initPattern() {
        Properties words = new Properties();
        InputStream in = KeyWordFilter.class.getClassLoader().getResourceAsStream(WORD_RESOURCE);
        if (in != null) {
            try {
                words.load(in);
            } catch (Exception loadEx) {
                // Keep going with whatever was loaded plus the built-in words.
                loadEx.printStackTrace();
            } finally {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }

        StringBuilder alternation = new StringBuilder();
        Enumeration<?> names = words.propertyNames();
        while (names.hasMoreElements()) {
            appendWord(alternation, (String) names.nextElement());
        }
        for (int i = 0; i < BUILT_IN_WORDS.length; i++) {
            appendWord(alternation, BUILT_IN_WORDS[i]);
        }
        pattern = Pattern.compile(alternation.toString(), Pattern.CASE_INSENSITIVE);
    }

    /**
     * Appends one literal word to the alternation, inserting the '|' separator
     * only between words. Null and empty words are skipped because an empty
     * alternative would match at every position of the input.
     */
    private static void appendWord(StringBuilder alternation, String word) {
        if (word == null || word.length() == 0) {
            return;
        }
        if (alternation.length() > 0) {
            alternation.append('|');
        }
        alternation.append(Pattern.quote(word));
    }

    /**
     * Replaces every sensitive word in {@code str} with {@code ****}.
     *
     * @param str text to filter; may be null
     * @return the filtered text, or {@code str} itself when it is null or when
     *         {@link #initPattern()} has not installed a pattern yet
     */
    public static String doFilter(String str) {
        Pattern compiled = pattern;
        if (str == null || compiled == null) {
            return str;
        }
        Matcher m = compiled.matcher(str);
        return m.replaceAll(MASK);
    }

    /**
     * Reads a whole text file into one string.
     *
     * <p>Lines are concatenated without their line terminators, so the result
     * contains no line breaks.
     *
     * @param FileName path of the file to read
     * @param charset  name of the charset used to decode the file
     * @return the concatenated lines of the file (empty string for an empty file)
     * @throws IOException if the file cannot be opened or read, or the charset
     *                     name is not supported
     */
    public static String FileReaderAll(String FileName, String charset) throws IOException {
        InputStream in = new FileInputStream(FileName);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
            return content.toString();
        } finally {
            // Closing the underlying stream releases the file even if decoding failed.
            in.close();
        }
    }

    /**
     * Small demo: initialises the pattern and prints a filtered sample sentence.
     *
     * <p>To filter the .txt files of a directory instead, read each file with
     * {@link #FileReaderAll(String, String)} (for example with charset "gbk")
     * and pass the result to {@link #doFilter(String)}.
     */
    public static void main(String[] args) throws IOException {
        initPattern(); // build the pattern before filtering
        String temp = "生存必须要有几个经济实力雄厚死吧的你好大藏家做支撑,而不是靠零";
        String newcoment = KeyWordFilter.doFilter(temp);
        System.out.println(newcoment);
    }
}