一、在web系统中,用户输入的数据里难免会包含敏感词,这时可以有三种做法:
1、把敏感词高亮显示
2、禁止提交,提示内容中包含敏感词
3、把敏感词转化为****
二、利用word-search这个算法实现敏感词的查找和定位,利用shiro实现拦截过滤
附:word-search百度云盘地址:链接:https://pan.baidu.com/s/1HVkjPo-qc8EtCeF0EY96OA 提取码:t4dv
三、实现过程
1、敏感词文件,文件中包含要过滤的敏感词,每个词语用英文逗号隔开:例(aaa,bbb,cc)
2、SensitiveWords 读取本地的敏感词文件
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import com.shiro.oauth.util.Constans;
import com.tfc.analysis.KWSeeker;
import com.tfc.analysis.entity.Keyword;
import com.tfc.analysis.fragment.HTMLFragment;
import com.tfc.analysis.process.WordFinder;
/**
 * Loads the sensitive-word dictionary from a local file and offers lookup and
 * highlighting helpers built on {@link KWSeeker}.
 *
 * <p>The dictionary file is read once, when the class is initialised. Words in
 * the file are separated by English commas and may be spread over several lines.
 */
public class SensitiveWords {
    // Path of the local sensitive-word file; adjust it to match your own project.
    private static String sensitiveWordsFilePath = Constans.WEBAPP_PATH + "/commonfile/sensitivewords.txt";

    /** Keywords parsed from the dictionary file. */
    public static List<Keyword> list = new ArrayList<Keyword>();

    static {
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(sensitiveWordsFilePath), "gbk"));
            String line;
            while ((line = br.readLine()) != null) {
                // Split every line on its own: concatenating the lines first would glue the
                // last word of one line to the first word of the next one.
                String[] every = line.split(",");
                for (int i = 0; i < every.length; i++) {
                    String word = every[i].trim();
                    // Skip blanks produced by trailing commas or empty lines.
                    if (!word.isEmpty()) {
                        list.add(new Keyword(word));
                    }
                }
            }
        } catch (Exception e) {
            // Best effort: a missing or unreadable dictionary must not break class
            // initialisation; the keyword list simply keeps whatever was loaded so far.
            e.printStackTrace();
        } finally {
            // Release the file handle whether or not reading succeeded.
            if (br != null) {
                try {
                    br.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Tells whether the given text contains at least one word of the dictionary.
     *
     * @param content text to inspect; may be {@code null}
     * @return {@code true} if a dictionary word was found, {@code false} otherwise
     *         (including for {@code null} or empty text)
     */
    public boolean haveWords(String content) {
        if (content == null || content.isEmpty()) {
            return false;
        }
        KWSeeker seeker = KWSeeker.getInstance(list);
        return seeker.findWords(content).size() > 0;
    }

    /**
     * Returns the given text with every dictionary word highlighted using the
     * seeker's default highlighting.
     *
     * @param content text to process; may be {@code null}
     * @return the highlighted text, or {@code content} unchanged when it is
     *         {@code null} or empty
     */
    public String findWords(String content) {
        if (content == null || content.isEmpty()) {
            return content;
        }
        KWSeeker seeker = KWSeeker.getInstance(list);
        return seeker.highlight(content);
    }
}
3、SensitivewordsExecption 异常类
import org.apache.shiro.ShiroException;
/**
 * Exception signalling that submitted content contains a sensitive word.
 */
public class SensitivewordsExecption extends ShiroException {

    /** Serialization version identifier. */
    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception without a detail message.
     */
    public SensitivewordsExecption() {
        super();
    }

    /**
     * Creates the exception with a detail message.
     *
     * @param msg the detail message
     */
    public SensitivewordsExecption(String msg) {
        super(msg);
    }
}
4、SensitiveWordsFilter 自定义敏感词过滤器(重点!!)
该过滤器继承自PathMatchingFilter过滤器。对shiro过滤器不清楚的读者可以先查阅相关的详细介绍,这里就不展开了。代码如下:
import java.io.UnsupportedEncodingException;
import java.util.Enumeration;
import java.util.Locale;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import net.sf.json.JsonConfig;
import net.sf.json.util.CycleDetectionStrategy;
import org.apache.commons.lang.StringUtils;
import org.apache.shiro.ShiroException;
import org.apache.shiro.web.filter.PathMatchingFilter;
/**
 * Shiro path-matching filter that rejects a request when its URL, query string,
 * parameter names or parameter values contain a sensitive word.
 */
public class SensitiveWordsFilter extends PathMatchingFilter {

    /** Message carried by the exception thrown when a sensitive word is found. */
    private static final String SENSITIVE_WORD_MESSAGE = "你输入的内容中存在敏感词";

    /**
     * Checks the request for sensitive words before it is passed down the chain.
     *
     * @param request     the incoming request; cast to {@link HttpServletRequest}
     * @param response    the outgoing response (not used here)
     * @param mappedValue the filter configuration for the matched path (not used here)
     * @return {@code true} when no sensitive word was found, so the chain continues
     * @throws ShiroException a {@link SensitivewordsExecption} when a sensitive word is found
     */
    @Override
    public boolean onPreHandle(ServletRequest request, ServletResponse response, Object mappedValue) throws ShiroException {
        HttpServletRequest req = (HttpServletRequest) request;
        try {
            // Must happen before the first parameter is read to have any effect on decoding.
            req.setCharacterEncoding("UTF-8");
        } catch (UnsupportedEncodingException e1) {
            e1.printStackTrace();
        }

        SensitiveWords sensitiveWords = new SensitiveWords();

        // Request path.
        rejectIfSensitive(sensitiveWords, req.getRequestURL().toString());

        // Raw query string of a GET request (may be null when there is none).
        rejectIfSensitive(sensitiveWords, req.getQueryString());

        // Request parameters: check every name and every value, not only the
        // first value of a multi-valued parameter.
        Enumeration<String> names = req.getParameterNames();
        while (names.hasMoreElements()) {
            String name = names.nextElement();
            rejectIfSensitive(sensitiveWords, name);
            String[] values = req.getParameterValues(name);
            if (values == null) {
                continue;
            }
            for (int i = 0; i < values.length; i++) {
                rejectIfSensitive(sensitiveWords, values[i]);
            }
        }
        return true;
    }

    /**
     * Throws {@link SensitivewordsExecption} when the lower-cased text contains a sensitive word.
     * {@code null} and empty text are ignored.
     */
    private static void rejectIfSensitive(SensitiveWords sensitiveWords, String text) {
        if (StringUtils.isEmpty(text)) {
            return;
        }
        // Locale.ROOT keeps the lower-casing independent of the server's default locale.
        if (sensitiveWords.haveWords(text.toLowerCase(Locale.ROOT))) {
            throw new SensitivewordsExecption(SENSITIVE_WORD_MESSAGE);
        }
    }
}
5、shiro的xml文件配置
<!-- Shiro filter factory: sets the security manager, the login/success URLs, the named filters and the URL-to-filter-chain definitions -->
<bean id="shiroFilter" class="org.apache.shiro.spring.web.ShiroFilterFactoryBean">
<property name="securityManager" ref="securityManager"/>
<property name="loginUrl" value="/login"/>
<property name="successUrl" value="/main"/>
<property name="filters">
<util:map>
<entry key="authc" value-ref="formAuthenticationFilter"/>
<!-- Register the sensitive-word filter with Shiro under the name "sensitiveWords" -->
<entry key="sensitiveWords" value-ref="sensitiveWordsFilter" />
</util:map>
</property>
<property name="filterChainDefinitions">
<value>
/css/** = anon
/fonts/** = anon
/js/** = anon
/images/** = anon
/sysImages/** = anon
/front/**= anon
/login = authc
/logout = logout
/captcha = anon
<!-- URL intercepted by the sensitive-word filter -->
/saveContent=sensitiveWords,authc
/back/** = authc
</value>
</property>
</bean>
<!-- Declare the sensitive-word filter as a bean in the container -->
<bean id="sensitiveWordsFilter" class="com.shiro.security.SensitiveWordsFilter"/>
6、对敏感词过滤器抛出的SensitivewordsExecption进行捕捉处理
<!-- Map SensitivewordsExecption to the words.html error page -->
<error-page>
<exception-type>com.shiro.security.SensitivewordsExecption</exception-type>
<location>/WEB-INF/view/error/words.html</location>
</error-page>
简单的实现了shiro敏感词过滤拦截,具体情况可以按照实际的业务需求进行处理。