使用 Filter 统一处理敏感词,
包含两种处理方式:
a: 替换敏感词
b: 包含敏感词禁止提交
1、初始化 词库信息
package com.common.sensitiveword;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.ResourceUtils;
import java.io.*;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@Slf4j
public class InitSensitiveWord {
/**
* 敏感词文件默认编码格式
*/
public static final String DEFAULT_ENCODING = "utf-8";
/**
* 敏感词DFA树关系标记key
*/
private final String IS_END = "isEnd";
/**
* 不是敏感词的最后一个字符
*/
private final String END_FALSE = "0";
/**
* 是敏感词的最后一个字符
*/
private final String END_TRUE = "1";
/**
* 敏感词文件存放路径
*/
private final String SENSITIVE_WORD_FILE_PATH = "sensitiveWord" + File.separator;
/**
* 忽略特殊字符的正则表达式
*/
private final String IGNORE_SPECIAL_CHAR_REGEX = "[`~!@#$%^&*()+=|{}':;',\\\\[\\\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]|\\s*";
Pattern pattern = Pattern.compile(IGNORE_SPECIAL_CHAR_REGEX);
/**
* 词库信息
*/
public static Map sensitiveWordMap;
public InitSensitiveWord() {
super();
}
/**
* 初始化词库
*
* @return 敏感词
*/
public Map initKeyword() {
//读取敏感词库
Set keyWordSet = readSensitiveWordFile();
// 将敏感词读入hashMap
addSensitiveWordToHashMap(keyWordSet);
// 返回敏感词
return sensitiveWordMap;
}
/**
* 读取敏感词库
*
* @return 敏感词
*/
private Set readSensitiveWordFile() {
Set set = new HashSet();
set.add("日本鬼子");
set.add("傻逼");
set.add("大傻子");
return set;
}
/**
* 获取文件夹路径
*
* @return 文件夹下所有txt
*/
public void initSensitiveWords() throws FileNotFoundException {
sensitiveWordMap = new ConcurrentHashMap();
File dir = new File(getFilePath() + SENSITIVE_WORD_FILE_PATH);
if (dir.isDirectory() && dir.exists()) {
for (File file : dir.listFiles()) {
createDFATree(readSensitiveWordFileToSet(file));
log.info(String.format("将敏感词文件加载到DFA树列表成功{%s}", file));
}
log.info(String.format("总共构建%s棵DFA敏感词树", sensitiveWordMap.size()));
} else {
throw new RuntimeException(String.format("敏感词文件目录不存在{%s}", dir));
}
}
/**
* 读取文件中的敏感词
*
* @param file 敏感词文件
* @return 敏感词set集合
*/
private Set<String> readSensitiveWordFileToSet(File file) {
Set<String> words = new HashSet<>();
if (file.exists()) {
BufferedReader reader = null;
try {
reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), DEFAULT_ENCODING));
String line = "";
while ((line = reader.readLine()) != null) {
words.add(line.trim());
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
log.info(String.format("从文件{%s}读取到{%s}个敏感词", file, words.size()));
return words;
}
/**
* 将敏感词构建成DFA树
* {
* 出={
* isEnd=0,
* 售={
* isEnd=0,
* 手={
* isEnd=0,
* 刀={
* isEnd=1
* }
* },
* 军={
* isEnd=0,
* 刀={
* isEnd=1
* }
* }
* }
* }
* }
*
* @param sensitiveWords 敏感词列表
*/
private void createDFATree(Set<String> sensitiveWords) {
Iterator<String> it = sensitiveWords.iterator();
while (it.hasNext()) {
String word = it.next();
Map currentMap = sensitiveWordMap;
for (int i = 0; i < word.length(); i++) {
char key = word.charAt(i);
if (isIgnore(key)) {
continue;
}
Object oldValueMap = currentMap.get(key);
if (oldValueMap == null) {
// 不存在以key字符的DFA树则需要创建一个
Map newValueMap = new ConcurrentHashMap();
newValueMap.put(IS_END, END_FALSE);
currentMap.put(key, newValueMap);
currentMap = newValueMap;
} else {
currentMap = (Map) oldValueMap;
}
if (i == word.length() - 1) {
// 给最后一个字符添加结束标识
currentMap.put(IS_END, END_TRUE);
}
}
}
}
/**
* 读取敏感词库中的内容,将内容添加到set集合中
*/
private Set readSensitiveWordFileFromFile() throws IOException {
String filePath = getFilePath();
Set set = null;
InputStreamReader read = null;
try {
//读取文件
File file = new File(filePath + "sensitiveword/SensitiveWord.txt");
read = new InputStreamReader(new FileInputStream(file), DEFAULT_ENCODING);
if (file.isFile() && file.exists()) {
//文件流是否存在
set = new HashSet();
BufferedReader bufferedReader = new BufferedReader(read);
String txt = null;
while ((txt = bufferedReader.readLine()) != null) {
//读取文件,将文件内容放入到set中
set.add(txt);
}
} else {
//不存在抛出异常信息
throw new RuntimeException("敏感词库文件不存在");
}
} finally {
if (null != read) {
//关闭文件流
read.close();
}
}
return set;
}
/**
* 将敏感词读入hashMap
*
* @param keyWordSet 敏感词
*/
private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
//初始化敏感词容器,减少扩容操作
sensitiveWordMap = new ConcurrentHashMap(keyWordSet.size());
String key = null;
Map nowMap = null;
Map newWorMap = null;
//迭代keyWordSet
Iterator iterator = keyWordSet.iterator();
while (iterator.hasNext()) {
// 关键字
key = (String) iterator.next();
nowMap = sensitiveWordMap;
for (int i = 0; i < key.length(); i++) {
// 转换成char型
char keyChar = key.charAt(i);
// 获取
Object wordMap = nowMap.get(keyChar);
// 如果存在该key,直接赋值
if (wordMap != null) {
nowMap = (Map) wordMap;
} else {
// 不存在则,则构建一个map,同时将isEnd设置为0,因为他不是最后一个
newWorMap = new HashMap();
// 不是最后一个
newWorMap.put("isEnd", "0");
nowMap.put(keyChar, newWorMap);
nowMap = newWorMap;
}
if (i == key.length() - 1) {
//最后一个
nowMap.put("isEnd", "1");
}
}
}
}
/**
* 判断是否是要忽略的字符(忽略所有特殊字符以及空格)
*
* @param specificChar 指定字符
* @return 特殊字符或空格true否则false
*/
private boolean isIgnore(char specificChar) {
Matcher matcher = pattern.matcher(String.valueOf(specificChar));
return matcher.matches();
}
/**
* 获取项目文件路径
*
* @return 目录路径
* @throws FileNotFoundException
*/
private String getFilePath() throws FileNotFoundException {
//判断系统环境
String osName = System.getProperty("os.name");
log.info("-------系统环境 {} --------", osName);
String filePath = null;
if (osName.startsWith("Windows")) {
// windows
filePath = ResourceUtils.getURL("classpath:").getPath();
} else {
// unix or linux
filePath = System.getProperty("user.dir") + "/";
}
log.info("-------------文件路径 {} ----------", filePath);
return filePath;
}
}
2、敏感词 帮助类
package com.common.sensitiveword;
import java.io.FileNotFoundException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
/**
 * Static helpers that look up, list and mask sensitive words using the trie
 * published in {@code InitSensitiveWord.sensitiveWordMap}.
 */
public class SensitivewordUtils {

    /** Minimum matching: stop at the first (shortest) word found. */
    public static final Integer MIN_MATCH_TYPE = 0;

    /** Maximum matching: keep going and report the longest word found. */
    public static final Integer MAX_MATCH_TYPE = -1;

    static {
        // Load the dictionary once when the class is first used.
        try {
            new InitSensitiveWord().initSensitiveWords();
        } catch (FileNotFoundException e) {
            // Best effort: without a dictionary nothing is reported as sensitive.
            e.printStackTrace();
        }
    }

    /**
     * Tells whether the text contains at least one sensitive word.
     *
     * @param txt       the text to scan; {@code null} is treated as empty
     * @param matchType {@link #MIN_MATCH_TYPE} for minimum matching, any other
     *                  value (e.g. {@link #MAX_MATCH_TYPE}) for maximum matching
     * @return {@code true} if a sensitive word was found
     */
    public static boolean isContaintSensitiveWord(String txt, int matchType) {
        if (txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            if (checkSensitiveWord(txt, i, matchType) > 0) {
                // One hit is enough; no need to scan the rest.
                return true;
            }
        }
        return false;
    }

    /**
     * Collects the distinct sensitive words that occur in the text.
     *
     * @param txt       the text to scan; {@code null} is treated as empty
     * @param matchType {@link #MIN_MATCH_TYPE} for minimum matching, any other
     *                  value (e.g. {@link #MAX_MATCH_TYPE}) for maximum matching
     * @return the words found (never {@code null})
     */
    public static Set<String> getSensitiveWord(String txt, int matchType) {
        Set<String> sensitiveWordList = new HashSet<>();
        if (txt == null) {
            return sensitiveWordList;
        }
        for (int i = 0; i < txt.length(); i++) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length > 0) {
                sensitiveWordList.add(txt.substring(i, i + length));
                // Skip the matched word; minus one because the loop increments i.
                i = i + length - 1;
            }
        }
        return sensitiveWordList;
    }

    /**
     * Replaces every sensitive word in the text with the same number of
     * {@code *} characters.
     *
     * @param txt the text to mask
     * @return the masked text; {@code txt} itself when it is {@code null} or
     *         contains no sensitive word
     */
    public static String replaceSensitiveWord(String txt) {
        if (txt == null || txt.isEmpty()) {
            return txt;
        }
        String resultTxt = txt;
        for (String word : getSensitiveWord(txt, MAX_MATCH_TYPE)) {
            String mask = getReplaceChars("*", word.length());
            // Literal replacement: the word is user data, not a regular
            // expression (replaceAll would throw or mis-match on metacharacters).
            resultTxt = resultTxt.replace(word, mask);
        }
        return resultTxt;
    }

    /**
     * Repeats the replacement string.
     *
     * @param replaceChar the replacement string
     * @param length      how many times to repeat it; values below 1 yield one copy
     * @return the repeated replacement string
     */
    private static String getReplaceChars(String replaceChar, int length) {
        StringBuilder mask = new StringBuilder(replaceChar);
        for (int i = 1; i < length; i++) {
            mask.append(replaceChar);
        }
        return mask.toString();
    }

    /**
     * Checks whether a sensitive word starts at {@code beginIndex}.
     *
     * <p>The trie is walked character by character. The reported length is the
     * length at the last end-of-word marker that was passed, not the number of
     * nodes walked, so text that merely continues along the path of a longer
     * dictionary word is not counted as part of the match.
     *
     * <p>Matches shorter than two characters are not reported.
     *
     * @param txt        the text to scan
     * @param beginIndex index of the first character to test
     * @param matchType  {@link #MIN_MATCH_TYPE} for minimum matching, any other
     *                   value for maximum matching
     * @return the length of the word found, or 0 if there is none
     */
    @SuppressWarnings({"rawtypes"})
    public static int checkSensitiveWord(String txt, int beginIndex, int matchType) {
        Map nowMap = InitSensitiveWord.sensitiveWordMap;
        if (txt == null || nowMap == null) {
            return 0;
        }
        // Number of characters walked along the trie so far.
        int walked = 0;
        // Length of the longest complete word seen so far.
        int matchedLength = 0;
        for (int i = beginIndex; i < txt.length(); i++) {
            nowMap = (Map) nowMap.get(txt.charAt(i));
            if (nowMap == null) {
                break;
            }
            walked++;
            if ("1".equals(nowMap.get("isEnd"))) {
                matchedLength = walked;
                if (MIN_MATCH_TYPE.intValue() == matchType) {
                    break;
                }
            }
        }
        return matchedLength < 2 ? 0 : matchedLength;
    }

    public static void main(String[] args) {
        long beginTime = System.currentTimeMillis();
        System.out.println("敏感词的数量:" + InitSensitiveWord.sensitiveWordMap.size());
        String string = "腐败125公关兼职招聘4, 傻逼5,大 傻 逼123大傻子啊";
        System.out.println("待检测语句字数:" + string.length());
        // List the sensitive words contained in the sample text.
        Set<String> set = SensitivewordUtils.getSensitiveWord(string, MAX_MATCH_TYPE);
        // Mask them.
        String str = SensitivewordUtils.replaceSensitiveWord(string);
        System.out.println("替换后的:" + str);
        long endTime = System.currentTimeMillis();
        System.out.println("语句中包含敏感词的个数为:" + set.size() + "。包含:" + set);
        System.out.println("总共消耗时间为:" + (endTime - beginTime));
    }
}
3、统一处理,使用filter处理
包含敏感词,对敏感词进行替换
package com.opencloud.common.filter;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.common.sensitiveword.SensitivewordUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import javax.servlet.ReadListener;
import javax.servlet.ServletInputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
/**
* 敏感词请求
*
* @author kou
*/
public class SensitiveWordRequestWrapper extends HttpServletRequestWrapper {
private Map<String, String[]> params = new HashMap<>();
public SensitiveWordRequestWrapper(HttpServletRequest request) {
super(request);
//将参数表,赋予给当前的Map以便于持有request中的参数
this.params.putAll(request.getParameterMap());
}
@Override
public ServletInputStream getInputStream() throws IOException {
// 非json类型,直接返回
if (!super.getHeader(HttpHeaders.CONTENT_TYPE).equalsIgnoreCase(MediaType.APPLICATION_JSON_VALUE)) {
return super.getInputStream();
}
//为空,直接返回
String json = IOUtils.toString(super.getInputStream(), "utf-8");
if (StringUtils.isEmpty(json)) {
return super.getInputStream();
}
Map<String, Object> map = JSON.parseObject(json);
dealSensitiveWord(map);
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(JSON.toJSONString(map).getBytes("utf-8"));
return new ServletInputStream() {
@Override
public boolean isFinished() {
return false;
}
@Override
public boolean isReady() {
return false;
}
@Override
public void setReadListener(ReadListener readListener) {
}
@Override
public int read() {
return byteArrayInputStream.read();
}
};
}
@Override
public String getParameter(String name) {
String[] values = params.get(name);
if (values == null || values.length == 0) {
return null;
}
// 判断是否包含敏感字符
if (SensitivewordUtils.isContaintSensitiveWord(values[0], SensitivewordUtils.MAX_MATCH_TYPE)) {
// 替换敏感字符
return SensitivewordUtils.replaceSensitiveWord(values[0]);
}
return values[0];
}
@Override
public String[] getParameterValues(String name) {
String[] values = params.get(name);
if (values == null || values.length == 0) {
return null;
}
for (int i = 0; i < values.length; i++) {
// 判断是否包含敏感字符
if (SensitivewordUtils.isContaintSensitiveWord(values[i], SensitivewordUtils.MAX_MATCH_TYPE)) {
// 替换敏感字符
values[i] = SensitivewordUtils.replaceSensitiveWord(values[i]);
}
}
return values;
}
private void dealSensitiveWord(Map<String, Object> map) {
Set<String> set = map.keySet();
Iterator<String> it = set.iterator();
// 将parameter的值去除空格后重写回去
while (it.hasNext()) {
String key = it.next();
Object values = map.get(key);
String value;
if (values instanceof String) {
value = ((String) values).trim();
// 判断是否包含敏感字符
if (SensitivewordUtils.isContaintSensitiveWord(value, SensitivewordUtils.MAX_MATCH_TYPE)) {
// 替换敏感字符
values = SensitivewordUtils.replaceSensitiveWord(value);
}
} else if (values instanceof JSONArray) {
JSONArray json = (JSONArray) values;
if (!json.isEmpty()) {
for (int i = 0; i < json.size(); i++) {
// 遍历 jsonarray 数组,把每一个对象转成 json 对象
JSONObject job = json.getJSONObject(i);
dealSensitiveWord(job);
}
}
}
map.put(key, values);
}
}
}
包含敏感词禁止提交,并返回敏感词信息
package com.common.filter;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.common.sensitiveword.SensitivewordUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import java.io.IOException;
import java.util.*;
/**
* 包含敏感词直接返回
*
* @author kou
*/
@Slf4j
public class SensitiveWordReturnRequestWrapper extends HttpServletRequestWrapper {
private Map<String, String[]> params = new HashMap<>();
public SensitiveWordReturnRequestWrapper(HttpServletRequest request) {
super(request);
//将参数表,赋予给当前的Map以便于持有request中的参数
this.params.putAll(request.getParameterMap());
}
/**
* 校验是否包含敏感字符
*
* @return 返回敏感字符
*/
public Set validateSensitiveWord() throws IOException {
// 保存包含的敏感字符
Set<String> sensitiveWords = new HashSet<>();
Set sensitive = null;
if (params.size() > 0) {
Iterator<Map.Entry<String, String[]>> it = params.entrySet().iterator();
while (it.hasNext()) {
Map.Entry<String, String[]> entry = it.next();
String[] values = entry.getValue();
if (values != null) {
for (int i = 0; i < values.length; i++) {
// 获取敏感字词
sensitive = SensitivewordUtils.getSensitiveWord(values[i], SensitivewordUtils.MAX_MATCH_TYPE);
if (null != sensitive && sensitive.size() > 0) {
sensitiveWords.addAll(sensitive);
}
}
}
}
}
// application/json 提交的获取body流
if (null != super.getHeader(HttpHeaders.CONTENT_TYPE)) {
//为空,直接返回
String contentType = super.getHeader(HttpHeaders.CONTENT_TYPE).split(";")[0];
if (contentType.equalsIgnoreCase(MediaType.APPLICATION_JSON_VALUE)) {
String json = IOUtils.toString(super.getInputStream(), "utf-8");
if (StringUtils.isNotBlank(json)) {
// 处理json数据
dealJsonDatas(json, sensitiveWords);
}
}
}
if (sensitiveWords.size() > 0) {
return sensitiveWords;
}
return null;
}
/**
* 处理json 数据
*
* @param json 原始json数据
* @param sensitiveWords 包含的敏感词
*/
public void dealJsonDatas(String json, Set<String> sensitiveWords) {
Object datas = JSON.parse(json);
if (datas instanceof JSONObject) {
// 对象
dealSensitiveWord((Map<String, Object>) datas, sensitiveWords);
} else if (datas instanceof JSONArray) {
// 数组
dealArrayDatas((JSONArray) datas, sensitiveWords);
} else {
// 其他的类型直接用字符串进行替换
// 获取敏感字词
Set sensitive = SensitivewordUtils.getSensitiveWord(json.toLowerCase(), SensitivewordUtils.MAX_MATCH_TYPE);
if (null != sensitive && sensitive.size() > 0) {
sensitiveWords.addAll(sensitive);
}
}
}
/**
* 处理敏感词
*
* @param map 原始数据
* @param sensitiveWords 包含的敏感词
*/
private void dealSensitiveWord(Map<String, Object> map, Set<String> sensitiveWords) {
Set<String> set = map.keySet();
Iterator<String> it = set.iterator();
// 将parameter的值去除空格后重写回去
while (it.hasNext()) {
String key = it.next();
Object values = map.get(key);
if (null != values) {
if (values instanceof String) {
// 处理字符串
dealStringData((String) values, sensitiveWords);
} else if (values instanceof JSONObject || values instanceof Map) {
// 包含对象
JSONObject object = (JSONObject) values;
if (!object.isEmpty()) {
// 判断是否是空对象
dealSensitiveWord(object, sensitiveWords);
}
} else if (values instanceof JSONArray || values instanceof List) {
// 包含数组
dealArrayDatas((JSONArray) values, sensitiveWords);
}
}
}
}
/**
* 处理数组类型数据
*
* @param json json 数组
* @param sensitiveWords 包含的敏感词
*/
public void dealArrayDatas(JSONArray json, Set<String> sensitiveWords) {
if (!json.isEmpty()) {
if (null != json) {
// 判断数组是否是对象,是对象循环处理对象内容
if (json.get(0) instanceof JSONObject) {
for (int i = 0; i < json.size(); i++) {
// 遍历 jsonarray 数组,把每一个对象转成 json 对象
JSONObject job = json.getJSONObject(i);
dealSensitiveWord(job, sensitiveWords);
}
} else if (json.get(0) instanceof String) {
// 不是对象,为字符串数组
for (int i = 0; i < json.size(); i++) {
// 处理字符串
dealStringData(json.getString(i), sensitiveWords);
}
}
}
}
}
/**
* 处理数组类型数据
*
* @param content 字符串内容
* @param sensitiveWords 包含的敏感词
*/
public void dealStringData(String content, Set<String> sensitiveWords) {
// 将字符串转换为小写
String value = content.toLowerCase();
// 空数据不处理
if (StringUtils.isNotBlank(value)) {
// 获取敏感字词
Set sensitive = SensitivewordUtils.getSensitiveWord(value, SensitivewordUtils.MAX_MATCH_TYPE);
// 保存敏感词
if (null != sensitive && sensitive.size() > 0) {
sensitiveWords.addAll(sensitive);
}
}
}
}
敏感词过滤器处理,针对不同的处理方式进行相应处理
package com.common.filter;
import com.alibaba.fastjson.JSON;
import com.common.model.ResultBody;
import com.common.utils.StringUtils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpMethod;
import javax.servlet.*;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Set;
/**
 * Servlet filter that applies the configured sensitive-word policy to POST
 * requests: either mask the words or reject the request.
 *
 * @author kou
 */
@Slf4j
public class SensitiveWordFilter implements Filter {

    /** Policy "1": replace sensitive words. */
    public static final String REPLACE_SENSITIVE_WORD = "1";

    /** Policy "2": reject requests that contain sensitive words. */
    public static final String RETURN_CONTAIN_SENSITIVE_WORD = "2";

    /** Configured policy; any value other than "1" or "2" (including null) disables the filter. */
    private String dealMethod;

    /**
     * @param dealMethod {@link #REPLACE_SENSITIVE_WORD},
     *                   {@link #RETURN_CONTAIN_SENSITIVE_WORD}, or anything
     *                   else to disable the check
     */
    public SensitiveWordFilter(String dealMethod) {
        // Compare against the String constants; comparing a String with the
        // number 1 is always false and made this log line report the wrong policy.
        String description;
        if (REPLACE_SENSITIVE_WORD.equals(dealMethod)) {
            description = "替换敏感字符";
        } else if (RETURN_CONTAIN_SENSITIVE_WORD.equals(dealMethod)) {
            description = "包含敏感字符禁止提交";
        } else {
            description = "不做任何处理";
        }
        log.info("处理敏感字符方式:{}", description);
        this.dealMethod = dealMethod;
    }

    @Override
    public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws IOException, ServletException {
        HttpServletRequest request = (HttpServletRequest) servletRequest;
        HttpServletResponse response = (HttpServletResponse) servletResponse;

        // Only POST requests are checked, and only when a policy is configured.
        if (!HttpMethod.POST.name().equals(request.getMethod()) || null == this.dealMethod) {
            filterChain.doFilter(request, response);
            return;
        }

        if (REPLACE_SENSITIVE_WORD.equals(this.dealMethod)) {
            // Downstream handlers read masked parameters and body from the wrapper.
            filterChain.doFilter(new SensitiveWordRequestWrapper(request), response);
            return;
        }

        if (RETURN_CONTAIN_SENSITIVE_WORD.equals(this.dealMethod)) {
            SensitiveWordReturnRequestWrapper sensitiveWordReturn = new SensitiveWordReturnRequestWrapper(request);
            Set<String> set = sensitiveWordReturn.validateSensitiveWord();
            if (null != set && !set.isEmpty()) {
                log.info("包含敏感词数:{} 个,禁止提交", set.size());
                response.setCharacterEncoding("UTF-8");
                response.setContentType("application/json; charset=utf-8");
                PrintWriter out = response.getWriter();
                out.append(JSON.toJSONString(ResultBody.failed().msg("包含敏感词禁止提交").data(set)));
                return;
            }
            // Forward the wrapper, not the raw request: validation may read the
            // request body, and only the wrapper can make what it read visible
            // to downstream handlers.
            filterChain.doFilter(sensitiveWordReturn, response);
            return;
        }

        filterChain.doFilter(request, response);
    }
}
需要排除指定路径(不校验敏感词)时使用的过滤器类
import com.alibaba.fastjson.JSON;
import com.common.model.ResultBody;
import com.common.utils.StringUtils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpMethod;
import javax.servlet.*;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Set;
/**
 * Servlet filter that applies the configured sensitive-word policy to POST
 * requests (mask the words or reject the request), except for requests whose
 * URI matches one of the configured exclusion patterns.
 *
 * @author kou
 */
@Slf4j
public class SensitiveWordFilter implements Filter {

    /** Policy "1": replace sensitive words. */
    public static final String REPLACE_SENSITIVE_WORD = "1";

    /** Policy "2": reject requests that contain sensitive words. */
    public static final String RETURN_CONTAIN_SENSITIVE_WORD = "2";

    /** Configured policy; any value other than "1" or "2" (including null) disables the filter. */
    private String dealMethod;

    /** URI patterns that are exempt from the check; may be null or empty. */
    private Set<String> notValidate;

    /**
     * @param dealMethod  {@link #REPLACE_SENSITIVE_WORD},
     *                    {@link #RETURN_CONTAIN_SENSITIVE_WORD}, or anything
     *                    else to disable the check
     * @param notValidate URI patterns that are exempt from the check
     */
    public SensitiveWordFilter(String dealMethod, Set<String> notValidate) {
        // Compare against the String constants; comparing a String with the
        // number 1 is always false and made this log line report the wrong policy.
        String description;
        if (REPLACE_SENSITIVE_WORD.equals(dealMethod)) {
            description = "替换敏感字符";
        } else if (RETURN_CONTAIN_SENSITIVE_WORD.equals(dealMethod)) {
            description = "包含敏感字符禁止提交";
        } else {
            description = "不做任何处理";
        }
        log.info("处理敏感字符方式:{}", description);
        this.dealMethod = dealMethod;
        this.notValidate = notValidate;
    }

    @Override
    public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws IOException, ServletException {
        HttpServletRequest request = (HttpServletRequest) servletRequest;
        HttpServletResponse response = (HttpServletResponse) servletResponse;

        // Only POST requests are checked, and only when a policy is configured.
        if (!HttpMethod.POST.name().equals(request.getMethod()) || null == this.dealMethod) {
            filterChain.doFilter(request, response);
            return;
        }

        boolean replaceMode = REPLACE_SENSITIVE_WORD.equals(this.dealMethod);
        boolean rejectMode = RETURN_CONTAIN_SENSITIVE_WORD.equals(this.dealMethod);
        // Unknown policies and excluded URIs pass through untouched.
        if ((!replaceMode && !rejectMode) || isExcluded(request)) {
            filterChain.doFilter(request, response);
            return;
        }

        if (replaceMode) {
            // Downstream handlers read masked parameters and body from the wrapper.
            filterChain.doFilter(new SensitiveWordRequestWrapper(request), response);
            return;
        }

        SensitiveWordReturnRequestWrapper sensitiveWordReturn = new SensitiveWordReturnRequestWrapper(request);
        Set<String> set = sensitiveWordReturn.validateSensitiveWord();
        if (null != set && !set.isEmpty()) {
            log.info("包含敏感词数:{} 个,禁止提交", set.size());
            response.setCharacterEncoding("UTF-8");
            response.setContentType("application/json; charset=utf-8");
            PrintWriter out = response.getWriter();
            out.append(JSON.toJSONString(ResultBody.failed().msg("包含敏感词禁止提交").put("errorCode", 10).data(set)));
            return;
        }
        // Forward the wrapper, not the raw request: validation may read the
        // request body, and only the wrapper can make what it read visible to
        // downstream handlers.
        filterChain.doFilter(sensitiveWordReturn, response);
    }

    /** Tells whether the request URI matches one of the exclusion patterns. */
    private boolean isExcluded(HttpServletRequest request) {
        if (null == notValidate || notValidate.isEmpty()) {
            return false;
        }
        String requestUrl = request.getRequestURI();
        if (isNotCheck(requestUrl, this.notValidate)) {
            log.info("不校验敏感词,url:{}", requestUrl);
            return true;
        }
        return false;
    }

    // Supported pattern shapes (written as line comments because the wildcard
    // text would terminate a block comment):
    //   "/**/x"    - the URI ends with "/x"
    //   "/**/x/**" - the URI contains "/x/"
    //   "/a/**"    - the URI is "/a" or starts with "/a/"
    //   "/a/b"     - the URI equals "/a/b"
    // A trailing "/" of the URI is ignored. Null entries are skipped and
    // surrounding whitespace of a pattern is trimmed, so a comma-separated
    // configuration value split without trimming still works.
    //
    // Returns true when the URI is exempt from the check.
    private boolean isNotCheck(String url, Set<String> checkUrls) {
        String target = url.endsWith("/") ? url.substring(0, url.lastIndexOf("/")) : url;
        for (String configured : checkUrls) {
            if (configured == null) {
                continue;
            }
            String path = configured.trim();
            boolean wildcardPrefix = path.startsWith("/**/");
            boolean wildcardSuffix = path.endsWith("/**");

            if (wildcardPrefix && wildcardSuffix) {
                log.info("check url start with /** and end with /**, url: {}, path: {} ", target, path);
                // Keep the part between the two wildcards, including both slashes.
                String middle = path.substring(0, path.lastIndexOf("/") + 1).substring(3);
                if (target.contains(middle)) {
                    return true;
                }
            } else if (wildcardPrefix) {
                log.info("check url start with /**, url: {}, path: {} ", target, path);
                if (target.endsWith(path.substring(3))) {
                    return true;
                }
            }

            if (wildcardSuffix) {
                log.info("check url end with /**, url: {}, path: {} ", target, path);
                String base = path.substring(0, path.lastIndexOf("/"));
                if (target.startsWith(base + "/") || target.equals(base)) {
                    return true;
                }
            }

            if (target.equals(path)) {
                return true;
            }
        }
        return false;
    }
}
在config里设置bean
/**
 * Default configuration that registers the sensitive-word filter.
 *
 * @author kou
 */
@Slf4j
@Configuration
public class AutoConfiguration {

    /**
     * Sensitive-word policy read from {@code sensitiveWord.dealMethod}:
     * "1" replaces sensitive words, "2" rejects requests containing them;
     * the value is {@code null} when the property is not set.
     */
    @Value("${sensitiveWord.dealMethod:#{null}}")
    private String sensitiveWordDealMethod;

    /**
     * Registers the sensitive-word filter with the configured policy.
     *
     * @return the filter registration
     */
    @Bean
    public FilterRegistrationBean sensitiveWordFilter() {
        SensitiveWordFilter filter = new SensitiveWordFilter(sensitiveWordDealMethod);
        FilterRegistrationBean registration = new FilterRegistrationBean(filter);
        log.info("SensitiveWordFilter [{}]", registration);
        return registration;
    }
}
配置文件设置
sensitiveWord.dealMethod 为可选配置;不设置时默认不处理敏感词
sensitiveWord:
  # 敏感词处理方式,1:替换敏感字符, 2:包含敏感字符禁止提交,null不处理
  dealMethod: 2
  # 过滤不校验的路径
  notValidate: /**/remove, /**/removeById, /blogReport/send