JSP---过滤HTML字符

最新推荐文章于 2021-06-03 23:51:38 发布

原创最新推荐文章于 2021-06-03 23:51:38 发布 · 1.3k 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#过滤 #HTML

JAVA 专栏收录该内容

66 篇文章

订阅专栏

本文介绍了一种使用Java实现的过滤HTML代码的方法，通过正则表达式去除恶意脚本和样式标签，确保评论模块安全。

写JSP网页里的评论模块时，需要过滤用户恶意输入的HTML代码，用下面的方法可以对评论内容进行过滤！

import java.util.regex.Pattern;
/**
 * Strips HTML markup from a string and returns the remaining plain text.
 *
 * <p>Script and style elements are removed together with their bodies; every
 * other tag is removed while the text between tags is kept.
 *
 * <p>NOTE(review): regex-based tag stripping is a best-effort cleanup, not a
 * complete XSS defence. Text rendered back into a page should still be
 * HTML-escaped at output time.
 *
 * @author YangJunping
 * @date 2010 -7- 15
 */
public class FiterHtml {

    /** A whole {@code <script ...>...</script>} element, body included. */
    private static final Pattern SCRIPT_ELEMENT = Pattern.compile(
            "<\\s*script[^>]*>[\\s\\S]*?<\\s*/\\s*script\\s*>", Pattern.CASE_INSENSITIVE);

    /** A whole {@code <style ...>...</style>} element, body included. */
    private static final Pattern STYLE_ELEMENT = Pattern.compile(
            "<\\s*style[^>]*>[\\s\\S]*?<\\s*/\\s*style\\s*>", Pattern.CASE_INSENSITIVE);

    /** Any complete tag: {@code <} up to the next {@code >}. */
    private static final Pattern ANY_TAG = Pattern.compile("<[^>]+>");

    /** A tag that was opened but never closed, e.g. a trailing {@code <div}. */
    private static final Pattern UNTERMINATED_TAG = Pattern.compile("<[^>]+");

    /**
     * Small demo: prints the text content of a sample HTML document.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        StringBuilder htmlStr = new StringBuilder();
        htmlStr.append("<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>")
               .append("<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'><head><title>aaa</title><mce:script type='text/javascript'></mce:script>")
               .append("</head><body><ul><li>XXXX</li></ul></body></html>")
               .append("<font color='red'>heh</font>");
        FiterHtml fh = new FiterHtml();
        System.out.println(fh.Html2Text(htmlStr.toString()));
    }

    /**
     * Removes HTML markup from the given string.
     *
     * <p>Order matters: script and style elements are removed first so that
     * their bodies (JavaScript / CSS source) are dropped instead of being left
     * behind as text once the surrounding tags are stripped.
     *
     * @param inputString text that may contain HTML tags; may be {@code null}
     * @return the input with markup removed, or an empty string when
     *         {@code inputString} is {@code null}
     */
    public String Html2Text(String inputString) {
        // A null input has always produced an empty result; keep that contract
        // explicitly instead of relying on a caught NullPointerException.
        if (inputString == null) {
            return "";
        }

        String text = inputString;
        text = SCRIPT_ELEMENT.matcher(text).replaceAll("");   // script elements incl. body
        text = STYLE_ELEMENT.matcher(text).replaceAll("");    // style elements incl. body
        text = ANY_TAG.matcher(text).replaceAll("");          // every remaining complete tag
        text = UNTERMINATED_TAG.matcher(text).replaceAll(""); // dangling "<tag" without ">"
        return text;
    }
}

用一个普通的Java类实现即可，没有必要用到Servlet的Filter！