.NET C# 字符串过滤 HTML 标签

最新推荐文章于 2021-06-09 10:43:06 发布

原创最新推荐文章于 2021-06-09 10:43:06 发布 · 785 阅读

0 ·

CC 4.0 BY-SA版权

.Net 专栏收录该内容

11 篇文章

订阅专栏

本文介绍一种用 C# 过滤 HTML 标签的方法：通过正则表达式去除字符串中的 HTML 标签和特殊字符实体，得到纯文本内容，适用于需要清理 HTML 内容的场景。

/// <summary>
        /// Filters HTML markup out of a string by applying a fixed sequence of
        /// regular expressions, each of which deletes what it matches.
        /// </summary>
        /// <remarks>
        /// In order, the filters delete: whole <c>script</c> elements, every
        /// <c>&lt;...&gt;</c> tag, any unterminated tag start left at the end of
        /// truncated markup, all whitespace, a fixed list of named/numeric
        /// character entities, and finally any remaining decimal numeric entity.
        /// Entities are deleted, not decoded, and whitespace between words is
        /// not preserved.
        /// </remarks>
        /// <param name="strHtml">The string to filter; may be null or empty.</param>
        /// <returns>
        /// The filtered string, or <paramref name="strHtml"/> itself when it is
        /// null or empty.
        /// </returns>
        public static string GetStringFilterHtml(string strHtml)
        {
            if (String.IsNullOrEmpty(strHtml))
            {
                return strHtml;
            }

            // Order matters: script elements must go before the generic tag
            // filter, otherwise only their tags would be stripped and the
            // script text would survive.
            string strOutput = strHtml;
            foreach (Regex regex in HtmlFilterRegexes)
            {
                strOutput = regex.Replace(strOutput, "");
            }

            // The previous version wrapped everything in a catch-all that
            // returned the input unchanged. The only thing it ever caught was
            // the ArgumentException raised by an invalid pattern in the list,
            // which silently disabled the whole filter. All patterns below are
            // valid and no match timeout is configured, so no handler is needed.
            return strOutput;
        }

        // Filters applied in sequence by GetStringFilterHtml. Built once and
        // reused; Regex instances are safe to share for matching.
        private static readonly Regex[] HtmlFilterRegexes =
        {
            // Whole <script> elements. Singleline lets '.' match newlines so
            // multi-line script bodies are removed as a unit.
            new Regex(@"<script[^>]*?>.*?</script>", RegexOptions.IgnoreCase | RegexOptions.Singleline),
            // Any remaining tag: '<' up to the first following '>'.
            new Regex(@"<(\/\s*)?(.|\n)*?(\/\s*)?>", RegexOptions.IgnoreCase),
            // An unterminated tag start (e.g. markup cut off mid-tag).
            new Regex(@"<(\w|\s|""|'| |=|\\|\.|\/|#)*", RegexOptions.IgnoreCase),
            // All whitespace.
            // NOTE(review): inside a character class '|' is a literal, so this
            // also deletes pipe characters - confirm whether that is wanted.
            new Regex(@"([\r\n|\s])*", RegexOptions.IgnoreCase),
            // Common character entities, by name or decimal code.
            new Regex(@"&(quot|#34);", RegexOptions.IgnoreCase),
            new Regex(@"&(amp|#38);", RegexOptions.IgnoreCase),
            new Regex(@"&(lt|#60);", RegexOptions.IgnoreCase),
            new Regex(@"&(gt|#62);", RegexOptions.IgnoreCase),
            new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),
            new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
            new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
            new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
            new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase),
            // Any other decimal numeric entity.
            new Regex(@"&#(\d+);", RegexOptions.IgnoreCase)
        };