C# 几个正则表达式

    #region 几个比较实用比较N的正则表达式
    /// <summary>
    /// 几个比较实用比较N的正则表达式[思归写出来的方法]
    /// </summary>
    /// <param name="strHtml"></param>
    /// <returns></returns>
    public static string StripHtml(string strHtml)
    {
        // 比较牛的处理文章中的图片,根据这个可推出好多类似有用的正则
        //将<img>转化为标准的<img src="" border="1" alt="">
        strHtml = Regex.Replace(strHtml, @"<img/s+(((?<alt>alt=('[^']*'|""[^""]*""|[^/s>]*))|(?<src>src=('[^']*'|""[^""]*""|[^/s>]*))|(?<border>border=('[^']*'|""[^""]*""|[^/s>]*))|(?<others>[^=<>]+=('[^']*'|""[^""]*""|[^/s>]*)))/s*)*[^>]*>", "[img ${src} ${border} ${alt}]", RegexOptions.IgnoreCase);

        //不能显示大图片的时候用(并且实现lightbox效果)
        string imgicon = "<img src='images/imgicon.jpg' width='16' height='12' border='0' alt='点击查看大图'>";
        strHtml = Regex.Replace(strHtml, @"<img/s+((src=(?<src>'[^']*'|""[^""]*""|[^/s>]*))/s*)*[^>]*>", @"<a href=${src} rel='lightbox'>" + imgicon + "</a>", RegexOptions.IgnoreCase);

        //图片的一般处理
        strHtml = Regex.Replace(strHtml, @"<img/s+((src=(?<src>'[^']*'|""[^""]*""|[^/s>]*))/s*)*[^>]*>", @"<img src= ${src}>", RegexOptions.IgnoreCase);

        //将<strhtml str="str" str="str">整理成<strhtml>
        strHtml = Regex.Replace(strHtml, @"<div[^>]+>|]+>", "<div>", RegexOptions.IgnoreCase);

        //所有带<>的标签都去掉
        strHtml = Regex.Replace(strHtml, @"<[^>]+>|]+>", "", RegexOptions.IgnoreCase);

        return strHtml;
    }
    #endregion

    #region // 清理Word产生的垃圾代码(不是很理想,但...)
    /// <summary>
    /// Removes all FONT and SPAN tags, and all Class and Style attributes.
    /// Designed to get rid of non-standard Microsoft Word HTML tags.
    /// http://tim.mackey.ie/CleanWordHTMLUsingRegularExpressions.aspx
    /// http://article.pchome.net/content-425187.html
    /// </summary>
    private string CleanWord(string strHtml)
    {
        // start by completely removing all unwanted tags    
        strHtml = Regex.Replace(strHtml, @"<[/]?(font|span|xml|del|ins|[ovwxp]:/w+)[^>]*?>", "", RegexOptions.IgnoreCase);
       
        // then run another pass over the html (twice), removing unwanted attributes    
        strHtml = Regex.Replace(strHtml, @"<([^>]*)(?:class|lang|style|size|face|[ovwxp]:/w+)=(?:'[^']*'|""[^""]*""|[^/s>]+)([^>]*)>", "<$1$2>", RegexOptions.IgnoreCase);
        strHtml = Regex.Replace(strHtml, @"<([^>]*)(?:class|lang|style|size|face|[ovwxp]:/w+)=(?:'[^']*'|""[^""]*""|[^/s>]+)([^>]*)>", "<$1$2>", RegexOptions.IgnoreCase);
       
        // [20080323]
        strHtml = Regex.Replace(strHtml, @"%", "%", RegexOptions.IgnoreCase);

        // [20080323]去掉<?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" /></
        strHtml = Regex.Replace(strHtml, @"<[?]xml[^>]+>|]+>", "", RegexOptions.IgnoreCase);

        // [20080323]自动加载图片(自动替换M_IMG[i])
        for (int i = 1; i < 100; i++)
        {
            strHtml = Regex.Replace(strHtml, @"#M_IMG" + i + "#", "<img src='images/" + i + ".jpg'>", RegexOptions.IgnoreCase);
        }
        return strHtml;
    }
    #endregion 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值