截取 HTML 内容摘要:如果截取位置处于尚未闭合的标签内,则继续向后查找对应的闭合标签并保留,然后再截断

本文提供了一个 C# 方法,用于从 HTML 字符串中提取指定长度的内容摘要。当 StripHTML 为 true 时,该方法先去除全部 HTML 标签和空格,再按长度截取纯文本;当 StripHTML 为 false 时,保留图片、列表、表格等标签,在可见文本达到指定长度后截断并追加省略号,同时保留其后的闭合标签。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

来源:http://topic.youkuaiyun.com/u/20080829/14/458c71bb-3e9b-4a84-a1e5-8a66813ea208.html

 


/// <summary>
/// Builds a summary of at most <paramref name="length"/> visible characters
/// from an HTML fragment.
/// </summary>
/// <param name="content">The HTML fragment to summarise. May be null or empty.</param>
/// <param name="length">
/// Maximum number of visible characters to keep. Zero or a negative value
/// yields an empty string.
/// </param>
/// <param name="StripHTML">
/// When true, every tag, every space and every <c>&amp;nbsp;</c> entity is removed
/// and the remaining plain text is cut at <paramref name="length"/>.
/// When false, markup is kept: paragraphs are flattened to <c>&lt;br /&gt;</c>,
/// images count towards the length, and tags that follow the cut-off point are
/// still emitted so elements opened before the cut are closed.
/// </param>
/// <returns>
/// The summary. <c>"..."</c> is appended (strip mode) or inserted at the cut-off
/// point (markup mode) when text was dropped. The input is returned unchanged in
/// markup mode when it is not longer than <paramref name="length"/>.
/// </returns>
public string GetContentSummary(string content, int length, bool StripHTML)
{
    // A non-positive limit can never hold any text; previously a negative value
    // reached Substring and threw.
    if (string.IsNullOrEmpty(content) || length <= 0)
    {
        return "";
    }

    if (StripHTML)
    {
        // The static Regex.Replace reuses the framework's regex cache, so the
        // pattern is not re-parsed on every call.
        string text = System.Text.RegularExpressions.Regex.Replace(content, "<[^>]*>", "");

        // NOTE(review): the listing this was taken from replaced " " twice in a row;
        // the second is assumed to have been a non-breaking or full-width space that
        // was flattened to an ASCII space. Both are spelled as escapes here - confirm.
        text = text.Replace(" ", "")
                   .Replace("\u00A0", "")
                   .Replace("\u3000", "")
                   .Replace("&nbsp;", "");

        if (text.Length <= length)
        {
            return text;
        }

        return text.Substring(0, length) + "...";
    }

    if (content.Length <= length)
    {
        return content;
    }

    int pos = 0;
    int size = 0;                 // visible characters emitted so far
    bool ellipsisAdded = false;   // "..." is written only once
    bool rowClosed = false;       // a </tr> has been emitted after the cut-off
    bool itemClosed = false;      // a </li> has been emitted after the cut-off
    System.Text.StringBuilder sb = new System.Text.StringBuilder();

    while (pos < content.Length)
    {
        char cur = content[pos];

        // Only a '<' that is eventually followed by '>' starts a tag. A stray '<'
        // (e.g. "1 < 2") is ordinary text; previously it produced a negative
        // Substring length and threw.
        int tagEnd = cur == '<' ? content.IndexOf('>', pos) : -1;

        if (tagEnd < 0)
        {
            if (size < length)
            {
                sb.Append(cur);
                size++;
            }
            else if (!ellipsisAdded)
            {
                sb.Append("...");
                ellipsisAdded = true;
            }

            pos++;
            continue;
        }

        string tag = content.Substring(pos, tagEnd + 1 - pos);
        bool closing;
        string name = ReadTagName(tag, out closing);
        bool withinLimit = size < length;

        if (name == "p")
        {
            // Paragraph wrappers are dropped; a closing </p> becomes a line break
            // while there is still room in the summary.
            if (closing && withinLimit)
            {
                sb.Append("<br />");
            }
        }
        else if (name == "br" && !closing)
        {
            if (withinLimit)
            {
                sb.Append("<br />");
            }
        }
        else if (name == "img" && !closing)
        {
            if (withinLimit)
            {
                sb.Append(tag);
                // The image's markup length (plus one) is charged against the limit.
                size += tag.Length + 1;
            }
        }
        else if (name == "li")
        {
            if (withinLimit)
            {
                sb.Append(tag);
            }
            else if (closing && !itemClosed)
            {
                // Close the list item that was open at the cut-off, then drop the rest.
                sb.Append(tag);
                itemClosed = true;
            }
        }
        else if (name == "tr")
        {
            if (withinLimit)
            {
                sb.Append(tag);
            }
            else if (closing && !rowClosed)
            {
                // Close the table row that was open at the cut-off, then drop the rest.
                sb.Append(tag);
                rowClosed = true;
            }
        }
        else if (name == "td")
        {
            // Cells keep flowing until the row that was open at the cut-off is closed.
            if (withinLimit || !rowClosed)
            {
                sb.Append(tag);
            }
        }
        else
        {
            // Every other tag is copied through even past the limit so that
            // elements opened before the cut-off still get their closing tags.
            sb.Append(tag);
        }

        pos = tagEnd + 1;
    }

    return sb.ToString();
}

/// <summary>
/// Extracts the lower-cased element name from a complete tag such as
/// <c>&lt;/LI&gt;</c> or <c>&lt;img src="x"&gt;</c>.
/// </summary>
/// <param name="tag">The tag text, starting with '&lt;' and ending with '&gt;'.</param>
/// <param name="isClosing">Set to true when the tag starts with <c>&lt;/</c>.</param>
/// <returns>
/// The name in invariant lower case, or an empty string when no name directly
/// follows '&lt;' (or '&lt;/').
/// </returns>
private static string ReadTagName(string tag, out bool isClosing)
{
    int start = 1; // skip the leading '<'
    isClosing = start < tag.Length && tag[start] == '/';
    if (isClosing)
    {
        start++;
    }

    // The name runs until whitespace, '/' or '>'. Matching the whole name
    // (rather than a fixed-width prefix) keeps <progress>, <picture>, <link>
    // and <track> from being mistaken for <p>, <li> or <tr>.
    int end = start;
    while (end < tag.Length
           && tag[end] != '>'
           && tag[end] != '/'
           && !char.IsWhiteSpace(tag[end]))
    {
        end++;
    }

    // Invariant casing so tag matching does not depend on the current culture.
    return tag.Substring(start, end - start).ToLowerInvariant();
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值