对文章内容进行分页

最新推荐文章于 2021-03-12 20:54:29 发布

weixin_33814685

最新推荐文章于 2021-03-12 20:54:29 发布

阅读量84

点赞数 1

CC 4.0 BY-SA版权

原文链接：http://www.cnblogs.com/jianchun491/archive/2012/04/18/ContentPager.html

最近公司要把原来在Web网站上发布的内容，也放到Wap上面显示。现有的文章内容是存放在一个字段里面，保存时没有进行分页处理。因此在手机上显示时，要对文章内容进行截取分页处理后再显示。

现在问题出来了，文章内容里面包含了很多HTML标签，用SubString进行截取会把HTML标签也给截断，也有可能会在标签的属性中间截开，这样我们得出来的字符串就是错乱的了。因此在截取时，需要对HTML标签进行过滤。

自己的表达能力有限，直接上代码。

/// <summary>
/// Splits article content into pages of at most <paramref name="size"/> units of
/// visible text each, keeping the HTML of every page well-formed.
/// </summary>
/// <param name="param">The article content (may contain HTML).</param>
/// <param name="size">Page size measured in visible text only; HTML markup is not counted. Must be positive.</param>
/// <returns>The pages in reading order. There is always at least one entry, even for empty content.</returns>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="size"/> is zero or negative.</exception>
public static List<string> SubstringTo(string param, int size)
{
    if (size <= 0)
    {
        // A non-positive page size makes the page cutter stop after a single raw
        // character, which can fall in the middle of a tag.
        throw new System.ArgumentOutOfRangeException("size", "Page size must be greater than zero.");
    }

    // Filters out HTML tags that cannot be displayed on WAP; this step is optional.
    param = NoHTML(param);

    int length = param.Length;
    int being = 0;
    List<string> list = new List<string>();

    while (true)
    {
        // Use a separate local for the next offset instead of passing the same
        // variable as both the value argument and the out argument.
        int next;
        list.Add(SubstringToHTML(param, being, size, "", out next));

        // Stop when the content is consumed. The second test is purely defensive:
        // it prevents an endless loop should the cursor ever fail to advance.
        if (next >= length || next <= being)
        {
            break;
        }

        being = next;
    }

    return list;
}

/// <summary>
/// Cuts one page out of a string that may contain HTML, measuring only the visible
/// text, and repairs the tags that the cut leaves unbalanced.
/// </summary>
/// <remarks>
/// Width rules: a visible character counts 1, plus 1 more when it needs more than one
/// byte in <see cref="System.Text.Encoding.Default"/> (so the result for non-ASCII text
/// depends on the platform's default encoding). Characters inside a tag count 0 and a
/// character entity such as &amp;nbsp; counts 1 as a whole.
/// Opening tags that are re-created at the start of a page carry no attributes, because
/// the original opening tag lies before <paramref name="being"/> and is not inspected.
/// A literal &lt; in running text is treated as the start of a tag.
/// </remarks>
/// <param name="param">The string to cut. <c>null</c> is treated as an empty string.</param>
/// <param name="being">Zero-based offset in <paramref name="param"/> at which this page starts.</param>
/// <param name="length">Maximum visible width of the page.</param>
/// <param name="end">Text appended after the cut (for example an ellipsis), before any closing tags are added.</param>
/// <param name="index">Receives the offset at which the next page starts; equals the length of <paramref name="param"/> once everything has been consumed.</param>
/// <returns>The page, with missing opening tags prepended and missing closing tags appended.</returns>
public static string SubstringToHTML(string param, int being, int length, string end, out int index)
{
    if (param == null)
    {
        param = string.Empty;
    }

    char[] pchar = param.ToCharArray();
    System.Text.Encoding encoding = System.Text.Encoding.Default;
    StringBuilder result = new StringBuilder();
    int n = 0;            // visible width consumed so far
    bool isCode = false;  // currently inside <...>
    bool isHTML = false;  // currently inside a character entity (&...;)
    int i;

    for (i = being; i < pchar.Length; i++)
    {
        char temp = pchar[i];
        bool visible = false;

        if (isCode)
        {
            // Everything up to and including the closing '>' is markup. An '&' in an
            // attribute value (e.g. a query string) must not start an entity.
            if (temp == '>')
            {
                isCode = false;
            }
        }
        else if (temp == '<')
        {
            isHTML = false;
            isCode = true;
        }
        else if (temp == '&')
        {
            isHTML = true;
        }
        else if (isHTML)
        {
            // The entity ends at ';' and is counted once, at its end, so a page can
            // never be cut in the middle of it. Any character that cannot belong to
            // an entity name means the '&' was a bare ampersand; resume counting.
            if (temp == ';' || !IsEntityNameChar(temp))
            {
                isHTML = false;
                visible = true;
            }
        }
        else
        {
            visible = true;
        }

        if (visible)
        {
            n = n + 1;

            // Multi-byte characters (e.g. CJK) take two units of width.
            if (encoding.GetByteCount(pchar, i, 1) > 1)
            {
                n = n + 1;
            }
        }

        result.Append(temp);

        if (n >= length)
        {
            break;
        }
    }

    // After a break, i is the last character included; after a natural end, i is
    // already one past the end, so clamp instead of pointing beyond the string.
    index = i < pchar.Length ? i + 1 : pchar.Length;

    result.Append(end);

    // Walk the tags of the page in order, pairing each closing tag with the nearest
    // matching opening tag. What is left over is exactly what needs repairing.
    List<string> openTags = new List<string>();       // opened here, never closed (used as a stack)
    List<string> orphanClosers = new List<string>();  // closed here, opened on an earlier page

    foreach (Match mt in Regex.Matches(result.ToString(), @"<(/?)([a-zA-Z][a-zA-Z0-9]*)[^<>]*>"))
    {
        string name = mt.Groups[2].Value;

        if (mt.Groups[1].Length > 0)
        {
            int open = openTags.Count - 1;
            while (open >= 0 && !string.Equals(openTags[open], name, System.StringComparison.OrdinalIgnoreCase))
            {
                open--;
            }

            if (open >= 0)
            {
                // Also discards tags opened inside it that the source never closed.
                openTags.RemoveRange(open, openTags.Count - open);
            }
            else
            {
                orphanClosers.Add(name);
            }
        }
        else if (mt.Value[mt.Length - 2] != '/' && !IsVoidHtmlElement(name))
        {
            openTags.Add(name);
        }
    }

    // Closing tags appear innermost first, so inserting each new opening tag at the
    // front leaves the outermost element first.
    foreach (string name in orphanClosers)
    {
        result.Insert(0, "<" + name + ">");
    }

    // Close the still-open elements innermost first.
    for (int nn = openTags.Count - 1; nn >= 0; nn--)
    {
        result.Append("</");
        result.Append(openTags[nn]);
        result.Append(">");
    }

    return result.ToString();
}

/// <summary>Tells whether a character may appear between the '&amp;' and ';' of a character entity.</summary>
private static bool IsEntityNameChar(char c)
{
    // ASCII only on purpose: char.IsLetterOrDigit would also accept CJK text, which
    // would let a bare '&' swallow the rest of a Chinese sentence.
    return (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9')
        || c == '#';
}

/// <summary>Tells whether an HTML element never has a closing tag (br, img, hr, ...).</summary>
private static bool IsVoidHtmlElement(string tagName)
{
    switch (tagName.ToLowerInvariant())
    {
        case "area":
        case "base":
        case "br":
        case "col":
        case "embed":
        case "hr":
        case "img":
        case "input":
        case "link":
        case "meta":
        case "param":
        case "source":
        case "track":
        case "wbr":
            return true;
        default:
            return false;
    }
}

转载于:https://www.cnblogs.com/jianchun491/archive/2012/04/18/ContentPager.html