/// <summary>
/// Removes every opening and closing occurrence of one HTML tag from a string
/// while keeping the text that sits between the tags.
/// </summary>
/// <param name="strContent">
/// Text to clean. A null or empty value is returned unchanged.
/// </param>
/// <param name="strTagName">
/// Tag name without angle brackets (for example <c>a</c>). It is matched
/// case-insensitively and treated as literal text, not as a regex fragment.
/// A null or empty name strips every tag, which is what the pattern built from
/// an empty name has always done.
/// </param>
/// <returns>
/// <paramref name="strContent"/> with the matching tags removed; inner content is preserved.
/// </returns>
private static string RemoveHtml(string strContent, string strTagName)
{
    if (string.IsNullOrEmpty(strContent))
    {
        return strContent;
    }

    string pattern;
    if (string.IsNullOrEmpty(strTagName))
    {
        // No tag name given: "<" + "" + "[^>]*>" matches any tag, opening or closing.
        pattern = "</?[^>]*>";
    }
    else
    {
        // Escape the name so characters such as '+' or '.' are matched literally.
        // The negative lookahead stops the name from matching a longer tag name
        // (e.g. "a" must not strip <abbr> or <article>).
        pattern = "</?" + Regex.Escape(strTagName) + @"(?![\w:-])[^>]*>";
    }

    // One pass removes both <tag ...> and </tag ...>. IgnorePatternWhitespace is
    // deliberately not used: it would drop whitespace and '#' from the built pattern.
    return Regex.Replace(
        strContent,
        pattern,
        string.Empty,
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
}
本文介绍了一个实用的C#函数,用于从文本中移除指定的HTML标签。该方法首先移除<tagName ...>起始标签(连同其中的属性),然后移除</tagName>结束标签,标签之间的内容会被保留。代码中最后一段本意是把标签及其之间的内容一并清除,但由于前两步已经删除了所有起始和结束标签,这一步实际上不会再匹配到任何内容。
711

被折叠的 条评论
为什么被折叠?



