/// <summary>
/// Scans an HTML fragment for <c>&lt;img&gt;</c> tags and passes each tag to
/// <c>ParseImagePropertiesTages</c>, which records the tag's attributes in <c>ImageHashtable</c>.
/// </summary>
/// <param name="HtmlStr">HTML fragment in which images may be mixed with text.</param>
/// <returns>
/// <c>true</c> when at least one image tag was found and its attributes were parsed;
/// <c>false</c> when the input is null or empty, contains no image tag, or no tag could be parsed.
/// </returns>
public bool ParseImageFromHtmlConent(string HtmlStr)
{
    if (string.IsNullOrEmpty(HtmlStr))
    {
        return false;
    }

    // Matches <img ...>, <img .../> and <img ...></img>; group 1 is the whole tag.
    const string pattern = @"(<img[^>]*((>)|(/>)|(>\s*</img>)))";

    // The previous implementation compared the image-stripped text with "1", which could
    // only be true by coincidence. Success is now derived from the per-tag parse results.
    bool parsedAny = false;
    System.Text.RegularExpressions.MatchCollection tags = System.Text.RegularExpressions.Regex.Matches(
        HtmlStr, pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    foreach (System.Text.RegularExpressions.Match tag in tags)
    {
        if (ParseImagePropertiesTages(tag.Groups[1].Value))
        {
            parsedAny = true;
        }
    }
    return parsedAny;
}
/// <summary>
/// Match evaluator for image-tag regular expressions: forwards the matched tag (capture group 1)
/// to <c>ParseImagePropertiesTages</c> and supplies the replacement text for the match.
/// </summary>
/// <param name="m">A match whose first capture group holds the full image tag.</param>
/// <returns>An empty string, so the tag is removed; <c>"0"</c> if an exception was caught.</returns>
private string ParseImageTag(System.Text.RegularExpressions.Match m)
{
    try
    {
        string tagText = m.Groups[1].ToString();
        // The outcome of the attribute parse does not influence the replacement text.
        ParseImagePropertiesTages(tagText);
        return "";
    }
    catch
    {
        return "0";
    }
}
/// <summary>
/// Extracts every quoted <c>name="value"</c> / <c>name='value'</c> attribute from an image tag and
/// stores it in <c>ImageHashtable</c> under the key <c>"Imag@" + name</c>.
/// </summary>
/// <param name="img">Text of a single image tag.</param>
/// <returns>
/// <c>true</c> when the tag was scanned to the end; <c>false</c> when <paramref name="img"/> is null
/// or an exception occurred while storing the attributes.
/// </returns>
/// <remarks>
/// A key that is already present keeps its existing value. Previously a repeated key made
/// <c>Add</c> throw, which silently discarded the remaining attributes of the tag.
/// </remarks>
private bool ParseImagePropertiesTages(string img)
{
    if (img == null)
    {
        return false;
    }

    try
    {
        // Group 1: attribute name (letters only); group 2: the quoted value.
        string pattern = @"([a-zA-Z]+)\s*=\s*[""']\s*([^""']+)\s*[""']";
        System.Text.RegularExpressions.MatchCollection attributes =
            System.Text.RegularExpressions.Regex.Matches(img, pattern);
        foreach (System.Text.RegularExpressions.Match attribute in attributes)
        {
            string key = "Imag@" + attribute.Groups[1].Value.Trim();
            string value = attribute.Groups[2].Value.Trim();
            // First value wins; a duplicate key must not abort the rest of the scan.
            if (!ImageHashtable.ContainsKey(key))
            {
                ImageHashtable.Add(key, value);
            }
        }
        return true;
    }
    catch
    {
        // Best effort: ImageHashtable is declared outside this block, so any failure while
        // using it is reported as "not parsed" instead of propagating to the regex evaluator.
        return false;
    }
}
/// <summary>
/// Removes image tags and hyperlink tags from an HTML fragment, keeping the text between them.
/// </summary>
/// <param name="HtmlStr">HTML fragment in which images and links may be mixed with text.</param>
/// <returns>
/// The fragment with every <c>&lt;img&gt;</c> tag replaced by the result of <c>ParseImageTag</c>
/// and every <c>&lt;a ...&gt;</c> / <c>&lt;/a&gt;</c> tag removed; an empty string when the input is null.
/// </returns>
public string ParseImageFrom(string HtmlStr)
{
    if (HtmlStr == null)
    {
        return "";
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Group 1 is the whole image tag; ParseImageTag reads it and returns the replacement text.
    const string imagePattern = @"(<img[^>]*((>)|(/>)|(>\s*</img>))|<img>)";
    string content = System.Text.RegularExpressions.Regex.Replace(
        HtmlStr,
        imagePattern,
        new System.Text.RegularExpressions.MatchEvaluator(ParseImageTag),
        options);

    // The look-ahead requires whitespace, '/' or '>' right after "<a", so elements whose name
    // merely starts with "a" (<abbr>, <article>, <audio>, ...) are no longer stripped by mistake.
    const string anchorPattern = @"<a(?=[\s/>])[^>]*>|</a\s*>";
    return System.Text.RegularExpressions.Regex.Replace(content, anchorPattern, "", options);
}
/// <summary>
/// Strips script blocks and HTML tags from a string, decodes a handful of common character
/// entities, removes any remaining angle brackets and CR/LF pairs, and HTML-encodes the result.
/// </summary>
/// <param name="Htmlstring">Source text that contains HTML markup.</param>
/// <returns>The HTML-encoded, trimmed plain text; an empty string for null or empty input.</returns>
public string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove scripts. Singleline lets '.' cross line breaks so multi-line script bodies go too.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // Remove HTML tags, line breaks followed by whitespace, and comment remnants.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
    // Decode the supported named/numeric entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
    // Drop every other numeric entity.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
    // Strings are immutable: the results must be assigned back, otherwise these calls do nothing.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");
    // Static encoder: same output as Server.HtmlEncode, but does not need a current HttpContext.
    return System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();
}
// The following two namespaces need to be imported (place these using directives at the top of the file):
using System.Reflection;
using System.Text.RegularExpressions;