// 获取网页的HTML内容,指定Encoding
static string GetHtml(string url, System.Text.Encoding encoding)
{
byte[] buf = new System.Net.WebClient().DownloadData(url);
if (encoding != null) return encoding.GetString(buf);
string html = System.Text.Encoding.UTF8.GetString(buf);
encoding = GetEncoding(html);
if (encoding == null || encoding == System.Text.Encoding.UTF8) return html;
return encoding.GetString(buf);
}
// 根据网页的HTML内容提取网页的Encoding
static System.Text.Encoding GetEncoding(string html)
{
string pattern = @"(?i)/bcharset=(?<charset>[-a-zA-Z_0-9]+)";
string charset = System.Text.RegularExpressions.Regex.Match(html, pattern).Groups["charset"].Value;
try { return System.Text.Encoding.GetEncoding(charset); }
catch (ArgumentException) { return null; }
}