/// <summary>
/// Builds the URL of one page of forum search results for the current keyword.
/// </summary>
/// <remarks>
/// Posts the search form to bbs.tuniu.com, takes the first link found inside the
/// pagination block of the returned HTML, and turns it into a template by replacing
/// "page=2" with "page=" so that <paramref name="pageIndex"/> can be appended.
/// Every call performs a fresh search request.
/// NOTE(review): the template only works if the first pagination link targets
/// page 2 - confirm against the live markup.
/// </remarks>
/// <param name="pageIndex">Page number appended after the "page=" query parameter.</param>
/// <returns>The absolute URL of the requested result page.</returns>
/// <exception cref="System.InvalidOperationException">
/// The search response contains no pagination block, or the block contains no link
/// (for example because the request failed and the response is empty).
/// </exception>
protected override string createUrl(int pageIndex)
{
    // Form fields for the forum search.
    NameValueCollection PostVars = new NameValueCollection();
    PostVars.Add("mod", "forum");
    PostVars.Add("srchtype", "title");
    PostVars.Add("srhfid", "0");
    PostVars.Add("srhlocality", "portal::index");
    PostVars.Add("srchtxt", keyWord);

    string sRemoteInfo = CommonFunction.getSearchResault(PostVars, "http://bbs.tuniu.com/search.php?searchsubmit=yes", true);

    Regex regexRtCt = new Regex(@"<div class=""pgs cl mbm""><div class=""pg"">[\s\S]+?</div></div>");
    Regex regexRtCtHref = new Regex(@"<a[\s\S]+?>[\s\S]+?</a>");
    Regex regexRtCtURL = new Regex(@"href=""[\s\S]+?"">");

    // Regex.Match returns the same first match as Matches(...)[0], but lets us
    // detect "no match" instead of failing with an index-out-of-range error.
    Match pagerMatch = regexRtCt.Match(sRemoteInfo ?? string.Empty);
    if (!pagerMatch.Success)
    {
        throw new System.InvalidOperationException(
            "createUrl: no pagination block found in the search response for keyword '" + keyWord + "'.");
    }

    Match anchorMatch = regexRtCtHref.Match(pagerMatch.Value);
    if (!anchorMatch.Success)
    {
        throw new System.InvalidOperationException(
            "createUrl: the pagination block of the search response contains no link.");
    }

    // Strip the attribute wrapper (href="...">) and drop the page number so that
    // the caller's page index can be appended.
    string relativeTemplate = regexRtCtURL.Match(anchorMatch.Value).Value
        .Replace("href=\"", "")
        .Replace("\">", "")
        .Replace("page=2", "page=");

    return "http://bbs.tuniu.com/" + relativeTemplate + pageIndex;
}
/// <summary>
/// Simulates a form submission: POSTs <paramref name="PostVars"/> to
/// <paramref name="searchUrl"/> and returns the response body as text.
/// </summary>
/// <param name="PostVars">Form fields to send with the request.</param>
/// <param name="searchUrl">Address the form is posted to.</param>
/// <param name="UrlEncodeUtf8">
/// Despite its name, this selects how the response bytes are decoded:
/// <c>true</c> decodes as UTF-8, <c>false</c> uses <see cref="System.Text.Encoding.Default"/>.
/// </param>
/// <returns>
/// The decoded response body, or an empty string when the request could not be
/// completed. This method is best-effort and does not throw.
/// </returns>
public static string getSearchResault(NameValueCollection PostVars, string searchUrl, bool UrlEncodeUtf8)
{
    // Nothing can be sent without form data or an address; return the same
    // empty result that a failed request produces.
    if (PostVars == null || string.IsNullOrEmpty(searchUrl))
    {
        return string.Empty;
    }

    System.Text.Encoding responseEncoding = UrlEncodeUtf8
        ? System.Text.Encoding.UTF8
        : System.Text.Encoding.Default;

    try
    {
        // WebClient is IDisposable; dispose it deterministically.
        using (WebClient WebClientObj = new WebClient())
        {
            byte[] byRemoteInfo = WebClientObj.UploadValues(searchUrl, "POST", PostVars);
            return responseEncoding.GetString(byRemoteInfo);
        }
    }
    catch (System.Exception ex)
    {
        // Callers rely on an empty string for any failure, so keep the
        // best-effort contract but leave a trace instead of failing silently.
        System.Diagnostics.Trace.TraceWarning("getSearchResault: POST to {0} failed: {1}", searchUrl, ex.Message);
        return string.Empty;
    }
}