利用客户端程序自动发送文章。
1)获取登录界面,拼装post数据。(引用HtmlAgilityPack)
// Step 1: download the login page and build the URL-encoded POST body from
// the <input> elements belonging to the form named "loginform".
// Empty inputs whose name contains "log" / "pwd" are filled with the
// credentials held in loginname / pwd.
string postdata = null;
HtmlWeb hweb = new HtmlWeb();
HtmlDocument hdoc = hweb.Load(loginurl);
foreach (HtmlNode hnode in hdoc.DocumentNode.Descendants("form"))
{
    // A form without a name attribute yields a null attribute object;
    // treat it as "not the login form" instead of dereferencing it.
    HtmlAttribute formName = hnode.Attributes["name"];
    if (formName == null || formName.Value != "loginform") continue;

    // NOTE(review): "//input" is evaluated from the document root, so it
    // matches every input on the page, not only those inside this form.
    // It is kept as-is because some HtmlAgilityPack versions parse <form>
    // as an empty element, in which case ".//input" would find nothing -
    // confirm against the HtmlAgilityPack version in use before narrowing.
    HtmlNodeCollection inputs = hnode.SelectNodes("//input");
    if (inputs == null) continue; // SelectNodes returns null when nothing matches

    foreach (HtmlNode subhnode in inputs)
    {
        if (!subhnode.Attributes.Contains("name")) continue;
        string n = subhnode.Attributes["name"].Value;

        // Inputs are allowed to omit the value attribute entirely.
        HtmlAttribute valueAttr = subhnode.Attributes["value"];
        string v = valueAttr == null ? string.Empty : valueAttr.Value;

        if (string.IsNullOrEmpty(v))
        {
            // Ordinal comparison: field names are identifiers, not linguistic text.
            if (n.IndexOf("log", StringComparison.Ordinal) >= 0) v = loginname;
            if (n.IndexOf("pwd", StringComparison.Ordinal) >= 0) v = pwd;
        }
        v = System.Web.HttpUtility.UrlEncode(v, hdoc.Encoding);
        postdata += string.Format("{0}={1}&", n, v);
    }
}
// Drop the trailing '&' left behind by the last appended pair.
if (!string.IsNullOrEmpty(postdata)) postdata = postdata.Trim().TrimEnd('&');
2)登录。
必须设置 request.AllowAutoRedirect = false,否则 HttpWebRequest 会自动跟随登录成功后的重定向直接请求管理界面,而此时由于 cookie 问题,又会被重定向回登录界面。
// Step 2: submit the login form.
// Auto-redirect is switched off so the login response itself can be
// inspected instead of the page it redirects to.
CookieContainer cc = new CookieContainer();
HttpWebRequest request = GetHttpWebRequest(loginurl, "POST");
request.AllowAutoRedirect = false;
request.CookieContainer = cc;

// Write the form body assembled in step 1 to the request stream.
byte[] payload = Encoding.UTF8.GetBytes(postdata);
using (Stream requestStream = request.GetRequestStream())
{
    requestStream.Write(payload, 0, payload.Length);
}

HttpWebResponse response = request.GetResponse() as HttpWebResponse;
// HttpStatusCode.Found (302) is the status this snippet treats as a successful login.
if (HttpStatusCode.Found == response.StatusCode)
{
3)获取管理界面。
理论上,cc 中已包含所需的 cookie 信息,可以继续利用 HttpWebRequest 访问管理界面,但实际并非如此。利用 Fiddler 分别查看程序和浏览器在登录后请求管理界面时发送的 Cookie,对比如下:
程序:Cookie: wordpress_test_cookie=WP+Cookie+check; wordpress_logged_in_33496cfac029fe8684ecd0304af8c268=admin%7C1355837731%7Cfaffab04dbd1b56428680070b340e099
浏览器:Cookie: wordpress_33496cfac029fe8684ecd0304af8c268=admin%7C1354800972%7C20a76f98dc39528a6d99de411b82120b; wp-settings-time-1=1354627518; wordpress_test_cookie=WP+Cookie+check; wordpress_logged_in_33496cfac029fe8684ecd0304af8c268=admin%7C1354800972%7C5a1f7e076c62af9733457fd0ddca5482
显然程序发送的 Cookie 比浏览器少了一部分。而两者登录成功后收到的 response 中 Set-Cookie 的内容是一致的,如下:
Set-Cookie: wordpress_33496cfac029fe8684ecd0304af8c268=admin%7C1354802748%7C2a761d1e3dcda51e283132a48d66b83f; path=/wp/wp-content/plugins; httponly
Set-Cookie: wordpress_33496cfac029fe8684ecd0304af8c268=admin%7C1354802748%7C2a761d1e3dcda51e283132a48d66b83f; path=/wp/wp-admin; httponly
Set-Cookie: wordpress_logged_in_33496cfac029fe8684ecd0304af8c268=admin%7C1354802748%7C67b3113747f9bfc16830c786893b54c0; path=/wp/; httponly
其中前两条(名为 wordpress_33496cfac029fe8684ecd0304af8c268,path 分别为 /wp/wp-content/plugins 和 /wp/wp-admin;原文以红色标出)即程序请求中缺失的部分。未继续研究 HttpWebRequest,转而改用 Socket,如下:
3.1)从header中获取cookie信息
// Step 3.1: parse the Set-Cookie header of the login response by hand.
// Every cookie found is added to cc (scoped to wpsiteurl + its path) and
// appended once, as "name=value; ", to reqcookie for the raw socket requests.
string setcookies = response.Headers[HttpResponseHeader.SetCookie];

// The header is absent (null) when the server sets no cookies. An empty
// string simply produces no cookies below.
// Date attributes contain ", " (e.g. "Wed, 05-Dec-2012"); collapsing that
// sequence leaves only the commas that separate individual cookies.
setcookies = (setcookies ?? string.Empty).Replace(", ", " ");

foreach (string sc in setcookies.Split(','))
{
    string path = null;
    string scname = null;
    string scvalue = null;
    foreach (string svalue in sc.Split(';'))
    {
        string s = svalue.Trim();

        // Split on the FIRST '=' only, so values that contain '=' stay intact.
        // Segments with no '=' (flags such as "httponly") or no name are skipped.
        int eq = s.IndexOf('=');
        if (eq <= 0) continue;
        string n = s.Substring(0, eq).Trim();
        string v = s.Substring(eq + 1).Trim();

        if (scname == null)
        {
            // The first pair of a Set-Cookie entry is the cookie itself.
            scname = n;
            scvalue = v;
        }
        else if (string.Equals(n, "path", StringComparison.OrdinalIgnoreCase))
        {
            path = v;
        }
        // Any other attribute (expires, domain, max-age, ...) is ignored so it
        // can no longer overwrite the cookie name and value.
    }

    // Nothing usable in this entry (for example the empty-header case).
    if (string.IsNullOrEmpty(scname)) continue;

    Cookie c = new Cookie(scname, scvalue);
    cc.Add(new Uri(wpsiteurl + path), c);

    // Match the exact "name=" token at the start of reqcookie or after a
    // "; " separator. A plain substring test could be satisfied by a longer
    // cookie name or by text inside another cookie's value.
    string token = scname + "=";
    bool alreadyListed = reqcookie != null
        && (reqcookie.StartsWith(token, StringComparison.Ordinal)
            || reqcookie.IndexOf("; " + token, StringComparison.Ordinal) >= 0);
    if (!alreadyListed)
    {
        reqcookie += string.Format("{0}={1}; ", scname, scvalue);
    }
}
3.2)利用Socket发送数据
// Raw HTTP/1.1 GET request (to be sent over a socket) for the admin page
// that contains the "new post" form. Headers are separated by CRLF and the
// header section is terminated by an empty line.
string reqstr =
    string.Format("GET {0} HTTP/1.1\r\nHost: {1}", adminurl, wpuri.Host)
    + "\r\nReferer: " + loginurl
    + "\r\nUser-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1"
    + "\r\nCookie: " + reqcookie
    + "\r\nConnection: Close\r\n\r\n";
。。。
4)同1),从管理界面中获取发布文章的form(name="post")信息,拼装post内容。
5) 利用3.1中的cookie,仍然用socket方式发送数据。
// Publish the article: raw HTTP/1.1 POST request (to be sent over a socket)
// carrying the form body in postdata and the cookies collected in reqcookie.
reqstr = string.Format("POST {0} HTTP/1.1\r\nHost: {1}", posturl, wpuri.Host);
reqstr += "\r\nReferer: " + adminurl;
reqstr += "\r\nUser-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1";
reqstr += "\r\nCookie: " + reqcookie;
// Content-Length counts bytes on the wire, not UTF-16 chars, so
// postdata.Length is wrong as soon as the body holds a non-ASCII character.
// NOTE(review): assumes the request text is written to the socket as UTF-8,
// matching the encoding used for the login POST - confirm in the send code.
reqstr += "\r\nContent-Length: " + Encoding.UTF8.GetByteCount(postdata);
reqstr += "\r\nContent-Type: application/x-www-form-urlencoded";
reqstr += "\r\nConnection: Close\r\n\r\n";
reqstr += postdata;