[url]http://blog.jiexoo.com/2008/05/21/%e7%94%a8httpclient%e8%8e%b7%e5%8f%96hotmail%e8%81%94%e7%b3%bb%e4%ba%ba%e5%88%97%e8%a1%a8/[/url]
第一步,用HttpClient访问http://login.live.com/login.srf?id=2,这个页面会返回一个登录表单
第二步,解析出form中所有的隐含变量和form的action。这些变量必须通过HttpClient原样POST回去,hotmail服务器会验证这些参数。另外,你还必须传递一个PwdPad变量,它的值是字符串IfYouAreReadingThisYouHaveTooMuchFreeTime从末尾去掉与登录密码等长的字符后剩下的部分,比如你的密码是123(长度为3),则PwdPad的值是IfYouAreReadingThisYouHaveTooMuchFreeT
第三、为了知道你接下来导向的地址,你必须解析服务器给你返回的脚本,其中replace("***")中的***即为重定向的地址
第四、得到上一步重定向后的真实的主机地址,联系人列表页面的具体地址就是"http://" + hostAddress + "/mail/GetContacts.aspx"
第五、用正则表达式解析此页面即可
具体代码如下:
第一步,用HttpClient访问http://login.live.com/login.srf?id=2,这个页面会返回一个登录表单
第二步,解析出form中所有的隐含变量和form的action。这些变量必须通过HttpClient原样POST回去,hotmail服务器会验证这些参数。另外,你还必须传递一个PwdPad变量,它的值是字符串IfYouAreReadingThisYouHaveTooMuchFreeTime从末尾去掉与登录密码等长的字符后剩下的部分,比如你的密码是123(长度为3),则PwdPad的值是IfYouAreReadingThisYouHaveTooMuchFreeT
第三、为了知道你接下来导向的地址,你必须解析服务器给你返回的脚本,其中replace("***")中的***即为重定向的地址
第四、得到上一步重定向后的真实的主机地址,联系人列表页面的具体地址就是"http://" + hostAddress + "/mail/GetContacts.aspx"
第五、用正则表达式解析此页面即可
具体代码如下:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.lang.StringUtils;
/**
* User: cjp
* Date: 2008-4-30
* Time: 9:26:58
*/
public class HotmailImporter {
public static String[] parseContact(String loginname, String password) throws Exception {
HttpClient client = new HttpClient();
client.getParams().setCookiePolicy(
CookiePolicy.BROWSER_COMPATIBILITY);
//获取登录页面html
String hotmailUrl = “http://login.live.com/login.srf?id=2“;
GetMethod hotmailGet = new GetMethod(hotmailUrl);
hotmailGet.setRequestHeader(”Accept”, “image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*”);
hotmailGet.setRequestHeader(”Accept-Language”, “zh-cn”);
hotmailGet.setRequestHeader(”User-Agent”, “Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 2.0.50727)”);
hotmailGet.setRequestHeader(”Host”, “www.hotmail.com“);
hotmailGet.setRequestHeader(”Connection”, “Keep-Alive”);
client.executeMethod(hotmailGet);
String responseStr = hotmailGet.getResponseBodyAsString();
hotmailGet.releaseConnection();
//传递所有的cookie
Cookie[] cookies = client.getState().getCookies();
String cookieStr = “”;
for (Cookie cookie : cookies) {
cookieStr += cookie.getName() + “=” + cookie.getValue() + “;”;
}
//分析登录页面的HTML,获取action,ppsx,ppft
String actionUrl = getFormUrl(responseStr);
NameValuePair loginPair = new NameValuePair(”login”, loginname);
NameValuePair loginOptionsPair = new NameValuePair(”LoginOptions”, “2″);
NameValuePair passwdPair = new NameValuePair(”passwd”, password);
NameValuePair ppsxPair = new NameValuePair(”PPSX”, getInputValue(”ppsx”, responseStr));
NameValuePair ppftPair = new NameValuePair(”PPFT”, getInputValue(”ppft”, responseStr));
//算出pwdpad
String pwdpad = “IfYouAreReadingThisYouHaveTooMuchFreeTime”;
pwdpad = StringUtils.substring(pwdpad, 0, pwdpad.length() - password.length());
NameValuePair pwdpadPair = new NameValuePair(”PwdPad”, pwdpad);
PostMethod loginPost = new PostMethod(actionUrl);
loginPost.setRequestBody(new NameValuePair[]{loginPair, passwdPair, ppsxPair, ppftPair, loginOptionsPair, pwdpadPair});
loginPost.setRequestHeader(”Accept”, “image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*”);
loginPost.setRequestHeader(”Referer”, hotmailGet.getURI().toString());
loginPost.setRequestHeader(”Accept-Language”, “zh-cn”);
loginPost.setRequestHeader(”User-Agent”, “Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 2.0.50727)”);
loginPost.setRequestHeader(”Host”, “login.live.com”);
loginPost.setRequestHeader(”Connection”, “Keep-Alive”);
loginPost.setRequestHeader(”Cache-Control”, “no-cache”);
loginPost.setRequestHeader(”Cookie”, cookieStr);
client.executeMethod(loginPost);
String str = loginPost.getResponseBodyAsString();
loginPost.releaseConnection();
String toUrl = StringUtils.substringBetween(str, “replace(\”", “\”);}function OnBack()”);
//获取登录后的跳转页面
GetMethod getMethod = new GetMethod(toUrl);
client.executeMethod(getMethod);
//获取联系人列表
GetMethod contactMethod = new GetMethod(”http://” + getMethod.getURI().getHost() + “/mail/GetContacts.aspx”);
getMethod.releaseConnection();
client.executeMethod(contactMethod);
List<String> contacts = parseContacts(contactMethod.getResponseBodyAsStream());
contactMethod.releaseConnection();
//noinspection ToArrayCallWithZeroLengthArrayArgument
return contacts.toArray(new String[]{});
}
private static List<String> parseContacts(InputStream contactsContent) throws IOException {
List<String> contacts = new ArrayList<String>();
BufferedReader in = new BufferedReader(new InputStreamReader(contactsContent));
String line;
while ((line = in.readLine()) != null) {
System.out.println(line);
String[] values = line.split(”,”);
if (values.length < 47) continue;
String email = parseValue(values[46]);
if (email.length() == 0) continue;
email = email.toLowerCase();
if (isEmailAddress(email)) {
contacts.add(email);
}
}
return contacts;
}
private static String parseValue(String value) {
if (value.length() > 0 && value.charAt(0) == ‘”‘) {
value = value.substring(1, value.length() - 1);
}
return value;
}
private static String getFormUrl(String content) throws Exception {
content = content.substring(content.indexOf(”<form”) + 5);
String actionAttribute = content.split(”\\s+”)[5];
Pattern p = Pattern.compile(”\”(.*?)\”");
Matcher matcher = p.matcher(actionAttribute);
if (!matcher.find()) {
throw new Exception(”hotmail登录界面已改变,无法正常解析”);
}
return matcher.group(1);
}
private static String getInputValue(String name, String content) throws Exception {
Pattern p = Pattern.compile(”^.+value=\”([^\\s\"]+)\”");
int index = content.indexOf(name.toUpperCase()) + name.length() + 2;
content = content.substring(index, index + 200 > content.length() ? content.length() : index + 200);
Matcher matcher = p.matcher(content);
if (!matcher.find()) {
throw new Exception(”hotmail登录界面已改变,无法正常解析”);
}
return matcher.group(1);
}
public static boolean isEmailAddress(String email) {
Pattern emailPattern = Pattern.compile(
“^[0-9a-z]([-_.~]?[0-9a-z])*@[0-9a-z]([-.]?[0-9a-z])*\\.[a-z]{2,4}$”
);
return emailPattern.matcher(email).matches();
}
public static void main(String[] args) {
try {
String[] contacts = parseContact(”test@live.cn“, “test”);
System.out.println(Arrays.toString(contacts));
} catch (Exception e) {
e.printStackTrace();
}
}
}
本文介绍了一种使用HttpClient从Hotmail抓取联系人列表的方法。主要包括:访问登录页面、解析表单信息、构造登录请求及重定向处理等步骤。

被折叠的 条评论
为什么被折叠?



