// 摘自:http://hi.baidu.com/xbnh0217/blog/item/b1feee321e162549ad4b5f8d.html
package com.bonck.service.cook;
import java.io.IOException;
import java.util.HashMap;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.DefaultMethodRetryHandler;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import com.bonck.util.LogService;
/**
 * Logs in to a web site with a form POST and then fetches pages through the
 * same {@link HttpClient}, so the session cookies obtained at login are reused.
 *
 * <p>Both login methods return a map with two entries on success:
 * {@code "client"} (the {@link HttpClient} that performed the login) and
 * {@code "cookie"} (the {@code Cookie[]} held by that client). The map is empty
 * when the login request did not end with HTTP 200.
 */
public class CommonParserTest {

    /** Port used for every host configured by this class. */
    private static final int HTTP_PORT = 80;

    // Values vlogin falls back to when the parameter map has no entry for them.
    private static final String DEFAULT_HOST = "www.myshida.com";
    private static final String DEFAULT_LOGIN_URL =
            "http://www.myshida.com/club/logging.php?action=login";
    private static final String DEFAULT_USER_FIELD = "username";
    private static final String DEFAULT_PASSWORD_FIELD = "password";
    // NOTE(review): account details are embedded in source; consider moving
    // them to configuration.
    private static final String DEFAULT_USER = "liuxue";
    private static final String DEFAULT_PASSWORD = "liuxue123456";
    private static final String DEFAULT_ENCODING = "gb2312";

    /**
     * Logs in to a site whose login form may require a verification code.
     *
     * <p>Recognised entries of {@code sm} (all optional; the value in
     * parentheses is used when the entry is absent):
     * <ul>
     * <li>{@code url} - login address (the myshida.com login page)</li>
     * <li>{@code hosturl} - host the client is configured for (www.myshida.com)</li>
     * <li>{@code username} - name of the user-name form field ("username")</li>
     * <li>{@code password} - name of the password form field ("password")</li>
     * <li>{@code uname} - user name value</li>
     * <li>{@code upwd} - password value</li>
     * <li>{@code code} - name of the verification-code form field</li>
     * <li>{@code vcode} - verification-code value; sent only when both
     *     {@code code} and {@code vcode} are present</li>
     * <li>{@code encode} - charset of the response body, e.g. gb2312 or utf-8
     *     ("gb2312")</li>
     * </ul>
     * The form always carries {@code loginfield=username} as well.
     *
     * @param sm login parameters; may be empty or {@code null}
     * @return a map holding {@code "cookie"} and {@code "client"} when the
     *         server answered 200; an empty map on a redirect, any other
     *         status, or an I/O error
     */
    @SuppressWarnings("unchecked") // raw HashMap is part of the public signature
    public HashMap vlogin(HashMap sm) {
        HashMap map = new HashMap();
        String encoding = param(sm, "encode", DEFAULT_ENCODING);

        HttpClient client = new HttpClient();
        // Maximum wait (ms) for a connection from the connection manager.
        client.getParams().setConnectionManagerTimeout(10000L);
        client.getHostConfiguration().setHost(param(sm, "hosturl", DEFAULT_HOST), HTTP_PORT, "http");

        PostMethod post = new PostMethod(param(sm, "url", DEFAULT_LOGIN_URL));

        NameValuePair user = new NameValuePair(
                param(sm, "username", DEFAULT_USER_FIELD), param(sm, "uname", DEFAULT_USER));
        NameValuePair password = new NameValuePair(
                param(sm, "password", DEFAULT_PASSWORD_FIELD), param(sm, "upwd", DEFAULT_PASSWORD));
        NameValuePair loginField = new NameValuePair("loginfield", "username");

        String captchaField = param(sm, "code", null);
        String captchaValue = param(sm, "vcode", null);
        NameValuePair[] form = (captchaField != null && captchaValue != null)
                ? new NameValuePair[] {user, password, loginField,
                        new NameValuePair(captchaField, captchaValue)}
                : new NameValuePair[] {user, password, loginField};
        post.setRequestBody(form);

        // No retry handler is installed: HttpClient's built-in default already
        // retries up to three times, and HttpMethodParams.RETRY_HANDLER expects
        // an HttpMethodRetryHandler, which the legacy DefaultMethodRetryHandler
        // is not.
        try {
            int status = client.executeMethod(post);
            if (isRedirect(status)) {
                // HttpClient does not follow redirects for POST; just report it.
                reportRedirect(post);
            } else if (status == HttpStatus.SC_OK) {
                LogService.info(decode(post.getResponseBody(), encoding));
                map.put("cookie", client.getState().getCookies());
                map.put("client", client);
            }
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release on every path, not only after a 200 response.
            post.releaseConnection();
        }
        return map;
    }

    /**
     * Logs in to a site whose login form needs no verification code.
     *
     * <p>Required entries of {@code sm}:
     * <ul>
     * <li>{@code url} - login address</li>
     * <li>{@code hosturl} - host the client is configured for</li>
     * <li>{@code username} - name of the user-name form field</li>
     * <li>{@code password} - name of the password form field</li>
     * <li>{@code uname} - user name value</li>
     * <li>{@code upwd} - password value</li>
     * </ul>
     * Optional: {@code encode} - charset of the response body; when absent the
     * charset announced by the response is used.
     *
     * @param sm login parameters
     * @return a map holding {@code "cookie"} and {@code "client"} when the
     *         server answered 200; an empty map when a required parameter is
     *         missing, on a redirect, any other status, or an I/O error
     */
    @SuppressWarnings("unchecked") // raw HashMap is part of the public signature
    public HashMap login(HashMap sm) {
        HashMap map = new HashMap();

        String host = param(sm, "hosturl", null);
        String url = param(sm, "url", null);
        String userField = param(sm, "username", null);
        String passwordField = param(sm, "password", null);
        String user = param(sm, "uname", null);
        String password = param(sm, "upwd", null);
        if (host == null || url == null || userField == null
                || passwordField == null || user == null || password == null) {
            // Previously a missing entry was sent as the literal text "null".
            LogService.info("login: a required parameter"
                    + " (hosturl, url, username, password, uname, upwd) is missing");
            return map;
        }
        String encoding = param(sm, "encode", null);

        HttpClient client = new HttpClient();
        // Maximum wait (ms) for a connection from the connection manager.
        client.getParams().setConnectionManagerTimeout(5000L);
        client.getHostConfiguration().setHost(host, HTTP_PORT, "http");

        PostMethod post = new PostMethod(url);
        post.setRequestBody(new NameValuePair[] {
            new NameValuePair(userField, user),
            new NameValuePair(passwordField, password)
        });

        // See vlogin: the default retry handler (three attempts) is left in place.
        try {
            int status = client.executeMethod(post);
            if (status == HttpStatus.SC_OK) {
                String charset = (encoding != null) ? encoding : post.getResponseCharSet();
                LogService.info("-------------------------------****************");
                LogService.info(decode(post.getResponseBody(), charset));
                map.put("cookie", client.getState().getCookies());
                map.put("client", client);
            } else if (isRedirect(status)) {
                // A Location header belongs to a redirect, not to a 200 response.
                reportRedirect(post);
            }
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            post.releaseConnection();
        }
        return map;
    }

    /**
     * Fetches a page with the client (and cookies) produced by a login call.
     *
     * @param url      address of the page to fetch
     * @param map      result of {@link #login(HashMap)} or
     *                 {@link #vlogin(HashMap)}: {@code "client"} holds the
     *                 {@link HttpClient}, {@code "cookie"} the {@code Cookie[]}
     * @param encoding charset used to decode the response body; when
     *                 {@code null} the charset announced by the response is used
     * @return the decoded page body, or {@code null} when {@code map} carries
     *         no client, the status is not 200, or an I/O error occurs
     */
    public String getContentByParser(String url, HashMap map, String encoding) {
        HttpClient client = (map == null) ? null : (HttpClient) map.get("client");
        if (client == null) {
            LogService.info("getContentByParser: no client in map (login did not succeed?)");
            return null;
        }

        // Hand the cookies to the client's state so HttpClient builds the Cookie
        // header itself. Calling toString() on the array, as before, only
        // yields the array's identity string, not the cookie values.
        Cookie[] cookies = (Cookie[]) map.get("cookie");
        if (cookies != null) {
            client.getState().addCookies(cookies);
        }

        GetMethod get = new GetMethod(url);
        String content = null;
        try {
            if (client.executeMethod(get) == HttpStatus.SC_OK) {
                String charset = (encoding != null) ? encoding : get.getResponseCharSet();
                content = decode(get.getResponseBody(), charset);
            }
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            get.releaseConnection();
        }
        return content;
    }

    /** Returns the text of {@code sm.get(key)}, or {@code fallback} when there is no such entry. */
    private static String param(HashMap sm, String key, String fallback) {
        Object value = (sm == null) ? null : sm.get(key);
        return (value == null) ? fallback : value.toString();
    }

    /** Tells whether {@code status} is a 301 or 302 redirect. */
    private static boolean isRedirect(int status) {
        return status == HttpStatus.SC_MOVED_PERMANENTLY
                || status == HttpStatus.SC_MOVED_TEMPORARILY;
    }

    /** Prints the redirect target announced by the response to {@code post}. */
    private static void reportRedirect(PostMethod post) {
        Header locationHeader = post.getResponseHeader("location");
        if (locationHeader != null) {
            System.out.println("the Page was redirected to " + locationHeader.getValue());
        } else {
            System.out.println("this location was null");
        }
    }

    /** Decodes a response body with {@code charset}; a missing body decodes to the empty string. */
    private static String decode(byte[] body, String charset) throws IOException {
        return (body == null) ? "" : new String(body, charset);
    }

    /** Runs {@link #vlogin(HashMap)} with an empty parameter map, i.e. with all defaults. */
    public static void main(String[] args) {
        CommonParserTest test = new CommonParserTest();
        test.vlogin(new HashMap());
    }
}