HttpClient模拟登陆并获取指定页面的内容

本文详细记录了使用HttpClient进行网站内容采集的方法,包括登录、获取指定路径页面内容及保存为.htm文件的操作。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

前段时间工作上需要采集别的网站上的内容,然后在自己的系统中展示数据。经过查找资料,最后选用了HttpClient。以下为自己摸索出的方法,记录下来以便以后使用:

1、模拟登陆

// Logger for this class.
private static final Logger logger = Logger.getLogger(IpManagerRobot.class);

// One client instance shared by every request method below — presumably so the
// session established by login() carries over to later requests (TODO confirm).
private DefaultHttpClient httpClient = new DefaultHttpClient();

// Outcome of the most recent login() call; false until a login succeeds.
// (The name is a misspelling of "logging"/"loggedIn" but is referenced by login().)
private boolean loging = false;

/**
 * Logs in by POSTing the login form fields to the site's
 * {@code j_spring_security_check} URL (the {@code action} of the login form).
 *
 * <p>The login is considered successful when the server answers with a redirect
 * (301, 302, 303 or 307). Any other status, or no response at all because the
 * request failed, is treated as a failed login. The outcome is stored in
 * {@code loging} and returned.
 *
 * <p>NOTE(review): the user name and password are hard-coded here; they should
 * come from configuration rather than source code.
 * NOTE(review): some servers also redirect on a failed login (back to the login
 * page) — confirm that a redirect alone is a reliable success signal for this site.
 *
 * @return {@code true} if the server answered with a redirect, {@code false} otherwise
 * @throws Exception if the form parameters cannot be encoded
 */
public boolean login() throws Exception {

    // Login target: the action attribute of the site's login form.
    String loginForm = "http://124.238.214.79/platform/j_spring_security_check";

    HttpPost httpPost = new HttpPost(loginForm);

    // Request headers copied from a real browser session (visible in an HTTP sniffer).
    httpPost.setHeader("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; WOW64; Trident/4.0; znwb6600; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30618)");
    httpPost.setHeader("Referer", "http://124.238.214.79/platform/");
    httpPost.setHeader("Content-Type", "application/x-www-form-urlencoded");

    // Fields submitted by the login form.
    List<NameValuePair> params = new ArrayList<NameValuePair>();
    params.add(new BasicNameValuePair("j_username", "stc2012"));
    params.add(new BasicNameValuePair("j_password", "q1w2e3r4"));
    params.add(new BasicNameValuePair("domain", "124.238.214.79"));
    params.add(new BasicNameValuePair("url", "/platform/"));
    params.add(new BasicNameValuePair("expiry", ""));

    // Encode the form as UTF-8. An encoding failure is propagated instead of being
    // swallowed, so a POST without a body is never sent.
    httpPost.setEntity(new UrlEncodedFormEntity(params, HTTP.UTF_8));

    HttpResponse res = postMethod(httpPost);

    // postMethod returns null when the request itself failed; treat that as a failed login
    // rather than dereferencing null.
    if (res == null) {
        logger.info("登陆失败");
        loging = false;
        return loging;
    }

    int status = res.getStatusLine().getStatusCode();
    loging = status == HttpStatus.SC_MOVED_PERMANENTLY
            || status == HttpStatus.SC_MOVED_TEMPORARILY
            || status == HttpStatus.SC_SEE_OTHER
            || status == HttpStatus.SC_TEMPORARY_REDIRECT;

    if (loging) {
        logger.info("登陆成功");
    } else {
        logger.info("登陆失败");
    }

    return loging;
}



/**
 * Executes the given POST request with the shared client.
 *
 * <p>The request is always aborted afterwards, whether or not it succeeded.
 * NOTE(review): because of the abort, callers should rely only on the status line
 * and headers of the returned response, not on its body — confirm.
 *
 * @param post the request to execute
 * @return the response, or {@code null} if the request failed with an I/O or
 *         protocol error (the error is logged)
 */
public HttpResponse postMethod(HttpPost post) {
    HttpResponse resp = null;
    try {
        resp = httpClient.execute(post);
    } catch (IOException e) {
        // ClientProtocolException is an IOException, so this one handler covers both.
        // Log with the cause instead of printing the stack trace to stderr.
        logger.error("POST " + post.getURI() + " failed", e);
    } finally {
        post.abort();
    }
    return resp;
}


2、登陆了以后就可以获取指定路径的页面内容了

/**
 * Fetches the page at {@code url} with the shared client and returns its body
 * decoded as UTF-8.
 *
 * @param url absolute URL of the page to fetch
 * @return the response body, or an empty string when the status is not 200
 *         or the response carries no body
 * @throws Exception if the request or reading the body fails
 */
public String get(String url) throws Exception {

    HttpGet get = new HttpGet(url);
    try {
        HttpResponse response = httpClient.execute(get);
        if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
            return "";
        }

        // A response may legitimately have no entity; avoid a NullPointerException.
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return "";
        }

        BufferedReader br = new BufferedReader(new InputStreamReader(entity.getContent(), "UTF-8"));
        try {
            return IOUtils.toString(br);
        } finally {
            // Close the reader even if reading fails part-way.
            br.close();
        }
    } finally {
        get.abort();
    }

}



另一种方法获取指定路径页面的内容

/**
 * Alternative way to fetch the page at {@code url}: lets {@code BasicResponseHandler}
 * read the body, then repairs text that was decoded as ISO-8859-1 although the
 * bytes were really UTF-8 (the usual cause of garbled Chinese text).
 *
 * <p>The repair is applied only when it is safe:
 * <ul>
 *   <li>if the body contains any character above U+00FF it cannot have been decoded
 *       as ISO-8859-1, so it is returned unchanged;</li>
 *   <li>if re-decoding as UTF-8 produces the replacement character U+FFFD, the bytes
 *       were not UTF-8, so the body is returned unchanged.</li>
 * </ul>
 * Unconditional re-decoding would corrupt bodies that the handler had already
 * decoded with the correct charset.
 *
 * @param url absolute URL of the page to fetch
 * @return the response body as text
 * @throws Exception if the request fails or the handler rejects the response
 */
public String getText2(String url) throws Exception {

    HttpGet httpget = new HttpGet(url);
    try {
        // Use the response handler that ships with HttpClient.
        ResponseHandler<String> responseHandler = new BasicResponseHandler();
        String responseBody = httpClient.execute(httpget, responseHandler);

        // Characters above U+00FF prove the body was not decoded as ISO-8859-1.
        for (int i = 0; i < responseBody.length(); i++) {
            if (responseBody.charAt(i) > 0xFF) {
                return responseBody;
            }
        }

        // Recover the raw bytes and decode them as UTF-8 to fix garbled Chinese text.
        String recoded = new String(responseBody.getBytes("ISO-8859-1"), "UTF-8");

        // A replacement character means the bytes were not valid UTF-8; keep the original.
        if (recoded.indexOf('\uFFFD') >= 0) {
            return responseBody;
        }
        return recoded;
    } finally {
        // Release the connection held by this request; the shared client stays usable.
        httpget.abort();
    }
}



保存指定路径页面为.htm文件

/**
 * Downloads a fixed history page and saves its raw bytes to {@code d:/tt.htm}.
 *
 * <p>Nothing is written when the status is not 200 or the response has no body.
 * Both streams are closed and the request is aborted on every path, so a failed
 * download leaks neither a file handle nor the connection.
 *
 * @throws Exception if the request, reading the body or writing the file fails
 */
public void saveCollectionPage() throws Exception {

    // The absolute address of the page is used directly here.
    HttpGet httpGet = new HttpGet("http://124.238.214.79/platform/pages/getWssHistory.action?startDate=2013-04-07&endDate=2013-05-07&pageContext.currentpage=1");
    try {
        HttpResponse response = httpClient.execute(httpGet);
        if (HttpStatus.SC_OK != response.getStatusLine().getStatusCode()) {
            return;
        }

        // Request succeeded; fetch the body, if any.
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return;
        }

        InputStream input = entity.getContent();
        try {
            FileOutputStream output = new FileOutputStream(new File("d:/tt.htm"));
            try {
                byte[] buffer = new byte[1024];
                int count;
                while ((count = input.read(buffer)) != -1) {
                    output.write(buffer, 0, count);
                }
                output.flush();
            } finally {
                output.close();
            }
        } finally {
            input.close();
        }
    } finally {
        // Same clean-up as the other request methods: always let go of the connection.
        httpGet.abort();
    }
}



读取htm文件

/**
 * Reads a text file (for example a saved .htm page) into a single string.
 *
 * <p>Lines are concatenated without their line terminators. If reading fails, the
 * error is logged and whatever was read up to that point is returned (an empty
 * string if nothing was read).
 *
 * @param fileName path of the file to read
 * @param encode   name of the charset used to decode the file, e.g. {@code "UTF-8"}
 * @return the file content with line breaks removed
 */
public String readTextFile(String fileName, String encode) {
    StringBuilder str = new StringBuilder();
    FileInputStream stream = null;
    try {
        stream = new FileInputStream(new File(fileName));
        BufferedReader in = new BufferedReader(new InputStreamReader(stream, encode));
        String dataLine;
        while (null != (dataLine = in.readLine())) {
            str.append(dataLine);
        }
    } catch (Exception e) {
        // Keep the best-effort contract, but record the cause instead of dropping it.
        logger.error("------------->文件读取失败!", e);
    } finally {
        // Closing the underlying stream releases the file handle even when the
        // reader could not be created (e.g. unsupported charset name).
        if (stream != null) {
            try {
                stream.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if close fails; content is already read.
            }
        }
    }
    return str.toString();
}

</div>
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值