杭州摇号网站的结果是分页显示的,浏览起来非常不方便,于是写了几行代码,
将全部结果下载到一个文件中。代码很简单:先通过 HttpWatch 观察 HTTP 请求
是如何拼接的,然后在代码中将请求拼接好,逐页批量下载下来。
// Fetches pages 1..40 of the "unit" applicant status list from apply.hzcb.gov.cn
// and writes one "id<TAB>name" line per result row to company.txt (also echoed
// to stdout). Checked I/O exceptions from the HTTP calls propagate to the
// enclosing method, exactly as before.
PrintStream ps = null;
try {
    ps = new PrintStream(new FileOutputStream("company.txt"));
} catch (FileNotFoundException e1) {
    e1.printStackTrace();
}
// If the output file could not be opened there is nowhere to write results,
// so skip the download instead of hitting a NullPointerException on ps.println.
if (null != ps) {
    try {
        // NOTE(review): the page count (40) is hard-coded; presumably it matches
        // the number of result pages for the issue requested below - confirm.
        for (int i = 1; i <= 40; i++) {
            // One client per page, as in the original; it is now shut down
            // after each page so its connections are not leaked.
            HttpClient client = new DefaultHttpClient();
            try {
                // Alternative endpoint with the same URL shape:
                // http://apply.hzcb.gov.cn/apply/app/status/norm/person
                HttpPost httppost = new HttpPost("http://apply.hzcb.gov.cn/apply/app/status/norm/unit");
                httppost.setHeader("Referer", "http://apply.hzcb.gov.cn/apply/app/status/norm/unit");

                // Form fields of the paging request.
                List<NameValuePair> params = new ArrayList<NameValuePair>();
                params.add(new BasicNameValuePair("pageNo", String.valueOf(i)));
                params.add(new BasicNameValuePair("issueNumber", "201405"));
                params.add(new BasicNameValuePair("applyCode", ""));
                httppost.setEntity(new UrlEncodedFormEntity(params));

                HttpResponse response = client.execute(httppost);
                HttpEntity entity = response.getEntity();
                // Here a tool such as Jsoup can be used to analyse the returned
                // result, e.g. to determine whether a login succeeded.
                // The response body is decoded as GBK.
                String postResult = EntityUtils.toString(entity, "GBK");
                Document document = Jsoup.parse(postResult);

                // Each matching table row is one record.
                Elements rows = document.select("tr.content_data");
                for (Element row : rows) {
                    String id = "";
                    String name = "";
                    Elements cells = row.select("td");
                    int count = 0;
                    // The first cell is the id; every later cell overwrites name,
                    // so name ends up holding the text of the last cell in the row.
                    for (Element cell : cells) {
                        if (count == 0) {
                            id = cell.text();
                        } else {
                            name = cell.text();
                        }
                        count++;
                    }
                    System.out.println("id:" + id + " name:" + name);
                    ps.println(id + "\t" + name);
                }
            } finally {
                // Release the connections held by this page's client.
                client.getConnectionManager().shutdown();
            }
        }
    } finally {
        // Close the output file even when a request or parse step throws.
        ps.close();
    }
}
}