/**
 * Fetches a web page with an HTTP GET request and returns its body as a string.
 *
 * <p>Failures are handled on a best-effort basis: I/O, protocol and parse errors are
 * printed to standard error and reported to the caller as {@code null}. The HTTP status
 * code is not inspected, so the body of an error page is returned like any other body.
 *
 * @param url address of the page to fetch; {@code null} yields {@code null}
 * @return the response body as text, or {@code null} when {@code url} is {@code null},
 *     the response carries no entity, or the request fails
 */
private static String pickData(String url) {
    if (url == null) {
        return null;
    }
    // try-with-resources closes the response first and then the client, even on failure;
    // an IOException raised while closing is handled by the catch clause below.
    try (CloseableHttpClient httpclient = HttpClients.createDefault();
         CloseableHttpResponse response = httpclient.execute(new HttpGet(url))) {
        HttpEntity entity = response.getEntity();
        if (entity != null) {
            // UTF-8 is only the fallback for responses that declare no charset; without it
            // some HttpClient versions decode such pages as ISO-8859-1 and garble the text.
            return EntityUtils.toString(entity, "UTF-8");
        }
    } catch (IOException | ParseException e) {
        // ClientProtocolException is a subclass of IOException and is covered here as well.
        e.printStackTrace();
    }
    return null;
}
上面方法返回网页字符串
接下来解析该字符串，
这里用到了 Jsoup：
Document document =Jsoup.parse(html)
Elements element=docment.select(".class")
Elements element=docment.select("标签")
String str=element.child(0).chaid(1).childNode(0).childNode(0).attributes().toString()
String str=element.child(0).child(1).childNode(0).attributes().get("src").toString
str 即为解析得到的网页具体内容。