一、爬取京东评论
京东的评论接口竟然全部对外开放。
public class CommentCrawler {
/** Pooled connection manager handed to every client that {@code getClient()} builds. */
static final PoolingHttpClientConnectionManager httpClientConnectionManager =
        new PoolingHttpClientConnectionManager();

/**
 * Page-count constant, fixed at 50.
 * NOTE(review): presumably the upper bound on pages crawled per product; its usage is not
 * visible in this part of the file, so confirm against the crawl loop.
 */
static final int MAX_PAGE = 50;
/**
 * Builds an HTTP client backed by the class-wide connection manager.
 *
 * <p>A new client object is built on every call, but each one is configured with the same
 * {@code httpClientConnectionManager} instance.
 *
 * @return a newly built client that uses the shared connection manager
 */
static HttpClient getClient() {
    HttpClient pooledClient = HttpClients.custom()
            .setConnectionManager(httpClientConnectionManager)
            .build();
    return pooledClient;
}
/**
 * Builds the URL of one page of the comment listing for a product.
 *
 * <p>The query string always carries {@code score=0}, {@code sortType=3} and
 * {@code pageSize=10}; only the product id and the page number vary.
 *
 * <p>The URL is assembled by plain concatenation rather than {@code String.format}:
 * {@code %d} formats with the JVM's default locale, so under a locale that uses a non-Latin
 * numbering system the page number would be emitted with localized digits and the URL would
 * be invalid. Concatenation always produces ASCII digits.
 *
 * @param productId product identifier, inserted verbatim (a {@code null} is rendered as
 *     the text {@code "null"})
 * @param page page number to request
 * @return the request URL for the given product and page
 */
static String getUrl(String productId, int page) {
    return "http://sclub.jd.com/comment/productPageComments.action"
            + "?productId=" + productId
            + "&score=0&sortType=3"
            + "&page=" + page
            + "&pageSize=10";
}
/**
 * Converts one JSON comment entry into a {@code Comment}.
 *
 * <p>Reads the {@code "id"} field as a long and the {@code "score"} and {@code "content"}
 * fields as strings, then passes them to the {@code Comment} constructor together with the
 * supplied product id.
 *
 * @param json JSON object holding the fields of a single comment
 * @param productId product id handed to the {@code Comment} constructor unchanged
 * @return the comment built from the JSON fields
 */
static Comment commentFromJson(JSONObject json, String productId) {
    long commentId = json.getLongValue("id");
    String score = json.getString("score");
    String content = json.getString("content");
    return new Comment(commentId, productId, score, content);
}
public static boolean crawlComments(Stri