一个成熟的项目，里面有很多表，已经在网页上实现了展示，现在要把它们一起导出并压缩。我擦，这么多表，还不如用爬虫爬，这样就能少写很多字了。于是……
/**
 * Smoke-tests scraping the detail pages with jsoup.
 * <p>
 * Reads {@code src/main/resources/jsoup_sheet.json}, takes the array stored under the
 * {@code "水闸"} key and, for each entry, requests the corresponding {@code ...Detail.do}
 * page built from the entry's {@code action}. Elements with CSS class {@code left} are
 * paired by index with elements with class {@code right} (the inline comment describes
 * them as field labels and values) and printed to standard output, two pairs per line.
 * Finally the elapsed time in whole seconds is printed.
 *
 * @throws IOException
 *             if the JSON configuration file cannot be opened or read. A failure to
 *             fetch an individual page is only printed; the remaining entries are
 *             still processed.
 */
@Test
public void test() throws IOException {
    long startMillis = System.currentTimeMillis();

    // Read the whole configuration file. The reader is closed even if reading fails, and
    // the charset is pinned because the file contains non-ASCII (Chinese) keys that would
    // otherwise be decoded with whatever the platform default happens to be.
    // NOTE(review): assumes the file is saved as UTF-8 - confirm.
    // Fully-qualified names are used so that no new import lines are required.
    StringBuilder sb = new StringBuilder();
    try (BufferedReader br = new BufferedReader(new java.io.InputStreamReader(
            new java.io.FileInputStream("src/main/resources/jsoup_sheet.json"),
            java.nio.charset.StandardCharsets.UTF_8))) {
        String s;
        while ((s = br.readLine()) != null) {
            sb.append(s);
        }
    }

    JSONObject obj = JSONObject.fromObject(sb.toString());
    JSONArray jsonArray = obj.getJSONArray("水闸");
    // Walk the sheets configured for this category and fetch the page behind each action.
    for (Object object : jsonArray) {
        JSONObject json = JSONObject.fromObject(object);
        try {
            // The start timestamp is appended as the trailing "_" query parameter.
            String real = "http://localhost:8080/******/gcTg/" + json.get("action")
                    + "Detail.do?ennmcd=KHD00001082&_" + startMillis;
            Document doc = Jsoup.connect(real).timeout(8000).get();
            System.out.println(doc);
            Elements leftClass = doc.getElementsByClass("left");
            Elements rightClass = doc.getElementsByClass("right");
            // The two lists may differ in length; only the common prefix can be paired.
            int size = Math.min(leftClass.size(), rightClass.size());
            System.out.println("**********" + real + ":" + json.getString("name") + "************");
            for (int i = 0; i < size; i++) {
                System.out.print(leftClass.get(i).text() + rightClass.get(i).text());
                // Two pairs per output line.
                if ((i + 1) % 2 == 0) {
                    System.out.println();
                }
            }
            // With an odd number of pairs the last line is still open; terminate it so the
            // next banner does not get appended to it.
            if (size % 2 != 0) {
                System.out.println();
            }
        } catch (IOException e) {
            // Best effort: report the failing page and continue with the next one.
            e.printStackTrace();
        }
    }
    System.out.println("耗时" + (System.currentTimeMillis() - startMillis) / 1000);
}
/**
 * Creates one worksheet in {@code book} and fills it with field/value pairs, two pairs
 * per row across columns 0-3, below a title cell merged across those four columns.
 * <p>
 * Only the first {@code min(fieldList.size(), valueList.size())} entries are written. If
 * that count is odd, the last row holds a single pair in columns 0-1.
 * <p>
 * Write failures are printed and otherwise ignored, so the sheet may be left partially
 * filled.
 *
 * @param fieldList
 *            elements whose text is used as the field labels
 * @param valueList
 *            elements whose text is used as the values, paired with {@code fieldList} by index
 * @param name
 *            sheet name; also written as the title in the first row
 * @param sheetNum
 *            index passed to {@code createSheet} for the new sheet. Java passes it by
 *            value, so the caller has to advance its own counter.
 * @param book
 *            workbook the sheet is added to
 */
private void setSheet4(List<Element> fieldList, List<Element> valueList, String name, int sheetNum,
        WritableWorkbook book) {
    try {
        WritableSheet sheet = book.createSheet(name, sheetNum);
        WritableCellFormat cf = new WritableCellFormat();
        // Title: merge columns 0-3 of row 0 and put the sheet name there.
        sheet.mergeCells(0, 0, 3, 0);
        sheet.addCell(new Label(0, 0, name, cf));
        // The two lists may differ in length; only the common prefix can be paired.
        int size = Math.min(fieldList.size(), valueList.size());
        for (int i = 0; i < size; i += 2) {
            // NOTE(review): the row index follows i, which advances by 2, so data lands on
            // rows 1, 3, 5, ... and every other row stays empty. Kept as in the original
            // layout - confirm whether the gap is intended.
            int row = i + 1;
            sheet.addCell(new Label(0, row, fieldList.get(i).text(), cf));
            sheet.addCell(new Label(1, row, valueList.get(i).text(), cf));
            // The second pair of the row exists only if there is another element; without
            // this guard an odd size raised IndexOutOfBoundsException.
            if (i + 1 < size) {
                sheet.addCell(new Label(2, row, fieldList.get(i + 1).text(), cf));
                sheet.addCell(new Label(3, row, valueList.get(i + 1).text(), cf));
            }
        }
    } catch (RowsExceededException e) {
        e.printStackTrace();
    } catch (WriteException e) {
        e.printStackTrace();
    }
}
还有一个 JSON 文件，用来配置要去爬取哪几个网页：
{
"暗窦":[{"name":"一般信息","action":"slcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"}],
"测站":[{"name":"一般信息","action":"slcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"}],
"城市防洪":[{"name":"一般信息","action":"cpfcmin"}],
"穿堤建筑物":[{"name":"一般信息","action":"pbccmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"},{"name":"","action":"rvcmin"}],
"船闸":[{"name":"一般信息","action":"czmin"},{"name":"船闸设计参数","action":"czmin/czsjcs"},{"name":"船闸闸门特征","action":"czmin/czzmtz"},{"name":"船闸工程信息","action":"czmin/czgctx"},{"name":"船闸历史运用记录","action":"czlsjl"}],
"堤段":[{"name":"一般信息","action":"ddinfo"},{"name":"堤段横断面特征值","action":"ddinfo/dktr"},{"name":"堤段水文特征","action":"ddinfo/bsfst"}]
}