/**
 * Crawls one book index page and downloads every chapter linked from it.
 *
 * <p>The index page is fetched once. The book title is read from the {@code h1} inside the
 * element with id {@code info}, the cover URL from the {@code img} inside the element with id
 * {@code fmimg}, and every {@code a} under a {@code dd} is treated as a chapter link. Each
 * chapter page is then fetched and the text of its {@code content} element is stored in an
 * {@code EbookChapter}. A one-line summary is printed when the crawl finishes.
 *
 * <p>Missing page elements yield empty strings instead of a {@code NullPointerException};
 * links without a resolvable URL are skipped.
 *
 * @param args command-line arguments; not used
 * @throws Exception if the index page or any chapter page cannot be fetched
 */
public static void main(String[] args) throws Exception {
    final String indexUrl = "http://www.biquge5200.com/14_14620/";
    final String userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0)";

    // Fetch the book's index page.
    Document document = Jsoup.connect(indexUrl).userAgent(userAgent).get();

    // Chapter links: every <a> nested in a <dd>.
    Elements chapters = document.getElementsByTag("dd").select("a");

    // Book title; getElementById returns null when the id is absent, so guard it.
    Element infoElement = document.getElementById("info");
    String bookname = infoElement == null ? "" : infoElement.getElementsByTag("h1").text();

    // Absolute URL of the cover image, guarded the same way.
    Element coverElement = document.getElementById("fmimg");
    String cover = coverElement == null ? "" : coverElement.select("img").attr("abs:src");

    List<EbookChapter> list = new ArrayList<EbookChapter>(chapters.size());
    for (Element e : chapters) {
        // Absolute URL of the chapter page; empty when the href cannot be resolved.
        String secondUrl = e.attr("abs:href");
        if (secondUrl.isEmpty()) {
            // Jsoup.connect rejects an empty URL, so skip anchors that carry no usable link.
            continue;
        }

        Document contentDocument = Jsoup.connect(secondUrl).userAgent(userAgent).get();

        // Chapter body text; fall back to empty text if the page has no #content element.
        Element contentElement = contentDocument.getElementById("content");
        String content = contentElement == null ? "" : contentElement.text();

        EbookChapter chapter = new EbookChapter();
        chapter.setChapter(e.text()); // chapter title is the link text
        chapter.setContent(content);
        chapter.setCopyurl(secondUrl);
        list.add(chapter);
    }

    // Report what was collected so the crawl result is not silently discarded.
    System.out.println(bookname + " | " + cover + " | " + list.size() + " chapters");
}
// Jsoup: crawling web page content
// (Blog page footer: latest recommended article published on 2025-03-11 16:27:22)