web

01 package jsoup;
02  
03 import java.io.FileNotFoundException;
04 import java.io.FileOutputStream;
05 import java.io.IOException;
06  
07 import org.jsoup.Jsoup;
08 import org.jsoup.nodes.Element;
09 import org.jsoup.select.Elements;
10  
11 import com.itextpdf.text.Anchor;
12 import com.itextpdf.text.BaseColor;
13 import com.itextpdf.text.Document;
14 import com.itextpdf.text.DocumentException;
15 import com.itextpdf.text.Font;
16 import com.itextpdf.text.PageSize;
17 import com.itextpdf.text.Paragraph;
18 import com.itextpdf.text.pdf.BaseFont;
19 import com.itextpdf.text.pdf.PdfWriter;
20  
21 public class Cnblogs {
22     public static void main(String[] args) {
23         int page = 20;
24         org.jsoup.nodes.Document doc;
25         com.itextpdf.text.Document pdf = new Document(PageSize.A4.rotate(), 50,
26                 505050);
27         try {
28             PdfWriter writer = PdfWriter.getInstance(pdf, new FileOutputStream(
29                     "G:\\cnblogs.pdf"));
30             pdf.open();
31             BaseFont zh_cn = BaseFont.createFont(
32                     "C:\\WINDOWS\\Fonts\\msyh.ttf""Identity-H",
33                     BaseFont.NOT_EMBEDDED);
34  
35             for (int p = 1; p <= page; p++) {
36                 if (p == 1) {
37                     doc = Jsoup.connect("http://www.cnblogs.com/").get();
38                 else {
39                     doc = Jsoup.connect("http://www.cnblogs.com/p" + p).get();
40                 }
41                 Elements elements = doc.body().getElementsByClass(
42                         "post_item_body");
43                 for (Element e : elements) {
44                     Elements titleEle = e.getElementsByClass("titlelnk");
45                     String titleLink = titleEle.attr("href");
46                     String titleText = titleEle.text();
47                     Elements summaryEle = e
48                             .getElementsByClass("post_item_summary");
49                     String summary = summaryEle.text();
50  
51                     Anchor anchor = new Anchor(titleText, new Font(zh_cn, 14,
52                             Font.UNDERLINE, BaseColor.BLUE));
53                     anchor.setReference(titleLink);
54                     Paragraph titlePar = new Paragraph();
55                     titlePar.add(anchor);
56  
57                     Paragraph summaryPar = new Paragraph(summary, new Font(
58                             zh_cn, 12));
59                     summaryPar.setFirstLineIndent(24);
60  
61                     pdf.add(titlePar);
62                     pdf.add(summaryPar);
63                     pdf.add(new Paragraph(" "));
64                 }
65             }
66             pdf.close();
67             writer.close();
68         catch (FileNotFoundException e1) {
69             e1.printStackTrace();
70         catch (DocumentException e1) {
71             e1.printStackTrace();
72         catch (IOException e) {
73             e.printStackTrace();
74         }
75  
76     }
77 }

2. [代码][Java]代码     

01 package jsoup;
02  
03 import java.io.FileNotFoundException;
04 import java.io.FileOutputStream;
05 import java.io.IOException;
06  
07 import org.jsoup.Jsoup;
08 import org.jsoup.nodes.Element;
09 import org.jsoup.select.Elements;
10  
11 import com.itextpdf.text.Anchor;
12 import com.itextpdf.text.BaseColor;
13 import com.itextpdf.text.Document;
14 import com.itextpdf.text.DocumentException;
15 import com.itextpdf.text.Font;
16 import com.itextpdf.text.PageSize;
17 import com.itextpdf.text.Paragraph;
18 import com.itextpdf.text.pdf.BaseFont;
19 import com.itextpdf.text.pdf.PdfWriter;
20  
21 public class CsdnBlog {
22     public static void main(String[] args) {
23         int page = 20;
24         org.jsoup.nodes.Document doc;
25         com.itextpdf.text.Document pdf = new Document(PageSize.A4.rotate(), 50,
26                 505050);
27         try {
28             PdfWriter writer = PdfWriter.getInstance(pdf, new FileOutputStream(
29                     "G:\\csdn.pdf"));
30             pdf.open();
31             BaseFont zh_cn = BaseFont.createFont(
32                     "C:\\WINDOWS\\Fonts\\msyh.ttf""Identity-H",
33                     BaseFont.NOT_EMBEDDED);
34  
35             for (int p = 1; p <= page; p++) {
36                 doc = Jsoup
37                         .connect("http://blog.youkuaiyun.com/hot.html?page=" + p)
38                         .header("User-Agent",
39                                 "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2")
40                         .get();
41                 Elements elements = doc.body().getElementsByClass("blog_list");
42                 for (Element e : elements) {
43                     Elements titleEle = e.getElementsByTag("h1");
44                     String titleLink = titleEle.last().attr("href");
45                     String titleText = titleEle.last().text();
46                     Elements summaryEle = e.getElementsByTag("dd");
47                     String summary = summaryEle.text();
48  
49                     Anchor anchor = new Anchor(titleText, new Font(zh_cn, 14,
50                             Font.UNDERLINE, BaseColor.BLUE));
51                     anchor.setReference(titleLink);
52                     Paragraph titlePar = new Paragraph();
53                     titlePar.add(anchor);
54  
55                     Paragraph summaryPar = new Paragraph(summary, new Font(
56                             zh_cn, 12));
57                     summaryPar.setFirstLineIndent(24);
58  
59                     pdf.add(titlePar);
60                     pdf.add(summaryPar);
61                     pdf.add(new Paragraph(" "));
62                 }
63             }
64             pdf.close();
65             writer.close();
66         catch (FileNotFoundException e1) {
67             e1.printStackTrace();
68         catch (DocumentException e1) {
69             e1.printStackTrace();
70         catch (IOException e) {
71             e.printStackTrace();
72         }
73     }
74 }

3. [图片] 截图00.png    

4. [图片] 截图01.png    

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值