01 | package jsoup; |
02 |
03 | import java.io.FileNotFoundException; |
04 | import java.io.FileOutputStream; |
05 | import java.io.IOException; |
06 |
07 | import org.jsoup.Jsoup; |
08 | import org.jsoup.nodes.Element; |
09 | import org.jsoup.select.Elements; |
10 |
11 | import com.itextpdf.text.Anchor; |
12 | import com.itextpdf.text.BaseColor; |
13 | import com.itextpdf.text.Document; |
14 | import com.itextpdf.text.DocumentException; |
15 | import com.itextpdf.text.Font; |
16 | import com.itextpdf.text.PageSize; |
17 | import com.itextpdf.text.Paragraph; |
18 | import com.itextpdf.text.pdf.BaseFont; |
19 | import com.itextpdf.text.pdf.PdfWriter; |
20 |
21 | public class Cnblogs { |
22 | public static void main(String[] args) { |
23 | int page = 20 ; |
24 | org.jsoup.nodes.Document doc; |
25 | com.itextpdf.text.Document pdf = new Document(PageSize.A4.rotate(), 50 , |
26 | 50 , 50 , 50 ); |
27 | try { |
28 | PdfWriter writer = PdfWriter.getInstance(pdf, new FileOutputStream( |
29 | "G:\\cnblogs.pdf" )); |
30 | pdf.open(); |
31 | BaseFont zh_cn = BaseFont.createFont( |
32 | "C:\\WINDOWS\\Fonts\\msyh.ttf" , "Identity-H" , |
33 | BaseFont.NOT_EMBEDDED); |
34 |
35 | for ( int p = 1 ; p <= page; p++) { |
36 | if (p == 1 ) { |
37 | doc = Jsoup.connect( "http://www.cnblogs.com/" ).get(); |
38 | } else { |
39 | doc = Jsoup.connect( "http://www.cnblogs.com/p" + p).get(); |
40 | } |
41 | Elements elements = doc.body().getElementsByClass( |
42 | "post_item_body" ); |
43 | for (Element e : elements) { |
44 | Elements titleEle = e.getElementsByClass( "titlelnk" ); |
45 | String titleLink = titleEle.attr( "href" ); |
46 | String titleText = titleEle.text(); |
47 | Elements summaryEle = e |
48 | .getElementsByClass( "post_item_summary" ); |
49 | String summary = summaryEle.text(); |
50 |
51 | Anchor anchor = new Anchor(titleText, new Font(zh_cn, 14 , |
52 | Font.UNDERLINE, BaseColor.BLUE)); |
53 | anchor.setReference(titleLink); |
54 | Paragraph titlePar = new Paragraph(); |
55 | titlePar.add(anchor); |
56 |
57 | Paragraph summaryPar = new Paragraph(summary, new Font( |
58 | zh_cn, 12 )); |
59 | summaryPar.setFirstLineIndent( 24 ); |
60 |
61 | pdf.add(titlePar); |
62 | pdf.add(summaryPar); |
63 | pdf.add( new Paragraph( " " )); |
64 | } |
65 | } |
66 | pdf.close(); |
67 | writer.close(); |
68 | } catch (FileNotFoundException e1) { |
69 | e1.printStackTrace(); |
70 | } catch (DocumentException e1) { |
71 | e1.printStackTrace(); |
72 | } catch (IOException e) { |
73 | e.printStackTrace(); |
74 | } |
75 |
76 | } |
77 | } |
2. [代码][Java]代码
01 | package jsoup; |
02 |
03 | import java.io.FileNotFoundException; |
04 | import java.io.FileOutputStream; |
05 | import java.io.IOException; |
06 |
07 | import org.jsoup.Jsoup; |
08 | import org.jsoup.nodes.Element; |
09 | import org.jsoup.select.Elements; |
10 |
11 | import com.itextpdf.text.Anchor; |
12 | import com.itextpdf.text.BaseColor; |
13 | import com.itextpdf.text.Document; |
14 | import com.itextpdf.text.DocumentException; |
15 | import com.itextpdf.text.Font; |
16 | import com.itextpdf.text.PageSize; |
17 | import com.itextpdf.text.Paragraph; |
18 | import com.itextpdf.text.pdf.BaseFont; |
19 | import com.itextpdf.text.pdf.PdfWriter; |
20 |
21 | public class CsdnBlog { |
22 | public static void main(String[] args) { |
23 | int page = 20 ; |
24 | org.jsoup.nodes.Document doc; |
25 | com.itextpdf.text.Document pdf = new Document(PageSize.A4.rotate(), 50 , |
26 | 50 , 50 , 50 ); |
27 | try { |
28 | PdfWriter writer = PdfWriter.getInstance(pdf, new FileOutputStream( |
29 | "G:\\csdn.pdf" )); |
30 | pdf.open(); |
31 | BaseFont zh_cn = BaseFont.createFont( |
32 | "C:\\WINDOWS\\Fonts\\msyh.ttf" , "Identity-H" , |
33 | BaseFont.NOT_EMBEDDED); |
34 |
35 | for ( int p = 1 ; p <= page; p++) { |
36 | doc = Jsoup |
37 | .connect( "http://blog.youkuaiyun.com/hot.html?page=" + p) |
38 | .header( "User-Agent" , |
39 | "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2" ) |
40 | .get(); |
41 | Elements elements = doc.body().getElementsByClass( "blog_list" ); |
42 | for (Element e : elements) { |
43 | Elements titleEle = e.getElementsByTag( "h1" ); |
44 | String titleLink = titleEle.last().attr( "href" ); |
45 | String titleText = titleEle.last().text(); |
46 | Elements summaryEle = e.getElementsByTag( "dd" ); |
47 | String summary = summaryEle.text(); |
48 |
49 | Anchor anchor = new Anchor(titleText, new Font(zh_cn, 14 , |
50 | Font.UNDERLINE, BaseColor.BLUE)); |
51 | anchor.setReference(titleLink); |
52 | Paragraph titlePar = new Paragraph(); |
53 | titlePar.add(anchor); |
54 |
55 | Paragraph summaryPar = new Paragraph(summary, new Font( |
56 | zh_cn, 12 )); |
57 | summaryPar.setFirstLineIndent( 24 ); |
58 |
59 | pdf.add(titlePar); |
60 | pdf.add(summaryPar); |
61 | pdf.add( new Paragraph( " " )); |
62 | } |
63 | } |
64 | pdf.close(); |
65 | writer.close(); |
66 | } catch (FileNotFoundException e1) { |
67 | e1.printStackTrace(); |
68 | } catch (DocumentException e1) { |
69 | e1.printStackTrace(); |
70 | } catch (IOException e) { |
71 | e.printStackTrace(); |
72 | } |
73 | } |
74 | } |