1 import java.io.IOException; 2 3 import org.jsoup.Jsoup; 4 import org.jsoup.nodes.Document; 5 6 /** 7 * 解析知网文章的页面内容的代码 8 * 9 */ 10 public class Kns50onepage { 11 public static void main(String[] args) throws IOException { 12 13 String url="http://www.cfed.cnki.net/kns50/detail.aspx?filename=GLXB201301003&dbname=CFJD2013&filetitle=%E7%9B%8A%E7%BB%84%E7%BB%87%E8%A1%8C%E4%B8%BA%E4%B8%8E%E6%8D%9F%E7%BB%84%E7%BB%87%E8%A1%8C%E4%B8%BA%3a%E4%B8%AD%E5%9B%BD%E7%89%B9%E5%BE%81%E7%9A%84%E8%A7%92%E8%89%B2%E5%A4%96%E8%A1%8C%E4%B8%BA%E6%A8%A1%E5%9E%8B%E5%8F%8A%E5%85%B6%E7%BB%8F%E9%AA%8C%E5%AE%9E%E8%AF%81"; 14 getContentByJsoup(url); 15 //getLinksByJsoup(divContent); 16 17 } 18 public static void getContentByJsoup(String url){ 19 //解析整个网页 20 String content=""; 21 try { 22 Document doc=Jsoup.connect(url) 23 .data("jquery", "java") 24 .userAgent("Mozilla") 25 .cookie("auth", "token") 26 .timeout(50000) 27 .get(); 28 29 content=doc.toString(); 30 } catch (IOException e) { 31 e.printStackTrace(); 32 } 33 34 Document doc=Jsoup.parse(content); 35 36 String title=doc.select("span.datatitle").get(1).text(); 37 System.out.println("标题:"+title); 38 39 String author=doc.select("td").text().split("【作者】")[1].split("【")[0]; 40 System.out.println("作者:"+author); 41 42 String summary=doc.select("td").text().split("【中文摘要】")[1].split("【")[0]; 43 System.out.println("中文摘要:"+summary); 44 45 46 } 47 48 49 }