Jsoup的简单使用

最近学习了一下 Jsoup,用它来解析 HTML 非常方便。

今天来演示一下如何用 Jsoup 抓取极客公园的文章列表(http://www.geekpark.net/articles_list)。

一、工程目录


Jsoup 的 jar 包到官网下载即可,下载地址:https://jsoup.org/download

二、Article.java

一个简单的POJO类,拿来装载一个文章的内容。

public class Article {

	private String tag;
	private String title;
	private String description;
	private String author;
	private String comments;

	/**
	 * @return the tag
	 */
	public String getTag() {
		return tag;
	}

	/**
	 * @param tag
	 *            the tag to set
	 */
	public void setTag(String tag) {
		this.tag = tag;
	}

	/**
	 * @return the title
	 */
	public String getTitle() {
		return title;
	}

	/**
	 * @param title
	 *            the title to set
	 */
	public void setTitle(String title) {
		this.title = title;
	}

	/**
	 * @return the description
	 */
	public String getDescription() {
		return description;
	}

	/**
	 * @param description
	 *            the description to set
	 */
	public void setDescription(String description) {
		this.description = description;
	}

	/**
	 * @return the author
	 */
	public String getAuthor() {
		return author;
	}

	/**
	 * @param author
	 *            the author to set
	 */
	public void setAuthor(String author) {
		this.author = author;
	}

	/**
	 * @return the comments
	 */
	public String getComments() {
		return comments;
	}

	/**
	 * @param comments
	 *            the comments to set
	 */
	public void setComments(String comments) {
		this.comments = comments;
	}

}

三、JsoupTest.java

package com.ydalien;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.Jsoup;

/**
 * Small demo that downloads one page of an article list, extracts the
 * entries with jsoup CSS selectors and prints them to standard output.
 *
 * <p>All work happens in the constructor; {@link #main(String[])} simply
 * creates one instance.
 */
public class JsoupTest {

	/** URL prefix of the article list; the page number is appended to it. */
	private static final String BASEURL = "http://www.geekpark.net/articles_list?page=";

	/** Page number appended to {@link #BASEURL}. */
	private int page=1;

	/** Articles parsed from the page; empty (never {@code null}) when the download fails. */
	private List<Article> datas;

	/**
	 * Fetches the configured page, parses its articles and prints them.
	 *
	 * <p>An {@link IOException} raised while fetching or parsing is reported
	 * on standard error and leaves the article list empty.
	 */
	public JsoupTest(){
		// Initialise first so the field is never null, even if the request fails.
		datas = new ArrayList<Article>();
		String url = BASEURL+page;

		try {
			datas = fetchArticles(url);
			printArticles(datas);
		} catch (IOException e) {
			// Report which URL failed instead of dumping a bare stack trace.
			System.err.println("Failed to fetch or parse " + url + ": " + e);
		}
	}

	/**
	 * Downloads {@code url} and converts every {@code .article-item} element
	 * into an {@link Article}.
	 *
	 * @param url the page to download
	 * @return the parsed articles, in document order; empty if none matched
	 * @throws IOException if the request or the parsing of the response fails
	 */
	private static List<Article> fetchArticles(String url) throws IOException {
		Connection conn = Jsoup.connect(url);
		// Set the request header so the request carries a browser User-Agent.
		conn.header("User-Agent",
				"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0");
		Response rs = conn.method(Method.GET).execute();
		// Parse through the response rather than re-parsing the body string, so
		// jsoup applies its own charset handling and keeps the base URI.
		Document doc = rs.parse();
		Elements contents = doc.select(".article-item");

		// Pick out only the pieces of each entry that we need.
		List<Article> articles = new ArrayList<Article>();
		for (Element el : contents) {
			articles.add(toArticle(el));
		}
		return articles;
	}

	/**
	 * Builds an {@link Article} from one list entry by reading the text of
	 * the matching child elements.
	 *
	 * @param el one {@code .article-item} element
	 * @return the populated article; a field is the empty string when its selector matches nothing
	 */
	private static Article toArticle(Element el) {
		Article article = new Article();
		article.setTag(el.select("a.category-tag").text());
		article.setTitle(el.select("a.article-title").text());
		article.setAuthor(el.select("a.dib-middle.article-author").text());
		article.setDescription(el.select("p.article-description").text());
		article.setComments(el.select("a.source-right").text());
		return article;
	}

	/**
	 * Prints the scraped articles to standard output, one block per article.
	 *
	 * @param articles the articles to print
	 */
	private static void printArticles(List<Article> articles) {
		for (Article article : articles) {
			System.out.println(article.getTag()+"---"+article.getTitle());
			System.out.println(article.getDescription());
			System.out.println("作者:"+article.getAuthor()+"----回复数:"+article.getComments());
			System.out.println("================================================");
		}
	}

	/**
	 * Entry point: runs the demo once.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args){
		new JsoupTest();
	}

}

四、结果


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值