首先,我们在 Maven 项目的 pom.xml 中引入 jsoup 的依赖:
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.3</version>
</dependency>
然后,我们编写爬虫类 WebSpider:
package cn.qblank.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
* 用于爬取一些职位信息,输出到某个文件中
* @author evan_qb
*
*/
public class WebSpider {
public static String getHtml(String url,String encoding) {
URL uri = null;
URLConnection conn = null;
BufferedReader br = null;
StringBuffer sb =