使用HttpClient抓取免费停车场信息

本文介绍了一个Java程序,用于从122Park网站抓取免费停车场信息,并将其存储到数据库中。该程序使用了Jsoup进行网页解析,通过HttpClient发起HTTP请求,并利用MySQL进行数据存储。


ImportParsFrom122Park.java

package com.xxx.park;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

import net.sf.json.JSONObject;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.xxx.park.Location;
import com.xxx.park.ParkInfo;

/**
 * Imports free car-park listings from http://www.122park.com/ into the
 * {@code parkinfo} table.
 *
 * <p>{@link #main(String[])} scrapes one page per city, builds a
 * country / city / district tree of {@link Location} objects and then inserts
 * every {@link ParkInfo} found under the districts.
 *
 * <p>Author: TF-BJ-C064, created 2014-7-26 16:41:45.
 */
public class ImportParsFrom122Park {

	public static Logger log = Logger.getLogger(ImportParsFrom122Park.class);

	/**
	 * Parameterised insert. Park names come from scraped HTML, so they are
	 * bound as parameters instead of being concatenated into the statement;
	 * a name containing a quote can then neither break nor alter the SQL.
	 */
	private static final String INSERT_SQL =
			"insert into parkinfo ( p_name,p_desc,longitude,latitude,create_time,orderable,source,status,is_free)"
			+ " values (?,?,?,?,?,?,?,?,?)";

	/** Number of pending rows above which the batch is sent; checked once per district. */
	private static final int BATCH_THRESHOLD = 5000;

	/**
	 * Scrapes the four configured city pages and stores the parks in the database.
	 *
	 * @param argv unused
	 * @throws ClientProtocolException if an HTTP request fails at protocol level
	 * @throws IOException if a page cannot be fetched or read
	 * @throws SQLException if preparing or executing the insert fails
	 */
	public static void main(String []argv) throws ClientProtocolException, IOException, SQLException{

		ImportParsFrom122Park tr = new ImportParsFrom122Park();
		// urls[k] is the page listing the districts of city[k].
		String urls[] = {"http://www.122park.com/","http://www.122park.com/sh","http://www.122park.com/gz","http://www.122park.com/sz"};
		String city[] = {"北京","上海","广州","深圳"};
		Location locationRoot = new Location("中国");
		for(int i = 0; i < urls.length; i++){
			Location locCity = new Location(city[i]);
			locCity.addChildren(tr.Test(urls[i], city[i]));// districts of this city
			locationRoot.add(locCity);
		}

		log.info("------------------Done-----------------");

		KDataSource kds = new KDataSource();
		Connection conn = kds.getConnection();

		if(conn==null){
			log.error("conn==null");
			return;
		}

		String space1 = "    |____";
		String space2 = "        |____";

		int sumParks = 0;   // parks seen in the scraped tree
		int linesSum = 0;   // rows the database reported as inserted
		int pending = 0;    // rows added to the batch but not yet sent
		long t1, t2;
		PreparedStatement insert = null;
		try{
			insert = conn.prepareStatement(INSERT_SQL);
			System.out.println( locationRoot.getName() );// root
			t1 = System.currentTimeMillis();
			for(Location loccity : locationRoot.getChildren()){
				System.out.println( space1 + loccity.getName() );// city name
				for(Location district : loccity.getChildren()){
					System.out.println( space2 + district.getName() );// district name
					for(ParkInfo pi : district.getParklist()){
						addToBatch(insert, pi);
						pending++;
						sumParks++;
					}
					if(pending > BATCH_THRESHOLD){
						linesSum += flushBatch(insert, t1);
						pending = 0;
					}
				}
			}
			if(pending > 0){
				linesSum += flushBatch(insert, t1);
			}
			t2 = System.currentTimeMillis();
		}finally{
			// Release the JDBC resources even when the import fails half-way.
			try{
				if(insert!=null){
					insert.close();
				}
			}finally{
				conn.close();
			}
		}
		log.info("update total lines "+linesSum + " in "+(t2-t1)+" ms");
		log.info("停车场总数: "+sumParks);
		log.info("------------------Done-----------------");

	}

	/** Binds one park to the insert statement and appends it to the current batch. */
	private static void addToBatch(PreparedStatement insert, ParkInfo pi) throws SQLException{
		insert.setString(1, pi.getP_name());
		insert.setString(2, pi.getP_desc());
		insert.setBigDecimal(3, pi.getLongitude());
		insert.setBigDecimal(4, pi.getLatitude());
		insert.setString(5, pi.getCreate_time());
		insert.setInt(6, pi.getOrderable());
		insert.setInt(7, pi.getSource());
		insert.setInt(8, pi.getStatus());
		insert.setInt(9, pi.getIs_free());
		insert.addBatch();
	}

	/**
	 * Sends the pending batch and logs how many rows were written.
	 *
	 * @param startMillis start of the import, used for the elapsed time in the log line
	 * @return number of rows reported as inserted by the driver
	 */
	private static int flushBatch(PreparedStatement insert, long startMillis) throws SQLException{
		int lines = 0;
		for(int result : insert.executeBatch()){
			if(result >= 0){
				lines += result;
			}else if(result == Statement.SUCCESS_NO_INFO){
				lines++;// succeeded without a count; each batch entry inserts one row
			}
		}
		log.info("update lines "+lines + " in "+(System.currentTimeMillis()-startMillis)+" ms");
		return lines;
	}

	/**
	 * Downloads one city page and extracts its districts together with the
	 * parks listed under each district.
	 *
	 * <p>Districts are the {@code div.marked_places_L1} elements inside
	 * {@code div.marked_places_box}; the parks of the n-th district (1-based)
	 * are the {@code a.marked_places_L2} links under the element with id
	 * {@code marked_places_L2_n}. A link's {@code label} attribute is used as
	 * the park name and its {@code id} attribute is handed to
	 * {@link ParkInfo} as the coordinate pair.
	 *
	 * @param url page to download
	 * @param city city name, used only for logging
	 * @return one {@link Location} per district, in page order; empty when the
	 *         response carries no body
	 * @throws ClientProtocolException if the HTTP request fails at protocol level
	 * @throws IOException if the page cannot be fetched or read
	 */
	public List<Location> Test(String url, String city) throws ClientProtocolException, IOException{

		List<Location> listLoc = new ArrayList<Location>();

		String html;
		HttpClient httpclient = new DefaultHttpClient();
		try{
			HttpGet get = new HttpGet(url);
			get.addHeader("Referer", "");
			HttpResponse httpResponse = httpclient.execute(get);

			log.info("---------------------------"+city+"-------------------------");

			HttpEntity entity = httpResponse.getEntity();
			if(entity==null){
				log.warn("no response body from "+url);
				return listLoc;
			}
			// NOTE(review): no charset is passed, so decoding relies on the
			// response's Content-Type - confirm the site declares one.
			html = EntityUtils.toString( entity );
		}finally{
			// One client per call: shut it down so its connections are released.
			httpclient.getConnectionManager().shutdown();
		}

		Document doc = Jsoup.parse(html);
		Elements placesBox = doc.select("div.marked_places_box");
		Elements districts = placesBox.select("div.marked_places_L1");

		int i = 1;// suffix of the id of the park container matching the current district
		for(Element em : districts){
			String districtName = em.select("span").html();
			Location loc = new Location();
			loc.setName(districtName);
			StringBuilder summary = new StringBuilder(districtName);

			Elements parks = placesBox.select("#marked_places_L2_"+i).select("a.marked_places_L2");
			for(Element park : parks){
				String label = park.attr("label");
				String position = park.attr("id");
				summary.append(" {parkname:").append(label).append(",parkpos:").append(position).append("}");
				try{
					loc.add(new ParkInfo(label, position));
				}catch(NumberFormatException e){
					// One malformed listing should not abort the whole scrape.
					log.warn("skipping park with unparseable position: "+label+" ["+position+"]", e);
				}
			}
			listLoc.add(loc);
			i++;
			log.info(summary.toString());
		}
		return listLoc;

	}

}


KDataSource.java

package com.xxx.park;
import java.beans.Statement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

import org.apache.log4j.Logger;


/**
 * Small holder for the JDBC settings of the park database and the connection
 * opened with them.
 *
 * <p>Author: TF-BJ-C064, created 2014-7-26 20:00:43.
 */
public class KDataSource {

	public static Logger log = Logger.getLogger(KDataSource.class);

	// NOTE(review): default connection settings, including the root credentials,
	// are hard-coded; prefer the four-argument constructor or the setters to
	// supply them from configuration.
	private String driver = "com.mysql.jdbc.Driver";
	private String url = "jdbc:mysql://10.6.12.3:3306/mypark?useUnicode=true&characterEncoding=utf8&mysqlEncoding=utf8";
	private String username = "root";
	private String password = "root";
	
	/** Connection produced by the last successful {@link #getConnection()} call. */
	private Connection conn = null;
	
	/** Creates a data source that uses the built-in default settings. */
	public KDataSource(){}

	/**
	 * Creates a data source with explicit settings.
	 *
	 * @param driver fully qualified JDBC driver class name
	 * @param url JDBC connection URL
	 * @param username database user
	 * @param password database password
	 */
	public KDataSource(String driver, String url, String username,
			String password) {
		super();
		this.driver = driver;
		this.url = url;
		this.username = username;
		this.password = password;
	}

	/**
	 * Loads the driver class and opens a connection with the current settings.
	 *
	 * <p>Failures are logged rather than thrown; the field is only reassigned
	 * when a connection was obtained.
	 *
	 * @return the connection held by this object, or {@code null} if none has
	 *         been opened successfully
	 */
	public Connection getConnection(){
		try {
			log.info("connection.. {url:"+url+",drriver:"+driver+", username:"+username+", password:******}");
			// load the driver class
			Class.forName(driver);
			// connect to the database
			conn = DriverManager.getConnection(url, username, password);
			if(conn.isClosed()){
				log.info("failed connecting to the Database");
			}else{
				log.info("connected ");
			}
			
		}catch (SQLException e) {
			log.error("could not open a connection to "+url, e);
		}catch (ClassNotFoundException e) {
			log.error("JDBC driver class not found: "+driver, e);
		}
		return conn;
	}
	
	/** Closes the held connection if it is open; a failure to close is logged. */
	public void close(){
		try {
			if(conn!=null && !conn.isClosed())
				conn.close();
		} catch (SQLException e) {
			log.warn("error while closing the connection", e);
		}
	}


	public String getDriver() {
		return driver;
	}
	public void setDriver(String driver) {
		this.driver = driver;
	}
	public String getUrl() {
		return url;
	}
	public void setUrl(String url) {
		this.url = url;
	}
	public String getUsername() {
		return username;
	}
	public void setUsername(String username) {
		this.username = username;
	}
	public String getPassword() {
		return password;
	}
	public void setPassword(String password) {
		this.password = password;
	}

}

Location.java

/*
* @author : TF-BJ-C064
* @creation : 2014-7-26 下午7:22:09
* @description : 
*
*/

package com.xxx.park;

import java.util.ArrayList;
import java.util.List;

/**
 * Node of a location tree (for example country, city, district).
 *
 * <p>Each node has a name, optional coordinates, an optional parent, a list
 * of child locations and a list of the parks attached directly to it.
 */
public class Location {

	Long id;
	String name;
	String longitude; // longitude
	String latitude;  // latitude

	Location parent;
	List<Location> children = new ArrayList<Location>();
	List<ParkInfo> parklist = new ArrayList<ParkInfo>();

	/** Creates a location without a name. */
	public Location() {
	}

	/**
	 * Creates a location with the given name.
	 *
	 * @param name display name of the location
	 */
	public Location(String name) {
		this.name = name;
	}

	// ---- tree / park list mutation ----

	/**
	 * Appends a child location.
	 *
	 * @return the result of the underlying {@link List#add(Object)}
	 */
	public boolean add(Location ch) {
		return this.children.add(ch);
	}

	/**
	 * Appends all given locations as children.
	 *
	 * @return the result of the underlying {@link List#addAll(java.util.Collection)}
	 */
	public boolean addChildren(List<Location> chs) {
		return this.children.addAll(chs);
	}

	/**
	 * Attaches a park to this location.
	 *
	 * @return the result of the underlying {@link List#add(Object)}
	 */
	public boolean add(ParkInfo pi) {
		return this.parklist.add(pi);
	}

	// ---- plain accessors ----

	public Long getId() {
		return this.id;
	}

	public void setId(Long id) {
		this.id = id;
	}

	public String getName() {
		return this.name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getLongitude() {
		return this.longitude;
	}

	public void setLongitude(String longitude) {
		this.longitude = longitude;
	}

	public String getLatitude() {
		return this.latitude;
	}

	public void setLatitude(String latitude) {
		this.latitude = latitude;
	}

	public Location getParent() {
		return this.parent;
	}

	public void setParent(Location parent) {
		this.parent = parent;
	}

	/** Returns the live list of children, not a copy. */
	public List<Location> getChildren() {
		return this.children;
	}

	public void setChildren(List<Location> children) {
		this.children = children;
	}

	/** Returns the live list of parks, not a copy. */
	public List<ParkInfo> getParklist() {
		return this.parklist;
	}

	public void setParklist(List<ParkInfo> parklist) {
		this.parklist = parklist;
	}
}

ParkInfo.java

/*
* @author : TF-BJ-C064
* @creation : 2014-7-26 下午6:53:07
* @description : 
*
*/

package com.xxx.park;

import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * One car park as stored in the {@code parkinfo} table.
 *
 * <p>The constructors that take a name also use it as the description and
 * stamp {@code create_time} with the current time formatted as
 * {@code yyyy-MM-dd HH:mm:ss}.
 */
public class ParkInfo {

	Long p_id;
	String p_name;
	String p_desc;
	BigDecimal longitude; // longitude
	BigDecimal latitude;  // latitude
	String create_time;
	int is_free = 1;   // free of charge? (0: paid, 1: free)
	int orderable = 0; // reservable? (0: no, 1: yes)
	int source = 2;    // origin of the record (1: self-registered, 2: imported from outside, 3: contract customer)
	int status = 3;    // state (-2: temporarily closed, -1: deleted, 0: frozen, 1: not reviewed, 2: under review, 3: normal)
	
	// Per-instance formatter used to build create_time.
	SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
	
	/** Creates an empty park with the default flags and no creation time. */
	public ParkInfo(){
		
	}
	
	/**
	 * Creates a park from a name and a {@code "longitude,latitude"} string.
	 *
	 * <p>The coordinates are set only when the string splits into exactly two
	 * comma-separated parts; otherwise both stay {@code null}.
	 *
	 * @param p_name park name, also used as the description
	 * @param longitude_latitude coordinate pair, longitude first; may be {@code null}
	 * @throws NumberFormatException if either of the two parts is not a number
	 */
	public ParkInfo(String p_name, String longitude_latitude){
		this.p_name = p_name;
		this.p_desc = p_name;
		if(longitude_latitude!=null){
			String temp[] = longitude_latitude.split(",");
			if(temp.length==2){
				this.longitude = BigDecimal.valueOf(Double.parseDouble(temp[0]));
				// the second part is the latitude
				this.latitude = BigDecimal.valueOf(Double.parseDouble(temp[1]));
			}
		}
		this.create_time = sdf.format(new Date());
	}
	
	/**
	 * Creates a park from a name and already parsed coordinates.
	 *
	 * @param p_name park name, also used as the description
	 * @param longitude longitude, may be {@code null}
	 * @param latitude latitude, may be {@code null}
	 */
	public ParkInfo(String p_name, BigDecimal longitude, BigDecimal latitude){
		this.p_name = p_name;
		this.p_desc = p_name;
		this.longitude = longitude;
		this.latitude = latitude;
		this.create_time = sdf.format(new Date());
	}
	
	public Long getP_id() {
		return p_id;
	}
	public void setP_id(Long p_id) {
		this.p_id = p_id;
	}
	public String getP_name() {
		return p_name;
	}
	public void setP_name(String p_name) {
		this.p_name = p_name;
	}
	public String getP_desc() {
		return p_desc;
	}
	public void setP_desc(String p_desc) {
		this.p_desc = p_desc;
	}
	public BigDecimal getLongitude() {
		return longitude;
	}
	public void setLongitude(BigDecimal longitude) {
		this.longitude = longitude;
	}
	public BigDecimal getLatitude() {
		return latitude;
	}
	public void setLatitude(BigDecimal latitude) {
		this.latitude = latitude;
	}
	public String getCreate_time() {
		return create_time;
	}
	public void setCreate_time(String create_time) {
		this.create_time = create_time;
	}
	public int getOrderable() {
		return orderable;
	}
	public void setOrderable(int orderable) {
		this.orderable = orderable;
	}
	public int getSource() {
		return source;
	}
	public void setSource(int source) {
		this.source = source;
	}
	public int getStatus() {
		return status;
	}
	public void setStatus(int status) {
		this.status = status;
	}

	public int getIs_free() {
		return is_free;
	}

	public void setIs_free(int is_free) {
		this.is_free = is_free;
	}
	
}


Refer: http://blog.youkuaiyun.com/menxu_work/article/details/10115487

下载lib


评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值