ImportParsFrom122Park.java
package com.xxx.park;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import java.util.List;
import net.sf.json.JSONObject;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.xxx.park.Location;
import com.xxx.park.ParkInfo;
/**
 * Imports free parking lot information from http://www.122park.com/ and stores
 * it in the {@code parkinfo} database table.
 *
 * <p>The importer scrapes one page per city, builds a
 * country -> city -> district -> parking lot tree out of {@link Location} and
 * {@link ParkInfo} objects, prints the tree to standard output and inserts
 * every parking lot through a batched, parameterized INSERT statement.
 *
 * @author TF-BJ-C064 (created 2014-7-26 16:41:45)
 */
public class ImportParsFrom122Park {
    public static Logger log = Logger.getLogger(ImportParsFrom122Park.class);

    /**
     * Parameterized insert. Values are bound rather than concatenated because
     * park names come from scraped HTML and may contain quotes.
     */
    private static final String INSERT_SQL =
            "insert into parkinfo ( p_name,p_desc,longitude,latitude,create_time,orderable,source,status,is_free)"
            + " values (?,?,?,?,?,?,?,?,?)";

    /** Number of queued rows after which the pending batch is sent to the database. */
    private static final int BATCH_SIZE = 5000;

    /** Prefix printed in front of a city name in the console tree. */
    private static final String CITY_PREFIX = " |____";

    /** Prefix printed in front of a district name in the console tree. */
    private static final String DISTRICT_PREFIX = " |____";

    /**
     * Scrapes the configured city pages and inserts all parking lots found.
     *
     * @param argv ignored
     * @throws ClientProtocolException if an HTTP request fails at protocol level
     * @throws IOException if a page cannot be downloaded
     * @throws SQLException if preparing or executing the insert fails
     */
    public static void main(String []argv) throws ClientProtocolException, IOException, SQLException{
        ImportParsFrom122Park tr = new ImportParsFrom122Park();
        // urls[k] is the page for city[k]; the two arrays must stay aligned.
        String urls[] = {"http://www.122park.com/","http://www.122park.com/sh","http://www.122park.com/gz","http://www.122park.com/sz"};
        String city[] = {"北京","上海","广州","深圳"};
        Location locationRoot = new Location("中国");
        for (int k = 0; k < urls.length; k++) {
            Location locCity = new Location(city[k]);
            locCity.addChildren(tr.Test(urls[k], city[k])); // districts of this city
            locationRoot.add(locCity);
        }
        log.info("------------------Done-----------------");

        KDataSource kds = new KDataSource();
        Connection conn = kds.getConnection();
        if (conn == null) {
            log.error("conn==null");
            return;
        }

        PreparedStatement statement = null;
        try {
            // Commit once per batch instead of once per row.
            conn.setAutoCommit(false);
            statement = conn.prepareStatement(INSERT_SQL);

            int sumParks = 0;   // parking lots seen
            int linesSum = 0;   // rows reported as inserted by the driver
            int pending = 0;    // rows queued in the current batch

            System.out.println(locationRoot.getName()); // root
            long t1 = System.currentTimeMillis();
            for (Location loccity : locationRoot.getChildren()) {
                System.out.println(CITY_PREFIX + loccity.getName());
                for (Location district : loccity.getChildren()) {
                    System.out.println(DISTRICT_PREFIX + district.getName());
                    for (ParkInfo pi : district.getParklist()) {
                        bindPark(statement, pi);
                        statement.addBatch();
                        pending++;
                        sumParks++;
                        if (pending >= BATCH_SIZE) {
                            linesSum += flushBatch(conn, statement, t1);
                            pending = 0;
                        }
                    }
                }
            }
            if (pending > 0) {
                linesSum += flushBatch(conn, statement, t1);
            }
            long t2 = System.currentTimeMillis();
            log.info("update total lines "+linesSum + " in "+(t2-t1)+" ms");
            log.info("停车场总数: "+sumParks);
            log.info("------------------Done-----------------");
        } finally {
            // Always release JDBC resources, also when an insert fails.
            closeQuietly(statement);
            closeQuietly(conn);
        }
    }

    /** Binds the nine insert columns of one parking lot to the prepared statement. */
    private static void bindPark(PreparedStatement ps, ParkInfo pi) throws SQLException {
        ps.setString(1, pi.getP_name());
        ps.setString(2, pi.getP_desc());
        setDecimalOrNull(ps, 3, pi.getLongitude());
        setDecimalOrNull(ps, 4, pi.getLatitude());
        ps.setString(5, pi.getCreate_time());
        ps.setInt(6, pi.getOrderable());
        ps.setInt(7, pi.getSource());
        ps.setInt(8, pi.getStatus());
        ps.setInt(9, pi.getIs_free());
    }

    /** Binds a decimal value, or SQL NULL when the value is missing. */
    private static void setDecimalOrNull(PreparedStatement ps, int index, java.math.BigDecimal value)
            throws SQLException {
        if (value == null) {
            // NOTE(review): assumes the coordinate columns are nullable - confirm against the schema.
            ps.setNull(index, Types.DECIMAL);
        } else {
            ps.setBigDecimal(index, value);
        }
    }

    /** Executes and commits the pending batch, logs the timing and returns the inserted row count. */
    private static int flushBatch(Connection conn, PreparedStatement ps, long startMillis) throws SQLException {
        int lines = 0;
        for (int count : ps.executeBatch()) {
            if (count >= 0) {
                lines += count;
            } else if (count == Statement.SUCCESS_NO_INFO) {
                lines++; // driver succeeded without reporting a count
            }
        }
        conn.commit();
        long now = System.currentTimeMillis();
        log.info("update lines "+lines + " in "+(now-startMillis)+" ms");
        return lines;
    }

    /** Closes a statement, logging instead of throwing so cleanup never masks the real error. */
    private static void closeQuietly(Statement st) {
        if (st == null) {
            return;
        }
        try {
            st.close();
        } catch (SQLException e) {
            log.warn("failed to close statement", e);
        }
    }

    /** Closes a connection, logging instead of throwing so cleanup never masks the real error. */
    private static void closeQuietly(Connection c) {
        if (c == null) {
            return;
        }
        try {
            c.close();
        } catch (SQLException e) {
            log.warn("failed to close connection", e);
        }
    }

    /**
     * Downloads one city page and extracts its districts and their parking lots.
     *
     * <p>Districts are the {@code div.marked_places_L1} elements inside
     * {@code div.marked_places_box}; the parking lots of the n-th district
     * (1-based) are the {@code a.marked_places_L2} links below the element with
     * id {@code marked_places_L2_n}. Each link's {@code label} attribute is used
     * as the park name and its {@code id} attribute as the coordinate pair.
     *
     * @param url  page to download
     * @param city city name, used only for logging
     * @return one {@link Location} per district, in page order
     * @throws ClientProtocolException if the HTTP request fails at protocol level
     * @throws IOException if the page cannot be downloaded
     */
    public List<Location> Test(String url, String city) throws ClientProtocolException, IOException{
        HttpClient httpclient = new DefaultHttpClient();
        String html;
        try {
            HttpGet get = new HttpGet(url);
            get.addHeader("Referer", "");
            HttpResponse httpResponse = httpclient.execute(get);
            log.info("---------------------------"+city+"-------------------------");
            html = EntityUtils.toString(httpResponse.getEntity());
        } finally {
            // Release the pooled connection held by this one-shot client.
            httpclient.getConnectionManager().shutdown();
        }

        Document doc = Jsoup.parse(html);
        Elements marked_places_box = doc.select("div.marked_places_box");
        Elements districts = marked_places_box.select("div.marked_places_L1");

        List<Location> listLoc = new ArrayList<Location>();
        int i = 1; // 1-based district index used in the "#marked_places_L2_<i>" ids
        for (Element em : districts) {
            String districtName = em.select("span").html();
            Location loc = new Location();
            loc.setName(districtName);

            StringBuilder summary = new StringBuilder(districtName);
            Elements parkem = marked_places_box.select("#marked_places_L2_"+i).select("a.marked_places_L2");
            for (Element park : parkem) {
                String label = park.attr("label");
                String position = park.attr("id");
                summary.append(" {parkname:").append(label).append(",parkpos:").append(position).append("}");
                try {
                    loc.add(new ParkInfo(label, position));
                } catch (NumberFormatException e) {
                    // One malformed coordinate must not abort the whole import.
                    log.warn("skipping park with unparsable position: " + label + " / " + position, e);
                }
            }
            listLoc.add(loc);
            i++;
            log.info(summary.toString());
        }
        return listLoc;
    }
}
KDataSource.java
package com.xxx.park;
import java.beans.Statement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import org.apache.log4j.Logger;
/*
 * @author : TF-BJ-C064
 * @creation : 2014-7-26 20:00:43
 * @description :
 * Minimal JDBC connection holder: loads the configured driver class and opens
 * a connection through DriverManager.
 */
public class KDataSource {
    // Registered under this class so log output is attributed correctly.
    public static Logger log = Logger.getLogger(KDataSource.class);
    private String driver = "com.mysql.jdbc.Driver";
    private String url = "jdbc:mysql://10.6.12.3:3306/mypark?useUnicode=true&characterEncoding=utf8&mysqlEncoding=utf8";
    // NOTE(review): default credentials are hard-coded; prefer the 4-argument constructor or the setters.
    private String username = "root";
    private String password = "root";
    // Most recent connection successfully opened by getConnection(); closed by close().
    private Connection conn = null;

    /** Creates a data source using the built-in default driver, URL and credentials. */
    public KDataSource(){}

    /**
     * Creates a data source with explicit settings.
     *
     * @param driver   fully qualified JDBC driver class name
     * @param url      JDBC URL
     * @param username database user
     * @param password database password
     */
    public KDataSource(String driver, String url, String username,
            String password) {
        super();
        this.driver = driver;
        this.url = url;
        this.username = username;
        this.password = password;
    }

    /**
     * Loads the driver and opens a new connection.
     *
     * @return the open connection, or {@code null} if the driver class cannot
     *         be loaded or the connection cannot be established
     */
    public Connection getConnection(){
        log.info("connection.. {url:"+url+",drriver:"+driver+", username:"+username+", password:******}");
        try {
            // Load the driver class so it registers itself with DriverManager.
            Class.forName(driver);
            // Connect to the database.
            Connection fresh = DriverManager.getConnection(url, username, password);
            if (fresh.isClosed()) {
                log.error("failed connecting to the Database");
                return null;
            }
            conn = fresh;
            log.info("connected ");
            return fresh;
        } catch (SQLException e) {
            log.error("failed connecting to the Database", e);
        } catch (ClassNotFoundException e) {
            log.error("JDBC driver class not found: " + driver, e);
        }
        return null;
    }

    /** Closes the connection last opened by {@link #getConnection()}, if it is still open. */
    public void close(){
        try {
            if (conn != null && !conn.isClosed()) {
                conn.close();
            }
        } catch (SQLException e) {
            log.warn("failed to close connection", e);
        }
    }

    /** @return the JDBC driver class name */
    public String getDriver() {
        return driver;
    }

    /** @param driver the JDBC driver class name used by later getConnection() calls */
    public void setDriver(String driver) {
        this.driver = driver;
    }

    /** @return the JDBC URL */
    public String getUrl() {
        return url;
    }

    /** @param url the JDBC URL used by later getConnection() calls */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the database user name */
    public String getUsername() {
        return username;
    }

    /** @param username the database user name used by later getConnection() calls */
    public void setUsername(String username) {
        this.username = username;
    }

    /** @return the database password */
    public String getPassword() {
        return password;
    }

    /** @param password the database password used by later getConnection() calls */
    public void setPassword(String password) {
        this.password = password;
    }
}
Location.java
/*
* @author : TF-BJ-C064
* @creation : 2014-7-26 下午7:22:09
* @description :
*
*/
package com.xxx.park;
import java.util.ArrayList;
import java.util.List;
/**
 * A node in the location tree (for example country, city or district).
 *
 * <p>Each node has a name, optional coordinates kept as strings, an optional
 * parent, a list of child locations and a list of parking lots attached
 * directly to it.
 */
public class Location {
    Long id;
    String name;
    String longitude; // longitude
    String latitude;  // latitude
    Location parent;
    List<Location> children = new ArrayList<Location>();
    List<ParkInfo> parklist = new ArrayList<ParkInfo>();

    /** Creates an unnamed location with empty child and parking lot lists. */
    public Location(){
    }

    /**
     * Creates a named location with empty child and parking lot lists.
     *
     * @param name display name of the location
     */
    public Location(String name){
        this.name = name;
    }

    /**
     * Appends one child location.
     *
     * @return the result of the underlying {@code List.add} call
     */
    public boolean add(Location ch){
        return this.children.add(ch);
    }

    /**
     * Appends one parking lot to this location.
     *
     * @return the result of the underlying {@code List.add} call
     */
    public boolean add(ParkInfo pi){
        return this.parklist.add(pi);
    }

    /**
     * Appends all given child locations.
     *
     * @return the result of the underlying {@code List.addAll} call
     */
    public boolean addChildren(List<Location> chs){
        return this.children.addAll(chs);
    }

    /** @return the identifier, possibly {@code null} */
    public Long getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the location name */
    public String getName() {
        return this.name;
    }

    /** @param name the location name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the longitude string */
    public String getLongitude() {
        return this.longitude;
    }

    /** @param longitude the longitude string to store */
    public void setLongitude(String longitude) {
        this.longitude = longitude;
    }

    /** @return the latitude string */
    public String getLatitude() {
        return this.latitude;
    }

    /** @param latitude the latitude string to store */
    public void setLatitude(String latitude) {
        this.latitude = latitude;
    }

    /** @return the parent location, possibly {@code null} */
    public Location getParent() {
        return this.parent;
    }

    /** @param parent the parent location to store */
    public void setParent(Location parent) {
        this.parent = parent;
    }

    /** @return the live list of child locations */
    public List<Location> getChildren() {
        return this.children;
    }

    /** @param children replacement list of child locations */
    public void setChildren(List<Location> children) {
        this.children = children;
    }

    /** @return the live list of parking lots attached to this location */
    public List<ParkInfo> getParklist() {
        return this.parklist;
    }

    /** @param parklist replacement list of parking lots */
    public void setParklist(List<ParkInfo> parklist) {
        this.parklist = parklist;
    }
}
ParkInfo.java
/*
* @author : TF-BJ-C064
* @creation : 2014-7-26 下午6:53:07
* @description :
*
*/
package com.xxx.park;
import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * One parking lot record as stored in the {@code parkinfo} table.
 *
 * <p>Both non-default constructors use the park name as the description too
 * and stamp {@code create_time} with the current time formatted as
 * {@code yyyy-MM-dd HH:mm:ss}.
 */
public class ParkInfo {
    Long p_id;
    String p_name;
    String p_desc;
    BigDecimal longitude; // longitude
    BigDecimal latitude;  // latitude
    String create_time;
    int is_free = 1;   // free parking flag (0: paid lot; 1: free lot)
    int orderable = 0; // reservation support (0: not reservable; 1: reservable)
    int source = 2;    // record origin (1: self-registered; 2: imported from outside; 3: contract user)
    int status = 3;    // lot status (-2: temporarily closed; -1: deleted; 0: frozen; 1: not reviewed; 2: under review; 3: normal)
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /** Creates an empty record that carries only the default flag values. */
    public ParkInfo(){
    }

    /**
     * Creates a record from a name and a {@code "longitude,latitude"} string.
     *
     * <p>The coordinates are set only when the string splits into exactly two
     * comma-separated parts; otherwise both stay {@code null}.
     *
     * @param p_name             park name, also used as the description
     * @param longitude_latitude coordinate pair, longitude first; may be {@code null}
     * @throws NumberFormatException if either of the two parts is not a number
     */
    public ParkInfo(String p_name, String longitude_latitude){
        this.p_name = p_name;
        this.p_desc = p_name;
        if (longitude_latitude != null) {
            String temp[] = longitude_latitude.split(",");
            if (temp.length == 2) {
                this.longitude = BigDecimal.valueOf(Double.parseDouble(temp[0]));
                // Latitude is the second part; reading temp[0] here made it equal the longitude.
                this.latitude = BigDecimal.valueOf(Double.parseDouble(temp[1]));
            }
        }
        this.create_time = sdf.format(new Date());
    }

    /**
     * Creates a record from a name and already parsed coordinates.
     *
     * @param p_name    park name, also used as the description
     * @param longitude longitude, may be {@code null}
     * @param latitude  latitude, may be {@code null}
     */
    public ParkInfo(String p_name, BigDecimal longitude, BigDecimal latitude){
        this.p_name = p_name;
        this.p_desc = p_name;
        this.longitude = longitude;
        this.latitude = latitude;
        this.create_time = sdf.format(new Date());
    }

    /** @return the record id, possibly {@code null} */
    public Long getP_id() {
        return p_id;
    }

    /** @param p_id the record id to store */
    public void setP_id(Long p_id) {
        this.p_id = p_id;
    }

    /** @return the park name */
    public String getP_name() {
        return p_name;
    }

    /** @param p_name the park name to store */
    public void setP_name(String p_name) {
        this.p_name = p_name;
    }

    /** @return the park description */
    public String getP_desc() {
        return p_desc;
    }

    /** @param p_desc the park description to store */
    public void setP_desc(String p_desc) {
        this.p_desc = p_desc;
    }

    /** @return the longitude, possibly {@code null} */
    public BigDecimal getLongitude() {
        return longitude;
    }

    /** @param longitude the longitude to store */
    public void setLongitude(BigDecimal longitude) {
        this.longitude = longitude;
    }

    /** @return the latitude, possibly {@code null} */
    public BigDecimal getLatitude() {
        return latitude;
    }

    /** @param latitude the latitude to store */
    public void setLatitude(BigDecimal latitude) {
        this.latitude = latitude;
    }

    /** @return the creation timestamp string */
    public String getCreate_time() {
        return create_time;
    }

    /** @param create_time the creation timestamp string to store */
    public void setCreate_time(String create_time) {
        this.create_time = create_time;
    }

    /** @return the reservation flag (0: not reservable; 1: reservable) */
    public int getOrderable() {
        return orderable;
    }

    /** @param orderable the reservation flag to store */
    public void setOrderable(int orderable) {
        this.orderable = orderable;
    }

    /** @return the record origin code */
    public int getSource() {
        return source;
    }

    /** @param source the record origin code to store */
    public void setSource(int source) {
        this.source = source;
    }

    /** @return the lot status code */
    public int getStatus() {
        return status;
    }

    /** @param status the lot status code to store */
    public void setStatus(int status) {
        this.status = status;
    }

    /** @return the free parking flag (0: paid lot; 1: free lot) */
    public int getIs_free() {
        return is_free;
    }

    /** @param is_free the free parking flag to store */
    public void setIs_free(int is_free) {
        this.is_free = is_free;
    }
}
Refer: http://blog.youkuaiyun.com/menxu_work/article/details/10115487
本文介绍了一个Java程序,用于从122Park网站抓取免费停车场信息,并将其存储到数据库中。该程序使用了Jsoup进行网页解析,通过HttpClient发起HTTP请求,并利用MySQL进行数据存储。

被折叠的 条评论
为什么被折叠?



