抓取汽车之家的车辆信息和车辆图片
CarBrands.java
/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 上午9:57:38
* @description :
*
*/
package com.car;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A car brand together with the series that belong to it.
 *
 * <p>Setting the URL (via the two-argument constructor or {@link #setUrl(String)}) also
 * derives the brand id from it.
 */
public class CarBrands {
    /**
     * Matches the numeric id in a URL ending in {@code -<digits>.html}, e.g.
     * {@code http://car.autohome.com.cn/price/brand-22.html}. The dot is escaped so that only
     * a literal ".html" is accepted; compiled once because the pattern never changes.
     */
    private static final Pattern BRAND_ID_PATTERN = Pattern.compile("-(\\d+)\\.html");

    private String name;
    private String url;
    private String bid;
    private List<CarSerie> series = new ArrayList<CarSerie>();

    /** Creates an empty brand; name, URL and id are all {@code null}. */
    public CarBrands() {
    }

    /**
     * Creates a brand and derives its id from {@code href}.
     *
     * @param name display name of the brand
     * @param href brand page URL; may be {@code null}
     */
    public CarBrands(String name, String href) {
        this.name = name;
        this.setUrl(href);
    }

    /**
     * Appends a series to this brand.
     *
     * @param cb the series to add
     */
    public void add(CarSerie cb) {
        series.add(cb);
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getUrl() {
        return url;
    }

    /**
     * Stores the URL and, when it contains {@code -<digits>.html}, updates the brand id to
     * those digits. Example: {@code http://car.autohome.com.cn/price/brand-22.html} yields
     * the id {@code "22"} (bare number, no prefix is added here).
     *
     * <p>When the URL is {@code null} or carries no id, the previously stored id is left
     * untouched.
     *
     * @param url brand page URL; may be {@code null}
     */
    public void setUrl(String url) {
        this.url = url;
        if (url == null) {
            return;
        }
        Matcher matcher = BRAND_ID_PATTERN.matcher(url);
        if (matcher.find()) {
            this.setBid(matcher.group(1));
        }
    }

    public String getBid() {
        return bid;
    }

    public void setBid(String bid) {
        this.bid = bid;
    }

    public List<CarSerie> getSeries() {
        return series;
    }

    public void setSeries(List<CarSerie> series) {
        this.series = series;
    }
}
CarModels.java
/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 下午2:26:13
* @description :
*
*/
package com.car;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A concrete vehicle model (trim) with its descriptive attributes and images.
 */
public class CarModels {
    /**
     * Captures the digits following {@code specid=} in a purchase link. No trailing
     * {@code &} is required, so the id is also found when it is the last query parameter.
     */
    private static final Pattern SPEC_ID_PATTERN = Pattern.compile("specid=(\\d+)");

    private String name;
    private String url;
    private String price;        // guide price
    private String level;        // vehicle class / level
    private String structure;    // body structure
    private String engine;       // engine
    private String transmission; // gearbox
    private String size;         // body dimensions
    private String imageurl;
    private List<CarSerieImage> images = new ArrayList<CarSerieImage>();

    /**
     * Appends an image to this model.
     *
     * @param imageurl the image to add
     * @return the result of {@link List#add(Object)}
     */
    public boolean add(CarSerieImage imageurl) {
        return images.add(imageurl);
    }

    public List<CarSerieImage> getImages() {
        return images;
    }

    public void setImages(List<CarSerieImage> images) {
        this.images = images;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    /**
     * Extracts the spec id from a link and stores it through {@link #setUrl(String)}.
     * Example: {@code http://www.autohome.com.cn/buycar.html?specid=19460&#pvareaid=101622}
     * stores {@code "19460"}. Note that the value stored in the url field is the id itself,
     * not a full URL.
     *
     * <p>A {@code null} argument or a link without {@code specid=<digits>} leaves the
     * current value unchanged.
     *
     * @param parurl link containing a {@code specid} query parameter; may be {@code null}
     */
    public void parseAsetUrl(String parurl) {
        if (parurl == null) {
            return;
        }
        Matcher matcher = SPEC_ID_PATTERN.matcher(parurl);
        if (matcher.find()) {
            this.setUrl(matcher.group(1));
        }
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getLevel() {
        return level;
    }

    public void setLevel(String level) {
        this.level = level;
    }

    public String getStructure() {
        return structure;
    }

    public void setStructure(String structure) {
        this.structure = structure;
    }

    public String getEngine() {
        return engine;
    }

    public void setEngine(String engine) {
        this.engine = engine;
    }

    public String getTransmission() {
        return transmission;
    }

    public void setTransmission(String transmission) {
        this.transmission = transmission;
    }

    public String getImageurl() {
        return imageurl;
    }

    public void setImageurl(String imageurl) {
        this.imageurl = imageurl;
    }

    public String getSize() {
        return size;
    }

    public void setSize(String size) {
        this.size = size;
    }
}
CarSerie.java
/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 上午11:48:14
* @description :
*
*/
package com.car;
import java.util.ArrayList;
import java.util.List;
/**
 * A car series of a brand, with its descriptive attributes, free-form extra information
 * and the list of model years that belong to it.
 */
public class CarSerie {
    private String name;
    private String url;
    private String price;        // guide price
    private String level;        // vehicle class / level
    private String structure;    // body structure
    private String engine;       // engine
    private String transmission; // gearbox
    private String extInfo = "";
    private String extInfoHtml;
    private List<CarYear> carYearList = new ArrayList<CarYear>();

    /** Creates an empty series. */
    public CarSerie() {
    }

    /**
     * Creates a series with the given name and page URL. The URL is stored unchanged.
     *
     * @param name display name of the series
     * @param href series page URL, e.g. {@code http://car.autohome.com.cn/pic/series/66.html}
     */
    public CarSerie(String name, String href) {
        this.name = name;
        this.url = href;
    }

    /**
     * Appends a model year to this series.
     *
     * @param cy the model year to add
     * @return the result of {@link List#add(Object)}
     */
    public boolean add(CarYear cy) {
        return this.carYearList.add(cy);
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public List<CarYear> getCarYearList() {
        return carYearList;
    }

    public void setCarYearList(List<CarYear> carYearList) {
        this.carYearList = carYearList;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getLevel() {
        return level;
    }

    public void setLevel(String level) {
        this.level = level;
    }

    public String getStructure() {
        return structure;
    }

    public void setStructure(String structure) {
        this.structure = structure;
    }

    public String getEngine() {
        return engine;
    }

    public void setEngine(String engine) {
        this.engine = engine;
    }

    public String getTransmission() {
        return transmission;
    }

    public void setTransmission(String transmission) {
        this.transmission = transmission;
    }

    public String getExtInfo() {
        return extInfo;
    }

    /**
     * Appends a fragment to the extra information. Fragments are separated by {@code ", "};
     * no separator is inserted while the current text is empty or whitespace only.
     *
     * @param extInfoIn fragment to append; {@code null} is ignored
     */
    public void addExtInfo(String extInfoIn) {
        if (extInfoIn == null) {
            return;
        }
        // Treat a null field like an empty one so the literal text "null" is never appended.
        String current = this.extInfo == null ? "" : this.extInfo;
        if (!current.trim().isEmpty()) {
            current += ", ";
        }
        this.extInfo = current + extInfoIn;
    }

    /**
     * Replaces the extra information.
     *
     * @param extInfoIn new value; {@code null} is ignored and keeps the current value
     */
    public void setExtInfo(String extInfoIn) {
        if (extInfoIn != null) {
            // Fix: the argument is stored; previously the field was assigned to itself.
            this.extInfo = extInfoIn;
        }
    }

    public String getExtInfoHtml() {
        return extInfoHtml;
    }

    public void setExtInfoHtml(String extInfoHtml) {
        this.extInfoHtml = extInfoHtml;
    }
}
CarSerieImage.java
/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 上午11:52:54
* @description :
*
*/
package com.car;
/**
 * An image reference consisting of a title and a source location.
 */
public class CarSerieImage {
    private String title;
    private String src;

    /** Creates an image with neither title nor source set. */
    public CarSerieImage() {
    }

    /**
     * Creates an image with the given title and source.
     *
     * @param title image title; may be {@code null} or blank
     * @param src   image source location
     */
    public CarSerieImage(String title, String src) {
        this.title = title;
        this.src = src;
    }

    /**
     * Returns the title, or the current time in milliseconds (as decimal text) when the
     * title is {@code null}, empty or whitespace only.
     *
     * @return the stored title or a time-based substitute
     */
    public String getTitle() {
        boolean hasTitle = title != null && !title.trim().isEmpty();
        return hasTitle ? title : String.valueOf(System.currentTimeMillis());
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getSrc() {
        return src;
    }

    public void setSrc(String src) {
        this.src = src;
    }
}
CarTree.java
/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 上午9:59:06
* @description :
*
*/
package com.car;
import java.util.ArrayList;
import java.util.List;
/**
 * Root container holding the list of car brands.
 */
public class CarTree {
    private List<CarBrands> tree = new ArrayList<CarBrands>();

    /**
     * Appends a brand to the tree.
     *
     * @param carbs the brand to add
     * @return the result of {@link List#add(Object)}
     */
    public boolean add(CarBrands carbs) {
        boolean added = tree.add(carbs);
        return added;
    }

    /** @return the backing list of brands (not a copy) */
    public List<CarBrands> getTree() {
        return this.tree;
    }

    /**
     * Replaces the backing list of brands.
     *
     * @param tree the new brand list
     */
    public void setTree(List<CarBrands> tree) {
        this.tree = tree;
    }
}
CarYear.java
/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 下午2:48:56
* @description :
*
*/
package com.car;
import java.util.ArrayList;
import java.util.List;
/**
 * A model year of a series (e.g. the "2013" edition), holding the models released for it.
 */
public class CarYear {
    private String name;
    private List<CarModels> carModels = new ArrayList<CarModels>();

    /** Creates a model year without a name. */
    public CarYear() {
    }

    /**
     * Creates a model year with the given name.
     *
     * @param name label of the model year
     */
    public CarYear(String name) {
        this.name = name;
    }

    /**
     * Appends a model to this model year.
     *
     * @param cm the model to add
     * @return the result of {@link List#add(Object)}
     */
    public boolean add(CarModels cm) {
        boolean added = carModels.add(cm);
        return added;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    /** @return the backing list of models (not a copy) */
    public List<CarModels> getCarModels() {
        return this.carModels;
    }

    public void setCarModels(List<CarModels> carModels) {
        this.carModels = carModels;
    }
}
QCZJmain.java
/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 上午9:31:38
* @description :
*
*/
package com.car;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.xvolks.jnative.exceptions.NativeException;
import sun.audio.AudioPlayer;
import sun.audio.AudioStream;
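/**
* Crawls car data from autohome.com.cn.
* <p>Title: QCZJmain</p>
* <p>Description:
* Command-line arguments:
* args[0]: root directory where the crawled vehicle information is stored
* args[1]: whether to download images, default false
* args[2]: whether to crawl the vehicle model/trim details, default true
* Example launcher script (run.bat):
* <pre>
* echo off
* color 0a
* java -jar CarInfoCrawl.jar D:/craw/car20140821 true
* pause
* </pre>
* </p>
* <p>Company: </p>
* @author
* @date 2014-8-21
*/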
public class QCZJmain {
// When true, the Step2/Step3 loops stop early once their counters exceed MAX_DEBUG_LINE.
public static boolean debug = false;
// Item limit applied to those loops while debug is enabled.
public static int MAX_DEBUG_LINE = 5;
// Root directory for all files written by the crawler; main() replaces it with args[0] when given.
private String DIR_ROOT = "data/cars";
// Site base URL; relative hrefs taken from the brand and series lists are appended to it.
public static String BASE_URL = "http://car.autohome.com.cn";
// Prefix of a spec page URL; a spec id is appended, see the example on the right.
public static String CAR_SPEC_URL = "http://www.autohome.com.cn/spec/"; // http://www.autohome.com.cn/spec/19390
// Left-menu endpoint queried in Step2 for the series of one brand; the query string is appended, see the example on the right.
public static String Item_URL = BASE_URL+"/AsLeftMenu/As_LeftListNew.ashx?"; // AsLeftMenu/As_LeftListNew.ashx?typeId=1&brandId=34&fctId=0&seriesId=0
public String carhtml = "<div class='cartree-letter'>A</div><ul><li id='b34'><h3>" +
"<a href='/price/brand-34.html'><i class='icon10 icon10-sjr'></i>阿尔法罗密欧<em>(3)</em></a></h3></li><li id='b35'><h3><a href='/price/brand-35.html'><i class='icon10 icon10-sjr'></i>阿斯顿·马丁<em>(38)</em></a></h3></li><li id='b33'><h3><a href='/price/brand-33.html'><i class='icon10 icon10-sjr'></i>奥迪<em>(523)</em></a></h3></li></ul><div class='cartree-letter'>B</div><ul><li id='b140'><h3><a href='/price/brand-140.html'><i class='icon10 icon10-sjr'></i>巴博斯<em>(9)</em></a></h3></li><li id='b120'><h3><a href='/price/brand-120.html'><i class='icon10 icon10-sjr'></i>宝骏<em>(82)</em></a></h3></li><li id='b15'><h3><a href='/price/brand-15.html'><i class='icon10 icon10-sjr'></i>宝马<em>(537)</em></a></h3></li><li id='b40'><h3><a href='/price/brand-40.html'><i class='icon10 icon10-sjr'></i>保时捷<em>(148)</em></a></h3></li><li id='b27'><h3><a href='/price/brand-27.html'><i class='icon10 icon10-sjr'></i>北京汽车<em>(20)</em></a></h3></li><li id='b203'><h3><a href='/price/brand-203.html'><i class='icon10 icon10-sjr'></i>北汽幻速<em>(8)</em></a></h3></li><li id='b143'><h3><a href='/price/brand-143.html'><i class='icon10 icon10-sjr'></i>北汽威旺<em>(50)</em></a></h3></li><li id='b208'><h3><a href='/price/brand-208.html'><i class='icon10 icon10-sjr'></i>北汽新能源<em>(3)</em></a></h3></li><li id='b154'><h3><a href='/price/brand-154.html'><i class='icon10 icon10-sjr'></i>北汽制造<em>(29)</em></a></h3></li><li id='b36'><h3><a href='/price/brand-36.html'><i class='icon10 icon10-sjr'></i>奔驰<em>(399)</em></a></h3></li><li id='b95'><h3><a href='/price/brand-95.html'><i class='icon10 icon10-sjr'></i>奔腾<em>(103)</em></a></h3></li><li id='b14'><h3><a href='/price/brand-14.html'><i class='icon10 icon10-sjr'></i>本田<em>(265)</em></a></h3></li><li id='b75'><h3><a href='/price/brand-75.html'><i class='icon10 icon10-sjr'></i>比亚迪<em>(251)</em></a></h3></li><li id='b13'><h3><a href='/price/brand-13.html'><i class='icon10 icon10-sjr'></i>标致<em>(292)</em></a></h3></li><li id='b38'><h3><a href='/price/brand-38.html'><i 
class='icon10 icon10-sjr'></i>别克<em>(266)</em></a></h3></li><li id='b39'><h3><a href='/price/brand-39.html'><i class='icon10 icon10-sjr'></i>宾利<em>(36)</em></a></h3></li><li id='b37'><h3><a href='/price/brand-37.html'><i class='icon10 icon10-sjr'></i>布加迪<em>(3)</em></a></h3></li></ul><div class='cartree-letter'>C</div><ul><li id='b79'><h3><a href='/price/brand-79.html'><i class='icon10 icon10-sjr'></i>昌河<em>(49)</em></a></h3></li><li id='b76'><h3><a href='/price/brand-76.html'><i class='icon10 icon10-sjr'></i>长安<em>(196)</em></a></h3></li><li id='b163'><h3><a href='/price/brand-163.html'><i class='icon10 icon10-sjr'></i>长安商用<em>(101)</em></a></h3></li><li id='b77'><h3><a href='/price/brand-77.html'><i class='icon10 icon10-sjr'></i>长城<em>(273)</em></a></h3></li><li id='b196'><h3><a href='/price/brand-196.html'><i class='icon10 icon10-sjr'></i>成功汽车<em>(7)</em></a></h3></li></ul><div class='cartree-letter'>D</div><ul><li id='b169'><h3><a href='/price/brand-169.html'><i class='icon10 icon10-sjr'></i>DS<em>(29)</em></a></h3></li><li id='b92'><h3><a href='/price/brand-92.html'><i class='icon10 icon10-sjr'></i>大发<em>(13)</em></a></h3></li><li id='b1'><h3><a href='/price/brand-1.html'><i class='icon10 icon10-sjr'></i>大众<em>(863)</em></a></h3></li><li id='b41'><h3><a href='/price/brand-41.html'><i class='icon10 icon10-sjr'></i>道奇<em>(21)</em></a></h3></li><li id='b32'><h3><a href='/price/brand-32.html'><i class='icon10 icon10-sjr'></i>东风<em>(33)</em></a></h3></li><li id='b187'><h3><a href='/price/brand-187.html'><i class='icon10 icon10-sjr'></i>东风风度<em>(114)</em></a></h3></li><li id='b113'><h3><a href='/price/brand-113.html'><i class='icon10 icon10-sjr'></i>东风风神<em>(86)</em></a></h3></li><li id='b165'><h3><a href='/price/brand-165.html'><i class='icon10 icon10-sjr'></i>东风风行<em>(253)</em></a></h3></li><li id='b142'><h3><a href='/price/brand-142.html'><i class='icon10 icon10-sjr'></i>东风小康<em>(71)</em></a></h3></li><li id='b81'><h3><a href='/price/brand-81.html'><i 
class='icon10 icon10-sjr'></i>东南<em>(133)</em></a></h3></li></ul><div class='cartree-letter'>F</div><ul><li id='b42'><h3><a href='/price/brand-42.html'><i class='icon10 icon10-sjr'></i>法拉利<em>(18)</em></a></h3></li><li id='b11'><h3><a href='/price/brand-11.html'><i class='icon10 icon10-sjr'></i>菲亚特<em>(89)</em></a></h3></li><li id='b3'><h3><a href='/price/brand-3.html'><i class='icon10 icon10-sjr'></i>丰田<em>(547)</em></a></h3></li><li id='b141'><h3><a href='/price/brand-141.html'><i class='icon10 icon10-sjr'></i>福迪<em>(11)</em></a></h3></li><li id='b197'><h3><a href='/price/brand-197.html'><i class='icon10 icon10-sjr'></i>福汽启腾<em>(4)</em></a></h3></li><li id='b8'><h3><a href='/price/brand-8.html'><i class='icon10 icon10-sjr'></i>福特<em>(312)</em></a></h3></li><li id='b96'><h3><a href='/price/brand-96.html'><i class='icon10 icon10-sjr'></i>福田<em>(276)</em></a></h3></li></ul><div class='cartree-letter'>G</div><ul><li id='b112'><h3><a href='/price/brand-112.html'><i class='icon10 icon10-sjr'></i>GMC<em>(35)</em></a></h3></li><li id='b152'><h3><a href='/price/brand-152.html'><i class='icon10 icon10-sjr'></i>观致<em>(10)</em></a></h3></li><li id='b116'><h3><a href='/price/brand-116.html'><i class='icon10 icon10-sjr'></i>光冈<em>(3)</em></a></h3></li><li id='b82'><h3><a href='/price/brand-82.html'><i class='icon10 icon10-sjr'></i>广汽传祺<em>(69)</em></a></h3></li><li id='b108'><h3><a href='/price/brand-108.html'><i class='icon10 icon10-sjr'></i>广汽吉奥<em>(136)</em></a></h3></li></ul><div class='cartree-letter'>H</div><ul><li id='b24'><h3><a href='/price/brand-24.html'><i class='icon10 icon10-sjr'></i>哈飞<em>(75)</em></a></h3></li><li id='b181'><h3><a href='/price/brand-181.html'><i class='icon10 icon10-sjr'></i>哈弗<em>(210)</em></a></h3></li><li id='b150'><h3><a href='/price/brand-150.html'><i class='icon10 icon10-sjr'></i>海格<em>(35)</em></a></h3></li><li id='b86'><h3><a href='/price/brand-86.html'><i class='icon10 icon10-sjr'></i>海马<em>(245)</em></a></h3></li><li id='b43'><h3><a 
href='/price/brand-43.html'><i class='icon10 icon10-sjr'></i>悍马<em>(5)</em></a></h3></li><li id='b164'><h3><a href='/price/brand-164.html'><i class='icon10 icon10-sjr'></i>恒天<em>(15)</em></a></h3></li><li id='b91'><h3><a href='/price/brand-91.html'><i class='icon10 icon10-sjr'></i>红旗<em>(11)</em></a></h3></li><li id='b85'><h3><a href='/price/brand-85.html'><i class='icon10 icon10-sjr'></i>华普<em>(41)</em></a></h3></li><li id='b87'><h3><a href='/price/brand-87.html'><i class='icon10 icon10-sjr'></i>华泰<em>(108)</em></a></h3></li><li id='b97'><h3><a href='/price/brand-97.html'><i class='icon10 icon10-sjr'></i>黄海<em>(54)</em></a></h3></li></ul><div class='cartree-letter'>J</div><ul><li id='b46'><h3><a href='/price/brand-46.html'><i class='icon10 icon10-sjr'></i>Jeep<em>(155)</em></a></h3></li><li id='b25'><h3><a href='/price/brand-25.html'><i class='icon10 icon10-sjr'></i>吉利汽车<em>(437)</em></a></h3></li><li id='b84'><h3><a href='/price/brand-84.html'><i class='icon10 icon10-sjr'></i>江淮<em>(411)</em></a></h3></li><li id='b119'><h3><a href='/price/brand-119.html'><i class='icon10 icon10-sjr'></i>江铃<em>(79)</em></a></h3></li><li id='b210'><h3><a href='/price/brand-210.html'><i class='icon10 icon10-sjr'></i>江铃集团轻汽<em>(12)</em></a></h3></li><li id='b44'><h3><a href='/price/brand-44.html'><i class='icon10 icon10-sjr'></i>捷豹<em>(86)</em></a></h3></li><li id='b83'><h3><a href='/price/brand-83.html'><i class='icon10 icon10-sjr'></i>金杯<em>(198)</em></a></h3></li><li id='b145'><h3><a href='/price/brand-145.html'><i class='icon10 icon10-sjr'></i>金龙<em>(56)</em></a></h3></li><li id='b175'><h3><a href='/price/brand-175.html'><i class='icon10 icon10-sjr'></i>金旅<em>(7)</em></a></h3></li><li id='b151'><h3><a href='/price/brand-151.html'><i class='icon10 icon10-sjr'></i>九龙<em>(16)</em></a></h3></li></ul><div class='cartree-letter'>K</div><ul><li id='b109'><h3><a href='/price/brand-109.html'><i class='icon10 icon10-sjr'></i>KTM<em>(1)</em></a></h3></li><li id='b156'><h3><a 
href='/price/brand-156.html'><i class='icon10 icon10-sjr'></i>卡尔森<em>(4)</em></a></h3></li><li id='b199'><h3><a href='/price/brand-199.html'><i class='icon10 icon10-sjr'></i>卡威<em>(4)</em></a></h3></li><li id='b101'><h3><a href='/price/brand-101.html'><i class='icon10 icon10-sjr'></i>开瑞<em>(98)</em></a></h3></li><li id='b47'><h3><a href='/price/brand-47.html'><i class='icon10 icon10-sjr'></i>凯迪拉克<em>(114)</em></a></h3></li><li id='b100'><h3><a href='/price/brand-100.html'><i class='icon10 icon10-sjr'></i>科尼赛克<em>(4)</em></a></h3></li><li id='b9'><h3><a href='/price/brand-9.html'><i class='icon10 icon10-sjr'></i>克莱斯勒<em>(27)</em></a></h3></li></ul><div class='cartree-letter'>L</div><ul><li id='b48'><h3><a href='/price/brand-48.html'><i class='icon10 icon10-sjr'></i>兰博基尼<em>(21)</em></a></h3></li><li id='b118'><h3><a href='/price/brand-118.html'><i class='icon10 icon10-sjr'></i>劳伦士<em>(6)</em></a></h3></li><li id='b54'><h3><a href='/price/brand-54.html'><i class='icon10 icon10-sjr'></i>劳斯莱斯<em>(17)</em></a></h3></li><li id='b215'><h3><a href='/price/brand-215.html'><i class='icon10 icon10-sjr'></i>雷丁<em>(5)</em></a></h3></li><li id='b52'><h3><a href='/price/brand-52.html'><i class='icon10 icon10-sjr'></i>雷克萨斯<em>(148)</em></a></h3></li><li id='b10'><h3><a href='/price/brand-10.html'><i class='icon10 icon10-sjr'></i>雷诺<em>(99)</em></a></h3></li><li id='b124'><h3><a href='/price/brand-124.html'><i class='icon10 icon10-sjr'></i>理念<em>(15)</em></a></h3></li><li id='b80'><h3><a href='/price/brand-80.html'><i class='icon10 icon10-sjr'></i>力帆<em>(154)</em></a></h3></li><li id='b89'><h3><a href='/price/brand-89.html'><i class='icon10 icon10-sjr'></i>莲花汽车<em>(60)</em></a></h3></li><li id='b78'><h3><a href='/price/brand-78.html'><i class='icon10 icon10-sjr'></i>猎豹汽车<em>(90)</em></a></h3></li><li id='b51'><h3><a href='/price/brand-51.html'><i class='icon10 icon10-sjr'></i>林肯<em>(15)</em></a></h3></li><li id='b53'><h3><a href='/price/brand-53.html'><i class='icon10 
icon10-sjr'></i>铃木<em>(359)</em></a></h3></li><li id='b204'><h3><a href='/price/brand-204.html'><i class='icon10 icon10-sjr'></i>陆地方舟<em>(7)</em></a></h3></li><li id='b88'><h3><a href='/price/brand-88.html'><i class='icon10 icon10-sjr'></i>陆风<em>(131)</em></a></h3></li><li id='b49'><h3><a href='/price/brand-49.html'><i class='icon10 icon10-sjr'></i>路虎<em>(145)</em></a></h3></li><li id='b50'><h3><a href='/price/brand-50.html'><i class='icon10 icon10-sjr'></i>路特斯<em>(11)</em></a></h3></li></ul><div class='cartree-letter'>M</div><ul><li id='b20'><h3><a href='/price/brand-20.html'><i class='icon10 icon10-sjr'></i>MG<em>(120)</em></a></h3></li><li id='b56'><h3><a href='/price/brand-56.html'><i class='icon10 icon10-sjr'></i>MINI<em>(75)</em></a></h3></li><li id='b58'><h3><a href='/price/brand-58.html'><i class='icon10 icon10-sjr'></i>马自达<em>(176)</em></a></h3></li><li id='b57'><h3><a href='/price/brand-57.html'><i class='icon10 icon10-sjr'></i>玛莎拉蒂<em>(27)</em></a></h3></li><li id='b55'><h3><a href='/price/brand-55.html'><i class='icon10 icon10-sjr'></i>迈巴赫<em>(4)</em></a></h3></li><li id='b129'><h3><a href='/price/brand-129.html'><i class='icon10 icon10-sjr'></i>迈凯伦<em>(8)</em></a></h3></li><li id='b168'><h3><a href='/price/brand-168.html'><i class='icon10 icon10-sjr'></i>摩根<em>(11)</em></a></h3></li></ul><div class='cartree-letter'>N</div><ul><li id='b130'><h3><a href='/price/brand-130.html'><i class='icon10 icon10-sjr'></i>纳智捷<em>(44)</em></a></h3></li><li id='b213'><h3><a href='/price/brand-213.html'><i class='icon10 icon10-sjr'></i>南京金龙<em>(5)</em></a></h3></li></ul><div class='cartree-letter'>O</div><ul><li id='b60'><h3><a href='/price/brand-60.html'><i class='icon10 icon10-sjr'></i>讴歌<em>(27)</em></a></h3></li><li id='b59'><h3><a href='/price/brand-59.html'><i class='icon10 icon10-sjr'></i>欧宝<em>(48)</em></a></h3></li><li id='b146'><h3><a href='/price/brand-146.html'><i class='icon10 icon10-sjr'></i>欧朗<em>(10)</em></a></h3></li></ul><div 
class='cartree-letter'>Q</div><ul><li id='b26'><h3><a href='/price/brand-26.html'><i class='icon10 icon10-sjr'></i>奇瑞<em>(429)</em></a></h3></li><li id='b122'><h3><a href='/price/brand-122.html'><i class='icon10 icon10-sjr'></i>启辰<em>(32)</em></a></h3></li><li id='b62'><h3><a href='/price/brand-62.html'><i class='icon10 icon10-sjr'></i>起亚<em>(407)</em></a></h3></li></ul><div class='cartree-letter'>R</div><ul><li id='b63'><h3><a href='/price/brand-63.html'><i class='icon10 icon10-sjr'></i>日产<em>(423)</em></a></h3></li><li id='b19'><h3><a href='/price/brand-19.html'><i class='icon10 icon10-sjr'></i>荣威<em>(126)</em></a></h3></li><li id='b174'><h3><a href='/price/brand-174.html'><i class='icon10 icon10-sjr'></i>如虎<em>(2)</em></a></h3></li><li id='b103'><h3><a href='/price/brand-103.html'><i class='icon10 icon10-sjr'></i>瑞麒<em>(50)</em></a></h3></li></ul><div class='cartree-letter'>S</div><ul><li id='b45'><h3><a href='/price/brand-45.html'><i class='icon10 icon10-sjr'></i>smart<em>(57)</em></a></h3></li><li id='b64'><h3><a href='/price/brand-64.html'><i class='icon10 icon10-sjr'></i>萨博<em>(16)</em></a></h3></li><li id='b68'><h3><a href='/price/brand-68.html'><i class='icon10 icon10-sjr'></i>三菱<em>(249)</em></a></h3></li><li id='b149'><h3><a href='/price/brand-149.html'><i class='icon10 icon10-sjr'></i>陕汽通家<em>(26)</em></a></h3></li><li id='b155'><h3><a href='/price/brand-155.html'><i class='icon10 icon10-sjr'></i>上汽大通<em>(60)</em></a></h3></li><li id='b173'><h3><a href='/price/brand-173.html'><i class='icon10 icon10-sjr'></i>绅宝<em>(14)</em></a></h3></li><li id='b66'><h3><a href='/price/brand-66.html'><i class='icon10 icon10-sjr'></i>世爵<em>(1)</em></a></h3></li><li id='b90'><h3><a href='/price/brand-90.html'><i class='icon10 icon10-sjr'></i>双环<em>(82)</em></a></h3></li><li id='b69'><h3><a href='/price/brand-69.html'><i class='icon10 icon10-sjr'></i>双龙<em>(111)</em></a></h3></li><li id='b162'><h3><a href='/price/brand-162.html'><i class='icon10 
icon10-sjr'></i>思铭<em>(2)</em></a></h3></li><li id='b65'><h3><a href='/price/brand-65.html'><i class='icon10 icon10-sjr'></i>斯巴鲁<em>(167)</em></a></h3></li><li id='b67'><h3><a href='/price/brand-67.html'><i class='icon10 icon10-sjr'></i>斯柯达<em>(198)</em></a></h3></li></ul><div class='cartree-letter'>T</div><ul><li id='b202'><h3><a href='/price/brand-202.html'><i class='icon10 icon10-sjr'></i>泰卡特<em>(7)</em></a></h3></li><li id='b133'><h3><a href='/price/brand-133.html'><i class='icon10 icon10-sjr'></i>特斯拉<em>(3)</em></a></h3></li><li id='b161'><h3><a href='/price/brand-161.html'><i class='icon10 icon10-sjr'></i>腾势<em>(2)</em></a></h3></li></ul><div class='cartree-letter'>W</div><ul><li id='b102'><h3><a href='/price/brand-102.html'><i class='icon10 icon10-sjr'></i>威麟<em>(27)</em></a></h3></li><li id='b99'><h3><a href='/price/brand-99.html'><i class='icon10 icon10-sjr'></i>威兹曼<em>(7)</em></a></h3></li><li id='b70'><h3><a href='/price/brand-70.html'><i class='icon10 icon10-sjr'></i>沃尔沃<em>(241)</em></a></h3></li><li id='b114'><h3><a href='/price/brand-114.html'><i class='icon10 icon10-sjr'></i>五菱汽车<em>(69)</em></a></h3></li><li id='b167'><h3><a href='/price/brand-167.html'><i class='icon10 icon10-sjr'></i>五十铃<em>(12)</em></a></h3></li></ul><div class='cartree-letter'>X</div><ul><li id='b98'><h3><a href='/price/brand-98.html'><i class='icon10 icon10-sjr'></i>西雅特<em>(13)</em></a></h3></li><li id='b12'><h3><a href='/price/brand-12.html'><i class='icon10 icon10-sjr'></i>现代<em>(418)</em></a></h3></li><li id='b185'><h3><a href='/price/brand-185.html'><i class='icon10 icon10-sjr'></i>新凯<em>(3)</em></a></h3></li><li id='b71'><h3><a href='/price/brand-71.html'><i class='icon10 icon10-sjr'></i>雪佛兰<em>(263)</em></a></h3></li><li id='b72'><h3><a href='/price/brand-72.html'><i class='icon10 icon10-sjr'></i>雪铁龙<em>(289)</em></a></h3></li></ul><div class='cartree-letter'>Y</div><ul><li id='b111'><h3><a href='/price/brand-111.html'><i class='icon10 
icon10-sjr'></i>野马汽车<em>(20)</em></a></h3></li><li id='b110'><h3><a href='/price/brand-110.html'><i class='icon10 icon10-sjr'></i>一汽<em>(218)</em></a></h3></li><li id='b144'><h3><a href='/price/brand-144.html'><i class='icon10 icon10-sjr'></i>依维柯<em>(19)</em></a></h3></li><li id='b73'><h3><a href='/price/brand-73.html'><i class='icon10 icon10-sjr'></i>英菲尼迪<em>(109)</em></a></h3></li><li id='b192'><h3><a href='/price/brand-192.html'><i class='icon10 icon10-sjr'></i>英致<em>(6)</em></a></h3></li><li id='b93'><h3><a href='/price/brand-93.html'><i class='icon10 icon10-sjr'></i>永源<em>(70)</em></a></h3></li></ul><div class='cartree-letter'>Z</div><ul><li id='b206'><h3><a href='/price/brand-206.html'><i class='icon10 icon10-sjr'></i>知豆<em>(1)</em></a></h3></li><li id='b22'><h3><a href='/price/brand-22.html'><i class='icon10 icon10-sjr'></i>中华<em>(190)</em></a></h3></li><li id='b74'><h3><a href='/price/brand-74.html'><i class='icon10 icon10-sjr'></i>中兴<em>(66)</em></a></h3></li><li id='b94'><h3><a href='/price/brand-94.html'><i class='icon10 icon10-sjr'></i>众泰<em>(117)</em></a></h3></li></ul>";
// Brand tree filled by Step1 (brands) and Step2 (series of each brand).
private CarTree cartree = new CarTree();
private boolean bDownloadImage = false;// whether to download images, default false
// Set from the command line in main(); presumably toggles crawling of model details (per the class Javadoc) - the consuming code is outside this view.
private boolean bGetModelDetail = true;
/**
 * Command-line entry point.
 *
 * <p>Applies the optional arguments, prepares the output directory and then repeats
 * "read checkpoint, play the notification sound, count down 5 seconds, run start()" until
 * {@code start()} returns {@code true}. An exception thrown by {@code start()} is printed
 * and counts as a failed attempt.
 *
 * @param args {@code args[0]} output root directory; {@code args[1]} {@code "true"} to
 *             download images; {@code args[2]} {@code "true"} to fetch model details.
 *             Any value other than the exact text {@code "true"} is treated as false.
 */
public static void main(String[] args) {
    QCZJmain q = new QCZJmain();
    if (args != null) {
        if (args.length >= 1) {
            q.setDIR_ROOT(args[0]);
            System.out.println("reset DIR_ROOT : "+q.getDIR_ROOT());
        }
        if (args.length >= 2) {
            q.setbDownloadImage("true".equals(args[1]));
        }
        if (args.length >= 3) {
            // Fix: the third option is read from args[2]; it used to re-read args[1],
            // so the model-detail flag silently mirrored the download-image flag.
            q.setbGetModelDetail("true".equals(args[2]));
        }
    }
    // Must run after DIR_ROOT is final: init() derives the file paths from it.
    q.init();
    File ftemp = new File(q.getDIR_ROOT());
    System.out.println("DIR_ROOT : " + ftemp.getAbsolutePath());
    boolean bsucces = false;
    while (!bsucces) {
        // Reload the checkpoint before every attempt.
        q.readConfig();
        q.play();
        for (int i = 5; i > 0; i--) {
            try {
                System.out.println("wait "+i+" s");
                Thread.sleep(1000);
            } catch (InterruptedException e1) {
                e1.printStackTrace();
            }
        }
        try {
            bsucces = q.start();
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (!bsucces) {
            // NOTE(review): the message announces 5 minutes but the pause below is
            // 5000 ms (5 seconds) - confirm which delay is intended.
            println("Retry.. in 5 Minutes ");
            try {
                Thread.sleep(5000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
    println("=============== Finish ==============");
}
/**
 * Starts playback of the bundled classpath resource {@code /[000279].wav}.
 * Any exception raised while loading or starting the clip is printed and otherwise ignored.
 */
public void play(){
    try {
        AudioStream clip = new AudioStream(this.getClass().getResourceAsStream("/[000279].wav"));
        AudioPlayer.player.start(clip);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
//
// Resume checkpoint loaded by readConfig(): Step3 skips brands whose index is below this value.
private int carBrandsNumber = 0;
// Resume checkpoint: Step3 skips series (within a brand) whose index is below this value.
private int carSerieNumber = 0;
// Loaded from the 4th field of a config line; its consumer is outside this view.
private int carYearNumber = 0;
// Loaded from the 3rd field of a config line; its consumer is outside this view.
private int carImagesNumber = 0;
// Scratch variable used by readConfig() while parsing integers.
private int intTemp = 0;
// Initial paths are built from the default DIR_ROOT; init() recomputes them from the current DIR_ROOT.
private String configFilePath = this.DIR_ROOT + "/config.ini";
private String exceptionLogFilePath = this.DIR_ROOT + "/error.log";
/**
 * Creates the output root directory when it does not exist yet and re-derives the
 * config, series-list and error-log file paths from the current {@code DIR_ROOT}.
 */
public void init(){
    File rootDir = new File(DIR_ROOT);
    boolean missing = !rootDir.exists();
    if (missing) {
        rootDir.mkdirs();
    }
    final String prefix = this.DIR_ROOT;
    configFilePath = prefix + "/config.ini";
    carserielistPath = prefix + "/carserielist.txt";
    exceptionLogFilePath = prefix + "/error.log";
}
/**
 * Loads the resume checkpoint from the config file, if that file exists, and prints the
 * resulting values.
 *
 * <p>Every non-blank line with at least four comma-separated fields is read as
 * {@code brandIndex,serieIndex,imageIndex,yearIndex[,...]}. All matching lines are applied
 * in order, so the last valid line of the file wins. Lines whose first four fields are not
 * integers are reported and skipped instead of aborting the run. Reading stops at the first
 * I/O error, and the reader is always closed.
 */
public void readConfig(){
    File configfile = new File(this.configFilePath);
    if (configfile.exists()) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(configfile)));
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] items = line.split(",");
                if (items.length < 4) {
                    continue;
                }
                try {
                    // Parse all four values first so a malformed line cannot leave the
                    // checkpoint half-updated.
                    int brands = Integer.parseInt(items[0].trim());
                    int series = Integer.parseInt(items[1].trim());
                    int images = Integer.parseInt(items[2].trim());
                    int years = Integer.parseInt(items[3].trim());
                    carBrandsNumber = brands;
                    carSerieNumber = series;
                    carImagesNumber = images;
                    carYearNumber = years;
                    // The scratch field keeps holding the last parsed value.
                    intTemp = years;
                } catch (NumberFormatException e) {
                    System.out.println("readConfig skip malformed line: " + line);
                }
            }
        } catch (IOException e) {
            // Covers a missing file as well as read failures; stop reading rather than
            // retrying the same failing read forever.
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }
    System.out.println("readConfig "+configfile.getAbsolutePath());
    System.out.println("carBrandsNumber carSerieNumber carImagesNumber carYearNumber");
    System.out.println(carBrandsNumber +" \t "+ carSerieNumber +" \t "+ carImagesNumber +" \t "+ carYearNumber);
}
/**
 * Writes one checkpoint record to the given stream, formatted as
 * {@code sum1,sum2,sum4,sum3, <timestamp>} followed by CRLF. The first four fields are in
 * the order that {@code readConfig()} parses them. An {@link IOException} raised by the
 * write is printed and otherwise ignored.
 *
 * @param sum1     first field of the record
 * @param sum2     second field of the record
 * @param sum4     third field of the record
 * @param sum3     fourth field of the record
 * @param cofigout stream that receives the record
 */
public void saveConfig(int sum1, int sum2, int sum4, int sum3, FileOutputStream cofigout){
    StringBuilder record = new StringBuilder();
    record.append(sum1).append(",");
    record.append(sum2).append(",");
    record.append(sum4).append(",");
    record.append(sum3).append(", ");
    record.append(sdf.format(new Date())).append("\r\n");
    try {
        cofigout.write(record.toString().getBytes());
    } catch (IOException writeFailure) {
        writeFailure.printStackTrace();
    }
}
/**
 * Writes the text to standard output without a line terminator.
 *
 * @param str text to print
 */
public static void print(final String str) {
    System.out.print(str);
}
/**
 * Writes the text to standard output followed by a line terminator.
 *
 * @param str text to print
 */
public static void println(final String str) {
    System.out.println(str);
}
/**
 * Step 1: builds the brand list from the embedded {@code carhtml} snippet.
 *
 * <p>The {@code <em>} elements inside the list items are removed first so they do not end
 * up in the link text. Each remaining {@code ul li a} link then becomes a
 * {@link CarBrands} whose name is the link text and whose URL is {@code BASE_URL} plus the
 * link's href.
 */
public void Step1(){
    println("======= Step 1 ======");
    Document brandDoc = Jsoup.parse(carhtml);
    Elements counters = brandDoc.select("ul li em");
    if (counters != null) {
        counters.remove();
    }
    Elements anchors = brandDoc.select("ul li a");
    for (Element anchor : anchors) {
        if (anchor == null) {
            continue;
        }
        String brandUrl = BASE_URL + anchor.attr("href");
        cartree.add(new CarBrands(anchor.text(), brandUrl));
    }
    println("cartree.size=" + cartree.getTree().size());
}
// File that Step2 writes the brand/series listing to; init() recomputes it from the current DIR_ROOT.
private String carserielistPath = this.DIR_ROOT + "/carserielist.txt";
/**
 * Step 2: fetches the series of every brand in the tree and records them.
 *
 * <p>For each brand the left-menu endpoint is requested with the brand id. Inside the
 * returned markup, the element with id {@code "b" + bid} is located and each
 * {@code dl dd a} link in it becomes a {@link CarSerie} attached to the brand. Brand and
 * series lines are also written to the series-list file, which is overwritten on each run.
 *
 * <p>The output file, each request's connection and the HTTP client are released even
 * when a request or write fails, and an empty brand list no longer causes a
 * {@link NullPointerException} during cleanup.
 *
 * @throws ClientProtocolException on an HTTP protocol error
 * @throws IOException             when a request or a file write fails
 */
public void Step2() throws ClientProtocolException, IOException{
    println("======= Step 2 ======");
    HttpClient httpclient = new DefaultHttpClient();
    httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
    File carserielistFile = new File(carserielistPath);
    FileOutputStream out = new FileOutputStream(carserielistFile);
    try {
        int sum = 0;
        String str;
        for (CarBrands carbs : cartree.getTree()) {
            sum++;
            if (debug && sum > MAX_DEBUG_LINE) {
                break;
            }
            str = "Brand="+carbs.getName() + " \r\n";
            out.write(str.getBytes());
            // Ask the left-menu endpoint for this brand's series links.
            HttpGet get = new HttpGet( Item_URL + "typeId=1&brandId="+carbs.getBid()+"&fctId=0&seriesId=0");
            try {
                HttpResponse httpResponse = httpclient.execute(get);
                String htmlstring = EntityUtils.toString(httpResponse.getEntity());
                Document html = Jsoup.parse(htmlstring);
                Element curli = html.getElementById("b"+carbs.getBid());
                if (curli != null) {
                    // Drop the <em> elements so they do not end up in the series name.
                    curli.select("dl dd a em").remove();
                    Elements series = curli.select("dl dd a");
                    println(carbs.getName() + " ");
                    for (Element serie : series) {
                        String href = BASE_URL + serie.attr("href");
                        String name = serie.text();
                        CarSerie serieCarbrands = new CarSerie(name, href);
                        carbs.add(serieCarbrands);
                        str = " serie="+serieCarbrands.getName()+" = " + serieCarbrands.getUrl() + "\r\n";
                        print( str );
                        out.write(str.getBytes());
                    }
                }
            } finally {
                // Release every request, not only the last one.
                get.releaseConnection();
            }
        }
    } finally {
        try {
            out.close();
        } finally {
            // The client is local to this method, so its connections are released here.
            httpclient.getConnectionManager().shutdown();
        }
    }
}
// Timestamp format (yyyyMMddHHmmss) used for checkpoint records in saveConfig() and for error-log entries in Step3.
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
/**
 * Step 3: walks brand -> series -> model for everything in {@code cartree}.
 *
 * For each series it fetches the series page, records price and extra info, and (when
 * {@code bGetModelDetail} is set) follows every tab link of the series page to collect the
 * individual models, their configuration and, when {@code bDownloadImage} is set, their images.
 * Text output is appended to {@code DIR_ROOT/carlist.txt}; failures are appended to the file at
 * {@code exceptionLogFilePath}; progress counters are stored through {@code saveConfig}.
 *
 * Resume support: while {@code carBrandsNumber}, {@code carSerieNumber} or {@code carYearNumber}
 * is positive, entries with a smaller index are skipped; each counter is reset to 0 as soon as
 * the first non-skipped entry at that level is reached.
 *
 * @return {@code true} when the whole tree was processed and the output files closed cleanly;
 *         {@code false} when a series could not be fetched or closing the files failed
 * @throws IOException if the output files cannot be opened or the exception log cannot be written
 */
public boolean Step3() throws IOException {
    println("======= Step 3 ======");
    httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 20000);// connect timeout: 20 s
    httpclient.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, 60000);
    httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
    File configFile = new File(configFilePath);
    File carlistfile = new File( this.DIR_ROOT+"/carlist.txt");
    File exceptionLogFile = new File( exceptionLogFilePath );
    FileOutputStream cofigout = null;
    FileOutputStream out = null;
    FileOutputStream exceptionout = null;
    try {
        cofigout = new FileOutputStream(configFile, true);
        out = new FileOutputStream(carlistfile, true);
        exceptionout = new FileOutputStream(exceptionLogFile, true);
        String tempstr="";
        String str = "";
        int sum1 = 0; // brand index
        int sum2 = 0; // series index within the current brand
        int sum3 = 0; // model index within the current tab page
        int sum4 = 0; // image count returned by the last SetImagesList call
        for(CarBrands carbs : cartree.getTree()){ // brands
            if(debug && sum1 > this.MAX_DEBUG_LINE){
                break;
            }
            if( carBrandsNumber>0 && sum1 < this.carBrandsNumber){
                println("carBrandsNumber: "+ sum1 +" < "+carBrandsNumber );
                sum1 ++;
                continue;
            }else{
                carBrandsNumber = 0;
            }
            this.saveConfig(sum1, sum2, sum4, sum3, cofigout);
            println(sum1+" : "+ carbs.getName());// brand name
            str = "brand=" + carbs.getName() + "\r\n" ;
            try {
                out.write( str.getBytes() );
            } catch (IOException e1) {
                tempstr = sdf.format(new Date())+"\r\n"+e1.getMessage();
                exceptionout.write( tempstr.getBytes());
                e1.printStackTrace();
            }
            File fcar = new File(DIR_ROOT + "/"+ carbs.getName());
            if(!fcar.exists())
                fcar.mkdirs();
            sum2 = 0;
            for(CarSerie serie : carbs.getSeries()){ // series
                if(debug && sum2 > MAX_DEBUG_LINE){
                    break;
                }
                if( carSerieNumber>0 && sum2 < this.carSerieNumber){
                    println(" carSerieNumber: "+ sum2 +" < "+carSerieNumber );
                    sum2 ++;
                    continue;
                }else{
                    carSerieNumber = 0;
                }
                this.saveConfig(sum1, sum2, -1, -1, cofigout);
                print( " serie=" +serie.getName() );// series name
                str = " serie="+serie.getName() + " " ;
                try {
                    out.write( str.getBytes() );
                } catch (IOException e1) {
                    tempstr = sdf.format(new Date())+"\r\n"+e1.getMessage() + e1.getLocalizedMessage();
                    exceptionout.write( tempstr.getBytes());
                    e1.printStackTrace();
                }
                File fcarserie = new File( fcar.getAbsolutePath()+ "/"+ serie.getName());
                if(!fcarserie.exists())
                    fcarserie.mkdirs();
                try {
                    get = new HttpGet( serie.getUrl() );// open the series page
                    httpResponse = httpclient.execute(get);
                    String htmlstring = EntityUtils.toString( httpResponse.getEntity() );
                    Document html = Jsoup.parse(htmlstring);
                    // Series-level price and extra info.
                    GetDetailSerie(serie, html);
                    str = " [指导价: "+ serie.getPrice() + "," + serie.getExtInfo()+" ]";
                    println(str);
                    try{
                        out.write( str.getBytes() );
                    }catch(Exception e){
                        tempstr = sdf.format(new Date())+"\r\n"+e.getMessage() + e.getLocalizedMessage();
                        exceptionout.write( tempstr.getBytes());
                        e.printStackTrace();
                    }
                    if(this.bGetModelDetail)
                    {
                        // Tab links on the series page (on sale / discontinued / pre-sale).
                        Elements brandtabs = html.select(".row .brandtab-cont .tab-nav ul li a");
                        if(brandtabs!=null){
                            for(Element brandtabItem : brandtabs){
                                if(brandtabItem!=null){
                                    String brandtabhref = brandtabItem.attr("href");// tab link
                                    println(" "+ brandtabItem.text());
                                    if(brandtabhref!=null && !brandtabhref.trim().isEmpty()){
                                        get = new HttpGet( this.BASE_URL + brandtabhref );// model list behind this tab
                                        httpResponse = httpclient.execute(get);
                                        String htmlstringBrand = EntityUtils.toString( httpResponse.getEntity() );
                                        Document htmlBrand = Jsoup.parse(htmlstringBrand);
                                        Elements interval01List = htmlBrand.select("div.intervalcont .interval01 .interval01-list li ");
                                        sum3 = 0;
                                        for(Element interval : interval01List){
                                            Elements carsinfo = interval.select(".interval01-list-cars .infor-title a");
                                            if(carsinfo != null){
                                                CarYear caryear = new CarYear( carsinfo.text() );// model name
                                                if(debug && sum3 > MAX_DEBUG_LINE){
                                                    break;
                                                }
                                                if( carYearNumber>0 && sum3 < this.carYearNumber){
                                                    println(" carYearNumber: "+ sum3 +" < "+carYearNumber );
                                                    sum3 ++;
                                                    continue;
                                                }else{
                                                    carYearNumber = 0;
                                                }
                                                print( " " +caryear.getName() );
                                                str = " "+caryear.getName() + " " ;
                                                try {
                                                    out.write( str.getBytes() );// model name
                                                } catch (IOException e1) {
                                                    tempstr = sdf.format(new Date())+"\r\n"+e1.getMessage() + e1.getLocalizedMessage();
                                                    exceptionout.write( tempstr.getBytes());
                                                    e1.printStackTrace();
                                                }
                                                CarModels carModel = new CarModels();
                                                carModel.setName(caryear.getName());// model name
                                                // 1: guidance price
                                                Elements guidance = interval.select(".interval01-list-guidance .guidance-price");
                                                if(guidance != null)
                                                    carModel.setPrice(guidance.text());
                                                // 2: configuration details are fetched from the model URL
                                                carModel.setUrl(carsinfo.attr("href"));
                                                GetDetailModel(carModel);
                                                // 3: picture page link
                                                Elements related = interval.select(".interval01-list-related a[href^=/pic]");
                                                if(related!=null){
                                                    carModel.setImageurl( this.BASE_URL + related.attr("href") );
                                                    println( " imageurl = "+carModel.getImageurl() );
                                                    // Directory that receives this model's images.
                                                    File fcaryear= new File( fcarserie.getAbsolutePath()+ "/"+ caryear.getName());
                                                    if(!fcaryear.exists())
                                                        fcaryear.mkdirs();
                                                    if(this.bDownloadImage){
                                                        sum4 = SetImagesList(carModel.getImageurl(), carModel, fcaryear, cofigout, sum1, sum2, sum3);
                                                    }else{
                                                        this.saveConfig(sum1, sum2, 0, sum3, cofigout);
                                                    }
                                                }
                                                str = ", 指导价="+carModel.getPrice() + " , 车身结构="+carModel.getStructure()+
                                                    ", 发动机="+carModel.getEngine()+", 变速箱="+carModel.getTransmission()+", imagepageurl="+carModel.getImageurl()+" \r\n" ;
                                                try{
                                                    out.write( str.getBytes() );
                                                }catch(Exception e){
                                                    tempstr = sdf.format(new Date())+"\r\n"+e.getMessage() + e.getLocalizedMessage();
                                                    exceptionout.write( tempstr.getBytes());
                                                    e.printStackTrace();
                                                }
                                                caryear.add(carModel);
                                                serie.add(caryear);
                                                this.saveConfig(sum1, sum2, sum4, sum3, cofigout);
                                            }
                                            sum3 ++ ;
                                        }// end of for(Element interval : interval01List)
                                    }
                                }
                            }
                        }
                    }
                } catch (IOException e) {
                    // ClientProtocolException is an IOException and was handled identically,
                    // so a single handler covers both.
                    if(get!=null)
                        get.releaseConnection();
                    e.printStackTrace();
                    tempstr = sdf.format(new Date())+"\r\n"+e.getMessage();
                    exceptionout.write( tempstr.getBytes());
                    return false;
                }
                sum2 ++;
            }
            sum1 ++;
        }
        if(get!=null)
            get.releaseConnection();
        try {
            cofigout.close();
            out.close();
            exceptionout.close();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    } finally {
        // Covers the early "return false" paths and any propagating exception, which
        // previously left all three files open. Closing an already closed stream is harmless.
        closeStreamQuietly(cofigout);
        closeStreamQuietly(out);
        closeStreamQuietly(exceptionout);
    }
}

// Closes the given stream, tolerating a null reference and ignoring a failure of close().
private void closeStreamQuietly(FileOutputStream stream) {
    if (stream == null) {
        return;
    }
    try {
        stream.close();
    } catch (IOException ignored) {
        // Best effort: the caller is already on its way out.
    }
}
/**
 * Copies series-level information from an already parsed series page into {@code serie}:
 * the guidance price, the raw HTML of the info list and the text of each info list entry.
 * The colour entries are removed from the document before the info list is read.
 *
 * @param serie the series to fill in
 * @param html  the parsed series page
 */
public void GetDetailSerie(CarSerie serie, Document html){
    Elements mainLever = html.select(".car-cont .list-cont-main .main-lever");
    if (mainLever == null) {
        return;
    }

    // Guidance price.
    Elements guidePrice = mainLever.select(".main-lever-right .lever-price");
    if (guidePrice != null) {
        serie.setPrice(guidePrice.text());
    }

    // Body-colour entries are dropped so they do not show up in the info list below.
    Elements colourEntries = mainLever.select(".main-lever-left ul.lever-ul .lever-ul-color");
    if (colourEntries != null) {
        colourEntries.remove();
    }

    // Remaining list entries (level, body structure, engine, transmission).
    Elements infoEntries = mainLever.select(".main-lever-left ul.lever-ul li");
    if (infoEntries == null) {
        return;
    }
    serie.setExtInfoHtml(infoEntries.html());
    for (int i = 0; i < infoEntries.size(); i++) {
        serie.addExtInfo(infoEntries.get(i).text());
    }
}
/**
 * Fetches the page at {@code carModel.getUrl()} and fills in the model's size, body structure,
 * engine and transmission from the detail list on that page. Does nothing when the URL is null.
 *
 * Each list entry is matched against the labels in a fixed order (size, structure, then any
 * text containing "机", then "箱"); the first match wins. The entry's {@code span} children are
 * removed before its text is stored.
 *
 * Failures while fetching or parsing are reported with a stack trace and otherwise ignored,
 * so the caller can carry on with the next model.
 *
 * @param carModel the model to complete; its URL selects the page to fetch
 */
public void GetDetailModel( CarModels carModel){
    if(carModel.getUrl()==null)
        return ;
    HttpGet request = new HttpGet( carModel.getUrl() );
    get = request;
    try{
        httpResponse = httpclient.execute(request);
        String htmlstring = EntityUtils.toString(httpResponse.getEntity());
        Document html = Jsoup.parse(htmlstring);
        Elements cardetails = html.select(".cardetail-infor .cardetail-infor-car li");
        for(Element cardetail : cardetails){
            if (detailMentions(cardetail, "车身尺寸")) {
                carModel.setSize(detailTextWithoutSpans(cardetail));
            } else if (detailMentions(cardetail, "车身结构")) {
                carModel.setStructure(detailTextWithoutSpans(cardetail));
            } else if (detailMentions(cardetail, "机")) {
                carModel.setEngine(detailTextWithoutSpans(cardetail));
            } else if (detailMentions(cardetail, "箱")) {
                carModel.setTransmission(detailTextWithoutSpans(cardetail));
            }
        }
    }catch(Exception e){
        // Previously swallowed silently; report it like the other fetch methods do.
        e.printStackTrace();
    }finally{
        // Hand the connection back even when the request or the parsing failed.
        request.releaseConnection();
    }
}

// True when the entry or one of its descendants contains the given label text.
private boolean detailMentions(Element detail, String label) {
    Elements hits = detail.getElementsContainingText(label);
    return hits != null && hits.size() > 0;
}

// Removes the entry's span children and returns the text that is left.
private String detailTextWithoutSpans(Element detail) {
    detail.select("span").remove();
    return detail.text();
}
/**
 * Downloads the images listed on a model's picture page into {@code fcaryear}.
 *
 * Every list entry links to a viewer page; the element with id {@code img} on that page supplies
 * the full-size image URL. Images are stored as {@code <model name>_<index>.jpg}. Entries whose
 * target file already exists are skipped. While {@code carImagesNumber} is positive, entries
 * with a smaller index are skipped as well (resume support); the counter is reset to 0 once the
 * first non-skipped entry is reached. Progress is stored through {@code saveConfig} after each
 * processed entry.
 *
 * Failures are reported with a stack trace and do not propagate.
 *
 * @param imagepage URL of the picture list page
 * @param carModel  model the images belong to; receives a {@code CarSerieImage} per image found
 * @param fcaryear  directory the images are written to
 * @param cofigout  stream passed through to {@code saveConfig}
 * @param sum1      brand index, passed through to {@code saveConfig}
 * @param sum2      series index, passed through to {@code saveConfig}
 * @param sum3      model index, passed through to {@code saveConfig}
 * @return the index reached in the picture list
 */
public int SetImagesList(String imagepage, CarModels carModel, File fcaryear, FileOutputStream cofigout,
        int sum1, int sum2, int sum3){
    int sum4 = 0;
    try{
        get = new HttpGet( imagepage );
        httpResponse = httpclient.execute(get);
        String htmlstring = EntityUtils.toString(httpResponse.getEntity());
        Document html = Jsoup.parse(htmlstring);
        Elements imagesElements = html.select(".row .column .uibox .uibox-con ul li>a"); // picture list
        for(Element em : imagesElements){
            if(debug && sum4 > MAX_DEBUG_LINE){
                break;
            }
            if(this.carImagesNumber>0 && sum4 < this.carImagesNumber){
                println(" carImagesNumber: "+sum4 +" < "+carImagesNumber );
                sum4 ++;
                continue;
            }else{
                carImagesNumber = 0;
            }
            String imageName = carModel.getName()+"_"+sum4+".jpg";
            File storeFile = new File( fcaryear.getAbsolutePath() + "/" + imageName );
            if(storeFile.exists()){
                println("ignore exist file @ "+storeFile.getAbsolutePath());
                // Advance the index: the file name is derived from it, so without this every
                // following entry would map to the same existing file and be skipped too.
                sum4 ++;
                continue;
            }
            if(em!=null){
                String href = this.BASE_URL + em.attr("href");// viewer page of this picture
                try{
                    get = new HttpGet( href );
                    httpResponse = httpclient.execute(get);
                    String htmlstring2 = EntityUtils.toString(httpResponse.getEntity());
                    Document html2 = Jsoup.parse(htmlstring2);
                    Element img = html2.getElementById("img");// full-size image element
                    if(img!=null){
                        CarSerieImage im = new CarSerieImage( carModel.getName(), img.attr("src"));
                        carModel.add(im);
                        print( " " +im.getTitle()+" img = "+im.getSrc() );
                        downloadPhotos(im.getSrc(), fcaryear.getAbsolutePath(), imageName);
                        println("");
                    }
                }catch(Exception e){
                    // One broken picture must not stop the remaining ones.
                    e.printStackTrace();
                }
            }
            sum4 ++ ;
            this.saveConfig(sum1, sum2, sum4, sum3, cofigout);
        }
    }catch(Exception e){
        e.printStackTrace();
    }
    return sum4;
}
// HTTP client shared by Step3, GetDetailModel, SetImagesList and downloadPhotos.
HttpClient httpclient = new DefaultHttpClient();
// Most recently created request; overwritten by each of the methods above.
HttpGet get = null;
// Response of the most recently executed request.
HttpResponse httpResponse = null;
/**
 * Downloads {@code url} and stores the response body as {@code savePath/saveNamge}.
 *
 * The body is read completely before the target file is opened, so a failed download does not
 * leave an empty file behind. Failures are reported with a stack trace and do not propagate.
 *
 * @param url       address of the resource to download
 * @param savePath  directory the file is written to
 * @param saveNamge file name inside {@code savePath}
 */
public void downloadPhotos (String url, String savePath, String saveNamge){
    httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
    HttpGet request = null;
    try {
        request = new HttpGet( url );
        get = request;
        httpResponse = httpclient.execute(request);
        // Fetch the bytes first; only create the file once there is something to write.
        byte[] imageBytes = EntityUtils.toByteArray(httpResponse.getEntity());
        File storeFile = new File( savePath + "/" + saveNamge );
        FileOutputStream output = new FileOutputStream(storeFile);
        try {
            output.write( imageBytes );
        } finally {
            output.close();
        }
        print( " saved image @ "+storeFile.getAbsolutePath() );
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (request != null) {
            request.releaseConnection();
        }
    }
}
/**
 * Step 4: writes the collected tree (brand, series, model name and one detail line per model)
 * to {@code DIR_ROOT/carlist.txt}, replacing any previous content, prints the brand and series
 * totals and then pauses for three seconds.
 *
 * @throws ClientProtocolException declared for signature compatibility with the other steps
 * @throws IOException if the file cannot be opened or written
 */
public void Step4() throws ClientProtocolException, IOException{
    println("======= Step 4 save picture data ======");
    File froot = new File(DIR_ROOT);
    if(!froot.exists())
        froot.mkdirs();
    File carlistfile = new File(froot.getAbsolutePath()+"/carlist.txt");
    FileOutputStream out = new FileOutputStream(carlistfile);
    int sumcars = 0;
    int sumserie = 0;
    try {
        String str;
        for(CarBrands carbs : cartree.getTree()){
            sumcars ++;
            str = carbs.getName() + "\r\n" ;
            out.write( str.getBytes() );
            for(CarSerie serie : carbs.getSeries()){
                sumserie ++;
                str = " "+serie.getName() + "\r\n" ;
                out.write( str.getBytes() );
                for(CarYear caryear : serie.getCarYearList()){
                    str = " "+caryear.getName() + "\r\n" ;
                    out.write( str.getBytes() );
                    for(CarModels carModel : caryear.getCarModels()){
                        str = ", 指导价="+carModel.getPrice() + " , 车身结构="+carModel.getStructure()+
                            ", 发动机="+carModel.getEngine()+", 变速箱="+carModel.getTransmission()+", imagepageurl="+carModel.getImageurl()+" \r\n" ;
                        out.write( str.getBytes() );
                    }
                }
            }
        }
    } finally {
        // Close the file even when one of the writes fails.
        out.close();
    }
    println("sumcars = "+sumcars + " sumserie = "+sumserie);
    try {
        Thread.sleep(3000);
    } catch (InterruptedException e) {
        e.printStackTrace();
        // Keep the interrupt visible to whoever runs this thread.
        Thread.currentThread().interrupt();
    }
}
// Set to true by start() once Step1 and Step2 have been run; later start() calls skip them.
boolean bstarted = false;
/**
 * Runs the crawl: Step1 and Step2 on the first call only, Step3 on every call, then prints the
 * start time, end time and elapsed time. Step4 and Step5 are not invoked from here.
 *
 * @return the result of Step3
 * @throws ClientProtocolException if one of the steps fails at the HTTP protocol level
 * @throws IOException if one of the steps fails with an I/O error
 */
public boolean start() throws ClientProtocolException, IOException{
    final long startedAt = System.currentTimeMillis();

    // Brand and series lists only need to be fetched once per instance.
    if (!bstarted) {
        this.Step1();
        this.Step2();
    }
    bstarted = true;

    final boolean succeeded = this.Step3();

    final long finishedAt = System.currentTimeMillis();
    final long elapsedSeconds = (finishedAt - startedAt) / 1000;
    final long hour = elapsedSeconds / 3600;
    final long minite = (elapsedSeconds % 3600) / 60;
    final long sec = elapsedSeconds % 60;

    println("start at "+sdf.format(new Date(startedAt)));
    println("end at "+sdf.format(new Date(finishedAt)));
    println("it takes "+hour+" h "+minite+" m "+sec+" s ." );
    return succeeded;
}
/**
 * Step 5: for every series of every brand in {@code cartree}, fetches the series URL, follows
 * each entry of the picture list to its viewer page and downloads the image referenced by the
 * element with id {@code img} into {@code DIR_ROOT/<brand>/<series>/}.
 *
 * In debug mode at most {@code MAX_DEBUG_LINE} brands, series per brand and pictures per series
 * are processed. A failure inside one series is reported with a stack trace and the loop
 * continues with the next series.
 */
public void Step5(){
    println("======= Step 5 ======");
    HttpClient httpclient = new DefaultHttpClient();
    httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
    File froot = new File(DIR_ROOT);
    if(!froot.exists())
        froot.mkdirs();
    int sum = 0;
    for(CarBrands carbs : cartree.getTree()){
        println(carbs.getName());
        File fcar = new File(DIR_ROOT + "/"+ carbs.getName());
        if(!fcar.exists())
            fcar.mkdirs();
        sum ++ ;
        if(debug && sum > MAX_DEBUG_LINE){
            break;
        }
        int sum2 = 0;
        for(CarSerie serie : carbs.getSeries()){
            File fcarserie = new File( fcar.getAbsolutePath()+ "/"+ serie.getName());
            if(!fcarserie.exists())
                fcarserie.mkdirs();
            sum2++;
            if(debug && sum2 > MAX_DEBUG_LINE){
                break;
            }
            println( " " +serie.getName() );
            HttpGet get = new HttpGet( serie.getUrl() );
            try{
                HttpResponse httpResponse = httpclient.execute(get);
                String htmlstring = EntityUtils.toString(httpResponse.getEntity());
                Document html = Jsoup.parse(htmlstring);
                Elements imagesElements = html.select(".row .column .uibox .uibox-con ul li>a"); // picture list
                int sum3 = 0;
                for(Element em : imagesElements){
                    sum3 ++ ;
                    if(debug && sum3 > MAX_DEBUG_LINE){
                        break;
                    }
                    if(em!=null){
                        String href = this.BASE_URL + em.attr("href");
                        HttpGet imagePageGet = new HttpGet( href );
                        try {
                            HttpResponse imagePageResponse = httpclient.execute(imagePageGet);
                            String htmlstring2 = EntityUtils.toString(imagePageResponse.getEntity());
                            Document html2 = Jsoup.parse(htmlstring2);
                            Element img = html2.getElementById("img");
                            if(img!=null){
                                CarSerieImage im = new CarSerieImage(em.attr("title"), img.attr("src"));
                                print( " " +im.getTitle()+" img = "+im.getSrc() );
                                downloadPhotos(im.getSrc(), fcarserie.getAbsolutePath(), im.getTitle()+"_"+sum3+".jpg");
                                println("");
                            }
                        } finally {
                            imagePageGet.releaseConnection();
                        }
                    }
                }
            }catch(Exception e){
                e.printStackTrace();
            }finally{
                // Released per request; the old single release after the loops threw a
                // NullPointerException whenever no request had been created.
                get.releaseConnection();
            }
        }
    }
}
/** @return the root directory used for the output files and images */
public String getDIR_ROOT() {
    return this.DIR_ROOT;
}

/** @param dIR_ROOT the new root directory */
public void setDIR_ROOT(String dIR_ROOT) {
    this.DIR_ROOT = dIR_ROOT;
}

/** @return whether Step3 downloads model images */
public boolean isbDownloadImage() {
    return this.bDownloadImage;
}

/** @param bDownloadImage whether Step3 should download model images */
public void setbDownloadImage(boolean bDownloadImage) {
    this.bDownloadImage = bDownloadImage;
}

/** @return whether Step3 collects the individual models of each series */
public boolean isbGetModelDetail() {
    return this.bGetModelDetail;
}

/** @param bGetModelDetail whether Step3 should collect the individual models of each series */
public void setbGetModelDetail(boolean bGetModelDetail) {
    this.bGetModelDetail = bGetModelDetail;
}
}
6737

被折叠的 条评论
为什么被折叠?



