抓取车辆信息

抓取汽车之家的车辆信息和车辆图片


CarBrands.java

/*
 * @author : TF-BJ-C064
 * @creation : 2014-8-19 上午9:57:38
 * @description : 
 *
 */

package com.car;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A car brand together with the series that belong to it.
 *
 * <p>The brand id ({@code bid}) is derived from the brand URL every time the URL
 * is assigned through {@link #setUrl(String)} or the two-argument constructor.</p>
 */
public class CarBrands {

	/** Captures the digits that sit between a dash and the literal ".html" suffix. */
	private static final Pattern BID_PATTERN = Pattern.compile("-(\\d+)\\.html");

	private String name;
	private String url;
	private String bid;

	private List<CarSerie> series = new ArrayList<CarSerie>();

	/**
	 * Appends a series to this brand.
	 *
	 * @param cb the series to add
	 */
	public void add(CarSerie cb ){
		series.add(cb);
	}

	/** Creates an empty brand; all fields stay {@code null} until set. */
	public CarBrands(){}

	/**
	 * Creates a brand and derives its id from the given URL.
	 *
	 * @param name display name of the brand
	 * @param href brand page URL, may be {@code null}
	 */
	public CarBrands(String name, String href) {
		this.name = name;
		this.setUrl(href);
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getUrl() {
		return url;
	}

	/**
	 * Stores the URL and, when it contains a numeric id followed by ".html",
	 * updates the brand id with those digits.
	 *
	 * <p>Example: {@code http://car.autohome.com.cn/price/brand-22.html} yields
	 * bid {@code "22"}. When the URL is {@code null} or does not match, the
	 * previously stored bid is left untouched.</p>
	 *
	 * @param url brand page URL, may be {@code null}
	 */
	public void setUrl(String url) {
		this.url = url;
		if (url == null) {
			return;
		}
		Matcher matcher = BID_PATTERN.matcher(url);
		if (matcher.find()) {
			this.setBid(matcher.group(1));
		}
	}

	public String getBid() {
		return bid;
	}

	public void setBid(String bid) {
		this.bid = bid;
	}

	public List<CarSerie> getSeries() {
		return series;
	}

	public void setSeries(List<CarSerie> series) {
		this.series = series;
	}

}


CarModels.java

/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 下午2:26:13
* @description : 
*
*/

package com.car;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * A concrete car model (trim level) with its headline specifications and images.
 */
public class CarModels {

	/**
	 * Captures the digits of a {@code specid} query parameter. The value must be
	 * terminated by {@code &}, by {@code #}, or by the end of the string so that
	 * a specid placed last in the URL is recognised as well.
	 */
	private static final Pattern SPEC_ID_PATTERN = Pattern.compile("specid=(\\d+)(?:[&#]|$)");

	private String name;
	private String url;
	private String price;// guide price
	private String level;// vehicle class
	private String structure;// body structure
	private String Engine;// engine
	private String Transmission;// gearbox
	private String size;// body dimensions

	private String imageurl;
	private List<CarSerieImage> images = new ArrayList<CarSerieImage>();

	/**
	 * Appends an image to this model.
	 *
	 * @param imageurl the image to add
	 * @return the result of {@link List#add(Object)}
	 */
	public boolean add(CarSerieImage imageurl){
		return images.add(imageurl);
	}

	public List<CarSerieImage> getImages() {
		return images;
	}

	public void setImages(List<CarSerieImage> images) {
		this.images = images;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	/**
	 * Extracts the numeric {@code specid} from a link and stores it via
	 * {@link #setUrl(String)}. Note that only the id is stored, not the link.
	 *
	 * <p>Example: {@code http://www.autohome.com.cn/buycar.html?specid=19460&#pvareaid=101622}
	 * stores {@code "19460"}. A {@code null} or non-matching argument leaves the
	 * current value unchanged.</p>
	 *
	 * @param parurl link that carries a {@code specid} parameter, may be {@code null}
	 */
	public void parseAsetUrl(String parurl){
		if (parurl == null) {
			return;
		}
		Matcher matcher = SPEC_ID_PATTERN.matcher(parurl);
		if (matcher.find()) {
			this.setUrl(matcher.group(1));
		}
	}

	public String getUrl() {
		return url;
	}

	public void setUrl(String url) {
		this.url = url;
	}

	public String getPrice() {
		return price;
	}

	public void setPrice(String price) {
		this.price = price;
	}

	public String getLevel() {
		return level;
	}

	public void setLevel(String level) {
		this.level = level;
	}

	public String getStructure() {
		return structure;
	}

	public void setStructure(String structure) {
		this.structure = structure;
	}

	public String getEngine() {
		return Engine;
	}

	public void setEngine(String engine) {
		Engine = engine;
	}

	public String getTransmission() {
		return Transmission;
	}

	public void setTransmission(String transmission) {
		Transmission = transmission;
	}

	public String getImageurl() {
		return imageurl;
	}

	public void setImageurl(String imageurl) {
		this.imageurl = imageurl;
	}

	public String getSize() {
		return size;
	}

	public void setSize(String size) {
		this.size = size;
	}

}


CarSerie.java

/*
 * @author : TF-BJ-C064
 * @creation : 2014-8-19 上午11:48:14
 * @description : 
 *
 */

package com.car;

import java.util.ArrayList;
import java.util.List;

/**
 * A car series of a brand, holding headline specifications, free-form extra
 * information and the model years that belong to the series.
 */
public class CarSerie {

	private String name;
	private String url;

	private String price;// guide price
	private String level;// vehicle class
	private String structure;// body structure
	private String Engine;// engine
	private String Transmission;// gearbox
	private String extInfo="";
	private String extInfoHtml;

	private List<CarYear> carYearList = new ArrayList<CarYear>();

	/**
	 * Appends a model year to this series.
	 *
	 * @param cy the model year to add
	 * @return the result of {@link List#add(Object)}
	 */
	public boolean add(CarYear cy){
		return this.carYearList.add(cy);
	}

	/** Creates an empty series. */
	public CarSerie(){}

	/**
	 * Creates a series with the given name and page link. The link is stored
	 * unchanged, e.g. {@code http://car.autohome.com.cn/pic/series/66.html}.
	 *
	 * @param name display name of the series
	 * @param href series page URL
	 */
	public CarSerie(String name, String href){
		this.name = name;
		this.url = href;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getUrl() {
		return url;
	}

	public void setUrl(String url) {
		this.url = url;
	}

	public List<CarYear> getCarYearList() {
		return carYearList;
	}

	public void setCarYearList(List<CarYear> carYearList) {
		this.carYearList = carYearList;
	}

	public String getPrice() {
		return price;
	}

	public void setPrice(String price) {
		this.price = price;
	}

	public String getLevel() {
		return level;
	}

	public void setLevel(String level) {
		this.level = level;
	}

	public String getStructure() {
		return structure;
	}

	public void setStructure(String structure) {
		this.structure = structure;
	}

	public String getEngine() {
		return Engine;
	}

	public void setEngine(String engine) {
		Engine = engine;
	}

	public String getTransmission() {
		return Transmission;
	}

	public void setTransmission(String transmission) {
		Transmission = transmission;
	}

	public String getExtInfo() {
		return extInfo;
	}

	/**
	 * Appends a piece of extra information, separated from existing non-blank
	 * content by {@code ", "}. A {@code null} argument is ignored.
	 *
	 * @param extInfoIn text to append, may be {@code null}
	 */
	public void addExtInfo(String extInfoIn) {
		if (extInfoIn == null) {
			return;
		}
		// Treat a missing value like an empty one so "null" is never concatenated.
		String current = (this.extInfo == null) ? "" : this.extInfo;
		if (!current.trim().isEmpty()) {
			current += ", ";
		}
		this.extInfo = current + extInfoIn;
	}

	/**
	 * Replaces the extra information. A {@code null} argument is ignored so the
	 * field never becomes {@code null}.
	 *
	 * @param extInfoIn new extra information, may be {@code null}
	 */
	public void setExtInfo(String extInfoIn) {
		if (extInfoIn != null) {
			// Assign the argument; the field was previously assigned to itself.
			this.extInfo = extInfoIn;
		}
	}

	public String getExtInfoHtml() {
		return extInfoHtml;
	}

	public void setExtInfoHtml(String extInfoHtml) {
		this.extInfoHtml = extInfoHtml;
	}

}


CarSerieImage.java

/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 上午11:52:54
* @description : 
*
*/

package com.car;

/**
 * An image reference consisting of a title and a source location.
 */
public class CarSerieImage {

	private String title;
	private String src;

	/** Creates an image with no title and no source. */
	public CarSerieImage() {
	}

	/**
	 * Creates an image with the given title and source.
	 *
	 * @param title image title, may be {@code null} or blank
	 * @param src   image source location
	 */
	public CarSerieImage(String title, String src) {
		this.title = title;
		this.src = src;
	}

	/**
	 * Returns the title, or the current time in milliseconds rendered as a
	 * string when the title is {@code null} or blank.
	 *
	 * @return a non-blank title
	 */
	public String getTitle() {
		boolean hasTitle = title != null && !title.trim().isEmpty();
		return hasTitle ? title : String.valueOf(System.currentTimeMillis());
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public String getSrc() {
		return this.src;
	}

	public void setSrc(String src) {
		this.src = src;
	}

}


CarTree.java

/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 上午9:59:06
* @description : 
*
*/

package com.car;

import java.util.ArrayList;
import java.util.List;

/**
 * Top-level container that holds the list of car brands.
 */
public class CarTree {

	/** Brands in the order they were added. */
	private List<CarBrands> tree = new ArrayList<CarBrands>();

	public List<CarBrands> getTree() {
		return this.tree;
	}

	public void setTree(List<CarBrands> tree) {
		this.tree = tree;
	}

	/**
	 * Appends a brand to the tree.
	 *
	 * @param carbs the brand to add
	 * @return the result of {@link List#add(Object)}
	 */
	public boolean add(CarBrands carbs) {
		return this.tree.add(carbs);
	}

}


CarYear.java

/*
* @author : TF-BJ-C064
* @creation : 2014-8-19 下午2:48:56
* @description : 
*
*/

package com.car;

import java.util.ArrayList;
import java.util.List;

/**
 * A model year of a car series (for example the "2013" edition) together
 * with the models offered in that year.
 */
public class CarYear {

	private String name;
	private List<CarModels> carModels = new ArrayList<CarModels>();

	/** Creates a model year without a name. */
	public CarYear() {
	}

	/**
	 * Creates a model year with the given name.
	 *
	 * @param name label of the model year
	 */
	public CarYear(String name) {
		this.name = name;
	}

	/**
	 * Appends a model to this model year.
	 *
	 * @param cm the model to add
	 * @return the result of {@link List#add(Object)}
	 */
	public boolean add(CarModels cm) {
		return carModels.add(cm);
	}

	public String getName() {
		return this.name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public List<CarModels> getCarModels() {
		return this.carModels;
	}

	public void setCarModels(List<CarModels> carModels) {
		this.carModels = carModels;
	}

}


QCZJmain.java

/*
 * @author : TF-BJ-C064
 * @creation : 2014-8-19 上午9:31:38
 * @description : 
 *
 */

package com.car;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.xvolks.jnative.exceptions.NativeException;

import sun.audio.AudioPlayer;
import sun.audio.AudioStream;

/**
 * 抓取汽车之家汽车数据
 * <p>Title: QCZJmain</p>
 * <p>Description: 
 * 	args:
 * 		args[0]: 车辆信息存储根路径
 * 		args[1]: 是否下载图片,默认false
 * 		args[2]:是否抓取车辆款式,default=true
 *  e.g:
 * 	  run.bat
  		echo off 
		color 0a
		java -jar CarInfoCrawl.jar D:/craw/car20140821 true
		pause
 * </p>
 * <p>Company: </p> 
 * @author	 
 * @date	2014-8-21
 */
public class QCZJmain {

	// When true, Step2 stops after MAX_DEBUG_LINE brands.
	public static boolean debug = false;
	// Upper bound on the number of brands processed while debug is enabled.
	public static int MAX_DEBUG_LINE = 5;

	// Root directory for all output files; created by init() when missing.
	private String DIR_ROOT = "data/cars";

	// Host prefix prepended to the relative links found in the brand/series HTML.
	public static String BASE_URL = "http://car.autohome.com.cn";
	public static String CAR_SPEC_URL = "http://www.autohome.com.cn/spec/"; // e.g. http://www.autohome.com.cn/spec/19390
	public static String Item_URL = BASE_URL+"/AsLeftMenu/As_LeftListNew.ashx?"; // e.g. AsLeftMenu/As_LeftListNew.ashx?typeId=1&brandId=34&fctId=0&seriesId=0

	public String carhtml = "<div class='cartree-letter'>A</div><ul><li  id='b34'><h3>" +
			"<a href='/price/brand-34.html'><i class='icon10 icon10-sjr'></i>阿尔法罗密欧<em>(3)</em></a></h3></li><li id='b35'><h3><a href='/price/brand-35.html'><i class='icon10 icon10-sjr'></i>阿斯顿·马丁<em>(38)</em></a></h3></li><li id='b33'><h3><a href='/price/brand-33.html'><i class='icon10 icon10-sjr'></i>奥迪<em>(523)</em></a></h3></li></ul><div class='cartree-letter'>B</div><ul><li  id='b140'><h3><a href='/price/brand-140.html'><i class='icon10 icon10-sjr'></i>巴博斯<em>(9)</em></a></h3></li><li id='b120'><h3><a href='/price/brand-120.html'><i class='icon10 icon10-sjr'></i>宝骏<em>(82)</em></a></h3></li><li id='b15'><h3><a href='/price/brand-15.html'><i class='icon10 icon10-sjr'></i>宝马<em>(537)</em></a></h3></li><li id='b40'><h3><a href='/price/brand-40.html'><i class='icon10 icon10-sjr'></i>保时捷<em>(148)</em></a></h3></li><li id='b27'><h3><a href='/price/brand-27.html'><i class='icon10 icon10-sjr'></i>北京汽车<em>(20)</em></a></h3></li><li id='b203'><h3><a href='/price/brand-203.html'><i class='icon10 icon10-sjr'></i>北汽幻速<em>(8)</em></a></h3></li><li id='b143'><h3><a href='/price/brand-143.html'><i class='icon10 icon10-sjr'></i>北汽威旺<em>(50)</em></a></h3></li><li id='b208'><h3><a href='/price/brand-208.html'><i class='icon10 icon10-sjr'></i>北汽新能源<em>(3)</em></a></h3></li><li id='b154'><h3><a href='/price/brand-154.html'><i class='icon10 icon10-sjr'></i>北汽制造<em>(29)</em></a></h3></li><li id='b36'><h3><a href='/price/brand-36.html'><i class='icon10 icon10-sjr'></i>奔驰<em>(399)</em></a></h3></li><li id='b95'><h3><a href='/price/brand-95.html'><i class='icon10 icon10-sjr'></i>奔腾<em>(103)</em></a></h3></li><li id='b14'><h3><a href='/price/brand-14.html'><i class='icon10 icon10-sjr'></i>本田<em>(265)</em></a></h3></li><li id='b75'><h3><a href='/price/brand-75.html'><i class='icon10 icon10-sjr'></i>比亚迪<em>(251)</em></a></h3></li><li id='b13'><h3><a href='/price/brand-13.html'><i class='icon10 icon10-sjr'></i>标致<em>(292)</em></a></h3></li><li id='b38'><h3><a href='/price/brand-38.html'><i 
class='icon10 icon10-sjr'></i>别克<em>(266)</em></a></h3></li><li id='b39'><h3><a href='/price/brand-39.html'><i class='icon10 icon10-sjr'></i>宾利<em>(36)</em></a></h3></li><li id='b37'><h3><a href='/price/brand-37.html'><i class='icon10 icon10-sjr'></i>布加迪<em>(3)</em></a></h3></li></ul><div class='cartree-letter'>C</div><ul><li  id='b79'><h3><a href='/price/brand-79.html'><i class='icon10 icon10-sjr'></i>昌河<em>(49)</em></a></h3></li><li id='b76'><h3><a href='/price/brand-76.html'><i class='icon10 icon10-sjr'></i>长安<em>(196)</em></a></h3></li><li id='b163'><h3><a href='/price/brand-163.html'><i class='icon10 icon10-sjr'></i>长安商用<em>(101)</em></a></h3></li><li id='b77'><h3><a href='/price/brand-77.html'><i class='icon10 icon10-sjr'></i>长城<em>(273)</em></a></h3></li><li id='b196'><h3><a href='/price/brand-196.html'><i class='icon10 icon10-sjr'></i>成功汽车<em>(7)</em></a></h3></li></ul><div class='cartree-letter'>D</div><ul><li  id='b169'><h3><a href='/price/brand-169.html'><i class='icon10 icon10-sjr'></i>DS<em>(29)</em></a></h3></li><li id='b92'><h3><a href='/price/brand-92.html'><i class='icon10 icon10-sjr'></i>大发<em>(13)</em></a></h3></li><li id='b1'><h3><a href='/price/brand-1.html'><i class='icon10 icon10-sjr'></i>大众<em>(863)</em></a></h3></li><li id='b41'><h3><a href='/price/brand-41.html'><i class='icon10 icon10-sjr'></i>道奇<em>(21)</em></a></h3></li><li id='b32'><h3><a href='/price/brand-32.html'><i class='icon10 icon10-sjr'></i>东风<em>(33)</em></a></h3></li><li id='b187'><h3><a href='/price/brand-187.html'><i class='icon10 icon10-sjr'></i>东风风度<em>(114)</em></a></h3></li><li id='b113'><h3><a href='/price/brand-113.html'><i class='icon10 icon10-sjr'></i>东风风神<em>(86)</em></a></h3></li><li id='b165'><h3><a href='/price/brand-165.html'><i class='icon10 icon10-sjr'></i>东风风行<em>(253)</em></a></h3></li><li id='b142'><h3><a href='/price/brand-142.html'><i class='icon10 icon10-sjr'></i>东风小康<em>(71)</em></a></h3></li><li id='b81'><h3><a href='/price/brand-81.html'><i 
class='icon10 icon10-sjr'></i>东南<em>(133)</em></a></h3></li></ul><div class='cartree-letter'>F</div><ul><li  id='b42'><h3><a href='/price/brand-42.html'><i class='icon10 icon10-sjr'></i>法拉利<em>(18)</em></a></h3></li><li id='b11'><h3><a href='/price/brand-11.html'><i class='icon10 icon10-sjr'></i>菲亚特<em>(89)</em></a></h3></li><li id='b3'><h3><a href='/price/brand-3.html'><i class='icon10 icon10-sjr'></i>丰田<em>(547)</em></a></h3></li><li id='b141'><h3><a href='/price/brand-141.html'><i class='icon10 icon10-sjr'></i>福迪<em>(11)</em></a></h3></li><li id='b197'><h3><a href='/price/brand-197.html'><i class='icon10 icon10-sjr'></i>福汽启腾<em>(4)</em></a></h3></li><li id='b8'><h3><a href='/price/brand-8.html'><i class='icon10 icon10-sjr'></i>福特<em>(312)</em></a></h3></li><li id='b96'><h3><a href='/price/brand-96.html'><i class='icon10 icon10-sjr'></i>福田<em>(276)</em></a></h3></li></ul><div class='cartree-letter'>G</div><ul><li  id='b112'><h3><a href='/price/brand-112.html'><i class='icon10 icon10-sjr'></i>GMC<em>(35)</em></a></h3></li><li id='b152'><h3><a href='/price/brand-152.html'><i class='icon10 icon10-sjr'></i>观致<em>(10)</em></a></h3></li><li id='b116'><h3><a href='/price/brand-116.html'><i class='icon10 icon10-sjr'></i>光冈<em>(3)</em></a></h3></li><li id='b82'><h3><a href='/price/brand-82.html'><i class='icon10 icon10-sjr'></i>广汽传祺<em>(69)</em></a></h3></li><li id='b108'><h3><a href='/price/brand-108.html'><i class='icon10 icon10-sjr'></i>广汽吉奥<em>(136)</em></a></h3></li></ul><div class='cartree-letter'>H</div><ul><li  id='b24'><h3><a href='/price/brand-24.html'><i class='icon10 icon10-sjr'></i>哈飞<em>(75)</em></a></h3></li><li id='b181'><h3><a href='/price/brand-181.html'><i class='icon10 icon10-sjr'></i>哈弗<em>(210)</em></a></h3></li><li id='b150'><h3><a href='/price/brand-150.html'><i class='icon10 icon10-sjr'></i>海格<em>(35)</em></a></h3></li><li id='b86'><h3><a href='/price/brand-86.html'><i class='icon10 icon10-sjr'></i>海马<em>(245)</em></a></h3></li><li id='b43'><h3><a 
href='/price/brand-43.html'><i class='icon10 icon10-sjr'></i>悍马<em>(5)</em></a></h3></li><li id='b164'><h3><a href='/price/brand-164.html'><i class='icon10 icon10-sjr'></i>恒天<em>(15)</em></a></h3></li><li id='b91'><h3><a href='/price/brand-91.html'><i class='icon10 icon10-sjr'></i>红旗<em>(11)</em></a></h3></li><li id='b85'><h3><a href='/price/brand-85.html'><i class='icon10 icon10-sjr'></i>华普<em>(41)</em></a></h3></li><li id='b87'><h3><a href='/price/brand-87.html'><i class='icon10 icon10-sjr'></i>华泰<em>(108)</em></a></h3></li><li id='b97'><h3><a href='/price/brand-97.html'><i class='icon10 icon10-sjr'></i>黄海<em>(54)</em></a></h3></li></ul><div class='cartree-letter'>J</div><ul><li  id='b46'><h3><a href='/price/brand-46.html'><i class='icon10 icon10-sjr'></i>Jeep<em>(155)</em></a></h3></li><li id='b25'><h3><a href='/price/brand-25.html'><i class='icon10 icon10-sjr'></i>吉利汽车<em>(437)</em></a></h3></li><li id='b84'><h3><a href='/price/brand-84.html'><i class='icon10 icon10-sjr'></i>江淮<em>(411)</em></a></h3></li><li id='b119'><h3><a href='/price/brand-119.html'><i class='icon10 icon10-sjr'></i>江铃<em>(79)</em></a></h3></li><li id='b210'><h3><a href='/price/brand-210.html'><i class='icon10 icon10-sjr'></i>江铃集团轻汽<em>(12)</em></a></h3></li><li id='b44'><h3><a href='/price/brand-44.html'><i class='icon10 icon10-sjr'></i>捷豹<em>(86)</em></a></h3></li><li id='b83'><h3><a href='/price/brand-83.html'><i class='icon10 icon10-sjr'></i>金杯<em>(198)</em></a></h3></li><li id='b145'><h3><a href='/price/brand-145.html'><i class='icon10 icon10-sjr'></i>金龙<em>(56)</em></a></h3></li><li id='b175'><h3><a href='/price/brand-175.html'><i class='icon10 icon10-sjr'></i>金旅<em>(7)</em></a></h3></li><li id='b151'><h3><a href='/price/brand-151.html'><i class='icon10 icon10-sjr'></i>九龙<em>(16)</em></a></h3></li></ul><div class='cartree-letter'>K</div><ul><li  id='b109'><h3><a href='/price/brand-109.html'><i class='icon10 icon10-sjr'></i>KTM<em>(1)</em></a></h3></li><li id='b156'><h3><a 
href='/price/brand-156.html'><i class='icon10 icon10-sjr'></i>卡尔森<em>(4)</em></a></h3></li><li id='b199'><h3><a href='/price/brand-199.html'><i class='icon10 icon10-sjr'></i>卡威<em>(4)</em></a></h3></li><li id='b101'><h3><a href='/price/brand-101.html'><i class='icon10 icon10-sjr'></i>开瑞<em>(98)</em></a></h3></li><li id='b47'><h3><a href='/price/brand-47.html'><i class='icon10 icon10-sjr'></i>凯迪拉克<em>(114)</em></a></h3></li><li id='b100'><h3><a href='/price/brand-100.html'><i class='icon10 icon10-sjr'></i>科尼赛克<em>(4)</em></a></h3></li><li id='b9'><h3><a href='/price/brand-9.html'><i class='icon10 icon10-sjr'></i>克莱斯勒<em>(27)</em></a></h3></li></ul><div class='cartree-letter'>L</div><ul><li  id='b48'><h3><a href='/price/brand-48.html'><i class='icon10 icon10-sjr'></i>兰博基尼<em>(21)</em></a></h3></li><li id='b118'><h3><a href='/price/brand-118.html'><i class='icon10 icon10-sjr'></i>劳伦士<em>(6)</em></a></h3></li><li id='b54'><h3><a href='/price/brand-54.html'><i class='icon10 icon10-sjr'></i>劳斯莱斯<em>(17)</em></a></h3></li><li id='b215'><h3><a href='/price/brand-215.html'><i class='icon10 icon10-sjr'></i>雷丁<em>(5)</em></a></h3></li><li id='b52'><h3><a href='/price/brand-52.html'><i class='icon10 icon10-sjr'></i>雷克萨斯<em>(148)</em></a></h3></li><li id='b10'><h3><a href='/price/brand-10.html'><i class='icon10 icon10-sjr'></i>雷诺<em>(99)</em></a></h3></li><li id='b124'><h3><a href='/price/brand-124.html'><i class='icon10 icon10-sjr'></i>理念<em>(15)</em></a></h3></li><li id='b80'><h3><a href='/price/brand-80.html'><i class='icon10 icon10-sjr'></i>力帆<em>(154)</em></a></h3></li><li id='b89'><h3><a href='/price/brand-89.html'><i class='icon10 icon10-sjr'></i>莲花汽车<em>(60)</em></a></h3></li><li id='b78'><h3><a href='/price/brand-78.html'><i class='icon10 icon10-sjr'></i>猎豹汽车<em>(90)</em></a></h3></li><li id='b51'><h3><a href='/price/brand-51.html'><i class='icon10 icon10-sjr'></i>林肯<em>(15)</em></a></h3></li><li id='b53'><h3><a href='/price/brand-53.html'><i class='icon10 
icon10-sjr'></i>铃木<em>(359)</em></a></h3></li><li id='b204'><h3><a href='/price/brand-204.html'><i class='icon10 icon10-sjr'></i>陆地方舟<em>(7)</em></a></h3></li><li id='b88'><h3><a href='/price/brand-88.html'><i class='icon10 icon10-sjr'></i>陆风<em>(131)</em></a></h3></li><li id='b49'><h3><a href='/price/brand-49.html'><i class='icon10 icon10-sjr'></i>路虎<em>(145)</em></a></h3></li><li id='b50'><h3><a href='/price/brand-50.html'><i class='icon10 icon10-sjr'></i>路特斯<em>(11)</em></a></h3></li></ul><div class='cartree-letter'>M</div><ul><li  id='b20'><h3><a href='/price/brand-20.html'><i class='icon10 icon10-sjr'></i>MG<em>(120)</em></a></h3></li><li id='b56'><h3><a href='/price/brand-56.html'><i class='icon10 icon10-sjr'></i>MINI<em>(75)</em></a></h3></li><li id='b58'><h3><a href='/price/brand-58.html'><i class='icon10 icon10-sjr'></i>马自达<em>(176)</em></a></h3></li><li id='b57'><h3><a href='/price/brand-57.html'><i class='icon10 icon10-sjr'></i>玛莎拉蒂<em>(27)</em></a></h3></li><li id='b55'><h3><a href='/price/brand-55.html'><i class='icon10 icon10-sjr'></i>迈巴赫<em>(4)</em></a></h3></li><li id='b129'><h3><a href='/price/brand-129.html'><i class='icon10 icon10-sjr'></i>迈凯伦<em>(8)</em></a></h3></li><li id='b168'><h3><a href='/price/brand-168.html'><i class='icon10 icon10-sjr'></i>摩根<em>(11)</em></a></h3></li></ul><div class='cartree-letter'>N</div><ul><li  id='b130'><h3><a href='/price/brand-130.html'><i class='icon10 icon10-sjr'></i>纳智捷<em>(44)</em></a></h3></li><li id='b213'><h3><a href='/price/brand-213.html'><i class='icon10 icon10-sjr'></i>南京金龙<em>(5)</em></a></h3></li></ul><div class='cartree-letter'>O</div><ul><li  id='b60'><h3><a href='/price/brand-60.html'><i class='icon10 icon10-sjr'></i>讴歌<em>(27)</em></a></h3></li><li id='b59'><h3><a href='/price/brand-59.html'><i class='icon10 icon10-sjr'></i>欧宝<em>(48)</em></a></h3></li><li id='b146'><h3><a href='/price/brand-146.html'><i class='icon10 icon10-sjr'></i>欧朗<em>(10)</em></a></h3></li></ul><div 
class='cartree-letter'>Q</div><ul><li  id='b26'><h3><a href='/price/brand-26.html'><i class='icon10 icon10-sjr'></i>奇瑞<em>(429)</em></a></h3></li><li id='b122'><h3><a href='/price/brand-122.html'><i class='icon10 icon10-sjr'></i>启辰<em>(32)</em></a></h3></li><li id='b62'><h3><a href='/price/brand-62.html'><i class='icon10 icon10-sjr'></i>起亚<em>(407)</em></a></h3></li></ul><div class='cartree-letter'>R</div><ul><li  id='b63'><h3><a href='/price/brand-63.html'><i class='icon10 icon10-sjr'></i>日产<em>(423)</em></a></h3></li><li id='b19'><h3><a href='/price/brand-19.html'><i class='icon10 icon10-sjr'></i>荣威<em>(126)</em></a></h3></li><li id='b174'><h3><a href='/price/brand-174.html'><i class='icon10 icon10-sjr'></i>如虎<em>(2)</em></a></h3></li><li id='b103'><h3><a href='/price/brand-103.html'><i class='icon10 icon10-sjr'></i>瑞麒<em>(50)</em></a></h3></li></ul><div class='cartree-letter'>S</div><ul><li  id='b45'><h3><a href='/price/brand-45.html'><i class='icon10 icon10-sjr'></i>smart<em>(57)</em></a></h3></li><li id='b64'><h3><a href='/price/brand-64.html'><i class='icon10 icon10-sjr'></i>萨博<em>(16)</em></a></h3></li><li id='b68'><h3><a href='/price/brand-68.html'><i class='icon10 icon10-sjr'></i>三菱<em>(249)</em></a></h3></li><li id='b149'><h3><a href='/price/brand-149.html'><i class='icon10 icon10-sjr'></i>陕汽通家<em>(26)</em></a></h3></li><li id='b155'><h3><a href='/price/brand-155.html'><i class='icon10 icon10-sjr'></i>上汽大通<em>(60)</em></a></h3></li><li id='b173'><h3><a href='/price/brand-173.html'><i class='icon10 icon10-sjr'></i>绅宝<em>(14)</em></a></h3></li><li id='b66'><h3><a href='/price/brand-66.html'><i class='icon10 icon10-sjr'></i>世爵<em>(1)</em></a></h3></li><li id='b90'><h3><a href='/price/brand-90.html'><i class='icon10 icon10-sjr'></i>双环<em>(82)</em></a></h3></li><li id='b69'><h3><a href='/price/brand-69.html'><i class='icon10 icon10-sjr'></i>双龙<em>(111)</em></a></h3></li><li id='b162'><h3><a href='/price/brand-162.html'><i class='icon10 
icon10-sjr'></i>思铭<em>(2)</em></a></h3></li><li id='b65'><h3><a href='/price/brand-65.html'><i class='icon10 icon10-sjr'></i>斯巴鲁<em>(167)</em></a></h3></li><li id='b67'><h3><a href='/price/brand-67.html'><i class='icon10 icon10-sjr'></i>斯柯达<em>(198)</em></a></h3></li></ul><div class='cartree-letter'>T</div><ul><li  id='b202'><h3><a href='/price/brand-202.html'><i class='icon10 icon10-sjr'></i>泰卡特<em>(7)</em></a></h3></li><li id='b133'><h3><a href='/price/brand-133.html'><i class='icon10 icon10-sjr'></i>特斯拉<em>(3)</em></a></h3></li><li id='b161'><h3><a href='/price/brand-161.html'><i class='icon10 icon10-sjr'></i>腾势<em>(2)</em></a></h3></li></ul><div class='cartree-letter'>W</div><ul><li  id='b102'><h3><a href='/price/brand-102.html'><i class='icon10 icon10-sjr'></i>威麟<em>(27)</em></a></h3></li><li id='b99'><h3><a href='/price/brand-99.html'><i class='icon10 icon10-sjr'></i>威兹曼<em>(7)</em></a></h3></li><li id='b70'><h3><a href='/price/brand-70.html'><i class='icon10 icon10-sjr'></i>沃尔沃<em>(241)</em></a></h3></li><li id='b114'><h3><a href='/price/brand-114.html'><i class='icon10 icon10-sjr'></i>五菱汽车<em>(69)</em></a></h3></li><li id='b167'><h3><a href='/price/brand-167.html'><i class='icon10 icon10-sjr'></i>五十铃<em>(12)</em></a></h3></li></ul><div class='cartree-letter'>X</div><ul><li  id='b98'><h3><a href='/price/brand-98.html'><i class='icon10 icon10-sjr'></i>西雅特<em>(13)</em></a></h3></li><li id='b12'><h3><a href='/price/brand-12.html'><i class='icon10 icon10-sjr'></i>现代<em>(418)</em></a></h3></li><li id='b185'><h3><a href='/price/brand-185.html'><i class='icon10 icon10-sjr'></i>新凯<em>(3)</em></a></h3></li><li id='b71'><h3><a href='/price/brand-71.html'><i class='icon10 icon10-sjr'></i>雪佛兰<em>(263)</em></a></h3></li><li id='b72'><h3><a href='/price/brand-72.html'><i class='icon10 icon10-sjr'></i>雪铁龙<em>(289)</em></a></h3></li></ul><div class='cartree-letter'>Y</div><ul><li  id='b111'><h3><a href='/price/brand-111.html'><i class='icon10 
icon10-sjr'></i>野马汽车<em>(20)</em></a></h3></li><li id='b110'><h3><a href='/price/brand-110.html'><i class='icon10 icon10-sjr'></i>一汽<em>(218)</em></a></h3></li><li id='b144'><h3><a href='/price/brand-144.html'><i class='icon10 icon10-sjr'></i>依维柯<em>(19)</em></a></h3></li><li id='b73'><h3><a href='/price/brand-73.html'><i class='icon10 icon10-sjr'></i>英菲尼迪<em>(109)</em></a></h3></li><li id='b192'><h3><a href='/price/brand-192.html'><i class='icon10 icon10-sjr'></i>英致<em>(6)</em></a></h3></li><li id='b93'><h3><a href='/price/brand-93.html'><i class='icon10 icon10-sjr'></i>永源<em>(70)</em></a></h3></li></ul><div class='cartree-letter'>Z</div><ul><li  id='b206'><h3><a href='/price/brand-206.html'><i class='icon10 icon10-sjr'></i>知豆<em>(1)</em></a></h3></li><li id='b22'><h3><a href='/price/brand-22.html'><i class='icon10 icon10-sjr'></i>中华<em>(190)</em></a></h3></li><li id='b74'><h3><a href='/price/brand-74.html'><i class='icon10 icon10-sjr'></i>中兴<em>(66)</em></a></h3></li><li id='b94'><h3><a href='/price/brand-94.html'><i class='icon10 icon10-sjr'></i>众泰<em>(117)</em></a></h3></li></ul>";

	// Brand tree that Step1 fills and Step2 extends with series.
	private CarTree cartree = new CarTree(); 
	private boolean bDownloadImage = false;// whether to download images, default false
	// Set from the third command-line argument in main; presumably toggles crawling of model details - confirm against its readers.
	private boolean bGetModelDetail = true;

	/**
	 * Command-line entry point.
	 *
	 * <p>Arguments (all optional):</p>
	 * <ul>
	 *   <li>{@code args[0]} - root directory for output files</li>
	 *   <li>{@code args[1]} - {@code "true"} to download images</li>
	 *   <li>{@code args[2]} - {@code "true"} to fetch model details</li>
	 * </ul>
	 *
	 * <p>After initialisation the crawl is attempted in a loop until
	 * {@code start()} reports success; exceptions thrown by a run are printed
	 * and the run is retried.</p>
	 *
	 * @param args command-line arguments as described above
	 */
	public static void main(String[] args) {
		QCZJmain q = new QCZJmain();

		if(args!=null){
			if(args.length >=1 ){
				q.setDIR_ROOT(args[0]);
				System.out.println("reset DIR_ROOT : "+q.getDIR_ROOT());
			}
			if(args.length >=2 ){
				q.setbDownloadImage("true".equals(args[1]));
			}
			if(args.length >=3 ){
				// The third argument controls model details; args[1] was read here before.
				q.setbGetModelDetail("true".equals(args[2]));
			}
		}

		q.init();

		File ftemp = new File(q.getDIR_ROOT());
		System.out.println("DIR_ROOT : " + ftemp.getAbsolutePath());

		boolean bsucces = false;
		while(!bsucces){

			q.readConfig();
			q.play();

			// Five second countdown before each attempt.
			for(int i=5; i>0; i--){
				try {
					System.out.println("wait "+i+" s");
					Thread.sleep(1000);
				} catch (InterruptedException e1) {
					e1.printStackTrace();
				}
			}
			try {
				bsucces = q.start();
			} catch (Exception e) {
				e.printStackTrace();
			}
			if(!bsucces){
				// NOTE(review): the message announces 5 minutes but the pause below is 5 seconds - confirm which one is intended.
				println("Retry.. in 5 Minutes ");
				try {
					Thread.sleep(5000);
				} catch (InterruptedException e) {
					e.printStackTrace();
				}
			}
		}

		println("=============== Finish ==============");
	}

	/**
	 * Starts playback of the bundled classpath resource {@code /[000279].wav}.
	 * Any failure is printed and otherwise ignored.
	 */
	public void play(){
		try {
			AudioStream audio = new AudioStream(this.getClass().getResourceAsStream("/[000279].wav"));
			AudioPlayer.player.start(audio);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
	//
	// Counters loaded from config.ini by readConfig(); presumably resume positions for an interrupted crawl - confirm against their readers.
	private  int carBrandsNumber = 0;
	private  int carSerieNumber = 0;
	private  int carYearNumber = 0;
	private  int carImagesNumber = 0;
	// Scratch variable used by readConfig() while parsing.
	private  int intTemp = 0;

	// Initial paths based on the default DIR_ROOT; init() recomputes both from the current DIR_ROOT.
	private String configFilePath = this.DIR_ROOT + "/config.ini";
	private String exceptionLogFilePath = this.DIR_ROOT + "/error.log";

	/**
	 * Creates the root directory when it does not exist and recomputes the
	 * config, series-list and error-log paths from the current root.
	 */
	public void init(){
		final String root = this.DIR_ROOT;

		File rootDir = new File(root);
		if(!rootDir.exists()){
			rootDir.mkdirs();
		}

		this.configFilePath = root + "/config.ini";
		this.carserielistPath = root + "/carserielist.txt";
		this.exceptionLogFilePath = root + "/error.log";
	}

	/**
	 * Loads the four counters from the config file, if it exists, and prints them.
	 *
	 * <p>Each non-blank line is expected to start with four comma-separated
	 * integers in the order brands, series, images, years. Every valid line
	 * overwrites the counters, so the last valid line wins. Lines with fewer
	 * than four fields are ignored; lines whose fields are not integers are
	 * reported and skipped without touching the counters.</p>
	 */
	public void readConfig(){

		File configfile = new File(this.configFilePath);
		if(configfile.exists()){
			BufferedReader dr = null;
			try {
				dr = new BufferedReader(new InputStreamReader(new FileInputStream(configfile)));
				String line;
				// A read error leaves the loop through the outer catch, so it cannot spin forever.
				while((line = dr.readLine()) != null){
					if(line.trim().isEmpty())
						continue;
					String[] items = line.split(",");
					if(items.length < 4)
						continue;
					try {
						// Parse everything first so a bad field cannot leave the counters half-updated.
						int brands = Integer.parseInt(items[0].trim());
						int series = Integer.parseInt(items[1].trim());
						int images = Integer.parseInt(items[2].trim());
						int years = Integer.parseInt(items[3].trim());
						carBrandsNumber = brands;
						carSerieNumber = series;
						carImagesNumber = images;
						carYearNumber = years;
						// Keeps the scratch field at the last parsed value, as before.
						intTemp = years;
					} catch (NumberFormatException e) {
						// e.g. a record truncated by an aborted run
						e.printStackTrace();
					}
				}
			} catch (FileNotFoundException e) {
				e.printStackTrace();
			} catch (IOException e) {
				e.printStackTrace();
			} finally {
				if(dr != null){
					try {
						dr.close();
					} catch (IOException e) {
						e.printStackTrace();
					}
				}
			}
		}

		System.out.println("readConfig  "+configfile.getAbsolutePath());
		System.out.println("carBrandsNumber carSerieNumber   carImagesNumber  carYearNumber");
		System.out.println(carBrandsNumber +" \t    "+ carSerieNumber +" \t    "+ carImagesNumber +"    \t "+ carYearNumber);
	}

	/**
	 * Writes one record to the given stream in the form
	 * {@code sum1,sum2,sum4,sum3, <timestamp>} followed by CRLF. An
	 * {@link IOException} raised by the write is printed and not rethrown.
	 *
	 * @param sum1     first field of the record
	 * @param sum2     second field of the record
	 * @param sum4     third field of the record
	 * @param sum3     fourth field of the record
	 * @param cofigout stream the record is written to
	 */
	public void saveConfig(int sum1, int sum2, int sum4, int sum3, FileOutputStream cofigout){
		StringBuilder record = new StringBuilder();
		record.append(sum1).append(",");
		record.append(sum2).append(",");
		record.append(sum4).append(",");
		record.append(sum3).append(", ");
		record.append(sdf.format(new Date())).append("\r\n");
		try {
			cofigout.write(record.toString().getBytes());
		} catch (IOException e2) {
			e2.printStackTrace();
		}
	}

	/**
	 * Writes the text to standard output without a line break.
	 *
	 * @param str text to print
	 */
	public static void print(String str){
		System.out.print(str);
	}
	/**
	 * Writes the text to standard output followed by a line break.
	 *
	 * @param str text to print
	 */
	public static void println(String str){
		System.out.println(str);
	}

	/**
	 * Step 1: builds the brand list from the embedded HTML in {@code carhtml}.
	 * The {@code em} elements inside the list items are removed first so they
	 * do not end up in the brand names; each remaining anchor becomes one brand.
	 */
	public void Step1(){
		println("=======  Step 1 ======");
		Document doc = Jsoup.parse(carhtml);

		Elements counters = doc.select("ul li em");
		if(counters != null){
			counters.remove();
		}

		for(Element anchor : doc.select("ul li a")){
			if(anchor == null){
				continue;
			}
			String link = BASE_URL + anchor.attr("href");
			String label = anchor.text();
			cartree.add(new CarBrands(label, link));
		}

		println("cartree.size=" + cartree.getTree().size());
	}

	// File that Step2 writes the brand/series listing to; init() recomputes it from the current DIR_ROOT.
	private String carserielistPath = this.DIR_ROOT + "/carserielist.txt";
	/**
	 * Step 2: fetches the series of every brand in the tree and attaches them
	 * to the brand.
	 *
	 * <p>For each brand one request is sent to {@code Item_URL} with the brand
	 * id; in the returned HTML the element with id {@code "b" + bid} is looked
	 * up and each of its {@code dl dd a} anchors becomes a series. Brand and
	 * series lines are also written to the series-list file. While
	 * {@code debug} is set, only the first {@code MAX_DEBUG_LINE} brands are
	 * processed.</p>
	 *
	 * <p>Each request's connection is released after it has been handled, and
	 * the output file and the HTTP client are closed even when a request
	 * fails.</p>
	 *
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException when a request or a file write fails
	 */
	public void Step2() throws ClientProtocolException, IOException{
		println("=======  Step 2 ======");
		HttpClient httpclient = new DefaultHttpClient();
		try {
			httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");

			File carserielistFile = new File(carserielistPath);
			FileOutputStream out = new FileOutputStream(carserielistFile);
			try {
				int sum = 0;
				String str;
				for(CarBrands carbs : cartree.getTree()){
					sum ++ ;
					if(debug && sum > MAX_DEBUG_LINE)
						break;

					str = "Brand="+carbs.getName() + " \r\n";
					out.write(str.getBytes());

					HttpGet get = new HttpGet( Item_URL + "typeId=1&brandId="+carbs.getBid()+"&fctId=0&seriesId=0");
					try {
						HttpResponse httpResponse = httpclient.execute(get);
						String htmlstring = EntityUtils.toString(httpResponse.getEntity());
						Document html = Jsoup.parse(htmlstring);
						Element curli = html.getElementById("b"+carbs.getBid());
						if(curli!=null){
							// Drop the <em> children so they are not part of the series names.
							curli.select("dl dd a em").remove();
							Elements series = curli.select("dl dd a");
							println(carbs.getName() + " ");
							for(Element serie : series){
								String href = BASE_URL + serie.attr("href");
								String name = serie.text();
								CarSerie serieCarbrands= new CarSerie(name, href);
								carbs.add(serieCarbrands);
								str = "    serie="+serieCarbrands.getName()+" = " + serieCarbrands.getUrl() + "\r\n";
								print( str );
								out.write(str.getBytes());
							}
						}
					} finally {
						// Released per request; a single release after the loop threw NPE when no request was made.
						get.releaseConnection();
					}
				}
			} finally {
				out.close();
			}
		} finally {
			// This client is local to the method, so its connections can be dropped here.
			httpclient.getConnectionManager().shutdown();
		}
	}

	// Timestamp format (yyyyMMddHHmmss) used for progress records, exception-log entries
	// and the start/end report printed by start().
	SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");

	// Step 3: get the model years (CarYear) and configurations (CarModels) of every series (CarSerie) of every brand (CarBrands).
	/**
	 * Walks brand -> series -> tab page -> model, fetching each series page and, when
	 * bGetModelDetail is set, each tab page, model detail page and (when bDownloadImage is set)
	 * the model's pictures. What it finds is appended to carlist.txt under DIR_ROOT, and one
	 * directory per brand / series / model is created under DIR_ROOT.
	 *
	 * Skip counters: while carBrandsNumber / carSerieNumber / carYearNumber is positive, entries
	 * whose index is below it are skipped; the first entry that is not skipped resets that
	 * counter to 0. (Presumably these hold a resume position restored from the config file -
	 * confirm against the code that sets them.) The current position is appended to the config
	 * file through saveConfig as the walk advances.
	 *
	 * NOTE(review): the three output streams opened here are not closed on the early
	 * "return false" paths.
	 *
	 * @return false if a request for a series page or one of its tab pages fails, or if closing
	 *         the output files fails; true otherwise
	 * @throws IOException if an output file cannot be opened or a write to the exception log fails
	 */
	public boolean Step3() throws IOException {

		boolean bok = true; // not used in this method
		println("=======  Step 3 ======");

		httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT,  20000);// connection timeout: 20 s
		httpclient.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT,  60000);
		httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");

		// All three files are opened in append mode.
		File configFile = new File(configFilePath);
		FileOutputStream cofigout = new FileOutputStream(configFile, true);

		File carlistfile = new File( this.DIR_ROOT+"/carlist.txt"); // "/carlist_"+sdf.format(new Date())+".txt"
		FileOutputStream out = new FileOutputStream(carlistfile, true);

		File exceptionLogFile = new File( exceptionLogFilePath ); // "/carlist_"+sdf.format(new Date())+".txt"
		FileOutputStream exceptionout = new FileOutputStream(exceptionLogFile, true);

		String tempstr="";

		String str = "";
		// sumcars, sumserie and sumcaryear are not used in this method
		int sumcars = 0;
		int sumserie = 0;
		int sumcaryear = 0;

		// sum1 = brand index, sum2 = series index within the brand, sum3 = model index within
		// the current tab page, sum4 = value returned by SetImagesList for the latest model
		int sum1 = 0;
		int sum2 = 0;
		int sum3 = 0;
		int sum4 = 0;
		boolean bcontinue = true; // not used in this method
		for(CarBrands carbs : cartree.getTree()){ // brands
			if(debug && sum1 > this.MAX_DEBUG_LINE){
				break;
			}
			// skip brands whose index is below carBrandsNumber; the first brand that is
			// processed clears the counter
			if( carBrandsNumber>0 && sum1 < this.carBrandsNumber){
				println("carBrandsNumber: "+ sum1 +" < "+carBrandsNumber );
				sum1 ++;
				continue;
			}else{
				carBrandsNumber = 0;
			}
			this.saveConfig(sum1, sum2, sum4, sum3, cofigout);
			println(sum1+" :   "+ carbs.getName());// brand name
			str = "brand=" + carbs.getName() + "\r\n" ;
			try {
				out.write( str.getBytes() );
			} catch (IOException e1) {
				// a failed write to carlist.txt is logged and the walk continues
				tempstr = sdf.format(new Date())+"\r\n"+e1.getMessage();
				exceptionout.write( tempstr.getBytes());
				e1.printStackTrace();
			}

			// one directory per brand under DIR_ROOT
			File fcar = new File(DIR_ROOT + "/"+ carbs.getName());
			if(!fcar.exists())
				fcar.mkdirs();

			sum2 = 0;
			for(CarSerie serie : carbs.getSeries()){ // series of this brand
				if(debug && sum2 > MAX_DEBUG_LINE){
					break;
				}
				// same skip rule as for brands, applied to the series index
				if( carSerieNumber>0 && sum2 < this.carSerieNumber){
					println("    carSerieNumber: "+ sum2 +" < "+carSerieNumber );
					sum2 ++;
					continue;
				}else{
					carSerieNumber = 0;
				}
				this.saveConfig(sum1, sum2, -1, -1, cofigout);
				print( "    serie=" +serie.getName() );// series name
				str = "        serie="+serie.getName() + " " ;
				try {
					out.write( str.getBytes() );
				} catch (IOException e1) {
					tempstr = sdf.format(new Date())+"\r\n"+e1.getMessage()  + e1.getLocalizedMessage();
					exceptionout.write( tempstr.getBytes());
					e1.printStackTrace();
				}

				// one directory per series inside the brand directory
				File fcarserie = new File( fcar.getAbsolutePath()+ "/"+ serie.getName());
				if(!fcarserie.exists())
					fcarserie.mkdirs();

				try {
					get = new HttpGet( serie.getUrl() );// follow the series link to fetch the series page
					httpResponse = httpclient.execute(get);
					String htmlstring = EntityUtils.toString( httpResponse.getEntity() );
					Document html = Jsoup.parse(htmlstring);

					// copy the series' price and extra info from the page into serie
					GetDetailSerie(serie, html);
					str = " [指导价:  "+ serie.getPrice() + "," + serie.getExtInfo()+" ]";
					println(str);
					try{
						out.write( str.getBytes() );
					}catch(Exception e){
						tempstr = sdf.format(new Date())+"\r\n"+e.getMessage()  + e.getLocalizedMessage();
						exceptionout.write( tempstr.getBytes());
						e.printStackTrace();
					}

					if(this.bGetModelDetail)
					{
						// walk the models listed under each tab of the series page
						// (original comment: on-sale, discontinued and pre-sale tabs)
						Elements brandtabs = html.select(".row .brandtab-cont .tab-nav ul li a"); // tab links
						if(brandtabs!=null){
							for(Element brandtabItem : brandtabs){
								if(brandtabItem!=null){
									String brandtabhref = brandtabItem.attr("href");// link of this tab
									println("           "+ brandtabItem.text());
									if(brandtabhref!=null && !brandtabhref.trim().isEmpty()){
										get = new HttpGet( this.BASE_URL + brandtabhref );// fetch the model list of this tab
										httpResponse = httpclient.execute(get);
										String htmlstringBrand = EntityUtils.toString( httpResponse.getEntity() );
										Document htmlBrand = Jsoup.parse(htmlstringBrand);
										{
											Elements interval01List = htmlBrand.select("div.intervalcont .interval01 .interval01-list li ");
											// the model index restarts for every tab page
											sum3 = 0;
											for(Element interval : interval01List){
												Elements carsinfo = interval.select(".interval01-list-cars .infor-title a");
												if(carsinfo != null){
													CarYear caryear = new CarYear( carsinfo.text() );// model name
													if(debug && sum3 > MAX_DEBUG_LINE){
														break;
													}
													// same skip rule as above, applied to the model index
													if( carYearNumber>0 && sum3 < this.carYearNumber){
														println("       carYearNumber: "+ sum3  +" < "+carYearNumber );
														sum3 ++;
														continue;
													}else{
														carYearNumber = 0;
													}

													print( "        " +caryear.getName() );
													str = "        "+caryear.getName() + " " ;
													try {
														out.write( str.getBytes() );// model name
													} catch (IOException e1) {
														tempstr = sdf.format(new Date())+"\r\n"+e1.getMessage()   + e1.getLocalizedMessage();
														exceptionout.write( tempstr.getBytes());
														e1.printStackTrace();
													}

													CarModels carModel = new CarModels();
													{
														carModel.setName(caryear.getName());// model name
														//1 guide price
														Elements guidance = interval.select(".interval01-list-guidance .guidance-price");// guide price
														if(guidance != null)
															carModel.setPrice(guidance.text());// guide price
														//2 configuration details
														carModel.setUrl(carsinfo.attr("href"));// URL used to fetch the configuration details
														GetDetailModel(carModel);

														//3 pictures
														Elements related = interval.select(".interval01-list-related a[href^=/pic]");// picture page link
														if(related!=null){
															carModel.setImageurl( this.BASE_URL + related.attr("href") );
															println( "        imageurl = "+carModel.getImageurl() );
															// one directory per model inside the series directory
															File fcaryear= new File( fcarserie.getAbsolutePath()+ "/"+ caryear.getName());
															if(!fcaryear.exists())
																fcaryear.mkdirs();
															if(this.bDownloadImage){
																sum4 = SetImagesList(carModel.getImageurl(), carModel, fcaryear, cofigout, sum1, sum2, sum3);
															}else{
																this.saveConfig(sum1, sum2, 0, sum3, cofigout);
															}
														}
														str = ", 指导价="+carModel.getPrice() + " ,  车身结构="+carModel.getStructure()+
																", 发动机="+carModel.getEngine()+", 变速箱="+carModel.getTransmission()+", imagepageurl="+carModel.getImageurl()+"  \r\n" ; // carModel.getLevel() is not written here (commented out in the original)
														try{
															out.write( str.getBytes() );
														}catch(Exception e){
															tempstr = sdf.format(new Date())+"\r\n"+e.getMessage()   + e.getLocalizedMessage();
															exceptionout.write( tempstr.getBytes());
															e.printStackTrace();
														}
													}
													caryear.add(carModel);
													serie.add(caryear);

													this.saveConfig(sum1, sum2, sum4, sum3, cofigout);
												}
												sum3 ++ ;
											}//end of for(Element interval : interval01List)
										}
									}
								}
							}
						}

					}
				} catch (ClientProtocolException e) {
					// a failed request aborts the whole step: release the connection,
					// log the message and report failure to the caller
					if(get!=null)
						get.releaseConnection();
					e.printStackTrace();
					tempstr = sdf.format(new Date())+"\r\n"+e.getMessage();
					exceptionout.write( tempstr.getBytes());
					return false;
				} catch (IOException e) {
					// same handling as above for any other I/O failure inside the series block
					if(get!=null)
						get.releaseConnection();
					e.printStackTrace();
					tempstr = sdf.format(new Date())+"\r\n"+e.getMessage();
					exceptionout.write( tempstr.getBytes());
					return false;
				}

				sum2 ++;

			}

			sum1 ++;

		}

		// release the last request issued through the shared get field
		if(get!=null)
			get.releaseConnection();

		try {
			cofigout.close();
			out.close();
			exceptionout.close();
		} catch (IOException e) {
			e.printStackTrace();
			return false;
		}

		return true;
	}

	// Get the series' class, body structure, engine, transmission and guide price.
	/**
	 * Copies the summary shown on a series page into the given series: the text of the price
	 * element, plus the HTML and the per-item text of the info list. The colour entries of
	 * the info list are removed from the document before the list is read.
	 *
	 * @param serie series object that receives the extracted values
	 * @param html parsed series page
	 */
	public void GetDetailSerie(CarSerie serie, Document html){
		final Elements summary = html.select(".car-cont .list-cont-main .main-lever");
		if (summary == null) {
			return;
		}

		// guide price
		final Elements guidePrice = summary.select(".main-lever-right .lever-price");
		if (guidePrice != null) {
			serie.setPrice(guidePrice.text());
		}

		// body-colour entries are dropped so they do not show up in the info list below
		final Elements colourEntries = summary.select(".main-lever-left ul.lever-ul .lever-ul-color");
		if (colourEntries != null) {
			colourEntries.remove();
		}

		// remaining info-list items
		final Elements infoItems = summary.select(".main-lever-left ul.lever-ul li");
		if (infoItems == null) {
			return;
		}
		serie.setExtInfoHtml(infoItems.html());
		for (Element infoItem : infoItems) {
			serie.addExtInfo(infoItem.text());
		}
	}

	// Fetch the configuration details from the model's URL.
	/**
	 * Requests the model's detail page and fills in size, body structure, engine and
	 * transmission on the given model from the page's detail list. Does nothing when the
	 * model has no URL.
	 *
	 * Each list item is classified by the first label text it contains, checked in this order:
	 * "车身尺寸" (size), "车身结构" (structure), "机" (engine), "箱" (transmission). The item's
	 * span children are removed before its text is stored.
	 *
	 * This is best effort: a failure is printed and the connection of the failed request is
	 * released, but nothing is thrown, so the caller continues with whatever fields were set.
	 *
	 * @param carModel model to complete; its URL is read and its detail fields are written
	 */
	public void GetDetailModel( CarModels carModel){
		if(carModel.getUrl()==null)
			return ;
		get = new HttpGet( carModel.getUrl() );
		try{
			httpResponse = httpclient.execute(get);
			String htmlstring = EntityUtils.toString(httpResponse.getEntity());
			Document html = Jsoup.parse(htmlstring);
			Elements cardetails = html.select(".cardetail-infor .cardetail-infor-car li");
			for(Element cardetail : cardetails){
				if(!cardetail.getElementsContainingText("车身尺寸").isEmpty()){
					cardetail.select("span").remove();
					carModel.setSize(cardetail.text());
				}else if(!cardetail.getElementsContainingText("车身结构").isEmpty()){
					cardetail.select("span").remove();
					carModel.setStructure(cardetail.text());
				}else if(!cardetail.getElementsContainingText("机").isEmpty()){
					cardetail.select("span").remove();
					carModel.setEngine(cardetail.text());
				}else if(!cardetail.getElementsContainingText("箱").isEmpty()){
					cardetail.select("span").remove();
					carModel.setTransmission(cardetail.text());
				}
			}
		}catch(Exception e){
			// Previously swallowed silently; report it and free the connection so the
			// shared client can serve the next request.
			e.printStackTrace();
			get.releaseConnection();
		}
	}

	/**
	 * Opens the picture list page of a model, follows every picture link to its viewer page,
	 * and downloads the image found in the element with id "img" into the model's directory
	 * as {@code <model name>_<index>.jpg}. Each downloaded image is also added to the model.
	 *
	 * While carImagesNumber is positive, entries whose index is below it are skipped; the first
	 * entry that is not skipped resets it to 0. Files that already exist are not downloaded
	 * again. In debug mode the loop stops once the index exceeds MAX_DEBUG_LINE.
	 * Failures are printed and do not abort the caller.
	 *
	 * @param imagepage URL of the picture list page
	 * @param carModel model that receives the downloaded images; its name is used in file names
	 * @param fcaryear directory the images are stored in
	 * @param cofigout stream the progress records are written to
	 * @param sum1 brand index, passed through to the progress record
	 * @param sum2 series index, passed through to the progress record
	 * @param sum3 model index, passed through to the progress record
	 * @return the image index reached when the loop ended
	 */
	public int SetImagesList(String imagepage, CarModels carModel, File fcaryear, FileOutputStream cofigout,
			int sum1, int sum2, int sum3){

		int sum4 = 0;
		try{
			get = new HttpGet( imagepage );
			httpResponse = httpclient.execute(get);
			String htmlstring = EntityUtils.toString(httpResponse.getEntity());
			Document html = Jsoup.parse(htmlstring);
			Elements imagesElements = html.select(".row .column .uibox .uibox-con ul li>a"); // picture list

			for(Element em : imagesElements){ // one entry per picture
				if(debug && sum4 > MAX_DEBUG_LINE){
					break;
				}
				if(this.carImagesNumber>0 && sum4 < this.carImagesNumber){
					println("           carImagesNumber: "+sum4 +" < "+carImagesNumber );
					sum4 ++;
					continue;
				}else{
					carImagesNumber = 0;
				}
				String imageName = carModel.getName()+"_"+sum4+".jpg";
				File storeFile = new File( fcaryear.getAbsolutePath() + "/" + imageName );
				if(storeFile.exists()){
					println("ignore exist file @ "+storeFile.getAbsolutePath());
					// Advance the index before moving on. Without this every following
					// picture would get the same file name and be skipped as well.
					sum4 ++;
					continue;
				}

				if(em!=null){
					String href = this.BASE_URL + em.attr("href"); // link of the picture viewer page

					try{
						get = new HttpGet( href );
						httpResponse = httpclient.execute(get); // open the viewer page
						String htmlstring2 = EntityUtils.toString(httpResponse.getEntity());
						Document html2 = Jsoup.parse(htmlstring2);
						Element img = html2.getElementById("img"); // element holding the full-size image URL
						if(img!=null){
							CarSerieImage  im = new CarSerieImage( carModel.getName(), img.attr("src"));
							carModel.add(im);
							print( "        " +im.getTitle()+"  img = "+im.getSrc() );
							downloadPhotos(im.getSrc(), fcaryear.getAbsolutePath(), imageName);
							println("");
						}
					}catch(Exception e){
						e.printStackTrace();
					}
				}

				sum4 ++ ;
				this.saveConfig(sum1, sum2, sum4, sum3, cofigout);
			}
		}catch(Exception e){
			e.printStackTrace();
		}
		return sum4;
	}

	// HTTP state shared by Step3, GetDetailModel, SetImagesList and downloadPhotos: the client,
	// the most recently created request and the most recently received response.
	// Step2 and Step5 declare locals with the same names and do not touch these fields.
	HttpClient httpclient = new DefaultHttpClient();
	HttpGet get = null;
	HttpResponse httpResponse = null;

	/**
	 * Downloads the resource at {@code url} and stores it as {@code savePath/saveNamge}.
	 *
	 * The response body is read completely before the target file is created, so a failed
	 * request does not leave an empty file behind (SetImagesList treats an existing file as
	 * already downloaded). Failures are printed and not propagated.
	 *
	 * @param url address of the image
	 * @param savePath directory the file is written to
	 * @param saveNamge file name inside {@code savePath}
	 */
	public void downloadPhotos (String url, String savePath, String saveNamge){

		httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");

		try {
			get = new HttpGet( url );
			httpResponse = httpclient.execute(get);
			// fetch the bytes first; only touch the disk once the download succeeded
			byte[] imageBytes = EntityUtils.toByteArray(httpResponse.getEntity());
			File storeFile = new File( savePath + "/" + saveNamge );
			FileOutputStream output = new FileOutputStream(storeFile);
			try {
				output.write( imageBytes );
			} finally {
				output.close();
			}
			print( "    saved image @ "+storeFile.getAbsolutePath() );
		} catch (Exception e) {
			e.printStackTrace();
			// free the connection of the failed request for the shared client
			if(get != null){
				get.releaseConnection();
			}
		}
	}

	// Save the names to carlist.txt.
	/**
	 * Writes the collected tree (brand, series, model year, and one detail line per model) to
	 * carlist.txt under DIR_ROOT, replacing any existing file, then prints the brand and
	 * series totals and pauses for three seconds.
	 *
	 * The output file is closed even when a write fails, and an interrupt received during the
	 * pause is restored on the thread instead of being discarded.
	 *
	 * @throws ClientProtocolException declared for callers; not raised by the visible code
	 * @throws IOException if the file cannot be opened, written or closed
	 */
	public void Step4() throws ClientProtocolException, IOException{
		println("=======  Step 4  save picture data ======");


		File froot = new File(DIR_ROOT);
		if(!froot.exists())
			froot.mkdirs();

		File carlistfile = new File(froot.getAbsolutePath()+"/carlist.txt");
		FileOutputStream out = new FileOutputStream(carlistfile);

		String str = "";
		int sumcars = 0;
		int sumserie = 0;
		try {
			for(CarBrands carbs : cartree.getTree()){
				sumcars ++;
				str = carbs.getName() + "\r\n" ;
				out.write( str.getBytes() );
				for(CarSerie serie : carbs.getSeries()){
					sumserie ++;
					str = "    "+serie.getName() + "\r\n" ;
					out.write( str.getBytes() );
					for(CarYear caryear : serie.getCarYearList()){
						str = "        "+caryear.getName() + "\r\n" ;
						out.write( str.getBytes() );
						for(CarModels carModel : caryear.getCarModels()){
							str = ",  指导价="+carModel.getPrice() + " ,  车身结构="+carModel.getStructure()+
									", 发动机="+carModel.getEngine()+", 变速箱="+carModel.getTransmission()+", imagepageurl="+carModel.getImageurl()+" \r\n" ;
							out.write( str.getBytes() );
						}
					}
				}
			}
		} finally {
			out.close();
		}

		println("sumcars = "+sumcars + "  sumserie = "+sumserie);

		try {
			Thread.sleep(3000);
		} catch (InterruptedException e) {
			// keep the interrupt visible to whoever runs this thread
			Thread.currentThread().interrupt();
		}
	}

	// Set to true by start() after Step1 and Step2 have run; later start() calls skip
	// those two steps and go straight to Step3.
	boolean bstarted = false;
	/**
	 * Runs the crawl: Step1 and Step2 on the first call only, then Step3 on every call, and
	 * finally prints the start time, end time and elapsed time. Step4 and Step5 are not invoked.
	 *
	 * @return the result of Step3
	 * @throws ClientProtocolException if thrown by Step2
	 * @throws IOException if thrown by Step2 or Step3
	 */
	public boolean start() throws ClientProtocolException, IOException{
		final long startedAt = System.currentTimeMillis();

		if (!bstarted) {
			this.Step1(); // brand list
			this.Step2(); // series of every brand
		}
		bstarted = true;

		// models and configurations of every series
		final boolean succeeded = this.Step3();

		final long finishedAt = System.currentTimeMillis();
		final long elapsedSeconds = (finishedAt - startedAt) / 1000;
		final long hours = elapsedSeconds / 3600;
		final long minutes = (elapsedSeconds % 3600) / 60;
		final long seconds = elapsedSeconds % 60;

		println("start at "+sdf.format(new Date(startedAt)));
		println("end at "+sdf.format(new Date(finishedAt)));
		println("it takes "+hours+" h "+minutes+" m "+seconds+" s ." );
		return succeeded;
	}

	// Get the picture URLs.
	/**
	 * For every series of every brand, requests the series URL, follows each picture link found
	 * on that page and downloads the image referenced by the element with id "img" into
	 * {@code DIR_ROOT/<brand>/<series>/}. In debug mode at most MAX_DEBUG_LINE brands, series
	 * per brand and pictures per series are processed.
	 *
	 * A failure inside one series is printed and the walk continues with the next series; the
	 * connection of the failed request is released first. The final release is skipped when no
	 * request was ever created (empty tree or no series), and the local HTTP client is shut
	 * down when the method ends.
	 */
	public void Step5(){
		println("=======  Step 5 ======");

		HttpClient httpclient = new DefaultHttpClient();
		HttpGet get = null;
		HttpResponse httpResponse = null;

		httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");

		try {
			File froot = new File(DIR_ROOT);
			if(!froot.exists())
				froot.mkdirs();

			int sum = 0;
			for(CarBrands carbs : cartree.getTree()){
				println(carbs.getName());
				File fcar = new File(DIR_ROOT + "/"+ carbs.getName());
				if(!fcar.exists())
					fcar.mkdirs();

				sum ++ ;
				if(debug && sum > MAX_DEBUG_LINE){
					break;
				}

				int sum2 = 0;
				for(CarSerie serie : carbs.getSeries()){
					File fcarserie = new File( fcar.getAbsolutePath()+ "/"+ serie.getName());
					if(!fcarserie.exists())
						fcarserie.mkdirs();

					sum2++;
					if(debug && sum2 > MAX_DEBUG_LINE){
						break;
					}
					println( "    " +serie.getName() );
					get = new HttpGet( serie.getUrl() );
					try{
						httpResponse = httpclient.execute(get);
						String htmlstring = EntityUtils.toString(httpResponse.getEntity());
						Document html = Jsoup.parse(htmlstring);
						Elements imagesElements = html.select(".row .column .uibox .uibox-con ul li>a"); // picture list

						int sum3 = 0;
						for(Element em : imagesElements){
							sum3 ++ ;
							if(debug && sum3 > MAX_DEBUG_LINE){
								break;
							}
							if(em!=null){
								String href = this.BASE_URL + em.attr("href");
								get = new HttpGet( href );
								httpResponse = httpclient.execute(get);
								String htmlstring2 = EntityUtils.toString(httpResponse.getEntity());
								Document html2 = Jsoup.parse(htmlstring2);
								Element img = html2.getElementById("img");
								if(img!=null){
									CarSerieImage  im = new CarSerieImage(em.attr("title"), img.attr("src"));
									print( "        " +im.getTitle()+"  img = "+im.getSrc() );
									downloadPhotos(im.getSrc(), fcarserie.getAbsolutePath(), im.getTitle()+"_"+sum3+".jpg");
									println("");
								}
							}
						}
					}catch(Exception e){
						e.printStackTrace();
						// free the connection held by the failed request before the next series
						get.releaseConnection();
					}
				}
			}
		} finally {
			// get stays null when no series was requested
			if(get != null){
				get.releaseConnection();
			}
			httpclient.getConnectionManager().shutdown();
		}
	}

	/**
	 * Returns the root directory setting.
	 *
	 * @return the current value of DIR_ROOT
	 */
	public String getDIR_ROOT() {
		return this.DIR_ROOT;
	}
	/**
	 * Replaces the root directory setting. Only DIR_ROOT itself is reassigned here.
	 *
	 * @param dIR_ROOT new value of DIR_ROOT
	 */
	public void setDIR_ROOT(String dIR_ROOT) {
		this.DIR_ROOT = dIR_ROOT;
	}

	/**
	 * Reports whether the bDownloadImage switch is on.
	 *
	 * @return the current value of bDownloadImage
	 */
	public boolean isbDownloadImage() {
		return this.bDownloadImage;
	}

	/**
	 * Turns the bDownloadImage switch on or off.
	 *
	 * @param bDownloadImage new value of the switch
	 */
	public void setbDownloadImage(boolean bDownloadImage) {
		this.bDownloadImage = bDownloadImage;
	}


	/**
	 * Reports whether the bGetModelDetail switch is on.
	 *
	 * @return the current value of bGetModelDetail
	 */
	public boolean isbGetModelDetail() {
		return this.bGetModelDetail;
	}

	/**
	 * Turns the bGetModelDetail switch on or off.
	 *
	 * @param bGetModelDetail new value of the switch
	 */
	public void setbGetModelDetail(boolean bGetModelDetail) {
		this.bGetModelDetail = bGetModelDetail;
	}
}


评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值