抓取canobd2网站数据

本文介绍了一个使用Java实现的HTTP客户端示例，通过该客户端可以发送POST请求并获取响应结果。此外，还展示了如何利用此客户端爬取特定网站上关于不同品牌及年份汽车的诊断接口位置信息，并将抓取到的数据保存到文件中以便后续处理。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

/* 
 * Created on 2009-10-12 
 * Copyright 2009 by www.xfok.net. All Rights Reserved 
 * 
 */
package test;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.PostMethod;

/**
 * @author YangHua 转载请注明出处:http://www.xfok.net/2009/10/124488.html
 */
public class MyClient {
 /**
  * 的到Http请求结果
  *
  * @param url请求地址
  * @param parms请求参数
  * @return
  */
 public byte[] getBody(String url, Map parms) {
  byte[] body = null;
  // 构造HttpClient的实例
  HttpClient httpClient = new HttpClient();
  // 创建post方法的实例
  PostMethod postMethod = new PostMethod(url);
  // 填入各个表单域的值
  NameValuePair[] data = new NameValuePair[parms.keySet().size()];
  Iterator it = parms.entrySet().iterator();
  int i = 0;
  while (it.hasNext()) {
   Map.Entry entry = (Map.Entry) it.next();
   Object key = entry.getKey();
   Object value = entry.getValue();
   data[i] = new NameValuePair(key.toString(), value.toString());
   i++;
  }

  // 将表单的值放入postMethod中
  postMethod.setRequestBody(data);
  try {
   // 执行postMethod
   int statusCode = httpClient.executeMethod(postMethod); // HttpClient对于要求接受后继服务的请求,象POST和PUT等不能自动处理转发
   // 301或者302
   if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY
     || statusCode == HttpStatus.SC_MOVED_TEMPORARILY) {
    // 从头中取出转向的地址
    Header locationHeader = postMethod
      .getResponseHeader("location");
    String location = null;
    if (locationHeader != null) {
     location = locationHeader.getValue();
     System.out
       .println("The page was redirected to:" + location);
    } else {
     System.err.println("Location field value is null.");
    }
   }
   body = postMethod.getResponseBody();

  } catch (Exception e) {
   e.printStackTrace();
  }
  return body;
 }

 public static void main(String[] args) {

  String url = "http://www.canobd2.com/TechInfo/DlcLocator.aspx";
  // 数组:所有车系
  String[] makes = { "Acura", "Audi", "BMW", "Buick", "Cadillac",
    "Chevrolet", "Chrysler", "Daewoo", "Dodge", "Eagle", "Ford",
    "Geo", "GMC", "Honda", "Hummer", "Hyundai", "Infiniti",
    "Isuzu", "Jaguar", "Jeep", "Kia", "Land Rover", "Lexus",
    "Lincoln", "Mazda", "Mercedes-Benz", "Mercury", "MINI",
    "Mitsubishi", "Nissan", "Oldsmobile", "Plymouth", "Pontiac",
    "Porsche", "Saab", "Saturn", "Scion", "Smart", "Subaru",
    "Suzuki", "Toyota", "Volkswagen", "Volvo" };
  // 数组:所有年款
  String[] years = { "2010", "2009", "2008", "2007", "2006", "2005",
    "2004", "2003", "2002", "2001", "2000", "1999", "1998", "1997",
    "1996" };

  Map parms1 = new HashMap();
  MyClient client = new MyClient();
  parms1.put("submitButton", "Submit");
  parms1.put("__EVENTARGUMENT", "");
  parms1.put("__sx", "0");
  parms1
    .put(
      "__VIEWSTATE",
      "/wEPDwUKMTQzMzU1MTQxOA8WBB4OX19QcmV2aW91c1BhZ2VlHgtfX1JldHVyblVybAUZL1RlY2hJbmZvL0RsY0xvY2F0b3IuYXNweBYCAgEPZBYIZg9kFgZmDxYCHgVzdHlsZQUNZGlzcGxheTpub25lO2QCAg8WAh4HVmlzaWJsZWdkAgMPFgIfA2hkAgIPFCsAAWRkAgMPFCsAAWRkAgQPFCsAAWRkZLivoIUw0bAn8SSzCNO/mKrSXsQ/");
  parms1.put("__sy", "167");
  FileOutputStream fos = null;
  PrintWriter pw = null;
  // 将拼装的数据写到文件中去,方便批量插入数据库
  try {
   try {
    File file = new File("D://dlc.txt");
    fos = new FileOutputStream(file);
    pw = new PrintWriter(fos);
    Date startTime = new Date();
    for (int i = 0; i < makes.length; i++) {
     parms1.put("dlcMake", makes[i]);
     for (int j = 0; j < years.length; j++) {
      parms1.put("dlcYear", years[j]);
      parms1.put("__EVENTTARGET", "dlcYear");
      // 根据车系、年款查出车型
      String result2 = new String(client.getBody(url, parms1));
      // 处理result2数据,提炼出车型数组
      // 数据格式形如:1window['dlcModel_jsobject'].UpdateOptions(new
      // Array({Value:'',Text:'Select...',Selected:'true'}));
      // 取Value:''里的值
      //System.out.println("result2: " + result2);
      String[] result2split = result2.split("\\{");// 以“{”分割
      // 以“{”分割的字符串的个数
      int length = result2split.length;
      int firstIndex = 0;
      int secondIndex = 0;
      int thirdIndex = 0;
      // 根据数据格式,以”{“分割后,第二个Value为空,从分割后的第三个字符串开始
      if (length > 2) {
       String[] models = new String[length - 2];
       for (int k = 2; k < length; k++) {
        firstIndex = result2split[k].indexOf("Value:'");
        secondIndex = firstIndex + 7;
        thirdIndex = result2split[k].indexOf("'",
          secondIndex);
        models[k - 2] = result2split[k].substring(
          secondIndex, thirdIndex);
//        System.out.println("make1: " + makes[i]
//          + ", year1: " + years[j] + ", model1: "
//          + models[k - 2]);
       }
       parms1.put("__EVENTTARGET", "ajaxSubmitButton");
       parms1.put("__sy", "204");
       for (int m = 0; m < models.length; m++) {
        parms1.put("dlcModel", models[m]);
        // 根据车系、年款、车型查汽车诊断座位置
        String result3 = new String(client.getBody(url,
          parms1));
//        System.out.println("make: " + makes[i]
//          + ", year: " + years[j] + ", model: "
//          + models[m]);
        //System.out.println("result3: " + result3);
        String result3split[] = result3.split("\"");
        // for(String str:result3split)
        // System.out.println(str);
//        System.out.println(makes[i] + "@" + years[j]
//          + "@" + models[m] + "@"
//          + result3split[5] + "@"
//          + result3split[7] + "@"
//          + result3split[9] + "@"
//          + result3split[11]);// 6 8 10 12
        String ret =  models[m] + "@" + models[m] + "@" +"15" + "@"
          + makes[i] + "@" +years[j]
          + "@" + result3split[7] + "@"
          + result3split[9] + "@"
          + result3split[11]+"@!";
        pw.println(ret);
        pw.flush();
       }
      }
     }
    }
    Date endTime = new Date();
    long time = endTime.getTime() -startTime.getTime();
    
    System.out.println("------------------get data over--------------used time:"+time);
   } finally {
    if (fos != null) {
     fos.close();
    }
    if (pw != null) {
     pw.close();
    }
   }
  } catch (Exception e) {
   // TODO: handle exception
  }
 }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值