HttpClient发送URL

本文介绍了一种利用 Apache Commons HttpClient(3.x)库抓取网页的方法。示例代码展示了如何配置 HttpClient 对象、设置请求头、发送 GET 和 POST 请求,并按指定编码读取响应内容(包括 gzip 压缩的响应)。代码还对 HTTP 500 状态码、空响应及其他异常做了重试处理,以提高请求的成功率。

package com.leiwang.HttpClient;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.GZIPInputStream;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;


public class httpClient {
    private static Map<Thread, HttpClient> clients = new HashMap<Thread, HttpClient>();

    public static void clean() {
        clients.clear();
    }
   
    public static String postPage(String url, String enc, Map<String, String> params) throws Exception {
        System.out.println(url);
        PostMethod method = new PostMethod(url);
        method.getParams().setContentCharset(enc);
        setHeaders(method);
        NameValuePair[] pairs = new NameValuePair[params.size()];
        int i = 0;
        for (Map.Entry<String, String> entry : params.entrySet()) {
            pairs[i++] = new NameValuePair(entry.getKey(), entry.getValue());
        }
        method.setRequestBody(pairs);
        return tryResponse(method, enc);
    }
   


    public static String getPage(String url, String enc) throws Exception {
        //System.out.println("sss" + url);
        GetMethod method;
        int idx = url.indexOf('?');
        if(idx==-1) {
            method = new GetMethod(url);
        } else {
            method = new GetMethod(url.substring(0, idx));
            method.setQueryString(url.substring(idx + 1));
        }
        setHeaders(method);
        return tryResponse(method, enc);
    }

    private static String tryResponse(final HttpMethod method, final String enc) throws Exception {
        HttpClient hc = clients.get(Thread.currentThread());
        if(hc==null) {
            clients.put(Thread.currentThread(), hc = new HttpClient());
            hc.getHttpConnectionManager().getParams().setSoTimeout(30000);
            hc.getHttpConnectionManager().getParams().setConnectionTimeout(30000);
            hc.getParams().setSoTimeout(30000);
            hc.getParams().setConnectionManagerTimeout(30000);
        }
        final String[] s = new String[1];
        final HttpClient fhc = hc;
        final Exception[] fe = new Exception[1];
        Thread thread = new Thread() {
            public void run() {
                while(true) {
                    try {
                        fhc.executeMethod(method);
                        if(method.getStatusCode()==500) throw new Exception("status code: 500");
                        String ss = getResponse(method, enc);
//                        System.out.println(method.getStatusText());
                        if(ss.length()==0) throw new Exception("zero length response text");
                        method.releaseConnection();
                        s[0] =  ss;
                        synchronized (fhc) {
                            fhc.notify();
                        }
                        return;
                    } catch (Exception e) {
                        fe[0] = e;
                        try {
                            Thread.sleep(60*1000);
                        } catch (InterruptedException e1) {
                            e1.printStackTrace();
                        }
                    } finally {
                        method.releaseConnection();
                    }
                }
            }
        };
        thread.start();
        synchronized (fhc) {
            fhc.wait(1200000);
        }
        thread.interrupt();
        thread.stop();
        if(s[0]==null) throw new Exception("fail at last: " + method.getURI(), fe[0]);
        return s[0];
    }

    private static void setHeaders(HttpMethod method) {
        method.getParams().setCookiePolicy(CookiePolicy.RFC_2965);
        method.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14");
        method.setRequestHeader("Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5");
        method.setRequestHeader("Accept-Language", "zh-cn,zh;q=0.5");
        method.setRequestHeader("Accept-Encoding", "gzip,deflate");
        method.setRequestHeader("Accept-Charset", "gb2312,utf-8;q=0.7,*;q=0.7");
        method.setRequestHeader("Keep-Alive", "300");
        method.setRequestHeader("Connection", "Keep-Alive");
    }

    private static String getResponse(HttpMethod method, String enc) throws IOException {
        Header renc = method.getResponseHeader("Content-Encoding");
        InputStream is;
        if(renc!=null && renc.getValue().equals("gzip")) {
            is = new GZIPInputStream(method.getResponseBodyAsStream());
        } else {
            is = method.getResponseBodyAsStream();
        }
        Reader reader = new InputStreamReader(is, enc);
        char[] buf = new char[1024];
        StringBuilder sb = new StringBuilder();
        int rd;
        while ((rd = reader.read(buf)) != -1) {
            sb.append(buf, 0, rd);
        }
        return sb.toString();
    }
   
    public static void main(String[] args){
     
     try {
      String str = "";
      for(int i=0;i<10;i++){
       getPage("http://localhost:8080/wleing/myhttpclient.jsp","GBk");
       System.out.println(".................." + str);
      }
   //System.out.println(".................." + str);
  } catch (Exception e) {
   e.printStackTrace();
  }
     
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值