Java抓取https网页数据,解决peer not authenticated异常

在抓取https开头的网页时,可能会报错:unable to find valid certification path to requested target,或者出现peer not authenticated异常。原因可能是你使用的是jdk1.6,可以换成jdk1.7试试;如果还是报错,那就重新包装抓取时用到的HttpClient类。注意:下面的做法会信任所有证书并跳过主机名校验,只适合测试或抓取公开数据,不要用于传输敏感信息。代码如下:

新建HttpsClient类


import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.http.client.HttpClient;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;

/**
 * Factory for an Apache HttpClient whose "https" scheme accepts every server
 * certificate and every host name.
 *
 * <p><strong>Security warning:</strong> the trust manager below performs no
 * certificate validation and the socket factory uses
 * {@code ALLOW_ALL_HOSTNAME_VERIFIER}, so connections made through the returned
 * client are open to man-in-the-middle attacks. Use it only for testing or for
 * fetching public data, never for transmitting secrets.
 */
public class HttpsClient {

	/** Default port registered for the permissive "https" scheme. */
	private static final int HTTPS_PORT = 443;

	/**
	 * Builds a new client that reuses the parameters of {@code httpClient} but
	 * routes "https" through an SSL socket factory that trusts all certificates.
	 *
	 * @param httpClient existing client whose {@code getParams()} are copied
	 *                   into the new client
	 * @return the new permissive client, or {@code null} if it could not be
	 *         created (the failure is printed to standard error)
	 */
	public static DefaultHttpClient getNewHttpsClient(HttpClient httpClient){

		try {
			SSLContext ctx = SSLContext.getInstance("TLS");
			X509TrustManager tm = new X509TrustManager() {
				@Override
				public X509Certificate[] getAcceptedIssuers() {
					// The X509TrustManager contract requires a non-null
					// (possibly empty) array; returning null can trigger a
					// NullPointerException inside the JSSE implementation.
					return new X509Certificate[0];
				}

				@Override
				public void checkClientTrusted(X509Certificate[] arg0,
						String arg1) throws CertificateException {
					// Intentionally empty: every client certificate is accepted.
				}

				@Override
				public void checkServerTrusted(X509Certificate[] arg0,
						String arg1) throws CertificateException {
					// Intentionally empty: every server certificate is accepted.
				}
			};
			// No key managers and the default SecureRandom; only the
			// trust-everything manager is installed.
			ctx.init(null, new TrustManager[] { tm }, null);
			SSLSocketFactory ssf = new SSLSocketFactory(ctx,SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
			SchemeRegistry registry = new SchemeRegistry();
			registry.register(new Scheme("https", HTTPS_PORT, ssf));
			ThreadSafeClientConnManager mgr = new ThreadSafeClientConnManager(registry);
			return new DefaultHttpClient(mgr, httpClient.getParams());
		} catch (Exception ex) {
			// Kept as best-effort: existing callers expect null on failure
			// rather than an exception.
			ex.printStackTrace();
			return null;
		}

	}
}

 

在抓取之前,先用HttpsClient重新获取httpClient对象(httpClient = HttpsClient.getNewHttpsClient(httpClient);),示例如下:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;

import org.apache.commons.httpclient.HttpStatus;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
/**
 * Demo program: downloads one HTTPS page through the permissive client built
 * by {@code HttpsClient} and prints the HTML to standard output.
 */
public class Test {

	public static void main(String[] args) {
		String url ="https://crs.edqm.eu/db/4DCGI/search?vSelectName=2&vContains=1&vtUserName=a&OK=Search&vTypeCRS=";
		String html = getPageHtml(url);
		System.out.println(html);
	}


	/**
	 * Fetches the page at {@code currentUrl} and returns its body as a string.
	 *
	 * @param currentUrl absolute URL of the page to download
	 * @return the response body when the server answers with status 200;
	 *         an empty string when the status is anything else, the response
	 *         has no body, the client could not be created, or an
	 *         {@link IOException} occurs (the exception is printed to
	 *         standard error)
	 */
	public static String getPageHtml(String currentUrl) {
		HttpClient httpClient = HttpsClient.getNewHttpsClient(new DefaultHttpClient());
		if (httpClient == null) {
			// The factory returns null when the SSL setup fails; treat that
			// like any other fetch failure instead of hitting an NPE below.
			return "";
		}
		String html = "";
		try {
			HttpGet request = new HttpGet(currentUrl);
			HttpResponse response = httpClient.execute(request);
			if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
				HttpEntity entity = response.getEntity();
				// A 200 response may legitimately carry no entity.
				if (entity != null) {
					html = EntityUtils.toString(entity);
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// A fresh client (and connection pool) is created on every call,
			// so it must be shut down here or its sockets leak, including on
			// non-200 responses whose entity was never consumed.
			httpClient.getConnectionManager().shutdown();
		}
		return html;
	}
}

 

使用的jar:

commons-httpclient-3.1.jar

commons-logging.jar

httpclient-4.2.5.jar

httpcore-4.2.4.jar


以上代码使用jdk1.7测试通过。

源码和jar已上传到 http://download.youkuaiyun.com/detail/itjavaer/8172293 ,导入eclipse中就能运行。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值