/*
 * Author: Lovingshu
 * Date: 2012/09/25
 * Remark: Fetches the source code of web pages, optionally through a proxy.
 */
/**
 * Utility methods for downloading the source text of a web page, optionally
 * through an authenticating proxy configured via JVM-wide system properties.
 */
public class WebTools {

    /**
     * Demo entry point: configures a proxy with sample credentials, downloads
     * one page and prints either its source or the failure message.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String proxy = "proxy1.bj.petrochina";
        int port = 8080;
        String username = "name";
        String password = "pwd";
        initProxy(proxy, port, username, password);

        String url = "http://www.google.com";
        String[] res = WebTools.getSourceCode(url, "UTF-8");
        if ("0".equals(res[0])) {
            System.out.println("Geting Source Code Failed With:" + res[1]);
        } else {
            System.out.println("The Source Code Of " + url + " Is :\r\n" + res[1]);
        }
    }

    /**
     * Initializes the network proxy. Call this when the internal network can
     * only reach external hosts through a proxy.
     *
     * <p>The settings are written to JVM-wide system properties, and the
     * credentials (when given) are installed as the default
     * {@code Authenticator}, so the call affects the whole process.
     *
     * @param host     proxy host name; must not be {@code null}
     * @param port     proxy port number
     * @param username proxy user name, or {@code null} when the proxy needs no
     *                 authentication (no authenticator is installed then)
     * @param password proxy password, or {@code null} for an empty password
     * @throws NullPointerException if {@code host} is {@code null}
     */
    public static void initProxy(String host, int port, final String username,
            final String password) {
        // Fail before touching any global state instead of half-way through.
        if (host == null) {
            throw new NullPointerException("host must not be null");
        }

        if (username != null) {
            // A missing password is treated as empty rather than blowing up
            // later inside the authentication callback.
            final char[] secret = password == null ? new char[0] : password.toCharArray();
            Authenticator.setDefault(new Authenticator() {
                @Override
                protected PasswordAuthentication getPasswordAuthentication() {
                    // Only answer challenges issued by a proxy; never hand the
                    // proxy credentials to an origin server that asks for auth.
                    if (getRequestorType() != RequestorType.PROXY) {
                        return null;
                    }
                    return new PasswordAuthentication(username, secret);
                }
            });
        }

        String portText = Integer.toString(port);

        // Documented networking properties for HTTP and HTTPS proxies.
        System.setProperty("http.proxyHost", host);
        System.setProperty("http.proxyPort", portText);
        System.setProperty("https.proxyHost", host);
        System.setProperty("https.proxyPort", portText);

        // Legacy keys set by earlier versions of this method; kept so that
        // anything still reading them sees the same values as before.
        System.setProperty("proxyType", "4");
        System.setProperty("proxyPort", portText);
        System.setProperty("proxyHost", host);
        System.setProperty("proxySet", "true");
    }

    /**
     * Downloads the source text of a web page.
     *
     * <p>Every line read is terminated with {@code "\r\n"} in the result,
     * whatever line separator the server used.
     *
     * @param pageURL  address of the page (an HTTP or HTTPS URL)
     * @param encoding name of the character encoding used to decode the page
     * @return a two-element array: element 0 is {@code "1"} on success or
     *         {@code "0"} on failure; element 1 is the page source on success
     *         or a non-null description of the failure
     */
    public static String[] getSourceCode(String pageURL, String encoding) {
        if (pageURL == null) {
            return new String[]{"0", "pageURL must not be null"};
        }
        if (encoding == null) {
            return new String[]{"0", "encoding must not be null"};
        }

        StringBuilder pageHTML = new StringBuilder();
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(pageURL).openConnection();
            connection.setRequestProperty("User-Agent", "MSIE 7.0");
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), encoding));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    pageHTML.append(line).append("\r\n");
                }
            } finally {
                // Close the reader even when reading fails part-way through.
                br.close();
            }
        } catch (Exception e) {
            // Any failure is reported through the status array, never thrown.
            return new String[]{"0", describe(e)};
        } finally {
            // Release the connection on both the success and failure paths.
            if (connection != null) {
                connection.disconnect();
            }
        }
        return new String[]{"1", pageHTML.toString()};
    }

    /** Returns the exception message, or its string form when it has no message. */
    private static String describe(Exception e) {
        String message = e.getMessage();
        return message != null ? message : e.toString();
    }
}
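// Fetching web page source code, including from an intranet that reaches the external network through a proxy.
// Latest recommended article published on 2024-04-18 08:18:20.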