import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
/**
 * Small HTTP helper utilities built on {@link HttpURLConnection}: fetch a page body as text
 * and measure how long a URL takes to answer.
 */
public class URLFile {

    /**
     * Fetches the resource at the given URL with a single HTTP GET and returns its body as text.
     *
     * <p>The body is decoded as UTF-8 and read line by line; line terminators are not kept,
     * so the returned text is the concatenation of all lines.
     *
     * @param urlPath the address to fetch, starting with {@code http://}
     * @return the response body, or the literal string {@code "404"} when the server answers
     *         with HTTP 404 or when any error occurs (malformed URL, non-HTTP URL, I/O failure,
     *         other error status codes)
     */
    public static String getResponseDataFromWeb(String urlPath) {
        StringBuilder body = new StringBuilder();
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(urlPath).openConnection();
            connection.setDoOutput(false);
            connection.setDoInput(true);
            connection.setUseCaches(false);
            connection.setRequestMethod("GET");

            // getResponseCode() performs the request; the same connection is then reused to
            // read the body, so the server is contacted only once per call.
            if (connection.getResponseCode() == HttpURLConnection.HTTP_NOT_FOUND) {
                return "404";
            }

            // Convert the returned input stream into a string. Closing the reader also closes
            // the wrapped InputStreamReader and the connection's input stream.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "utf-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    body.append(line);
                }
            }
        } catch (Exception e) {
            // Deliberately broad: callers rely on every failure being reported as "404".
            e.printStackTrace();
            return "404";
        } finally {
            // Release the connection on every path; it is null when the URL could not be opened.
            if (connection != null) {
                connection.disconnect();
            }
        }
        return body.toString();
    }

    /**
     * Opens the given URL with an HTTP GET and reports how long it took to receive the
     * response status.
     *
     * <p>Timing starts just before the connection is opened and stops once the response code
     * has been received. The elapsed time is reported in whole seconds (fractions are dropped).
     *
     * @param urlPath the address to open, starting with {@code http://}
     * @return a message containing the URL and the elapsed seconds on success; a message
     *         reporting the 404 and the URL when the server answers with HTTP 404; or a
     *         message containing the exception text when any error occurs
     */
    public static String doTimer(String urlPath) {
        HttpURLConnection connection = null;
        try {
            URL url = new URL(urlPath);
            // nanoTime() is monotonic, so the measurement is immune to wall-clock adjustments.
            long startNanos = System.nanoTime();
            connection = (HttpURLConnection) url.openConnection();
            connection.setDoOutput(false);
            connection.setDoInput(true);
            connection.setUseCaches(false);
            connection.setRequestMethod("GET");

            // getResponseCode() connects and sends the request; no explicit connect() is needed.
            int status = connection.getResponseCode();
            if (status == HttpURLConnection.HTTP_NOT_FOUND) {
                return "链接错误404:" + urlPath;
            }

            long elapsedSeconds = (System.nanoTime() - startNanos) / 1000000000L;
            return "任务完成:" + urlPath + ",用时:" + elapsedSeconds + "秒";
        } catch (Exception e) {
            // Deliberately broad: every failure is turned into a message for the caller.
            e.printStackTrace();
            return "系统异常:" + e.getMessage();
        } finally {
            // Release the connection on every path, including the 404 one; it is null when
            // the URL could not be parsed or opened.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Demo entry point: times a request to a fixed URL and prints the resulting message.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://www.baidu.com";
        System.out.println(URLFile.doTimer(url));
    }
}
// 作为一个屌丝程序员不得不收藏的工具类 一 网站爬虫工具类
// 最新推荐文章于 2025-08-18 11:29:28 发布