爬虫学习(一)
抓取网页源码
代码块:
package com.spider;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * Minimal crawler example: downloads the source of a fixed web page and prints it
 * to standard output.
 */
public class BaiduSourceCodeTest {

    /** Address of the page whose source is fetched. */
    private static final String TARGET_URL = "http://www.baidu.com";

    /** Maximum time, in milliseconds, to wait for the connection to be established. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time, in milliseconds, to wait for data once connected. */
    private static final int READ_TIMEOUT_MS = 10000;

    /**
     * Fetches {@link #TARGET_URL} and prints the downloaded text.
     *
     * <p>The lines of the response are concatenated without line separators. If the
     * download fails, the stack trace is printed and whatever was read before the
     * failure (possibly nothing) is still printed.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        StringBuilder result = new StringBuilder();
        try {
            URLConnection con = new URL(TARGET_URL).openConnection();
            // Without explicit timeouts a stalled server would block this program forever.
            con.setConnectTimeout(CONNECT_TIMEOUT_MS);
            con.setReadTimeout(READ_TIMEOUT_MS);
            con.connect();

            // Decode with the charset the server declares instead of the platform default,
            // which differs between machines and garbles non-ASCII pages.
            Charset charset = charsetOf(con.getContentType());
            try (BufferedReader in =
                    new BufferedReader(new InputStreamReader(con.getInputStream(), charset))) {
                String line;
                while ((line = in.readLine()) != null) {
                    result.append(line);
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println(result.toString());
    }

    /**
     * Extracts the charset named by the {@code charset=} parameter of a Content-Type
     * header value.
     *
     * @param contentType the Content-Type header value, may be {@code null}
     * @return the declared charset, or UTF-8 when the header is absent, has no
     *         {@code charset} parameter, or names a charset this JVM does not support
     */
    private static Charset charsetOf(String contentType) {
        if (contentType == null) {
            return StandardCharsets.UTF_8;
        }
        final String key = "charset=";
        for (String part : contentType.split(";")) {
            String param = part.trim();
            if (!param.regionMatches(true, 0, key, 0, key.length())) {
                continue;
            }
            String name = param.substring(key.length()).trim();
            // The value may be a quoted string, e.g. charset="utf-8".
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1);
            }
            try {
                return Charset.forName(name);
            } catch (IllegalArgumentException ignored) {
                // Malformed or unsupported charset name: use the default instead of failing.
                return StandardCharsets.UTF_8;
            }
        }
        return StandardCharsets.UTF_8;
    }
}