// There are quite a few problems in this code; please take a look and share your suggestions.
package test;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
/**
 * Downloads a search-result page and extracts, by plain string scanning, the
 * link, title and summary of every result table found in it.
 *
 * <p>The markers and offsets used here are tied to one specific page layout
 * (presumably Baidu's result markup at the time of writing -- verify against
 * the live page before relying on them).
 */
public class readBaidu {

    /** Opening markup that identifies one result table. */
    private static final String RESULT_OPEN =
            "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\"><tr><td class=f>";

    /** Closing markup of a table. */
    private static final String TABLE_CLOSE = "</table>";

    /** Marker that precedes the link target inside a result block. */
    private static final String HREF_OPEN = "href=\"";

    /**
     * Distance from the start of {@code <fontsize=} to the title text.
     * Presumably the length of a tag such as {@code <fontsize="3">} once
     * spaces have been stripped -- TODO confirm against the live markup.
     */
    private static final int TITLE_SKIP = 14;

    /**
     * Distance from the first {@code <br>} to the summary text. Presumably
     * the length of {@code <br><fontsize=-1>} once spaces have been stripped
     * -- TODO confirm against the live markup.
     */
    private static final int SUMMARY_SKIP = 17;

    /** Page fetched by {@link #main} when no URL is given on the command line. */
    private static final String DEFAULT_PATH = "http://www.baidu.com/s?wd=csdn&pn=10";

    /**
     * Extracts one {@code baiduBean} per result table found in the page.
     *
     * <p>The page is scanned forward with a moving index instead of deleting
     * each processed table from the string. This finds a table that starts at
     * index 0, never mistakes an unrelated table with the same opening tag for
     * a result, stops cleanly on a truncated page, and reports every result
     * table (identical tables are no longer collapsed into one).
     *
     * @param s the page markup; {@code null} is treated as an empty page
     * @return the extracted results in page order, never {@code null}
     * @throws Exception kept for source compatibility with existing callers
     */
    public List<baiduBean> readHTML(String s) throws Exception {
        List<baiduBean> results = new ArrayList<baiduBean>();
        if (s == null) {
            return results;
        }
        int from = 0;
        while (true) {
            int start = s.indexOf(RESULT_OPEN, from);
            if (start < 0) {
                break;
            }
            int close = s.indexOf(TABLE_CLOSE, start);
            if (close < 0) {
                // Truncated page: the last table never closes, nothing more to extract.
                break;
            }
            int end = close + TABLE_CLOSE.length();
            // The field extractors expect the block with every space removed.
            String compact = s.substring(start, end).replace(" ", "");
            baiduBean bb = new baiduBean();
            bb.setContents(getShow(compact));
            bb.setUrl(getURL(compact));
            bb.setTitle(getTitle(compact));
            results.add(bb);
            from = end;
        }
        return results;
    }

    /**
     * Downloads the resource at {@code path} and returns it as one string with
     * the line terminators dropped.
     *
     * <p>The text is decoded with the charset declared in the response's
     * {@code Content-Type} header when there is one and it is supported;
     * otherwise the platform default charset is used, as before. The
     * connection's stream is always closed.
     *
     * @param path the URL to fetch
     * @return the concatenated lines of the response body
     * @throws Exception if the URL is malformed or the transfer fails
     */
    public String getContents(String path) throws Exception {
        URLConnection uc = new URL(path).openConnection();
        java.io.InputStream in = uc.getInputStream();
        try {
            BufferedReader reader =
                    new BufferedReader(newReader(in, charsetOf(uc.getContentType())));
            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
            return page.toString();
        } finally {
            in.close();
        }
    }

    /** Wraps {@code in} in a reader for {@code charset}, or the platform default if unusable. */
    private static InputStreamReader newReader(java.io.InputStream in, String charset) {
        if (charset != null) {
            try {
                return new InputStreamReader(in, charset);
            } catch (java.io.UnsupportedEncodingException ignored) {
                // Unknown or malformed charset name: decode with the platform default instead.
            }
        }
        return new InputStreamReader(in);
    }

    /** Returns the {@code charset} parameter of a Content-Type value, or {@code null} if absent. */
    private static String charsetOf(String contentType) {
        if (contentType == null) {
            return null;
        }
        String key = "charset=";
        String[] parts = contentType.split(";");
        for (int i = 1; i < parts.length; i++) {
            String param = parts[i].trim();
            // regionMatches(ignoreCase) is locale-independent, unlike toLowerCase().
            if (param.regionMatches(true, 0, key, 0, key.length())) {
                String name = param.substring(key.length()).trim();
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                return name.length() == 0 ? null : name;
            }
        }
        return null;
    }

    /**
     * Returns the text between the first {@code href="} and the character
     * just before the following {@code target=}.
     *
     * @param con one result block with all spaces removed
     * @return the extracted text, or an empty string when either marker is missing
     */
    public String getURL(String con) {
        if (con == null) {
            return "";
        }
        int marker = con.indexOf(HREF_OPEN);
        if (marker < 0) {
            return "";
        }
        int start = marker + HREF_OPEN.length();
        int target = con.indexOf("target=", start);
        // One character before "target=" is dropped (presumably the closing quote).
        int end = target - 1;
        if (target < 0 || end < start) {
            return "";
        }
        return con.substring(start, end);
    }

    /**
     * Returns the text that starts {@link #TITLE_SKIP} characters after the
     * first {@code <fontsize=} and ends at the following {@code </a>}, with
     * {@code fontcolor} turned back into {@code font color}.
     *
     * @param con one result block with all spaces removed
     * @return the extracted text, or an empty string when either marker is missing
     */
    public String getTitle(String con) {
        if (con == null) {
            return "";
        }
        int marker = con.indexOf("<fontsize=");
        if (marker < 0) {
            return "";
        }
        int start = marker + TITLE_SKIP;
        int end = con.indexOf("</a>", start);
        if (end < 0) {
            return "";
        }
        // Space stripping glued "font color" together; undo that inside the text.
        return con.substring(start, end).replace("fontcolor", "font color");
    }

    /**
     * Returns the text that starts {@link #SUMMARY_SKIP} characters after the
     * first {@code <br>} and ends at the next {@code <br>}, with
     * {@code fontcolor} turned back into {@code font color}.
     *
     * @param con one result block with all spaces removed
     * @return the extracted text, or an empty string when either marker is missing
     */
    public String getShow(String con) {
        if (con == null) {
            return "";
        }
        int marker = con.indexOf("<br>");
        if (marker < 0) {
            return "";
        }
        int start = marker + SUMMARY_SKIP;
        int end = con.indexOf("<br>", start);
        if (end < 0) {
            return "";
        }
        // Space stripping glued "font color" together; undo that inside the text.
        return con.substring(start, end).replace("fontcolor", "font color");
    }

    /**
     * Fetches a result page and prints the URL, title and summary of every
     * result, one value per line.
     *
     * @param args optional: the page URL as the first argument; a built-in
     *             default query is used when it is omitted
     * @throws Exception if the page cannot be downloaded
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        readBaidu r = new readBaidu();
        String rest = r.getContents(path);
        List<baiduBean> results = r.readHTML(rest);
        for (baiduBean b : results) {
            System.out.println(b.url);
            System.out.println(b.title);
            System.out.println(b.contents);
        }
    }
}
package test;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
/**
 * Downloads a search-result page and extracts, by plain string scanning, the
 * link, title and summary of every result table found in it.
 *
 * <p>The markers and offsets used here are tied to one specific page layout
 * (presumably Baidu's result markup at the time of writing -- verify against
 * the live page before relying on them).
 */
public class readBaidu {

    /** Opening markup that identifies one result table. */
    private static final String RESULT_OPEN =
            "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\"><tr><td class=f>";

    /** Closing markup of a table. */
    private static final String TABLE_CLOSE = "</table>";

    /** Marker that precedes the link target inside a result block. */
    private static final String HREF_OPEN = "href=\"";

    /**
     * Distance from the start of {@code <fontsize=} to the title text.
     * Presumably the length of a tag such as {@code <fontsize="3">} once
     * spaces have been stripped -- TODO confirm against the live markup.
     */
    private static final int TITLE_SKIP = 14;

    /**
     * Distance from the first {@code <br>} to the summary text. Presumably
     * the length of {@code <br><fontsize=-1>} once spaces have been stripped
     * -- TODO confirm against the live markup.
     */
    private static final int SUMMARY_SKIP = 17;

    /** Page fetched by {@link #main} when no URL is given on the command line. */
    private static final String DEFAULT_PATH = "http://www.baidu.com/s?wd=csdn&pn=10";

    /**
     * Extracts one {@code baiduBean} per result table found in the page.
     *
     * <p>The page is scanned forward with a moving index instead of deleting
     * each processed table from the string. This finds a table that starts at
     * index 0, never mistakes an unrelated table with the same opening tag for
     * a result, stops cleanly on a truncated page, and reports every result
     * table (identical tables are no longer collapsed into one).
     *
     * @param s the page markup; {@code null} is treated as an empty page
     * @return the extracted results in page order, never {@code null}
     * @throws Exception kept for source compatibility with existing callers
     */
    public List<baiduBean> readHTML(String s) throws Exception {
        List<baiduBean> results = new ArrayList<baiduBean>();
        if (s == null) {
            return results;
        }
        int from = 0;
        while (true) {
            int start = s.indexOf(RESULT_OPEN, from);
            if (start < 0) {
                break;
            }
            int close = s.indexOf(TABLE_CLOSE, start);
            if (close < 0) {
                // Truncated page: the last table never closes, nothing more to extract.
                break;
            }
            int end = close + TABLE_CLOSE.length();
            // The field extractors expect the block with every space removed.
            String compact = s.substring(start, end).replace(" ", "");
            baiduBean bb = new baiduBean();
            bb.setContents(getShow(compact));
            bb.setUrl(getURL(compact));
            bb.setTitle(getTitle(compact));
            results.add(bb);
            from = end;
        }
        return results;
    }

    /**
     * Downloads the resource at {@code path} and returns it as one string with
     * the line terminators dropped.
     *
     * <p>The text is decoded with the charset declared in the response's
     * {@code Content-Type} header when there is one and it is supported;
     * otherwise the platform default charset is used, as before. The
     * connection's stream is always closed.
     *
     * @param path the URL to fetch
     * @return the concatenated lines of the response body
     * @throws Exception if the URL is malformed or the transfer fails
     */
    public String getContents(String path) throws Exception {
        URLConnection uc = new URL(path).openConnection();
        java.io.InputStream in = uc.getInputStream();
        try {
            BufferedReader reader =
                    new BufferedReader(newReader(in, charsetOf(uc.getContentType())));
            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
            return page.toString();
        } finally {
            in.close();
        }
    }

    /** Wraps {@code in} in a reader for {@code charset}, or the platform default if unusable. */
    private static InputStreamReader newReader(java.io.InputStream in, String charset) {
        if (charset != null) {
            try {
                return new InputStreamReader(in, charset);
            } catch (java.io.UnsupportedEncodingException ignored) {
                // Unknown or malformed charset name: decode with the platform default instead.
            }
        }
        return new InputStreamReader(in);
    }

    /** Returns the {@code charset} parameter of a Content-Type value, or {@code null} if absent. */
    private static String charsetOf(String contentType) {
        if (contentType == null) {
            return null;
        }
        String key = "charset=";
        String[] parts = contentType.split(";");
        for (int i = 1; i < parts.length; i++) {
            String param = parts[i].trim();
            // regionMatches(ignoreCase) is locale-independent, unlike toLowerCase().
            if (param.regionMatches(true, 0, key, 0, key.length())) {
                String name = param.substring(key.length()).trim();
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                return name.length() == 0 ? null : name;
            }
        }
        return null;
    }

    /**
     * Returns the text between the first {@code href="} and the character
     * just before the following {@code target=}.
     *
     * @param con one result block with all spaces removed
     * @return the extracted text, or an empty string when either marker is missing
     */
    public String getURL(String con) {
        if (con == null) {
            return "";
        }
        int marker = con.indexOf(HREF_OPEN);
        if (marker < 0) {
            return "";
        }
        int start = marker + HREF_OPEN.length();
        int target = con.indexOf("target=", start);
        // One character before "target=" is dropped (presumably the closing quote).
        int end = target - 1;
        if (target < 0 || end < start) {
            return "";
        }
        return con.substring(start, end);
    }

    /**
     * Returns the text that starts {@link #TITLE_SKIP} characters after the
     * first {@code <fontsize=} and ends at the following {@code </a>}, with
     * {@code fontcolor} turned back into {@code font color}.
     *
     * @param con one result block with all spaces removed
     * @return the extracted text, or an empty string when either marker is missing
     */
    public String getTitle(String con) {
        if (con == null) {
            return "";
        }
        int marker = con.indexOf("<fontsize=");
        if (marker < 0) {
            return "";
        }
        int start = marker + TITLE_SKIP;
        int end = con.indexOf("</a>", start);
        if (end < 0) {
            return "";
        }
        // Space stripping glued "font color" together; undo that inside the text.
        return con.substring(start, end).replace("fontcolor", "font color");
    }

    /**
     * Returns the text that starts {@link #SUMMARY_SKIP} characters after the
     * first {@code <br>} and ends at the next {@code <br>}, with
     * {@code fontcolor} turned back into {@code font color}.
     *
     * @param con one result block with all spaces removed
     * @return the extracted text, or an empty string when either marker is missing
     */
    public String getShow(String con) {
        if (con == null) {
            return "";
        }
        int marker = con.indexOf("<br>");
        if (marker < 0) {
            return "";
        }
        int start = marker + SUMMARY_SKIP;
        int end = con.indexOf("<br>", start);
        if (end < 0) {
            return "";
        }
        // Space stripping glued "font color" together; undo that inside the text.
        return con.substring(start, end).replace("fontcolor", "font color");
    }

    /**
     * Fetches a result page and prints the URL, title and summary of every
     * result, one value per line.
     *
     * @param args optional: the page URL as the first argument; a built-in
     *             default query is used when it is omitted
     * @throws Exception if the page cannot be downloaded
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        readBaidu r = new readBaidu();
        String rest = r.getContents(path);
        List<baiduBean> results = r.readHTML(rest);
        for (baiduBean b : results) {
            System.out.println(b.url);
            System.out.println(b.title);
            System.out.println(b.contents);
        }
    }
}