Java 网络爬虫(爬行器)的代码我已经修改完成。
程序运行效果图:

程序的输出效果图片:

代码
import
java.applet.Applet;
import
java.awt.
*
;
import
java.awt.List;
import
java.awt.event.
*
;
import
java.util.
*
;
import
java.net.
*
;
import
java.io.
*
;


/**
 * A small breadth-first web crawler with an AWT front end.
 *
 * <p>The user enters a starting URL and a target content type. A background
 * thread then walks the pages reachable from that URL (at most
 * {@link #SEARCH_LIMIT} pages or matches), saves every HTML page it reads to
 * disk and lists every link whose content type equals the selected type.
 *
 * <p>The class can run as an applet or, through {@link #main(String[])}, as a
 * stand-alone window.
 *
 * <p>Threading: the crawl runs on {@link #searchThread}. A search is cancelled
 * by clearing that field; the worker compares itself against it at every
 * loop boundary and exits once it is no longer the current search. A worker
 * blocked in a network read only notices the cancellation when the read
 * returns.
 *
 * <p>Limitation: robots.txt handling treats every {@code Disallow:} line as
 * applying to this crawler; {@code User-agent} sections are not evaluated.
 */
public class WebCrawler extends Applet implements ActionListener, Runnable {

    public static final String SEARCH = "Search";

    public static final String STOP = "Stop";

    public static final String DISALLOW = "Disallow:";

    public static final int SEARCH_LIMIT = 50;

    /** Content type of pages that are parsed for links; also assumed when a server reports none. */
    private static final String HTML_TYPE = "text/html";

    /** Location of the robot exclusion file on every host. */
    private static final String ROBOTS_PATH = "/robots.txt";

    /** Characters that terminate an href value (the '#' drops the fragment). */
    private static final String LINK_DELIMITERS = " \t\n\r\"'>#";

    /** Parameter name that introduces the charset in a Content-Type header. */
    private static final String CHARSET_KEY = "charset=";

    /** Directory, file name prefix and suffix used for the saved pages. */
    private static final String SAVE_DIR = "D://Html-Saves//";
    private static final String SAVE_PREFIX = "Xml-Photo";
    private static final String SAVE_SUFFIX = ".xml";

    Panel panelMain;

    List listMatches;

    Label labelStatus;

    // URLs to be searched
    Vector vectorToSearch;

    // URLs already searched
    Vector vectorSearched;

    // URLs which match
    Vector vectorMatches;

    // The worker of the current search, or null when idle. Volatile because it
    // doubles as the cancellation flag read by the worker thread.
    volatile Thread searchThread;

    TextField textURL;

    Choice choiceType;

    /**
     * Builds the user interface and the (empty) search data structures.
     */
    public void init() {

        // set up the main UI panel
        panelMain = new Panel();
        panelMain.setLayout(new BorderLayout(5, 5));

        // text entry components
        Panel panelEntry = new Panel();
        panelEntry.setLayout(new BorderLayout(5, 5));

        Panel panelURL = new Panel();
        panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
        Label labelURL = new Label("Starting URL: ", Label.RIGHT);
        panelURL.add(labelURL);
        textURL = new TextField("", 40);
        panelURL.add(textURL);
        panelEntry.add("North", panelURL);

        Panel panelType = new Panel();
        panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
        Label labelType = new Label("Content type: ", Label.RIGHT);
        panelType.add(labelType);
        choiceType = new Choice();
        choiceType.addItem("text/html");
        choiceType.addItem("audio/basic");
        choiceType.addItem("audio/au");
        choiceType.addItem("audio/aiff");
        choiceType.addItem("audio/wav");
        choiceType.addItem("video/mpeg");
        choiceType.addItem("video/x-avi");
        panelType.add(choiceType);
        panelEntry.add("South", panelType);

        panelMain.add("North", panelEntry);

        // list of result URLs
        Panel panelListButtons = new Panel();
        panelListButtons.setLayout(new BorderLayout(5, 5));

        Panel panelList = new Panel();
        panelList.setLayout(new BorderLayout(5, 5));
        Label labelResults = new Label("Search results");
        panelList.add("North", labelResults);
        Panel panelListCurrent = new Panel();
        panelListCurrent.setLayout(new BorderLayout(5, 5));
        listMatches = new List(10);
        panelListCurrent.add("North", listMatches);
        labelStatus = new Label("");
        panelListCurrent.add("South", labelStatus);
        panelList.add("South", panelListCurrent);

        panelListButtons.add("North", panelList);

        // control buttons
        Panel panelButtons = new Panel();
        Button buttonSearch = new Button(SEARCH);
        buttonSearch.addActionListener(this);
        panelButtons.add(buttonSearch);
        Button buttonStop = new Button(STOP);
        buttonStop.addActionListener(this);
        panelButtons.add(buttonStop);

        panelListButtons.add("South", panelButtons);

        panelMain.add("South", panelListButtons);

        add(panelMain);
        setVisible(true);

        repaint();

        // initialize search data structures
        vectorToSearch = new Vector();
        vectorSearched = new Vector();
        vectorMatches = new Vector();

        // set default for URL access
        URLConnection.setDefaultAllowUserInteraction(false);
    }

    /**
     * Saves a fetched page to disk, creating the target directory if needed.
     * The file is written as UTF-8 so that no character of the decoded page
     * is lost. Failures are reported on the status line, never thrown.
     *
     * @param filePathAndName path of the file to (over)write
     * @param fileContent     text to store; {@code null} is written as "null"
     */
    public void createFile(String filePathAndName, String fileContent) {
        if (filePathAndName == null) {
            setStatus("创建文件操作出错");
            return;
        }

        Writer out = null;
        try {
            File target = new File(filePathAndName);
            File parent = target.getParentFile();
            if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
                throw new IOException("cannot create directory " + parent);
            }
            out = new OutputStreamWriter(new FileOutputStream(target), "UTF-8");
            out.write(String.valueOf(fileContent));
            // Close inside the try: close() flushes, so a failure here means
            // the page was not fully written and must be reported.
            out.close();
            out = null;
        } catch (IOException e) {
            setStatus("创建文件操作出错");
        } catch (SecurityException e) {
            // e.g. running as an unsigned applet without file permission
            setStatus("创建文件操作出错");
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // the primary failure has already been reported
                }
            }
        }
    }

    public void start() {
    }

    /**
     * Requests cancellation of the running search, if any.
     */
    public void stop() {
        if (searchThread != null) {
            setStatus("stopping...");
            searchThread = null;
        }
    }

    public void destroy() {
    }

    /**
     * Checks the host's robots.txt to decide whether {@code url} may be crawled.
     *
     * @param url page about to be fetched
     * @return {@code false} if a {@code Disallow:} rule covers the page or the
     *         robots.txt URL cannot even be formed; {@code true} otherwise,
     *         including when the host has no readable robots.txt
     */
    boolean robotSafe(URL url) {
        URL urlRobot;
        try {
            // same protocol, host and port as the page itself
            urlRobot = new URL(url.getProtocol(), url.getHost(), url.getPort(), ROBOTS_PATH);
        } catch (MalformedURLException e) {
            // something weird is happening, so don't trust it
            return false;
        }

        String strCommands;
        try {
            InputStream urlRobotStream = urlRobot.openStream();
            try {
                strCommands = decode(readAll(urlRobotStream), null);
            } finally {
                closeQuietly(urlRobotStream);
            }
        } catch (IOException e) {
            // if there is no robots.txt file, it is OK to search
            return true;
        }

        return !isPathDisallowed(strCommands, url.getFile());
    }

    /**
     * Tells whether any {@code Disallow:} line of a robots.txt covers a path.
     * Lines are evaluated one by one; {@code #} comments are stripped, the
     * keyword is matched case-insensitively and an empty rule disallows
     * nothing. An empty path is treated as {@code "/"}.
     *
     * @param robotsTxt content of the robots.txt file, may be {@code null}
     * @param path      file part of the URL being checked
     * @return {@code true} if the path starts with a disallowed prefix
     */
    static boolean isPathDisallowed(String robotsTxt, String path) {
        if (robotsTxt == null) {
            return false;
        }
        String target = (path == null || path.length() == 0) ? "/" : path;

        StringTokenizer lines = new StringTokenizer(robotsTxt, "\r\n");
        while (lines.hasMoreTokens()) {
            String line = lines.nextToken();
            int comment = line.indexOf('#');
            if (comment != -1) {
                line = line.substring(0, comment);
            }
            line = line.trim();
            if (!line.regionMatches(true, 0, DISALLOW, 0, DISALLOW.length())) {
                continue;
            }
            String rule = line.substring(DISALLOW.length()).trim();
            if (rule.length() > 0 && target.startsWith(rule)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects the raw href values of the anchors in an HTML document, in
     * document order. A value ends at whitespace, a quote, {@code >} or
     * {@code #} (so fragments are dropped); empty values are skipped.
     *
     * @param content HTML text, may be {@code null}
     * @return a Vector of Strings, never {@code null}
     */
    static Vector extractLinks(String content) {
        Vector links = new Vector();
        if (content == null) {
            return links;
        }

        String lower = content.toLowerCase(Locale.ENGLISH);
        int length = content.length();
        int index = 0;

        while ((index = lower.indexOf("<a", index)) != -1) {
            if ((index = lower.indexOf("href", index)) == -1) {
                break;
            }
            if ((index = lower.indexOf("=", index)) == -1) {
                break;
            }
            index++;

            // skip blanks and one opening quote, then read up to a delimiter
            int start = index;
            while (start < length && Character.isWhitespace(content.charAt(start))) {
                start++;
            }
            if (start < length
                    && (content.charAt(start) == '"' || content.charAt(start) == '\'')) {
                start++;
            }
            int end = start;
            while (end < length && LINK_DELIMITERS.indexOf(content.charAt(end)) == -1) {
                end++;
            }
            if (end > start) {
                links.addElement(content.substring(start, end));
            }
        }
        return links;
    }

    /**
     * Reduces a Content-Type header to its lower-case media type.
     *
     * @param header header value such as {@code "text/html; charset=UTF-8"}
     * @return the media type without parameters, or {@code null} if absent
     */
    static String baseContentType(String header) {
        if (header == null) {
            return null;
        }
        int semicolon = header.indexOf(';');
        String base = (semicolon == -1) ? header : header.substring(0, semicolon);
        base = base.trim().toLowerCase(Locale.ENGLISH);
        return (base.length() == 0) ? null : base;
    }

    /**
     * Extracts the charset parameter of a Content-Type header.
     *
     * @param header header value, may be {@code null}
     * @return the charset name without quotes, or {@code null} if none is given
     */
    static String charsetOf(String header) {
        if (header == null) {
            return null;
        }
        int at = header.toLowerCase(Locale.ENGLISH).indexOf(CHARSET_KEY);
        if (at == -1) {
            return null;
        }
        String value = header.substring(at + CHARSET_KEY.length());
        int semicolon = value.indexOf(';');
        if (semicolon != -1) {
            value = value.substring(0, semicolon);
        }
        value = value.trim();
        if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
            value = value.substring(1, value.length() - 1).trim();
        }
        return (value.length() == 0) ? null : value;
    }

    /** Tells whether the URL uses a protocol this crawler can fetch (http or https). */
    static boolean isHttp(URL url) {
        String protocol = url.getProtocol();
        return "http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol);
    }

    public void paint(Graphics g) {
        //Draw a Rectangle around the applet's display area.
        g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);

        // panelMain only exists after init()
        if (panelMain != null) {
            panelMain.paint(g);
            panelMain.paintComponents(g);
        }
    }

    /**
     * Body of the search thread: crawls breadth-first from the entered URL
     * until the queue is empty, a limit is reached or the search is stopped.
     * A page that cannot be used (bad URL, unsupported protocol, blocked by
     * robots.txt, not HTML, unreadable) is skipped; it does not end the crawl.
     */
    public void run() {
        int numberSearched = 0;
        int numberFound = 0;
        String lastError = null;

        try {
            String strURL = textURL.getText();
            String strTargetType = choiceType.getSelectedItem();

            if (strURL.length() == 0) {
                setStatus("ERROR: must enter a starting URL");
                return;
            }

            // initialize search data structures
            vectorToSearch.removeAllElements();
            vectorSearched.removeAllElements();
            vectorMatches.removeAllElements();
            listMatches.removeAll();

            vectorToSearch.addElement(strURL);
            int fileNum = 0;

            while (vectorToSearch.size() > 0 && isSearching()
                    && numberSearched < SEARCH_LIMIT && numberFound < SEARCH_LIMIT) {
                // take the first element off the to-be-searched list and mark
                // it as searched (we want this one way or the other)
                fileNum++;
                strURL = (String) vectorToSearch.elementAt(0);
                vectorToSearch.removeElementAt(0);
                vectorSearched.addElement(strURL);

                setStatus("searching " + strURL);

                URL url;
                try {
                    url = new URL(strURL);
                } catch (MalformedURLException e) {
                    lastError = "ERROR: invalid URL " + strURL;
                    setStatus(lastError);
                    continue;
                }

                // only http(s) pages that robots.txt allows are fetched
                if (!isHttp(url) || !robotSafe(url)) {
                    continue;
                }

                String content;
                try {
                    content = fetchPage(url);
                } catch (IOException e) {
                    lastError = "ERROR: couldn't open URL " + strURL;
                    setStatus(lastError);
                    continue;
                }

                if (!isSearching()) {
                    break; // stopped while reading: do not save a partial page
                }
                if (content == null) {
                    continue; // not an HTML page
                }

                createFile(SAVE_DIR + SAVE_PREFIX + fileNum + SAVE_SUFFIX, content);

                numberFound = harvestLinks(url, content, strTargetType, numberFound);
                numberSearched++;
            }

            if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT) {
                setStatus("reached search limit of " + SEARCH_LIMIT);
            } else if (numberSearched == 0 && lastError != null) {
                // nothing could be read: keep the reason visible
                setStatus(lastError);
            } else {
                setStatus("done");
            }
        } finally {
            // Free the slot only if it is still ours; after a stop a newer
            // search may already own it.
            if (isSearching()) {
                searchThread = null;
            }
        }
    }

    /** Tells whether the calling thread is still the current, uncancelled search. */
    private boolean isSearching() {
        return Thread.currentThread() == searchThread;
    }

    /**
     * Downloads a page and decodes it with the charset the server declares
     * (platform default if none or unknown).
     *
     * @return the page text, or {@code null} if the server reports a content
     *         type other than text/html
     * @throws IOException if the page cannot be opened or read
     */
    private String fetchPage(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        connection.setAllowUserInteraction(false);

        InputStream in = connection.getInputStream();
        try {
            String header = connection.getContentType();
            String type = baseContentType(header);
            // an unreported type is given the benefit of the doubt
            if (type != null && !HTML_TYPE.equals(type)) {
                return null;
            }
            return decode(readAll(in), charsetOf(header));
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Opens a link just far enough to learn its content type.
     *
     * @return the lower-case media type; text/html when the server reports none
     * @throws IOException if the link cannot be opened
     */
    private String probeContentType(URL link) throws IOException {
        URLConnection connection = link.openConnection();
        connection.setAllowUserInteraction(false);

        // getInputStream() fails for error responses, which is what we want
        InputStream in = connection.getInputStream();
        try {
            String type = baseContentType(connection.getContentType());
            return (type != null) ? type : HTML_TYPE;
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Processes the links of one page: HTML links not seen before (and allowed
     * by robots.txt) are queued for crawling, and links whose type equals
     * {@code targetType} are added to the result list.
     *
     * @param page       URL of the page, used to resolve relative links
     * @param content    HTML text of the page
     * @param targetType content type the user is looking for
     * @param found      number of matches before this page
     * @return number of matches after this page
     */
    private int harvestLinks(URL page, String content, String targetType, int found) {
        Vector links = extractLinks(content);

        for (int i = 0; i < links.size() && found < SEARCH_LIMIT && isSearching(); i++) {
            String strLink = (String) links.elementAt(i);

            URL urlLink;
            try {
                urlLink = new URL(page, strLink);
                strLink = urlLink.toString();
            } catch (MalformedURLException e) {
                setStatus("ERROR: bad URL " + strLink);
                continue;
            }

            // only look at http(s) links; others (mailto:, ftp:, ...) are skipped
            if (!isHttp(urlLink)) {
                continue;
            }

            String strType;
            try {
                strType = probeContentType(urlLink);
            } catch (IOException e) {
                setStatus("ERROR: couldn't open URL " + strLink);
                continue;
            }

            // the probe may have blocked for a while; re-check before
            // touching the shared lists
            if (!isSearching()) {
                break;
            }

            // if another page, add to the end of the search list unless it
            // has been searched or is already queued
            if (HTML_TYPE.equals(strType)
                    && !vectorSearched.contains(strLink)
                    && !vectorToSearch.contains(strLink)
                    && robotSafe(urlLink)) {
                vectorToSearch.addElement(strLink);
            }

            // if the proper type, add it to the results list
            // unless we have already seen it
            if (strType.equals(targetType) && !vectorMatches.contains(strLink)) {
                listMatches.add(strLink);
                vectorMatches.addElement(strLink);
                found++;
            }
        }
        return found;
    }

    /**
     * Reads a stream to its end, or until the search is cancelled. The stream
     * is left open; the caller closes it.
     */
    private byte[] readAll(InputStream in) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int numRead;
        while (isSearching() && (numRead = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, numRead);
        }
        return buffer.toByteArray();
    }

    /**
     * Decodes bytes in one step so multi-byte characters are never split.
     * Falls back to the platform charset when no usable name is given.
     */
    private static String decode(byte[] bytes, String charsetName) {
        if (charsetName != null) {
            try {
                return new String(bytes, charsetName);
            } catch (UnsupportedEncodingException e) {
                // unknown or malformed charset name: use the default below
            }
        }
        return new String(bytes);
    }

    /** Closes a stream, ignoring a failure that can no longer be acted on. */
    private static void closeQuietly(InputStream in) {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // nothing useful can be done at this point
            }
        }
    }

    /**
     * Shows a message on the status line. Does nothing before {@link #init()}.
     */
    void setStatus(String status) {
        if (labelStatus != null) {
            labelStatus.setText(status);
        }
    }

    /**
     * Handles the Search and Stop buttons. A Search click while a search is
     * already running is ignored.
     */
    public void actionPerformed(ActionEvent event) {
        String command = event.getActionCommand();

        if (SEARCH.equals(command)) {
            if (searchThread != null) {
                return; // a thread can be started only once
            }
            setStatus("searching...");

            // launch a thread to do the search
            Thread worker = new Thread(this);
            searchThread = worker;
            worker.start();

        } else if (STOP.equals(command)) {
            stop();
        }
    }

    /**
     * Runs the crawler in its own window.
     *
     * @param argv optional proxy settings for use behind a firewall:
     *             {@code argv[0]} is the proxy host and {@code argv[1]} the
     *             proxy port (default 80). Without arguments the JVM's own
     *             network settings are used.
     */
    public static void main(String argv[]) {
        if (argv != null && argv.length > 0 && argv[0].length() > 0) {
            String proxyPort = (argv.length > 1) ? argv[1] : "80";
            System.setProperty("http.proxyHost", argv[0]);
            System.setProperty("http.proxyPort", proxyPort);
            System.setProperty("https.proxyHost", argv[0]);
            System.setProperty("https.proxyPort", proxyPort);
        }

        final Frame f = new Frame("WebFrame");
        final WebCrawler applet = new WebCrawler();
        f.add("Center", applet);

        f.addWindowListener(new WindowAdapter() {
            public void windowClosing(WindowEvent e) {
                applet.stop();
                applet.destroy();
                f.dispose();
                // the worker may still be blocked in a network read
                System.exit(0);
            }
        });

        applet.init();
        applet.start();
        f.pack();
        f.setVisible(true);
    }

}