代码功能:输入一个简写的URL地址,统计该页面上超级链接(HREF标记)的数量。
从原理上看,先把控制台输入的简写URL加工,转换成java.net.URL类型;再使用openStream()将资源数据转化成数据流,在网络资源和URL对象之间建立一条信道链路。从HTML角度来说,就是统计该主页上包含多少个HREF标记。
package net;
import java.net.*;
import java.io.*;
/**
 * Console utility that downloads one web page and reports how many times the
 * text "HREF" (in any letter case) occurs in it.
 *
 * <p>The address is read as one line from standard input. A shorthand such as
 * {@code java.sun.com} is expanded to {@code http://java.sun.com/index.html};
 * an address that already contains a scheme ({@code ://}) is used unchanged.
 */
class URL_PageScan
{
    /** Text searched for in the downloaded page; matched case-insensitively. */
    private static final String HREF = "HREF";

    /**
     * Prompts for an address, fetches the page and prints the HREF count.
     * Returns early, after printing a "Bad URL:" message, when the input is
     * missing, blank or not a valid URL.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String args[])
    {
        System.out.println("Working..");
        System.out.println("输入一个URL地址后回车,如http://java.sun.com/index.html");

        String line;
        try
        {
            // readLine() ends at the line break or at end of input and drops the
            // terminator, so no manual scanning for a delimiter is needed.
            line = new BufferedReader(new InputStreamReader(System.in)).readLine();
        }
        catch(IOException e)
        {
            e.printStackTrace();
            return;
        }
        if(line == null || line.trim().length() == 0)
        {
            System.out.println("Bad URL:" + line);
            return;
        }

        String spec = toUrlSpec(line);
        System.out.println("URL_3:" + spec);
        URL ul;
        try
        {
            ul = new URL(spec);
        }
        catch(MalformedURLException e)
        {
            // Stop here: without a URL object there is nothing to download.
            System.out.println("Bad URL:" + spec);
            return;
        }
        System.out.println("URL:" + ul);

        System.out.println("pages referenced:" + countHrefs(fetch(ul)));
    }

    /**
     * Turns the text typed by the user into a URL string.
     *
     * @param input address as typed; surrounding whitespace is ignored
     * @return {@code input} itself when it already contains {@code ://},
     *         otherwise {@code "http://" + input + "/index.html"}
     */
    static String toUrlSpec(String input)
    {
        String address = input.trim();
        if(address.indexOf("://") >= 0)
        {
            return address;
        }
        return "http://" + address + "/index.html";
    }

    /**
     * Downloads the resource behind {@code ul} as text, decoded with the
     * platform default charset.
     *
     * @param ul location to read from
     * @return the text read; on an I/O failure the stack trace is printed and
     *         whatever was read up to that point is returned
     */
    private static String fetch(URL ul)
    {
        StringBuffer page = new StringBuffer();
        try
        {
            BufferedReader reader = new BufferedReader(new InputStreamReader(ul.openStream()));
            try
            {
                char[] chunk = new char[4096];
                int read;
                while((read = reader.read(chunk)) != -1)
                {
                    page.append(chunk, 0, read);
                }
            }
            finally
            {
                // Always release the underlying network connection.
                reader.close();
            }
        }
        catch(IOException e)
        {
            e.printStackTrace();
        }
        return page.toString();
    }

    /**
     * Counts the occurrences of "HREF" in {@code page}, ignoring letter case.
     *
     * @param page text to scan
     * @return number of positions at which "HREF" starts
     */
    static int countHrefs(String page)
    {
        int count = 0;
        // Inclusive upper bound, so that a match at the very end is counted too.
        int lastStart = page.length() - HREF.length();
        for(int start = 0; start <= lastStart; start++)
        {
            if(page.regionMatches(true, start, HREF, 0, HREF.length()))
            {
                count++;
            }
        }
        return count;
    }
}
import java.net.*;
import java.io.*;
/**
 * Console utility that downloads one web page and reports how many times the
 * text "HREF" (in any letter case) occurs in it.
 *
 * <p>The address is read as one line from standard input. A shorthand such as
 * {@code java.sun.com} is expanded to {@code http://java.sun.com/index.html};
 * an address that already contains a scheme ({@code ://}) is used unchanged.
 */
class URL_PageScan
{
    /** Text searched for in the downloaded page; matched case-insensitively. */
    private static final String HREF = "HREF";

    /**
     * Prompts for an address, fetches the page and prints the HREF count.
     * Returns early, after printing a "Bad URL:" message, when the input is
     * missing, blank or not a valid URL.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String args[])
    {
        System.out.println("Working..");
        System.out.println("输入一个URL地址后回车,如http://java.sun.com/index.html");

        String line;
        try
        {
            // readLine() ends at the line break or at end of input and drops the
            // terminator, so no manual scanning for a delimiter is needed.
            line = new BufferedReader(new InputStreamReader(System.in)).readLine();
        }
        catch(IOException e)
        {
            e.printStackTrace();
            return;
        }
        if(line == null || line.trim().length() == 0)
        {
            System.out.println("Bad URL:" + line);
            return;
        }

        String spec = toUrlSpec(line);
        System.out.println("URL_3:" + spec);
        URL ul;
        try
        {
            ul = new URL(spec);
        }
        catch(MalformedURLException e)
        {
            // Stop here: without a URL object there is nothing to download.
            System.out.println("Bad URL:" + spec);
            return;
        }
        System.out.println("URL:" + ul);

        System.out.println("pages referenced:" + countHrefs(fetch(ul)));
    }

    /**
     * Turns the text typed by the user into a URL string.
     *
     * @param input address as typed; surrounding whitespace is ignored
     * @return {@code input} itself when it already contains {@code ://},
     *         otherwise {@code "http://" + input + "/index.html"}
     */
    static String toUrlSpec(String input)
    {
        String address = input.trim();
        if(address.indexOf("://") >= 0)
        {
            return address;
        }
        return "http://" + address + "/index.html";
    }

    /**
     * Downloads the resource behind {@code ul} as text, decoded with the
     * platform default charset.
     *
     * @param ul location to read from
     * @return the text read; on an I/O failure the stack trace is printed and
     *         whatever was read up to that point is returned
     */
    private static String fetch(URL ul)
    {
        StringBuffer page = new StringBuffer();
        try
        {
            BufferedReader reader = new BufferedReader(new InputStreamReader(ul.openStream()));
            try
            {
                char[] chunk = new char[4096];
                int read;
                while((read = reader.read(chunk)) != -1)
                {
                    page.append(chunk, 0, read);
                }
            }
            finally
            {
                // Always release the underlying network connection.
                reader.close();
            }
        }
        catch(IOException e)
        {
            e.printStackTrace();
        }
        return page.toString();
    }

    /**
     * Counts the occurrences of "HREF" in {@code page}, ignoring letter case.
     *
     * @param page text to scan
     * @return number of positions at which "HREF" starts
     */
    static int countHrefs(String page)
    {
        int count = 0;
        // Inclusive upper bound, so that a match at the very end is counted too.
        int lastStart = page.length() - HREF.length();
        for(int start = 0; start <= lastStart; start++)
        {
            if(page.regionMatches(true, start, HREF, 0, HREF.length()))
            {
                count++;
            }
        }
        return count;
    }
}