package demo;
import java.net.URL;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
/**
 * A minimal breadth-first web crawler: reads a host name from stdin,
 * prefixes "http://", then follows every absolute "http:" link found in
 * the fetched pages until roughly 200 pages have been visited.
 */
public class Wbe_Crawl {

    /** Crawl stops once the visited set exceeds this size (original: a2.size() <= 200). */
    private static final int MAX_VISITED = 200;

    /**
     * Entry point: prompts for a host name on stdin and starts the crawl.
     *
     * @param args unused
     * @throws Exception propagated from stdin handling (original behavior)
     */
    public static void main(String[] args) throws Exception {
        System.out.println("input ");
        Scanner input = new Scanner(System.in);
        String seed = "http://" + input.nextLine();
        crawl(seed);
    }

    /**
     * Breadth-first crawl starting from {@code seed}.
     * Prints each newly visited URL; stops when the frontier is empty or
     * the visited count passes {@link #MAX_VISITED}.
     *
     * @param seed absolute URL to start from
     */
    private static void crawl(String seed) {
        Deque<String> frontier = new ArrayDeque<>(); // URLs not yet crawled (FIFO = BFS)
        Set<String> visited = new LinkedHashSet<>(); // URLs already crawled; O(1) dedup
        frontier.add(seed);
        while (!frontier.isEmpty() && visited.size() <= MAX_VISITED) {
            String current = frontier.remove();
            // add() returns false for duplicates — only fetch each page once.
            // (The original re-fetched pages it had already crawled.)
            if (visited.add(current)) {
                System.out.println("Crawl :" + current);
                frontier.addAll(getUrls(current));
            }
        }
    }

    /**
     * Fetches {@code page} and returns every "http:" link found in its body.
     * Any network or parse error is reported and yields the links collected
     * so far (possibly empty) — the crawl keeps going.
     *
     * @param page absolute URL to fetch
     * @return links extracted from the page body; never null
     */
    private static List<String> getUrls(String page) {
        List<String> links = new ArrayList<>();
        try {
            URL url = new URL(page);
            // try-with-resources closes the underlying stream (original leaked it).
            try (Scanner in = new Scanner(url.openStream())) {
                // hasNextLine() matches nextLine() (original mixed hasNext()/nextLine()).
                while (in.hasNextLine()) {
                    links.addAll(extractUrls(in.nextLine()));
                }
            }
        } catch (Exception e) {
            // Report which page failed and why instead of a bare message.
            System.out.println("出错了: " + page + " (" + e + ")");
        }
        return links;
    }

    /**
     * Extracts every substring that starts with "http:" and runs up to (but not
     * including) the next double quote on the same line. A match without a
     * closing quote on the line is discarded, as in the original.
     * Package-private and static so it can be unit-tested.
     *
     * @param line one line of HTML text
     * @return extracted links in order of appearance; never null
     */
    static List<String> extractUrls(String line) {
        List<String> found = new ArrayList<>();
        // Start each line's scan at index 0 and accept matches at column 0.
        // (The original tested c1 > 0, silently dropping links that began a line.)
        int start = line.indexOf("http:");
        while (start >= 0) {
            int end = line.indexOf('"', start);
            if (end < 0) {
                break; // unterminated link: stop scanning this line
            }
            found.add(line.substring(start, end));
            start = line.indexOf("http:", end);
        }
        return found;
    }
}