<p>大致的思路是:</p><p>先通过页面链接获取网页内容,再用正则表达式匹配出其中图片的网络地址,最后以图片的文件名将其保存到程序运行的当前工作目录(通常是项目根目录)下。</p><p>接下来我们看代码。</p>
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads every {@code .jpg} image referenced by an HTML page.
 *
 * <p>The page is fetched, scanned with a regular expression for absolute
 * {@code http}/{@code https} URLs ending in {@code .jpg}, and each distinct URL
 * is saved to a file named after the text following the last {@code '/'} of
 * the URL. The file name is relative, so files land in the current working
 * directory.
 */
public class URLTest {

    /** Page scanned when {@link #picTest()} is called without an argument. */
    private static final String DEFAULT_PAGE_URL = "http://www.moko.cc/focus/681/info.html";

    /** Size in bytes of the copy buffer used when saving an image. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Matches absolute http(s) URLs that end in ".jpg". Compiled once because
     * {@link Pattern} instances are immutable and reusable.
     *
     * <p>NOTE(review): the host part nests one quantifier inside another, so a
     * long run of URL characters that does not end in ".jpg" may backtrack
     * heavily. The expression is kept as-is to preserve which URLs match.
     */
    private static final Pattern JPG_URL = Pattern.compile(
            "https?:\\/\\/(([a-zA-Z0-9_-])+(\\.)?)*(:\\d+)?(\\/((\\.)?(\\?)?=?&?[a-zA-Z0-9_-](\\?)?)*)*\\.jpg");

    /**
     * Program entry point: downloads the images of the default page.
     *
     * @param args ignored
     * @throws Exception if the page or any image cannot be fetched or written
     */
    public static void main(String[] args) throws Exception {
        picTest();
    }

    /**
     * Copies the resource at the given URL into a local file.
     *
     * <p>The file name is everything after the last {@code '/'} in {@code pic};
     * an existing file with that name is overwritten, so two URLs sharing a
     * file name end up in the same file.
     *
     * @param pic URL of the image to download
     * @throws Exception if the URL is malformed, the resource cannot be read,
     *     or the file cannot be written
     */
    public static void btyeTest(String pic) throws Exception {
        URL url = new URL(pic);
        String fileName = pic.substring(pic.lastIndexOf("/") + 1);
        // try-with-resources closes both streams even when the copy fails midway.
        try (InputStream is = url.openStream();
                FileOutputStream fos = new FileOutputStream(fileName)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = is.read(buffer)) != -1) {
                fos.write(buffer, 0, len);
            }
        }
    }

    /**
     * Downloads all {@code .jpg} images referenced by the default page.
     *
     * @throws Exception if the page or any image cannot be fetched or written
     */
    public static void picTest() throws Exception {
        picTest(DEFAULT_PAGE_URL);
    }

    /**
     * Downloads all {@code .jpg} images referenced by the given page.
     *
     * <p>The first failing download aborts the run; images already saved are kept.
     *
     * @param pageUrl URL of the HTML page to scan
     * @throws Exception if the page or any image cannot be fetched or written
     */
    public static void picTest(String pageUrl) throws Exception {
        String html = readPage(new URL(pageUrl));
        for (String ul : findJpgUrls(html)) {
            // Save each image under its own file name in the working directory.
            btyeTest(ul);
        }
    }

    /** Reads the whole page as text and collapses every whitespace run to one space. */
    private static String readPage(URL page) throws IOException {
        StringBuilder content = new StringBuilder();
        // Decoded as UTF-8 so the result does not depend on the platform default charset.
        try (InputStream is = page.openStream();
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(is, StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                // readLine() drops the line terminator; put a separator back so the end of
                // one line and the start of the next cannot fuse into a bogus URL.
                content.append(line).append('\n');
            }
        }
        return content.toString().replaceAll("\\s+", " ");
    }

    /** Collects the distinct .jpg URLs found in the text; the set removes duplicates. */
    private static Set<String> findJpgUrls(String html) {
        Set<String> urls = new HashSet<>();
        Matcher m = JPG_URL.matcher(html);
        while (m.find()) {
            urls.add(m.group());
        }
        return urls;
    }
}