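// Not perfect yet: it can only download images uploaded by the original poster; images the poster hot-linked from elsewhere are not recognised, and streams are opened and closed too frequently.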
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Batch-downloads the images that the original poster uploaded to a Baidu Tieba thread.
 *
 * <p>Only image URLs matching {@code IMAGE_URL_PATTERN} (files hosted under
 * {@code http://imgsrc.baidu.com/forum/pic/item/}) are recognised. Images that the poster
 * linked from other addresses are not downloaded; change the pattern to support them.
 *
 * @author GeenkEmp01
 */
public class GetBaiDuTieBaPicture {

    /**
     * Matches the URL of an uploaded image: the fixed host/path prefix, a 40-character
     * name made of word characters or commas, and a literal {@code .jpg} suffix.
     */
    private static final Pattern IMAGE_URL_PATTERN =
            Pattern.compile("http://imgsrc\\.baidu\\.com/forum/pic/item/[\\w,]{40}\\.jpg");

    /**
     * Matches the "last page" link of a thread. Group 1 is the page number; at least one
     * digit is required so that an empty number can never reach {@code Integer.parseInt}.
     */
    private static final Pattern LAST_PAGE_PATTERN = Pattern.compile("pn=(\\d+)\">尾页<");

    /** Running number of image URLs found so far; also used to name the saved files. */
    private int index = 0;

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the thread URL and {@code args[1]} the
     *             directory to save images in; the built-in defaults are used for
     *             whichever argument is missing
     */
    public static void main(String[] args) {
        GetBaiDuTieBaPicture getP = new GetBaiDuTieBaPicture();
        String tieziUrl = args.length > 0 ? args[0] : "http://tieba.baidu.com/p/1212071711";
        String imageDirectory = args.length > 1 ? args[1] : "E:/baiduimage/";
        getP.getImageUrl(tieziUrl, imageDirectory);
    }

    /**
     * Walks every page of a thread, finds the image URLs on it and downloads each image
     * as {@code <running number>.jpg} into the given directory.
     *
     * <p>A failure on one page is reported on standard error and does not stop the
     * remaining pages from being processed.
     *
     * @param tieziUrl
     *            URL of the thread
     * @param imageDirectory
     *            directory in which the downloaded images are stored; it is created on
     *            demand and does not need a trailing separator
     */
    public void getImageUrl(String tieziUrl, String imageDirectory) {
        int pageCount = getTotalPageNum(tieziUrl);
        for (int page = 1; page <= pageCount; page++) {
            // Declared per iteration so that the finally block only ever closes the
            // streams opened for this page.
            InputStream is = null;
            BufferedReader br = null;
            try {
                // Append the page number as a further parameter if the URL already has a query.
                String separator = tieziUrl.indexOf('?') >= 0 ? "&" : "?";
                URL url = new URL(tieziUrl + separator + "pn=" + page);
                is = url.openStream();
                // NOTE(review): the page is decoded with the platform default charset; the
                // image URLs are plain ASCII so this should not affect matching - confirm.
                br = new BufferedReader(new InputStreamReader(is));
                String line;
                while ((line = br.readLine()) != null) {
                    Matcher m = IMAGE_URL_PATTERN.matcher(line);
                    while (m.find()) {
                        index++;
                        String imageUrl = m.group();
                        System.out.println(imageUrl);
                        System.out.println("正在下载第" + index + "张图片...");
                        downloadImage(imageUrl, imageFile(imageDirectory, index).getPath());
                    }
                }
            } catch (Exception e) {
                // Deliberately broad: this is best-effort, one bad page must not abort the batch.
                e.printStackTrace();
            } finally {
                closeAndReport(br);
                closeAndReport(is);
            }
        }
        System.out.println("共下载了" + index + "张图片");
    }

    /**
     * Determines how many pages a thread has by looking for its "last page" link.
     *
     * @param tieziUrl URL of the thread
     * @return the page number found in the last matching "last page" link, or {@code 1}
     *         when no such link is found or the page cannot be read
     */
    public int getTotalPageNum(String tieziUrl) {
        int pageNum = 1;
        InputStream is = null;
        BufferedReader br = null;
        try {
            URL url = new URL(tieziUrl);
            is = url.openStream();
            // NOTE(review): decoded with the platform default charset, but the pattern
            // contains non-ASCII characters - verify this matches the page's encoding.
            br = new BufferedReader(new InputStreamReader(is));
            String line;
            while ((line = br.readLine()) != null) {
                Matcher m = LAST_PAGE_PATTERN.matcher(line);
                while (m.find()) {
                    try {
                        pageNum = Integer.parseInt(m.group(1));
                    } catch (NumberFormatException ignored) {
                        // Digit run too long for an int: skip it and keep scanning.
                    }
                }
            }
        } catch (Exception e) {
            // Best-effort: fall back to a single page when the thread cannot be read.
            e.printStackTrace();
        } finally {
            closeAndReport(br);
            closeAndReport(is);
        }
        System.out.println("帖子共有" + pageNum + "页");
        return pageNum;
    }

    /**
     * Downloads one image and writes it to a file, creating missing parent directories.
     *
     * <p>Failures are reported on standard error; no exception is propagated.
     *
     * @param imageUrl URL of the image to fetch
     * @param saveFile path of the file the image is written to
     */
    public void downloadImage(String imageUrl, String saveFile) {
        OutputStream os = null;
        InputStream is = null;
        try {
            URL url = new URL(imageUrl);
            is = url.openStream();
            File parent = new File(saveFile).getAbsoluteFile().getParentFile();
            if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
                throw new IOException("Cannot create directory " + parent);
            }
            os = new FileOutputStream(saveFile);
            byte[] buff = new byte[8192];
            int count;
            while ((count = is.read(buff)) != -1) {
                os.write(buff, 0, count);
            }
        } catch (Exception e) {
            // Report the failure instead of silently discarding it.
            e.printStackTrace();
        } finally {
            closeAndReport(is);
            closeAndReport(os);
        }
    }

    /** Builds the target file {@code <number>.jpg} inside the given directory. */
    private static File imageFile(String imageDirectory, int number) {
        String name = number + ".jpg";
        if (imageDirectory == null || imageDirectory.length() == 0) {
            // No directory given: keep the name relative to the working directory.
            return new File(name);
        }
        return new File(imageDirectory, name);
    }

    /** Closes a resource if it was opened, reporting (not propagating) a close failure. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}