java批量下载百度贴吧楼主上传的图片

本文提供了一个Java程序,用于批量下载百度贴吧帖子中楼主上传的图片。程序先解析帖子页面得到总页数,再逐页用正则表达式提取图片地址并下载到指定目录。注意:如果楼主的图片是以外部链接(盗链)方式引用的,程序无法识别和下载。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

程序还不够完美:只能下载楼主上传的图片,如果楼主的图片是盗链的则无法识别;另外,流的打开和关闭过于频繁。

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Batch-downloads the images that the original poster uploaded to a Baidu
 * Tieba thread.
 *
 * <p>Only image URLs of the form
 * {@code http://imgsrc.baidu.com/forum/pic/item/<40 word chars>.jpg} are
 * recognised. Images that the poster merely linked from another host are not
 * matched; change {@code IMAGE_URL} to collect those.
 *
 * @author GeenkEmp01
 */
public class GetBaiDuTieBaPicture {

	/** Matches the URL of an image uploaded to the thread (dots are literal). */
	private static final Pattern IMAGE_URL = Pattern
			.compile("http://imgsrc\\.baidu\\.com/forum/pic/item/\\w{40}\\.jpg");

	/** Matches the "last page" link; group 1 is the page number it points to. */
	private static final Pattern LAST_PAGE_LINK = Pattern.compile("pn=(\\d+)\">尾页<");

	/** Running number of image URLs found so far; used to name the saved files. */
	private int index = 0;

	/**
	 * Downloads the images of one hard-coded thread into a hard-coded directory.
	 *
	 * @param args
	 *            ignored
	 */
	public static void main(String[] args) {
		GetBaiDuTieBaPicture getP = new GetBaiDuTieBaPicture();
		String tieziUrl = "http://tieba.baidu.com/p/1212071711";
		String imageDirectory = "E:/baiduimage/";
		getP.getImageUrl(tieziUrl,imageDirectory);
	}

	/**
	 * Walks every page of the thread, extracts the uploaded-image URLs and
	 * saves each image as {@code imageDirectory + <running number> + ".jpg"}.
	 *
	 * <p>A failure on one page or one image is reported on standard error and
	 * does not stop the remaining pages or images from being processed.
	 *
	 * @param tieziUrl
	 *            URL of the thread, without a {@code pn} query parameter
	 * @param imageDirectory
	 *            directory prefix for the saved images; it is concatenated
	 *            directly with the file name, so it must end with a path
	 *            separator. The directory is created if it does not exist.
	 */
	public void getImageUrl(String tieziUrl, String imageDirectory) {
		int pn = getTotalPageNum(tieziUrl);
		// Counted separately from index so the summary reports successes only.
		int saved = 0;
		for (int i = 1; i <= pn; i++) {
			BufferedReader br = null;
			try {
				URL url = new URL(tieziUrl + "?pn=" + i);
				// NOTE(review): the page is decoded with the platform default
				// charset, as before; confirm against the encoding the site serves.
				br = new BufferedReader(new InputStreamReader(url.openStream()));
				String str;
				while ((str = br.readLine()) != null) {
					Matcher m = IMAGE_URL.matcher(str);
					while (m.find()) {
						index++;
						String imageUrl = m.group();
						System.out.println(imageUrl);
						System.out.println("正在下载第" + index + "张图片...");
						if (fetchToFile(imageUrl, imageDirectory + index + ".jpg")) {
							saved++;
						}
					}
				}
			} catch (Exception e) {
				// Best effort: report the broken page and continue with the next one.
				e.printStackTrace();
			} finally {
				closeLogged(br);
			}
		}
		System.out.println("共下载了" + saved + "张图片");
	}

	/**
	 * Determines how many pages the thread has by looking for the "last page"
	 * link on its first page.
	 *
	 * @param tieziUrl
	 *            URL of the thread
	 * @return the page number found in the last such link, or 1 when no link
	 *         is found or the page cannot be read
	 */
	public int getTotalPageNum(String tieziUrl) {
		int pageNum = 1;
		BufferedReader br = null;
		try {
			URL url = new URL(tieziUrl);
			// NOTE(review): decoded with the platform default charset, as before.
			br = new BufferedReader(new InputStreamReader(url.openStream()));
			String str;
			while ((str = br.readLine()) != null) {
				Matcher m = LAST_PAGE_LINK.matcher(str);
				while (m.find()) {
					pageNum = Integer.parseInt(m.group(1));
				}
			}
		} catch (Exception e) {
			// Best effort: fall back to the page count determined so far.
			e.printStackTrace();
		} finally {
			closeLogged(br);
		}
		System.out.println("帖子共有"+pageNum+"页");
		return pageNum;
	}

	/**
	 * Downloads one image. Failures are reported on standard error and are
	 * not propagated to the caller.
	 *
	 * @param imageUrl
	 *            URL of the image to fetch
	 * @param saveFile
	 *            path of the file to write; missing parent directories are
	 *            created
	 */
	public void downloadImage(String imageUrl, String saveFile) {
		fetchToFile(imageUrl, saveFile);
	}

	/**
	 * Copies the content behind {@code imageUrl} into {@code saveFile}.
	 *
	 * @return {@code true} if the whole content was written, {@code false} if
	 *         anything failed (the failure is printed to standard error)
	 */
	private boolean fetchToFile(String imageUrl, String saveFile) {
		InputStream is = null;
		OutputStream os = null;
		try {
			// Open the source first so a bad URL does not leave an empty file behind.
			is = new URL(imageUrl).openStream();
			File parent = new File(saveFile).getParentFile();
			if (parent != null) {
				// Result ignored on purpose: if the directory is still missing,
				// opening the output stream below fails and is reported.
				parent.mkdirs();
			}
			os = new FileOutputStream(saveFile);
			byte[] buff = new byte[8192];
			int count;
			while ((count = is.read(buff)) != -1) {
				os.write(buff, 0, count);
			}
			// Close explicitly so a failure while closing counts as a failed download.
			os.close();
			return true;
		} catch (Exception e) {
			System.err.println("Failed to download " + imageUrl + " to " + saveFile);
			e.printStackTrace();
			return false;
		} finally {
			closeLogged(os);
			closeLogged(is);
		}
	}

	/** Closes {@code resource} if it is non-null, printing any close failure. */
	private static void closeLogged(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}


 

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值