利用java代码自动下载嗨学网的学习视频

本文介绍了一款用于从嗨学网下载视频的学习辅助工具。该工具使用Java编写,通过分析网页结构抓取视频链接,并实现自动登录及视频下载功能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

需求:

将嗨学网上需要学习的全部视频下载下来,并以合适的名字保存。


由于对前端知识不太了解,完成过程比较困难。

关键点分析:

1.拿到下载视频的链接

第一想法是分析网页源码得到组装链接的逻辑,然后用java代码组装,但是网页源代码太复杂,而且对前端实在是不熟,最终采用了一个投机取巧的方法:在点击下载的同时用截屏捕捉下载地址。


其中有两个变量itemId和goodsId。

再结合源代码进行查找分析得知itemId就是goodsCatalogVideoId,goodsId一直是41889不变。

这样一来就得到了下载视频的地址。

2.通过登录验证

之前一直认为登录之后需要自己保存一些cookie相关的东西,分析了一下网页返回的cookie,比较复杂,不知如何下手。一番搜索之后得知:只需要让登录和下载使用同一个会话(即同一个HttpClient实例)就可以了,cookie之类的东西都不用自己操心。(详情见代码)

3.正则匹配,得到课程名字以及goodsCatalogVideoId。

这个过程不是很复杂,详情见代码。


源代码贴上,方便自己将来查看,也许对他人也会有些许帮助。

package Spider;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.HeaderIterator;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.ParseException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;


/**
 * Small scraper that logs in to haixue.com, extracts course titles and video ids from a
 * course page with regular expressions, and saves every video as an {@code .flv} file.
 *
 * <p>All requests go through the single shared {@link #client}; reusing that one client
 * for login and downloads is what keeps the requests in the same session, so cookies
 * never have to be handled by hand.
 *
 * <p>For offline debugging of the regular expression, a saved copy of the course page can
 * be loaded with {@link #read(String)} and passed to {@link #getIdAndName(String)} instead
 * of calling {@link #getCoursePage()}.
 */
public class DownloadVideo {
	/** Endpoint the login form is posted to. */
	static String loginUrl = "http://haixue.com/doLogin.do";
	// Example of a complete download URL:
	// http://haixue.com/goods/downloadUrl.do?itemId=161789577&type=Video&isCatalog=No&goodsId=41889
	/** Target directory for downloaded videos; file names are appended directly to it. */
	static String path = "E:/videos/";
	/** Shared HTTP client used for every request. */
	static CloseableHttpClient client = HttpClients.createDefault();
	// key: goodsCatalogVideoId; value: [0] course name, [1] goodsId (only after getAnotherId()).
	// goodsId turned out never to change, so the list-based layout was simply left in place.
	static Map<String, List<String>> coureseInfoMap = new LinkedHashMap<String, List<String>>();

	private static final String WATCH_URL_PREFIX =
			"http://haixue.com/course/video/watchVideo.do?goodsCatalogVideoId=";
	private static final String WATCH_URL_SUFFIX = "&goodsId=0";
	private static final String DOWNLOAD_URL_PREFIX = "http://haixue.com/goods/downloadUrl.do?itemId=";
	private static final String DOWNLOAD_URL_SUFFIX = "&type=Video&isCatalog=No&goodsId=41889";
	/** Video whose watch page is scraped by {@link #getCoursePage()}. */
	private static final String SEED_VIDEO_ID = "161789633";
	/** Charset used only when a response does not declare one itself. */
	private static final String FALLBACK_CHARSET = "UTF-8";
	private static final int COPY_BUFFER_SIZE = 1024 * 1024;

	// ".*?" is a reluctant (non-greedy) match; DOTALL lets it span line breaks.
	// Group 1 is the course title, group 2 the goodsCatalogVideoId.
	private static final Pattern COURSE_PATTERN = Pattern.compile("<div class=\"tit\">(.*?)</div>.*?"
			+ "<div class=\"con-bottom hideinfo\">.*?"
			+ "<input type=\"hidden\" value=\".*?\"/>.*?"
			+ "<input type=\"hidden\" value=\"(.*?)\"/>.*?"
			+ "<span>时长 </span>.*?"
			+ "<span>已观看 </span>.*?"
			+ "</div>"
	, Pattern.DOTALL);
	private static final Pattern GOODS_ID_PATTERN =
			Pattern.compile("<input type=\"hidden\" id=\"goodsId\" value=\"(.*?)\"/>");
	/** Characters that cannot appear in a Windows file name, plus control characters. */
	private static final Pattern ILLEGAL_FILE_CHARS = Pattern.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");

	/**
	 * Runs the whole pipeline: login, fetch the course page, extract ids and names, download.
	 * Any failure is printed and ends the run.
	 */
	public static void main(String[] args) {
		try {
			login();
			String page = getCoursePage();
			getIdAndName(page);
			// getAnotherId() is intentionally not called: goodsId is the same for every video.
			download();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Fetches the watch page of the seed video.
	 *
	 * @return the page body as text
	 * @throws IOException if the request fails or the response has no body (previously a
	 *         failed request was swallowed and surfaced as a NullPointerException)
	 * @throws ParseException if the response headers cannot be parsed
	 */
	public static String getCoursePage() throws ParseException, IOException {
		return fetchPage(WATCH_URL_PREFIX + SEED_VIDEO_ID + WATCH_URL_SUFFIX);
	}

	/**
	 * Downloads every video recorded in {@link #coureseInfoMap}, naming each file after its
	 * course title. A failure of one video is reported by {@link #downloadVideo} and does
	 * not stop the remaining downloads.
	 */
	public static void download() {
		for (Entry<String, List<String>> course : coureseInfoMap.entrySet()) {
			String goodsCatalogVideoId = course.getKey();
			List<String> info = course.getValue();
			String title = (info == null || info.isEmpty()) ? null : info.get(0);
			String url = DOWNLOAD_URL_PREFIX + goodsCatalogVideoId + DOWNLOAD_URL_SUFFIX;
			downloadVideo(url, toFileName(title, goodsCatalogVideoId) + ".flv");
		}
	}

	/**
	 * Looks up the goodsId of every recorded video on its watch page and stores it at index 1
	 * of the video's info list. Videos whose page contains no goodsId are removed from
	 * {@link #coureseInfoMap}. A request failure for one video is printed and that entry is
	 * left untouched.
	 */
	public static void getAnotherId() {
		// An explicit iterator is required: removing through the map while iterating its
		// key set throws ConcurrentModificationException.
		Iterator<Entry<String, List<String>>> it = coureseInfoMap.entrySet().iterator();
		while (it.hasNext()) {
			Entry<String, List<String>> course = it.next();
			List<String> info = course.getValue();
			try {
				String page = fetchPage(WATCH_URL_PREFIX + course.getKey() + WATCH_URL_SUFFIX);
				Matcher m = GOODS_ID_PATTERN.matcher(page);
				if (m.find()) {
					if (info.size() > 1) {
						// Already filled by an earlier call; overwrite instead of appending again.
						info.set(1, m.group(1));
					} else {
						info.add(m.group(1));
					}
				} else {
					System.out.println("没有找到goodsId:" + info.get(0));
					it.remove();
				}
			} catch (Exception e) {
				// Best effort: this method never throws, so report and move on.
				e.printStackTrace();
			}
		}
	}

	/**
	 * Extracts every (course title, goodsCatalogVideoId) pair from the course page HTML and
	 * records it in {@link #coureseInfoMap}, printing each pair as it is found.
	 *
	 * @param in HTML of the course page; {@code null} is treated as an empty page
	 */
	public static void getIdAndName(String in) {
		if (in == null) {
			return;
		}
		Matcher m = COURSE_PATTERN.matcher(in);
		while (m.find()) {
			String name = m.group(1).trim();
			String goodsCatalogVideoId = m.group(2);
			List<String> info = new ArrayList<String>();
			info.add(name);

			System.out.println("课程名称:" + name + "\ngoodsCatalogVideoId:" + goodsCatalogVideoId);
			coureseInfoMap.put(goodsCatalogVideoId, info);
		}
	}

	/**
	 * Posts the login form through the shared {@link #client} and prints the response.
	 *
	 * @throws Exception if the request cannot be built or sent; a failed login request is no
	 *         longer swallowed, because every later step depends on the session it creates
	 */
	public static void login() throws Exception {
		HttpPost httpPost = new HttpPost(loginUrl);
		Map<String, String> parameterMap = new HashMap<String, String>();
		parameterMap.put("j_username", "******");
		parameterMap.put("j_password", "****");
		parameterMap.put("_spring_security_remember_me", "no");
		httpPost.setEntity(new UrlEncodedFormEntity(getParam(parameterMap), "UTF-8"));
		System.out.println("request line:" + httpPost.getRequestLine());

		HttpResponse httpResponse = client.execute(httpPost);
		try {
			printResponse(httpResponse);
		} finally {
			// Make sure the connection goes back to the pool even if printing failed.
			EntityUtils.consumeQuietly(httpResponse.getEntity());
		}
	}

	/**
	 * Downloads one video to {@code path + fileName}. Never throws: failures are printed.
	 * Nothing is written unless the server answers 200 with a body, and a partially written
	 * file is deleted when the transfer fails.
	 *
	 * @param downloadUrl URL of the video
	 * @param fileName    file name (relative to {@link #path}) to save the video under
	 */
	public static void downloadVideo(String downloadUrl, String fileName) {
		System.out.println("strart download video:" + fileName);
		File target = new File(path + fileName);
		HttpGet httpGet = null;
		HttpEntity entity = null;
		boolean started = false;
		boolean completed = false;
		try {
			httpGet = new HttpGet(downloadUrl);
			HttpResponse response = client.execute(httpGet);
			entity = response.getEntity();
			// 200 = OK; anything else would be saved as a bogus .flv file.
			if (response.getStatusLine().getStatusCode() != 200 || entity == null) {
				System.out.println("download failed, status:" + response.getStatusLine()
						+ " video:" + fileName);
				return;
			}
			File dir = target.getParentFile();
			if (dir != null && !dir.isDirectory() && !dir.mkdirs()) {
				throw new IOException("cannot create directory " + dir);
			}
			started = true;
			copyToFile(entity.getContent(), target);
			completed = true;
			System.out.println("finished!");
		} catch (Exception e) {
			// Abort first so that releasing the entity below does not try to read the
			// rest of a large video just to discard it.
			if (httpGet != null) {
				httpGet.abort();
			}
			e.printStackTrace();
		} finally {
			EntityUtils.consumeQuietly(entity);
			if (started && !completed) {
				// A truncated file would look like a finished download.
				target.delete();
			}
		}
	}

	/**
	 * Prints the status line, all headers and, when present, the body of a response.
	 * Reading the body consumes the response entity.
	 */
	public static void printResponse(HttpResponse httpResponse) throws ParseException, IOException {
		HttpEntity entity = httpResponse.getEntity();
		System.out.println("status:" + httpResponse.getStatusLine());
		System.out.println("headers:");
		HeaderIterator iterator = httpResponse.headerIterator();
		while (iterator.hasNext()) {
			System.out.println("\t" + iterator.next());
		}
		if (entity != null) {
			String responseString = EntityUtils.toString(entity, FALLBACK_CHARSET);
			System.out.println("response length:" + responseString.length());
			System.out.println("response content:" + responseString.replace("\r\n", ""));
		}
	}

	/**
	 * Converts a map of form fields into the name/value list expected by
	 * {@link UrlEncodedFormEntity}.
	 *
	 * @param parameterMap form fields; keys and values must be strings, {@code null} yields
	 *                     an empty list
	 * @return one pair per map entry, in the map's iteration order
	 * @throws ClassCastException if a key or value is not a {@code String}
	 */
	public static List<NameValuePair> getParam(Map<?, ?> parameterMap) {
		List<NameValuePair> param = new ArrayList<NameValuePair>();
		if (parameterMap == null) {
			return param;
		}
		for (Entry<?, ?> field : parameterMap.entrySet()) {
			param.add(new BasicNameValuePair((String) field.getKey(), (String) field.getValue()));
		}
		return param;
	}

	/**
	 * Reads a whole UTF-8 text file, normalising every line ending to {@code "\n"} (a
	 * trailing {@code "\n"} is appended after the last line as well).
	 *
	 * @param filename path of the file to read
	 * @return the file content
	 * @throws IOException if the file cannot be opened or read
	 */
	public static String read(String filename) throws IOException {
		BufferedReader in = new BufferedReader(
				new InputStreamReader(new FileInputStream(filename), "UTF-8"));
		try {
			StringBuilder sb = new StringBuilder();
			String line;
			while ((line = in.readLine()) != null) {
				sb.append(line).append('\n');
			}
			return sb.toString();
		} finally {
			in.close();
		}
	}

	/** Performs a GET through the shared client and returns the body as text. */
	private static String fetchPage(String url) throws ParseException, IOException {
		HttpResponse response = client.execute(new HttpGet(url));
		HttpEntity entity = response.getEntity();
		try {
			if (entity == null) {
				throw new IOException("no response body for " + url + " (" + response.getStatusLine() + ")");
			}
			// The charset declared by the server wins; the fallback only applies when none is declared.
			return EntityUtils.toString(entity, FALLBACK_CHARSET);
		} finally {
			EntityUtils.consumeQuietly(entity);
		}
	}

	/** Copies the stream to the file; the caller remains responsible for the input stream. */
	private static void copyToFile(InputStream in, File target) throws IOException {
		BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(target));
		try {
			byte[] buffer = new byte[COPY_BUFFER_SIZE];
			int n;
			while ((n = in.read(buffer)) != -1) {
				out.write(buffer, 0, n);
			}
		} finally {
			out.close();
		}
	}

	/** Turns a course title into a usable file name, falling back to the id when nothing is left. */
	private static String toFileName(String title, String fallback) {
		String cleaned = title == null ? "" : ILLEGAL_FILE_CHARS.matcher(title).replaceAll("_").trim();
		if (!cleaned.isEmpty()) {
			return cleaned;
		}
		return fallback == null ? "" : ILLEGAL_FILE_CHARS.matcher(fallback).replaceAll("_").trim();
	}
}


结果展示:



评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值