2021-下载酷狗音乐-爬虫-java

最新推荐文章于 2023-03-22 21:21:41 发布

ooooo博ooooo

最新推荐文章于 2023-03-22 21:21:41 发布

阅读量585

点赞数

本文链接：https://blog.youkuaiyun.com/t15263857960/article/details/113400378

版权

这是一个使用Java编写的简单爬虫程序，用于从酷狗音乐网站抓取歌曲信息并下载音乐文件。程序首先解析排行榜页面获取每首歌曲的标题和链接，再从歌曲页面中提取hash和album_id并请求播放接口得到play_url，最后通过HTTP请求下载MP3文件。程序主要使用Jsoup库进行网页解析，使用Apache HttpClient进行文件下载。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

启动类：

package com.tianbo.second;



import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
 
import net.sf.json.JSONObject;

import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.entity.LaxContentLengthStrategy;
import org.apache.http.util.EntityUtils;
import org.hamcrest.CoreMatchers;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
 
 
/**
 * Main launcher of the KuGou crawler.
 *
 * <p>Walks the ranking pages built from {@link #LINK}, reads every song's title and
 * detail-page link, extracts the song {@code hash} and {@code album_id} from the detail
 * page, asks the play API (template in {@link #mp3}) for the {@code play_url} and hands
 * that URL to {@code FileDownload} to store the MP3 under {@link #filePath}.
 *
 * <p>The code deliberately sticks to Java 5 level syntax because nothing else in this
 * project relies on newer language features.
 */
public class Download_KuGou_2021 {

    /**
     * Directory the MP3 files are written to.
     * NOTE(review): the literal yields doubled backslashes at runtime; kept as-is.
     */
    public static String filePath = "E:\\\\java_paChong\\\\kugou_music";

    /** Fixed query parameters (dfid / mid / platid) appended to every play-API request. */
    static String top2 = "&dfid=33V6Po0SomVY3diAxp0AWXJM&mid=18cec7cb0bc0e9602342c459c61adfa4&platid=4";

    /**
     * Album id of the song processed last. Its initial value doubles as the placeholder
     * text that ends up inside {@link #mp3}.
     */
    static String album_ID = "album_ID";

    /**
     * Play-API URL template. {@code HASH}, {@code album_ID} and {@code TIME} are
     * placeholders replaced per song in {@link #download(String, String)}. Example:
     * https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery19107190036980605969_1611650526484&
     * hash=F8784EBFBAE36B324EC1E3441B6156B4&dfid=...&mid=...&platid=4&album_id=41163623&_=1611650526485
     */
    public static String mp3 = "https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery19107190036980605969_1611650526484&"
            + "hash=HASH" + top2 + "&album_id=" + album_ID + "&_=TIME";

    /** Ranking page URL template; {@code PAGE} is replaced by the page number. */
    public static String LINK = "https://www.kugou.com/yy/rank/home/PAGE-8888.html?from=rank";

    /** Timeout in milliseconds for every page/API fetch. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Captures the song hash embedded in the song detail page. */
    private static final Pattern HASH_PATTERN = Pattern.compile("\"hash\":\"([0-9A-Z]+)\"");

    /** Captures the numeric album id, whether or not the value is quoted. */
    private static final Pattern ALBUM_ID_PATTERN = Pattern.compile("\"album_id\":\\s*\"?(\\d+)");

    /** Captures the (still JSON-escaped) play_url value from the play-API response. */
    private static final Pattern PLAY_URL_PATTERN = Pattern.compile("\"play_url\"\\s*:\\s*\"([^\"]*)\"");

    /** Characters that are not allowed in Windows file names. */
    private static final Pattern ILLEGAL_FILE_CHARS = Pattern.compile("[\\\\/:*?\"<>|]");

    /**
     * Crawls ranking pages 1 to 22 and downloads every song that is not on disk yet.
     *
     * @param args unused
     * @throws Exception if a ranking page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        for (int page = 1; page < 23; page++) {
            getTitle(LINK.replace("PAGE", String.valueOf(page)));
        }
    }

    /**
     * Reads one ranking page and downloads each listed song whose MP3 file is missing.
     *
     * <p>An I/O failure while processing a single song is reported and skipped so that
     * one broken entry does not abort the rest of the page.
     *
     * @param url ranking page URL
     * @return always the empty string
     * @throws Exception if the ranking page itself cannot be fetched
     */
    public static String getTitle(String url) throws Exception {
        Document doc = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);
        Elements songs = doc.select("#rankWrap .pc_temp_songlist li");
        for (Element song : songs) {
            String title = song.attr("title").trim();
            Element anchor = song.getElementsByTag("a").first();
            if (anchor == null) {
                // List item without a link: nothing to download.
                continue;
            }
            String link = anchor.attr("href");
            System.out.println("---title----" + title);
            System.out.println("---link----" + link);
            if (link.length() == 0 || targetFile(title).exists()) {
                continue;
            }
            try {
                download(link, title);
            } catch (IOException e) {
                System.err.println("Skipping '" + title + "': " + e);
            }
        }
        return "";
    }

    /**
     * Resolves the play URL of one song and downloads it as {@code <name>.mp3}.
     *
     * @param url  URL of the song detail page
     * @param name song title, used (after sanitising) as the file name
     * @return the play URL with JSON escaping removed, or the empty string when the
     *         hash, album id or play URL could not be determined
     * @throws Exception if the detail page or the play API cannot be fetched
     */
    public static String download(String url, String name) throws Exception {
        album_ID = "album_ID";

        String page = Jsoup.parse(new URL(url), TIMEOUT_MILLIS).outerHtml();
        String hash = firstGroup(HASH_PATTERN, page);
        String albumId = firstGroup(ALBUM_ID_PATTERN, page);
        if (hash == null || albumId == null) {
            System.err.println("No hash/album_id found on " + url);
            return "";
        }
        // The field is kept in sync for any code that still reads it.
        album_ID = albumId;
        System.out.println("----album_ID----" + albumId);
        System.out.println("-----hash-----" + hash);

        String itemUrl = mp3.replace("HASH", hash)
                .replace("TIME", String.valueOf(System.currentTimeMillis()))
                .replace("album_ID", albumId);
        System.out.println("-------item_url------" + itemUrl);

        // The API answers with JSONP, not HTML, so read the raw body instead of letting
        // the HTML parser rewrite (and entity-escape) the payload.
        String body = Jsoup.connect(itemUrl)
                .ignoreContentType(true)
                .timeout(TIMEOUT_MILLIS)
                .execute()
                .body();
        String escapedPlayUrl = firstGroup(PLAY_URL_PATTERN, body);
        if (escapedPlayUrl == null || escapedPlayUrl.length() == 0) {
            System.err.println("No play_url returned for '" + name + "'");
            return "";
        }
        // JSON escapes every slash as "\/".
        String playUrl = escapedPlayUrl.replace("\\/", "/");
        System.out.println("------playUrl-----" + playUrl);

        File target = targetFile(name);
        if (!target.exists()) {
            File dir = target.getParentFile();
            if (dir != null && !dir.exists()) {
                dir.mkdirs();
            }
            System.out.println("----------file_xiazai------" + target.getPath());
            boolean download = FileDownload.download(playUrl, target.getPath());
            System.out.println("-----download-------" + download);
            if (download) {
                System.out.println(name + "，下载完成");
            }
        }
        return playUrl;
    }

    /** Returns the first capture group of the first match, or null when nothing matches. */
    private static String firstGroup(Pattern pattern, String text) {
        Matcher matcher = pattern.matcher(text);
        return matcher.find() ? matcher.group(1) : null;
    }

    /** Maps a song title to its MP3 file below {@link #filePath}, replacing illegal characters. */
    private static File targetFile(String name) {
        String safeName = ILLEGAL_FILE_CHARS.matcher(String.valueOf(name)).replaceAll("_");
        return new File(filePath, safeName + ".mp3");
    }

}

下载：

package com.tianbo.second;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
 
/**
 * Downloads a remote file over HTTP and stores it on disk.
 */
public class FileDownload {

    private static Log log = LogFactory.getLog(FileDownload.class);

    /** Number of requests made before giving up. */
    private static final int MAX_ATTEMPTS = 3;

    /** Connect and socket timeout in milliseconds. */
    private static final int TIMEOUT_MILLIS = 2000;

    /** Size of the copy buffer in bytes. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Downloads {@code url} into the file {@code path}.
     *
     * <p>The request is attempted up to three times. A 200 response is written to the
     * file, a 403 response stops immediately, and any other status is retried. All
     * failures are reported on the console and signalled through the return value;
     * nothing is thrown. A file that was only partially written is removed so a later
     * existence check does not mistake it for a finished download.
     *
     * @param url  address of the file; JSON-escaped slashes ("\/") and backslashes are
     *             accepted and converted to plain slashes
     * @param path target path including the file name
     * @return {@code true} if the file was downloaded completely, {@code false} otherwise
     */
    public static boolean download(String url, String path) {
        if (url == null || path == null) {
            return false;
        }
        String target = normalizeUrl(url);
        System.out.println("----url---" + target);
        if (target.length() == 0) {
            return false;
        }

        RequestConfig requestConfig = RequestConfig.custom()
                .setSocketTimeout(TIMEOUT_MILLIS)
                .setConnectTimeout(TIMEOUT_MILLIS)
                .build();
        CloseableHttpClient httpclient = HttpClients.createDefault();
        boolean flag = false;
        try {
            for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
                // A fresh request per attempt; the response is always closed so the
                // pooled connection is handed back before the next try.
                HttpGet get = new HttpGet(target);
                get.setConfig(requestConfig);
                CloseableHttpResponse result = httpclient.execute(get);
                try {
                    System.out.println(result.getStatusLine());
                    int status = result.getStatusLine().getStatusCode();
                    if (status == 200) {
                        saveBody(result, new File(path));
                        flag = true;
                        break;
                    }
                    if (status == 403) {
                        System.out.println("-----下载---错误----" + 403);
                        break;
                    }
                    // Any other status (500 included): try again.
                } finally {
                    result.close();
                }
            }
        } catch (Exception e) {
            // Kept on the console: this tool has no logging configuration.
            e.printStackTrace();
            flag = false;
        } finally {
            try {
                httpclient.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return flag;
    }

    /**
     * Turns a JSON-escaped or backslash-separated URL into a plain URL.
     * A URL without backslashes is returned unchanged (apart from trimming).
     */
    private static String normalizeUrl(String url) {
        return url.trim().replace("\\/", "/").replace('\\', '/');
    }

    /**
     * Streams the response body into {@code file}; removes the file again when the copy
     * does not complete.
     */
    private static void saveBody(CloseableHttpResponse response, File file) throws Exception {
        BufferedInputStream in = null;
        BufferedOutputStream out = null;
        boolean complete = false;
        try {
            in = new BufferedInputStream(response.getEntity().getContent());
            out = new BufferedOutputStream(new FileOutputStream(file));
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = in.read(buffer, 0, BUFFER_SIZE)) > -1) {
                out.write(buffer, 0, len);
            }
            out.flush();
            complete = true;
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (out != null) {
                try {
                    out.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
                // Only delete what this call opened, and only after the stream is closed.
                if (!complete) {
                    file.delete();
                }
            }
        }
    }
}

pom：

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.tianbo</groupId>
  <artifactId>maven-pachong-second</artifactId>
  <packaging>war</packaging>
  <version>0.0.1-SNAPSHOT</version>
  <name>maven-pachong-second Maven Webapp</name>
  <url>http://maven.apache.org</url>
  <dependencies>
    <!-- https://mvnrepository.com/artifact/junit/junit -->
    <!-- Declared once only; a second identical declaration makes Maven warn about a
         duplicate dependency. Left at the default (compile) scope because the main
         class imports org.hamcrest.CoreMatchers. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
    </dependency>

    <!-- httpclient -->
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>4.5.12</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-core -->
    <!-- NOTE(review): log4j-core 2.11.0 falls in the version range affected by
         CVE-2021-44228 (Log4Shell); consider upgrading. Version left unchanged here. -->
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <version>2.11.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
      <version>1.7.25</version>
    </dependency>

    <!-- servlet API -->
    <dependency>
      <groupId>javax.servlet</groupId>
      <artifactId>javax.servlet-api</artifactId>
      <version>3.1.0</version>
    </dependency>
    <!-- JSP API -->
    <dependency>
      <groupId>javax.servlet.jsp</groupId>
      <artifactId>javax.servlet.jsp-api</artifactId>
      <version>2.3.1</version>
    </dependency>
    <!-- JSTL tag library -->
    <dependency>
      <groupId>javax.servlet</groupId>
      <artifactId>jstl</artifactId>
      <version>1.2</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.11.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.8.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>3.11</version>
    </dependency>

    <!-- JSON handling -->
    <dependency>
      <groupId>net.sf.json-lib</groupId>
      <artifactId>json-lib</artifactId>
      <version>2.4</version>
      <classifier>jdk15</classifier>
    </dependency>
  </dependencies>
  <build>
    <finalName>maven-pachong-second</finalName>
  </build>
</project>