Crawling YY short videos with Python

import os
import random
import threading
import time

import requests


class ShortVideoDownload(threading.Thread):
    """Worker thread that downloads a single short video to disk."""

    def __init__(self, downloadurl, downloadDir, filename):
        threading.Thread.__init__(self)
        self.downloadurl = downloadurl
        self.downloadDir = downloadDir
        self.filename = filename
        self.fullpath = os.path.join(self.downloadDir, self.filename)

    def run(self):
        # Make sure the target directory exists and skip files we already have.
        if not os.path.exists(self.downloadDir):
            os.makedirs(self.downloadDir)
            print("\n", self.downloadDir, " did not exist, created it (^---^)")
        if os.path.exists(self.fullpath):
            print("\n", self.fullpath, " already exists")
            return

        headers = {"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
        response = requests.get(url=self.downloadurl, headers=headers)

        # Retry up to 10 times with a short random pause between attempts.
        retries = 10
        while not response.ok and retries:
            time.sleep(random.randint(0, 3))
            response = requests.get(url=self.downloadurl, headers=headers)
            retries -= 1

        if not response.ok:
            print("\n", self.downloadurl, " download failed, please try again later (^---^)")
            return

        with open(self.fullpath, mode="wb") as fp:
            fp.write(response.content)

        print("\n", self.fullpath, " written (^---^)")

class YYShortVideo(threading.Thread):
    """Worker thread that fetches one page of the video list and downloads matching videos."""

    def __init__(self, page):
        threading.Thread.__init__(self)
        self.url = "https://api-tinyvideo-web.yy.com/home/tinyvideos"
        self.params = {'data': '{"uid":2580941092,"page":%s,"pageSize":10}' % page}
        self.headers = {"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}

    def run(self):
        response = requests.get(url=self.url, params=self.params, headers=self.headers)

        # Retry up to 10 times with a short random pause between attempts.
        retries = 10
        while not response.ok and retries:
            time.sleep(random.randint(0, 3))
            response = requests.get(url=self.url, params=self.params, headers=self.headers)
            retries -= 1

        if not response.ok:
            print("\n", self.url, " request failed, please try again later (^---^)")
            return

        self.videoJsonData = response.json()['data']['data']

        self.saveVideos()

    def saveVideos(self):
        # Start one download thread per video whose description contains "舞" (dance).
        threadList = []
        for each in self.videoJsonData:
            if each["resdesc"].find("舞") == -1:
                print("\n", each["resdesc"], " does not contain 舞, moving on (^---^)")
                continue
            threadProcess = ShortVideoDownload(each["resurl"], "./ShortVideo", each["resdesc"] + ".mp4")
            threadProcess.start()
            threadList.append(threadProcess)
        for eachThread in threadList:
            eachThread.join()

# Crawl pages 1 through 10, one thread per page.
YYthreadList = []
begin = 1
end = 11
for i in range(begin, end, 1):
    YYthreadProcess = YYShortVideo(i)
    YYthreadProcess.start()
    YYthreadList.append(YYthreadProcess)
for eachThread in YYthreadList:
    eachThread.join()
print("All videos saved, dear (^---^)")
Note: unauthorized scraping may involve copyright infringement and other issues; only crawl where the law permits. Below is one way to crawl 100 videos with Python:

1. Import the required libraries, such as requests and beautifulsoup4.

```python
import requests
from bs4 import BeautifulSoup
import os
```

2. Define a function that collects video links from the target site. Taking Bilibili as an example, to collect the first 100 videos you could use:

```python
def get_video_links():
    video_links = []
    for i in range(1, 6):
        url = f'https://www.bilibili.com/v/popular/rank/all#/?page={i}'
        html = requests.get(url).text
        soup = BeautifulSoup(html, 'html.parser')
        items = soup.find_all('li', class_='rank-item')
        for item in items:
            link = item.find('a', class_='title')['href']
            video_links.append('https:' + link)
            if len(video_links) >= 100:
                return video_links
    return video_links
```

3. Define a function that downloads a single video. For Bilibili, for example:

```python
def download_video(link, index):
    html = requests.get(link).text
    soup = BeautifulSoup(html, 'html.parser')
    title = soup.find('h1', class_='title').text.strip()
    video_url = soup.find('video')['src']
    video_data = requests.get(video_url).content
    with open(f'{index}_{title}.mp4', 'wb') as f:
        f.write(video_data)
```

4. Call the two functions above to crawl and download the first 100 videos.

```python
if __name__ == '__main__':
    video_links = get_video_links()
    for i, link in enumerate(video_links):
        download_video(link, i + 1)
```

The code above is for reference only; the exact implementation will differ depending on each site's structure. Mind the legal issues involved in scraping and follow web-crawler etiquette.
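One further note that applies to both scripts: the output filename is built directly from a page-supplied title (`resdesc` in the YY script, `title` in the Bilibili sketch), which may contain characters that are invalid in filenames. A small, purely illustrative helper (the name `safe_filename` is an assumption, not part of either script) can be applied before opening the file:

```python
import re


def safe_filename(name, maxlen=100):
    # Replace characters that are invalid on common filesystems and trim long names.
    cleaned = re.sub(r'[\\/:*?"<>|\r\n]', "_", name).strip()
    return cleaned[:maxlen] or "untitled"
```

For example: `ShortVideoDownload(each["resurl"], "./ShortVideo", safe_filename(each["resdesc"]) + ".mp4")`.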