针对弹幕的爬取,如果我们只需要获取网页上能看到的数据,使用 selenium 就能实现。对于直播平台来说,往往有第三方平台 API 让你获取数据(可以获取弹幕、发弹幕者的名字、礼物等等,这需要客户端向弹幕服务器发送登录请求、发送心跳信息等等)。今天只获取弹幕信息并储存到 txt 文件中,上代码,上图片。
#!/usr/bin/env python3
# _*_ coding: utf-8 _*_
# author hou
import random
import sys
import time

from selenium import webdriver
# Module-level Chrome driver used by the scraper class below. It is created
# as soon as this module is executed.
chrome_options = webdriver.ChromeOptions()
# Run Chrome in headless mode (no visible browser window).
chrome_options.add_argument('--headless')
# Profile preference that stops images from loading, to make crawling faster.
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)
# Pass the options through ``options=``: the older ``chrome_options=`` keyword
# was deprecated by Selenium and is no longer accepted by Selenium 4 releases.
browser = webdriver.Chrome(options=chrome_options)
class DoYu_DanMu():
    """Scrape the on-screen danmu (bullet comments) of a Douyu live room.

    The class drives the module-level ``browser`` Selenium driver. Every
    non-empty comment found on the page is echoed to stdout and appended to
    ``hcf.txt`` in the current working directory.
    """

    # XPath of the per-comment nodes. The ``class`` attribute is matched
    # exactly, including its leading space.
    DANMU_XPATH = './/div[@class=" danmu-6e95c1"]/div/div'

    def __init__(self) -> None:
        # Site root; the room id is appended to it to build the room URL.
        self.url = 'https://www.douyu.com/'

    def GetIdUrl(self, Id):
        """Open the room page for *Id* and poll its danmu indefinitely.

        Args:
            Id: Room number; it is converted with ``str()`` and appended to
                ``self.url``.

        The polling loop has no exit condition, so this method only ends
        when an exception that is not an ``Exception`` subclass (for
        example ``KeyboardInterrupt``) propagates out of it.
        """
        browser.get(self.url + str(Id))  # navigate the browser to the room
        while True:
            # Random pause of up to 3 seconds between polls.
            time.sleep(random.random() * 3)
            try:
                # ``find_elements(by, value)`` is used instead of
                # ``find_elements_by_xpath`` because the latter was removed
                # in Selenium 4.3; this form works on old and new versions.
                nodes = browser.find_elements('xpath', self.DANMU_XPATH)
            except Exception as exc:
                # Best effort: report the failure and try again next poll.
                self._report('looking up danmu elements', exc)
                continue
            # NOTE(review): elements that are still on the page at the next
            # poll are recorded again; de-duplicate here if that is unwanted.
            for node in nodes:
                try:
                    # Read the text once: every ``.text`` access is a driver
                    # round-trip and the element may change in between.
                    text = node.text
                    if text:
                        self._record(text)
                except Exception as exc:
                    # A single bad element must not discard the rest of
                    # the batch.
                    self._report('reading/saving a danmu', exc)

    def _record(self, text):
        """Echo one comment to stdout and append it to the output file."""
        try:
            print(text)
        except (UnicodeError, OSError):
            # Console output is secondary (e.g. the terminal cannot encode
            # the text); the comment is still written to the file below.
            pass
        self.save(text)

    @staticmethod
    def _report(action, exc):
        """Write a one-line description of a swallowed error to stderr."""
        try:
            print('error while %s: %r' % (action, exc), file=sys.stderr)
        except (UnicodeError, OSError):
            pass

    def save(self, danmus):
        """Append the string *danmus* plus a newline to ``hcf.txt`` (UTF-8)."""
        with open('hcf.txt', 'a', encoding='utf-8') as f:
            f.write(danmus + '\n')
if __name__ == '__main__':
    # Room id typed by the user, e.g. 22619. Surrounding whitespace is
    # dropped so it does not end up in the room URL.
    num = input('请输入要爬取弹幕的房间号').strip()
    DoYu = DoYu_DanMu()
    try:
        DoYu.GetIdUrl(num)
    except KeyboardInterrupt:
        # The polling loop has no exit of its own; Ctrl+C is the normal way
        # to stop the scraper, so do not show a traceback for it.
        pass
    finally:
        # Shut the browser down so no headless Chrome / driver process is
        # left running after the script ends.
        browser.quit()
