from bs4 import BeautifulSoup
from selenium import webdriver
class Dyu:
    """Scrape live-room titles and viewer counts from the Douyu directory.

    Intended call order: ``setUp()`` to start the browser, ``douyu()`` to
    load the directory page and print what it lists, then ``gb()`` to close
    the browser.
    """

    def setUp(self):
        """Start a Chrome WebDriver session and record the target URL."""
        self.driver = webdriver.Chrome()
        self.url = "https://www.douyu.com/directory/all"

    def douyu(self):
        """Load the directory page and print each room's title and count.

        The page source is parsed once. Only the rooms present in the page
        as currently rendered by the browser are reported; no pagination is
        performed.
        """
        self.driver.get(self.url)
        # The page is HTML, so use an HTML parser rather than the 'xml' one.
        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        titles = soup.find_all('h3', {'class': "ellipsis"})
        counts = soup.find_all('span', {'class': "dy-num fr"})
        # zip() stops at the shorter list, so a title without a matching
        # count element (or vice versa) is silently skipped.
        for index, (title, count) in enumerate(zip(titles, counts), start=1):
            print('第{}个房间名称:{},人数为{}'.format(
                index, title.get_text().strip(), count.get_text().strip()))

    def gb(self):
        """Quit the browser session if one is open.

        Does nothing when no driver has been started or it has already
        been closed, so it is safe to call from cleanup code.
        """
        driver = getattr(self, 'driver', None)
        if driver is None:
            return
        # Drop the reference first so a repeated call cannot quit twice.
        self.driver = None
        driver.quit()
if __name__ == '__main__':
    # Script entry point: scrape the directory page once, then shut down.
    d = Dyu()
    d.setUp()
    try:
        d.douyu()
    finally:
        # Always close the browser, even when scraping raises.
        d.gb()
本文介绍了一种使用Selenium和BeautifulSoup实现的斗鱼直播平台数据爬取方法。通过自动化浏览器操作获取页面源代码，解析并提取直播间的标题和在线人数等关键信息。
683

被折叠的 条评论
为什么被折叠?



