import time

from selenium import webdriver
from selenium.webdriver.common.by import By
def saveData(text: str, path: str = r'C:\Users\Administrator\Desktop\04\data.txt') -> None:
    """Append ``text`` as a single line to the output file.

    Args:
        text: The string to store; a newline is written after it.
        path: Destination file. Defaults to the location the script has
            always written to, so existing one-argument calls are unchanged.
    """
    # Append mode lets repeated calls accumulate lines; the explicit UTF-8
    # encoding avoids depending on the platform's default code page.
    with open(path, 'a', encoding='utf-8') as f:
        f.write(text + '\n')
class YunSpider(object):
    """Selenium-driven scraper that saves text entries from a paginated page.

    The page is opened in Chrome, the first iframe is entered, and for each
    page the text of every matching entry is passed to ``saveData`` before
    the "next page" link is clicked.
    """

    # Locators used by getcontent(); kept together so markup changes on the
    # target site only need to be tracked in one place.
    _ENTRY_XPATH = '//div[@class="cmmts j-flag"]/div'
    _ENTRY_TEXT_XPATH = './/div[@class="cnt f-brk"]'
    _NEXT_PAGE_LINK_TEXT = "下一页"

    def __init__(self, url):
        """Remember the target URL and start a Chrome WebDriver session.

        Args:
            url: Address of the page to scrape.
        """
        self.url = url
        self.driver = webdriver.Chrome()

    def getcontent(self, pages=5):
        """Open the page, save the entries of each page, and paginate.

        Args:
            pages: Maximum number of pages to save. Defaults to 5, the
                count that was previously hard-coded.
        """
        self.driver.get(self.url)
        # The content lives inside an iframe, so switch into the first
        # frame before locating anything.
        self.driver.switch_to.frame(0)
        self.driver.execute_script('window.scrollBy(0,8000)')

        for page in range(pages):
            for entry in self.driver.find_elements(By.XPATH, self._ENTRY_XPATH):
                # find_elements (plural) returns a list and never raises, so
                # an entry without a text node is skipped instead of
                # aborting the whole run with NoSuchElementException.
                text_nodes = entry.find_elements(By.XPATH, self._ENTRY_TEXT_XPATH)
                if not text_nodes:
                    continue
                saveData(text_nodes[0].text)

            # Nothing is read after the last requested page, so do not
            # navigate any further.
            if page == pages - 1:
                break

            next_links = self.driver.find_elements(
                By.PARTIAL_LINK_TEXT, self._NEXT_PAGE_LINK_TEXT
            )
            if not next_links:
                # No "next page" link present: there is nothing more to visit.
                break
            next_links[0].click()
            # Short fixed pause to give the next page time to render.
            time.sleep(.5)

    def close(self):
        """Quit the WebDriver session and close the browser."""
        self.driver.quit()
if __name__ == '__main__':
    # Script entry point: scrape the page at base_url.
    base_url = 'https://music.163.com/#/song?id=417250673'
    yun = YunSpider(base_url)
    try:
        yun.getcontent()
    finally:
        # Always shut the browser down, even if scraping fails, so no
        # Chrome/chromedriver process is left running.
        yun.driver.quit()
# Selenium setup guide: https://blog.youkuaiyun.com/weixin_44352981/article/details/90714826
# Tutorial this script follows: https://www.bilibili.com/video/BV1pE411B7BL?t=3516