学习目的:练习selenium的用法,获取美食杰--->菜谱大全--->早餐 页面中的菜名以及作者名,保存到TXT文件中
1、首先引入所需要的包
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains  # simulates mouse actions
from selenium.webdriver.common.by import By  # locator strategies for find_element(s)
2、打开美食杰的首页,并设置隐式等待:之后查找元素时,最多等待十秒直到元素出现
# Launch a Firefox session and open the site's home page.
# The implicit wait is a session-wide setting that makes element lookups poll
# for up to 10 seconds; it does not affect navigation, so it is configured
# before the page is requested.
HOME_URL = 'https://www.meishij.net/'

driver = webdriver.Firefox()
driver.implicitly_wait(10)
driver.get(HOME_URL)
3、查找<菜谱大全>在网页中的位置,并模拟鼠标移动到<菜谱大全>上
# Locate the "菜谱大全" navigation entry and move the mouse pointer over it.
# NOTE(review): hovering presumably reveals the sub-menu that contains the
# category links -- confirm against the live page.
# find_element(By.CSS_SELECTOR, ...) is used because the
# find_element_by_css_selector() helper was removed in Selenium 4.3; the
# By-based form also works on older Selenium releases.
cai_pu = driver.find_element(By.CSS_SELECTOR, 'li.hasmore a.link.pngFix')
ActionChains(driver).move_to_element(cai_pu).perform()
4、找到<早餐>,并自动点击跳转到<早餐>页面上
# Click the link whose visible text is "早餐" to open the breakfast listing.
# find_element(By.LINK_TEXT, ...) replaces find_element_by_link_text(), which
# was removed in Selenium 4.3.
# The implicit wait configured right after the driver was created applies to
# the whole session, so it is not set a second time here.
driver.find_element(By.LINK_TEXT, '早餐').click()
5、模拟鼠标下拉滑动条
# Scroll the page with JavaScript: jump to 1/6, 3/6 and 5/6 of the document
# height in turn, pausing one second after each jump.
SCROLL_JS = 'document.documentElement.scrollTop = document.documentElement.scrollHeight * %f'

for sixths in (1, 3, 5):
    driver.execute_script(SCROLL_JS % (sixths / 6))
    time.sleep(1)
6、获取早餐美食的信息,保存到meishijie.txt 文件中
# Read the dish name (<strong>) and the author (<em>) from every 'div.c1'
# element and append them to meishijie.txt as one "name-->author" line each.
# find_element(s)(By.<strategy>, ...) is used because the find_element(s)_by_*
# helpers were removed in Selenium 4.3.
info_lists = driver.find_elements(By.CSS_SELECTOR, 'div.c1')
lines = []
for info in info_lists:
    name = info.find_element(By.TAG_NAME, 'strong').text
    author = info.find_element(By.TAG_NAME, 'em').text
    lines.append(f"{name}-->{author}\n")

# Open the output file once instead of once per dish. Nothing is opened when
# no dish was found, so an empty file is never created.
if lines:
    with open('meishijie.txt', 'a', encoding='utf-8') as f:
        f.writelines(lines)
7、以下是完整代码(在前面步骤的基础上增加了翻页功能,默认获取前三页数据)
from selenium.webdriver.common.action_chains import ActionChains #模拟鼠标
from selenium import webdriver
import time
from selenium.webdriver.common.by import By  # locator strategies for find_element(s)

# Number of listing pages to scrape; raise this value to fetch more pages.
MAX_PAGES = 3

driver = webdriver.Firefox()
try:
    # The implicit wait is session-wide: every element lookup polls for up to
    # 10 seconds, so it only needs to be configured once.
    driver.implicitly_wait(10)
    driver.get('https://www.meishij.net/')

    # Hover over the "菜谱大全" navigation entry, then follow the "早餐" link.
    # find_element(By.<strategy>, ...) replaces the find_element_by_* helpers,
    # which were removed in Selenium 4.3.
    cai_pu = driver.find_element(By.CSS_SELECTOR, 'li.hasmore a.link.pngFix')
    ActionChains(driver).move_to_element(cai_pu).perform()
    driver.find_element(By.LINK_TEXT, '早餐').click()

    for page in range(1, MAX_PAGES + 1):
        print(f"正在获取第{page}页")

        # Scroll with JavaScript to 1/6, 3/6 and 5/6 of the document height,
        # pausing one second after each jump.
        for row in range(1, 7, 2):
            x = row / 6
            js = 'document.documentElement.scrollTop = document.documentElement.scrollHeight * %f' % x
            driver.execute_script(js)
            time.sleep(1)

        # Build one "name-->author" line per 'div.c1' element on this page.
        lines = []
        for info in driver.find_elements(By.CSS_SELECTOR, 'div.c1'):
            name = info.find_element(By.TAG_NAME, 'strong').text
            author = info.find_element(By.TAG_NAME, 'em').text
            lines.append(f"{name}-->{author}\n")

        # Append the page's results with a single open() instead of re-opening
        # the file for every dish; skip it when the page yielded nothing.
        if lines:
            with open('meishijie.txt', 'a', encoding='utf-8') as f:
                f.writelines(lines)

        # No need to navigate further once the last requested page is saved.
        if page == MAX_PAGES:
            break

        # find_elements() returns an empty list when nothing matches, whereas
        # find_element() raises NoSuchElementException; using the plural form
        # lets the "no next page" case be detected instead of crashing.
        next_links = driver.find_elements(By.CLASS_NAME, 'next')
        if not next_links:
            print("已经最后一页了")
            break
        next_links[0].click()
finally:
    # Always close the browser, even if a lookup or click above fails.
    driver.quit()