import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Scrape every chapter linked from the book's index page and write them all,
# in index order, into a single UTF-8 text file.

# Index page whose ".book-mulu" list holds one <a> per chapter.
url = 'https://www.shicimingju.com/book/sanguoyanyi.html'
# Browser-like User-Agent sent with every request.
header = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'
}
# Chapter hrefs are resolved against the site root; urljoin handles both
# "/book/..." and "book/..." forms without producing a double slash.
SITE_ROOT = "https://www.shicimingju.com/"
OUTPUT_PATH = "txtdir/sanguoyanyi.txt"
# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 10

# One session for the whole run so the connection is reused across chapters.
with requests.Session() as session:
    session.headers.update(header)

    # A failure on the index page is fatal: there is nothing to scrape.
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'UTF-8'
    soup = BeautifulSoup(response.text, "lxml")
    search_list = soup.select(".book-mulu > ul > li > a")

    # Make sure the output directory exists before opening the file.
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

    # The context manager closes the file even if a chapter raises.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as file:
        for i in search_list:
            page_name = i.text
            href = i.get("href")
            if not href:
                print(page_name + " skipped: link has no href")
                continue
            page_url = urljoin(SITE_ROOT, href)

            # A single bad chapter is reported and skipped so the rest of
            # the book is still saved.
            try:
                page_content = session.get(page_url, timeout=REQUEST_TIMEOUT)
                page_content.raise_for_status()
            except requests.RequestException as exc:
                print(page_name + " skipped: request failed (" + str(exc) + ")")
                continue

            page_content.encoding = "UTF-8"
            detail_soup = BeautifulSoup(page_content.text, "lxml")
            # find() returns None when the div is absent; check before
            # reading .text instead of crashing with AttributeError.
            chapter_div = detail_soup.find('div', class_='chapter_content')
            if chapter_div is None:
                print(page_name + " skipped: chapter_content not found")
                continue

            detail_content = chapter_div.text
            file.write(page_name + ":" + detail_content + "\n\n")
            print(page_name + " 爬取成功!!!!!!!!!")