For more crawler content, see my column python3网络爬虫.
Biqukan novel site: https://www.biqukan.com/
As a test case, let's grab Mao Ni's currently trending novel Ever Night (《将夜》).
Code
import sys

import requests
from bs4 import BeautifulSoup

'''
@author: Face_to_sun
@modified: 2018-12-1
'''

def getListHTML(url):
    """Fetch the chapter-list page and return its HTML."""
    headers = {
        'Referer': 'https://www.biqukan.com/s.php?ie=gbk&s=2758772450457967865&q=%BD%AB%D2%B9',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        # The site serves GBK pages; let requests guess the real encoding.
        response.encoding = response.apparent_encoding
        return response.text
    except requests.RequestException as e:
        print("Request failed:", e)
        return None

def downLoadPage(url, filename):
    """Download a single chapter and save it as <filename>.txt."""
    headers = {
        'Referer': 'https://www.biqukan.com/2_2758/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'lxml')
    # The chapter body sits in <div class="showtxt">; the site separates
    # paragraphs with runs of eight non-breaking spaces (\xa0).
    content = soup.find_all('div', class_='showtxt')
    try:
        text = content[0].get_text().replace('\xa0' * 8, '\n\n')
        print(text)
        with open(filename + '.txt', 'w', encoding='utf8') as f:
            f.write(text)
    except (IndexError, OSError):
        print("Failed to save:", filename)

def getUrl(soup):
    """Extract every chapter link from the list page and download each one."""
    server = "https://www.biqukan.com"
    div = soup.find_all('div', class_='listmain')
    Tag_a = div[0].find_all('a')
    nums = len(Tag_a)
    for index in range(nums):
        page_url = server + Tag_a[index].get('href')
        name = Tag_a[index].string
        downLoadPage(page_url, name)
        # \r keeps rewriting the same console line as a progress indicator.
        sys.stdout.write("Downloaded: %.3f%%" % (100 * (index + 1) / nums) + '\r')
        sys.stdout.flush()

if __name__ == "__main__":
    url = "https://www.biqukan.com/2_2758/"
    html = getListHTML(url)
    if html:
        soup = BeautifulSoup(html, 'lxml')
        getUrl(soup)
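One caveat: the script fires requests at the server back to back, which a site like this may throttle or block. Below is a minimal sketch of a politer fetch helper; polite_get, delay, and retries are names I'm introducing for illustration, not part of the original script, and the one-second interval is an arbitrary assumption rather than anything the site documents.

import time

import requests

def polite_get(url, headers, delay=1.0, retries=3):
    """requests.get with a fixed pause and a simple retry loop.
    delay/retries values are illustrative assumptions."""
    for attempt in range(retries):
        time.sleep(delay)  # pause between requests to avoid hammering the server
        try:
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            return response
        except requests.RequestException:
            if attempt == retries - 1:
                raise  # give up after the last retry

Swapping this in for the bare requests.get calls in getListHTML and downLoadPage keeps the rest of the script unchanged.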
Result


This article demonstrated a Python3 web crawler that scrapes the popular novel Ever Night from the Biqukan novel site. Using the requests and BeautifulSoup libraries, it downloads every chapter automatically and saves each one as a text file.
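Since each chapter lands in its own .txt file, you may want to stitch them into a single book afterwards. A hypothetical post-processing sketch (it assumes the chapter files sit in the current directory, and note that a plain filename sort may not match reading order):

import glob

# Collect chapter files first so the output file is not swept up by the glob.
chapters = sorted(glob.glob('*.txt'))  # lexicographic order, not reading order
with open('ever_night_full.txt', 'w', encoding='utf8') as book:
    for chapter in chapters:
        with open(chapter, encoding='utf8') as f:
            book.write(f.read() + '\n')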