import requests
from lxml import etree
# Root of the site; chapter links found on the index page are appended to it.
site_root = "http://www.cits0871.com"
# Index page of the book, listing every chapter link.
url = site_root + "/booktxt/20768/"
# Output file that receives the whole book as UTF-8 text.
file_name = '第一序列.txt'
# Seconds to wait on a request before giving up instead of hanging forever.
request_timeout = 30


def fetch_page(page_url):
    """Download *page_url* and return it parsed as an lxml HTML tree.

    Raises ``requests.HTTPError`` for a 4xx/5xx response and other
    ``requests.RequestException`` subclasses for network failures/timeouts.
    """
    response = requests.get(page_url, timeout=request_timeout)
    response.raise_for_status()
    # Pages are decoded as GBK; undecodable bytes are replaced so that one
    # bad character does not abort the whole crawl.
    html = response.content.decode('gbk', errors='replace')
    return etree.HTML(html)


def format_title(raw_title):
    """Convert a heading such as ``"12、标题"`` into ``"第12章 标题"``.

    Only the first ``'、'`` is treated as the separator, so a title that
    itself contains ``'、'`` is kept whole.  Returns ``None`` when the
    heading has no ``'、'`` at all (such pages are skipped by the caller).
    """
    number, separator, title = raw_title.partition('、')
    if not separator:
        return None
    return "第" + number + "章 " + title


def render_chapter(title, paragraphs):
    """Return the text written to the file for one chapter.

    Layout: the title line, one line per paragraph, then four blank lines
    separating this chapter from the next.
    """
    lines = [title] + list(paragraphs)
    return "".join(line + "\n" for line in lines) + "\n\n\n\n"


def main():
    """Fetch every chapter linked from the index page and save it to *file_name*."""
    index_page = fetch_page(url)
    chapter_links = index_page.xpath("//div[@id='list']/dl/dd/a/@href")

    # Mode 'w' truncates any previous run's output; the file is opened once
    # for the whole crawl rather than once per written line.
    with open(file_name, 'w', encoding='utf-8') as out:
        for href in chapter_links:
            chapter_page = fetch_page(site_root + href)

            headings = chapter_page.xpath("//div[@class='bookname']/h1/text()")
            # Skip pages without a heading or without a "number、title" heading.
            title = format_title(headings[0]) if headings else None
            if title is None:
                continue

            paragraphs = chapter_page.xpath("//div[@id='content']/text()")
            out.write(render_chapter(title, paragraphs))
            # Flush per chapter so progress is on disk if a later request fails.
            out.flush()
            print(title + "爬取成功")


if __name__ == "__main__":
    main()
# 笔趣网小说试爬
# 最新推荐文章于 2024-03-19 15:26:27 发布