from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request (index page and chapters).
HEADERS = {
    'User-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
}

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

# Name the parser explicitly so results do not depend on which optional
# parsers happen to be installed (and to avoid GuessedAtParserWarning).
PARSER = 'html.parser'


def _fetch_soup(session, url):
    """Download *url* with *session* and return it parsed as BeautifulSoup.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, PARSER)


def get_page(url: str) -> None:
    """Download every chapter linked from a book index page into a text file.

    The book title is taken from the index page's ``<title>`` (the text
    before the first ``(``). The output file is ``<title>.txt`` in the
    current working directory, UTF-8 encoded; it starts with the book title,
    followed by each chapter's title and text separated by blank lines.

    Chapters are the ``<dd class="col-md-3">`` elements of the index page;
    each one's first ``<a href>`` is resolved against *url* and the element
    with ``id="htmlContent"`` on that page is written as the chapter body.

    Args:
        url: Address of the book's index (chapter list) page.

    Raises:
        requests.RequestException: if any page cannot be downloaded.
    """
    # One session: the header is set once and connections are reused for
    # the many same-host chapter requests.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        index = _fetch_soup(session, url)  # parse the index page only once
        book_title = index.title.text.split('(')[0]  # book name from <title>
        filename = book_title + '.txt'
        chapters = index.find_all('dd', {'class': 'col-md-3'})  # chapter entries

        with open(filename, 'w+', encoding="utf-8") as file:
            file.write(book_title + '\n\n')
            for chapter in chapters:
                anchor = chapter.find('a')
                if anchor is None or not anchor.get('href'):
                    # Entry without a link: nothing to download.
                    continue

                # .string is None when the entry has more than one child
                # node; fall back to the concatenated text in that case.
                chapter_title = chapter.string or chapter.get_text()
                file.write(chapter_title + '\n\n')

                # urljoin handles relative, root-relative and absolute hrefs,
                # unlike plain string concatenation.
                chapter_url = urljoin(url, anchor['href'])
                body = _fetch_soup(session, chapter_url).find(id='htmlContent')
                if body is None:
                    # Page has no content container; keep the title and
                    # move on instead of crashing mid-file.
                    continue
                file.write(body.text + '\n\n')


if __name__ == '__main__':
    # 1. Build the book URL from the numeric book id typed by the user.
    page = input('请输入对应的网页:').strip()
    url = 'http://www.7kankan.la/book/' + page + '/'
    get_page(url)
爬虫学习-爬取小说
最新推荐文章于 2025-05-17 08:15:38 发布