顶点小说
网址: https://www.xiaoshuopu.com
页面返回的是 HTML 数据。
请求用的是 requests;
解析用的是 BeautifulSoup。
从小说的目录页开始爬取。
import requests
from bs4 import BeautifulSoup
def jgmessage(url=''):
    """Download a page and return it parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch. An empty string or ``None``
            selects the table-of-contents page of the default novel.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``html.parser`` backend.

    Raises:
        requests.RequestException: If the connection fails, the request
            times out (``timeout=3``), or the server answers with a
            4xx/5xx status code.
    """
    if not url:
        url = 'https://www.xiaoshuopu.com/xiaoshuo/67/67169/'

    # Accept-Encoding is intentionally not set by hand: requests only
    # advertises the encodings it is able to decode. Forcing 'br' without a
    # Brotli decoder installed can produce an unreadable body.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Cookie': 'PHPSESSID=kpdso1jkn879plqu56nnft1nah; targetEncodingwwwgembbcom=2',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    }

    resp = requests.get(url, headers=header, timeout=3)
    # Fail loudly on HTTP errors instead of parsing an error page.
    resp.raise_for_status()
    # Use the encoding detected from the body rather than the header value.
    resp.encoding = resp.apparent_encoding
    return BeautifulSoup(resp.text, 'html.parser')
def chatper(message):
    """Collect the chapter links from a parsed table-of-contents page.

    The function looks at the first ``<table>`` of the page and reads the
    ``<a>`` element inside every ``<td class="L">`` cell.

    Args:
        message: Parsed catalog page offering BeautifulSoup's ``find``
            interface (for example the value returned by ``jgmessage``).

    Returns:
        A dict mapping chapter title to absolute chapter URL, in page order.
        Cells without a link (or whose link has no ``href``) are skipped; an
        empty dict is returned when the page has no table. If two chapters
        share a title, the later one replaces the earlier one.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    base_url = 'https://www.xiaoshuopu.com'
    chapters = {}

    table = message.find('table')
    if table is None:
        return chapters

    for cell in table.find_all('td', class_='L'):
        link = cell.find('a')
        if link is None:
            # Padding cells contain no anchor.
            continue
        href = link.get('href')
        if not href:
            continue
        # Fall back to the visible link text when no title attribute is set.
        title = link.get('title') or link.get_text(strip=True)
        # urljoin copes with both site-relative and already-absolute hrefs.
        chapters[title] = urljoin(base_url, href)

    return chapters
def getcontent(url):
    """Fetch a chapter page and return the text of its content container.

    Args:
        url: Address of the chapter page; passed to ``jgmessage``.

    Returns:
        The text of the ``<div id="htmlContent">`` element.

    Raises:
        ValueError: If the page has no ``<div id="htmlContent">`` element.
    """
    soup = jgmessage(url)
    container = soup.find('div', id='htmlContent')
    if container is None:
        # Name the offending page instead of failing on ``None.text``.
        raise ValueError(
            "no <div id='htmlContent'> found in page {!r}".format(url)
        )
    return container.text
def download(content, path='D:\\xioashuo\\从明星野外生存秀开始.txt'):
    """Append text to the output file, creating its folder when needed.

    Args:
        content: Text to append.
        path: Destination file. Defaults to the original hard-coded
            location, so existing one-argument calls behave as before.

    The file is opened in append mode with UTF-8 encoding, so repeated calls
    accumulate text in call order.
    """
    # Local import keeps this block self-contained.
    import os

    folder = os.path.dirname(path)
    if folder:
        # open(..., 'a') creates the file but not missing parent folders.
        os.makedirs(folder, exist_ok=True)
    with open(path, 'a', encoding='utf-8') as dw:
        dw.write(content)
if __name__ == '__main__':
    # Crawl the default novel: read its table of contents, then fetch each
    # chapter in page order and append "title, newline, text, newline" to the
    # output file.
    catalog = jgmessage('')
    chapters = chatper(catalog)
    for title, chapter_url in chapters.items():
        content = getcontent(chapter_url)
        # One write per chapter; the resulting file content is the same as
        # writing the title and the body separately.
        download(title + '\n' + content + '\n')
感兴趣的可以试一下。我也是刚开始用 Python,写得很烂。