利用爬虫爬取小说章节并写入本地文件_soup = bs(res.content, 'html.parser')-CSDN博客

本文链接：https://blog.csdn.net/limenghao2002/article/details/121423745

本文介绍了一种使用Python爬虫技术从笔趣阁网站抓取小说章节的方法。通过发送HTTP请求获取网页内容，利用BeautifulSoup解析网页并提取小说标题及各章节链接，最后下载各章节内容并保存为TXT文件。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
 
url = "https://www.biqugee.com/book/56078/"

# Fetch the book's index page. The timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() stops early on an
# HTTP error instead of trying to parse an error page.
resp = requests.get(url, timeout=30)
resp.raise_for_status()

# The book title is the <h1> inside <div id="info">.
soup = BeautifulSoup(resp.text, 'html.parser').find('div', {'id': 'info'})
so = soup.find('h1') if soup is not None else None
if so is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise RuntimeError('book title not found at {}'.format(url))
title = so.text
# Every chapter file is written under a directory named after the book.
filename = 'e://{}'.format(title)

# Try to create the directory and treat "already exists" as the normal
# second-run case; this avoids the gap between an exists() check and mkdir().
try:
    os.mkdir(filename)
except FileExistsError:
    print('目录已经存在！')
else:
    print('目录创建成功！')
 
# so = soup.find_all("p")
# for i in so:
#     print(i.text)
# The chapter index is the set of links inside <div id="list"> of the book
# page fetched above (still held in `resp` at this point).
soup = BeautifulSoup(resp.text, 'html.parser').find("div", {'id': 'list'})
if soup is None:
    raise RuntimeError('chapter list not found at {}'.format(url))
a_list = soup.find_all('a')

# Compiled once because they are applied to every chapter.
blank_lines = re.compile(r'\n\s*\n')
# Characters that Windows does not allow in file names.
illegal_chars = re.compile(r'[\\/:*?"<>|]')

for a in a_list:
    href = a.get('href')
    if not href:
        # Anchor without a target: nothing to download.
        continue
    # Resolve against the index URL so that both site-absolute ("/book/...")
    # and page-relative hrefs produce a valid chapter URL.
    href = urljoin(url, href)
    resp = requests.get(href, timeout=30)
    resp.raise_for_status()

    # Parse the chapter page once and reuse the tree for both lookups.
    page = BeautifulSoup(resp.text, 'html.parser')
    # attrs must be a dict; a set here would be read as a list of class names.
    heading = page.find('div', {'class': 'bookname'})
    bookname = heading.find('h1') if heading is not None else None
    content = page.find('div', {'id': 'content'})
    if bookname is None or content is None:
        # Page does not have the expected layout; report it and move on
        # instead of aborting the whole download.
        print(href + "  " + '下载失败！')
        continue

    # get_text() also works when the <h1> contains nested tags, where
    # .string would be None.
    bookname = bookname.get_text()
    # Only the file name is sanitized; the printed title stays as on the page.
    bookname02 = 'e://{}//{}.txt'.format(title, illegal_chars.sub('_', bookname))

    # Paragraphs are indented with eight non-breaking spaces: turn each indent
    # into a paragraph break, then collapse runs of blank lines to one newline.
    text = content.text.replace('\xa0' * 8, '\n\n')
    text = blank_lines.sub('\n', text)

    # Append mode is kept from the original script: re-running the script
    # appends to any chapter file that already exists.
    with open(bookname02, 'a', encoding='utf-8') as f:
        f.write(text)
    print(bookname + "  " + '下载成功！')