Python练习014

题目:爬取诗词名句网的全本三国演义。

"""Scrape the full text of Romance of the Three Kingdoms (三国演义)
from shicimingju.com: fetch the table of contents, then download each
chapter into its own numbered text file under 三国演义/."""
import requests
from bs4 import BeautifulSoup
import os

if not os.path.exists("三国演义"):
    os.mkdir("三国演义")

url = "https://www.shicimingju.com/book/sanguoyanyi.html"
# Keep the User-Agent as a single adjacent-literal string. The original used
# backslash continuations *inside* the quoted literal, which embedded the
# next line's leading spaces into the header value and mangled it.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/87.0.4280.141 Safari/537.36 Edg/87.0.664.75'}
response = requests.get(url=url, headers=headers)
response.raise_for_status()  # fail fast instead of parsing an HTTP error page
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'lxml')

# Each <a> in the book index holds the chapter title and its relative link.
chapters = soup.select('.book-mulu > ul > li > a')
for num, each_chapter in enumerate(chapters, start=1):
    chapter_name = each_chapter.text
    # href already begins with '/', so join without an extra trailing slash
    # (the original produced 'shicimingju.com//book/...').
    chapter_url = "https://www.shicimingju.com" + each_chapter['href']
    response = requests.get(url=chapter_url, headers=headers)
    response.raise_for_status()
    response.encoding = 'utf-8'
    chapter_soup = BeautifulSoup(response.text, 'lxml')
    chapter_content = chapter_soup.find('div', class_='chapter_content')
    filename = '三国演义/' + str(num) + '.txt'
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(chapter_name + '\n' + chapter_content.text)
    print(chapter_name + "下载完成")

题目:爬取彼岸图网的图片数据

"""Scrape landscape wallpaper images from pic.netbian.com's 4kfengjing
listing page, saving each picture into pictures/ named by its alt text."""
import requests
from lxml import etree
import os

if not os.path.exists('pictures'):
    os.mkdir('pictures')

url = "http://pic.netbian.com/4kfengjing/"
# Single adjacent-literal User-Agent: the original's backslash continuations
# inside the quoted string embedded runs of spaces into the header value.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/87.0.4280.141 Safari/537.36 Edg/87.0.664.75'}
response = requests.get(url=url, headers=headers)
response.raise_for_status()  # fail fast instead of parsing an HTTP error page
# The site serves GBK; setting it explicitly makes the alt text decode
# correctly and replaces the old encode('iso-8859-1').decode('gbk') hack,
# which only worked because requests mis-detected the page as Latin-1.
response.encoding = 'gbk'
tree = etree.HTML(response.text)
a_list = tree.xpath('//ul[@class="clearfix"]/li/a')
for each_a in a_list:
    # a/@href points at the detail *page* (an HTML document), not an image;
    # the picture available on the listing page is the <img> thumbnail's src.
    # The original saved HTML bytes under a .jpg name.
    img_src = 'http://pic.netbian.com' + each_a.xpath('./img/@src')[0]
    img_name = each_a.xpath('./img/@alt')[0] + '.jpg'
    img = requests.get(url=img_src, headers=headers).content
    path = 'pictures/' + img_name
    with open(path, 'wb') as f:
        f.write(img)
    print(img_name, "下载成功")
    

请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述
请添加图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值