First, import the required packages:
import urllib.request
from urllib.request import Request
from urllib.parse import urlencode
from fake_useragent import UserAgent
Step 1: fetch the page content
def get_html(url):
    # Use a random Chrome User-Agent so the request looks like a normal browser visit
    headers = {
        "User-Agent": UserAgent().chrome
    }
    request = Request(url, headers=headers)
    response = urllib.request.urlopen(request)
    # read() returns the raw response body as bytes
    return response.read()
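As a quick check, you can call get_html on its own before wiring up the rest of the crawler. This is a minimal sketch; the URL below is purely illustrative and not taken from the original text:

if __name__ == '__main__':
    # Hypothetical test URL, for illustration only
    html = get_html('https://tieba.baidu.com/f?ie=utf-8&kw=python')
    print(len(html))  # number of bytes received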
Step 2: save the page content
def sava_html(filename, html_bytes):
    # Open in 'wb' mode because get_html() returns bytes, not str
    with open(filename, 'wb') as file:
        file.write(html_bytes)
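The two helpers can be combined to download and save a single page. This is a hedged sketch assuming the functions above; the URL and filename are hypothetical placeholders:

html_bytes = get_html('https://tieba.baidu.com/f?ie=utf-8')  # hypothetical URL for illustration
sava_html('tieba_page_1.html', html_bytes)  # writes the raw bytes to a local HTML file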
Step 3: call the fetch and save functions:
def main():
    content = input('Enter the keyword to download: ')
    num = int(input('Enter how many pages to download: '))
    base_url = "https://tieba.baidu.com/f?ie=utf-8&