先放完整代码，再分函数解释。
import re
import urllib.request
from bs4 import BeautifulSoup
import random
import xlwt
import time
def main():
    """Scrape the movie listing once and pass the rows to ``save_data``.

    The collected data is handed to ``save_data`` together with the
    hard-coded ``.xls`` output path.
    """
    # NOTE(review): the URL literal was missing from the source (the line
    # read just ``url =``). get_data appends a page offset (0, 25, ..., 225)
    # to this value, so it is presumably the Douban Top250 listing --
    # confirm before running.
    url = "https://movie.douban.com/top250?start="
    # Scrape exactly once: every get_data call re-downloads all ten pages,
    # so its result is kept and reused instead of being fetched repeatedly.
    datalist = get_data(url)
    # Raw string so the backslashes in the Windows path are never treated
    # as escape sequences; the resulting path text is unchanged.
    save_data(datalist=datalist, savepath=r"D:\谷歌下载\电影Top250.xls")
def get_url(url):
    """Download the page at *url* and return its body decoded as UTF-8.

    The request is sent with a desktop-browser ``User-Agent`` header
    instead of urllib's default one.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
    }
    request = urllib.request.Request(headers=head, url=url)
    # The context manager closes the response (and its connection) even if
    # reading or decoding raises.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def get_data(url):
datalist=[]
for i in range (0,10):
baseurl = url + str(i*25)
html = get_url(baseurl)
html = BeautifulSoup(html,"html.parser")
time.sleep(random.randint(0,