#使用正则表达式取出网页中图片url并下载图片
import time
import re
import requests
import multiprocessing
import os
def down_image(url, i):
    """Download one image and save it under ``./douyu``.

    Args:
        url: Address of the image to fetch.
        i: Sequence number of the image, used only in progress messages.

    The file name is taken from the last 15 characters of ``url``.
    Network and HTTP failures are reported on stdout and the image is
    skipped, so an error page is never written to disk as a picture.
    """
    print("正在下载第%d张图片...%s" % (i, url))
    try:
        # A timeout keeps one stalled connection from blocking the caller
        # (typically a pool worker) forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        print("第%d张图片下载失败...%s (%s)" % (i, url, err))
        return
    # A "/" inside the URL tail would otherwise point outside ./douyu.
    name = url[-15:].replace("/", "_")
    # exist_ok avoids the check-then-create race when several processes
    # call this function at once and all find the directory missing.
    os.makedirs("./douyu", exist_ok=True)
    with open("./douyu/" + name, "wb") as f:
        f.write(response.content)
def dealdata(html):
    """Find every ``..._big.jpg`` image URL in ``html`` and download them.

    Args:
        html: Text to scan for image URLs.

    Each match is handed to ``down_image`` through a pool of 10 worker
    processes. The number of matches, any worker failure and the total
    elapsed time are printed to stdout.
    """
    exp = r'https?://[^:]*_big\.jpg'
    ls = re.findall(exp, html)
    print("共找到%d张符合要求的图片,开始下载..." % len(ls))
    begin = time.time()
    if ls:  # no matches: do not spawn worker processes for no work
        # The context manager tears the pool down even if scheduling raises.
        with multiprocessing.Pool(10) as pool:
            # Keep the AsyncResult handles; without them any exception
            # raised inside a worker is discarded without a trace.
            pending = [
                (i, url, pool.apply_async(down_image, args=(url, i)))
                for i, url in enumerate(ls, start=1)
            ]
            pool.close()
            pool.join()
            for i, url, result in pending:
                try:
                    result.get()
                except Exception as err:  # report it, keep checking the rest
                    print("第%d张图片下载失败...%s (%s)" % (i, url, err))
    end = time.time()
    print("下载完成共耗时%f秒" % (end - begin))
def main():
    """Ask for a page number, fetch that listing page and download its images.

    The page number is read from stdin. Invalid input or a failed request
    is reported on stdout and ends the run without a traceback.
    """
    try:
        pagenum = int(input("请输入要下载的页码:"))
    except ValueError:
        print("页码必须是整数")
        return
    url = "https://www.douyu.com/gapi/rkc/directory/2_201/%d" % pagenum
    headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko)"}
    try:
        # A timeout prevents hanging forever on an unresponsive server; the
        # status check stops an error page from being scanned for images.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        print("获取页面失败: %s" % err)
        return
    dealdata(response.text)
# Start the downloader only when this file is executed as a script.
# The guard also keeps multiprocessing workers that re-import this module
# (spawn start method) from running main() again.
if __name__ == "__main__":
    main()
# 爬取某直播网站首页小姐姐的照骗
# 最新推荐文章于 2025-02-20 17:43:42 发布