# -*- coding:utf-8 -*-
import requests
import re
import sys
import os
# Python 2 only: reload() restores sys.setdefaultencoding (removed from the
# sys module at interpreter startup) so implicit str/unicode conversions use
# UTF-8. Python 3 has no reload() builtin, so the branch is skipped there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

# Browser-like User-Agent sent with every request.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'}

SITE_ROOT = 'http://www.94soxx.com'
INDEX_URL_TEMPLATE = SITE_ROOT + '/xo/yazhousetu/5_%d.html'
FIRST_PAGE = 1
LAST_PAGE = 12
OUTPUT_ROOT = 'pic'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block forever

# Compiled once instead of on every loop iteration.
GALLERY_LINK_RE = re.compile("(/xo/yazhousetu/0R.*?html)", re.S)
IMAGE_SRC_RE = re.compile('<img src="(http.*?)" />', re.S)


def extract_gallery_links(index_html):
    """Return the site-relative gallery paths found in an index page.

    Args:
        index_html: Text of one index page.

    Returns:
        A list of path strings in document order (duplicates are kept).
    """
    return GALLERY_LINK_RE.findall(index_html)


def extract_image_urls(gallery_html):
    """Return the absolute image URLs found in a gallery page.

    Only ``<img src="http..." />`` tags are matched, so relative image
    sources are ignored.

    Args:
        gallery_html: Text of one gallery page.

    Returns:
        A list of URL strings in document order (duplicates are kept).
    """
    return IMAGE_SRC_RE.findall(gallery_html)


def fetch_text(url):
    """Download *url* and return its body decoded as UTF-8.

    Returns:
        The page text, or None when the request fails or the server answers
        with an HTTP error status (the failure is printed).
    """
    try:
        response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('failed to fetch %s: %s' % (url, exc))
        return None
    # The encoding override only affects .text, so .text (not .content) is
    # what gets handed to the regexes.
    response.encoding = 'utf-8'
    return response.text


def download_image(url, dest_path):
    """Download *url* and write the raw bytes to *dest_path*.

    Returns:
        True when the file was written, False when the download failed
        (the failure is printed and nothing is written).
    """
    try:
        response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('failed to download %s: %s' % (url, exc))
        return False
    with open(dest_path, 'wb') as fp:
        fp.write(response.content)
    return True


def ensure_dir(path):
    """Create *path* (and missing parents) unless it already exists."""
    if not os.path.isdir(path):
        os.makedirs(path)


def main(first_page=FIRST_PAGE, last_page=LAST_PAGE, output_root=OUTPUT_ROOT):
    """Crawl the index pages and save every gallery's images to disk.

    Galleries are numbered consecutively across all index pages and stored
    as ``<output_root>/pic<N>/``; images inside a gallery are saved as
    ``1.jpg``, ``2.jpg``, ... following their order in the gallery page.
    A page or image that cannot be fetched is skipped.

    Args:
        first_page: Number of the first index page to crawl.
        last_page: Number of the last index page to crawl (inclusive).
        output_root: Directory under which the gallery folders are created.
    """
    dir_no = 1
    for page_no in range(first_page, last_page + 1):  # the different index pages
        index_html = fetch_text(INDEX_URL_TEMPLATE % page_no)
        if index_html is None:
            continue
        for link in extract_gallery_links(index_html):  # links on one index page
            pic_dir = os.path.join(output_root, 'pic' + str(dir_no))
            # Advance per link, even if the gallery fails below, so each
            # link always maps to its own folder number.
            dir_no += 1
            ensure_dir(pic_dir)
            gallery_html = fetch_text(SITE_ROOT + link)
            if gallery_html is None:
                continue
            image_urls = extract_image_urls(gallery_html)
            for pic_no, image_url in enumerate(image_urls, 1):  # images of one gallery
                print('now downloading:' + image_url)
                download_image(image_url, os.path.join(pic_dir, str(pic_no) + '.jpg'))


if __name__ == '__main__':
    main()
# Python: scraping images from a website (just for fun)
# Latest recommended article published on 2022-10-19 07:37:15