Target site:
http://616pic.com/beijing/
Code first.
How to save into a specified folder and how to make the crawler more concise: follow me!!! More updates coming!!! (A rough sketch of a more concise version is included after the code below.)
The principle is the same as in the previous post.
# _*_coding:utf-8_*_
from bs4 import BeautifulSoup
import urllib.request
import requests

header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/67.0.3396.62 Safari/537.36'}
url = r"http://616pic.com/beijing/"

# Fetch the listing page and parse it
req = urllib.request.Request(url, headers=header)
response = urllib.request.urlopen(req)
soup = BeautifulSoup(response, 'html.parser')

# The lazy-loaded image tags carry the class "lazy"
result = soup.find_all(attrs={'class': 'lazy'})

for i in result:
    # The real image URL sits in the lazy-load attribute (data-original);
    # cut it out of the tag's string form
    i = str(i)
    i = i.split('nal="', 1)[1].split('" src')[0]
    print(i, type(i), '\n')

    # Download the image
    res = requests.get(i)

    # Derive a file name from the URL path
    j = i.split('bg', 1)[1].split('/', 4)[4].split('.jpg', 1)[0]
    print(j)

    # Write it to the current directory
    new_pic = open('./%s.jpg' % j, 'wb')
    new_pic.write(res.content)
    new_pic.close()

print('finished')
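As a preview of the "more concise" version teased above, here is a minimal sketch. It fetches the page with requests only, reads the lazy-load attribute directly instead of string-splitting, and saves into a separate folder. The attribute name data-original and the folder name beijing_pics are assumptions for illustration, not confirmed by the site.

# A more concise variant: a minimal sketch, assuming the lazy <img> tags expose
# the real URL in a "data-original" attribute and that a separate save folder
# ("beijing_pics") is wanted; both are assumptions, not taken from the post.
import os
import requests
from bs4 import BeautifulSoup

HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                         '(KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'}
URL = 'http://616pic.com/beijing/'
SAVE_DIR = 'beijing_pics'  # hypothetical target folder

os.makedirs(SAVE_DIR, exist_ok=True)

soup = BeautifulSoup(requests.get(URL, headers=HEADERS).text, 'html.parser')

for img in soup.find_all('img', class_='lazy'):
    pic_url = img.get('data-original')      # assumed attribute name
    if not pic_url:
        continue
    filename = pic_url.rsplit('/', 1)[-1]   # last path segment as file name
    with open(os.path.join(SAVE_DIR, filename), 'wb') as f:
        f.write(requests.get(pic_url, headers=HEADERS).content)

print('finished')

Using the tag's attribute lookup instead of splitting its string form is less fragile if the markup changes, and os.makedirs plus os.path.join handles writing to a specified folder.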