#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: chen
# Date 06/12
import os
import random
import re  # regular expressions, used to extract the content we need
import uuid

import requests
def spiderPic(html, keyword, save_dir='D:\\Python\\image\\'):
    """Download every image referenced by an ``"objURL"`` entry in *html*.

    Each ``"objURL":"..."`` value found in *html* is fetched with a
    10-second timeout and written to *save_dir* as a ``.jpg`` file.
    URLs that cannot be fetched (connection error, timeout, HTTP error
    status, malformed URL) are reported on stdout and skipped.

    Args:
        html: Text of the search-result page to scan for image URLs.
        keyword: Search keyword; only used in the progress message.
        save_dir: Directory the images are written to. It is created on
            demand when at least one image URL was found.

    Returns:
        None.
    """
    print('正在查找:'+keyword+'对应的文件,正在从百度中查找!!!')
    urls = re.findall(r'"objURL":"(.*?)"', html, re.S)
    if urls:
        # Create the target directory once, and only when there is
        # actually something to save.
        os.makedirs(save_dir, exist_ok=True)
    for addr in urls:
        print('现在正在爬取URL中的地址:'+str(addr)[0:30]+'...')
        try:
            pic = requests.get(addr, timeout=10)
            # Treat 4xx/5xx answers as failures so error pages are not
            # stored on disk under a .jpg name.
            pic.raise_for_status()
        except requests.exceptions.RequestException:
            # RequestException is the base class of ConnectionError,
            # Timeout, HTTPError, ...: one bad URL must not stop the crawl.
            print('您当前的URL出现错误!')
            continue
        # A UUID-based name cannot collide, unlike a random number drawn
        # from a small range, so earlier downloads are never overwritten.
        file_path = os.path.join(save_dir, uuid.uuid4().hex + '.jpg')
        with open(file_path, 'wb') as fn:
            fn.write(pic.content)
# Script entry point: ask for a keyword, fetch the Baidu image search
# result page for it and download the images that page references.
if __name__ == '__main__':
    print('python')
    word = input('请输入您想要爬取的关键词:')
    # Hand the query to requests as ``params`` so the keyword is URL-encoded;
    # concatenating it raw breaks on characters such as '&', '#' or spaces.
    search_params = {
        'tn': 'baiduimage',
        'ps': '1',
        'ct': '201326592',
        'lm': '-1',
        'cl': '2',
        'nc': '1',
        'ie': 'utf-8',
        'word': word,
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    resource = requests.get(
        'http://image.baidu.com/search/index',
        params=search_params,
        timeout=10,
    )
    # Extract the image URLs from the result page and download them.
    spiderPic(resource.text, word)