from bs4 import BeautifulSoup
import urllib2
import urllib, os, re, time, sys
#import socket
def build_request(link):
    """Build a urllib2 request for *link* with a fixed form payload.

    The request carries a url-encoded body made from three hard-coded form
    fields; because a body is supplied, urllib2 issues it as a POST. The
    ``User-Agent`` header is registered twice: once as a regular header and
    once as an unredirected header.

    Args:
        link: URL the request is addressed to.

    Returns:
        The prepared ``urllib2.Request``; nothing is sent here.
    """
    form_fields = {
        'name': 'Michael Foord',
        'location': 'Northampton',
        'language': 'Python',
    }
    agent = 'Custom User-Agent'
    request = urllib2.Request(
        link,
        urllib.urlencode(form_fields),
        {'User-Agent': agent},
    )
    request.add_unredirected_header('User-Agent', agent)
    return request
def build_urllib2(link):
    """Install a process-wide urllib2 opener that uses no proxy.

    Sets the default socket timeout to 60 seconds, then builds an opener
    around an empty ``ProxyHandler`` and installs it as the opener used by
    ``urllib2.urlopen``. A timestamp is printed before and after the setup.

    Args:
        link: Not used by this function; kept so existing callers still work.
    """
    # Local import: the module-level "import socket" is commented out, and
    # reaching the module through urllib2.socket relies on an undocumented
    # attribute of urllib2.
    import socket

    stamp_format = '%Y-%m-%d_%H:%M:%S'
    print(time.strftime(stamp_format, time.localtime(time.time())))

    # Global default: applies to every socket created afterwards.
    socket.setdefaulttimeout(60)

    # An empty mapping turns off proxy autodetection. The earlier hard-coded
    # proxy handler was overwritten before use, so it is not constructed.
    no_proxy_handler = urllib2.ProxyHandler({})
    urllib2.install_opener(urllib2.build_opener(no_proxy_handler))

    print('after install opener')
    print(time.strftime(stamp_format, time.localtime(time.time())))
def get_img(link, path):
    """Request *link* and print the response object, reporting HTTP errors.

    The request is prepared with ``build_request`` and opened with
    ``urllib2.urlopen``. Despite the name, nothing is saved or parsed: on
    success the response object itself is printed. On ``urllib2.HTTPError``
    the status code, message, headers, URL and error body are printed.
    Any other exception propagates to the caller.

    Args:
        link: URL string to request.
        path: Not used by this function; kept so existing callers still work.
    """
    print('start to download ' + link)
    response = None
    try:
        print(link)
        req = build_request(link)
        print('start to build_request')
        response = urllib2.urlopen(req)
        print('start to create soup about ')
        print(response)
    except urllib2.HTTPError as e:
        print('Error happened')
        print(e.code)
        print(e.msg)
        print(e.headers)
        print(e.url)
        print(e.fp.read())
    finally:
        # Release the connection; the original left the response open.
        if response is not None:
            response.close()
# Script section: choose the target URL, install the opener, then run the
# request. The commented-out assignments are alternative targets.
#weblink = "http://www.163.com"
weblink = "http://qt.gtimg.cn/q=s_sh601899"
#weblink = "http://blog.chinaunix.net/uid-7448695-id-2626493.html"
# Passed to get_img as its path argument.
mypath = "D:\\python\\gif\\test\\img"
# Runs at import time: configures the global timeout/opener and performs
# the network request immediately.
build_urllib2(weblink)
get_img(weblink, mypath)
import urllib2
import urllib, os, re, time, sys
#import socket
def build_request(link):
    """Build a urllib2 request for *link* with a fixed form payload.

    The request carries a url-encoded body made from three hard-coded form
    fields; because a body is supplied, urllib2 issues it as a POST. The
    ``User-Agent`` header is registered twice: once as a regular header and
    once as an unredirected header.

    Args:
        link: URL the request is addressed to.

    Returns:
        The prepared ``urllib2.Request``; nothing is sent here.
    """
    form_fields = {
        'name': 'Michael Foord',
        'location': 'Northampton',
        'language': 'Python',
    }
    agent = 'Custom User-Agent'
    request = urllib2.Request(
        link,
        urllib.urlencode(form_fields),
        {'User-Agent': agent},
    )
    request.add_unredirected_header('User-Agent', agent)
    return request
def build_urllib2(link):
    """Install a process-wide urllib2 opener that uses no proxy.

    Sets the default socket timeout to 60 seconds, then builds an opener
    around an empty ``ProxyHandler`` and installs it as the opener used by
    ``urllib2.urlopen``. A timestamp is printed before and after the setup.

    Args:
        link: Not used by this function; kept so existing callers still work.
    """
    # Local import: the module-level "import socket" is commented out, and
    # reaching the module through urllib2.socket relies on an undocumented
    # attribute of urllib2.
    import socket

    stamp_format = '%Y-%m-%d_%H:%M:%S'
    print(time.strftime(stamp_format, time.localtime(time.time())))

    # Global default: applies to every socket created afterwards.
    socket.setdefaulttimeout(60)

    # An empty mapping turns off proxy autodetection. The earlier hard-coded
    # proxy handler was overwritten before use, so it is not constructed.
    no_proxy_handler = urllib2.ProxyHandler({})
    urllib2.install_opener(urllib2.build_opener(no_proxy_handler))

    print('after install opener')
    print(time.strftime(stamp_format, time.localtime(time.time())))
def get_img(link, path):
    """Request *link* and print the response object, reporting HTTP errors.

    The request is prepared with ``build_request`` and opened with
    ``urllib2.urlopen``. Despite the name, nothing is saved or parsed: on
    success the response object itself is printed. On ``urllib2.HTTPError``
    the status code, message, headers, URL and error body are printed.
    Any other exception propagates to the caller.

    Args:
        link: URL string to request.
        path: Not used by this function; kept so existing callers still work.
    """
    print('start to download ' + link)
    response = None
    try:
        print(link)
        req = build_request(link)
        print('start to build_request')
        response = urllib2.urlopen(req)
        print('start to create soup about ')
        print(response)
    except urllib2.HTTPError as e:
        print('Error happened')
        print(e.code)
        print(e.msg)
        print(e.headers)
        print(e.url)
        print(e.fp.read())
    finally:
        # Release the connection; the original left the response open.
        if response is not None:
            response.close()
# Script section: choose the target URL, install the opener, then run the
# request. The commented-out assignments are alternative targets.
# NOTE(review): these statements repeat the run already performed earlier
# in this file - confirm the duplication is intended.
#weblink = "http://www.163.com"
weblink = "http://qt.gtimg.cn/q=s_sh601899"
#weblink = "http://blog.chinaunix.net/uid-7448695-id-2626493.html"
# Passed to get_img as its path argument.
mypath = "D:\\python\\gif\\test\\img"
# Runs at import time: configures the global timeout/opener and performs
# the network request immediately.
build_urllib2(weblink)
get_img(weblink, mypath)