# Python 3: log in to Douban by submitting the login form with a POST request.
import urllib.request
import requests
import http.cookiejar
import zlib
import json
import random
import time
import urllib
from lxml import etree
import ssl
# Swap the ssl module's default HTTPS context factory for the unverified one,
# which turns off certificate verification for stdlib HTTPS clients (e.g. the
# urllib.request.urlretrieve download later in this script).
# NOTE(review): this patches a private hook and applies process-wide —
# confirm it is still required.
ssl._create_default_https_context = ssl._create_unverified_context
# Login endpoint that the form data is POSTed to.
url = 'https://accounts.douban.com/login'
# Pool of browser-like request headers, one dict per User-Agent string.
# Each entry must be a dict ('User-Agent': value) so it can be passed directly
# as `headers=`; writing the pair with a comma would build a two-element set.
hds = [
    {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11'},
    {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)'},
    {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0'},
    {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/44.0.2403.89 Chrome/44.0.2403.89 Safari/537.36'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'},
    {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'},
    # NOTE(review): this value lacks its closing ')'; kept verbatim — confirm
    # whether that is intended.
    {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'},
    {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'},
    {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'},
    {'User-Agent': 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11'},
    {'User-Agent': 'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11'},
]
# --- Login flow -------------------------------------------------------------
# 1. POST the login form once.
# 2. If the response contains a captcha, download the image, ask the user to
#    type it in, and POST the form again with the captcha answer attached.
# 3. Print the HTML of the final response.

# Placeholder credentials: replace with a real account before running.
# Defined once so the first attempt and the captcha retry submit the same
# values.
form_email = 'your email'
form_password = 'your password'

# Fields of the login form.
Formdata = {
    'redir': 'https://www.douban.com/people/175030420/',
    'form_email': form_email,
    'form_password': form_password,
    'login': '登陆',
}
headers = {'User-agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'}

# One Session for the whole exchange, so cookies set by the first response are
# sent back together with the captcha answer.
session = requests.Session()

# Pass the dict itself: requests form-encodes it and sets the
# application/x-www-form-urlencoded Content-Type header, whereas a pre-encoded
# string body is sent without that header.
content = session.post(url, data=Formdata, headers=headers, verify=False)
tree = etree.HTML(content.text)

# xpath() returns a list; it is empty when the response has no captcha.
captcha_urls = tree.xpath('//img[@id="captcha_image"]/@src')
captcha_ids = tree.xpath('//input[@name="captcha-id"]/@value')

if captcha_urls and captcha_ids:
    captcha_url = captcha_urls[0]
    captcha_id = captcha_ids[0]

    # Save the captcha image locally so the user can open it and read it.
    filename = '../爬虫数据/captcha.png'
    urllib.request.urlretrieve(captcha_url, filename)
    captcha = input("please input the captcha:")

    # Resubmit the same form, extended with the captcha answer and its id.
    Formdata = dict(Formdata)
    Formdata['captcha-solution'] = captcha
    Formdata['captcha-id'] = captcha_id
    con = session.post(url, data=Formdata, headers=headers, verify=False)
else:
    # No captcha in the response: the first response is the final result.
    con = content

page = con.text
print(page)