__author__ = 'chw'
import urllib2
import urllib
import cookielib
import re
import chardet
URL_BAIDU_INDEX = u'http://www.baidu.com/'
URL_BAIDU_TOKEN = 'https://passport.baidu.com/v2/api/?getapi&tpl=pp&apiver=v3&class=login'
URL_BAIDU_LOGIN = 'https://passport.baidu.com/v2/api/?login'
username = '********'
password = '********'
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
reqReturn = urllib2.urlopen(URL_BAIDU_INDEX)
tokenReturn = urllib2.urlopen(URL_BAIDU_TOKEN)
matchVal = re.search(u'"token" : "(?P<tokenVal>.*?)"',tokenReturn.read())
tokenVal = matchVal.group('tokenVal')
postData = {
'username' : username,
'password' : password,
'u' : 'https://passport.baidu.com/',
'tpl' : 'pp',
'token' : tokenVal,
'staticpage' : 'https://passport.baidu.com/static/passpc-account/html/v3Jump.html',
'isPhone' : 'false',
'charset' : 'UTF-8',
'callback' : 'parent.bd__pcbs__ra48vi'
};
postData = urllib.urlencode(postData)
loginRequest = urllib2.Request(URL_BAIDU_LOGIN,postData)
loginRequest.add_header('Accept','text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
loginRequest.add_header('Accept-Encoding','gzip,deflate,sdch');
loginRequest.add_header('Accept-Language','zh-CN,zh;q=0.8');
loginRequest.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36');
loginRequest.add_header('Content-Type','application/x-www-form-urlencoded');
sendPost = urllib2.urlopen(loginRequest);
teibaUrl = 'http://tieba.baidu.com/home/main?id=5890636877d0d0caa4d3dad1d49131&fr=itb'
content = urllib2.urlopen(teibaUrl).read();
print chardet.detect(content)
print content