本笔记基于Python3.6
主要涉及爬虫中cookie的处理,特别实用,对今后编写爬虫意义重大
主要实现了:
1. 12306登录验证码验证
2. 12306账号的登录
3. 12306余票查询
直接贴上源码:
# !/usr/bin/env python
# _*_ coding:utf-8 _*_
import ssl#z证书
import requests
# import cookielib
import json
import urllib
from http import cookiejar
from urllib import request, parse
# ssl._creat_default_https_context=ssl._create_unverified_context#不验证证书
# Shared requests session: every function below sends its requests through
# this one object, so cookies received in one step are sent with the next.
requested=requests.Session()
# Fetch the captcha image
def getcode():
    """Download the login captcha image and store it as ``code.png``.

    The request is sent through the module-level ``requested`` session.

    Returns:
        The cookies attached to the captcha response.
    """
    captcha_url = "https://kyfw.12306.cn/passport/captcha/captcha-image?login_site=E&module=login&rand=sjrand&0.22323172816879921"
    request_headers = {
        'Accept': 'image/webp,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch, br',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.2372.400 QQBrowser/9.5.10801.400'
    }
    # verify=False switches off certificate verification so that an expired
    # certificate does not abort the request.
    response = requested.get(url=captcha_url, headers=request_headers, verify=False)
    image_bytes = response.content
    with open("code.png", "wb") as image_file:
        image_file.write(image_bytes)
    return response.cookies
# Validate the captcha answer
def checkcode():
    """Prompt for the matching captcha tiles and submit the answer.

    The user enters tile numbers ``1``-``8`` separated by commas. Each number
    is translated into a fixed ``"x,y"`` click coordinate (tiles 1-4 share
    y=50, tiles 5-8 share y=120) and the coordinates are posted, comma-joined,
    as the ``answer`` form field.

    Surrounding whitespace, empty items (e.g. a trailing comma) and full-width
    commas in the typed answer are tolerated.

    Returns:
        The ``result_code`` value from the JSON response.

    Raises:
        KeyError: If an entered tile number is not one of ``1``-``8``, or the
            response has no ``result_code`` key.
    """
    tile_coordinates = {
        "1": "41,50", "2": "108,50", "3": "182,50", "4": "254,50",
        "5": "43,120", "6": "111,120", "7": "175,120", "8": "250,120",
    }
    typed = input("请输入验证码:")
    # Accept the full-width comma an IME may produce, and ignore blanks so
    # "1, 3," works the same as "1,3".
    picks = [item.strip() for item in typed.replace("，", ",").split(",")]
    code = ",".join(tile_coordinates[item] for item in picks if item)

    url = "https://kyfw.12306.cn/passport/captcha/captcha-check"
    # No hard-coded Content-Length here: the body length depends on how many
    # tiles were chosen, and requests sets the header from the actual body.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'kyfw.12306.cn',
        'Origin': 'https://kyfw.12306.cn',
        'Referer': 'https://kyfw.12306.cn/otn/login/init',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.2372.400 QQBrowser/9.5.10801.400',
        'X-Requested-With': 'XMLHttpRequest'
    }
    form = {
        'answer': code,
        'login_site': 'E',
        'rand': 'sjrand'
    }
    r = requested.post(url=url, data=form, headers=headers, verify=False)
    return json.loads(r.text)["result_code"]
# Log in to the account
def login(username="491577803@qq.com", password="*******"):
    """Post the account credentials to the 12306 login endpoint.

    Args:
        username: Account name sent as the ``username`` form field.
        password: Password sent as the ``password`` form field. The default
            looks like a masked placeholder -- pass the real value.

    Returns:
        The ``result_code`` value from the JSON response.

    Raises:
        KeyError: If the response has no ``result_code`` key.
    """
    url = "https://kyfw.12306.cn/passport/web/login"
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9 ',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.4793.400 QQBrowser/10.0.745.400',
        'X-Requested-With': 'XMLHttpRequest'
    }
    data = {
        'username': username,
        "password": password,
        'appid': 'otn'
    }
    r = requested.post(url=url, headers=headers, data=data, verify=False)
    return json.loads(r.text)["result_code"]
# Query remaining tickets
def _has_tickets(status):
    """Return True when a seat-status field reports available tickets.

    "有" means available; "无" and "" mean sold out; a number is the remaining
    count. Any other non-numeric marker is treated as "no tickets" instead of
    crashing the integer conversion.
    """
    if status == "有":
        return True
    if status in ("无", ""):
        return False
    try:
        return int(status) > 0
    except ValueError:
        return False


def prebuy():
    """Finish the post-login handshake, query tickets and request an order.

    All requests go through the module-level ``requested`` session, in order:

    1. GET the passport redirect page.
    2. POST ``uamtk`` to obtain ``newapptk``, then POST it to
       ``uamauthclient``; pass/fail is printed for both.
    3. GET the ticket page and three auxiliary resources.
    4. GET the remaining-ticket query and print every field of every row with
       its index within the row.
    5. Check field 26 of the selected row; exit the process with status 0
       when no ticket is available (or no row was returned).
    6. POST ``checkUser`` and then ``submitOrderRequest``, printing the
       responses.

    Raises:
        SystemExit: With code 0 when there is no ticket to order.
        KeyError: If a JSON response lacks an expected key.
    """
    ua_63 = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.4793.400 QQBrowser/10.0.745.400"
    ua_53 = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.3176.400 QQBrowser/9.6.11576.400"
    page_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "User-Agent": ua_63
    }
    xhr_headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "User-Agent": ua_63,
        "X-Requested-With": "XMLHttpRequest"
    }
    plain_headers = {"User-Agent": ua_53}

    # Passport redirect page. verify=False added for consistency with every
    # other request in this file.
    url = "https://kyfw.12306.cn/otn/passport?redirect=/otn/login/userLogin"
    requested.get(url=url, headers=page_headers, verify=False)

    # First auth step: obtain the application token.
    url = "https://kyfw.12306.cn/passport/web/auth/uamtk"
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "User-Agent": ua_63,
        "X-Requested-With": "XMLHttpRequest"
    }
    r = requested.post(url=url, data={"appid": "otn"}, headers=headers, verify=False)
    auth_reply = json.loads(r.text)
    # .get(): a failed reply may not carry the token; report the failure
    # below instead of dying on a KeyError first.
    apptk = auth_reply.get("newapptk")
    if auth_reply['result_code'] == 0:
        print("验证1通过!!!")
    else:
        print("验证1失败!!!")

    # Second auth step: hand the token to the ticketing client.
    url = "https://kyfw.12306.cn/otn/uamauthclient"
    r = requested.post(url=url, data={"tk": apptk}, headers=xhr_headers, verify=False)
    if json.loads(r.text)["result_code"] == 0:
        print("验证2通过!!!")
    else:
        print("验证2失败!!!")

    # Ticket page plus the auxiliary resources; responses are not inspected.
    requested.get(url="https://kyfw.12306.cn/otn/leftTicket/init", headers=page_headers, verify=False)
    for url in (
        "https://kyfw.12306.cn/otn/HttpZF/GetJS",
        "https://kyfw.12306.cn/otn/dynamicJs/qrtqmnp",
        "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=passenger&rand=randp&0.9482634139575532",
    ):
        requested.get(url=url, headers=plain_headers, verify=False)

    # Remaining-ticket query.
    url = "https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date=2018-03-07&leftTicketDTO.from_station=NAF&leftTicketDTO.to_station=CCT&purpose_codes=ADULT"
    r = requested.get(url=url, headers={"User-Agent": ua_63}, verify=False)
    result = json.loads(r.text)["data"]["result"]
    print(result)

    # Debug dump: each "|"-separated field with its index inside its own row
    # (the index restarts for every row so it can be used to locate a field).
    for row in result:
        for index, field in enumerate(row.split("|")):
            print("%s %s" % (field, index))

    if not result:
        # Nothing returned for this route/date, so there is nothing to order.
        print("没票")
        raise SystemExit(0)

    # NOTE(review): the source's indentation was lost, so it is unclear
    # whether this check ran for every row or once after the loop. This uses
    # the last returned row, which matches both readings when one train is
    # returned -- confirm.
    temp_list = result[-1].split("|")
    divider = "```````````````````````````````````````"
    print(divider)
    cde0 = temp_list[0]
    print(cde0)
    print(divider)
    if _has_tickets(temp_list[26]):
        print("有票")
    else:
        print("没票")
        raise SystemExit(0)

    # Verify the login state before ordering; the form field is now actually
    # sent (it was built but never passed to the request).
    url = "https://kyfw.12306.cn/otn/login/checkUser"
    r = requested.post(url=url, headers=xhr_headers, data={"_json_att": ""}, verify=False)
    print("验证登录状态")
    print(r.text)
    print(r.cookies)

    # Submit the order request.
    url = "https://kyfw.12306.cn/otn/leftTicket/submitOrderRequest"
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "User-Agent": ua_63,
        "X-Requested-With": "XMLHttpRequest"
    }
    # Each form field is its own key. Gluing "&key=value" pairs onto the
    # secretStr value would be percent-encoded by requests into one single
    # field. The token is presumably already percent-encoded in the query
    # result, so it is decoded first to avoid double encoding -- confirm.
    data = {
        "secretStr": parse.unquote(cde0),
        "train_date": "2018-03-07",
        "back_train_date": "2018-02-27",
        "tour_flag": "dc",
        "purpose_codes": "ADULT",
        "query_from_station_name": "南召",
        "query_to_station_name": "长春",
        "undefined": "",
    }
    print(data)
    r = requested.post(url=url, headers=headers, data=data, verify=False)
    print(r.text)
# Run the whole flow
def run():
    """Drive the flow: captcha, login, then ticket query and order request.

    The captcha and login steps are repeated from the start until the captcha
    check returns ``"4"`` and the login returns ``0``. ``prebuy()`` is then
    called exactly once.

    A loop is used rather than a recursive retry: a recursive call without
    ``return`` lets the outer frame continue after the retry finishes, which
    would log in and order a second time.
    """
    while True:
        getcode()
        if checkcode() != "4":
            print("验证码校验失败!!!")
            continue
        print("验证码校验成功!!!")
        if login() != 0:
            print("登录失败!!!")
            continue
        print("登录成功!!!")
        break
    prebuy()
# Script entry point: the flow starts as soon as this file is executed.
run()