Python爬取携程机票代码实例
现在携程的页面数据是通过接口异步返回的,无法直接用 xpath 解析页面,需要模拟浏览器调用接口的过程。
dcity是指出发地的城市编码
acity是指目的地的城市编码
其他参数是直接从前端页面发出的请求中复制过来的,在 Mac 版 Chrome 浏览器的开发者工具中可以看到,如下图:
def getResponse(fromCity, toCity, date, timeout=10):
    """Call the Ctrip one-way flight "products" API and return the raw body.

    The city codes/names are resolved through ``judgeCity`` and the request
    token through ``getToken``; both are given the same three arguments.

    Args:
        fromCity: Departure city, forwarded to ``judgeCity`` and ``getToken``.
        toCity: Destination city, forwarded likewise.
        date: Departure date string; inserted unchanged into the Referer
            header and the request payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as text (the caller is expected to parse it).

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
    """
    cityInfo = judgeCity(fromCity, toCity, date)
    dcity = cityInfo.get('dcity')
    dcityName = cityInfo.get('dcityName')
    acity = cityInfo.get('acity')
    acityName = cityInfo.get('acityName')
    token = getToken(fromCity, toCity, date)

    url = "https://flights.ctrip.com/itinerary/api/12808/products"
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
        # The Referer mirrors the search page URL for this city pair and date.
        "Referer": "https://flights.ctrip.com/itinerary/oneway/" + dcity + "-" + acity + "?date=" + date,
        "Content-Type": "application/json",
    }
    request_payload = {
        "flightWay": "Oneway",
        "classType": "ALL",
        "hasChild": False,
        "hasBaby": False,
        "searchIndex": 1,
        "airportParams": [
            {
                "dcity": dcity,
                "acity": acity,
                "dcityname": dcityName,
                "acityname": acityName,
                "date": date,
            },
        ],
        "token": token,
    }

    # POST the JSON-encoded payload; the timeout keeps a dead connection
    # from hanging the scraper forever.
    response = requests.post(
        url,
        data=json.dumps(request_payload),
        headers=headers,
        timeout=timeout,
    )
    return response.text
接口返回的是 JSON 数据,具体字段可以在下图中找到,然后按字段进行解析:
def parseInfo(fromCity, toCity, date):
    """Fetch the flight search result and extract the fields of each flight.

    The raw response from ``getResponse`` is printed, decoded as JSON, and
    every route that consists of exactly one leg is read field by field.
    Missing or null parts of the response (``data``, ``routeList``, ``legs``,
    ``flight``, airport/terminal info, ``cabins``, ``price``) no longer raise;
    the route is skipped or the affected field is left as ``None``.

    Args:
        fromCity: Departure city, forwarded to ``getResponse``.
        toCity: Destination city, forwarded to ``getResponse``.
        date: Departure date string, forwarded to ``getResponse``.

    Returns:
        None.
    """
    # NOTE(review): the values extracted below are not stored or returned by
    # the visible code, and `airplaneInfo` is created but never filled —
    # presumably the original continues by populating it; confirm.
    airplaneInfo = {}
    response = getResponse(fromCity, toCity, date)
    print(response)

    # `data` / `routeList` may be absent or null; treat that as "no routes"
    # instead of failing on a chained .get().
    data = json.loads(response).get('data') or {}
    routeList = data.get('routeList') or []

    # Read the routes one by one.
    for route in routeList:
        # Only routes with exactly one leg are handled; a missing `legs`
        # entry is treated like an empty list rather than crashing len().
        legs = route.get('legs') or []
        if len(legs) != 1:
            continue
        leg = legs[0]
        flight = leg.get('flight')
        if not flight:
            continue

        # Extract the wanted fields.
        airlineName = flight.get('airlineName')  # airline
        flightNumber = flight.get('flightNumber')  # flight number
        departureDate = flight.get('departureDate')  # departure time
        arrivalDate = flight.get('arrivalDate')  # arrival time
        craftTypeName = flight.get('craftTypeName')  # aircraft type
        # aircraft size class: large / medium / small
        craftTypeKindDisplayName = flight.get('craftTypeKindDisplayName')

        departureInfo = flight.get('departureAirportInfo') or {}
        departureCityName = departureInfo.get('cityName')  # departure city
        departureAirportName = departureInfo.get('airportName')  # departure airport
        # departure terminal
        departureTerminalName = (departureInfo.get('terminal') or {}).get('name')

        arrivalInfo = flight.get('arrivalAirportInfo') or {}
        arrivalCityName = arrivalInfo.get('cityName')  # arrival city
        arrivalAirportName = arrivalInfo.get('airportName')  # arrival airport
        # arrival terminal
        arrivalTerminalName = (arrivalInfo.get('terminal') or {}).get('name')

        punctualityRate = flight.get('punctualityRate')  # arrival punctuality rate
        # meal service: None = no meal, Snack = snack, Meal = meal included
        mealType = flight.get('mealType')

        # Only the first cabin entry is read; an empty or missing cabin list
        # leaves the price fields as None.
        cabins = leg.get('cabins') or []
        firstCabin = cabins[0] if cabins else {}
        priceInfo = firstCabin.get('price') or {}
        price = priceInfo.get('price')  # standard price
        rate = priceInfo.get('rate')  # discount rate
        seatCount = firstCabin.get('seatCount')  # remaining seats
这里遇到了一个 token 的问题:输入不同的出发地和目的地时,token 会随之改变。于是我尝试去获取这个 token,最终找到了一个返回 token 的接口,如下图:
def getTokenResponse(fromCity,toCity,date):
cityInfo = judgeCity(fromCity,toCity,date)
dcity = cityInfo.get('dcity')
dcityname = cityInfo.get('dcityname')
acity = cityInfo.get('acity')
acityname = cityInfo.get('acityname')
referer = "https://flights.ctrip.com/itinerary/oneway/" + dcity + "-" + acity + "?date=" + date
url = "https://flights.ctrip.com/itinerary/api/12808/records"
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
"Referer": referer,
"Content-Type": "application/json"}
request_payload = {
"vid": "1586849490183.3km93i"
}
print(referer)
# post请求
response = requests