目的:爬取携程网站火车票中的单程与中转信息
单程
url="https://trains.ctrip.com/trainbooking/search?tocn=%25e5%258d%2583%25e5%25b2%259b%25e6%25b9%2596&fromcn=%25e6%259d%25ad%25e5%25b7%259e&day=2020-12-31"
中转
url="https://trains.ctrip.com/pages/booking/hubSingleTrip?ticketType=2&fromCn=%25E6%259D%25AD%25E5%25B7%259E&toCn=%25E5%258D%2583%25E5%25B2%259B%25E6%25B9%2596&departDate=2020-12-31"
采用parse.quote()进行url转码
采用csv进行数据保存
使用 random.choice 随机选择一个 User-Agent(个人认为这是个不错的习惯)
携程单程信息在原网页源代码中
携程火车中转信息保存在 JSON 数据中(通过 js_url 获取)
LET’S GO
# Ctrip train search page URL. Query parameters: fromCn = departure station,
# toCn = destination station, departDate = travel date.
# NOTE(review): the original comment labels this the one-way page, yet
# NewsByTransfer.getOneJsUrl builds the same hubSingleTrip?ticketType=5 URL
# as the page for transfer searches -- confirm which one is meant.
url="https://trains.ctrip.com/pages/booking/hubSingleTrip?ticketType=5&fromCn=%25E6%259D%25AD%25E5%25B7%259E&toCn=%25E6%2596%25B0%25E4%25B9%25A1&departDate=2020-12-30"
# Gotcha 1: the station names in this page URL's query string are URL-encoded
# twice (hence the "%25E6..." sequences instead of "%E6...").
# Per the original author's note, the one-way trip data is contained in the
# page's own HTML source.
'''
url="https://trains.ctrip.com/pages/booking/hubSingleTrip?ticketType=2&fromCn=%25E6%259D%25AD%25E5%25B7%259E&toCn=%25E5%258D%2583%25E5%25B2%259B%25E6%25B9%2596&departDate=2020-12-31"
js_url="https://trains.ctrip.com/pages/booking/getTransferList?departureStation=%2525E6%25259D%2525AD%2525E5%2525B7%25259E&arrivalStation=%2525E6%252596%2525B0%2525E4%2525B9%2525A1&departDateStr=2020-12-30"
携程中转网址火车中中转信息保存在json文件中(js_url) 查询参数departureStation arrivalStation departDateStr
类似稍加自己比较即可发现
js_url查询参数需要进行三次url编码(注意点2)
'''
from urllib import parse
import random
from bs4 import BeautifulSoup
import csv
import os
import requests
# print(parse.unquote((parse.unquote("%25E6%259D%25AD%25E5%25B7%259E"))))
# Read the search criteria interactively. These module-level names are read
# later by NewsByTransfer.__init__, so their spelling must not change.
fromArea = input("出发站")
toArea = input("目的站")
date = input("年-月-日 :")

# Create the directory that later steps save their output files into.
# exist_ok=True replaces the former "check os.path.exists, then os.mkdir"
# pair: it is a no-op when the directory is already there and cannot fail
# if the directory appears between the check and the creation.
os.makedirs("D:/携程查找练习", exist_ok=True)
class NewsByTransfer():#该类用于爬取中转的信息
def __init__(self):
    """Store the search inputs on the instance.

    Takes no arguments; the values come from the module-level names
    ``fromArea``, ``toArea`` and ``date`` and are kept under the same
    attribute names.
    """
    self.fromArea, self.toArea, self.date = fromArea, toArea, date
def getOneJsUrl(self,fromArea,toArea,date):#进行js_url拼接
fromArea=parse.quote(parse.quote(fromArea))
departureStation=parse.quote(fromArea)
toArea=parse.quote(parse.quote(toArea))
arrivalStation=parse.quote(toArea)
url="https://trains.ctrip.com/pages/booking/hubSingleTrip?ticketType=5&fromCn="+fromArea+"&toCn="+toArea #原网页
js_url="https://trains.ctrip.com/pages/booking/getTransferList?departureStation="+departureStation+"&arrivalStation="+arrivalStation
js_url=js_url+"&departDateStr="+date
# print(url)
print(js_url)
return js