# This example scrapes the train timetable for a given date into a PostgreSQL database.
import requests
import json
from bs4 import BeautifulSoup
import psycopg2
# Scrape the 12306 left-ticket query for every ordered pair of cities below
# and store each bookable train as one row of the `timetable` table.

# Open the database connection and log in (credentials are placeholders).
db = psycopg2.connect(host='localhost', port='5432', user='#####', password='#####', database='#####')
cursor = db.cursor()

# Departure date sent with every query.
TRAIN_DATE = '2019-05-08'

# Query endpoint; the parameter order is kept exactly as in the original URL.
QUERY_URL = ('https://kyfw.12306.cn/otn/leftTicket/query'
             '?leftTicketDTO.train_date={date}'
             '&leftTicketDTO.from_station={src}'
             '&leftTicketDTO.to_station={dst}'
             '&purpose_codes=ADULT')

# Parameterized insert: values are passed separately so the driver quotes
# them, instead of being spliced into the SQL text.
INSERT_SQL = "insert into timetable values (%s,%s,%s,%s,%s,%s,%s)"

# Seconds to wait for the remote server before giving up on a request.
REQUEST_TIMEOUT = 30

# City name -> station code used by the query API.
city_number = {'北京':'BJP','南京':'NJH','合肥':'HFH','上海':'SHH','郑州':'ZZF','徐州':'XCH','武汉':'WHN','石家庄':'SJP','杭州':'HZH','济南':'JNK'}

try:
    cursor.execute("CREATE TABLE timetable(出发地 varchar not null,到达地 varchar not null,班次 varchar not null,出发时间 varchar not null,"
                   "到达时间 varchar not null,总用时 varchar not null,出发日期 varchar not null)")
    print("开始输入数据")
    for i in city_number:
        for j in city_number:
            if i == j:
                continue
            url = QUERY_URL.format(date=TRAIN_DATE, src=city_number[i], dst=city_number[j])
            data = requests.get(url, timeout=REQUEST_TIMEOUT)
            # Fail loudly on an HTTP error instead of trying to parse an error page.
            data.raise_for_status()
            data_json = data.json()['data']['result']
            rows = []
            for k in data_json:
                temp_list = k.split('|')
                # Fields sit at fixed positions after splitting on '|'; skip
                # records too short to contain every index read below.
                if len(temp_list) > 13 and temp_list[1] == '预订':
                    # NOTE(review): the station *codes* (not the city names)
                    # are stored in the departure/arrival columns, as in the
                    # original script -- confirm this is intended.
                    rows.append((
                        city_number[i],   # departure station code
                        city_number[j],   # arrival station code
                        temp_list[3],     # train number
                        temp_list[8],     # departure time
                        temp_list[9],     # arrival time
                        temp_list[10],    # total duration
                        temp_list[13],    # departure date
                    ))
            if rows:
                cursor.executemany(INSERT_SQL, rows)
            # Commit once per route so completed routes survive a later failure.
            db.commit()
finally:
    # Always release the cursor and connection, even when a request or
    # insert raises part-way through.
    cursor.close()
    db.close()