文章目录
环境搭建
所需版本:Python 3.7 及以下
import numpy as np
import pandas as pd
from tqdm import tqdm,trange
import geopandas as gpd
import transbigdata as tbd
import re
from igraph import *
import copy
基础信息输入
def InfoSet():
    """Interactively collect the run parameters.

    Asks (on stdin) for the day, the start hour and the end hour, echoes the
    selection back to the user, then asks for the data folder.

    Returns:
        list: ``[day, starttime, endtime, file_path]`` exactly as typed
        (all four items are strings; no validation is performed).
    """
    # Date / time-window prompts, asked in this order.
    time_prompts = (
        "请输入要处理数据的日期!",
        "请输入要处理数据的起始时间!",
        "请输入要处理数据的终止时间!",
    )
    answers = [input(prompt) for prompt in time_prompts]
    print("你选择的是{0}日{1}时到{2}时的数据,请再次确认!".format(*answers))

    # Base folder that holds the per-day data directories.
    answers.append(input("请输入要处理数据的路径!(具体到存放数据的文件夹)"))
    return answers
数据筛选(时间)
def Data_sort(base_info):
    """Filter one day's raw GPS CSV to the chosen time window and sort it.

    The raw file ``<path>\\<day>\\part<day>.csv`` is streamed in chunks of
    1,000,000 rows. Rows whose receive time lies inside the window and whose
    ``if_empty`` flag equals 0 are appended to
    ``<path>\\<day>\\timeData<day>_<start>-<end>.csv``. That file is then read
    back, stably sorted by ``ID`` and ``time`` and returned.

    Args:
        base_info: ``[day, starttime, endtime, file_path]`` as produced by
            ``InfoSet``.

    Returns:
        pandas.DataFrame with columns ``ID, time, lon, lat, if_empty, speed``
        and a fresh 0..n-1 index.
    """
    day, start_hour, end_hour = base_info[0], base_info[1], base_info[2]
    raw_path = base_info[3] + r"\{0}\part{0}.csv".format(day)
    out_path = base_info[3] + r"\{0}\timeData{0}_{1}-{2}.csv".format(day, start_hour, end_hour)

    # Stream the raw file; the first line is skipped and replaced by `names`.
    total_data = pd.read_csv(
        raw_path, skiprows=1,
        names=['ID','报警','if_empty','顶灯状态','高架','刹车','接收时间','GPS测定时间','经度','纬度','速度','方向','卫星个数'],
        encoding='utf-8', iterator=True, chunksize=1000000, header=None)

    # Write the header row of the output file (with an empty index column).
    pd.DataFrame(columns=('ID','time','lon','lat','if_empty','speed')).to_csv(out_path)

    # Window bounds are compared as strings against the receive-time column.
    # NOTE(review): the '2015-04-0{day}' template only works for single-digit
    # days, and the hour must be typed in the same form the raw data uses
    # (presumably zero-padded) - confirm against the raw files.
    lower_bound = '2015-04-0{0} {1}:00:00'.format(day, start_hour)
    upper_bound = '2015-04-0{0} {1}:00:00'.format(day, end_hour)

    # Registered once instead of once per chunk.
    tqdm.pandas(desc="数据排序进度!")

    # Filter chunk by chunk (do not open the output file while this runs).
    for times, chunk in enumerate(total_data, start=1):
        chunk = chunk.loc[:, ['ID', '接收时间', '经度', '纬度', 'if_empty','速度']]
        in_window = (
            (chunk['接收时间'] >= lower_bound)
            & (chunk['接收时间'] <= upper_bound)
            & (chunk['if_empty'] == 0)
        )
        # Non-inplace sort: sorting the filtered slice in place triggers
        # pandas' SettingWithCopy warning.
        save_file = chunk[in_window].sort_values(
            by=['ID','接收时间'], na_position='first', kind='quicksort')
        save_file.to_csv(out_path, mode="a", header=False)
        print("\r{0},进度:{1}/114".format('■'*times,times),end='', flush=True)

    # Global sort. The file is read back from the path it was just written
    # to; the original read from a hard-coded E:\ folder instead.
    sort_data = pd.read_csv(out_path)
    sort_data.sort_values(by=['ID','time'],inplace=True,na_position='first',kind='mergesort')
    print("数据排序成功!")
    sort_data.index = range(len(sort_data))
    # Drop the first column, which holds the per-chunk index written by to_csv.
    sort_data = sort_data.iloc[:, 1:]
    return sort_data
栅格化数据并统计栅格内数据
def to_geohash(base_info,sort_data,precision_num):
    """Geohash-encode the GPS points and aggregate them per grid cell.

    Side effects:
        * adds a ``geohash`` column to ``sort_data`` itself (the caller's
          frame is modified, not a copy);
        * overwrites ``timeData<day>_<start>-<end>.csv`` with the encoded
          points;
        * writes ``精度<precision>dataagg_day<day>_<start>-<end>.csv``.

    Args:
        base_info: ``[day, starttime, endtime, file_path]``.
        sort_data: point table with at least ``ID``, ``lon``, ``lat`` and
            ``speed`` columns.
        precision_num: geohash precision passed to ``tbd.geohash_encode``.

    Returns:
        GeoDataFrame with one row per geohash cell and the columns
        ``geohash``, ``ID`` (number of points with speed <= 60),
        ``geometry``, ``lon``/``lat`` (cell centre) and ``speed`` (mean of the
        speeds <= 60; NaN when a cell has no such point).
    """
    total_data = sort_data
    # Encode every point. Per the original author's note, precision 6 gives a
    # cell size of roughly +/-0.61 km.
    total_data['geohash'] = tbd.geohash_encode(total_data['lon'],total_data['lat'],precision=precision_num)
    total_data.to_csv(base_info[3]+r"\{0}\timeData{0}_{1}-{2}.csv".format(base_info[0],base_info[1],base_info[2]),mode='w')
    print("Gps数据栅格化成功!")

    # Count points per cell and build the cell polygons.
    dataagg = total_data.groupby(['geohash'])['ID'].count().reset_index()
    dataagg['geometry'] = tbd.geohash_togrid(dataagg['geohash'])
    dataagg = gpd.GeoDataFrame(dataagg)

    # Cell centre from the polygon's bounding box (minx, miny, maxx, maxy).
    # The original parsed the polygon's text form with a regex, which loses
    # minus signs and depends on the vertex order.
    cell_bounds = [polygon.bounds for polygon in dataagg['geometry']]
    dataagg['lon'] = [(b[0] + b[2]) / 2 for b in cell_bounds]
    dataagg['lat'] = [(b[1] + b[3]) / 2 for b in cell_bounds]

    # Mean speed per cell. Points that are not <= 60 (this includes missing
    # speeds) are removed from both the sum and the point count.
    speeds = total_data['speed'].astype(float)
    within_limit = speeds <= 60
    cell_keys = total_data['geohash']
    speed_sum = speeds.where(within_limit, 0.0).groupby(cell_keys).sum()
    rejected = (~within_limit).astype(int).groupby(cell_keys).sum()

    dataagg['ID'] = dataagg['ID'] - dataagg['geohash'].map(rejected)
    # A cell with no remaining point yields 0/0 -> NaN, as before.
    dataagg['speed'] = dataagg['geohash'].map(speed_sum) / dataagg['ID']

    dataagg.to_csv(base_info[3]+r"\{0}\精度{3}dataagg_day{0}_{1}-{2}.csv".format(base_info[0],base_info[1],base_info[2],precision_num),mode='w')
    print("栅格统计数据保存成功!")
    return dataagg
公交站点信息栅格化并统计
def station_count(base_info,dataagg,station,precision_num=6):
    """Geohash-encode the bus stations and attach the per-cell point count.

    ``station`` is modified in place: it gains the columns ``number`` (the
    ``ID`` count of the matching cell in ``dataagg``, 0 when the cell is
    absent) and ``geohash``, and is re-indexed by ``geohash``. The result is
    written to ``station_day<day>_<start>-<end>.csv``.

    Args:
        base_info: ``[day, starttime, endtime, file_path]``.
        dataagg: per-cell table whose first column is the geohash code and
            which has an ``ID`` count column.
        station: station table with ``lng84`` and ``lat84`` columns.
        precision_num: geohash precision; defaults to 6, the value the
            original hard-coded. It should match the precision used to
            build ``dataagg``.

    Returns:
        None.
    """
    station['number'] = 0
    station['geohash'] = tbd.geohash_encode(station['lng84'],station['lat84'],precision_num)

    # geohash -> point count. The original called `copy(dataagg)`, but `copy`
    # is the imported module, so that call raised TypeError. A plain lookup
    # table needs no copy and leaves `dataagg` untouched.
    count_by_cell = dict(zip(dataagg.iloc[:, 0], dataagg['ID']))

    # Stations in a cell with no GPS points keep the default count of 0.
    station['number'] = station['geohash'].map(count_by_cell).fillna(0).astype(int)
    station.index = station['geohash']

    # NOTE: the 'ansi' codec is only available on Windows.
    station.to_csv(base_info[3]+r"\{0}\station_day{0}_{1}-{2}.csv".format(base_info[0],base_info[1],base_info[2]),encoding='ansi',mode='w')
轨迹提取与保存
def caltime(a,b):
    """Return the absolute gap between two times, in whole minutes of day.

    Only the ``hour`` and ``minute`` attributes are used; the date and the
    seconds are ignored.
    """
    minute_of_day_a = 60 * a.hour + a.minute
    minute_of_day_b = 60 * b.hour + b.minute
    return abs(minute_of_day_a - minute_of_day_b)
def _unique_in_order(values):
    """Return `values` without repeats, keeping first occurrences in order."""
    seen = set()
    unique = []
    for value in values:
        if value not in seen:
            seen.add(value)
            unique.append(value)
    return unique


def _close_trip(trip, end_time):
    """Stamp the end time on `trip` and de-duplicate its visited cells."""
    trip[2] = end_time
    trip[3:] = _unique_in_order(trip[3:])
    return trip


def GetTrip(base_info,total_data):
    """Split the sorted point table into trajectories and save them.

    Consecutive rows belong to the same trajectory while the value in the
    first column (the vehicle ID) is unchanged and ``caltime`` between
    neighbouring rows is at most 3. Each trajectory becomes one row:
    ``[ID, start time, end time, cell, cell, ...]``, where every geohash cell
    appears once, in order of first visit.

    Side effects:
        * converts ``total_data['time']`` to datetimes in place;
        * writes ``Day<day>trip2_<start>-<end>.csv``.

    Args:
        base_info: ``[day, starttime, endtime, file_path]``.
        total_data: point table sorted by vehicle and time, with the ID in
            column 0, the time in column 1 and a geohash column. Rows are
            read by position, so a 0..n-1 index is assumed.

    Returns:
        DataFrame of trajectories *without* the first (ID) column, i.e.
        ``start, end, cell, cell, ...``. The last row is a ``[0, 0, 0]``
        placeholder, kept because the original always emitted it.
    """
    total_data['time'] = pd.to_datetime(total_data['time'],format="%Y-%m-%d %H:%M:%S")

    # `to_geohash` in this file names the column 'geohash', while this
    # function originally read 'geohash6'; accept either.
    cell_column = 'geohash6' if 'geohash6' in total_data.columns else 'geohash'

    # Pull the three columns out once; per-row `iloc[i][k]` access is very
    # slow on large tables.
    vehicle_ids = total_data.iloc[:, 0].tolist()
    timestamps = total_data.iloc[:, 1].tolist()
    cells = total_data[cell_column].tolist()

    trips = []
    current = [vehicle_ids[0], timestamps[0], 0, cells[0]]
    for i in tqdm(range(1, len(vehicle_ids))):
        gap_too_long = caltime(timestamps[i], timestamps[i - 1]) > 3
        if gap_too_long or vehicle_ids[i] != vehicle_ids[i - 1]:
            # The previous trajectory ends at the previous row.
            trips.append(_close_trip(current, timestamps[i - 1]))
            current = [vehicle_ids[i], timestamps[i], 0, cells[i]]
        else:
            current.append(cells[i])

    # The last trajectory is closed as well. Originally its end time stayed 0
    # and the de-duplication ran over the whole row, header included.
    trips.append(_close_trip(current, timestamps[-1]))
    # Trailing placeholder row, kept for backward compatibility.
    trips.append([0, 0, 0])

    save_file = pd.DataFrame(trips)
    save_file.to_csv(base_info[3]+r"\{0}\Day{0}trip2_{1}-{2}.csv".format(base_info[0],base_info[1],base_info[2]))
    trip_data = save_file.iloc[:, 1:]
    return trip_data
邻接矩阵生成与存储
'''正、反邻接矩阵生成'''
def create_g(dataagg,trip_data,base_info):
maxnum=0
for i in trange(0,len(trip_data)):
if maxnum<=len(trip_data.loc[i,:]):
maxnum=len(trip_data.loc[i,:])
df=pd.DataFrame(columns=dataagg['geohash6'],index=dataagg['geohash6'])
df=df.fillna(0) #生成空邻接矩阵
'''空邻接矩阵赋值'''
for i in trange(0,len(trip_data)-1):
j=2
if(pd.isnull(trip_data.iloc[i,j])==True):
continue
else:
a=trip_data.iloc[i,j]
for j in range(3,maxnum):
if(pd.isnull(trip_data.iloc[i,j])==False and j<maxnum):
b=trip_data.iloc[i,j]
num=df.loc[a,b]
df.loc[a,b]=num+1
a=trip_data.iloc[i,j]
else:
break
df.to_csv(base_info[3]+r"\{0}\Triplinjie_day{0}_{1}-{2}.csv".format(base_info[0],base_info[1],base_info[2]))#生成邻接矩阵并存储
print("邻接矩阵存储成功!")
m = df.values.tolist()
a=m
for i in trange(0,len(m)):
for j in range(0,len(df)):
if m[i][j]>0:
m[i][j]=1.0/m[i][j]
M=99999
for i in trange(0,len(m

该博客介绍了城市公交数据处理的完整流程,包括环境搭建、数据筛选、栅格化、公交站点信息处理、轨迹提取、邻接矩阵生成、Dijkstra算法路径优化、公交线路质量评估等步骤。同时,提供了GUI界面搭建,方便用户交互操作。
最低0.47元/天 解锁文章
1401

被折叠的 条评论
为什么被折叠?



