# No better title came to mind, so this will do: assorted data wrangling with pandas.
# Load the raw export. The steps below read its 'ctime' and 'data' columns.
data = pd.read_csv('new_jn_data.csv')
# 'ctime' is split once on its first space; element 0 is taken as the date part.
# `n` must be passed by keyword: pandas >= 2.0 made it keyword-only, so the old
# positional form `str.split(' ', 1)` raises TypeError there. The vectorised
# `.str[0]` accessor replaces the per-row `apply(lambda x: x[0])`.
data['date'] = data['ctime'].str.split(' ', n=1).str[0]
# Normalise the date separator from '/' to '-' (literal replacement, not a regex).
data['date'] = data['date'].str.replace('/', '-', regex=False)
data['date'] = pd.to_datetime(data['date'])
# Strip ',' from 'data' and convert to float.
# NOTE(review): assumes read_csv yields 'data' as strings (e.g. with thousands
# separators); `.str` fails on a numeric column -- confirm against the CSV.
data['data'] = data['data'].str.replace(',', '', regex=False).astype(float)
# Element 1 of the 'ctime' split is the remainder after the first space; the text
# before its first ':' is stored as an integer in 'start_time'.
# NOTE(review): presumably the hour of day -- confirm against the CSV.
data['start_time'] = (
    data['ctime'].str.split(' ', n=1).str[1]
    .str.split(':', n=1).str[0]
    .astype(int)
)
# Keep only the columns of interest and order the rows by cell, then by date
# within each cell, both ascending. A single multi-key sort is used on purpose:
# chaining sort_values(by='date').sort_values(by='cell') only preserves the date
# order if the second sort is stable, and the default kind='quicksort' does not
# guarantee that.
data = data[['date','start_time','cell','enodeb','data','sub_net','ne']].sort_values(by=['cell', 'date'])
# Renumber the index 0..n-1 so it follows the new row order.
data = data.reset_index(drop=True)
# Optionally save the sorted data (rows containing NaN are still present here).
# data.to_csv('new_jn_data_with_nan.csv',index=0)
# Drop every row that contains at least one NaN.
data2 = data.dropna(axis=0,how='any')
# Optionally save the NaN-free data. Note that data2 keeps the index labels of
# `data`; to renumber it as well, use:
#   data2 = data.dropna(axis=0,how='any').reset_index(drop=True)
# data2.to_csv('new_jn_data_exclude_nan.csv',index=0)