#参考:pandas中DataFrame 数据合并,连接(merge,join,concat):https://blog.youkuaiyun.com/qq_41853758/article/details/83280104
#参考:Pandas之drop_duplicates:去除重复项:https://blog.youkuaiyun.com/u010665216/article/details/78559091
#合并两个df,以uid这一列作为关键字做左连接(how='left':保留df1_group的全部行,uid相同的并为一行,df2_group中没有匹配的列填NaN)
df3 = pd.merge(df1_group, df2_group, how='left', on='uid')
#删去time列的重复值的行,保留遇到的第一个
df_q_new=df_q.drop_duplicates(subset='time', keep='first', inplace=False).copy()
#先把inf、-inf替换成NaN,再删去delta_wave这一列为NaN的行(其他列的NaN不会导致该行被删)
df=df_q_valid.replace([np.inf, -np.inf,], np.nan).dropna(subset=["delta_wave"], how="all")
#在x轴上做一条竖线,x=300处
ax.axvline(300, color="red", linestyle="--")
#取分组后的某一组:这里的df是groupby的结果,get_group(9)取的是分组键值等于9的那一组(不是"第9个"组)
df.get_group(9).to_csv('result_9.csv')
#相邻两行做差:shift(-1)把time列整体向上移一行,即用下一行的time减去当前行的time,再减去当前行的con_Time;最后一行没有下一行,结果为NaN
df_valid['timedt']=df_valid['time'].shift(-1)-df_valid['time']-df_valid['con_Time']
df['level'].value_counts()
#对某一列做按位移位操作:先右移20位,再与15(二进制1111)按位与,即取出第20~23位这4个bit的值
df_stuck_valid['switchHDTimes'] = df_stuck_valid.apply(lambda row: (row['auto_1806']>>20)&15, axis=1)
#只读取文件的前几行到df里(nrows=2表示只读前2行数据,不含表头)
df_path=pd.read_csv('%s' % pathname, low_memory=False,nrows=2)
#把某一列拆分成多列:(1)只取按逗号拆分后的第一段;(2)用str.split(expand=True)一次拆成多列
(1)df_unique['singlestuckstartts1']=df_unique['singlestuckstartts'].map(lambda x:x.split(',')[0])
(2)df_stack_start=df_uni['singlestuckstartts'].str.split(',',expand=True)
#并重新命名列名,原来为0,1,2,3...
df_stack_start.columns = ['start'+str(s) for s in df_stack_start.columns]
#把NaN用0替换
#注意:下面是对取出的列做链式inplace修改,在pandas启用Copy-on-Write后不会改到原df;
#更稳妥的写法是直接赋值:df[col] = df[col].fillna(0)
df_stuck_valid_test['auto_1809'].replace(np.nan, 0, inplace=True)
#把某一列改为int类型
df_stuck_valid_test["auto_1809"]=df_stuck_valid_test["auto_1809"].astype('int')