python和excel
查看列数:data.shape[1]
查看行数:data.shape[0]
查看单元格数:data.size
查看列名:data.columns
查看某列:data['name']
查看某行:data[1:2]
查看某行:data.iloc[0]
查看某列:data.loc[0]
查看多个单元格:data.loc[[0,2],['XXX','YYY']] #可以数字也可以字母
查看某块:data.iloc[0:3,0:3] #只能数字
信息筛选:data[(xxx >= 20) & (yyy <= 30) & (data.YYY.isin(['A','B']))]
删除行:data.drop([0,2,3,4,5])
删除列:data.drop(columns=['xxx','yyy'])
删除空值:data.dropna(how='any') data.dropna(how='all')
插入最后一列:data['new'] = 0
指定位置插入一列:data.insert(1,'new',0)
替换空值:data.fillna(0)
字符串删除空格:df["city"].str.strip()
更换大小写: df["city"] = df['city'].str.lower()
更改列名称:df.rename(columns={"ciity":"newcity"})
删除重复值:df['city'].drop_duplicates()
替换:df['city'].replace('sh','shanghai')
设置索引列:df.set_index('id')
生成连续指定天数的日期:pd.date_range('20000101',periods=10)
排序:
按列排序:data.sort_values(by=['xxx','yyy'],ascending=[0,1])
按索引排序:df.sort_index()
替换某列的特定值:
for i in rane(data.shape[0]):
if data.loc[[i],['xxx']].values >= 10:
data.loc[[i],['xxx']] = 0
合并列:data['new'] = data['xxx'].map(str) + "-" + data['yyy'].map(str)
一列拆分多列:data['xxx_split'] = data['xxx'].str.split('-')
data['xxx_split1'] = data['xxx_split'].str[0]
data['xxx_split2'] = data['xxx_split'].str[1]
内容匹配:data = pd.merge(data1,data2,how='left',on=['xxx','yyy'])
数字格式处理:data['xxx'] = data['xxx'].astype(int)
透视表:
data = pd.pivot_table(data,index=['x','xx'],columns = ['y','yy'],values=['z','zz'],aggfunc=[np.sum,np.mean,len],margins=True)
df2=df.groupby(['月份','区域'])[['销售额','成本']].agg('sum')
df3 = pd.pivot_table(df, values=['销售额', '成本'], index=['月份', '区域'] , aggfunc='sum')
分组:
a.groupby('gender').sum()
a.groupby('gender').size()
vlookup:
禁用科学计数法:pd.set_option('display.float_format', lambda x: '%.2f' % x)
(1)数据透视表
def rank(x):
if x >= 90:
return 'A'
elif x >= 80:
return 'B'
elif x >= 70:
return 'C'
elif x >= 60:
return 'D'
else:
return 'E'
df['rank'] = df['score'].apply(rank)
(2)跨表查询,merge
(3)模糊匹配,近似查找
excel: = vlookup(A2&" *",B:G,6,0)
python:
df1
df3
df3['月折旧']=0
for i in range(len(df3['资产']):
df3['月折旧'][i] = df1[df1['资产'].map(lambda x :df3['资产'][i] in x)]['月折旧']
合并:
contact:
frames = [df1,df2,df3]
result = pd.concat(frames)
append: result = df1.append(df2)
merge: 类似于sql
join:left.join(right,on='key')