pandas中透视表与交叉表使用pivot_table()和crosstab()实现
透视表pivot_table()
import pandas as pd
import numpy as np
detail=pd.Dataframe({'order_id':[1,2,3,4,5],
'counts':[1,2,1,3,1],
'amounts':[35,66,23,12,44],
'dishes_name':['铁板牛肉','土豆丝','白饭','辣椒','豆腐']
})
#1.pivot_table:默认聚合方式,自动过滤掉非数值型数据, aggfunc = 'mean',默认返回均值,返回order_id为行索引
detail_pivot=pd.pivot_table(detail[['order_id','counts','dishes_name']],index='order_id')
print(detail_pivot)
#多个index:使用order_id,dishes_name作为行所有索引的透视表
detail_pivot=pd.pivot_table(detail[['order_id','counts','amounts','dishes_name']],index=['order_id','dishes_name'],aggfunc=np.sum)
print(detail_pivot)
#columns 表列分组键:#values显示自己的想要的列,当不存在时显示NAN
detail_pivot=pd.pivot_table(detail[['order_id','counts','amounts','dishes_name']],
index=['order_id'],columns='dishes_name',
aggfunc=np.sum,values='counts')
print(detail_pivot)
#填充缺失值为0,加一个函数 fill_value=0
#添加汇总开关, margins = True
交叉表crosstab()
不同:crosstab函数index,columns,values,输入都是从dataframe中直接取值
detail_crosstab = pd.crosstab(
index=detail['order_id'],
columns=detail['dishes_name'],
values=detail['counts'],
aggfunc=np.sum
)