import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as col
import matplotlib.cm as cm
%matplotlib inline
# 1. Load the data.
# All three order-detail sheets live in the same workbook, so read them in a
# single pass (passing a list to sheet_name returns a dict keyed by sheet
# name) instead of re-opening and re-parsing the file once per sheet.
sheetNames = ['meal_order_detail1', 'meal_order_detail2', 'meal_order_detail3']
sheets = pd.read_excel('meal_order_detail.xls', sheet_name=sheetNames)
dataOne, dataTwo, dataThree = (sheets[name] for name in sheetNames)
# 2. Preprocess the data (merge the sheets, handle missing values) before analysis.
# Stack the three sheets row-wise.
# NOTE: the row index is not reset, so row labels from each sheet repeat in
# the combined frame.
data = pd.concat([dataOne, dataTwo, dataThree], axis=0)
# Drop every column that contains at least one missing value, modifying
# `data` in place.
data.dropna(axis=1, inplace=True)
# Average price of the dishes sold: take the mean of the 'amounts' column
# and print it rounded to two decimal places.
averageAmount = data['amounts'].mean()
print(round(averageAmount, 2))
# Frequency statistics: which dishes are the most popular?
# Count how often each dish name occurs and keep the ten most frequent
# (value_counts() already sorts in descending order of frequency).
dishesCount = data['dishes_name'].value_counts().head(10)

# Select a font with CJK glyphs before any plotting happens, so the Chinese
# dish names used as tick labels can be rendered.
plt.rcParams['font.sans-serif'] = 'SimHei'

# Draw the same counts twice: a red line, then bars.
dishesCount.plot(kind='line', color='r')
dishesCount.plot(kind='bar', fontsize=10)

# Annotate each bar with its count. xPos holds the 0-based positions of the
# bars along the x axis; the label is placed slightly above the bar top.
xPos = list(range(len(dishesCount)))
for a, b in zip(xPos, dishesCount):  # a: bar position, b: count for that dish
    plt.text(a, b + 0.1, b, ha='center', va='bottom')