import numpy as np
import pandas as pd
# Load the raw Double-11 Taobao cosmetics dataset.
df = pd.read_csv('D:/data/双十一淘宝美妆数据.csv')
df

# Exploratory look at the raw frame: dtypes (numeric / date columns),
# distinct update_time values as an array, column labels, summary
# statistics and the per-column null counts.
df.info()
df['update_time'].unique()
df.columns
df.describe()
df.isnull().sum()

# Replace every missing value with 0.
df = df.fillna(0)

# Count duplicated rows, drop them, then rebuild the index; drop=True
# discards the old index instead of keeping it as a column.
df.duplicated().sum()
df = df.drop_duplicates().reset_index(drop=True)
df
# Duplicate count again, now on the de-duplicated frame.
df.duplicated().sum()
## Normalise update_time to "%Y-%m-%d" strings: parse the column to
## datetimes first, then format each timestamp with strftime.
df['update_time'] = pd.to_datetime(df['update_time'])
df['update_time'] = df['update_time'].map(
    lambda stamp: stamp.strftime('%Y-%m-%d')
)
df.info()
df

## Add a sales-amount column: price * sale_count.
df['sale_amount'] = df['price'].mul(df['sale_count'])

# Rows with positive sale_count in ascending order, so the smallest entries
# can be used to spot-check that sale_amount was computed correctly.
positive_sales = df['sale_count'] > 0
df[positive_sales].sort_values(by='sale_count')
# Total sale_count per update_time value, as {date: total}.
result1 = df.groupby('update_time')['sale_count'].sum().to_dict()
# Rescale each total to units of ten million (1e7) and round to 2 decimals.
result2 = [round(float(total / 1e7), 2) for total in result1.values()]

import pyecharts.options as opts
from pyecharts.charts import Line

# Axis configuration: named y axis with split lines shown, named x axis.
sales_axis = opts.AxisOpts(
    splitline_opts=opts.SplitLineOpts(is_show=True),
    name="销量",
)
time_axis = opts.AxisOpts(name="时间")

# Line chart of the per-date totals, written out as a standalone HTML page.
dayXL = Line()
dayXL.add_xaxis(xaxis_data=list(result1))
dayXL.add_yaxis(y_axis=result2, series_name="销售量(单位:千万件)")
dayXL.set_global_opts(yaxis_opts=sales_axis, xaxis_opts=time_axis)
dayXL.render('html/dayXL.html')
## Top-10 shops by cumulative sales volume, one timeline frame per date.
from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline

# Series.unique() returns values in order of first appearance, NOT sorted,
# so the dates are sorted explicitly instead of relying on the row order of
# the input file. update_time holds '%Y-%m-%d' strings at this point, so
# lexicographic order is chronological order.
datetimes = sorted(df['update_time'].unique())
datetimes

timeline = Timeline().add_schema(is_loop_play=True, play_interval=500)

for day in datetimes:
    # Per-shop totals over every row dated on or before this frame's date.
    cumulative = (
        df[df['update_time'] <= day]
        .groupby('店名')
        .agg({'sale_count': 'sum', 'sale_amount': 'sum'})
    )
    # Keep the ten shops with the largest sale_count, then order them
    # ascending by sale_count for plotting.
    top10 = (
        cumulative.sort_values(by='sale_count', ascending=False)
        .head(10)
        .sort_values(by='sale_count')
    )
    label = top10.index.tolist()
    # Volume in millions (1e6) and amount in ten-millions (1e7), both
    # rounded to 2 decimals and converted to plain floats for the chart.
    saleCount = [round(float(v / 1e6), 2) for v in top10['sale_count']]
    saleAmount = [round(float(v / 1e7), 2) for v in top10['sale_amount']]
    bar = (
        Bar()
        .add_xaxis(label)
        .add_yaxis(
            series_name="销售量(单位:百万件)",
            y_axis=saleCount,
            label_opts=opts.LabelOpts(position="right"),
        )
        .add_yaxis(
            series_name="销售额(单位:千万)",
            y_axis=saleAmount,
            label_opts=opts.LabelOpts(position="right"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="累计销售量排行TOP10的店铺")
        )
        .reversal_axis()  # swap x and y axes to get horizontal bars
    )
    timeline.add(bar, day)

timeline.render('html/timelineChart.html')
# Top-20 shops by average price, one timeline frame per date.
from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline

timeline = Timeline().add_schema(is_loop_play=True, play_interval=500)

for day in datetimes:
    # Mean price per shop over every row dated on or before this frame's
    # date. Only the price mean is charted, so nothing else is aggregated.
    mean_price = (
        df[df['update_time'] <= day]
        .groupby('店名')
        .agg({'price': 'mean'})
    )
    # Keep the twenty highest average prices, then order them ascending
    # for plotting.
    top20 = (
        mean_price.sort_values(by='price', ascending=False)
        .head(20)
        .sort_values(by='price')
    )
    label = top20.index.tolist()
    # Round to 2 decimals and convert to plain floats for the chart.
    salePrice = [round(float(p), 2) for p in top20['price']]
    bar = (
        Bar()
        .add_xaxis(label)
        .add_yaxis(
            series_name="平均价格(单位:元/件)",
            y_axis=salePrice,
            label_opts=opts.LabelOpts(position="right"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="平均价格排行TOP20的店铺")
        )
        .reversal_axis()  # swap x and y axes to get horizontal bars
    )
    timeline.add(bar, day)

timeline.render('html/Price.html')