import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

# Column names supplied explicitly via `names=` because the file is read
# without a header row.
columns = ['UID', 'order_dt', 'product', 'amount']

# The separator is a regex, so it is written as a raw string: '\s' inside a
# normal string literal is an invalid escape sequence and triggers a
# DeprecationWarning/SyntaxWarning on recent Python versions.
df = pd.read_csv(
    r'C:\Users\think\Desktop\CDNOW_master.txt',
    encoding='gb2312',
    names=columns,
    sep=r'\s+',
)
df.head()
df.describe()

# Parse order_dt (values in %Y%m%d form) into datetimes, then truncate each
# date to month precision for month-level grouping.
df['date'] = pd.to_datetime(df.order_dt, format='%Y%m%d')
df['month'] = df.date.values.astype('datetime64[M]')
df.head()

# Per-user totals. numeric_only=True restricts the sum to the numeric columns;
# without it, pandas >= 2.0 raises TypeError on the datetime columns
# ('date', 'month') added above.
user_group = df.groupby('UID').sum(numeric_only=True)
user_group.head()
user_group.describe()

# plot() draws a line chart by default. When building a chart, decide which
# field to group by, which metric to show, how it is aggregated, and which
# chart type fits.
df.groupby('month').amount.sum().plot()

# A different presentation is selected with plot.<chart function>; the
# per-user totals computed above are reused instead of re-aggregating.
user_group.plot.scatter(x='amount', y='product')

# Create a drawing area 12 wide and 4 high.
plt.figure(figsize=(12, 4))
plt.subplot(
数据分析
最新推荐文章于 2024-11-10 19:00:45 发布
