pandas学习综合练习
一、2002年-2018年上海机动车拍照拍卖分析
df_car = pd.read_csv('data1/2002年-2018年上海机动车拍照拍卖.csv')
##1
df = df_car.copy()
df['percent'] = df.apply(lambda x:x['Total number of license issued']/x['Total number of applicants'],axis=1)
for index, row in df.iterrows():
if row['percent']<0.05:
print(row['Date'])
break
##2
groupedyear = dfym.groupby('年份')
def f(df):
data = OrderedDict()
data['LP_max'] = df["lowest price "].max()
data['LP_mean'] = df['lowest price '].mean()
data['LP_075'] = df['lowest price '].quantile(q=0.75)
return pd.Series(data)
groupedyear.apply(f)
##3
df = df_car.copy()
df['年份'] = df['Date'].apply(lambda x : 2000+int(x.split('-')[0]))
df['月份'] = df['Date'].apply(lambda x : x.split('-')[1])
newcolumns = ['年份','月份']+list(df.columns[1:-2])
dfym = df.reindex(columns=newcolumns).copy()
dfym.head()
##4
Month = dfym.iloc[0:12,1].to_list()
result = dfym.melt(id_vars=['年份','月份'],value_vars=['Total number of license issued','lowest price ','avg price','Total number of applicants'],value_name='info')
result.pivot_table(index = ['年份','variable'],columns='月份',values='info',fill_value='-').reindex(columns = Month)
##5
print('[最低价、均值]与上月差额不同号的有:')
for index in dfym.index:
try:
signal = (dfym.loc[index,'lowest price ']-dfym.loc[index+1,'lowest price '])*\
(dfym.loc[index,'avg price'] -dfym.loc[index+1,'avg price'])
if signal<0:
print(dfym.loc[index+1,['年份','月份']])
print('\n')
except:
break
##6
df6 = dfym.copy()
df6['发行增益']=0
for index in df6.index:
if index<2:continue
df6.loc[index,'发行增益']= df6.loc[index,'Total number of license issued']-(df6.loc[index-1,'Total number of license issued']+df6.loc[index-2,'Total number of license issued'])/2
print("最小",df6.loc[df6["发行增益"] == df6["发行增益"].min()][['年份','月份']].head())
print("最大",df6.loc[df6["发行增益"] == df6["发行增益"].max()][['年份','月份']].head())
未完待续…