任务1 财务数据处理
任务1.1
import pandas as pd
table1=[
"Stkcd",
"Accper",
"Typrep",
"B001000000",
"B001100000",
"B001101000",
"B001200000",
"B001201000",
"B001207000",
"B001209000",
"B001210000",
"B001211000",
"B001212000",
"B001303000",
"B002300000"
]
lr=pd.read_csv('LR.csv')
lr=lr[table1]
a=lr['Typrep']
lr[a=='A'].to_csv("LR_1.csv", encoding="utf-8", index=False)
print(lr[a=='A'].shape[0])
print(lr[a=='A'].shape[1])
任务1.2
import pandas as pd
lr_1=pd.read_csv('LR_1.csv')
zcfz=pd.read_csv('ZCFZ.csv')
stk=pd.read_csv('Stk_ind.csv',encoding='gbk')
t1=zcfz[["Stkcd", "Accper", "Typrep",'A002000000','A001000000']]
lr_2=lr_1.merge(t1,on=["Stkcd", "Accper", "Typrep"],how='left')
t2=stk[['Stkcd','Indnme','Nindnme']]
lr_2=lr_2.merge(t2,on='Stkcd',how='left')
lr_2.to_csv("LR_2.csv", encoding="utf-8", index=False)
print(lr_2.shape[0])
print(lr_2.shape[1])
任务1.3
import pandas as pd
lr_2=pd.read_csv('LR_2.csv')
nan=lr_2.isna().sum()/len(lr_2)
lr_3=lr_2[nan[nan<0.7].index]
lr_3.to_csv('LR_3.csv', encoding="utf-8", index=False)
print(lr_3.shape[1])
任务1.4
import pandas as pd
lr_3=pd.read_csv('LR_3.csv')
lr_4=lr_3.dropna(axis=0, how='any')
lr_4.to_csv('LR_4.csv',index=False,encoding='utf-8')
print(lr_4.shape[0])
任务1.5
import pandas as pd
lr_4=pd.read_csv("LR_4.csv")
lr_4["Accper"] = pd.to_datetime(lr_4["Accper"]).dt.strftime("%Y-%m-%d")
lr_4.to_csv("LR_5.csv", encoding="utf-8", index=False)
任务1.6
import pandas as pd
lr_5=pd.read_csv("LR_5.csv")
lr_5['利润率']=lr_5['B001000000']/lr_5['B001100000']
lr_5['资产负债率']=lr_5['A002000000']/lr_5['A001000000']
lr_5=lr_5[(lr_5['利润率']>=-3)&(lr_5['利润率']<=3)&(lr_5['资产负债率']>=-3)&(lr_5['资产负债率']<=3)]
lr_5.to_csv("LR_new.csv",index=False)
print( lr_5.shape[0])
print(lr_5.shape[1])
print(lr_5[["利润率", "资产负债率"]].head())
任务 2 财务数据指标分析及可视化
任务2.1
import matplotlib
import pandas as pd
import matplotlib.pyplot as plt
font={
'family' : 'Microsoft YaHei',
'weight' : 'bold',
'size' : '14'}
matplotlib.rc('font', **font)
lr_new=pd.read_csv("LR_new.csv")
lr_new["Accper"] = pd.to_datetime(lr_new["Accper"])
t1=lr_new[(lr_new["Accper"].dt.year==2019) & (lr_new["Accper"].dt.month==9)]
pro=t1.groupby("Indnme")["B001000000"].mean()
x1=pro.index
y1=pro.values
plt.figure(figsize=(15,10))
plt.bar(x1, y1,width=0.6)
plt.title('2019年9月各行业大类的利润对比')
plt.xlabel("行业大类")
plt.ylabel("利润总额均值")
plt.xticks(rotation=45,ha="right")
plt.savefig('2019年9月各行业大类的利润对比.png')
plt.show()
lr_new["季度"]=lr_new["Accper"].dt.to_period("Q")
pro_rate=lr_new.groupby(["Indnme",'季度'])["利润率"].mean().unstack(level=0)
pro_rate.index = pro_rate.index.astype(str)
plt.figure(figsize=(15, 7))
for i in pro_rate.columns:
plt.plot(pro_rate.index, pro_rate[i], marker="o", linewidth=2, label=i)
plt.title("2018年1月至2019年9月各行业大类利润率变化", fontsize=14)
plt.xlabel("季度", fontsize=12)
plt.ylabel("利润率均值", fontsize=12)
plt.legend(title="行业大类",bbox_to_anchor=(1.05, 1),loc="upper left")
plt.tight_layout()
plt.savefig("2018年1月至2019年9月各行业大类利润率变化.png")
plt.show()
任务2.2
import matplotlib
import pandas as pd
import matplotlib.pyplot as plt
font={
'family' : 'Microsoft YaHei',
'weight' : 'bold',
'size' : '14'}
matplotlib.rc('font', **font)
lr_new=pd.read_csv("LR_new.csv")
lr_new["Accper"] = pd.to_datetime(lr_new["Accper"])
t1=lr_new[(lr_new["Accper"].dt.year==2019) & (lr_new["Accper"].dt.month==9)]
t1=t1[t1['Indnme']=='金融']
pro=t1.groupby("Nindnme")["利润率"].mean().sort_values(ascending=False)
plt.figure(figsize=(10, 6))
plt.bar(pro.index[:3],pro.values[:3],width=0.6)
plt.xlabel('金融行业细类')
plt.ylabel("利润率均值")
plt.title('2019年该行业各细类利润率对比')
plt.savefig('2019年该行业各细类利润率对比.png')
plt.show()