import os
import pandas as pd
# Make the exercise folder the working directory so the relative workbook
# path used below resolves against it.
os.chdir(r"D:\pycharm程序文件\练习1")
# Load the workbook into a DataFrame using read_excel's default settings.
data = pd.read_excel("data.xlsx")
# Per-column missing-value statistics for ``data``.
#
# ``isna()`` flags every missing cell, so summing the flags column-wise gives
# the same number as "row count minus non-null count" without slicing each
# column by position in a Python-level loop.
row_total = data.shape[0]
missing_counts = data.isna().sum()

# Number of missing cells per column, in the DataFrame's column order.
missing_value_list = missing_counts.tolist()

# Fraction of missing cells per column (missing / total rows), same order.
missing_rate_list = (missing_counts / row_total).tolist()
# Capture the field names of the first row tuple only; the remaining rows are
# never visited. An empty frame yields an empty list.
first_row = next(data.itertuples(), None)
colunm_name = [] if first_row is None else [first_row._fields]
# Display labels used in the report below.
# NOTE(review): the labels are assumed to line up positionally with the
# columns of ``data`` (and therefore with the two statistics lists) -- verify.
# NOTE(review): '_18' looks like an auto-generated placeholder rather than a
# real header -- confirm the intended label for that column.
new_colunm_name = ['保单号', '起保日期', '终止日期', '渠道', '品牌', '车系', '保单性质', '续保年',
                   '投保类别', '是否本省车牌', '使用性质', '车辆种类', '车辆用途', '新车购置价', '车龄',
                   '险种', 'NCD', '_18', '客户类别', '被保险人性别', '被保险人年龄', '是否投保车损',
                   '是否投保盗抢', '是否投保车上人员', '三者险保额', '签单保费', '立案件数', '已决赔款', '是否续保']

# Walk labels and statistics in lockstep so each label is printed with the
# value at the same position. ``zip`` stops at the shorter sequence, so a
# length mismatch truncates the report instead of raising.
for label, rate in zip(new_colunm_name, missing_rate_list):
    print("{}缺失率={}".format(label, rate))

# Visual separator between the rate section and the count section.
print("/"*188)

for label, count in zip(new_colunm_name, missing_value_list):
    print("{}缺失值={}".format(label, count))
"""
data1 = data.iloc[:,17]
print(data1.count())
"""