import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
# Load the credit-card data set (comma-separated, decoded as GBK) and list its columns.
data = pd.read_csv(
    'credit_card.csv',
    sep=',',
    encoding='gbk',
)
print(data.columns)
# 1. Remove abnormal credit-card records.
# Each filter is applied to the result of the previous one so the exclusions
# accumulate; filtering the raw `data` every time would leave only the last
# condition in effect in `data6`.
data1 = data[data['逾期'] != 1]
data2 = data1[data1['呆账'] != 1]
data3 = data2[data2['强制停卡记录'] != 1]
data4 = data3[data3['退票'] != 1]
data5 = data4[data4['拒往记录'] != 1]
data6 = data5[data5['瑕疵户'] != 1]
# `data6` now holds only the rows where none of the six columns equals 1.
print(data6)
# 2. Build the key customer-risk features: select each column group and
# standardize it with its own StandardScaler.
# NOTE(review): every group is taken from the unfiltered `data`, not from the
# filtered frames produced in step 1 -- confirm this is intended.

# (1) Behaviour features.
data_action = data.loc[:, ['瑕疵户', '逾期', '呆账', '强制停卡记录', '退票', '拒往记录']]
data_action_std = StandardScaler().fit_transform(data_action)

# (2) Economic-risk features.
data_jingji = data.loc[:, ['借款余额', '个人月收入', '个人月开销', '家庭月收入', '月刷卡额']]
data_jingji_std = StandardScaler().fit_transform(data_jingji)

# (3) Income-risk features.
data_income = data.loc[:, ['职业', '年龄', '住家']]
data_income_std = StandardScaler().fit_transform(data_income)
# 3. Build the K-Means clustering model (customer segmentation).
# Fit 5 clusters on the standardized behaviour features; random_state is fixed
# so repeated runs give the same assignment.
data_kmeans1 = KMeans(n_clusters=5, random_state=123).fit(data_action_std)
# The fitted cluster centres are available as data_kmeans1.cluster_centers_.
# Count how many rows were assigned to each cluster label.
r1 = pd.Series(data_kmeans1.labels_).value_counts()
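# 银行信用卡客户价值分析(Python数据分析) [Bank credit-card customer value analysis (Python data analysis)]
# 最新推荐文章于 2025-07-03 21:23:10 发布 [web-page residue: "latest recommended article published 2025-07-03 21:23:10"]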