# The following code was run:
# 1. Handle outliers in the credit-card data
# (1) Set the path and read the credit-card data (the file is read as GBK)
data <- read.csv(
  "D:/cursor-program/R/航空公司/实训3/credit_card.csv",
  stringsAsFactors = FALSE,
  fileEncoding = "GBK"
)

# Rows are removed with a logical keep-mask instead of `df[-which(cond), ]`.
# When no row matches, `which()` returns integer(0) and `df[-integer(0), ]`
# selects ZERO rows, which would silently empty the data set.
# `cond %in% TRUE` turns NA comparisons into FALSE, so rows with missing values
# are kept, exactly as `which()` treated them.
# Columns are looked up by string name with `[[ ]]` so the script does not
# depend on non-ASCII bare symbols after `$` reaching the parser intact.
# NOTE(review): possibly related to the reported
# `unexpected '\\' in "data$\"` parse error -- confirm.

# (2) Drop records where `逾期`, `呆账`, `强制停卡记录`, `退票` and `拒往记录`
#     are all 1 and `瑕疵户` is 2
drop_rule2 <- data[["逾期"]] == 1 &
  data[["呆账"]] == 1 &
  data[["强制停卡记录"]] == 1 &
  data[["退票"]] == 1 &
  data[["拒往记录"]] == 1 &
  data[["瑕疵户"]] == 2
data1 <- data[!(drop_rule2 %in% TRUE), ]

# (3) Drop records where `呆账`, `强制停卡记录` and `退票` are all 1 and
#     `拒往记录` is 2
drop_rule3 <- data1[["呆账"]] == 1 &
  data1[["强制停卡记录"]] == 1 &
  data1[["退票"]] == 1 &
  data1[["拒往记录"]] == 2
data2 <- data1[!(drop_rule3 %in% TRUE), ]

# (4) Drop records where `频率` is 5 and `月刷卡额` is not 1
drop_rule4 <- data2[["频率"]] == 5 &
  data2[["月刷卡额"]] != 1
data3 <- data2[!(drop_rule4 %in% TRUE), ]
# 2. Build the key features used to evaluate credit-card customer risk
# (1) Historical-behaviour feature: row-wise mean of the six record columns
data3[["历史行为特征"]] <- rowMeans(
  data3[c("瑕疵户", "逾期", "呆账", "退票", "拒往记录", "强制停卡记录")]
)

# (2) Economic-risk feature: row-wise sum of the five monetary columns
data3[["经济风险特征"]] <- rowSums(
  data3[c("借款余额", "个人月收入", "个人月开销", "家庭月收入", "月刷卡额")]
)

# (3) Income-risk feature: recode `职业`, `年龄` and `住家` onto levels 1-5,
#     then add the three recoded values together
library(car)
Job <- recode(
  data3[["职业"]],
  "1:7=2;8:14=4;15:17=3;18=5;19=1;20=3;21:22=1"
)
Age <- recode(
  data3[["年龄"]],
  "1=1;2:3=2;4:5=3;6:7=4;8:9=5"
)
House <- recode(
  data3[["住家"]],
  "1=2;2=1;3=5;4=3;5=4;6=1"
)
data3[["收入风险特征"]] <- Job + Age + House

# (4) Standardise the three constructed features with scale()
data4 <- data3[c("历史行为特征", "经济风险特征", "收入风险特征")]
standardizedData <- scale(data4)
head(standardizedData, n = 9)
# 3. Build the K-Means clustering model
# (1) Fit K-Means with 5 clusters; show the cluster centres and the number of
#     customers in each cluster
set.seed(123)  # fixed seed so the fit is reproducible
result <- kmeans(standardizedData, 5)
result
round(result$centers, 3)  # cluster centres
table(result$cluster)     # cluster sizes

# (2) Draw the customer-feature radar chart
library(fmsb)
# Per-feature maximum and minimum over the cluster centres. They are stored
# under their own names so the base functions max() and min() are not masked
# by global variables.
center_max <- apply(result$centers, 2, max)
center_max
center_min <- apply(result$centers, 2, min)
center_min
# radarchart() reads row 1 as the axis maximum and row 2 as the axis minimum;
# the remaining rows are the series to plot. The "max"/"min" row labels are
# kept explicitly.
df <- round(
  data.frame(rbind(max = center_max, min = center_min, result$centers)),
  3
)
df
radarchart(df, seg = 5, plty = 1:5, vlcex = 1, plwd = 2)
legend(
  x = "topleft",
  legend = c("客户群1", "客户群2", "客户群3", "客户群4", "客户群5"),
  lty = 1:5, lwd = 2, col = 1:5, text.width = 0.2,
  inset = 0.01, cex = 1, box.col = "grey80"
)
# Reported problem when running this script:
#   Error: unexpected '\\' in "data$\"
# 最新发布