## single variable analysis ----
# Per-quarter WoE/IV scan.
# For every column of `pricing.merge_data`, and for each
# (decision_qrt, group_binning) slice, WoE_Cal() is run with
# `mob6_cur_dpd30_flag` as the event. The per-slice tables are stacked into
# `woe_decile_qrt`, summarised into `iv_sum_qrt`, and the stacked table is
# written to 'woe_decile_qrt.csv'.
woe_parts_qrt <- list()
qrt_values <- unique(pricing.merge_data$decision_qrt)
group_values <- unique(pricing.merge_data$group_binning)

for (name in names(pricing.merge_data)) {
  print(name)
  # These two columns are never analysed, so skip them before slicing.
  if (name %in% c('transport_id', 'code_risk_policy')) next

  for (date in qrt_values) {
    for (t in group_values) {
      tmp_tb <- pricing.merge_data[group_binning == t & decision_qrt == date]

      # Only columns with more than one distinct value in the slice are
      # passed to WoE_Cal().
      if (length(unique(tmp_tb[[name]])) <= 1) next
      print(name)

      # Missing means NA or the literal string 'null'.
      miss_cnt <- nrow(tmp_tb[is.na(get(name)) | get(name) == 'null'])

      # Best effort: a failing column/slice is reported and skipped, it does
      # not abort the scan. The result is stored in its own variable so the
      # loop variable `t` is never overwritten.
      result_qrt <- tryCatch(
        data.table(
          name, t, date, miss_cnt,
          WoE_Cal(
            tmp_tb[mob6_cur_dpd30_flag >= 0 & !is.na(get(name))],
            event = 'mob6_cur_dpd30_flag', segVar = name,
            cut_num = 5, seg = 5
          )
        ),
        error = function(e) {
          message(
            "WoE_Cal failed for ", name, " (t = ", t, ", date = ", date,
            "): ", conditionMessage(e)
          )
          NULL
        }
      )
      if (is.null(result_qrt)) next

      woe_parts_qrt[[length(woe_parts_qrt) + 1L]] <- result_qrt
    }
  }
}

# Bind once at the end instead of growing a table inside the loop; columns are
# matched by name, as rbind() on data.tables does.
woe_decile_qrt <- rbindlist(woe_parts_qrt, use.names = TRUE)

# Keep only the magnitude of ks.
woe_decile_qrt[, ks := abs(ks)]

# One summary row per (name, t, date). IV entries equal to Inf are counted as
# 0 in the sum.
# NOTE(review): miss_pct divides by sum(Total), which presumably counts only
# the rows WoE_Cal() received (non-missing, flag >= 0), not all rows of the
# slice -- confirm this is the intended denominator.
iv_sum_qrt <- woe_decile_qrt[, .(
  iv = round(sum(ifelse(IV == Inf, 0.0, IV)), digits = 4),
  ks = max(ks),
  miss_pct = max(miss_cnt) / sum(Total),
  cnt = sum(Total)
), by = c('name', 't', 'date')]

# Wide view of the summary: one IV column per decision quarter.
out <- dcast(iv_sum_qrt, t + name ~ date, value.var = 'iv')
write.csv(woe_decile_qrt, 'woe_decile_qrt.csv')
# All-period WoE/IV scan.
# Same procedure as the per-quarter scan, but sliced by `group_binning` only
# and using `mob6_ever_dpd30_flag` as the event. The per-slice tables are
# stacked into `woe_decile_all` and summarised into `iv_sum_all`.
woe_parts_all <- list()
group_values_all <- unique(pricing.merge_data$group_binning)

for (name in names(pricing.merge_data)) {
  print(name)
  # These two columns are never analysed, so skip them before slicing.
  if (name %in% c('transport_id', 'code_risk_policy')) next

  for (t in group_values_all) {
    tmp_tb <- pricing.merge_data[group_binning == t]

    # Only columns with more than one distinct value in the slice are passed
    # to WoE_Cal().
    if (length(unique(tmp_tb[[name]])) <= 1) next
    print(name)

    # Missing means NA or the literal string 'null'.
    miss_cnt <- nrow(tmp_tb[is.na(get(name)) | get(name) == 'null'])

    # Best effort: a failing column/slice is reported and skipped, it does not
    # abort the scan. The result is stored in its own variable so the loop
    # variable `t` is never overwritten.
    result_all <- tryCatch(
      data.table(
        name, t, miss_cnt,
        WoE_Cal(
          tmp_tb[mob6_ever_dpd30_flag >= 0 & !is.na(get(name))],
          event = 'mob6_ever_dpd30_flag', segVar = name,
          cut_num = 5, seg = 5
        )
      ),
      error = function(e) {
        message(
          "WoE_Cal failed for ", name, " (t = ", t, "): ",
          conditionMessage(e)
        )
        NULL
      }
    )
    if (is.null(result_all)) next

    woe_parts_all[[length(woe_parts_all) + 1L]] <- result_all
  }
}

# Bind once at the end instead of growing a table inside the loop; columns are
# matched by name, as rbind() on data.tables does.
woe_decile_all <- rbindlist(woe_parts_all, use.names = TRUE)

# Keep only the magnitude of ks.
woe_decile_all[, ks := abs(ks)]

# One summary row per (name, t). IV entries equal to Inf are counted as 0 in
# the sum.
# NOTE(review): miss_pct divides by sum(Total), which presumably counts only
# the rows WoE_Cal() received (non-missing, flag >= 0), not all rows of the
# slice -- confirm this is the intended denominator.
iv_sum_all <- woe_decile_all[, .(
  iv = round(sum(ifelse(IV == Inf, 0.0, IV)), digits = 4),
  ks = max(ks),
  miss_pct = max(miss_cnt) / sum(Total),
  cnt = sum(Total)
), by = c('name', 't')]
# IV report: the all-period IV per (t, name) next to one IV column per
# decision quarter, written to 'out.csv'.
# NOTE(review): `iv_sum_all` is built on `mob6_ever_dpd30_flag` while
# `iv_sum_qrt` is built on `mob6_cur_dpd30_flag`, so the `iv` column and the
# per-quarter columns are measured against different events -- confirm this
# is intended.
t1 <- dcast(iv_sum_qrt, t + name ~ date, value.var = 'iv')
t2 <- iv_sum_all[, c('t', 'name', 'iv'), with = FALSE]
# merge() keeps only (t, name) pairs present on both sides (all = FALSE is the
# default).
tt <- merge(t2, t1, by = c('t', 'name'))
write.csv(tt, 'out.csv')