# Load the collection extract, join it to `t` on transport_id, and derive the
# delinquency flags used by the aggregation that follows.
# NOTE(review): `t` must already hold a table defined earlier in the script
# (not visible here); if it does not, the name resolves to base::t().
collection <- read.csv(
  'E:/03 工作代码/02 R project/风险定价/collection.csv',
  stringsAsFactors = FALSE
)
collection <- as.data.table(collection)
pricing.result <- merge(collection, t, by = 'transport_id')

# 0/1 indicators derived from overdue_states:
#   m2_flag  - overdue_states >= 2
#   enr_flag - overdue_states <  7
#   gwo_flag - overdue_states == 7
# A missing overdue_states yields NA in all three flags.
pricing.result[, `:=`(
  m2_flag = ifelse(overdue_states >= 2, 1, 0),
  enr_flag = ifelse(overdue_states < 7, 1, 0),
  gwo_flag = ifelse(overdue_states == 7, 1, 0)
)]

# NOTE(review): `year()` is called without an argument here, which fails with
# data.table's / lubridate's year(). The expression looks like a
# months-between calculation relative to some origin date, but that origin
# column is not identifiable from this script - confirm and fill it in.
pricing.result[, mob := 12 * (year(timee) - year()) + month(timee)]
# Aggregate balances and counts per timee x platform x group_binning x
# price_level.
#   enr_amt / enr_cnt         - balance and row count where enr_flag is 1
#   coin_M2_amt / coin_M2_cnt - same, restricted to rows that also have m2_flag
#   gwo_cnt / gwo_bal         - row count and balance where gwo_flag is 1
#   adj_enr_amt               - enr balance; when adj_amt_contract differs from
#                               amt_contract a positive balance is scaled by
#                               adj_amt_contract / amt_contract, otherwise 0
#   adj_gwo_bal               - gwo balance; when adj_amt_contract differs from
#                               amt_contract it is replaced by adj_amt_contract
#                               whenever that is the smaller of the two
# Every sum uses na.rm = TRUE so that a single missing flag or balance cannot
# turn some aggregates into NA while their siblings stay numeric.
result_sum <- pricing.result[
  ,
  .(
    enr_amt = sum(bal_corpus * enr_flag, na.rm = TRUE),
    enr_cnt = sum(enr_flag, na.rm = TRUE),
    coin_M2_amt = sum(enr_flag * m2_flag * bal_corpus, na.rm = TRUE),
    coin_M2_cnt = sum(enr_flag * m2_flag, na.rm = TRUE),
    gwo_cnt = sum(gwo_flag, na.rm = TRUE),
    gwo_bal = sum(gwo_flag * bal_corpus, na.rm = TRUE),
    adj_enr_amt = sum(
      ifelse(
        adj_amt_contract == amt_contract,
        bal_corpus * enr_flag,
        ifelse(
          bal_corpus * enr_flag > 0,
          bal_corpus * enr_flag * adj_amt_contract / amt_contract,
          0
        )
      ),
      na.rm = TRUE
    ),
    adj_gwo_bal = sum(
      ifelse(
        adj_amt_contract == amt_contract,
        gwo_flag * bal_corpus,
        ifelse(
          adj_amt_contract < gwo_flag * bal_corpus,
          adj_amt_contract,
          gwo_flag * bal_corpus
        )
      ),
      na.rm = TRUE
    )
  ),
  by = c('timee', 'platform', 'group_binning', 'price_level')
]

# Wide view of the M2 counts: one row per timee x group_binning, one column per
# price_level.
x1 <- dcast(result_sum, timee + group_binning ~ price_level, value.var = 'coin_M2_cnt')

# NOTE(review): the file is named after `x1`, yet the long table `result_sum`
# is what gets written. Kept as-is to avoid changing the exported file -
# confirm which table is meant to be exported.
write.csv(result_sum, 'x1.csv')
########################################################## Initial variable statistics
# Profile every numeric column of pricing.merge_data on the rows with
# flag_loan == 1, separately for each group_binning x decision_qrt cell.
# One output row per (variable, group, quarter) with:
#   N          - rows in the cell
#   miss_cnt   - rows where the variable is NA; miss_pct = miss_cnt / N
#   unique_cnt - distinct non-missing values
#   maxValue / minValue / avgValue and the P10..P99 quantiles of the
#   non-missing values (NA when the cell has no non-missing value)
# Cells without any rows are skipped instead of producing -Inf/Inf/NaN rows.
# The result is stored in `stat_table` and written to variable_profile.csv.
loan_rows <- pricing.merge_data[flag_loan == 1]
group_values <- unique(pricing.merge_data$group_binning)
quarter_values <- unique(pricing.merge_data$decision_qrt)
profile_probs <- c(0.1, 0.3, 0.5, 0.7, 0.9, 0.99)

# Row indices of each non-empty cell, computed once and reused for every
# variable. The quarter loop variable is deliberately not called `t`, so the
# table `t` used earlier in the script is not overwritten.
cells <- list()
for (group in group_values) {
  for (qrt in quarter_values) {
    # which() drops NA comparisons, matching data.table's row filtering.
    idx <- which(loan_rows$group_binning == group & loan_rows$decision_qrt == qrt)
    if (length(idx) > 0L) {
      cells[[length(cells) + 1L]] <- list(group = group, qrt = qrt, idx = idx)
    }
  }
}

# Collect the rows in a list and bind once at the end instead of calling
# rbind() inside the loop.
profile_rows <- list()
for (name in names(pricing.merge_data)) {
  print(name)
  # `[[` avoids the scoping trap of get(name) inside `[.data.table`.
  column <- loan_rows[[name]]
  if (!is.numeric(column)) {
    next
  }
  for (cell in cells) {
    values <- column[cell$idx]
    present <- values[!is.na(values)]
    miss_cnt <- length(values) - length(present)
    if (length(present) > 0L) {
      max_value <- max(present)
      min_value <- min(present)
      avg_value <- mean(present)
      # All quantiles come from the same cell subset (P30 included).
      qs <- quantile(present, probs = profile_probs, names = FALSE)
    } else {
      max_value <- NA_real_
      min_value <- NA_real_
      avg_value <- NA_real_
      qs <- rep(NA_real_, length(profile_probs))
    }
    profile_rows[[length(profile_rows) + 1L]] <- data.table(
      group = cell$group,
      t = cell$qrt,
      name = name,
      N = length(values),
      miss_cnt = miss_cnt,
      miss_pct = miss_cnt / length(values),
      unique_cnt = length(unique(present)),
      maxValue = max_value,
      minValue = min_value,
      avgValue = avg_value,
      P10 = qs[1],
      P30 = qs[2],
      P50 = qs[3],
      P70 = qs[4],
      P90 = qs[5],
      P99 = qs[6]
    )
  }
}
stat_table <- rbindlist(profile_rows)
write.csv(stat_table, 'variable_profile.csv')