业务提需求,希望可以自动寻找阈值,完成分箱工作,继而找到合适的区间,区别好坏用户。采用R软件的smbinning包,提供自动最优分箱和手动切分两种方式,便于业务同事自动化的切分区间和观察结果。
提供ui和sever脚本,供大家参考。
UI脚本
# 参数调整 --------------------------------------------------------------------
tabPanel("参数调整",
sidebarPanel(
#侧边栏的控制键
width = 3,
#设置侧边栏的宽度
#时间控件
#时间控件
dateRangeInput(
inputId = "para_date",
label = h2(strong(span("应还日期", style = "color:blue"))),
start = Sys.Date() - 7,
#开始日期设置
end = Sys.Date() - 1,
language = "zh-CN",
width = "90%",
separator = "至"
),
textInput(inputId="seq_bin",h2(strong(span("输入分组间隔", style = "color:blue"))), "Sequence"),
## 主维度选择
radioButtons(
#单选框
inputId = "para_feature_choose",
label = h2(strong(span("维度选择", style = "color:blue"))),
choices = list(
"魔蝎运营商" = "para_mx_operator",
"其他" = "para_others"
),
selected = "para_mx_operator"#默认T+1数据
),
## 子维度选择
radioButtons(
#单选框
inputId = "para_branch_dimension",
label = h2(strong(span("维度选择", style = "color:blue"))),
choices = list(
"最近一个月通话时长" = "mx_last1month_call_time",
"最近一个月通话次数" = "mx_last1month_call_nums",
"最近一个月主叫次数" = "mx_last1month_dial_nums",
"最近一个月被叫次数" = "mx_last1month_dialed_nums",
"最近一个月通话号码数目" = "mx_last1month_call_number",
"最近一个月主叫号码数" = "mx_last1month_dial_number",
"最近一个月被叫号码数" = "mx_last1month_dialed_number",
"最近一个月通话归属地数" = "mx_last1month_call_city"
),
selected = "mx_last1month_call_time"
),
## 提交按钮
actionButton(inputId = "para_update", label = "提 交",icon=icon('play-circle')#按钮图案
)#按钮,触发机制一样,第一次看到
),
# Show a summary of the dataset and an HTML table with the
# requested number of observations
mainPanel(tabsetPanel(type = "pills", #tabs标准外观,"pills"对三个标签字段着色
tabPanel("Summary", DT::dataTableOutput("para_Summary")
),
tabPanel("Detail", DT::dataTableOutput("para_bin_Summary")
)
)
)
)
Server脚本
# 参数调整 --------------------------------------------------------------------
datasetInput_para <- eventReactive(
eventExpr = input$para_update,## 提交按钮
valueExpr = {
# 加上进度条显示
progress <-
Progress$new(session,
min = 1,
max = 10
# ,style = "old"
)
on.exit(progress$close())
progress$set(message = '程序执行中')
for (i in 1:5) {
progress$set(value = i)
Sys.sleep(0.5)
}
## 判断不同的维度,分别运行相关的函数
if(input$para_feature_choose == "para_mx_operator"){
para_data <- para_mx_operator_f(input$para_date[1],input$para_date[2])
}else if(input$para_feature_choose == "para_others"){
para_data <- head(mtcars)
}
for (i in 6:10) {
progress$set(value = i)
Sys.sleep(0.5)
}
para_data
},
ignoreNULL = T
)
observe({
## 分析主维度
x <- input$para_feature_choose
# Can use character(0) to remove all choices
if (is.null(x)){
x <- character(0)
}
## 子维度的动态展示
if (x == "para_mx_operator") {
choiceNames <-
c(
"最近一个月通话时长",
"最近一个月通话次数",
"最近一个月主叫次数",
"最近一个月被叫次数",
"最近一个月通话号码数目",
"最近一个月主叫号码数",
"最近一个月被叫号码数",
"最近一个月通话归属地数"
)
choiceValues <-
c(
"mx_last1month_call_time",
# 通话时长
"mx_last1month_call_nums",
# 通话次数
"mx_last1month_dial_nums",
# 主叫次数
"mx_last1month_dialed_nums",
# 被叫次数
"mx_last1month_call_number",
# 通话号码数
"mx_last1month_dial_number",
# 主叫号码数
"mx_last1month_dialed_number",
# 被叫号码数
"mx_last1month_call_city"# 通话归属地
)
} else if (x == "para_others") {
choiceNames <- c("没有备选")
choiceValues <- c("no_choice")
}
# Can also set the label and select items
updateRadioButtons(session, inputId = "para_branch_dimension",
choiceNames = choiceNames,
choiceValues = choiceValues
)
## Summary展示
output$para_Summary <-
DT::renderDataTable(DT::datatable({
## 最近一个月通话时长
if(input$para_branch_dimension == "mx_last1month_call_time"){
analyze_sub_f(data = datasetInput_para(),x = "total_time",y = "user_label")
}else if(input$para_branch_dimension == "mx_last1month_call_nums"){
## 最近一个月通话次数
analyze_sub_f(data = datasetInput_para(),x = "total_num",y = "user_label")
}else if(input$para_branch_dimension == "mx_last1month_dial_nums"){
## 最近一个月主叫次数
analyze_sub_f(data = datasetInput_para(),x = "total_dial_num",y = "user_label")
}else if(input$para_branch_dimension == "mx_last1month_dialed_nums"){
## 最近一个月被叫次数
analyze_sub_f(data = datasetInput_para(),x = "total_dialed_num",y = "user_label")
}else if(input$para_branch_dimension == "mx_last1month_call_number"){
## 最近一个月通话号码数目
analyze_sub_f(data = datasetInput_para(),x = "total_peer_num",y = "user_label")
}else if(input$para_branch_dimension == "mx_last1month_dial_number"){
## 最近一个月主叫号码数
analyze_sub_f(data = datasetInput_para(),x = "total_dial_peer_num",y = "user_label")
}else if(input$para_branch_dimension == "mx_last1month_dialed_number"){
## 最近一个月被叫号码数
analyze_sub_f(data = datasetInput_para(),x = "total_dialed_peer_num",y = "user_label")
}else if(input$para_branch_dimension == "mx_last1month_call_city"){
## 最近一个月被叫号码数
analyze_sub_f(data = datasetInput_para(),x = "total_city_num",y = "user_label")
}
}, rownames = FALSE, options = list(autoWidth = TRUE)))
## Detail展示
output$para_bin_Summary <-
DT::renderDataTable(DT::datatable({
if(input$para_branch_dimension == "mx_last1month_call_time"){
bin_sub_f(data = datasetInput_para(),x = "total_time",y = "user_label",seq_bin = input$seq_bin)
}else if(input$para_branch_dimension == "mx_last1month_call_nums"){
## 最近一个月通话次数
bin_sub_f(data = datasetInput_para(),x = "total_num",y = "user_label",seq_bin = input$seq_bin)
}else if(input$para_branch_dimension == "mx_last1month_dial_nums"){
## 最近一个月主叫次数
bin_sub_f(data = datasetInput_para(),x = "total_dial_num",y = "user_label",seq_bin = input$seq_bin)
}else if(input$para_branch_dimension == "mx_last1month_dialed_nums"){
## 最近一个月被叫次数
bin_sub_f(data = datasetInput_para(),x = "total_dialed_num",y = "user_label",seq_bin = input$seq_bin)
}else if(input$para_branch_dimension == "mx_last1month_call_number"){
## 最近一个月通话号码数目
bin_sub_f(data = datasetInput_para(),x = "total_peer_num",y = "user_label",seq_bin = input$seq_bin)
}else if(input$para_branch_dimension == "mx_last1month_dial_number"){
## 最近一个月主叫号码数
bin_sub_f(data = datasetInput_para(),x = "total_dial_peer_num",y = "user_label",seq_bin = input$seq_bin)
}else if(input$para_branch_dimension == "mx_last1month_dialed_number"){
## 最近一个月被叫号码数
bin_sub_f(data = datasetInput_para(),x = "total_dialed_peer_num",y = "user_label",seq_bin = input$seq_bin)
}else if(input$para_branch_dimension == "mx_last1month_call_city"){
## 最近一个月被叫号码数
bin_sub_f(data = datasetInput_para(),x = "total_city_num",y = "user_label",seq_bin = input$seq_bin)
}
}, rownames = FALSE, options = list(autoWidth = TRUE)))
})
Function脚本
一些计算的逻辑放在这个脚本下,以供Server脚本调用
# 运营商数据 -----------------------------------------------------------------
para_mx_operator_f <-
function(start_date = Sys.Date() - 7,
end_date = Sys.Date() - 1) {
library(RODBC)#使用配置文件,连接Mysql
channel <-
odbcConnect("MySQL", uid = "user0001", pwd = "******")
sql <- paste0("SELECT t3.*,
t4.total_time,
t4.total_num,
t4.total_dial_num,
t4.total_dialed_num,
t4.total_peer_num,
t4.total_dial_peer_num,
t4.total_dialed_peer_num,
t4.total_city_num
FROM
(SELECT t1.id AS order_id,
t2.user_id,
t2.id,
t1.user_label
FROM
(SELECT id,
user_id,
gmt_create,
CASE
WHEN status=8 THEN 1
WHEN status=9 THEN 0
END AS user_label
FROM t_order_info
WHERE status IN (5,6,7,8,9,10,11)
AND date(reim_date)>='",start_date,"'
AND date(reim_date)<='",end_date,"') t1
LEFT JOIN
(SELECT id,
user_id,
gmt_create
FROM t_mx_operator_basic_info) t2 ON t1.user_id=t2.user_id
WHERE date(t1.gmt_create)>=date(t2.gmt_create)
GROUP BY t1.id
ORDER BY t2.gmt_create DESC) t3
LEFT JOIN
(SELECT *
FROM t_mx_operator_call_analysis
WHERE mouth_type=3) t4 ON t3.id=t4.basic_info_id
")
mx_operator_data <-
sqlQuery(channel, sql, stringsAsFactors = FALSE)
return(mx_operator_data)
}
# 分箱函数 --------------------------------------------------------------------
## 子维度分析函数
## 参数:数据框、自变量、因变量、分组间隔序列
bin_sub_f <- function(data,x,y,seq_bin){
## 如果不采取人工划分区间
if(seq_bin == "Sequence"){
library(smbinning)
result_total_num <-
smbinning(df = data,
y = y,
x = x,
p = 0.05)
## 自动分组可能存在一些异常
if (length(result_total_num) > 1) {
data_total_num <-
data.frame(result_total_num$ivtable[, c("Cutpoint", "CntGood", "CntBad", "CntRec", "BadRate", "IV")])
} else{
data_total_num <- data.frame(Information = "No significant splits")
}
}else{
library(smbinning)
## 手动输入间隔
cuts <- as.numeric(unlist(stringr::str_split(seq_bin,",")))
result_total_num <-
smbinning.custom(df = data,
y = y,
x = x,
cuts = cuts)
## 自动分组可能存在一些异常
if (length(result_total_num) > 1) {
data_total_num <-
data.frame(result_total_num$ivtable[, c("Cutpoint", "CntGood", "CntBad", "CntRec", "BadRate", "IV")])
} else{
data_total_num <- data.frame(Information = "No significant splits")
}
}
return(data_total_num)
}
# 子维度数据分析 -----------------------------------------------------------------
analyze_sub_f <- function(data,x,y){
# 百分比转化函数 -----------------------------------------------------------------
turn_percentage <- function(var){
return(paste0(round(var*100,2),"%"))
}
## 正常还款、逾期已还、逾期未还
## 频数统计
freq_userlabel <- data.frame(table(data[,y]))
colnames(freq_userlabel) <- c( "Name","Value")
## 频率统计
prop_userlabel <- data.frame(prop.table(table(data[,y])))
colnames(prop_userlabel) <- c( "Name","Value")
prop_userlabel$Value <- turn_percentage(prop_userlabel$Value)
## 月通话次数的统计
stat_calls_of_lastmonth <- data.frame(Name=paste0(names(summary(data[,x]))),Value=as.vector(summary(data[,x])))
summary_table <- rbind(freq_userlabel,prop_userlabel,stat_calls_of_lastmonth)
return(summary_table)
}
结果展示
Summary
脚本的主要部分都在,可以参看。shiny的中文资料比较少,可以去RStudio的官网看下官方文档,可能方便理解点。
2018-04-05 于杭州