Shiny07---用Shiny完成分箱调参工作

本文介绍了一种利用R语言中的smbinning包进行自动最优分箱的方法,通过UI界面简化了参数配置过程,并展示了如何根据不同维度(如通话时长、通话次数等)对数据进行分箱处理以区分好用户与坏用户。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

业务提需求,希望可以自动寻找阈值,完成分箱工作,继而找到合适的区间,区别好坏用户。采用R软件的smbinning包,提供自动最优分箱和手动切分两种方式,便于业务同事自动化的切分区间和观察结果。
提供ui和sever脚本,供大家参考。

UI脚本

# 参数调整 --------------------------------------------------------------------
tabPanel("参数调整", 
         sidebarPanel(
           #侧边栏的控制键
           width = 3,
           #设置侧边栏的宽度
           #时间控件
           #时间控件
           dateRangeInput(
             inputId = "para_date",
             label = h2(strong(span("应还日期", style = "color:blue"))),
             start = Sys.Date() - 7,
             #开始日期设置
             end = Sys.Date() - 1,
             language = "zh-CN",
             width = "90%",
             separator = "至"
           ),
           textInput(inputId="seq_bin",h2(strong(span("输入分组间隔", style = "color:blue"))), "Sequence"),
           ## 主维度选择
           radioButtons(
             #单选框
             inputId = "para_feature_choose",
             label = h2(strong(span("维度选择", style = "color:blue"))),
             choices = list(
               "魔蝎运营商" = "para_mx_operator",
               "其他" = "para_others"
             ),
             selected = "para_mx_operator"#默认T+1数据
           ),
           ## 子维度选择
           radioButtons(
             #单选框
             inputId = "para_branch_dimension",
             label = h2(strong(span("维度选择", style = "color:blue"))),
             choices = list(
               "最近一个月通话时长" = "mx_last1month_call_time",
               "最近一个月通话次数" = "mx_last1month_call_nums",
               "最近一个月主叫次数" = "mx_last1month_dial_nums",
               "最近一个月被叫次数" = "mx_last1month_dialed_nums",
               "最近一个月通话号码数目" = "mx_last1month_call_number",
               "最近一个月主叫号码数" = "mx_last1month_dial_number",
               "最近一个月被叫号码数" = "mx_last1month_dialed_number",
               "最近一个月通话归属地数" = "mx_last1month_call_city"
             ),
             selected = "mx_last1month_call_time"
           ),
           ## 提交按钮 
           actionButton(inputId = "para_update", label = "提 交",icon=icon('play-circle')#按钮图案
           )#按钮,触发机制一样,第一次看到
         ),
         # Show a summary of the dataset and an HTML table with the
         # requested number of observations
         mainPanel(tabsetPanel(type = "pills", #tabs标准外观,"pills"对三个标签字段着色
                               tabPanel("Summary", DT::dataTableOutput("para_Summary")
                               ), 
                               tabPanel("Detail", DT::dataTableOutput("para_bin_Summary")
                               )
         )
         )
)

Server脚本

# 参数调整 --------------------------------------------------------------------
  datasetInput_para <- eventReactive(
    eventExpr = input$para_update,## 提交按钮
    valueExpr = {
      # 加上进度条显示
      progress <-
        Progress$new(session,
                     min = 1,
                     max = 10
                     # ,style = "old"
        )
      on.exit(progress$close())
      progress$set(message = '程序执行中')
      for (i in 1:5) {
        progress$set(value = i)
        Sys.sleep(0.5)
      }
      ## 判断不同的维度,分别运行相关的函数
      if(input$para_feature_choose == "para_mx_operator"){
        para_data <- para_mx_operator_f(input$para_date[1],input$para_date[2])
      }else if(input$para_feature_choose == "para_others"){
        para_data <- head(mtcars)
      }
      for (i in 6:10) {
        progress$set(value = i)
        Sys.sleep(0.5)
      }
      para_data
    },
    ignoreNULL = T
  )
  observe({
    ## 分析主维度
    x <- input$para_feature_choose
    # Can use character(0) to remove all choices
    if (is.null(x)){
      x <- character(0) 
    }
    ## 子维度的动态展示
    if (x == "para_mx_operator") {
      choiceNames <-
        c(
          "最近一个月通话时长",
          "最近一个月通话次数",
          "最近一个月主叫次数",
          "最近一个月被叫次数",
          "最近一个月通话号码数目",
          "最近一个月主叫号码数",
          "最近一个月被叫号码数",
          "最近一个月通话归属地数"
        )
      choiceValues <-
        c(
          "mx_last1month_call_time",
          # 通话时长
          "mx_last1month_call_nums",
          # 通话次数
          "mx_last1month_dial_nums",
          # 主叫次数
          "mx_last1month_dialed_nums",
          # 被叫次数
          "mx_last1month_call_number",
          # 通话号码数
          "mx_last1month_dial_number",
          # 主叫号码数
          "mx_last1month_dialed_number",
          # 被叫号码数
          "mx_last1month_call_city"# 通话归属地
        )
    } else if (x == "para_others") {
      choiceNames <- c("没有备选")
      choiceValues <- c("no_choice")
    }
    # Can also set the label and select items
    updateRadioButtons(session, inputId = "para_branch_dimension",
                       choiceNames = choiceNames,
                       choiceValues = choiceValues
    )
    
    ## Summary展示
    output$para_Summary <-
      DT::renderDataTable(DT::datatable({
        ## 最近一个月通话时长
        if(input$para_branch_dimension == "mx_last1month_call_time"){
          analyze_sub_f(data = datasetInput_para(),x = "total_time",y = "user_label")
        }else if(input$para_branch_dimension == "mx_last1month_call_nums"){
          ## 最近一个月通话次数
          analyze_sub_f(data = datasetInput_para(),x = "total_num",y = "user_label") 
        }else if(input$para_branch_dimension == "mx_last1month_dial_nums"){
          ## 最近一个月主叫次数
          analyze_sub_f(data = datasetInput_para(),x = "total_dial_num",y = "user_label") 
        }else if(input$para_branch_dimension == "mx_last1month_dialed_nums"){
          ## 最近一个月被叫次数
          analyze_sub_f(data = datasetInput_para(),x = "total_dialed_num",y = "user_label") 
        }else if(input$para_branch_dimension == "mx_last1month_call_number"){
          ## 最近一个月通话号码数目
          analyze_sub_f(data = datasetInput_para(),x = "total_peer_num",y = "user_label") 
        }else if(input$para_branch_dimension == "mx_last1month_dial_number"){
          ## 最近一个月主叫号码数
          analyze_sub_f(data = datasetInput_para(),x = "total_dial_peer_num",y = "user_label") 
        }else if(input$para_branch_dimension == "mx_last1month_dialed_number"){
          ## 最近一个月被叫号码数
          analyze_sub_f(data = datasetInput_para(),x = "total_dialed_peer_num",y = "user_label") 
        }else if(input$para_branch_dimension == "mx_last1month_call_city"){
          ## 最近一个月被叫号码数
          analyze_sub_f(data = datasetInput_para(),x = "total_city_num",y = "user_label") 
        }
      }, rownames = FALSE, options = list(autoWidth = TRUE)))
    ## Detail展示
    output$para_bin_Summary <-
      DT::renderDataTable(DT::datatable({
        if(input$para_branch_dimension == "mx_last1month_call_time"){
          bin_sub_f(data = datasetInput_para(),x = "total_time",y = "user_label",seq_bin = input$seq_bin)
        }else if(input$para_branch_dimension == "mx_last1month_call_nums"){
          ## 最近一个月通话次数
          bin_sub_f(data = datasetInput_para(),x = "total_num",y = "user_label",seq_bin = input$seq_bin) 
        }else if(input$para_branch_dimension == "mx_last1month_dial_nums"){
          ## 最近一个月主叫次数
          bin_sub_f(data = datasetInput_para(),x = "total_dial_num",y = "user_label",seq_bin = input$seq_bin) 
        }else if(input$para_branch_dimension == "mx_last1month_dialed_nums"){
          ## 最近一个月被叫次数
          bin_sub_f(data = datasetInput_para(),x = "total_dialed_num",y = "user_label",seq_bin = input$seq_bin) 
        }else if(input$para_branch_dimension == "mx_last1month_call_number"){
          ## 最近一个月通话号码数目
          bin_sub_f(data = datasetInput_para(),x = "total_peer_num",y = "user_label",seq_bin = input$seq_bin) 
        }else if(input$para_branch_dimension == "mx_last1month_dial_number"){
          ## 最近一个月主叫号码数
          bin_sub_f(data = datasetInput_para(),x = "total_dial_peer_num",y = "user_label",seq_bin = input$seq_bin) 
        }else if(input$para_branch_dimension == "mx_last1month_dialed_number"){
          ## 最近一个月被叫号码数
          bin_sub_f(data = datasetInput_para(),x = "total_dialed_peer_num",y = "user_label",seq_bin = input$seq_bin) 
        }else if(input$para_branch_dimension == "mx_last1month_call_city"){
          ## 最近一个月被叫号码数
          bin_sub_f(data = datasetInput_para(),x = "total_city_num",y = "user_label",seq_bin = input$seq_bin) 
        }
      }, rownames = FALSE, options = list(autoWidth = TRUE)))
    
  })

Function脚本

一些计算的逻辑放在这个脚本下,以供Server脚本调用


# 运营商数据 -----------------------------------------------------------------
para_mx_operator_f <-
  function(start_date = Sys.Date() - 7,
           end_date = Sys.Date() - 1) {
    library(RODBC)#使用配置文件,连接Mysql
    channel <-
      odbcConnect("MySQL", uid = "user0001", pwd = "******")
    sql <- paste0("SELECT t3.*,
    t4.total_time,
    t4.total_num,
    t4.total_dial_num,
    t4.total_dialed_num,
    t4.total_peer_num,
    t4.total_dial_peer_num,
    t4.total_dialed_peer_num,
    t4.total_city_num
    FROM
    (SELECT t1.id AS order_id,
    t2.user_id,
    t2.id,
    t1.user_label
    FROM
    (SELECT id,
    user_id,
    gmt_create,
    CASE
    WHEN status=8 THEN 1
    WHEN status=9 THEN 0
    END AS user_label
    FROM t_order_info
    WHERE status IN (5,6,7,8,9,10,11)
    AND date(reim_date)>='",start_date,"'
    AND date(reim_date)<='",end_date,"') t1
    LEFT JOIN
    (SELECT id,
    user_id,
    gmt_create
    FROM t_mx_operator_basic_info) t2 ON t1.user_id=t2.user_id
    WHERE date(t1.gmt_create)>=date(t2.gmt_create)
    GROUP BY t1.id
    ORDER BY t2.gmt_create DESC) t3
    LEFT JOIN
    (SELECT *
    FROM t_mx_operator_call_analysis
    WHERE mouth_type=3) t4 ON t3.id=t4.basic_info_id
    ")
    mx_operator_data <-
      sqlQuery(channel, sql, stringsAsFactors = FALSE)
    return(mx_operator_data)
  }


# 分箱函数 --------------------------------------------------------------------
## 子维度分析函数
## 参数:数据框、自变量、因变量、分组间隔序列
bin_sub_f <- function(data,x,y,seq_bin){
  ## 如果不采取人工划分区间
  if(seq_bin == "Sequence"){
    library(smbinning)
    result_total_num <-
      smbinning(df = data,
                y = y,
                x = x,
                p = 0.05)
    ## 自动分组可能存在一些异常
    if (length(result_total_num) > 1) {
      data_total_num <-
        data.frame(result_total_num$ivtable[, c("Cutpoint", "CntGood", "CntBad", "CntRec", "BadRate", "IV")])
    } else{
      data_total_num <- data.frame(Information = "No significant splits")
    }
  }else{
    library(smbinning)
    ## 手动输入间隔
    cuts <- as.numeric(unlist(stringr::str_split(seq_bin,",")))
    result_total_num <-
      smbinning.custom(df = data,
                       y = y,
                       x = x,
                       cuts = cuts)
    ## 自动分组可能存在一些异常
    if (length(result_total_num) > 1) {
      data_total_num <-
        data.frame(result_total_num$ivtable[, c("Cutpoint", "CntGood", "CntBad", "CntRec", "BadRate", "IV")])
    } else{
      data_total_num <- data.frame(Information = "No significant splits")
    }
  }
  return(data_total_num)
}

# 子维度数据分析 -----------------------------------------------------------------
analyze_sub_f <- function(data,x,y){
  # 百分比转化函数 -----------------------------------------------------------------
  turn_percentage <- function(var){
    return(paste0(round(var*100,2),"%"))
  }
  ## 正常还款、逾期已还、逾期未还
  ## 频数统计
  freq_userlabel <- data.frame(table(data[,y]))
  colnames(freq_userlabel) <- c( "Name","Value")
  ## 频率统计
  prop_userlabel <- data.frame(prop.table(table(data[,y])))
  colnames(prop_userlabel) <- c( "Name","Value")
  prop_userlabel$Value <- turn_percentage(prop_userlabel$Value)
  ## 月通话次数的统计
  stat_calls_of_lastmonth <- data.frame(Name=paste0(names(summary(data[,x]))),Value=as.vector(summary(data[,x])))
  summary_table <- rbind(freq_userlabel,prop_userlabel,stat_calls_of_lastmonth)
  return(summary_table)
}
  

结果展示

这里写图片描述
这里写图片描述

Summary

脚本的主要部分都在,可以参看。shiny的中文资料比较少,可以去RStudio的官网看下官方文档,可能方便理解点。

                   2018-04-05 于杭州

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值