用户频繁路径更新

本文探讨了如何分析用户在应用或网站中的频繁路径,并介绍了一种实现实时更新这些路径的方法。通过深入理解用户的交互行为,可以优化用户体验,提高用户留存率。

摘要生成于 C知道,由 DeepSeek-R1 满血版支持,前往体验 >

package demo

import java.text.SimpleDateFormat
import java.util.ArrayList
import java.util.Calendar
import scala.collection.JavaConversions._
import org.apache.spark.sql.{DataFrame, SparkSession}

import scala.collection.mutable.ArrayBuffer
import scala.util.control.Breaks

object CompareFlowDemo {
  // Ids of historical paths that were seen again in today's input and accepted as active.
  // Written by newActivatePath and read by noAvailHisPath; it is object-level mutable state,
  // so it carries over between calls within the same JVM.
  var activateRepeatPath:Set[Int] = Set()   // set of repeated path ids
  /**
    * Demo entry point: builds a small in-memory history table and a list of
    * today's paths, then runs the three update steps in order
    * (exact-duplicate flagging, similarity filtering, history consolidation).
    *
    * The Spark session is always stopped on exit, including when a step throws.
    *
    * @param args command-line arguments (unused)
    */
  def main(args:Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("spark dataframe test").master("local")
      .getOrCreate()

    try {
      // Today's date and the expiry cutoff (three days back), both formatted as yyyyMMdd.
      val dateFormat = new SimpleDateFormat("yyyyMMdd")
      val nowDay: String = dateFormat.format(Calendar.getInstance().getTime)
      val cutoff = Calendar.getInstance()
      cutoff.add(Calendar.DATE, -3)
      val threeDayAgo: String = dateFormat.format(cutoff.getTime)

      // Seed history rows:
      // (path id, path, step count, frequency, created on, last active on, enabled flag, partition date)
      val repeatHisPathData = spark.createDataFrame(List(
        (1,"a->b->c->d->e->", 5, 182,"20200630","20200716",1,"20200717"),
        (2,"h->g->e->", 3, 22,"20200630","20200710",1,"20200717")
      )).toDF("flow_id","paths", "step", "freq","create_dt","active_dt","is_enable","dt")

      // Today's newly observed paths: (path, step count, frequency)
      val repeatNewPathData = new ArrayList[(String,Int,Int)]
      repeatNewPathData.add(("a->b->c->d->e->",5,2))
      repeatNewPathData.add(("h->g->e->i->",4,2))
      repeatNewPathData.add(("h->g->e->",3,2))

      // Step 1: flag new paths that already exist verbatim in the history.
      val afterRepeatCom = isInHistPath(repeatNewPathData,repeatHisPathData)
      println(afterRepeatCom)

      // Step 2: drop new paths that are similar to a stronger, still-active historical path.
      val afterSimilarCom = newActivatePath(afterRepeatCom,repeatHisPathData,threeDayAgo)

      // Step 3: consolidate history with the accepted new paths.
      // New ids are allocated above the current maximum flow_id.
      val maxId = repeatHisPathData.groupBy().max("flow_id").first().get(0).toString.toInt
      val completeData = noAvailHisPath(afterSimilarCom,repeatHisPathData,maxId,nowDay,threeDayAgo)
      // completeData is neither printed nor persisted by this demo. It can be turned into a
      // DataFrame with:
      //   spark.createDataFrame(completeData).toDF("flow_id","paths","step","freq","create_dt","active_dt","is_enable")
    } finally {
      spark.stop()
    }
  }

  /**
    * Flags each newly observed path that already exists verbatim in the comparison table.
    *
    * The lookup uses a typed column comparison instead of a concatenated SQL string, so
    * path text containing quotes cannot break the predicate, and it fetches at most one
    * matching row per path in a single Spark action.
    *
    * @param inputPath    new paths as (path, step count, frequency)
    * @param comparePath  table to look the paths up in; matched on its `paths` column, with
    *                     column 0 read as the path id and column 4 as the creation date
    * @return             one tuple per input path, in input order:
    *                     (path, step count, frequency, repeat flag, repeated path id, creation date);
    *                     the flag is 1 when a match exists, otherwise 0 with id 0 and an empty date
    */
  def isInHistPath(inputPath:ArrayList[(String,Int,Int)],comparePath:DataFrame):ArrayBuffer[(String,Int,Int,Int,Int,String)] ={
    // Explicit Java -> Scala conversion; avoids depending on the deprecated implicit JavaConversions.
    import scala.collection.JavaConverters._

    val pathAddRepeatFlag = new ArrayBuffer[(String,Int,Int,Int,Int,String)]()

    for ((path, step, freq) <- inputPath.asScala) {
      // take(1) is enough: only the first matching row was ever used.
      val firstMatch = comparePath.filter(comparePath("paths") === path).take(1).headOption
      val flagged = firstMatch match {
        case Some(hit) => (path, step, freq, 1, hit.get(0).toString.toInt, hit.get(4).toString)
        case None      => (path, step, freq, 0, 0, "")
      }
      pathAddRepeatFlag += flagged
    }
    pathAddRepeatFlag
  }

  /**
    * Filters the new paths down to those that should be added to the active set.
    *
    * A new path is rejected when some enabled historical path with `active_dt` on or after
    * the cutoff both outranks it (more steps, or equal steps and higher frequency) and is
    * judged similar to it by [[isSimilar]] with factor 0.8.
    *
    * Side effect: for every accepted path whose repeated-path id is non-zero, that id is
    * added to `activateRepeatPath`.
    *
    * @param inputPath        new paths as produced by isInHistPath:
    *                         (path, step count, frequency, repeat flag, repeated path id, creation date)
    * @param hisComparePath   historical paths; columns 1, 2 and 3 are read as path, step count
    *                         and frequency, and `is_enable` / `active_dt` are filtered on
    * @param noActDayNum      cutoff date; only rows with `active_dt` >= this value are compared
    * @return                 the accepted new paths, in input order
    */
  def newActivatePath(inputPath:ArrayBuffer[(String,Int,Int,Int,Int,String)],hisComparePath:DataFrame,noActDayNum:String):ArrayBuffer[(String,Int,Int,Int,Int,String)] ={
    val activeHistory = hisComparePath
      .filter((hisComparePath("is_enable") === 1) && (hisComparePath("active_dt") >= noActDayNum))
      .collect()

    val addPaths = new ArrayBuffer[(String,Int,Int,Int,Int,String)]

    for (candidate <- inputPath) {
      val (path, step, freq, repeatedId) = (candidate._1, candidate._2, candidate._3, candidate._5)

      // exists stops at the first historical path that outranks and resembles the candidate.
      val shadowed = activeHistory.exists { row =>
        val hisStep = row.get(2).toString.toInt
        val outranked = step < hisStep || (step == hisStep && freq < row.get(3).toString.toInt)
        outranked && isSimilar(path, row.get(1).toString, 0.8)
      }

      if (!shadowed) {
        addPaths += candidate
        // A non-zero id means the path repeats a historical one; remember it was seen today.
        if (repeatedId != 0) activateRepeatPath += repeatedId
      }
    }
    addPaths
  }

  /**
    * Decides whether one path is similar to another, based on the share of its page ids
    * that also occur in the other path.
    *
    * Both paths are split on "->" and empty segments of the judged path are ignored. Page
    * ids are compared as whole tokens, so "e" does not match inside "home". The ratio is
    * computed in floating point over all page ids of the judged path, repeated ids counting
    * once per occurrence.
    *
    * @param beJudgedPath   path whose page ids are looked up
    * @param comPath        path to look them up in
    * @param factor         minimum share (0.0 to 1.0) of page ids that must be found
    * @return               true when found / total >= factor; false when the judged path
    *                       contains no page ids
    */
  def isSimilar(beJudgedPath:String,comPath:String,factor:Double): Boolean ={
    // String.split drops trailing empty strings, so "a->b->" already yields ("a", "b").
    val judgedPages = beJudgedPath.split("->").filter(_.nonEmpty)
    if (judgedPages.isEmpty) {
      false
    } else {
      val referencePages = comPath.split("->").toSet
      val shared = judgedPages.count(referencePages.contains)
      shared.toDouble / judgedPages.length >= factor
    }
  }


  /**
    * Merges the historical paths with the accepted new paths into one result list.
    *
    * Historical rows whose id is in `activateRepeatPath` are skipped here, because they are
    * re-emitted from `inputPath` with a refreshed active date. Every other historical row is
    * emitted once, unchanged apart from its enabled flag, which stays 1 only if all of:
    *  - the row was enabled (column 6 is non-zero),
    *  - its active date (column 5) is not earlier than `threeDayAgo`,
    *  - no new path both outranks it (more steps, or equal steps and higher frequency) and
    *    is judged similar by isSimilar with factor 0.8.
    *
    * New paths follow the historical rows, all enabled and with `nowDay` as active date. A
    * path with a non-zero repeated id keeps that id and its creation date; any other path
    * gets the next id above `maxIdValue` and `nowDay` as creation date.
    *
    * @param inputPath    accepted new paths:
    *                     (path, step count, frequency, repeat flag, repeated path id, creation date)
    * @param hisAllPath   all historical paths; columns 0-6 are read positionally as
    *                     id, path, step count, frequency, creation date, active date, enabled flag
    * @param maxIdValue   largest id already in use
    * @param nowDay       today's date string
    * @param threeDayAgo  expiry cutoff date string
    * @return             (id, path, step count, frequency, creation date, active date, enabled flag)
    */
  def noAvailHisPath(inputPath:ArrayBuffer[(String,Int,Int,Int,Int,String)],hisAllPath:DataFrame,maxIdValue:Int,nowDay:String,threeDayAgo:String)={
    val merged = new ArrayBuffer[(Int,String,Int,Int,String,String,Int)]

    hisAllPath.collect().foreach { hisRow =>
      val flowId = hisRow.get(0).toString.toInt
      // Paths seen again today are handled in the new-path section below.
      if (!activateRepeatPath.contains(flowId)) {
        val wasEnabled = hisRow.get(6).toString.toInt != 0
        val pathText   = hisRow.get(1).toString
        val hisStep    = hisRow.get(2).toString.toInt
        val hisFreq    = hisRow.get(3).toString.toInt
        val createdOn  = hisRow.get(4).toString
        val activeOn   = hisRow.get(5).toString

        // Checks short-circuit in the original order: disabled, expired, then superseded.
        val stillEnabled =
          wasEnabled &&
            !isLater(activeOn, threeDayAgo) &&
            !inputPath.exists { fresh =>
              val outranks = fresh._2 > hisStep || (fresh._2 == hisStep && fresh._3 > hisFreq)
              outranks && isSimilar(pathText, fresh._1, 0.8)
            }

        merged += ((flowId, pathText, hisStep, hisFreq, createdOn, activeOn, if (stillEnabled) 1 else 0))
      }
    }

    // Append today's paths; brand-new ones draw consecutive ids above the current maximum.
    var nextId = maxIdValue
    inputPath.foreach {
      case (pathText, step, freq, _, 0, _) =>
        nextId += 1
        merged += ((nextId, pathText, step, freq, nowDay, nowDay, 1))
      case (pathText, step, freq, _, repeatedId, createdOn) =>
        merged += ((repeatedId, pathText, step, freq, createdOn, nowDay, 1))
    }
    merged
  }

  /**
    * Tells whether a data date falls before a reference date, i.e. whether the data
    * should be treated as expired.
    *
    * The comparison is a plain lexicographic string comparison. The callers in this file
    * pass yyyyMMdd strings, for which that ordering matches calendar order.
    *
    * @param dataDay      last-active date of the data
    * @param compareDay   reference date to compare against
    * @return             true when dataDay sorts strictly before compareDay
    */
  def isLater(dataDay:String,compareDay:String) ={
    dataDay.compareTo(compareDay) < 0
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值