自定义累加器代码解析

                                            今天也要努力学习

 package com.bjsxt.scalaspark.core.examples

import org.apache.spark.util.AccumulatorV2
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Mutable pair of running totals used as both the input and the output type
  * of the custom accumulator defined in this file.
  *
  * Notes on writing a custom accumulator:
  *  - extend AccumulatorV2[IN, OUT]; the first type parameter is the input
  *    type and the second one is the output type
  *    (usage example: `val myVectorAcc = new VectorAccumulatorV2`);
  *  - the zero check (isZero) must agree with the value installed by reset().
  *
  * @param totalCount running count
  * @param totalAge   running sum of ages
  */
case class Info(var totalCount: Int, var totalAge: Int)


/**
  * Accumulator that sums [[Info]] records: every added or merged `Info`
  * contributes its `totalCount` and `totalAge` to the running totals.
  *
  * The zero value is `Info(0, 0)`, the identity of the addition performed by
  * `add` and `merge`. The constructor, `reset()` and `isZero` all use this
  * same value, so a reset copy adds nothing of its own when it is merged back.
  */
class SelfAccumlator extends AccumulatorV2[Info, Info] {

  /** Current totals; starts at the zero value. */
  private var result: Info = Info(0, 0)

  /**
    * Tells whether the accumulator currently holds the zero value.
    * Must return true right after `reset()`.
    *
    * @return true when both totals are 0
    */
  override def isZero: Boolean = {
    val zero = result.totalAge == 0 && result.totalCount == 0
    // Diagnostic output kept from the demo; it now reports the value that is returned.
    println("判断 累加器是否是初始值***" + zero + " ***end")
    zero
  }

  /**
    * Creates an independent accumulator holding the same totals.
    *
    * `Info` has mutable fields, so the copy gets its own `Info` instance;
    * sharing the reference would let `add`/`merge` on one accumulator silently
    * change the other.
    *
    * @return a new accumulator with a snapshot of the current totals
    */
  override def copy(): AccumulatorV2[Info, Info] = {
    val newAccumulator = new SelfAccumlator()
    newAccumulator.result = Info(result.totalCount, result.totalAge)
    newAccumulator
  }

  /** Puts the accumulator back to the zero value `Info(0, 0)`. */
  override def reset(): Unit = {
    result = Info(0, 0)
  }

  /**
    * Adds one record to the running totals.
    *
    * @param v record whose `totalCount` and `totalAge` are added
    */
  override def add(v: Info): Unit = {
    println(s" in add method : v = $v ,v.totalCount = ${v.totalCount},v.totalAge = ${v.totalAge}")
    result.totalAge += v.totalAge
    result.totalCount += v.totalCount
  }

  /**
    * Folds the totals of another accumulator into this one.
    *
    * @param other accumulator to merge; must be a `SelfAccumlator`
    * @throws UnsupportedOperationException if `other` is of a different class
    */
  override def merge(other: AccumulatorV2[Info, Info]): Unit = other match {
    case o: SelfAccumlator =>
      println(s" in merge method : o = $o ")
      result.totalCount += o.result.totalCount
      result.totalAge += o.result.totalAge
    case _ => throw new UnsupportedOperationException(
      s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  /**
    * Current totals as seen from outside the accumulator.
    *
    * @return the internal `Info` instance (not a copy)
    */
  override def value: Info = result
}


/**
  * Demo driver: registers a [[SelfAccumlator]], feeds it one `Info(1, age)`
  * per input line from a 3-partition RDD and prints the accumulated totals.
  */
object DefindSelfAccumulator {
  /**
    * Runs the demo on a local Spark master.
    *
    * @param args unused
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("selfAccumulator")
    val sc = new SparkContext(conf)

    // Always release the SparkContext, even when the job fails.
    try {
      val nameList = sc.parallelize(List[String](
        "A 1", "B 2", "C 3",
        "D 4", "E 5", "F 6",
        "G 7", "H 8", "I 9"
      ), 3)
      println("nameList RDD partition length = " + nameList.getNumPartitions)

      // Create the accumulator and register it with the context.
      val myAccumulator = new SelfAccumlator()
      sc.register(myAccumulator, "First Accumulator")

      // Update the accumulator inside an action: updates made inside
      // transformations such as map may be applied more than once when a
      // task or stage is re-executed.
      nameList.foreach { one =>
        // Each line is "<name> <age>"; the second token is the age.
        myAccumulator.add(Info(1, one.split(" ")(1).toInt))
      }

      println(s"accumulator totalCount = ${myAccumulator.value.totalCount}, totalAge = ${myAccumulator.value.totalAge}")
    } finally {
      sc.stop()
    }
  }
}
package com.wuyue.examples

import org.apache.spark.util.AccumulatorV2
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Accumulator that concatenates the strings it receives.
  *
  * The zero value is the empty string, the identity of concatenation. The
  * initial state, `reset()` and `isZero` all agree on it, so reset copies do
  * not inject extra characters into the merged result.
  */
class MyAcc extends AccumulatorV2[String, String] {

  /** Concatenation built so far; starts at the zero value. */
  var returnResult: String = ""

  /**
    * Tells whether the accumulator holds the zero value (the empty string).
    * Must return true right after `reset()`.
    */
  override def isZero: Boolean = returnResult == ""

  /**
    * Creates a new accumulator holding the same string.
    *
    * @return an independent accumulator with the current content
    */
  override def copy(): AccumulatorV2[String, String] = {
    val myAcc = new MyAcc
    myAcc.returnResult = this.returnResult
    myAcc
  }

  /** Puts the accumulator back to the zero value. */
  override def reset(): Unit = {
    returnResult = ""
  }

  /**
    * Appends one value to the accumulated string.
    *
    * @param v text to append
    */
  override def add(v: String): Unit = {
    returnResult += v
  }

  /**
    * Appends the content of another accumulator to this one.
    *
    * @param other accumulator to merge; must be a `MyAcc`
    * @throws UnsupportedOperationException if `other` is of a different class
    */
  override def merge(other: AccumulatorV2[String, String]): Unit = other match {
    case o: MyAcc => returnResult += o.returnResult
    case _ => throw new UnsupportedOperationException(
      s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  /** The accumulated string. */
  override def value: String = returnResult
}

/**
  * Demo driver: registers a [[MyAcc]], adds every element of a 5-partition
  * RDD of strings to it and prints the accumulated value.
  */
object DefindSelfAccumulator2 {
  /**
    * Runs the demo on a local Spark master.
    *
    * @param args unused
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("selfAccumulator2")
    val sc = new SparkContext(conf)

    // Always release the SparkContext, even when the job fails.
    try {
      val infos = sc.parallelize(List[String]("a", "b", "c", "d", "e", "f"), 5)

      // Create the accumulator and register it with the context.
      val myacc = new MyAcc()
      sc.register(myacc, "myacc")

      // Update the accumulator inside an action: updates made inside
      // transformations such as map may be applied more than once when a
      // task or stage is re-executed.
      infos.foreach(one => myacc.add(one))

      println(s" 累计器值 = ${myacc.value}")
    } finally {
      sc.stop()
    }
  }
}

 

package com.wuyue.spark.util;

import com.wuyue.spark.conf.Constants;
import org.apache.spark.util.AccumulatorV2;

/**
 * Accumulator whose state is one '|'-delimited string of {@code field=value}
 * pairs. Per the format example in {@link #myAdd}, the count fields hold
 * integers that are summed, and the camera-info field holds text that is
 * concatenated with '~'.
 */
public class SelfDefineAccumulator extends AccumulatorV2<String,String> {

    /**
     * Zero value shared by {@link #reset()} and {@link #isZero()} so the two
     * can never drift apart. Every field is followed by a '|' separator
     * except the last one.
     * NOTE(review): presumably the Constants resolve to the field names shown
     * in the format example in myAdd - confirm against Constants.
     */
    private static final String ZERO_VALUE =
            Constants.FIELD_NORMAL_MONITOR_COUNT + "=0|"
            + Constants.FIELD_NORMAL_CAMERA_COUNT + "=0|"
            + Constants.FIELD_ABNORMAL_MONITOR_COUNT + "=0|"
            + Constants.FIELD_ABNORMAL_CAMERA_COUNT + "=0|"
            + Constants.FIELD_ABNORMAL_MONITOR_CAMERA_INFOS + "= ";

    /** Accumulated string; empty until the first add/merge or reset. */
    String returnResult = "";

    /**
     * Tells whether the accumulator holds the zero value installed by
     * {@link #reset()}.
     *
     * @return true when the state equals the zero value
     */
    @Override
    public boolean isZero() {
        return ZERO_VALUE.equals(returnResult);
    }

    /**
     * Creates a new accumulator holding the same string.
     *
     * @return an independent accumulator with the current state
     */
    @Override
    public AccumulatorV2<String, String> copy() {
        SelfDefineAccumulator acc = new SelfDefineAccumulator();
        acc.returnResult = this.returnResult;
        return acc;
    }

    /**
     * Puts the accumulator back to the zero value; {@link #isZero()} returns
     * true afterwards.
     */
    @Override
    public void reset() {
        returnResult = ZERO_VALUE;
    }

    /**
     * Folds one '|'-delimited string of field=value pairs into the state.
     *
     * @param v pairs to add
     */
    @Override
    public void add(String v) {
        returnResult = myAdd(returnResult, v);
    }

    /**
     * Combines two '|'-delimited field=value strings.
     *
     * Expected format (example):
     * normalMonitorCount=0|normalCameraCount=0|abnormalMonitorCount=1|abnormalCameraCount=3|abnormalMonitorCameraInfos= ~"0001":07553,07554,07556~"0001":07553,07554,07556
     *
     * @param str1 current state; when empty, str2 is returned unchanged
     * @param str2 pairs to fold into str1
     * @return str1 with every field of str2 that also exists in str1 merged in
     */
    private String myAdd(String str1, String str2) {
        if (StringUtils.isEmpty(str1)) {
            // Nothing accumulated yet: the incoming string becomes the state.
            return str2;
        }
        if (str2 == null || str2.isEmpty()) {
            // Nothing to add.
            return str1;
        }

        // "\\|" is the regex-escaped form of the '|' delimiter.
        for (String pair : str2.split("\\|")) {
            // Limit 2: only the first '=' separates field and value, so a
            // value that itself contains '=' is kept whole.
            String[] fieldAndVal = pair.split("=", 2);
            if (fieldAndVal.length < 2) {
                // Malformed pair without '=': nothing to merge.
                continue;
            }
            String field = fieldAndVal[0];
            String value = fieldAndVal[1];

            // NOTE(review): presumably returns null when str1 has no such field - confirm.
            String oldVal = StringUtils.getFieldFromConcatString(str1, "\\|", field);
            if (oldVal == null) {
                // Field unknown to the current state: skip it.
                continue;
            }

            if (Constants.FIELD_ABNORMAL_MONITOR_CAMERA_INFOS.equals(field)) {
                // Free-text field: concatenate with '~'. Strip the value's own
                // leading separator (" ~" in the documented format) so it is
                // not doubled.
                if (value.startsWith(" ~")) {
                    value = value.substring(2);
                } else if (value.startsWith("~")) {
                    value = value.substring(1);
                }
                str1 = StringUtils.setFieldInConcatString(str1, "\\|", field, oldVal + "~" + value);
            } else {
                // Every other field is an integer counter: sum old and new.
                int newVal = Integer.parseInt(oldVal) + Integer.parseInt(value);
                str1 = StringUtils.setFieldInConcatString(str1, "\\|", field, String.valueOf(newVal));
            }
        }

        return str1;
    }

    /**
     * Folds the state of another accumulator into this one.
     *
     * @param other accumulator to merge; must be a SelfDefineAccumulator
     * @throws UnsupportedOperationException if other is of a different class
     */
    @Override
    public void merge(AccumulatorV2<String, String> other) {
        if (!(other instanceof SelfDefineAccumulator)) {
            throw new UnsupportedOperationException(
                    "Cannot merge " + getClass().getName() + " with " + other.getClass().getName());
        }
        SelfDefineAccumulator accumulator = (SelfDefineAccumulator) other;
        returnResult = myAdd(returnResult, accumulator.returnResult);
    }

    /**
     * @return the accumulated string
     */
    @Override
    public String value() {
        return returnResult;
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值