// 今天也要努力学习 (Keep studying hard today, too.)
package com.bjsxt.scalaspark.core.examples
import org.apache.spark.util.AccumulatorV2
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Custom accumulator example.
 *
 * A custom accumulator extends `AccumulatorV2[IN, OUT]`: the first type parameter is the
 * input type accepted by `add`, the second is the output type returned by `value`.
 * The value installed by `reset()` has to be the value that `isZero` recognises.
 *
 * `Info` is the record accumulated in this example: a running element count and a running
 * age total. Both fields are `var` because `SelfAccumlator` updates them in place.
 */
case class Info(var totalCount:Int,var totalAge :Int)
/**
 * Accumulator that sums [[Info]] records field by field (`totalCount` and `totalAge`).
 *
 * NOTE(review): the "zero" value used by `reset()`/`isZero` is (100, 200), not (0, 0).
 * Every copy that is reset therefore starts from (100, 200), and that offset ends up in the
 * merged totals. This looks like a deliberate demonstration of how the reset value flows
 * into the final result; confirm before reusing this class for real aggregation.
 */
class SelfAccumlator extends AccumulatorV2[Info, Info] {

  /** Running totals. An instance built with `new` starts at (0, 0). */
  private var result: Info = Info(0, 0)

  /**
   * Reports whether the accumulator currently holds the value installed by `reset()`.
   *
   * The debug line prints the comparison against (0, 0), while the returned flag compares
   * against (100, 200) so that it agrees with `reset()`.
   */
  override def isZero: Boolean = {
    println("判断 累加器是否是初始值***"+(result.totalAge == 0 && result.totalCount ==0)+" ***end")
    result.totalCount == 100 && result.totalAge == 200
  }

  /**
   * Creates an independent accumulator holding the same totals.
   *
   * The `Info` is duplicated rather than shared: `Info` has mutable fields and `add`/`merge`
   * update it in place, so sharing the reference would make updates on either accumulator
   * visible in the other.
   */
  override def copy(): AccumulatorV2[Info, Info] = {
    val duplicate = new SelfAccumlator()
    duplicate.result = Info(result.totalCount, result.totalAge)
    duplicate
  }

  /** Installs the starting value (100, 200); `isZero` returns true right after this call. */
  override def reset(): Unit = {
    result = Info(100, 200)
  }

  /**
   * Adds one record to the running totals.
   *
   * @param v record whose `totalCount` and `totalAge` are added to the current totals
   */
  override def add(v: Info): Unit = {
    println(s" in add method : v = $v ,v.totalCount = ${v.totalCount},v.totalAge = ${v.totalAge}")
    result.totalAge += v.totalAge
    result.totalCount += v.totalCount
  }

  /**
   * Folds the totals of another accumulator into this one.
   *
   * @param other accumulator to merge; must be a [[SelfAccumlator]]
   * @throws UnsupportedOperationException if `other` is a different accumulator type
   */
  override def merge(other: AccumulatorV2[Info, Info]): Unit = other match {
    case o: SelfAccumlator =>
      println(s" in merge method : o = $o ")
      result.totalCount += o.result.totalCount
      result.totalAge += o.result.totalAge
    case _ =>
      throw new UnsupportedOperationException(
        s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  /** Current totals held by this accumulator. */
  override def value: Info = result
}
/**
 * Demo entry point: parses "name age" strings spread over three partitions, feeds one
 * `Info(1, age)` per element into a registered [[SelfAccumlator]], and prints the totals.
 */
object DefindSelfAccumulator {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("selfAccumulator")
    val sc = new SparkContext(conf)
    // Always release the SparkContext, even if the job fails.
    try {
      val nameList = sc.parallelize(List[String](
        "A 1", "B 2", "C 3",
        "D 4", "E 5", "F 6",
        "G 7", "H 8", "I 9"
      ), 3)
      println(s"nameList RDD partition length = ${nameList.getNumPartitions}")

      // Create the accumulator and register it with the context under a display name.
      val myAccumulator = new SelfAccumlator()
      sc.register(myAccumulator, "First Accumulator")

      val transInfo = nameList.map { one =>
        // Each element is "<name> <age>"; the second token is the age.
        val info = Info(1, one.split(" ")(1).toInt)
        myAccumulator.add(info)
        info
      }
      // map is lazy: the count() action is what actually runs the additions.
      transInfo.count()
      println(s"accumulator totalCount = ${myAccumulator.value.totalCount}, totalAge = ${myAccumulator.value.totalAge}")
    } finally {
      sc.stop()
    }
  }
}
package com.wuyue.examples
import org.apache.spark.util.AccumulatorV2
import org.apache.spark.{SparkConf, SparkContext}
/**
 * String-concatenating accumulator: `add` and `merge` append to `returnResult`.
 *
 * NOTE(review): the zero value used by `reset()`/`isZero` is "X" while a freshly constructed
 * instance holds "". Every reset copy therefore contributes an "X" to the merged string.
 * This looks like a deliberate demonstration of how the reset value shows up in the result;
 * confirm before reusing.
 */
class MyAcc extends AccumulatorV2[String, String] {
  // var returnResult = "ppp"
  /** Accumulated text; empty on a freshly constructed instance. */
  var returnResult = ""

  /** True when the accumulator holds exactly the value installed by `reset()`. */
  override def isZero: Boolean = "X".equals(returnResult)

  /** Creates a new accumulator holding the same text (strings are immutable, so sharing is safe). */
  override def copy(): AccumulatorV2[String, String] = {
    val duplicate = new MyAcc
    duplicate.returnResult = this.returnResult
    duplicate
  }

  /** Installs the starting value "X". */
  override def reset(): Unit = {
    returnResult = "X"
  }

  /**
   * Appends one string to the accumulated text.
   *
   * @param v text to append
   */
  override def add(v: String): Unit = {
    returnResult += v
  }

  /**
   * Appends the text accumulated by another [[MyAcc]].
   *
   * @param other accumulator to merge; must be a [[MyAcc]]
   * @throws UnsupportedOperationException if `other` is a different accumulator type
   */
  override def merge(other: AccumulatorV2[String, String]): Unit = other match {
    case o: MyAcc =>
      returnResult += o.returnResult
    case _ =>
      throw new UnsupportedOperationException(
        s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  /** Current accumulated text. */
  override def value: String = returnResult
}
/**
 * Demo entry point: distributes six letters over five partitions, appends each one to a
 * registered [[MyAcc]], and prints the accumulated string.
 */
object DefindSelfAccumulator2 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("selfAccumulator2")
    val sc = new SparkContext(conf)
    // Always release the SparkContext, even if the job fails.
    try {
      val infos = sc.parallelize(List[String]("a", "b", "c", "d", "e", "f"), 5)

      // Create the accumulator and register it with the context under a display name.
      val myacc = new MyAcc()
      sc.register(myacc, "myacc")

      // map is lazy: the count() action is what actually runs the additions.
      infos.map { one =>
        myacc.add(one)
      }.count()
      println(s" 累计器值 = ${myacc.value}")
    } finally {
      sc.stop()
    }
  }
}
package com.wuyue.spark.util;
import com.wuyue.spark.conf.Constants;
import org.apache.spark.util.AccumulatorV2;
/**
 * Accumulator whose state is one string of '|'-separated {@code field=value} pairs.
 * Counter fields are summed as integers; the abnormal-monitor-camera-infos field is a
 * free-form string whose pieces are joined with '~'.
 */
public class SelfDefineAccumulator extends AccumulatorV2<String,String> {
    /** Current accumulated state; empty on a freshly constructed instance. */
    String returnResult = "";

    /**
     * Builds the zero value shared by {@link #reset()} and {@link #isZero()}, so the two can
     * never drift apart. Every field is separated by '|', which is what
     * {@link #myAdd(String, String)} splits on.
     *
     * NOTE(review): {@code isZero} previously compared against a hard-coded literal using the
     * names normalMonitorCount, normalCameraCount, abnormalMonitorCount, abnormalCameraCount
     * and abnormalMonitorCameraInfos; this assumes the Constants hold those names - confirm.
     */
    private static String zeroValue() {
        return Constants.FIELD_NORMAL_MONITOR_COUNT + "=0|"
                + Constants.FIELD_NORMAL_CAMERA_COUNT + "=0|"
                + Constants.FIELD_ABNORMAL_MONITOR_COUNT + "=0|"
                + Constants.FIELD_ABNORMAL_CAMERA_COUNT + "=0|"
                + Constants.FIELD_ABNORMAL_MONITOR_CAMERA_INFOS + "= ";
    }

    /**
     * Reports whether the state equals the value installed by {@link #reset()}.
     *
     * @return true when the accumulator holds the zero value
     */
    @Override
    public boolean isZero() {
        return zeroValue().equals(returnResult);
    }

    /**
     * Creates a new accumulator holding the same state string.
     *
     * @return an independent accumulator with the same value
     */
    @Override
    public AccumulatorV2<String, String> copy() {
        SelfDefineAccumulator acc = new SelfDefineAccumulator();
        acc.returnResult = this.returnResult;
        return acc;
    }

    /**
     * Installs the zero value: every counter at 0 and a single-space infos field.
     */
    @Override
    public void reset() {
        returnResult = zeroValue();
    }

    /**
     * Folds one '|'-separated {@code field=value} string into the current state.
     *
     * @param v the update to apply
     */
    @Override
    public void add(String v) {
        returnResult = myAdd(returnResult, v);
    }

    /**
     * Combines two state strings.
     *
     * Example of the expected layout:
     * normalMonitorCount=0|normalCameraCount=0|abnormalMonitorCount=1|abnormalCameraCount=3|abnormalMonitorCameraInfos= ~"0001":07553,07554,07556~"0001":07553,07554,07556
     *
     * @param str1 current state; when empty, {@code str2} is returned unchanged
     * @param str2 update to fold in; when null or empty, {@code str1} is returned unchanged
     * @return the combined state
     * @throws NumberFormatException if a counter field holds a non-integer value
     */
    private String myAdd(String str1, String str2) {
        if (StringUtils.isEmpty(str1)) {
            // Nothing accumulated yet: the update becomes the state.
            return str2;
        }
        if (str2 == null || str2.isEmpty()) {
            return str1;
        }

        // '|' is a regex metacharacter, hence the escaping.
        String[] valArr = str2.split("\\|");
        for (String pair : valArr) {
            // Limit 2 keeps any '=' inside the value and always yields a value slot for "field=".
            String[] fieldAndValArr = pair.split("=", 2);
            if (fieldAndValArr.length < 2) {
                // Segment without '=': not a field=value pair, nothing to merge.
                continue;
            }
            String field = fieldAndValArr[0];
            String value = fieldAndValArr[1];

            String oldVal = StringUtils.getFieldFromConcatString(str1, "\\|", field);
            if (oldVal == null) {
                // The current state has no such field; there is nothing to add to.
                continue;
            }

            if (Constants.FIELD_ABNORMAL_MONITOR_CAMERA_INFOS.equals(field)) {
                // The infos field is free-form text: concatenate with '~' instead of summing.
                // NOTE(review): substring(2) drops the '~' AND the character after it; the
                // sample data suggests the real prefix may be " ~" - confirm the intended prefix.
                if (value.startsWith("~")) {
                    value = value.substring(2);
                }
                str1 = StringUtils.setFieldInConcatString(str1, "\\|", field, oldVal + "~" + value);
            } else {
                // Every other field is an integer counter: add the two values.
                int newVal = Integer.parseInt(oldVal) + Integer.parseInt(value);
                str1 = StringUtils.setFieldInConcatString(str1, "\\|", field, String.valueOf(newVal));
            }
        }
        return str1;
    }

    /**
     * Folds the state of another accumulator into this one.
     *
     * @param other accumulator to merge; must be a {@code SelfDefineAccumulator}
     * @throws UnsupportedOperationException if {@code other} is a different accumulator type
     */
    @Override
    public void merge(AccumulatorV2<String, String> other) {
        if (!(other instanceof SelfDefineAccumulator)) {
            throw new UnsupportedOperationException(
                    "Cannot merge " + getClass().getName() + " with " + other.getClass().getName());
        }
        SelfDefineAccumulator accumulator = (SelfDefineAccumulator) other;
        returnResult = myAdd(returnResult, accumulator.returnResult);
    }

    /**
     * @return the current accumulated state string
     */
    @Override
    public String value() {
        return returnResult;
    }
}