**************************************
1、aggregate(zeroValue, seqOp, combOp):seqOp 的 Function2 三个类型参数中,第一个为初始值(累加值)的类型,第二个为 element 类型,第三个为返回值类型,其中第一个和第三个可以为自定义类型
例如需要同时返回元素的和与元素的个数时,初始值(第一个参数)需要传入一个已初始化的自定义对象
// Total of getDistanceOfSequence() over all elements of sequencesRDD.
// The first function folds one element into a running Double total;
// the second function adds two Double partial totals together.
Double distenceAccount = sequencesRDD.aggregate(
        0.0,
        new Function2<Double, SequenceForCalc, Double>() {
            @Override
            public Double call(Double runningTotal, SequenceForCalc sequence) throws Exception {
                return runningTotal + sequence.getDistanceOfSequence();
            }
        },
        new Function2<Double, Double, Double>() {
            @Override
            public Double call(Double left, Double right) throws Exception {
                return left + right;
            }
        });
// Total distance divided by the number of partitions (numSlices).
double distencePerPartition = distenceAccount / numSlices;
************************************************************
2、mapToPair
// Turn every element into a (sequence, 1) pair so that the sequence itself becomes the key.
JavaPairRDD<SequenceForCalc, Integer> rddKeyVal = sequencesRDD.mapToPair(
        new PairFunction<SequenceForCalc, SequenceForCalc, Integer>() {
            @Override
            public Tuple2<SequenceForCalc, Integer> call(SequenceForCalc key) {
                Integer one = 1;
                return new Tuple2<SequenceForCalc, Integer>(key, one);
            }
        });
**********************************************
3、自定义分区:在应用自定义分区之前,需要先排序并合并到同一个分区(coalesce(1)),然后再统一进行分区操作
MyPartitioner 为自定义分区的类,numSlices 为分区个数
// Sort the pairs by key with ascending = false (descending order), then coalesce into one partition.
JavaPairRDD<SequenceForCalc, Integer> rdd4Repartition = rddKeyVal.sortByKey(false).coalesce(1);
// Redistribute the pairs with the custom partitioner, configured for numSlices partitions.
JavaPairRDD<SequenceForCalc, Integer> rdd4Calculate = rdd4Repartition.partitionBy(new MyPartitioner(numSlices));
/**
 * Partitioner that assigns each key to the partition whose accumulated distance is
 * currently the smallest, then adds the key's distance to that partition's total.
 *
 * <p>{@link #getPartition(Object)} mutates internal state, so its result depends on the
 * keys this instance has already seen and on the order in which they arrive.
 * NOTE(review): confirm that this call-order dependence is acceptable where the
 * partitioner is used.
 *
 * <p>Two instances are equal when they have the same number of partitions; the
 * accumulated totals are not part of equality or of the hash code.
 */
public class MyPartitioner extends Partitioner {
    private int partitionNum;
    double[] partitionDisLst;

    /**
     * @param num number of partitions; also the length of the per-partition totals array
     */
    public MyPartitioner(int num) {
        this.partitionNum = num;
        // A freshly allocated double[] already holds 0.0 in every slot.
        this.partitionDisLst = new double[num];
    }

    /** @return the number of partitions given to the constructor */
    @Override
    public int numPartitions() {
        return partitionNum;
    }

    /**
     * Picks the partition with the smallest accumulated distance (the lowest index wins
     * ties), adds this key's distance to it and returns its index.
     *
     * @param obj the key; it is cast to {@code SequenceForCalc}
     * @return index of the chosen partition
     */
    @Override
    public int getPartition(Object obj) {
        SequenceForCalc sequence = (SequenceForCalc) obj;

        int lightest = 0;
        int candidate = 1;
        while (candidate < partitionDisLst.length) {
            if (partitionDisLst[candidate] < partitionDisLst[lightest]) {
                lightest = candidate;
            }
            candidate++;
        }

        partitionDisLst[lightest] += sequence.getDistanceOfSequence();
        return lightest;
    }

    /**
     * @return {@code true} when {@code obj} is a {@code MyPartitioner} with the same
     *         number of partitions
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        // instanceof is false for null, so this guard also rejects a null argument.
        if (!(obj instanceof MyPartitioner)) {
            return false;
        }
        return this.partitionNum == ((MyPartitioner) obj).partitionNum;
    }

    /** @return the number of partitions, matching what {@link #equals(Object)} compares */
    @Override
    public int hashCode() {
        return this.partitionNum;
    }
}
********************************************************
4、mapPartitions 操作
// Processes rdd4Calculate one partition at a time: for every (sequence, count) tuple it calls
// CalcSCH on the sequence and emits a Tuple3 of (info label, result code, text captured in sw).
JavaRDD<Tuple3<String, Integer, String>> rdd = rdd4Calculate
.mapPartitions(new FlatMapFunction<Iterator<Tuple2<SequenceForCalc,Integer>>, Tuple3<String, Integer, String>>() {
/**
* Serialization version id of this anonymous function class.
*/
private static final long serialVersionUID = 1L;
// Called with an iterator over the tuples of one partition; returns an iterator over the
// results collected for that partition.
@Override
public Iterator<Tuple3<String, Integer, String>> call(Iterator<Tuple2<SequenceForCalc,Integer>> item)
throws Exception {
// All results of the partition are buffered in this list before being returned.
List<Tuple3<String, Integer, String>> set = new LinkedList<>();
try {
while (item.hasNext()) {
// Only the key (the sequence) is used; the Integer value of the pair is ignored.
SequenceForCalc sequenceForCalc = item.next()._1;
// The line below is an elision marker in these notes, not code. The declarations of
// info, result, sw and pw are presumably in the omitted part; confirm against the real source.
*******
try {
// Label of the form "link-<groupID>" or "lane-<groupID>".
info = String.format("%s-%d", sequenceForCalc.isLinks() ? "link" : "lane",
sequenceForCalc.getGroupID());
pw = new PrintWriter(sw);
LOG.info("sch calculator: " + info);
sequenceForCalc.CalcSCH(oracleConnForSCHCalc);
} catch (Exception ex) {
// A failure for one sequence is logged and recorded; the loop continues with the next one.
LOG.error(ex.getMessage(), ex);
// NOTE(review): if the exception is thrown before pw is assigned above and the elided
// code leaves pw null, this line throws a NullPointerException; confirm.
pw.print(ex.getMessage());
result = CALC_FAILURE;
} finally {
if (null != pw) {
pw.close();
}
}
set.add(new Tuple3<>(info, result, sw.toString()));
}
return set.iterator();
} catch (Exception ex) {
// Any exception escaping the loop is rethrown wrapped in a new Exception, keeping ex as the cause.
throw new Exception(ex);
} finally {
// The connection is closed once the partition has been processed or has failed.
// oracleConnForSCHCalc is not declared in the visible part of this snippet.
if (null != oracleConnForSCHCalc) {
oracleConnForSCHCalc.close();
}
}
}
});
// Print the SCH calculation results.
// collect() returns all elements of the RDD as a List.
List<Tuple3<String, Integer, String>> results = rdd.collect();
// Presumably filled with the failed entries; its use is not shown in this snippet.
List<Tuple3<String, Integer, String>> failure = new LinkedList<>();
*************************************************************************
5、
1、aggregate(zeroValue, seqOp, combOp):seqOp 的 Function2 三个类型参数中,第一个为初始值(累加值)的类型,第二个为 element 类型,第三个为返回值类型,其中第一个和第三个可以为自定义类型
例如需要同时返回元素的和与元素的个数时,初始值(第一个参数)需要传入一个已初始化的自定义对象
// Total of getDistanceOfSequence() over all elements of sequencesRDD.
// The first function folds one element into a running Double total;
// the second function adds two Double partial totals together.
Double distenceAccount = sequencesRDD.aggregate(
        0.0,
        new Function2<Double, SequenceForCalc, Double>() {
            @Override
            public Double call(Double runningTotal, SequenceForCalc sequence) throws Exception {
                return runningTotal + sequence.getDistanceOfSequence();
            }
        },
        new Function2<Double, Double, Double>() {
            @Override
            public Double call(Double left, Double right) throws Exception {
                return left + right;
            }
        });
// Total distance divided by the number of partitions (numSlices).
double distencePerPartition = distenceAccount / numSlices;
************************************************************
2、mapToPair
// Turn every element into a (sequence, 1) pair so that the sequence itself becomes the key.
JavaPairRDD<SequenceForCalc, Integer> rddKeyVal = sequencesRDD.mapToPair(
        new PairFunction<SequenceForCalc, SequenceForCalc, Integer>() {
            @Override
            public Tuple2<SequenceForCalc, Integer> call(SequenceForCalc key) {
                Integer one = 1;
                return new Tuple2<SequenceForCalc, Integer>(key, one);
            }
        });
**********************************************
3、自定义分区:在应用自定义分区之前,需要先排序并合并到同一个分区(coalesce(1)),然后再统一进行分区操作
MyPartitioner 为自定义分区的类,numSlices 为分区个数
// Sort the pairs by key with ascending = false (descending order), then coalesce into one partition.
JavaPairRDD<SequenceForCalc, Integer> rdd4Repartition = rddKeyVal.sortByKey(false).coalesce(1);
// Redistribute the pairs with the custom partitioner, configured for numSlices partitions.
JavaPairRDD<SequenceForCalc, Integer> rdd4Calculate = rdd4Repartition.partitionBy(new MyPartitioner(numSlices));
/**
 * Partitioner that assigns each key to the partition whose accumulated distance is
 * currently the smallest, then adds the key's distance to that partition's total.
 *
 * <p>{@link #getPartition(Object)} mutates internal state, so its result depends on the
 * keys this instance has already seen and on the order in which they arrive.
 * NOTE(review): confirm that this call-order dependence is acceptable where the
 * partitioner is used.
 *
 * <p>Two instances are equal when they have the same number of partitions; the
 * accumulated totals are not part of equality or of the hash code.
 */
public class MyPartitioner extends Partitioner {
    private int partitionNum;
    double[] partitionDisLst;

    /**
     * @param num number of partitions; also the length of the per-partition totals array
     */
    public MyPartitioner(int num) {
        this.partitionNum = num;
        // A freshly allocated double[] already holds 0.0 in every slot.
        this.partitionDisLst = new double[num];
    }

    /** @return the number of partitions given to the constructor */
    @Override
    public int numPartitions() {
        return partitionNum;
    }

    /**
     * Picks the partition with the smallest accumulated distance (the lowest index wins
     * ties), adds this key's distance to it and returns its index.
     *
     * @param obj the key; it is cast to {@code SequenceForCalc}
     * @return index of the chosen partition
     */
    @Override
    public int getPartition(Object obj) {
        SequenceForCalc sequence = (SequenceForCalc) obj;

        int lightest = 0;
        int candidate = 1;
        while (candidate < partitionDisLst.length) {
            if (partitionDisLst[candidate] < partitionDisLst[lightest]) {
                lightest = candidate;
            }
            candidate++;
        }

        partitionDisLst[lightest] += sequence.getDistanceOfSequence();
        return lightest;
    }

    /**
     * @return {@code true} when {@code obj} is a {@code MyPartitioner} with the same
     *         number of partitions
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        // instanceof is false for null, so this guard also rejects a null argument.
        if (!(obj instanceof MyPartitioner)) {
            return false;
        }
        return this.partitionNum == ((MyPartitioner) obj).partitionNum;
    }

    /** @return the number of partitions, matching what {@link #equals(Object)} compares */
    @Override
    public int hashCode() {
        return this.partitionNum;
    }
}
********************************************************
4、mapPartitions 操作
// Processes rdd4Calculate one partition at a time: for every (sequence, count) tuple it calls
// CalcSCH on the sequence and emits a Tuple3 of (info label, result code, text captured in sw).
JavaRDD<Tuple3<String, Integer, String>> rdd = rdd4Calculate
.mapPartitions(new FlatMapFunction<Iterator<Tuple2<SequenceForCalc,Integer>>, Tuple3<String, Integer, String>>() {
/**
* Serialization version id of this anonymous function class.
*/
private static final long serialVersionUID = 1L;
// Called with an iterator over the tuples of one partition; returns an iterator over the
// results collected for that partition.
@Override
public Iterator<Tuple3<String, Integer, String>> call(Iterator<Tuple2<SequenceForCalc,Integer>> item)
throws Exception {
// All results of the partition are buffered in this list before being returned.
List<Tuple3<String, Integer, String>> set = new LinkedList<>();
try {
while (item.hasNext()) {
// Only the key (the sequence) is used; the Integer value of the pair is ignored.
SequenceForCalc sequenceForCalc = item.next()._1;
// The line below is an elision marker in these notes, not code. The declarations of
// info, result, sw and pw are presumably in the omitted part; confirm against the real source.
*******
try {
// Label of the form "link-<groupID>" or "lane-<groupID>".
info = String.format("%s-%d", sequenceForCalc.isLinks() ? "link" : "lane",
sequenceForCalc.getGroupID());
pw = new PrintWriter(sw);
LOG.info("sch calculator: " + info);
sequenceForCalc.CalcSCH(oracleConnForSCHCalc);
} catch (Exception ex) {
// A failure for one sequence is logged and recorded; the loop continues with the next one.
LOG.error(ex.getMessage(), ex);
// NOTE(review): if the exception is thrown before pw is assigned above and the elided
// code leaves pw null, this line throws a NullPointerException; confirm.
pw.print(ex.getMessage());
result = CALC_FAILURE;
} finally {
if (null != pw) {
pw.close();
}
}
set.add(new Tuple3<>(info, result, sw.toString()));
}
return set.iterator();
} catch (Exception ex) {
// Any exception escaping the loop is rethrown wrapped in a new Exception, keeping ex as the cause.
throw new Exception(ex);
} finally {
// The connection is closed once the partition has been processed or has failed.
// oracleConnForSCHCalc is not declared in the visible part of this snippet.
if (null != oracleConnForSCHCalc) {
oracleConnForSCHCalc.close();
}
}
}
});
// Print the SCH calculation results.
// collect() returns all elements of the RDD as a List.
List<Tuple3<String, Integer, String>> results = rdd.collect();
// Presumably filled with the failed entries; its use is not shown in this snippet.
List<Tuple3<String, Integer, String>> failure = new LinkedList<>();
*************************************************************************
5、