2021SC@SDUSC
NoneGrouping
不关注并行处理的负载均衡策略时使用该方式,目前等同于Shuffle Grouping;另外,Storm将会(在可能的情况下)把该bolt任务和为它提供数据的上游任务安排在同一个线程中执行。
public static class NoneGrouping implements CustomStreamGrouping {
private final Random random;
private List<Integer> targetTasks;
private int numTasks;
public NoneGrouper() {
random = new Random();
}
@Override
public void prepare(WorkerTopologyContext context, GlobalStreamId stream, List<Integer> targetTasks) {
this.targetTasks = targetTasks;
this.numTasks = targetTasks.size();
}
@Override
public List<Integer> chooseTasks(int taskId, List<Object> values) {
int index = random.nextInt(numTasks);
return Collections.singletonList(targetTasks.get(index));
}
}
这里通过random.nextInt(numTasks)随机取task
AllGrouping
广播发送,对于每一个tuple,所有的bolts都会收到。
/**
 * Broadcast grouping: every tuple is routed to all target tasks.
 */
public static class AllGrouping implements CustomStreamGrouping {
    // Target task ids captured in prepare and returned unchanged for every tuple.
    private List<Integer> targetTasks;

    /**
     * Remembers the full list of target tasks.
     *
     * @param context     worker topology context (not used by this grouping)
     * @param stream      stream being grouped (not used by this grouping)
     * @param targetTasks task ids that tuples may be sent to
     */
    @Override
    public void prepare(final WorkerTopologyContext context,
                        final GlobalStreamId stream,
                        final List<Integer> targetTasks) {
        this.targetTasks = targetTasks;
    }

    /**
     * Returns every target task, regardless of the tuple's content.
     *
     * @param taskId id of the emitting task (not used by this grouping)
     * @param values tuple values (not used by this grouping)
     * @return the same list instance that was passed to {@code prepare}
     */
    @Override
    public List<Integer> chooseTasks(final int taskId, final List<Object> values) {
        return this.targetTasks;
    }
}
这里返回所有的targetTasks。
PartialKeyGrouping
/**
 * Stream grouping that maps each tuple's key to a small set of candidate tasks and sends the tuple
 * to one of those candidates.
 *
 * <p>The key is built from the tuple values named by {@code fields}; when no fields were supplied,
 * the first tuple value is used. An {@link AssignmentCreator} turns the key into candidate task ids
 * and a {@link TargetSelector} picks the single task that receives the tuple.
 */
public class PartialKeyGrouping implements CustomStreamGrouping, Serializable {
    private static final long serialVersionUID = -1672360572274911808L;

    private List<Integer> targetTasks;
    private Fields fields = null;
    private Fields outFields = null;
    private AssignmentCreator assignmentCreator;
    private TargetSelector targetSelector;

    /**
     * Creates a grouping keyed on the first tuple value, using the default assignment creator and
     * target selector.
     */
    public PartialKeyGrouping() {
        this(null);
    }

    /**
     * Creates a grouping keyed on the given fields, using {@link RandomTwoTaskAssignmentCreator} and
     * {@link BalancedTargetSelector}.
     *
     * @param fields fields that make up the key, or {@code null} to key on the first tuple value
     */
    public PartialKeyGrouping(Fields fields) {
        this(fields, new RandomTwoTaskAssignmentCreator(), new BalancedTargetSelector());
    }

    /**
     * Creates a grouping with a custom assignment creator and a {@link BalancedTargetSelector}.
     *
     * @param fields            fields that make up the key, or {@code null} to key on the first tuple value
     * @param assignmentCreator strategy that maps a key to candidate tasks
     */
    public PartialKeyGrouping(Fields fields, AssignmentCreator assignmentCreator) {
        this(fields, assignmentCreator, new BalancedTargetSelector());
    }

    /**
     * Creates a grouping with custom assignment and selection strategies.
     *
     * @param fields            fields that make up the key, or {@code null} to key on the first tuple value
     * @param assignmentCreator strategy that maps a key to candidate tasks
     * @param targetSelector    strategy that picks one task among the candidates
     */
    public PartialKeyGrouping(Fields fields, AssignmentCreator assignmentCreator, TargetSelector targetSelector) {
        this.fields = fields;
        this.assignmentCreator = assignmentCreator;
        this.targetSelector = targetSelector;
    }

    /**
     * Stores the target tasks and, when key fields were configured, looks up the output fields of the
     * grouped stream so the key values can be selected later.
     *
     * @param context     worker topology context used to resolve the stream's output fields
     * @param stream      stream being grouped
     * @param targetTasks task ids that tuples may be sent to
     */
    @Override
    public void prepare(WorkerTopologyContext context, GlobalStreamId stream, List<Integer> targetTasks) {
        this.targetTasks = targetTasks;
        if (this.fields != null) {
            this.outFields = context.getComponentOutputFields(stream);
        }
    }

    /**
     * Chooses the task that receives the given tuple.
     *
     * @param taskId id of the emitting task (not used by this grouping)
     * @param values tuple values
     * @return a list holding the single chosen task id, or an empty list when {@code values} is empty
     */
    @Override
    public List<Integer> chooseTasks(int taskId, List<Object> values) {
        List<Integer> boltIds = new ArrayList<>(1);
        if (!values.isEmpty()) {
            final byte[] rawKeyBytes = getKeyBytes(values);
            final int[] taskAssignmentForKey = assignmentCreator.createAssignment(this.targetTasks, rawKeyBytes);
            final int selectedTask = targetSelector.chooseTask(taskAssignmentForKey);
            boltIds.add(selectedTask);
        }
        return boltIds;
    }

    /**
     * Extracts the key of a tuple as bytes.
     *
     * <p>With key fields configured, the result is the concatenation of one 4-byte hash per selected
     * value. Otherwise it is the UTF-8 encoding of the first value's string form.
     */
    private byte[] getKeyBytes(List<Object> values) {
        if (fields == null) {
            // Assume the key is the first field. The charset is explicit so that the same key yields
            // the same bytes regardless of the JVM's default encoding.
            return values.get(0).toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        }
        List<Object> selectedFields = outFields.select(fields, values);
        ByteBuffer out = ByteBuffer.allocate(selectedFields.size() * Integer.BYTES);
        for (Object o : selectedFields) {
            out.putInt(hashOf(o));
        }
        return out.array();
    }

    /**
     * Computes a content-based hash for one key value: element-wise for lists and arrays,
     * {@code hashCode()} for other objects, and 0 for {@code null}.
     */
    private static int hashOf(Object o) {
        if (o instanceof List) {
            return Arrays.deepHashCode(((List<?>) o).toArray());
        } else if (o instanceof Object[]) {
            return Arrays.deepHashCode((Object[]) o);
        } else if (o instanceof byte[]) {
            return Arrays.hashCode((byte[]) o);
        } else if (o instanceof short[]) {
            return Arrays.hashCode((short[]) o);
        } else if (o instanceof int[]) {
            return Arrays.hashCode((int[]) o);
        } else if (o instanceof long[]) {
            return Arrays.hashCode((long[]) o);
        } else if (o instanceof char[]) {
            return Arrays.hashCode((char[]) o);
        } else if (o instanceof float[]) {
            return Arrays.hashCode((float[]) o);
        } else if (o instanceof double[]) {
            return Arrays.hashCode((double[]) o);
        } else if (o instanceof boolean[]) {
            return Arrays.hashCode((boolean[]) o);
        } else if (o != null) {
            return o.hashCode();
        } else {
            return 0;
        }
    }

    /*==================================================
     * Helper Classes
     *==================================================*/

    /**
     * Chooses the subset of target tasks that tuples with a given key may be sent to.
     */
    public interface AssignmentCreator extends Serializable {
        int[] createAssignment(List<Integer> targetTasks, byte[] key);
    }

    /**
     * Picks one element of a task assignment as the destination of a particular tuple.
     */
    public interface TargetSelector extends Serializable {
        Integer chooseTask(int[] assignedTasks);
    }

    /*========== Implementations ==========*/

    /**
     * {@link AssignmentCreator} that picks two pseudo-random tasks for each key.
     */
    public static class RandomTwoTaskAssignmentCreator implements AssignmentCreator {
        /**
         * Creates a two-task assignment by drawing two indices from a generator seeded with the key's hash.
         *
         * @param tasks candidate task ids
         * @param key   key bytes
         * @return an array of two task ids; both are the same task when {@code tasks} has one element
         */
        @Override
        public int[] createAssignment(List<Integer> tasks, byte[] key) {
            // Seeding with the key's hash makes the assignment deterministic for a given key.
            final long seedForRandom = Arrays.hashCode(key);
            final Random random = new Random(seedForRandom);
            final int choice1 = random.nextInt(tasks.size());
            int choice2 = random.nextInt(tasks.size());
            // Make sure choice1 and choice2 are not the same task (when more than one task exists).
            choice2 = choice1 == choice2 ? (choice2 + 1) % tasks.size() : choice2;
            return new int[]{ tasks.get(choice1), tasks.get(choice2) };
        }
    }

    /**
     * A basic implementation of target selection. This strategy chooses the task within the assignment
     * that has received the fewest tuples overall from this instance of the grouping.
     */
    public static class BalancedTargetSelector implements TargetSelector {
        private Map<Integer, Long> targetTaskStats = Maps.newHashMap();

        /**
         * Chooses, among the given tasks, the one selected least often so far and records the choice.
         *
         * @param assignedTasks candidate task ids
         * @return the chosen task id; on a tie the earliest candidate wins; {@code null} when
         *         {@code assignedTasks} is empty
         */
        @Override
        public Integer chooseTask(int[] assignedTasks) {
            Integer taskIdWithMinLoad = null;
            long minTaskLoad = Long.MAX_VALUE;
            for (int currentTaskId : assignedTasks) {
                final long currentTaskLoad = targetTaskStats.getOrDefault(currentTaskId, 0L);
                if (currentTaskLoad < minTaskLoad) {
                    minTaskLoad = currentTaskLoad;
                    taskIdWithMinLoad = currentTaskId;
                }
            }
            // Increment the chosen task's counter, starting from 1 on first use.
            targetTaskStats.merge(taskIdWithMinLoad, 1L, Long::sum);
            return taskIdWithMinLoad;
        }
    }
}
BalancedTargetSelector在字段声明处初始化了Map<Integer, Long> targetTaskStats(而不是在prepare中初始化long[]),用于统计每个task被选中的次数。
partialKeyGrouping如果没有指定fields,则默认按outputFields的第一个field来计算。
BalancedTargetSelector遍历候选的taskId,根据targetTaskStats找出被选中次数最少的taskIdWithMinLoad,将其计数加1后返回。
这里通过RandomTwoTaskAssignmentCreator来选中两个taskId,然后选择使用次数小的那个。
getKeyBytes()方法从输入的Tuple中提取键。