1. 简述
UnsafeShuffleWriter的使用条件
1. map端 不做聚合操作
2. 序列化器支持对已序列化对象的重定位(supportsRelocationOfSerializedObjects),例如使用 KryoSerializer
3. 分区数不能超过 16777216(即 16777215 + 1 = 2^24)
2. 源码解析:
- write函数解析
发现结构非常的简单,从迭代器中取出数据,调用 insertRecordIntoSorter 方法
public void write(scala.collection.Iterator<Product2<K, V>> records) throws IOException {
// Keep track of success so we know if we encountered an exception
// We do this rather than a standard try/catch/re-throw to handle
// generic throwables.
boolean success = false;
try {
while (records.hasNext()) {
insertRecordIntoSorter(records.next());
}
closeAndWriteOutput();
success = true;
........
- insertRecordIntoSorter 解析
先校验 sorter 不为空,根据 key 计算分区 id,然后把记录序列化写入缓冲区,最后调用 sorter 的 insertRecord 函数
/**
 * Serializes a single record into the shared buffer and passes the resulting bytes,
 * together with the record's partition id, to the sorter.
 *
 * @param record the key/value pair to insert
 * @throws IOException propagated from the calls made in this method
 */
@VisibleForTesting
void insertRecordIntoSorter(Product2<K, V> record) throws IOException {
  assert (sorter != null);

  // Ask the partitioner which partition this record's key maps to.
  final K recordKey = record._1();
  final int targetPartition = partitioner.getPartition(recordKey);

  // Reset the buffer, then serialize the key and the value into it.
  serBuffer.reset();
  serOutputStream.writeKey(recordKey, OBJECT_CLASS_TAG);
  serOutputStream.writeValue(record._2(), OBJECT_CLASS_TAG);
  serOutputStream.flush();

  final int recordBytes = serBuffer.size();
  assert (recordBytes > 0);

  // Pass the buffer contents, the byte-array base offset and the byte count to the sorter.
  sorter.insertRecord(
      serBuffer.getBuf(), Platform.BYTE_ARRAY_OFFSET, recordBytes, targetPartition);
}
- sorter解析
那么我们先看看 sorter 是什么、是什么时候初始化的,
以及做了哪些事
// Initializes the sorter together with the serialization buffer and stream.
// Per the original note, this is called while an UnsafeShuffleWriter is being
// constructed (the call site is not shown in this excerpt).
private void open() {
  assert (sorter == null);

  final int partitionCount = partitioner.numPartitions();
  sorter = new ShuffleExternalSorter(
      memoryManager,
      blockManager,
      taskContext,
      initialSortBufferSize,
      partitionCount,
      sparkConf,
      writeMetrics);

  // The serialization stream writes into serBuffer.
  serBuffer = new MyByteArrayOutputStream(DEFAULT_INITIAL_SER_BUFFER_SIZE);
  serOutputStream = serializer.serializeStream(serBuffer);
}
// Next, the ShuffleExternalSorter constructor: the page size is handed to the superclass
// constructor via super(...).
// The original note states the page size ranges from 1MB (minimum) to 128MB (maximum) and that
// memory is on-heap or off-heap depending on the MemoryMode -- those figures are not visible
// in this excerpt; TODO confirm against PackedRecordPointer and the memory manager.
/**
 * Builds the sorter: forwards the capped page size and the Tungsten memory mode to the
 * superclass, stores the collaborators, reads buffer/spill settings from {@code conf},
 * and creates the in-memory sorter.
 *
 * @param memoryManager task memory manager; also supplies the page size and memory mode
 * @param blockManager stored in the {@code blockManager} field
 * @param taskContext stored in the {@code taskContext} field
 * @param initialSize passed to the {@code ShuffleInMemorySorter} constructor
 * @param numPartitions stored in the {@code numPartitions} field
 * @param conf configuration the buffer sizes, spill threshold and radix-sort flag are read from
 * @param writeMetrics stored in the {@code writeMetrics} field
 */
ShuffleExternalSorter(
TaskMemoryManager memoryManager,
BlockManager blockManager,
TaskContext taskContext,
int initialSize,
int numPartitions,
SparkConf conf,
ShuffleWriteMetrics writeMetrics) {
// Set the page size: the smaller of PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES and the
// memory manager's page size, narrowed to int.
super(memoryManager,
(int) Math.min(PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES, memoryManager.pageSizeBytes()),
memoryManager.getTungstenMemoryMode());
this.taskMemoryManager = memoryManager;
this.blockManager = blockManager;
this.taskContext = taskContext;
this.numPartitions = numPartitions;
// Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided
// Note: the casts bind tighter than '*', so the value is narrowed to int first and the
// multiplication by 1024 is done in int arithmetic.
this.fileBufferSizeBytes =
(int) (long) conf.get(package$.MODULE$.SHUFFLE_FILE_BUFFER_SIZE()) * 1024;
// Record-count threshold; insertRecord compares the in-memory record count against it.
this.numElementsForSpillThreshold =
(int) conf.get(package$.MODULE$.SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD());
this.writeMetrics = writeMetrics;
// The in-memory sorter receives this sorter, the initial size and the radix-sort flag
// (which defaults to true when the key is unset).
this.inMemSorter = new ShuffleInMemorySorter(
this, initialSize, conf.getBoolean("spark.shuffle.sort.useRadixSort", true));
// Initialize the peak with the memory usage measured right after the sorter is created.
this.peakMemoryUsedBytes = getMemoryUsage();
this.diskWriteBufferSize =
(int) (long) conf.get(package$.MODULE$.SHUFFLE_DISK_WRITE_BUFFER_SIZE());
}
- insertRecord 函数 解析
/**
 * Writes one record (a length prefix followed by the payload) at the current page cursor
 * and registers its encoded address with the in-memory sorter.
 *
 * @param recordBase base object the record bytes are copied from
 * @param recordOffset offset of the record relative to {@code recordBase}
 * @param length number of payload bytes to copy
 * @param partitionId partition id stored with the record's address
 * @throws IOException propagated from the calls made in this method
 */
public void insertRecord(Object recordBase, long recordOffset, int length, int partitionId)
    throws IOException {
  assert (inMemSorter != null);

  // "for tests" (per the original note): spill once the number of in-memory records
  // reaches the configured threshold.
  final boolean thresholdReached = inMemSorter.numRecords() >= numElementsForSpillThreshold;
  if (thresholdReached) {
    logger.info("Spilling data because number of spilledRecords crossed the threshold " +
        numElementsForSpillThreshold);
    spill();
  }

  // Per the original note, this grows the LongArray holding partition ids and pointers.
  growPointerArrayIfNecessary();

  // The length prefix needs 4 or 8 bytes, so the page must fit prefix + payload.
  final int lengthPrefixBytes = UnsafeAlignedOffset.getUaoSize();
  acquireNewPageIfNecessary(length + lengthPrefixBytes);
  assert (currentPage != null);

  final Object pageBase = currentPage.getBaseObject();
  // Encode the current page and cursor into one long before the cursor is advanced.
  final long encodedAddress =
      taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor);

  // Write the length prefix first, then advance past it.
  UnsafeAlignedOffset.putSize(pageBase, pageCursor, length);
  pageCursor += lengthPrefixBytes;

  // Copy the serialized payload right after the prefix.
  Platform.copyMemory(recordBase, recordOffset, pageBase, pageCursor, length);
  pageCursor += length;

  // Hand the encoded address and partition id to the in-memory sorter (used for sorting,
  // per the original note).
  inMemSorter.insertRecord(encodedAddress, partitionId);
}
- acquireNewPageIfNecessary解析
/**
 * Makes sure the current page can hold {@code required} more bytes at the cursor,
 * allocating a new page otherwise.
 *
 * @param required number of bytes about to be written at the cursor
 */
private void acquireNewPageIfNecessary(int required) {
  // Nothing to do when a page exists and the write still fits before its end.
  final boolean fitsInCurrentPage = currentPage != null
      && pageCursor + required <= currentPage.getBaseOffset() + currentPage.size();
  if (fitsInCurrentPage) {
    return;
  }

  // TODO: try to find space in previous pages
  currentPage = allocatePage(required);
  pageCursor = currentPage.getBaseOffset();
  allocatedPages.add(currentPage);
}
// Inside allocatePage: a page is a MemoryBlock obtained from taskMemoryManager.allocatePage.
/**
 * Requests a page of at least {@code required} bytes and adds its size to {@code used}.
 *
 * @param required minimum number of bytes the page must provide
 * @return the allocated page
 */
protected MemoryBlock allocatePage(long required) {
  // Request the larger of pageSize and the required size.
  final long requestedBytes = Math.max(pageSize, required);
  final MemoryBlock allocated = taskMemoryManager.allocatePage(requestedBytes, this);

  // A missing or too-small page is reported through throwOom.
  final boolean bigEnough = allocated != null && allocated.size() >= required;
  if (!bigEnough) {
    throwOom(allocated, required);
  }

  used += allocated.size();
  return allocated;
}
//最终调用 memoryManager allocate方法 依据MemoryMode(堆内或堆外)申请内存地址空间
//1. 堆外 : new MemoryBlock(null, address, size);
// 2. 堆内 : new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
public MemoryBlock allocatePage(long size, MemoryConsumer consumer) {
......
try {
page = memoryManager.tungstenMemoryAllocator().allocate(acquired);
} catch (OutOfMemoryError e) {
.....
}