As described earlier, map output KV pairs are first written into the KV buffer. When the buffer is full, or its usage reaches a configured fraction (io.sort.spill.percent), spillThread writes the buffered data to disk so that the buffer can be reused. This flush is not a straight dump: the records are sorted first, each partition's data is written out contiguously, and an index file is produced to mark where each partition's segment lies. Every spill creates a new data file; the structure of the data file and its index is sketched below.
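A rough sketch of that layout (illustration only, not Hadoop source; the IndexEntry fields mirror what Hadoop's IndexRecord tracks):

// Illustration only (not Hadoop source code).
//
//   spill data file : [segment p0][segment p1]...[segment p(P-1)]
//   spill index     : one fixed-size record per partition
//
class SpillLayoutSketch {
    // Mirrors the three fields of Hadoop's IndexRecord.
    static class IndexEntry {
        long startOffset; // where this partition's segment begins in the data file
        long rawLength;   // raw (uncompressed) segment length, end marker included
        long partLength;  // bytes the segment actually occupies on disk
    }
    // The index is one entry per partition, so a reader serving
    // partition i can seek straight to entries[i].startOffset.
    IndexEntry[] entries;
}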
The spill process is as follows:
private void sortAndSpill() throws IOException, ClassNotFoundException,
                                   InterruptedException {
  //approximate the length of the output file to be the length of the
  //buffer + header lengths for the partitions
  long size = (bufend >= bufstart
      ? bufend - bufstart
      : (bufvoid - bufend) + bufstart) +
              partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create the spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename =
        mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);
    // determine the end position; kvoffsets is a circular buffer, so the
    // logical end may lie beyond the physical end of the array
    final int endPosition = (kvend > kvstart)
      ? kvend
      : kvoffsets.length + kvend;
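    // Annotation (not Hadoop source): a worked example of the wraparound
    // above. With kvoffsets.length == 10, kvstart == 7 and kvend == 3,
    // the pending records occupy logical slots 7..12, so endPosition is
    // 10 + 3 == 13; the spindex % kvoffsets.length expressions below map
    // those logical indices back into the array as 7, 8, 9, 0, 1, 2.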
    // sort in memory: records are ordered by partition and, within a
    // partition, by key
    sorter.sort(MapOutputBuffer.this, kvstart, endPosition, reporter);
    // starting position of the scan
    int spindex = kvstart;
    IndexRecord rec = new IndexRecord();
    InMemValBytes value = new InMemValBytes();
    // write one segment per reduce (partition); the spill file is the
    // concatenation of these segments
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos(); // start position of this partition's segment
        writer = new Writer<K, V>(job, out, keyClass, valClass, codec,
                                  spilledRecordsCounter);
        if (combinerRunner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          // walk the records belonging to this partition and append them
          // to the file in order
          while (spindex < endPosition &&
                 kvindices[kvoffsets[spindex % kvoffsets.length]
                           + PARTITION] == i) {
            // locate the KV pair; note that no data is copied here
            final int kvoff = kvoffsets[spindex % kvoffsets.length];
            getVBytesForOffset(kvoff, value);
            key.reset(kvbuffer, kvindices[kvoff + KEYSTART],
                      (kvindices[kvoff + VALSTART] -
                       kvindices[kvoff + KEYSTART]));
            // write the record to the file
            writer.append(key, value);
            // advance the offset pointer
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < endPosition &&
                 kvindices[kvoffsets[spindex % kvoffsets.length]
                           + PARTITION] == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            combineCollector.setWriter(writer);
            RawKeyValueIterator kvIter =
              new MRResultIterator(spstart, spindex);
            combinerRunner.combine(kvIter, combineCollector);
          }
        }
        // close the writer
        writer.close();
        // record the offsets of this partition's segment
        rec.startOffset = segmentStart; // where the segment starts
        rec.rawLength = writer.getRawLength(); // raw bytes written, including the end-of-segment marker
        rec.partLength = writer.getCompressedLength(); // bytes the segment occupies on disk, including some control bytes
        spillRec.putIndex(rec, i); // add this partition's index record
        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }
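    // Annotation (not Hadoop source): at this point every partition's
    // segment has been appended to the single spill file, and spillRec
    // holds one index entry per partition.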
    if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {
      // the cached index data has outgrown its memory limit, so write
      // this spill's index to a separate index file on disk
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job); // write the index file
    } else {
      indexCacheList.add(spillRec); // keep this spill's SpillRecord in the in-memory cache
      totalIndexCacheMemory += // update the index-cache accounting; each index record describes one partition
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills; // number of spill files produced so far
  } finally {
    if (out != null) out.close();
  }
}
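To show what the index buys downstream, here is a minimal, hypothetical sketch (readPartitionSegment is not a Hadoop API) of how a reader can serve exactly one partition's segment from a spill file using its (startOffset, partLength) index entry:

import java.io.IOException;
import java.io.RandomAccessFile;

// Hypothetical illustration, not Hadoop code: the real shuffle path is
// more involved, but rests on the same seek-plus-bounded-read idea.
class PartitionSegmentReader {
    static byte[] readPartitionSegment(String spillPath, long startOffset,
                                       long partLength) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(spillPath, "r")) {
            raf.seek(startOffset);            // jump to this partition's segment
            byte[] segment = new byte[(int) partLength];
            raf.readFully(segment);           // read only partLength bytes
            return segment;
        }
    }
}

This is also why rec.partLength records the on-disk size rather than the raw size: it is exactly the number of bytes a reader has to transfer for that partition.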