书接上回,继续讲解ReplicationMonitor。做完块复制后,接下来进入processPendingReplications,很显然这个函数负责处理复制超时的块。至于为什么把超时块的检测放到另外一个线程(PendingReplicationMonitor)里去做,我认为应该是为了防止复制监控线程任务过重。
/**
 * Puts blocks whose replication requests have timed out back into the
 * neededReplications queue so that they are scheduled for replication again.
 */
void processPendingReplications() {
  // The timed-out blocks are collected elsewhere (see pendingReplicationCheck,
  // which adds them to timedOutItems) and are fetched back here.
  // A null result is treated as "nothing to do".
  Block[] expiredBlocks = pendingReplications.getTimedOutBlocks();
  if (expiredBlocks == null) {
    return;
  }

  synchronized (this) {
    for (Block expired : expiredBlocks) {
      NumberReplicas replicaCounts = countNodes(expired);
      // Re-queue the block together with its current replica counts and
      // the replication value reported by getReplication.
      neededReplications.add(
          expired,
          replicaCounts.liveReplicas(),
          replicaCounts.decommissionedReplicas(),
          getReplication(expired));
    }
  }
}
//注意:上面用到的超时块是在这里生成的,timedOutItems.add(block)void pendingReplicationCheck() {
synchronized (pendingReplications) {
Iterator iter = pendingReplications.entrySet().iterator();
long now = FSNamesystem.now();
FSNamesystem.LOG.debug("PendingReplicationMonitor checkingQ");
while (iter.hasNext()) {
Map.Entry entry = (Map.Entry)iter.next();
PendingBlockInfo pendingBlock =(PendingBlockInfo) entry.getValue();
if (now > pendingBlock.getTimeStamp() + timeout) {
Block block = (Block)entry.getKey();
synchronized (timedOutItems) {
timedOutItems.add(block);//加入超时块队列
}
FSNamesystem.LOG.warn(
"PendingReplicationMonitor timed outblock " + block);
iter.remove();
}
}
}
}
}
下面再看一下获得超时块的函数getTimedOutBlocks:它直接把集合转化为数组返回,并在返回前清空集合;如果集合为空,则返回null。
/**
 * Returns the blocks currently held in timedOutItems as an array and
 * empties the collection, all while holding the timedOutItems lock.
 *
 * @return an array of the timed-out blocks, or {@code null} when the
 *         collection holds no blocks
 */
Block[] getTimedOutBlocks() {
  synchronized (timedOutItems) {
    final int count = timedOutItems.size();
    if (count > 0) {
      // Copy the contents out first, then clear, so each block is handed
      // out by this method only once.
      Block[] snapshot = timedOutItems.toArray(new Block[count]);
      timedOutItems.clear();
      return snapshot;
    }
    return null;
  }
}