本文从源码方面介绍应用程序的AM在NM上成功启动并向RM注册后,向RM请求资源(Container)到获取资源的整个过程,
以及RM内部涉及的主要工作流程。整个过程可看做以下两个阶段的迭代循环:
阶段1:AM汇报资源需求并领取已经分配到的资源。
阶段2:NM向RM汇报各个Container的运行状态,如果RM发现它上面有空闲的资源,则进行一次资源分配,并将分配的资源保存
到对应的数据结构中,等待下一次AM发送心跳信息时获取。
Container分配与申请流程具体步骤如下:
阶段1:
步骤1: AM通过RPC函数ApplicationMasterProtocol#allocate周期性地向RM汇报资源需求,
汇报内容包括新的资源需求描述、待释放的Container列表、请求加入黑名单的节点列表、请求移除黑名单的节点列表。
//RMContainerAllocator.java
/**
 * Runs one allocator heartbeat.
 *
 * <p>Fetches containers via {@code getResources()}, hands any that were
 * returned to {@code scheduledRequests.assign(...)}, and then decides whether
 * the reduce schedule has to be recomputed. Scheduling statistics are logged
 * before and after the pass.
 *
 * @throws Exception propagated from {@code getResources()} or any of the
 *     scheduling calls made here
 */
protected synchronized void heartbeat() throws Exception {
  scheduleStats.updateAndLogIfChanged("Before Scheduling: ");

  // Hand over whatever containers this round produced.
  final List<Container> granted = getResources();
  if (!granted.isEmpty()) {
    scheduledRequests.assign(granted);
  }

  final int finishedMaps = getJob().getCompletedMaps();
  final int finishedTasks = finishedMaps + getJob().getCompletedReduces();

  // A recalculation is requested when the completed-task count moved since the
  // last heartbeat, or while scheduledRequests.maps is still non-empty.
  final boolean progressChanged = lastCompletedTasks != finishedTasks;
  if (progressChanged || scheduledRequests.maps.size() > 0) {
    lastCompletedTasks = finishedTasks;
    recalculateReduceSchedule = true;
  }

  if (recalculateReduceSchedule) {
    preemptReducesIfNeeded();
    scheduleReduces(
        getJob().getTotalMaps(),
        finishedMaps,
        scheduledRequests.maps.size(),
        scheduledRequests.reduces.size(),
        assignedRequests.maps.size(),
        assignedRequests.reduces.size(),
        mapResourceRequest,
        reduceResourceRequest,
        pendingReduces.size(),
        maxReduceRampupLimit,
        reduceSlowStart);
    // The flag is one-shot: clear it once the schedule has been recomputed.
    recalculateReduceSchedule = false;
  }

  scheduleStats.updateAndLogIfChanged("After Scheduling: ");
}
在类RMContainerAllocator的心跳函数heartbeat中,调用函数
List<Container> allocatedContainers = getResources();
获取Container列表,进入函数getResources:
//RMContainerAllocator.java
private List<Container> getResources() throws Exception {
int headRoom = getAvailableResources() != null
? getAvailableResources().getMemory() : 0;//first time it would be null
AllocateResponse response;
/*
* If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
* milliseconds before aborting. During this interval, AM will still try
* to contact the RM.
*/
try {
response = makeRemoteRequest();
// Reset retry count if no exception occurred.
retrystartTime = System.currentTimeMillis();
} catch (Exception e) {
...
}
进入response = makeRemoteRequest();
//RMContainerRequestor.java
/**
 * Issues a single {@code scheduler.allocate(...)} call and updates local
 * bookkeeping from the response.
 *
 * <p>The request carries copies of the current {@code ask}, {@code release},
 * {@code blacklistAdditions} and {@code blacklistRemovals} collections. After a
 * successful call those four collections are cleared, and the response id,
 * available resources and cluster node count are recorded from the response.
 *
 * @return the response returned by {@code scheduler.allocate(...)}
 * @throws IOException if the call fails; a {@code YarnException} from the
 *     call is wrapped in an {@code IOException}
 */
protected AllocateResponse makeRemoteRequest() throws IOException {
  // Copy the pending blacklist changes so the request does not alias the live
  // collections, which are cleared further down.
  final ArrayList<String> additionsCopy =
      new ArrayList<String>(blacklistAdditions);
  final ArrayList<String> removalsCopy =
      new ArrayList<String>(blacklistRemovals);
  final ResourceBlacklistRequest blacklistRequest =
      ResourceBlacklistRequest.newInstance(additionsCopy, removalsCopy);

  final AllocateRequest allocateRequest = AllocateRequest.newInstance(
      lastResponseID,
      super.getApplicationProgress(),
      new ArrayList<ResourceRequest>(ask),
      new ArrayList<ContainerId>(release),
      blacklistRequest);

  final AllocateResponse response;
  try {
    response = scheduler.allocate(allocateRequest);
  } catch (YarnException e) {
    throw new IOException(e);
  }

  lastResponseID = response.getResponseId();
  availableResources = response.getAvailableResources();
  // Keep the previous node count before overwriting it with the fresh one.
  lastClusterNmCount = clusterNmCount;
  clusterNmCount = response.getNumClusterNodes();

  // Log before clearing, so the sizes reflect what was actually sent.
  if (!ask.isEmpty() || !release.isEmpty()) {
    final int newContainers = response.getAllocatedContainers().size();
    final int finishedContainers =
        response.getCompletedContainersStatuses().size();
    LOG.info("getResources() for " + applicationId + ":" + " ask="
        + ask.size() + " release= " + release.size() + " newContainers="
        + newContainers
        + " finishedContainers="
        + finishedContainers
        + " resourcelimit=" + availableResources + " knownNMs="
        + clusterNmCount);
  }
  ask.clear();
  release.clear();

  if (!blacklistAdditions.isEmpty() || !blacklistRemovals.isEmpty()) {
    LOG.info("Update the blacklist for " + applicationId +
        ": blacklistAdditions=" + blacklistAdditions.size() +
        " blacklistRemovals=" + blacklistRemovals.size());
  }
  blacklistAdditions.clear();
  blacklistRemovals.clear();

  return response;
}
主要看allocateResponse = scheduler.allocate(allocateRequest);
变量scheduler的定义和初始化都在类RMCommunicator中:
//RMCommunicator.java
protected ApplicationMasterProtocol scheduler;
...
/**
 * Starts this service.
 *
 * <p>In order: creates the scheduler proxy, looks up the job for this
 * application in {@code context}, calls {@code register()}, starts the
 * allocator thread, and finally delegates to {@code super.serviceStart()}.
 *
 * @throws Exception propagated from any of the start-up steps
 */
protected void serviceStart() throws Exception {
  // The proxy must exist before register() and the allocator thread run.
  scheduler = createSchedulerProxy();

  // Convert the application id to a JobID and back to the JobId form that
  // context.getJob(...) is called with.
  JobID convertedId = TypeConverter.fromYarn(applicationId);
  JobId lookupKey = TypeConverter.toYarn(convertedId);
  job = context.getJob(lookupKey);

  register();
  startAllocatorThread();
  super.serviceStart();
}
...
protected ApplicationMasterProtocol createSchedulerProxy() {