Hadoop源码解析之申请与分配Container

本文从源码方面介绍应用程序的AM在NM上成功启动并向RM注册后,向RM请求资源(Container)到获取资源的整个过程,

以及RM内部涉及的主要工作流程。整个过程可看做以下两个阶段的迭代循环:

阶段1:AM汇报资源需求并领取已经分配到的资源。

阶段2:NM向RM汇报各个Container的运行状态,如果RM发现它上面有空闲的资源,则进行一次资源分配,并将分配的资源保存

到对应的数据结构中,等待下一次AM发送心跳信息时获取。


Container分配与申请流程具体步骤如下:

阶段1:

步骤1: AM通过RPC函数ApplicationMasterProtocol#allocate向RM汇报资源需求(周期性调用)

包括新的资源需求描述、待释放的Container列表、请求加入黑名单的节点列表、请求移除黑名单的节点列表。

//RMContainerAllocator.java
protected synchronized void heartbeat() throws Exception {
    scheduleStats.updateAndLogIfChanged("Before Scheduling: ");
    List<Container> allocatedContainers = getResources();
    if (allocatedContainers.size() > 0) {
      scheduledRequests.assign(allocatedContainers);
    }

    int completedMaps = getJob().getCompletedMaps();
    int completedTasks = completedMaps + getJob().getCompletedReduces();
    if ((lastCompletedTasks != completedTasks) ||
          (scheduledRequests.maps.size() > 0)) {
      lastCompletedTasks = completedTasks;
      recalculateReduceSchedule = true;
    }

    if (recalculateReduceSchedule) {
      preemptReducesIfNeeded();
      scheduleReduces(
          getJob().getTotalMaps(), completedMaps,
          scheduledRequests.maps.size(), scheduledRequests.reduces.size(), 
          assignedRequests.maps.size(), assignedRequests.reduces.size(),
          mapResourceRequest, reduceResourceRequest,
          pendingReduces.size(), 
          maxReduceRampupLimit, reduceSlowStart);
      recalculateReduceSchedule = false;
    }

    scheduleStats.updateAndLogIfChanged("After Scheduling: ");
  }

在类RMContainerAllocator的心跳函数heartbeat中,调用函数

List<Container> allocatedContainers = getResources();

获取Container列表,进入函数getResources:

//RMContainerAllocator.java
private List<Container> getResources() throws Exception {
    int headRoom = getAvailableResources() != null
        ? getAvailableResources().getMemory() : 0;//first time it would be null
    AllocateResponse response;
    /*
     * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
     * milliseconds before aborting. During this interval, AM will still try
     * to contact the RM.
     */
    try {
      response = makeRemoteRequest();
      // Reset retry count if no exception occurred.
      retrystartTime = System.currentTimeMillis();
    } catch (Exception e) {
    
    ...
    
}

进入response = makeRemoteRequest();

//RMContainerRequestor.java
private List<Container> getResources() throws Exception {
protected AllocateResponse makeRemoteRequest() throws IOException {
    ResourceBlacklistRequest blacklistRequest =
        ResourceBlacklistRequest.newInstance(new ArrayList<String>(blacklistAdditions),
            new ArrayList<String>(blacklistRemovals));
    AllocateRequest allocateRequest =
        AllocateRequest.newInstance(lastResponseID,
          super.getApplicationProgress(), new ArrayList<ResourceRequest>(ask),
          new ArrayList<ContainerId>(release), blacklistRequest);
    AllocateResponse allocateResponse;
    try {
      allocateResponse = scheduler.allocate(allocateRequest);
    } catch (YarnException e) {
      throw new IOException(e);
    }
    lastResponseID = allocateResponse.getResponseId();
    availableResources = allocateResponse.getAvailableResources();
    lastClusterNmCount = clusterNmCount;
    clusterNmCount = allocateResponse.getNumClusterNodes();

    if (ask.size() > 0 || release.size() > 0) {
      LOG.info("getResources() for " + applicationId + ":" + " ask="
          + ask.size() + " release= " + release.size() + " newContainers="
          + allocateResponse.getAllocatedContainers().size()
          + " finishedContainers="
          + allocateResponse.getCompletedContainersStatuses().size()
          + " resourcelimit=" + availableResources + " knownNMs="
          + clusterNmCount);
    }

    ask.clear();
    release.clear();

    if (blacklistAdditions.size() > 0 || blacklistRemovals.size() > 0) {
      LOG.info("Update the blacklist for " + applicationId +
          ": blacklistAdditions=" + blacklistAdditions.size() +
          " blacklistRemovals=" +  blacklistRemovals.size());
    }
    blacklistAdditions.clear();
    blacklistRemovals.clear();
    return allocateResponse;
  }

主要看allocateResponse = scheduler.allocate(allocateRequest);

变量scheduler定义和实现都在类RMCommunicator中:

//RMCommunicator.java
protected ApplicationMasterProtocol scheduler;
...

protected void serviceStart() throws Exception {
    scheduler= createSchedulerProxy();
    JobID id = TypeConverter.fromYarn(this.applicationId);
    JobId jobId = TypeConverter.toYarn(id);
    job = context.getJob(jobId);
    register();
    startAllocatorThread();
    super.serviceStart();
  }
    
...

protected ApplicationMasterProtocol createSchedulerProxy() {
   
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值