YARN源码分析:RM子服务初始化

本文深入解析了YARN ResourceManager中的关键组件及其初始化流程,包括ClientRMService、ResourceScheduler、AMLivelinessMonitor等核心部分。文章详细阐述了各组件的功能、初始化方法以及配置细节,帮助读者理解YARN架构的实现原理。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

RM的子服务列表

RMApplicationHistoryWriter
AsyncDispatcher
AdminService
RMActiveServices
–RMSecretManagerService
–ContainerAllocationExpirer
–AMLivelinessMonitor
–RMNodeLabelsManager
–RMStateStore
–RMApplicationHistoryWriter
–SystemMetricsPublisher
–NodesListManager
–ResourceScheduler
–SchedulerEventDispatcher
–NMLivelinessMonitor
–ResourceTrackerService
–ApplicationMasterService
–ClientRMService
–ApplicationMasterLauncher
–DelegationTokenRenewer

ClientRMService初始化

在ResourceManager#RMActiveServices#serviceInit()方法中进行ClientRMService的创建

// Inside RMActiveServices#serviceInit(): obtain the ClientRMService via the factory method shown below.
clientRM = createClientRMService();
ResourceManager.java
/**
 * Factory method for the client-facing RM service.
 *
 * <p>Wires the new instance with the RM context, the scheduler, the
 * application manager, both ACL managers and the delegation-token secret
 * manager taken from the RM context.
 *
 * @return a newly constructed {@code ClientRMService}
 */
protected ClientRMService createClientRMService() {
  ClientRMService clientService = new ClientRMService(
      this.rmContext,
      scheduler,
      this.rmAppManager,
      this.applicationACLsManager,
      this.queueACLsManager,
      this.rmContext.getRMDelegationTokenSecretManager());
  return clientService;
}

再看ClientRMService的serviceInit()方法,逻辑很简单,只是设置了客户端绑定地址(即RM的RPC ip+port)

/**
 * Initializes the service: resolves the bind address from the configuration,
 * stores it in {@code clientBindAddress}, then delegates to the parent
 * implementation.
 *
 * @param conf configuration from which the bind address is resolved
 * @throws Exception propagated from {@code super.serviceInit(conf)}
 */
protected void serviceInit(Configuration conf) throws Exception {
  clientBindAddress = getBindAddress(conf);
  super.serviceInit(conf);
}
/**
 * Resolves the socket address used for the client RPC endpoint.
 *
 * <p>The lookup is delegated to {@code Configuration#getSocketAddr}, passing
 * the bind-host key, the RM address key, the default address and the
 * default port.
 *
 * @param conf configuration to read the address settings from
 * @return the resolved socket address
 */
InetSocketAddress getBindAddress(Configuration conf) {
  final InetSocketAddress resolved = conf.getSocketAddr(
      YarnConfiguration.RM_BIND_HOST,
      YarnConfiguration.RM_ADDRESS,
      YarnConfiguration.DEFAULT_RM_ADDRESS,
      YarnConfiguration.DEFAULT_RM_PORT);
  return resolved;
}

ResourceScheduler

在ResourceManager#RMActiveServices#serviceInit()方法中进行ResourceScheduler的创建

// ResourceManager.java, inner class RMActiveServices
// Initialize the scheduler
scheduler = createScheduler();
// Give the scheduler access to the shared RM context.
scheduler.setRMContext(rmContext);
// Register the scheduler through addIfService(...).
addIfService(scheduler);
// Publish the scheduler on the RM context as well.
rmContext.setScheduler(scheduler);

进入createScheduler():根据配置文件中的参数反射创建scheduler实例,默认创建出CapacityScheduler对象。

// ResourceManager.java
/**
 * Instantiates the scheduler implementation named in the configuration.
 *
 * <p>The class name is read from {@code YarnConfiguration.RM_SCHEDULER}
 * (falling back to {@code YarnConfiguration.DEFAULT_RM_SCHEDULER}), loaded
 * by name and created reflectively.
 *
 * @return the newly created scheduler
 * @throws YarnRuntimeException if the class cannot be found or does not
 *         implement {@code ResourceScheduler}
 */
protected ResourceScheduler createScheduler() {
  final String schedulerClassName = conf.get(
      YarnConfiguration.RM_SCHEDULER,
      YarnConfiguration.DEFAULT_RM_SCHEDULER);
  LOG.info("Using Scheduler: " + schedulerClassName);

  // Only the class lookup can raise ClassNotFoundException, so keep the try narrow.
  final Class<?> schedulerClazz;
  try {
    schedulerClazz = Class.forName(schedulerClassName);
  } catch (ClassNotFoundException e) {
    throw new YarnRuntimeException("Could not instantiate Scheduler: "
        + schedulerClassName, e);
  }

  // Reject anything that is not a ResourceScheduler before instantiating it.
  if (!ResourceScheduler.class.isAssignableFrom(schedulerClazz)) {
    throw new YarnRuntimeException("Class: " + schedulerClassName
        + " not instance of " + ResourceScheduler.class.getCanonicalName());
  }

  return (ResourceScheduler) ReflectionUtils.newInstance(schedulerClazz,
      this.conf);
}

由于scheduler通过addIfService(scheduler)被注册为RMActiveServices的子服务,父服务初始化时会调用它的serviceInit(),所以接下来进入CapacityScheduler的serviceInit()

/**
 * Initializes the scheduler service.
 *
 * <p>A copy of the incoming configuration is taken first; the parent
 * initialization runs with the original {@code conf}, while
 * {@code initScheduler} receives the copy.
 *
 * @param conf the service configuration
 * @throws Exception propagated from the parent initialization or from
 *         {@code initScheduler}
 */
@Override
public void serviceInit(Configuration conf) throws Exception {
  // Copy made before super.serviceInit(conf) is invoked.
  Configuration configuration = new Configuration(conf);
  super.serviceInit(conf);
  initScheduler(configuration);
}

先看super.serviceInit(conf);

// AbstractYarnScheduler.java
/**
 * Reads the NM expiry interval and the work-preserving-recovery scheduling
 * wait time from the configuration, calls {@code createReleaseCache()},
 * then delegates to the parent initialization.
 *
 * @param conf the service configuration
 * @throws Exception propagated from {@code super.serviceInit(conf)}
 */
@Override
 public void serviceInit(Configuration conf) throws Exception {
   // yarn.nm.liveness-monitor.expiry-interval-ms: NM liveness expiry interval (default 600 s per the original note)
   nmExpireInterval =
       conf.getInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
         YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
   // yarn.resourcemanager.work-preserving-recovery.scheduling-wait-ms (default 10 s per the original note)
   configuredMaximumAllocationWaitTime =
       conf.getLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS,
         YarnConfiguration.DEFAULT_RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS);
   // Per the original note: sets up a Timer, driven by nmExpireInterval, that releases
   // containers on dead NMs. NOTE(review): createReleaseCache() is not shown here — confirm.
   createReleaseCache();
   super.serviceInit(conf);
 }

再看initScheduler(configuration);

//CapacityScheduler.java
/**
 * Performs the CapacityScheduler-specific initialization: loads and validates
 * the scheduler configuration, caches the allocation limits and resource
 * calculator, builds the queue hierarchy and, when asynchronous scheduling is
 * enabled, creates the asynchronous scheduling thread.
 *
 * @param configuration configuration handed over by {@code serviceInit}
 * @throws IOException declared by the method; raised by the queue
 *         initialization among others
 */
private synchronized void initScheduler(Configuration configuration) throws
    IOException {
  // Load the scheduler configuration (capacity-scheduler.xml per the original note).
  this.conf = loadCapacitySchedulerConfiguration(configuration);
  // Validate the memory / vcore settings.
  validateConf(this.conf);
  // Minimum allocation as a Resource object (two dimensions: memory and vcores).
  this.minimumAllocation = this.conf.getMinimumAllocation();
  // Maximum allocation as a Resource object, stored via initMaximumResourceCapability.
  initMaximumResourceCapability(this.conf.getMaximumAllocation());
  // Resource calculator taken from the configuration (DefaultResourceCalculator by default, per the original note).
  this.calculator = this.conf.getResourceCalculator();
  // yarn.scheduler.include-port-in-node-name, default false per the original note.
  this.usePortForNodeName = this.conf.getUsePortForNodeName();
  this.applications =
      new ConcurrentHashMap<ApplicationId,
          SchedulerApplication<FiCaSchedulerApp>>(); // applications known to the scheduler
  this.labelManager = rmContext.getNodeLabelManager(); // node label manager from the RM context
  authorizer = YarnAuthorizationProvider.getInstance(yarnConf);
  // Build the queue hierarchy.
  initializeQueues(this.conf);
  // Whether asynchronous scheduling is enabled:
  // yarn.scheduler.capacity.schedule-asynchronously.enable, default false per the original note.
  scheduleAsynchronously = this.conf.getScheduleAynschronously();
  // Asynchronous scheduling interval:
  // yarn.scheduler.capacity.schedule-asynchronously.scheduling-interval-ms.
  // NOTE(review): the original note said "default 5s", but the log line below prints
  // this value with an "ms" suffix — the default is presumably 5 ms; confirm.
  asyncScheduleInterval =
      this.conf.getLong(ASYNC_SCHEDULER_INTERVAL,
          DEFAULT_ASYNC_SCHEDULER_INTERVAL);
  // Create the asynchronous scheduling thread only when the feature is enabled.
  if (scheduleAsynchronously) {
    asyncSchedulerThread = new AsyncScheduleThread(this);
  }

  LOG.info("Initialized CapacityScheduler with " +
      "calculator=" + getResourceCalculator().getClass() + ", " +
      "minimumAllocation=<" + getMinimumResourceCapability() + ">, " +
      "maximumAllocation=<" + getMaximumResourceCapability() + ">, " +
      "asynchronousScheduling=" + scheduleAsynchronously + ", " +
      "asyncScheduleInterval=" + asyncScheduleInterval + "ms");
}

重点来分析下初始化队列initializeQueues(this.conf);
(1)解析配置项,构建队列树
(2)构建队列和用户/组的映射

/**
 * Builds the queue tree from the configuration, re-initializes the queue
 * labels on the label manager, sets up the user/group queue mappings and
 * applies the queue ACLs.
 *
 * @param conf capacity scheduler configuration describing the queues
 * @throws IOException if queue parsing or mapping initialization fails
 */
private void initializeQueues(CapacitySchedulerConfiguration conf)
  throws IOException {
  // Build the queue tree; the returned queue is stored as the root.
  // The same `queues` map is passed as both the target map and the "old queues" map.
  root = 
      parseQueue(this, conf, null, CapacitySchedulerConfiguration.ROOT, 
          queues, queues, noop);
  labelManager.reinitializeQueueLabels(getQueueToLabels());
  LOG.info("Initialized root queue " + root);
  initializeQueueMappings();
  setQueueAcls(authorizer, queues);
}

进入parseQueue

/**
 * Recursively builds the queue identified by {@code queueName} and all of its
 * descendants, registering every created queue in {@code queues}.
 *
 * <p>A queue without configured children becomes a {@code LeafQueue} (or a
 * {@code PlanQueue} when it is reservable); a queue with children becomes a
 * {@code ParentQueue} whose children are parsed recursively.
 *
 * @param csContext scheduler context handed to the queue constructors
 * @param conf configuration providing child queue names and the reservable flag
 * @param parent parent queue, or {@code null} for the top-level call
 * @param queueName short name of the queue to build
 * @param queues map that receives every created queue, keyed by short name
 * @param oldQueues map consulted for a previous instance of each queue
 * @param hook callback applied to created leaf and parent queues
 * @return the queue built for {@code queueName}
 * @throws IllegalStateException if the top-level queue has no children, or a
 *         queue with children is marked reservable
 * @throws IOException if two leaf queues share the same short name
 */
static CSQueue parseQueue(
    CapacitySchedulerContext csContext,
    CapacitySchedulerConfiguration conf, 
    CSQueue parent, String queueName, Map<String, CSQueue> queues,
    Map<String, CSQueue> oldQueues, 
    QueueHook hook) throws IOException {
  CSQueue queue;
  // With no parent the full name is just queueName (e.g. "root");
  // otherwise it is "<parent path>.<queueName>".
  String fullQueueName =
      (parent == null) ? queueName
          : (parent.getQueuePath() + "." + queueName);
  // Child queue names, e.g. from yarn.scheduler.capacity.root.queues for the root queue.
  String[] childQueueNames = 
    conf.getQueues(fullQueueName);
  // Whether reservation is enabled for this queue, e.g. yarn.scheduler.capacity.root.reservable
  // (default false per the original note).
  boolean isReservableQueue = conf.isReservable(fullQueueName);
  // No children configured: build a leaf-level queue.
  if (childQueueNames == null || childQueueNames.length == 0) {
    if (null == parent) {
      throw new IllegalStateException(
          "Queue configuration missing child queue names for " + queueName);
    }
    // Check if the queue will be dynamically managed by the Reservation
    // system
    // A reservable queue is created as a PlanQueue.
    if (isReservableQueue) {
      queue =
          new PlanQueue(csContext, queueName, parent,
              oldQueues.get(queueName));
    } else {
      queue =
          new LeafQueue(csContext, queueName, parent,
              oldQueues.get(queueName));

      // Used only for unit tests
      queue = hook.hook(queue);
    }
  } else { // Children configured: build a ParentQueue.
    if (isReservableQueue) {
      throw new IllegalStateException(
          "Only Leaf Queues can be reservable for " + queueName);
    }

    // Build this (parent-level) queue; on the top-level call this is the root queue.
    ParentQueue parentQueue = 
      new ParentQueue(csContext, queueName, parent, oldQueues.get(queueName));

    // Used only for unit tests
    queue = hook.hook(parentQueue);

    // Build each child by calling parseQueue recursively.
    List<CSQueue> childQueues = new ArrayList<CSQueue>();
    for (String childQueueName : childQueueNames) {
      CSQueue childQueue = 
        parseQueue(csContext, conf, queue, childQueueName, 
            queues, oldQueues, hook);
      childQueues.add(childQueue);
    }
    // Attach the children to the parent queue (the original note says they are kept
    // in a TreeSet ordered by capacity).
    parentQueue.setChildQueues(childQueues);
  }

  // Reject a second leaf queue registered under the same short name.
  if(queue instanceof LeafQueue == true && queues.containsKey(queueName)
    && queues.get(queueName) instanceof LeafQueue == true) {
    throw new IOException("Two leaf queues were named " + queueName
      + ". Leaf queue names must be distinct");
  }
  queues.put(queueName, queue);

  LOG.info("Initialized queue: " + queue);
  return queue;
}

ParentQueue构建

/**
 * Creates a parent (non-leaf) queue.
 *
 * @param cs scheduler context supplying the queue comparator, configuration
 *        and cluster resource
 * @param queueName short name of this queue
 * @param parent parent queue; {@code null} marks this queue as the root
 * @param old previous instance of this queue, forwarded to the superclass
 * @throws IOException declared by the constructor
 * @throws IllegalArgumentException if this is the root queue and its
 *         configured capacity is not
 *         {@code CapacitySchedulerConfiguration.MAXIMUM_CAPACITY_VALUE}
 */
public ParentQueue(CapacitySchedulerContext cs, 
    String queueName, CSQueue parent, CSQueue old) throws IOException {
  super(cs, queueName, parent, old);
  this.scheduler = cs;
  this.queueComparator = cs.getQueueComparator(); // comparator used to order child queues

  this.rootQueue = (parent == null); // true when this is the root queue

  // Configured capacity of this queue (e.g. yarn.scheduler.capacity.root.rec.capacity
  // for a queue at root.rec); the root queue must be at the maximum value, checked below.
  float rawCapacity = cs.getConfiguration().getNonLabeledQueueCapacity(getQueuePath());

  if (rootQueue &&
      (rawCapacity != CapacitySchedulerConfiguration.MAXIMUM_CAPACITY_VALUE)) {
    throw new IllegalArgumentException("Illegal " +
        "capacity of " + rawCapacity + " for queue " + queueName +
        ". Must be " + CapacitySchedulerConfiguration.MAXIMUM_CAPACITY_VALUE);
  }
  
  this.childQueues = new TreeSet<CSQueue>(queueComparator); // children are kept sorted by the queue comparator

  // Load the configuration entries specific to this queue.
  setupQueueConfigs(cs.getClusterResource());

  LOG.info("Initialized parent-queue " + queueName + 
      " name=" + queueName + 
      ", fullname=" + getQueuePath()); 
}

LeafQueue构建

/**
 * Creates a leaf queue.
 *
 * <p>Sets up the active-users manager and the pending / active application
 * sets (both ordered by the scheduler's application comparator), then loads
 * the queue-specific configuration.
 *
 * @param cs scheduler context supplying the application comparator and
 *        cluster resource
 * @param queueName short name of this queue
 * @param parent parent queue
 * @param old previous instance of this queue, forwarded to the superclass
 * @throws IOException declared by the constructor
 */
public LeafQueue(CapacitySchedulerContext cs, 
    String queueName, CSQueue parent, CSQueue old) throws IOException {
  super(cs, queueName, parent, old);
  this.scheduler = cs;

  this.activeUsersManager = new ActiveUsersManager(metrics); 

  if(LOG.isDebugEnabled()) {
    LOG.debug("LeafQueue:" + " name=" + queueName
      + ", fullname=" + getQueuePath());
  }

  Comparator<FiCaSchedulerApp> applicationComparator =
      cs.getApplicationComparator(); // application comparator (orders by application ID per the original note)
  this.pendingApplications = 
      new TreeSet<FiCaSchedulerApp>(applicationComparator); // pending applications
  this.activeApplications = new TreeSet<FiCaSchedulerApp>(applicationComparator); // active applications

  // Load the configuration entries specific to this queue.
  setupQueueConfigs(cs.getClusterResource());
}

ParentQueue和LeafQueue的一点说明:队列通过ParentQueue和LeafQueue构成了一棵树,app只能在LeafQueue上申请资源。LeafQueue保存了当前运行的app列表,app列表分两种:pendingApp列表和activeApp列表,两个列表都是按appID来排序的,即CapacityScheduler在队列内部是按FIFO调度的。

队列映射 initializeQueueMappings():将用户/组和队列映射起来

// CapacityScheduler.java
/**
 * Loads the user/group-to-queue mappings from the configuration and installs
 * them after checking that every explicitly named target queue is a known
 * leaf queue.
 *
 * <p>Also reads the override flag
 * ({@code yarn.scheduler.capacity.queue-mappings-override.enable}), which
 * decides whether a mapping takes precedence over the queue named by the
 * user. The {@code Groups} helper is created only when at least one mapping
 * exists.
 *
 * @throws IOException if a mapping names a queue that is unknown or is not a
 *         leaf queue
 */
private void initializeQueueMappings() throws IOException {
    overrideWithQueueMappings = conf.getOverrideWithQueueMappings();
    LOG.info("Initialized queue mappings, override: "
            + overrideWithQueueMappings);

    // Candidate mappings from yarn.scheduler.capacity.queue-mappings.
    List<QueueMapping> candidateMappings = conf.getQueueMappings();

    for (QueueMapping candidate : candidateMappings) {
      // Placeholder targets are not looked up in the queue map.
      boolean isPlaceholder = candidate.queue.equals(CURRENT_USER_MAPPING)
              || candidate.queue.equals(PRIMARY_GROUP_MAPPING);
      if (isPlaceholder) {
        continue;
      }
      // instanceof is false for null, so a missing queue is rejected here too.
      CSQueue target = queues.get(candidate.queue);
      if (!(target instanceof LeafQueue)) {
        throw new IOException(
                "mapping contains invalid or non-leaf queue " + candidate.queue);
      }
    }

    // Every mapping passed the check: install them.
    mappings = candidateMappings;

    // Group resolution is only needed when mappings are present.
    if (!mappings.isEmpty()) {
      groups = new Groups(conf);
    }
}

AMLivelinessMonitor

在ResourceManager#RMActiveServices#serviceInit()方法中进行AMLivelinessMonitor的创建

// First monitor instance: registered as a service and published on the RM context
// as the AM liveliness monitor.
AMLivelinessMonitor amLivelinessMonitor = createAMLivelinessMonitor();
addService(amLivelinessMonitor);
rmContext.setAMLivelinessMonitor(amLivelinessMonitor);

// Second, independent instance created by the same factory method: registered as a
// service and published on the RM context as the AM finishing monitor.
AMLivelinessMonitor amFinishingMonitor = createAMLivelinessMonitor();
addService(amFinishingMonitor);
rmContext.setAMFinishingMonitor(amFinishingMonitor);

创建了两个AMLivelinessMonitor实例:amLivelinessMonitor代表运行中的AM,amFinishingMonitor代表完成中的AM。

进入createAMLivelinessMonitor()

// NOTE(review): originally labelled "AMLiveLinessMonitor.java"; this factory reads
// this.rmDispatcher and is called unqualified from RMActiveServices, so it presumably
// lives in ResourceManager.java — confirm.
/**
 * Creates a new AM liveliness monitor bound to the RM dispatcher.
 *
 * @return a newly constructed {@code AMLivelinessMonitor}
 */
protected AMLivelinessMonitor createAMLivelinessMonitor() {
  return new AMLivelinessMonitor(this.rmDispatcher);
}

进入new AMLivelinessMonitor

// AMLivelinessMonitor.java
/**
 * Creates the monitor under the service name "AMLivelinessMonitor" with a
 * {@code SystemClock}, and keeps the dispatcher's event handler for later use.
 *
 * @param d dispatcher whose event handler is stored in {@code dispatcher}
 */
public AMLivelinessMonitor(Dispatcher d) {
  super("AMLivelinessMonitor", new SystemClock());
  this.dispatcher = d.getEventHandler();
}

进入serviceInit

// AMLivelinessMonitor.java
/**
 * Configures the expiry and monitoring intervals from the configuration.
 *
 * <p>The AM expiry interval is read from
 * {@code YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS}
 * (yarn.am.liveness-monitor.expiry-interval-ms) in milliseconds. The
 * monitoring interval is one third of that value using integer division, so a
 * 10-minute expiry yields a check roughly every 200 seconds.
 *
 * @param conf the service configuration
 * @throws Exception propagated from {@code super.serviceInit(conf)}
 */
public void serviceInit(Configuration conf) throws Exception {
  super.serviceInit(conf);

  final int amExpiryMillis = conf.getInt(
      YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS,
      YarnConfiguration.DEFAULT_RM_AM_EXPIRY_INTERVAL_MS);
  // Check three times per expiry window.
  final int checkEveryMillis = amExpiryMillis / 3;

  setExpireInterval(amExpiryMillis);
  setMonitorInterval(checkEveryMillis);
}

进入serviceStart

/**
 * Starts the monitor: resets the timer, spawns the "Ping Checker" thread
 * running a {@code PingChecker}, then delegates to the parent start logic.
 *
 * @throws Exception propagated from {@code super.serviceStart()}
 */
@Override
  protected void serviceStart() throws Exception {
    assert !stopped : "starting when already stopped";
    // Per the original note: resets the stored report times to the current time.
    // NOTE(review): resetTimer() is not shown here — confirm.
    resetTimer();
    // Create the thread that runs the expiry checks.
    checkerThread = new Thread(new PingChecker());
    checkerThread.setName("Ping Checker");
    // Start the checker thread; it keeps checking for expired entries.
    checkerThread.start();
    super.serviceStart();
  }

来看看监控线程的工作

// AbstractLivelinessMonitor.java, inner class PingChecker
/**
 * Background task that repeatedly scans the monitored entries and expires
 * those whose last report time is older than {@code expireInterval}.
 *
 * <p>Each scan runs while holding the enclosing monitor's lock; between scans
 * the thread sleeps for {@code monitorInterval}. The loop ends when the
 * monitor is stopped or the thread is interrupted.
 */
private class PingChecker implements Runnable {

    @Override
    public void run() {
      while (!stopped && !Thread.currentThread().isInterrupted()) {
        synchronized (AbstractLivelinessMonitor.this) {
          Iterator<Map.Entry<O, Long>> iterator = 
            running.entrySet().iterator(); // running maps each monitored object to its last report time

          //avoid calculating current time everytime in loop
          long currentTime = clock.getTime();

          // Compare the current time with each entry's last report time; an entry that has
          // not reported within expireInterval is removed and handed to expire().
          while (iterator.hasNext()) {
            Map.Entry<O, Long> entry = iterator.next();
            if (currentTime > entry.getValue() + expireInterval) {
              iterator.remove();
              // Trigger the expiry handling for this entry.
              expire(entry.getKey());
              LOG.info("Expired:" + entry.getKey().toString() + 
                      " Timed out after " + expireInterval/1000 + " secs");
            }
          }
        }
        try {
          Thread.sleep(monitorInterval);
        } catch (InterruptedException e) {
          // Interruption ends the checker loop.
          LOG.info(getName() + " thread interrupted");
          break;
        }
      }
    }
  }

来看下过期流程

/**
 * Handles an expired application attempt by dispatching an
 * {@code RMAppAttemptEventType.EXPIRE} event for it.
 *
 * @param id the application attempt that expired
 */
@Override
protected void expire(ApplicationAttemptId id) {
  RMAppAttemptEvent expiryEvent =
      new RMAppAttemptEvent(id, RMAppAttemptEventType.EXPIRE);
  dispatcher.handle(expiryEvent);
}

即向RMAppAttemptImpl发送RMAppAttemptEventType.EXPIRE事件。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值