Hadoop 2.7.5: LocalizerTracker uses a separate private localizer for every container

本文深入解析了LocalizerTracker的工作原理及其实现细节,包括如何管理公共本地化资源和私有本地化资源,以及如何通过LocalizerRunner进行资源本地化。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

LocalizerTracker

LocalizerTracker manages both the public localizer and the private localizers.

 class LocalizerTracker extends AbstractService implements EventHandler<LocalizerEvent>  {
 private final PublicLocalizer publicLocalizer;
    private final Map<String,LocalizerRunner> privLocalizers;

    /**
     * Creates a tracker whose registry of private localizers starts out as a
     * new, empty {@link HashMap}.
     *
     * @param conf configuration forwarded to the two-argument constructor
     */
    LocalizerTracker(Configuration conf) {
      this(conf, new HashMap<String,LocalizerRunner>());
    }

    /**
     * Creates a tracker that uses the supplied map as its registry of private
     * localizers and builds a new {@link PublicLocalizer} from {@code conf}.
     *
     * The map is stored by reference (no defensive copy), so the caller keeps
     * access to the same instance the tracker works on.
     *
     * @param conf configuration handed to the new {@link PublicLocalizer}
     * @param privLocalizers registry of {@link LocalizerRunner}s, keyed by the
     *        String localizer id used in {@code handle}
     */
    LocalizerTracker(Configuration conf,
        Map<String,LocalizerRunner> privLocalizers) {
      // The service name is this class's fully qualified name.
      super(LocalizerTracker.class.getName());
      this.publicLocalizer = new PublicLocalizer(conf);
      this.privLocalizers = privLocalizers;
    }

LocalizerTracker.handle

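In the handle method of LocalizerTracker, a PUBLIC request is handed to the public localizer. For a PRIVATE or APPLICATION request, it looks up the LocalizerRunner registered for the localizer id; if none exists yet, it creates one, registers it and starts it, so every container gets its own LocalizerRunner. Afterwards, it calls localizer.addResource(req); to pass the request on to that runner.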

/**
 * Routes a localizer event to the localizer responsible for it.
 *
 * Only {@code REQUEST_RESOURCE_LOCALIZATION} events are acted on. PUBLIC
 * requests go to the shared public localizer. PRIVATE and APPLICATION
 * requests go to the {@link LocalizerRunner} registered under the event's
 * localizer id; if none is registered yet, one is created, registered and
 * started first. Lookup, creation and hand-off all happen while holding the
 * monitor of {@code privLocalizers}.
 *
 * @param event the localizer event to dispatch
 */
@Override
    public void handle(LocalizerEvent event) {
      final String localizerId = event.getLocalizerId();
      switch (event.getType()) {
      case REQUEST_RESOURCE_LOCALIZATION:
        // Step 0: locate the running localizer, or spin up a new thread.
        LocalizerResourceRequestEvent request =
          (LocalizerResourceRequestEvent) event;
        switch (request.getVisibility()) {
        case PUBLIC:
          publicLocalizer.addResource(request);
          break;
        case PRIVATE:
        case APPLICATION:
          synchronized (privLocalizers) {
            LocalizerRunner runner = privLocalizers.get(localizerId);
            if (runner == null) {
              LOG.info("Created localizer for " + localizerId);
              runner = new LocalizerRunner(request.getContext(), localizerId);
              privLocalizers.put(localizerId, runner);
              runner.start();
            }
            // Step 1: forward the request to the runner.
            runner.addResource(request);
          }
          break;
        }
        break;
      }
    }

LocalizerRunner

class LocalizerRunner extends Thread {

    final LocalizerContext context;
    final String localizerId;
    final Map<LocalResourceRequest,LocalizerResourceRequestEvent> scheduled;
    // It's a list shared between the Private Localizer and the dispatcher thread.
    final List<LocalizerResourceRequestEvent> pending;
    private AtomicBoolean killContainerLocalizer = new AtomicBoolean(false);

    // TODO: threadsafe, use outer?
    private final RecordFactory recordFactory =
      RecordFactoryProvider.getRecordFactory(getConfig());

    LocalizerRunner(LocalizerContext context, String localizerId) {
      super("LocalizerRunner for " + localizerId);
      this.context = context;
      this.localizerId = localizerId;
      this.pending =
          Collections
            .synchronizedList(new ArrayList<LocalizerResourceRequestEvent>());
      this.scheduled =
          new HashMap<LocalResourceRequest, LocalizerResourceRequestEvent>();
    }

LocalizerRunner.addResource

/**
 * Appends a localization request to this runner's {@code pending} list.
 *
 * @param request the resource request event to enqueue
 */
public void addResource(LocalizerResourceRequestEvent request) {
      pending.add(request);
    }

LocalizerRunner.run

The important part is

exec.startLocalizer(nmPrivateCTokensPath, localizationServerAddress,
              context.getUser(),
              ConverterUtils.toString(
                  context.getContainerId().
                  getApplicationAttemptId().getApplicationId()),
              localizerId,
              dirsHandler);

exec is passed in as a constructor parameter. By default, exec is an instance of DefaultContainerExecutor.

  private final ContainerExecutor exec;

The complete code of run

/**
 * Runs one private localization and cleans up afterwards.
 *
 * The method resolves a path for the localizer's token file under
 * {@code NM_PRIVATE_DIR}, writes the credentials there and, if
 * {@code dirsHandler.areDisksHealthy()} holds, asks the container executor
 * to start the localizer. An unhealthy disk set raises an
 * {@link IOException}.
 *
 * Any {@code FSError} or {@link Exception} is logged and reported to the
 * container as a {@code ContainerResourceFailedEvent} carrying the
 * exception message.
 *
 * In every case the {@code finally} block then:
 * <ul>
 *   <li>unlocks every resource still in {@code scheduled}, and schedules
 *       deletion of its download directory and the matching {@code _tmp}
 *       directory when a local path is known;</li>
 *   <li>schedules deletion of the token file, but only if its path was
 *       actually resolved.</li>
 * </ul>
 */
public void run() {
      Path nmPrivateCTokensPath = null;
      Throwable exception = null;
      try {
        // Get nmPrivateDir
        nmPrivateCTokensPath =
          dirsHandler.getLocalPathForWrite(
                NM_PRIVATE_DIR + Path.SEPARATOR
                    + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT,
                        localizerId));

        // 0) init queue, etc.
        // 1) write credentials to private dir
        writeCredentials(nmPrivateCTokensPath);
        // 2) exec initApplication and wait
        // NOTE(review): the two lists below are not read afterwards; the calls
        // are presumably kept for their initialization side effects - confirm
        // before removing them.
        List<String> localDirs = getInitializedLocalDirs();
        List<String> logDirs = getInitializedLogDirs();
        if (dirsHandler.areDisksHealthy()) {
          exec.startLocalizer(nmPrivateCTokensPath, localizationServerAddress,
              context.getUser(),
              ConverterUtils.toString(
                  context.getContainerId().
                  getApplicationAttemptId().getApplicationId()),
              localizerId,
              dirsHandler);
        } else {
          throw new IOException("All disks failed. "
              + dirsHandler.getDisksHealthReport(false));
        }
      // TODO handle ExitCodeException separately?
      } catch (FSError fe) {
        // FSError is caught separately because catch (Exception) below does
        // not cover it.
        exception = fe;
      } catch (Exception e) {
        exception = e;
      } finally {
        if (exception != null) {
          LOG.info("Localizer failed", exception);
          // On error, report failure to Container and signal ABORT
          // Notify resource of failed localization
          ContainerId cId = context.getContainerId();
          dispatcher.getEventHandler().handle(new ContainerResourceFailedEvent(
              cId, null, exception.getMessage()));
        }
        List<Path> paths = new ArrayList<Path>();
        for (LocalizerResourceRequestEvent event : scheduled.values()) {
          // This means some resources were in downloading state. Schedule
          // deletion task for localization dir and tmp dir used for downloading
          Path locRsrcPath = event.getResource().getLocalPath();
          if (locRsrcPath != null) {
            Path locRsrcDirPath = locRsrcPath.getParent();
            paths.add(locRsrcDirPath);
            paths.add(new Path(locRsrcDirPath + "_tmp"));
          }
          event.getResource().unlock();
        }
        if (!paths.isEmpty()) {
          delService.delete(context.getUser(),
              null, paths.toArray(new Path[paths.size()]));
        }
        // The token path is still null when getLocalPathForWrite itself
        // failed; in that case no token file exists and there is nothing to
        // delete, so do not hand a null path to the deletion service.
        if (nmPrivateCTokensPath != null) {
          delService.delete(null, nmPrivateCTokensPath, new Path[] {});
        }
      }
    }

DefaultContainerExecutor.startLocalizer

By default, exec is an instance of DefaultContainerExecutor.

/**
 * Prepares the per-user and per-application directories, copies the
 * localizer token file into the application's working directory and then
 * runs a {@link ContainerLocalizer} in the calling thread.
 *
 * A non-zero result from {@code runLocalization} is turned into an
 * {@link IOException}, so a failed localization is visible to the caller
 * instead of being silently dropped.
 *
 * @param nmPrivateContainerTokensPath source path of the token file
 * @param nmAddr address passed on to {@code runLocalization}
 * @param user user the directories are created for
 * @param appId application id used for the app and app-log directories
 * @param locId localizer id; also used to derive the token file name
 * @param dirsHandler source of the local and log directory lists
 * @throws IOException if the localizer reports a non-zero result, or is
 *         propagated from a called method
 * @throws InterruptedException propagated from the called methods
 */
@Override
  public void startLocalizer(Path nmPrivateContainerTokensPath,
      InetSocketAddress nmAddr, String user, String appId, String locId,
      LocalDirsHandlerService dirsHandler)
      throws IOException, InterruptedException {

    List<String> localDirs = dirsHandler.getLocalDirs();
    List<String> logDirs = dirsHandler.getLogDirs();

    createUserLocalDirs(localDirs, user);
    createUserCacheDirs(localDirs, user);
    createAppDirs(localDirs, user, appId);
    createAppLogDirs(appId, logDirs, user);

    // randomly choose the local directory
    Path appStorageDir = getWorkingDir(localDirs, user, appId);

    String tokenFn = String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT, locId);
    Path tokenDst = new Path(appStorageDir, tokenFn);
    copyFile(nmPrivateContainerTokensPath, tokenDst, user);
    LOG.info("Copying from " + nmPrivateContainerTokensPath + " to " + tokenDst);


    // The localizer gets its own FileContext whose working directory is the
    // application storage dir, which is where the token file was just copied.
    FileContext localizerFc = FileContext.getFileContext(
        lfs.getDefaultFileSystem(), getConf());
    localizerFc.setUMask(lfs.getUMask());
    localizerFc.setWorkingDirectory(appStorageDir);
    LOG.info("Localizer CWD set to " + appStorageDir + " = " 
        + localizerFc.getWorkingDirectory());
    ContainerLocalizer localizer =
        new ContainerLocalizer(localizerFc, user, appId, locId, 
            getPaths(localDirs), RecordFactoryProvider.getRecordFactory(getConf()));
    // TODO: DO it over RPC for maintaining similarity?
    // runLocalization reports failure through its return value rather than by
    // throwing, so the result must be checked here.
    int localizerResult = localizer.runLocalization(nmAddr);
    if (localizerResult != 0) {
      throw new IOException("Localizer " + locId + " for application " + appId
          + " failed with result " + localizerResult);
    }
  }

ContainerLocalizer.runLocalization

/**
 * Performs one complete localization run against the given address.
 *
 * Steps, in order: initialise directories; read the localizer's credentials
 * from the token file (resolved as a relative path) and delete that file;
 * obtain a {@link LocalizationProtocol} proxy as a remote user that carries
 * the localizer token; build a second user context holding all tokens from
 * the credentials; finally create the download pool and run
 * {@code localizeFiles}.
 *
 * @param nmAddr address used to obtain the {@link LocalizationProtocol} proxy
 * @return 0 when {@code localizeFiles} returns normally; -1 when creating the
 *         pool, creating the completion service or {@code localizeFiles}
 *         throws anything (the stack trace is printed to stdout)
 * @throws IOException declared; failures before the download phase (for
 *         example while reading the token file) are not caught here
 * @throws InterruptedException declared by the signature
 */
public int runLocalization(final InetSocketAddress nmAddr)
      throws IOException, InterruptedException {
    // load credentials
    initDirs(conf, user, appId, lfs, localDirs);
    final Credentials creds = new Credentials();
    DataInputStream credFile = null;
    try {
      // assume credentials in cwd
      // TODO: Fix
      Path tokenPath =
          new Path(String.format(TOKEN_FILE_NAME_FMT, localizerId));
      credFile = lfs.open(tokenPath);
      creds.readTokenStorageStream(credFile);
      // Explicitly deleting token file.
      lfs.delete(tokenPath, false);      
    } finally  {
      // Close the stream whether or not reading/deleting succeeded.
      if (credFile != null) {
        credFile.close();
      }
    }
    // create localizer context
    UserGroupInformation remoteUser =
      UserGroupInformation.createRemoteUser(user);
    remoteUser.addToken(creds.getToken(LocalizerTokenIdentifier.KIND));
    // The proxy is created inside doAs so it is obtained as remoteUser.
    final LocalizationProtocol nodeManager =
        remoteUser.doAs(new PrivilegedAction<LocalizationProtocol>() {
          @Override
          public LocalizationProtocol run() {
            return getProxy(nmAddr);
          }
        });

    // create user context
    UserGroupInformation ugi =
      UserGroupInformation.createRemoteUser(user);
    for (Token<? extends TokenIdentifier> token : creds.getAllTokens()) {
      ugi.addToken(token);
    }

    ExecutorService exec = null;
    try {
      exec = createDownloadThreadPool();
      CompletionService<Path> ecs = createCompletionService(exec);
      localizeFiles(nodeManager, ecs, ugi);
      return 0;
    } catch (Throwable e) {
      // Print traces to stdout so that they can be logged by the NM address
      // space.
      e.printStackTrace(System.out);
      return -1;
    } finally {
      // Cleanup runs on both the success and the failure path; the nested
      // finally makes sure closeFileSystems is attempted even if the pool
      // shutdown or context removal throws.
      try {
        if (exec != null) {
          exec.shutdownNow();
        }
        LocalDirAllocator.removeContext(appCacheDirContextName);
      } finally {
        closeFileSystems(ugi);
      }
    }
  }

ContainerLocalizer.createDownloadThreadPool

  /**
   * Creates the executor used for downloads: a single-thread executor whose
   * thread factory uses the name format "ContainerLocalizer Downloader".
   *
   * @return a new single-thread {@link ExecutorService}
   */
  ExecutorService createDownloadThreadPool() {
    ThreadFactoryBuilder factoryBuilder = new ThreadFactoryBuilder();
    factoryBuilder.setNameFormat("ContainerLocalizer Downloader");
    return Executors.newSingleThreadExecutor(factoryBuilder.build());
  }

ContainerLocalizer.localizeFiles

The following part is the most important part.

cs.submit(download(
                new Path(newRsrc.getDestinationDirectory().getFile()),
                newRsrc.getResource(), ugi))

The complete code of localizeFiles

/**
 * Heartbeat loop that drives the downloads.
 *
 * Each iteration builds a status via {@code createStatus()}, sends it with
 * {@code nodemanager.heartbeat} and acts on the returned action:
 * <ul>
 *   <li>{@code LIVE}: for every resource spec in the response that is not
 *       already a key of {@code pendingResources}, submit a download task to
 *       {@code cs} and record its future;</li>
 *   <li>{@code DIE}: cancel every pending future, send one more heartbeat
 *       (a {@code YarnException} from it is ignored) and return.</li>
 * </ul>
 * After handling the action the loop waits up to 1000 ms on
 * {@code cs.poll}; the polled value is discarded.
 *
 * The method returns without signalling an error when the thread is
 * interrupted or when a heartbeat throws {@code YarnException}.
 *
 * @param nodemanager protocol endpoint that receives the heartbeats
 * @param cs completion service that download tasks are submitted to
 * @param ugi user context passed to each download task
 * @throws IOException declared by the signature
 */
protected void localizeFiles(LocalizationProtocol nodemanager,
      CompletionService<Path> cs, UserGroupInformation ugi)
      throws IOException {
    while (true) {
      try {
        LocalizerStatus status = createStatus();
        LocalizerHeartbeatResponse response = nodemanager.heartbeat(status);
        switch (response.getLocalizerAction()) {
        case LIVE:
          List<ResourceLocalizationSpec> newRsrcs = response.getResourceSpecs();
          for (ResourceLocalizationSpec newRsrc : newRsrcs) {
            // Skip resources whose download has already been submitted.
            if (!pendingResources.containsKey(newRsrc.getResource())) {
              pendingResources.put(newRsrc.getResource(), cs.submit(download(
                new Path(newRsrc.getDestinationDirectory().getFile()),
                newRsrc.getResource(), ugi)));
            }
          }
          break;
        case DIE:
          // killall running localizations
          for (Future<Path> pending : pendingResources.values()) {
            pending.cancel(true);
          }
          // Send a final status after the cancellations.
          status = createStatus();
          // ignore response
          try {
            nodemanager.heartbeat(status);
          } catch (YarnException e) { }
          return;
        }
        // Wait up to one second before the next heartbeat; the result of the
        // poll is not used.
        cs.poll(1000, TimeUnit.MILLISECONDS);
      } catch (InterruptedException e) {
        // NOTE(review): the interrupt status is not restored before
        // returning - confirm that callers do not rely on it.
        return;
      } catch (YarnException e) {
        // TODO cleanup
        // NOTE(review): the exception is dropped and the caller sees a normal
        // return - confirm this is intended.
        return;
      }
    }
  }

ContainerLocalizer.createStatus

/**
 * Builds the status report sent with a heartbeat.
 *
 * One {@link LocalResourceStatus} is produced per key of
 * {@code pendingResources}. A resource whose future is not done yet is
 * reported as {@code FETCH_PENDING}. A finished one is reported as
 * {@code FETCH_SUCCESS} with its local path and the disk usage of the path's
 * parent directory, or as {@code FETCH_FAILURE} with a serialized exception
 * when the download failed or was cancelled. Finished resources are removed
 * from {@code pendingResources} while iterating.
 *
 * @return a status carrying this localizer's id and all per-resource reports
 * @throws InterruptedException if interrupted while fetching a future's result
 */
private LocalizerStatus createStatus() throws InterruptedException {
    final List<LocalResourceStatus> reports =
      new ArrayList<LocalResourceStatus>();
    // TODO: Synchronization??
    Iterator<LocalResource> keys = pendingResources.keySet().iterator();
    while (keys.hasNext()) {
      LocalResource resource = keys.next();
      LocalResourceStatus report =
        recordFactory.newRecordInstance(LocalResourceStatus.class);
      report.setResource(resource);
      Future<Path> download = pendingResources.get(resource);
      if (!download.isDone()) {
        report.setStatus(ResourceStatusType.FETCH_PENDING);
      } else {
        try {
          Path localPath = download.get();
          report.setLocalPath(ConverterUtils.getYarnUrlFromPath(localPath));
          File parentDir = new File(localPath.getParent().toUri());
          report.setLocalSize(FileUtil.getDU(parentDir));
          report.setStatus(ResourceStatusType.FETCH_SUCCESS);
        } catch (ExecutionException failed) {
          // The download task itself threw: report its cause.
          report.setStatus(ResourceStatusType.FETCH_FAILURE);
          report.setException(
              SerializedException.newInstance(failed.getCause()));
        } catch (CancellationException cancelled) {
          report.setStatus(ResourceStatusType.FETCH_FAILURE);
          report.setException(SerializedException.newInstance(cancelled));
        }
        // TODO shouldn't remove until ACK
        keys.remove();
      }
      reports.add(report);
    }
    LocalizerStatus localizerStatus =
      recordFactory.newRecordInstance(LocalizerStatus.class);
    localizerStatus.setLocalizerId(localizerId);
    localizerStatus.addAllResources(reports);
    return localizerStatus;
  }
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值