概述
NM端launch container会经历的event: LAUNCH_CONTAINER类型的ContainersLauncherEvent
YARN application可能会因为container启动失败而导致application failed,从以下日志的堆栈可以大致看出launch container的流程:
Application application_1555482591931_0853 failed 2 times due to AM Container for appattempt_1555482591931_0853_000002 exited with exitCode: 10
For more detailed output, check application tracking page:http://hadoop5:8088/proxy/application_1555482591931_0853/Then, click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_e156_1555482591931_0853_02_000001
Exit code: 10
Stack trace: ExitCodeException exitCode=10:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:538)
at org.apache.hadoop.util.Shell.run(Shell.java:455)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:715)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:212)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Container exited with a non-zero exit code 10
Failing this attempt. Failing the application.
ContainerImpl将ContainersLauncherEvent提交给AsyncDispatcher
/**
 * Hands this container over to the dispatcher so it can be started.
 *
 * <p>When the container is currently {@code PAUSED}, a
 * {@link ContainerResumeEvent} is dispatched and nothing else happens.
 * Otherwise the launch start time is read from {@code clock} and a
 * {@link ContainersLauncherEvent} is dispatched whose type is chosen from
 * {@code recoveredStatus}: {@code RECOVER_CONTAINER} for a previously
 * launched container, {@code RECOVER_PAUSED_CONTAINER} for a previously
 * paused one, and {@code LAUNCH_CONTAINER} in every other case.
 */
public void sendLaunchEvent() {
  // Guard: a paused container is resumed instead of being launched again.
  if (ContainerState.PAUSED == getContainerState()) {
    dispatcher.getEventHandler().handle(
        new ContainerResumeEvent(containerId,
            "Container Resumed as some resources freed up"));
    return;
  }

  // Choose the launcher event type from the recovery status.
  final ContainersLauncherEventType eventType;
  if (recoveredStatus == RecoveredContainerStatus.LAUNCHED) {
    // The container was already launched before; try to recover it.
    eventType = ContainersLauncherEventType.RECOVER_CONTAINER;
  } else if (recoveredStatus == RecoveredContainerStatus.PAUSED) {
    eventType = ContainersLauncherEventType.RECOVER_PAUSED_CONTAINER;
  } else {
    eventType = ContainersLauncherEventType.LAUNCH_CONTAINER;
  }

  // Record the start time before the event is handed to the dispatcher.
  containerLaunchStartTime = clock.getTime();
  final ContainersLauncherEvent launchEvent =
      new ContainersLauncherEvent(this, eventType);
  dispatcher.getEventHandler().handle(launchEvent);
}
ContainerManagerImpl注册处理ContainersLauncherEvent的处理器
/**
 * Builds the container manager: creates its own {@link AsyncDispatcher},
 * creates the sub-services (resource localization, containers launcher,
 * container scheduler, auxiliary services, containers monitor) and adds each
 * of them via {@code addService}, then registers one handler per event-type
 * enum with the dispatcher.
 *
 * @param context NM context; stored, passed to the services created here,
 *     and read for its {@code Configuration}
 * @param exec container executor passed to the localization-service,
 *     containers-launcher and containers-monitor factory methods
 * @param deletionContext stored as {@code deletionService} and passed to the
 *     localization service and the auxiliary services
 * @param nodeStatusUpdater stored in the {@code nodeStatusUpdater} field
 * @param metrics stored and passed to the localization-service factory
 * @param dirsHandler stored and wrapped in an
 *     {@code AuxiliaryLocalPathHandlerImpl} for the auxiliary services
 */
public ContainerManagerImpl(Context context, ContainerExecutor exec,
DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
NodeManagerMetrics metrics, LocalDirsHandlerService dirsHandler) {
super(ContainerManagerImpl.class.getName());
this.context = context;
this.dirsHandler = dirsHandler;
// ContainerManager level dispatcher.
dispatcher = new AsyncDispatcher("NM ContainerManager dispatcher");
this.deletionService = deletionContext;
this.metrics = metrics;
rsrcLocalizationSrvc =
createResourceLocalizationService(exec, deletionContext, context,
metrics);
addService(rsrcLocalizationSrvc);
// createContainersLauncher reads this.dispatcher and dirsHandler, so this
// call has to stay after both assignments above.
containersLauncher = createContainersLauncher(context, exec);
addService(containersLauncher);
this.nodeStatusUpdater = nodeStatusUpdater;
this.containerScheduler = createContainerScheduler(context);
addService(containerScheduler);
AuxiliaryLocalPathHandler auxiliaryLocalPathHandler =
new AuxiliaryLocalPathHandlerImpl(dirsHandler);
// Start configurable services
auxiliaryServices = new AuxServices(auxiliaryLocalPathHandler,
this.context, this.deletionService);
auxiliaryServices.registerServiceListener(this);
addService(auxiliaryServices);
// initialize the metrics publisher if the timeline service v.2 is enabled
// and the system publisher is enabled
Configuration conf = context.getConf();
if (YarnConfiguration.timelineServiceV2Enabled(conf) &&
YarnConfiguration.systemMetricsPublisherEnabled(conf)) {
LOG.info("YARN system metrics publishing service is enabled");
nmMetricsPublisher = createNMTimelinePublisher(context);
context.setNMTimelinePublisher(nmMetricsPublisher);
}
this.containersMonitor = createContainersMonitor(exec);
addService(this.containersMonitor);
// Register one handler per event-type enum; the dispatcher is given the
// enum class together with the handler object for that class.
dispatcher.register(ContainerEventType.class,
new ContainerEventDispatcher());
dispatcher.register(ApplicationEventType.class,
createApplicationEventDispatcher());
// NOTE(review): within this constructor nmMetricsPublisher is only assigned
// inside the branch above, so it is presumably null here when publishing is
// disabled -- confirm the wrapper tolerates that.
dispatcher.register(LocalizationEventType.class,
new LocalizationEventHandlerWrapper(rsrcLocalizationSrvc,
nmMetricsPublisher));
dispatcher.register(AuxServicesEventType.class, auxiliaryServices);
dispatcher.register(ContainersMonitorEventType.class, containersMonitor);
// Register the handler for ContainersLauncherEvent: containersLauncher is
// registered for ContainersLauncherEventType.
dispatcher.register(ContainersLauncherEventType.class, containersLauncher);
dispatcher.register(ContainerSchedulerEventType.class, containerScheduler);
// The dispatcher itself is added as a service last, after all handlers
// have been registered.
addService(dispatcher);
// A single ReentrantReadWriteLock supplies both the read and write lock.
ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
this.readLock = lock.readLock();
this.writeLock = lock.writeLock();
}
/**
 * Factory method for the {@link ContainersLauncher} of this container
 * manager. Being {@code protected}, it can be overridden by subclasses.
 *
 * @param context passed through to the launcher
 * @param exec container executor passed through to the launcher
 * @return a new launcher built from the given arguments plus this manager's
 *     dispatcher, its {@code dirsHandler}, and this manager itself
 */
protected ContainersLauncher createContainersLauncher(Context context,
    ContainerExecutor exec) {
  final ContainersLauncher launcher =
      new ContainersLauncher(context, this.dispatcher, exec, dirsHandler, this);
  return launcher;
}
ContainersLauncherEvent的处理器 —— ContainersLauncher
内部通过线程池运行ContainerLaunch