LocalizerTracker
LocalizerTracker manages both public public localizer and others.
class LocalizerTracker extends AbstractService implements EventHandler<LocalizerEvent> {
private final PublicLocalizer publicLocalizer;
private final Map<String,LocalizerRunner> privLocalizers;
LocalizerTracker(Configuration conf) {
this(conf, new HashMap<String,LocalizerRunner>());
}
LocalizerTracker(Configuration conf,
Map<String,LocalizerRunner> privLocalizers) {
super(LocalizerTracker.class.getName());
this.publicLocalizer = new PublicLocalizer(conf);
this.privLocalizers = privLocalizers;
}
LocalizerTracker.handle
In the handle method of LocalizerRunner, it create a LocalizerRunner for every container, and run it. Afterwards, it call localizer.addResource(req);
to propagate a message.
@Override
public void handle(LocalizerEvent event) {
String locId = event.getLocalizerId();
switch (event.getType()) {
case REQUEST_RESOURCE_LOCALIZATION:
// 0) find running localizer or start new thread
LocalizerResourceRequestEvent req =
(LocalizerResourceRequestEvent)event;
switch (req.getVisibility()) {
case PUBLIC:
publicLocalizer.addResource(req);
break;
case PRIVATE:
case APPLICATION:
synchronized (privLocalizers) {
LocalizerRunner localizer = privLocalizers.get(locId);
if (null == localizer) {
LOG.info("Created localizer for " + locId);
localizer = new LocalizerRunner(req.getContext(), locId);
privLocalizers.put(locId, localizer);
localizer.start();
}
// 1) propagate event
localizer.addResource(req);
}
break;
}
break;
}
}
LocalizerRunner
class LocalizerRunner extends Thread {
final LocalizerContext context;
final String localizerId;
final Map<LocalResourceRequest,LocalizerResourceRequestEvent> scheduled;
// Its a shared list between Private Localizer and dispatcher thread.
final List<LocalizerResourceRequestEvent> pending;
private AtomicBoolean killContainerLocalizer = new AtomicBoolean(false);
// TODO: threadsafe, use outer?
private final RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(getConfig());
LocalizerRunner(LocalizerContext context, String localizerId) {
super("LocalizerRunner for " + localizerId);
this.context = context;
this.localizerId = localizerId;
this.pending =
Collections
.synchronizedList(new ArrayList<LocalizerResourceRequestEvent>());
this.scheduled =
new HashMap<LocalResourceRequest, LocalizerResourceRequestEvent>();
}
LocalizerRunner.addResource
public void addResource(LocalizerResourceRequestEvent request) {
pending.add(request);
}
LocalizerRunner.run
The important part is
exec.startLocalizer(nmPrivateCTokensPath, localizationServerAddress,
context.getUser(),
ConverterUtils.toString(
context.getContainerId().
getApplicationAttemptId().getApplicationId()),
localizerId,
dirsHandler);
exec is parsed as constructor parameter. By default, exec is instance of DefaultContainerExecutor.
private final ContainerExecutor exec;
The complete code of run
public void run() {
Path nmPrivateCTokensPath = null;
Throwable exception = null;
try {
// Get nmPrivateDir
nmPrivateCTokensPath =
dirsHandler.getLocalPathForWrite(
NM_PRIVATE_DIR + Path.SEPARATOR
+ String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT,
localizerId));
// 0) init queue, etc.
// 1) write credentials to private dir
writeCredentials(nmPrivateCTokensPath);
// 2) exec initApplication and wait
List<String> localDirs = getInitializedLocalDirs();
List<String> logDirs = getInitializedLogDirs();
if (dirsHandler.areDisksHealthy()) {
exec.startLocalizer(nmPrivateCTokensPath, localizationServerAddress,
context.getUser(),
ConverterUtils.toString(
context.getContainerId().
getApplicationAttemptId().getApplicationId()),
localizerId,
dirsHandler);
} else {
throw new IOException("All disks failed. "
+ dirsHandler.getDisksHealthReport(false));
}
// TODO handle ExitCodeException separately?
} catch (FSError fe) {
exception = fe;
} catch (Exception e) {
exception = e;
} finally {
if (exception != null) {
LOG.info("Localizer failed", exception);
// On error, report failure to Container and signal ABORT
// Notify resource of failed localization
ContainerId cId = context.getContainerId();
dispatcher.getEventHandler().handle(new ContainerResourceFailedEvent(
cId, null, exception.getMessage()));
}
List<Path> paths = new ArrayList<Path>();
for (LocalizerResourceRequestEvent event : scheduled.values()) {
// This means some resources were in downloading state. Schedule
// deletion task for localization dir and tmp dir used for downloading
Path locRsrcPath = event.getResource().getLocalPath();
if (locRsrcPath != null) {
Path locRsrcDirPath = locRsrcPath.getParent();
paths.add(locRsrcDirPath);
paths.add(new Path(locRsrcDirPath + "_tmp"));
}
event.getResource().unlock();
}
if (!paths.isEmpty()) {
delService.delete(context.getUser(),
null, paths.toArray(new Path[paths.size()]));
}
delService.delete(null, nmPrivateCTokensPath, new Path[] {});
}
}
DefaultContainerExecutor.startLocalizer
By default, exec is instance of DefaultContainerExecutor.
@Override
public void startLocalizer(Path nmPrivateContainerTokensPath,
InetSocketAddress nmAddr, String user, String appId, String locId,
LocalDirsHandlerService dirsHandler)
throws IOException, InterruptedException {
List<String> localDirs = dirsHandler.getLocalDirs();
List<String> logDirs = dirsHandler.getLogDirs();
createUserLocalDirs(localDirs, user);
createUserCacheDirs(localDirs, user);
createAppDirs(localDirs, user, appId);
createAppLogDirs(appId, logDirs, user);
// randomly choose the local directory
Path appStorageDir = getWorkingDir(localDirs, user, appId);
String tokenFn = String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT, locId);
Path tokenDst = new Path(appStorageDir, tokenFn);
copyFile(nmPrivateContainerTokensPath, tokenDst, user);
LOG.info("Copying from " + nmPrivateContainerTokensPath + " to " + tokenDst);
FileContext localizerFc = FileContext.getFileContext(
lfs.getDefaultFileSystem(), getConf());
localizerFc.setUMask(lfs.getUMask());
localizerFc.setWorkingDirectory(appStorageDir);
LOG.info("Localizer CWD set to " + appStorageDir + " = "
+ localizerFc.getWorkingDirectory());
ContainerLocalizer localizer =
new ContainerLocalizer(localizerFc, user, appId, locId,
getPaths(localDirs), RecordFactoryProvider.getRecordFactory(getConf()));
// TODO: DO it over RPC for maintaining similarity?
localizer.runLocalization(nmAddr);
}
ContainerLocalizer.runLocalization
public int runLocalization(final InetSocketAddress nmAddr)
throws IOException, InterruptedException {
// load credentials
initDirs(conf, user, appId, lfs, localDirs);
final Credentials creds = new Credentials();
DataInputStream credFile = null;
try {
// assume credentials in cwd
// TODO: Fix
Path tokenPath =
new Path(String.format(TOKEN_FILE_NAME_FMT, localizerId));
credFile = lfs.open(tokenPath);
creds.readTokenStorageStream(credFile);
// Explicitly deleting token file.
lfs.delete(tokenPath, false);
} finally {
if (credFile != null) {
credFile.close();
}
}
// create localizer context
UserGroupInformation remoteUser =
UserGroupInformation.createRemoteUser(user);
remoteUser.addToken(creds.getToken(LocalizerTokenIdentifier.KIND));
final LocalizationProtocol nodeManager =
remoteUser.doAs(new PrivilegedAction<LocalizationProtocol>() {
@Override
public LocalizationProtocol run() {
return getProxy(nmAddr);
}
});
// create user context
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser(user);
for (Token<? extends TokenIdentifier> token : creds.getAllTokens()) {
ugi.addToken(token);
}
ExecutorService exec = null;
try {
exec = createDownloadThreadPool();
CompletionService<Path> ecs = createCompletionService(exec);
localizeFiles(nodeManager, ecs, ugi);
return 0;
} catch (Throwable e) {
// Print traces to stdout so that they can be logged by the NM address
// space.
e.printStackTrace(System.out);
return -1;
} finally {
try {
if (exec != null) {
exec.shutdownNow();
}
LocalDirAllocator.removeContext(appCacheDirContextName);
} finally {
closeFileSystems(ugi);
}
}
}
ContainerLocalizer.createDownloadThreadPool
ExecutorService createDownloadThreadPool() {
return Executors.newSingleThreadExecutor(new ThreadFactoryBuilder()
.setNameFormat("ContainerLocalizer Downloader").build());
}
ContainerLocalizer.localizeFiles
The following part is the most important part.
cs.submit(download(
new Path(newRsrc.getDestinationDirectory().getFile())
protected void localizeFiles(LocalizationProtocol nodemanager,
CompletionService<Path> cs, UserGroupInformation ugi)
throws IOException {
while (true) {
try {
LocalizerStatus status = createStatus();
LocalizerHeartbeatResponse response = nodemanager.heartbeat(status);
switch (response.getLocalizerAction()) {
case LIVE:
List<ResourceLocalizationSpec> newRsrcs = response.getResourceSpecs();
for (ResourceLocalizationSpec newRsrc : newRsrcs) {
if (!pendingResources.containsKey(newRsrc.getResource())) {
pendingResources.put(newRsrc.getResource(), cs.submit(download(
new Path(newRsrc.getDestinationDirectory().getFile()),
newRsrc.getResource(), ugi)));
}
}
break;
case DIE:
// killall running localizations
for (Future<Path> pending : pendingResources.values()) {
pending.cancel(true);
}
status = createStatus();
// ignore response
try {
nodemanager.heartbeat(status);
} catch (YarnException e) { }
return;
}
cs.poll(1000, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
return;
} catch (YarnException e) {
// TODO cleanup
return;
}
}
}
ContainerLocalizer.createStatus
private LocalizerStatus createStatus() throws InterruptedException {
final List<LocalResourceStatus> currentResources =
new ArrayList<LocalResourceStatus>();
// TODO: Synchronization??
for (Iterator<LocalResource> i = pendingResources.keySet().iterator();
i.hasNext();) {
LocalResource rsrc = i.next();
LocalResourceStatus stat =
recordFactory.newRecordInstance(LocalResourceStatus.class);
stat.setResource(rsrc);
Future<Path> fPath = pendingResources.get(rsrc);
if (fPath.isDone()) {
try {
Path localPath = fPath.get();
stat.setLocalPath(
ConverterUtils.getYarnUrlFromPath(localPath));
stat.setLocalSize(
FileUtil.getDU(new File(localPath.getParent().toUri())));
stat.setStatus(ResourceStatusType.FETCH_SUCCESS);
} catch (ExecutionException e) {
stat.setStatus(ResourceStatusType.FETCH_FAILURE);
stat.setException(SerializedException.newInstance(e.getCause()));
} catch (CancellationException e) {
stat.setStatus(ResourceStatusType.FETCH_FAILURE);
stat.setException(SerializedException.newInstance(e));
}
// TODO shouldn't remove until ACK
i.remove();
} else {
stat.setStatus(ResourceStatusType.FETCH_PENDING);
}
currentResources.add(stat);
}
LocalizerStatus status =
recordFactory.newRecordInstance(LocalizerStatus.class);
status.setLocalizerId(localizerId);
status.addAllResources(currentResources);
return status;
}