use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent in project hadoop by apache.
the class RecoveredContainerLaunch method call.
/**
 * Wait on the process specified in pid file and return its exit code.
 *
 * <p>Dispatches {@code CONTAINER_LAUNCHED} up front, then looks up the pid
 * file and, if one is found, activates the container with the executor and
 * asks the executor to reacquire it to obtain an exit code. Unless the wait
 * was interrupted, the container is marked completed, deactivated, and its
 * exit code is written to the NM state store. Finally a success or failure
 * exit event is dispatched according to the exit code.
 *
 * @return the exit code from {@code reacquireContainer}, or
 *         {@code ExitCode.LOST.getExitCode()} when no exit code was obtained
 *         (pid file missing, interruption, or I/O failure)
 */
@SuppressWarnings("unchecked")
@Override
public Integer call() {
  // Default to LOST; only overwritten when the executor reports an exit code.
  int retCode = ExitCode.LOST.getExitCode();
  ContainerId containerId = container.getContainerId();
  String appIdStr = containerId.getApplicationAttemptId().getApplicationId().toString();
  String containerIdStr = containerId.toString();
  dispatcher.getEventHandler().handle(new ContainerEvent(containerId, ContainerEventType.CONTAINER_LAUNCHED));
  boolean notInterrupted = true;
  try {
    File pidFile = locatePidFile(appIdStr, containerIdStr);
    if (pidFile != null) {
      String pidPathStr = pidFile.getPath();
      pidFilePath = new Path(pidPathStr);
      exec.activateContainer(containerId, pidFilePath);
      retCode = exec.reacquireContainer(new ContainerReacquisitionContext.Builder().setContainer(container).setUser(container.getUser()).setContainerId(containerId).build());
    } else {
      LOG.warn("Unable to locate pid file for container " + containerIdStr);
    }
  } catch (InterruptedException | InterruptedIOException e) {
    LOG.warn("Interrupted while waiting for exit code from " + containerId);
    // Skip the completion bookkeeping below; retCode keeps its LOST default.
    notInterrupted = false;
  } catch (IOException e) {
    LOG.error("Unable to recover container " + containerIdStr, e);
  } finally {
    if (notInterrupted) {
      this.completed.set(true);
      exec.deactivateContainer(containerId);
      try {
        getContext().getNMStateStore().storeContainerCompleted(containerId, retCode);
      } catch (IOException e) {
        // Keep the cause in the log so state-store failures can be diagnosed.
        LOG.error("Unable to set exit code for container " + containerId, e);
      }
    }
  }
  if (retCode != 0) {
    LOG.warn("Recovered container exited with a non-zero exit code " + retCode);
    this.dispatcher.getEventHandler().handle(new ContainerExitEvent(containerId, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, retCode, "Container exited with a non-zero exit code " + retCode));
    return retCode;
  }
  LOG.info("Recovered container " + containerId + " succeeded");
  dispatcher.getEventHandler().handle(new ContainerEvent(containerId, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS));
  return 0;
}
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent in project hadoop by apache.
the class ContainerLaunch method handleContainerExitWithFailure.
/**
 * Tries to tail and fetch TAIL_SIZE_IN_BYTES of data from the error log.
 * ErrorLog filename is not fixed and depends upon app, hence file name
 * pattern is used.
 *
 * <p>The tail (if any could be read) is appended to {@code diagnosticInfo},
 * and a {@code CONTAINER_EXITED_WITH_FAILURE} event carrying the resulting
 * diagnostics is always dispatched, even when reading the log fails with an
 * {@link IOException}.
 *
 * @param containerID id of the container, used in the dispatched exit event
 * @param ret exit code placed in the dispatched exit event
 * @param containerLogDir directory that is globbed with the configured
 *        stderr file name pattern
 * @param diagnosticInfo diagnostics collected so far; it is logged, extended
 *        in place with the error file names and log tail, and sent with the
 *        exit event
 */
@SuppressWarnings("unchecked")
protected void handleContainerExitWithFailure(ContainerId containerID, int ret, Path containerLogDir, StringBuilder diagnosticInfo) {
  LOG.warn(diagnosticInfo);
  String errorFileNamePattern = conf.get(YarnConfiguration.NM_CONTAINER_STDERR_PATTERN, YarnConfiguration.DEFAULT_NM_CONTAINER_STDERR_PATTERN);
  FSDataInputStream errorFileIS = null;
  try {
    FileSystem fileSystem = FileSystem.getLocal(conf).getRaw();
    FileStatus[] errorFileStatuses = fileSystem.globStatus(new Path(containerLogDir, errorFileNamePattern));
    if (errorFileStatuses != null && errorFileStatuses.length != 0) {
      long configuredTailSize = conf.getLong(YarnConfiguration.NM_CONTAINER_STDERR_BYTES, YarnConfiguration.DEFAULT_NM_CONTAINER_STDERR_BYTES);
      // The tail size becomes an array length below. Clamp it so that a
      // negative or oversized setting cannot raise an unchecked exception
      // (which would skip the exit event) or overflow the int cast.
      long tailSizeInBytes = Math.min(Math.max(configuredTailSize, 0L), (long) Integer.MAX_VALUE);
      Path errorFile = errorFileStatuses[0].getPath();
      long fileSize = errorFileStatuses[0].getLen();
      // If more than one file matches the pattern, tail the most recently
      // modified file, and also append the file names in the diagnosticInfo
      if (errorFileStatuses.length > 1) {
        String[] errorFileNames = new String[errorFileStatuses.length];
        long latestModifiedTime = errorFileStatuses[0].getModificationTime();
        errorFileNames[0] = errorFileStatuses[0].getPath().getName();
        for (int i = 1; i < errorFileStatuses.length; i++) {
          errorFileNames[i] = errorFileStatuses[i].getPath().getName();
          if (errorFileStatuses[i].getModificationTime() > latestModifiedTime) {
            latestModifiedTime = errorFileStatuses[i].getModificationTime();
            errorFile = errorFileStatuses[i].getPath();
            fileSize = errorFileStatuses[i].getLen();
          }
        }
        diagnosticInfo.append("Error files: ").append(StringUtils.join(", ", errorFileNames)).append(".\n");
      }
      // Read the whole file when it is shorter than the requested tail.
      long startPosition = Math.max(0L, fileSize - tailSizeInBytes);
      int bufferSize = (int) Math.min(fileSize, tailSizeInBytes);
      byte[] tailBuffer = new byte[bufferSize];
      errorFileIS = fileSystem.open(errorFile);
      errorFileIS.readFully(startPosition, tailBuffer);
      diagnosticInfo.append("Last ").append(tailSizeInBytes).append(" bytes of ").append(errorFile.getName()).append(" :\n").append(new String(tailBuffer, StandardCharsets.UTF_8));
    }
  } catch (IOException e) {
    // Best effort only: the exit event below is still sent without the tail.
    LOG.error("Failed to get tail of the container's error log file", e);
  } finally {
    IOUtils.cleanup(LOG, errorFileIS);
  }
  this.dispatcher.getEventHandler().handle(new ContainerExitEvent(containerID, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, diagnosticInfo.toString()));
}
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent in project hadoop by apache.
the class ContainerLaunch method handleContainerExitCode.
/**
 * Dispatches the container event that corresponds to the given exit code.
 *
 * <ul>
 *   <li>{@code FORCE_KILLED} or {@code TERMINATED}: sends
 *       {@code CONTAINER_KILLED_ON_REQUEST}, unless the container was killed
 *       before it started, in which case nothing is sent.</li>
 *   <li>Any other non-zero code: delegates to
 *       {@code handleContainerExitWithFailure}.</li>
 *   <li>Zero: sends {@code CONTAINER_EXITED_WITH_SUCCESS}.</li>
 * </ul>
 *
 * @param exitCode exit code the container finished with
 * @param containerLogDir container log directory, forwarded to the failure
 *        handler
 */
@SuppressWarnings("unchecked")
protected void handleContainerExitCode(int exitCode, Path containerLogDir) {
  final ContainerId id = container.getContainerId();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Container " + id + " completed with exit code " + exitCode);
  }

  final StringBuilder diagnostics = new StringBuilder("Container exited with a non-zero exit code ")
      .append(exitCode)
      .append(". ");

  // The kill check comes first, exactly as before, so kill codes always win.
  final boolean killed = exitCode == ExitCode.FORCE_KILLED.getExitCode()
      || exitCode == ExitCode.TERMINATED.getExitCode();
  if (killed) {
    // If Container was killed before starting... NO need to do this.
    if (killedBeforeStart) {
      return;
    }
    dispatcher.getEventHandler().handle(
        new ContainerExitEvent(id, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, exitCode, diagnostics.toString()));
    return;
  }

  if (exitCode == 0) {
    LOG.info("Container " + id + " succeeded ");
    dispatcher.getEventHandler().handle(
        new ContainerEvent(id, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS));
    return;
  }

  handleContainerExitWithFailure(id, exitCode, containerLogDir, diagnostics);
}
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent in project hadoop by apache.
the class DummyContainerManager method createContainersLauncher.
/**
 * Builds a {@code ContainersLauncher} whose {@code handle} method does not
 * launch anything: it answers launcher events directly with container events.
 *
 * @param context context passed through to the launcher constructor
 * @param exec executor passed through to the launcher constructor
 * @return the stubbed launcher
 */
@Override
@SuppressWarnings("unchecked")
protected ContainersLauncher createContainersLauncher(Context context, ContainerExecutor exec) {
  return new ContainersLauncher(context, super.dispatcher, exec, super.dirsHandler, this) {
    /**
     * Replies to LAUNCH_CONTAINER with CONTAINER_LAUNCHED and to
     * CLEANUP_CONTAINER with CONTAINER_KILLED_ON_REQUEST (exit code 0).
     * Every other event type is ignored.
     */
    @Override
    public void handle(ContainersLauncherEvent event) {
      final Container target = event.getContainer();
      final ContainerId targetId = target.getContainerId();
      switch (event.getType()) {
        case LAUNCH_CONTAINER:
          dispatcher.getEventHandler().handle(
              new ContainerEvent(targetId, ContainerEventType.CONTAINER_LAUNCHED));
          break;
        case CLEANUP_CONTAINER:
          dispatcher.getEventHandler().handle(
              new ContainerExitEvent(
                  targetId,
                  ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
                  0,
                  "Container exited with exit code 0."));
          break;
        default:
          // Remaining event types need no reply from this stub.
          break;
      }
    }
  };
}
use of org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent in project hadoop by apache.
the class ContainerLaunch method call.
/**
 * Prepares the container's launch environment, launches it, and reports how
 * it exited.
 *
 * <p>Returns 0 straight away when {@code validateContainerState()} is false.
 * Otherwise it expands variables in the launch commands and environment,
 * resolves the NM-private script/token paths and the container work dir,
 * writes the launch script and the token file, and calls
 * {@code launchContainer}. Any {@code Throwable} raised along the way is
 * logged and reported as {@code CONTAINER_EXITED_WITH_FAILURE}. On normal
 * completion the exit code is handed to {@code handleContainerExitCode}.
 * {@code setContainerCompletedStatus(ret)} runs in both cases.
 *
 * @return 0 when the container state check fails; otherwise the value of
 *         {@code ret}: the result of {@code launchContainer}, or
 *         {@code ContainerExitStatus.DISKS_FAILED} when the disk health check
 *         fails, or -1 when another failure occurs before a launch result is
 *         obtained
 */
@Override
// dispatcher not typed
@SuppressWarnings("unchecked")
public Integer call() {
if (!validateContainerState()) {
return 0;
}
final ContainerLaunchContext launchContext = container.getLaunchContext();
ContainerId containerID = container.getContainerId();
String containerIdStr = containerID.toString();
final List<String> command = launchContext.getCommands();
// Stays -1 unless the disk check fails or launchContainer returns a value.
int ret = -1;
// Assigned inside the try; the catch block returns, so it is always set
// by the time it is read after the try statement.
Path containerLogDir;
try {
Map<Path, List<String>> localResources = getLocalizedResources();
final String user = container.getUser();
// /////////////////////////// Variable expansion
// Before the container script gets written out.
List<String> newCmds = new ArrayList<String>(command.size());
String appIdStr = app.getAppId().toString();
String relativeContainerLogDir = ContainerLaunch.getRelativeContainerLogDir(appIdStr, containerIdStr);
containerLogDir = dirsHandler.getLogPathForWrite(relativeContainerLogDir, false);
recordContainerLogDir(containerID, containerLogDir.toString());
for (String str : command) {
// TODO: Should we instead work via symlinks without this grammar?
newCmds.add(expandEnvironment(str, containerLogDir));
}
launchContext.setCommands(newCmds);
Map<String, String> environment = launchContext.getEnvironment();
// Expand variables in every environment value. No copy is made: the
// entries of the map returned by the launch context are updated in place.
for (Entry<String, String> entry : environment.entrySet()) {
String value = entry.getValue();
value = expandEnvironment(value, containerLogDir);
entry.setValue(value);
}
// /////////////////////////// End of variable expansion
FileContext lfs = FileContext.getLocalFSFileContext();
// Paths for the launch script, the token file and the classpath jar dir,
// all resolved under the container's private dir.
Path nmPrivateContainerScriptPath = dirsHandler.getLocalPathForWrite(getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR + CONTAINER_SCRIPT);
Path nmPrivateTokensPath = dirsHandler.getLocalPathForWrite(getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT, containerIdStr));
Path nmPrivateClasspathJarDir = dirsHandler.getLocalPathForWrite(getContainerPrivateDir(appIdStr, containerIdStr));
// Declared outside the inner try so the finally block can close them.
DataOutputStream containerScriptOutStream = null;
DataOutputStream tokensOutStream = null;
// Select the working directory for the container
Path containerWorkDir = dirsHandler.getLocalPathForWrite(ContainerLocalizer.USERCACHE + Path.SEPARATOR + user + Path.SEPARATOR + ContainerLocalizer.APPCACHE + Path.SEPARATOR + appIdStr + Path.SEPARATOR + containerIdStr, LocalDirAllocator.SIZE_UNKNOWN, false);
recordContainerWorkDir(containerID, containerWorkDir.toString());
String pidFileSubpath = getPidFileSubpath(appIdStr, containerIdStr);
// pid file should be in nm private dir so that it is not
// accessible by users
pidFilePath = dirsHandler.getLocalPathForWrite(pidFileSubpath);
List<String> localDirs = dirsHandler.getLocalDirs();
List<String> logDirs = dirsHandler.getLogDirs();
List<String> filecacheDirs = getNMFilecacheDirs(localDirs);
List<String> userLocalDirs = getUserLocalDirs(localDirs);
List<String> containerLocalDirs = getContainerLocalDirs(localDirs);
List<String> containerLogDirs = getContainerLogDirs(logDirs);
// Abort before writing anything when the disks are reported unhealthy;
// ret is set first so the catch block below reports DISKS_FAILED.
if (!dirsHandler.areDisksHealthy()) {
ret = ContainerExitStatus.DISKS_FAILED;
throw new IOException("Most of the disks failed. " + dirsHandler.getDisksHealthReport(false));
}
try {
// /////////// Write out the container-script in the nmPrivate space.
// One <localDir>/usercache/<user>/appcache/<appId> path per local dir.
List<Path> appDirs = new ArrayList<Path>(localDirs.size());
for (String localDir : localDirs) {
Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
Path userdir = new Path(usersdir, user);
Path appsdir = new Path(userdir, ContainerLocalizer.APPCACHE);
appDirs.add(new Path(appsdir, appIdStr));
}
containerScriptOutStream = lfs.create(nmPrivateContainerScriptPath, EnumSet.of(CREATE, OVERWRITE));
// Set the token location too.
environment.put(ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME, new Path(containerWorkDir, FINAL_CONTAINER_TOKENS_FILE).toUri().getPath());
// Sanitize the container's environment
sanitizeEnv(environment, containerWorkDir, appDirs, userLocalDirs, containerLogDirs, localResources, nmPrivateClasspathJarDir);
exec.prepareContainer(new ContainerPrepareContext.Builder().setContainer(container).setLocalizedResources(localResources).setUser(user).setContainerLocalDirs(containerLocalDirs).setCommands(launchContext.getCommands()).build());
// Write out the environment
exec.writeLaunchEnv(containerScriptOutStream, environment, localResources, launchContext.getCommands(), new Path(containerLogDirs.get(0)), user);
// /////////// End of writing out container-script
// /////////// Write out the container-tokens in the nmPrivate space.
tokensOutStream = lfs.create(nmPrivateTokensPath, EnumSet.of(CREATE, OVERWRITE));
Credentials creds = container.getCredentials();
creds.writeTokenStorageToStream(tokensOutStream);
// /////////// End of writing out container-tokens
} finally {
// Close both streams whether or not writing succeeded.
IOUtils.cleanup(LOG, containerScriptOutStream, tokensOutStream);
}
ret = launchContainer(new ContainerStartContext.Builder().setContainer(container).setLocalizedResources(localResources).setNmPrivateContainerScriptPath(nmPrivateContainerScriptPath).setNmPrivateTokensPath(nmPrivateTokensPath).setUser(user).setAppId(appIdStr).setContainerWorkDir(containerWorkDir).setLocalDirs(localDirs).setLogDirs(logDirs).setFilecacheDirs(filecacheDirs).setUserLocalDirs(userLocalDirs).setContainerLocalDirs(containerLocalDirs).setContainerLogDirs(containerLogDirs).build());
} catch (Throwable e) {
LOG.warn("Failed to launch container.", e);
// NOTE(review): e.getMessage() can be null for some throwables, in which
// case the event carries null diagnostics - confirm the consumer copes.
dispatcher.getEventHandler().handle(new ContainerExitEvent(containerID, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, e.getMessage()));
return ret;
} finally {
// Runs on both the normal path and the failure path (before its return).
setContainerCompletedStatus(ret);
}
// Normal completion: pick the exit event that matches the exit code.
handleContainerExitCode(ret, containerLogDir);
return ret;
}
Aggregations