use of com.microsoft.frameworklauncher.common.exceptions.NonTransientException in project pai by Microsoft.
the class RequestManager method pullRequest.
/**
 * Pulls the current requests from ZK and applies them locally: first the
 * LauncherRequest, then the AggregatedFrameworkRequest of the configured Framework.
 *
 * @throws NonTransientException if the AggregatedFrameworkRequest lookup reports that
 *                               the node does not exist (NoNodeException)
 * @throws Exception             for any other failure raised while pulling or applying
 */
private void pullRequest() throws Exception {
  // Step 1: LauncherRequest. It is expected to never be null.
  LOGGER.logDebug("Pulling LauncherRequest");
  LauncherRequest pulledLauncherRequest = zkStore.getLauncherRequest();
  LOGGER.logDebug("Pulled LauncherRequest");
  updateLauncherRequest(pulledLauncherRequest);

  // Step 2: AggregatedFrameworkRequest.
  AggregatedFrameworkRequest pulledFrameworkRequest;
  try {
    LOGGER.logDebug("Pulling AggregatedFrameworkRequest");
    String frameworkName = conf.getFrameworkName();
    pulledFrameworkRequest = zkStore.getAggregatedFrameworkRequest(frameworkName);
    LOGGER.logDebug("Pulled AggregatedFrameworkRequest");
  } catch (NoNodeException e) {
    // The request node is gone: reset the local marker before failing for good.
    existsLocalVersionFrameworkRequest = 0;
    throw new NonTransientException(
        "Failed to getAggregatedFrameworkRequest, FrameworkRequest is already deleted on ZK", e);
  }

  // Step 3: apply the pulled descriptor. It is expected to never be null.
  FrameworkDescriptor pulledDescriptor =
      pulledFrameworkRequest.getFrameworkRequest().getFrameworkDescriptor();
  checkFrameworkVersion(pulledDescriptor);
  flattenFrameworkDescriptor(pulledDescriptor);
  updateFrameworkDescriptor(pulledDescriptor);

  // Step 4: apply the remaining parts of the aggregated request.
  updateOverrideApplicationProgressRequest(
      pulledFrameworkRequest.getOverrideApplicationProgressRequest());
  updateMigrateTaskRequests(pulledFrameworkRequest.getMigrateTaskRequests());
}
use of com.microsoft.frameworklauncher.common.exceptions.NonTransientException in project pai by Microsoft.
the class StatusManager method recover.
/**
 * Recovers the in-memory status maps from the AggregatedFrameworkStatus stored on ZK.
 *
 * <p>The stored status is first read and every TaskRole's recorded FrameworkVersion is
 * compared with the locally configured one. If reading or checking fails with anything
 * other than a KeeperException, the failure is logged, the Framework's status is deleted
 * from ZK and nothing is loaded into memory.
 *
 * <p>After this method returns, ZK and in-memory Status are the same. Since Request may
 * be ahead of Status even when Running, the recovery of the AM StatusManager is complete
 * at that point.
 *
 * @throws NonTransientException if the status node does not exist on ZK
 * @throws Exception             any other KeeperException is rethrown unchanged; failures
 *                               from super.recover() or the ZK delete also propagate
 */
@Override
protected void recover() throws Exception {
  super.recover();

  AggregatedFrameworkStatus recoveredStatus;
  try {
    recoveredStatus = zkStore.getAggregatedFrameworkStatus(conf.getFrameworkName());

    // Defensive consistency check: the previous AM is expected to have already
    // exit either due to AM RM heartbeat or pushStatus.existsLocalVersionFrameworkRequest.
    // NOTE(review): the NonTransientExceptions thrown in this loop are caught by the
    // generic catch (Exception) below, so a version mismatch ends up reinitializing
    // the status on ZK instead of propagating - confirm this is intended.
    for (Map.Entry<String, AggregatedTaskRoleStatus> entry : recoveredStatus.getAggregatedTaskRoleStatuses().entrySet()) {
      String taskRoleName = entry.getKey();
      AggregatedTaskRoleStatus aggregated = entry.getValue();
      TaskRoleStatus taskRoleStatus = aggregated.getTaskRoleStatus();
      TaskStatuses taskStatuses = aggregated.getTaskStatuses();

      if (!taskRoleStatus.getFrameworkVersion().equals(conf.getFrameworkVersion())) {
        throw new NonTransientException(String.format(
            "[%s]: FrameworkVersion mismatch: Local Version %s, Previous TaskRoleStatus Version %s",
            taskRoleName, conf.getFrameworkVersion(), taskRoleStatus.getFrameworkVersion()));
      }
      if (!taskStatuses.getFrameworkVersion().equals(conf.getFrameworkVersion())) {
        throw new NonTransientException(String.format(
            "[%s]: FrameworkVersion mismatch: Local Version %s, Previous TaskStatuses Version %s",
            taskRoleName, conf.getFrameworkVersion(), taskStatuses.getFrameworkVersion()));
      }
    }
  } catch (KeeperException.NoNodeException e) {
    throw new NonTransientException(
        "Failed to getAggregatedFrameworkStatus, FrameworkStatus is already deleted on ZK", e);
  } catch (KeeperException e) {
    // Every other ZK failure is passed through untouched.
    throw e;
  } catch (Exception e) {
    LOGGER.logError(e,
        "Failed to recover %s. Reinitializing all TaskRoleStatuses and TaskStatuseses in the Framework on ZK.",
        serviceName);
    zkStore.deleteFrameworkStatus(conf.getFrameworkName(), true);
    recoveredStatus = null;
  }

  // Nothing to load when the stored status had to be thrown away.
  if (recoveredStatus == null) {
    return;
  }

  for (Map.Entry<String, AggregatedTaskRoleStatus> entry : recoveredStatus.getAggregatedTaskRoleStatuses().entrySet()) {
    String taskRoleName = entry.getKey();
    AggregatedTaskRoleStatus aggregated = entry.getValue();
    TaskRoleStatus taskRoleStatus = aggregated.getTaskRoleStatus();
    TaskStatuses taskStatuses = aggregated.getTaskStatuses();

    // Freshly recovered entries match ZK, so they start out as "not changed".
    taskRoleStatuses.put(taskRoleName, taskRoleStatus);
    taskStatuseses.put(taskRoleName, taskStatuses);
    taskRoleStatusesChanged.put(taskRoleName, false);
    taskStatusesesChanged.put(taskRoleName, false);

    List<TaskStatus> recoveredTasks = taskStatuses.getTaskStatusArray();
    for (int index = 0; index < recoveredTasks.size(); index++) {
      addExtensionTaskStatus(new TaskStatusLocator(taskRoleName, index));
    }
  }
  LOGGER.logInfo("Succeeded to recover %s.", serviceName);
}
use of com.microsoft.frameworklauncher.common.exceptions.NonTransientException in project pai by Microsoft.
the class HadoopUtils method convertToLocalResource.
/**
 * Builds a LocalResource describing the given HDFS path.
 *
 * <p>The resource type is ARCHIVE when the path (case-insensitively) ends with
 * ".zip", ".tgz", ".tar" or ".tar.gz", and FILE otherwise. Size and timestamp are
 * taken from the path's FileStatus in HDFS.
 *
 * @param hdfsPath   HDFS path of the resource; trailing path separators are stripped
 * @param visibility visibility to assign to the resulting LocalResource
 * @return the LocalResource for the resolved path
 * @throws NonTransientException if the path is rejected with an IllegalArgumentException
 * @throws Exception             any other failure raised while querying HDFS
 */
private static LocalResource convertToLocalResource(String hdfsPath, LocalResourceVisibility visibility) throws Exception {
  // Directory resource path must not end with /, otherwise localization will hang.
  hdfsPath = StringUtils.stripEnd(hdfsPath, HDFS_PATH_SEPARATOR);

  // The previous check compared FilenameUtils.getExtension() against ".zip", ".tgz",
  // ".tar" and ".tar.gz". getExtension() returns only the text after the LAST dot and
  // without the dot itself ("zip", "gz", ...), so none of those comparisons could ever
  // match and every resource was typed FILE. Match on the path suffix instead, which
  // also handles the two-part ".tar.gz". Locale.ROOT keeps the lower-casing independent
  // of the JVM default locale.
  String lowerCasedPath = hdfsPath.toLowerCase(java.util.Locale.ROOT);
  boolean isArchive = lowerCasedPath.endsWith(".zip")
      || lowerCasedPath.endsWith(".tgz")
      || lowerCasedPath.endsWith(".tar")
      || lowerCasedPath.endsWith(".tar.gz");
  LocalResourceType type = isArchive ? LocalResourceType.ARCHIVE : LocalResourceType.FILE;

  // Applications' Containers on the same node write the same data in the resource directory.
  try {
    FileStatus fileStatus = getFileStatusInHdfs(hdfsPath);
    FileContext fileContext = FileContext.getFileContext(conf);
    return LocalResource.newInstance(
        ConverterUtils.getYarnUrlFromPath(
            fileContext.getDefaultFileSystem().resolvePath(fileStatus.getPath())),
        type,
        visibility,
        fileStatus.getLen(),
        fileStatus.getModificationTime());
  } catch (IllegalArgumentException e) {
    // hdfsPath may be from user, so it may be illegal.
    throw new NonTransientException("Path is illegal.", e);
  }
}
use of com.microsoft.frameworklauncher.common.exceptions.NonTransientException in project pai by Microsoft.
the class HadoopUtils method makeDirInHdfs.
/**
 * Creates the given directory in HDFS (equivalent to "hadoop fs -mkdir -p").
 *
 * <p>Should succeed when the hdfsPath and its parent paths are directories.
 * Note if parent directories do not exist, they will be created.
 *
 * @param hdfsPath HDFS path of the directory to create
 * @throws NonTransientException if the failure message contains "not a directory"
 *                               (case-insensitive)
 * @throws Exception             any other failure, rethrown unchanged
 */
public static void makeDirInHdfs(String hdfsPath) throws Exception {
  try {
    FileSystem fs = FileSystem.get(conf);
    LOGGER.logInfo("[hadoop fs -mkdir -p %s]", hdfsPath);
    fs.mkdirs(new Path(hdfsPath));
  } catch (Exception e) {
    // getMessage() may legitimately be null; guard it so the original failure is
    // rethrown instead of being replaced by a NullPointerException from this handler.
    String message = e.getMessage();
    if (message != null
        && message.toLowerCase(java.util.Locale.ROOT).contains("not a directory")) {
      throw new NonTransientException("Path is not a directory", e);
    }
    throw e;
  }
}
use of com.microsoft.frameworklauncher.common.exceptions.NonTransientException in project pai by Microsoft.
the class Service method handleException.
/**
 * Logs the given exception and stops the service with an exit status chosen by
 * whether the exception is a NonTransientException.
 *
 * @param e the exception that occurred
 * @return false when e is a NonTransientException, true otherwise
 */
@Override
protected Boolean handleException(Exception e) {
  super.handleException(e);

  // Anything that is not explicitly non-transient is assumed to be transient.
  if (!(e instanceof NonTransientException)) {
    LOGGER.logError(e,
        "Exception occurred in %1$s. It should be transient. Will restart %1$s inplace.",
        serviceName);
    // TODO: Only Restart Service instead of exit whole process and Restart by external system.
    int unknownErrorCode = ExitStatusKey.LAUNCHER_INTERNAL_UNKNOWN_ERROR.toInt();
    stop(new StopStatus(unknownErrorCode, false, null, e));
    return true;
  }

  LOGGER.logError(e,
      "NonTransientException occurred in %1$s. %1$s will be stopped.",
      serviceName);
  int nonTransientErrorCode = ExitStatusKey.LAUNCHER_INTERNAL_NON_TRANSIENT_ERROR.toInt();
  stop(new StopStatus(nonTransientErrorCode, true, null, e));
  return false;
}
Aggregations