Use of org.apache.tez.dag.api.TezException in project hive by apache.
From the class TezJobMonitor, the method monitorExecution:
public int monitorExecution() {
  boolean done = false;
  boolean success = false;
  int failedCounter = 0;
  int rc = 0;
  DAGStatus status = null;
  Map<String, Progress> vertexProgressMap = null;
  long monitorStartTime = System.currentTimeMillis();
  synchronized (shutdownList) {
    shutdownList.add(dagClient);
  }
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
  DAGStatus.State lastState = null;
  boolean running = false;
  while (true) {
    try {
      if (context != null) {
        context.checkHeartbeaterLockException();
      }
      status = dagClient.getDAGStatus(new HashSet<StatusGetOpts>(), CHECK_INTERVAL);
      vertexProgressMap = status.getVertexProgress();
      DAGStatus.State state = status.getState();
      if (state != lastState || state == RUNNING) {
        lastState = state;
        switch (state) {
          case SUBMITTED:
            console.printInfo("Status: Submitted");
            break;
          case INITING:
            console.printInfo("Status: Initializing");
            this.executionStartTime = System.currentTimeMillis();
            break;
          case RUNNING:
            if (!running) {
              perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
              console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n");
              this.executionStartTime = System.currentTimeMillis();
              running = true;
            }
            updateFunction.update(status, vertexProgressMap);
            break;
          case SUCCEEDED:
            if (!running) {
              this.executionStartTime = monitorStartTime;
            }
            updateFunction.update(status, vertexProgressMap);
            success = true;
            running = false;
            done = true;
            break;
          case KILLED:
            if (!running) {
              this.executionStartTime = monitorStartTime;
            }
            updateFunction.update(status, vertexProgressMap);
            console.printInfo("Status: Killed");
            running = false;
            done = true;
            rc = 1;
            break;
          case FAILED:
          case ERROR:
            if (!running) {
              this.executionStartTime = monitorStartTime;
            }
            updateFunction.update(status, vertexProgressMap);
            console.printError("Status: Failed");
            running = false;
            done = true;
            rc = 2;
            break;
        }
      }
    } catch (Exception e) {
      console.printInfo("Exception: " + e.getMessage());
      boolean isInterrupted = hasInterruptedException(e);
      if (isInterrupted || (++failedCounter % MAX_RETRY_INTERVAL / CHECK_INTERVAL == 0)) {
        try {
          console.printInfo("Killing DAG...");
          dagClient.tryKillDAG();
        } catch (IOException | TezException tezException) {
          // best effort
        }
        console.printError("Execution has failed. stack trace: " + ExceptionUtils.getStackTrace(e));
        rc = 1;
        done = true;
      } else {
        console.printInfo("Retrying...");
      }
    } finally {
      if (done) {
        if (rc != 0 && status != null) {
          for (String diag : status.getDiagnostics()) {
            console.printError(diag);
            diagnostics.append(diag);
          }
        }
        synchronized (shutdownList) {
          shutdownList.remove(dagClient);
        }
        break;
      }
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
  printSummary(success, vertexProgressMap);
  return rc;
}
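
The retry branch above calls a hasInterruptedException helper that is not shown in this excerpt. A minimal sketch of such a helper, assuming it simply walks the cause chain looking for an interruption (the method name matches the call above, but the body here is illustrative, not necessarily the exact Hive implementation):

private static boolean hasInterruptedException(Throwable e) {
  // Hadoop IPC tends to wrap InterruptedException, so inspect the whole cause chain.
  // Requires java.io.InterruptedIOException to be imported.
  while (e != null) {
    if (e instanceof InterruptedException || e instanceof InterruptedIOException) {
      return true;
    }
    e = e.getCause();
  }
  return false;
}

When this returns true, the monitor skips further retries and goes straight to tryKillDAG(), since an interrupted monitor loop usually means the query itself was cancelled.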
Use of org.apache.tez.dag.api.TezException in project hive by apache.
From the class TezSessionState, the method openInternal:
protected void openInternal(final HiveConf conf, Collection<String> additionalFiles,
    boolean isAsync, LogHelper console, Path scratchDir)
    throws IOException, LoginException, IllegalArgumentException, URISyntaxException, TezException {
  this.conf = conf;
  // TODO Why is the queue name set again. It has already been setup via setQueueName. Do only one of the two.
  String confQueueName = conf.get(TezConfiguration.TEZ_QUEUE_NAME);
  if (queueName != null && !queueName.equals(confQueueName)) {
    LOG.warn("Resetting a queue name that was already set: was " + queueName + ", now " + confQueueName);
  }
  this.queueName = confQueueName;
  this.doAsEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS);
  final boolean llapMode = "llap".equalsIgnoreCase(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_MODE));
  // TODO This - at least for the session pool - will always be the hive user. How does doAs above this affect things ?
  UserGroupInformation ugi = Utils.getUGI();
  user = ugi.getShortUserName();
  LOG.info("User of session id " + sessionId + " is " + user);
  // create the tez tmp dir
  tezScratchDir = scratchDir == null ? createTezDir(sessionId) : scratchDir;
  additionalFilesNotFromConf.clear();
  if (additionalFiles != null) {
    additionalFilesNotFromConf.addAll(additionalFiles);
  }
  refreshLocalResourcesFromConf(conf);
  // unless already installed on all the cluster nodes, we'll have to
  // localize hive-exec.jar as well.
  appJarLr = createJarLocalResource(utils.getExecJarPathLocal());
  // configuration for the application master
  final Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
  commonLocalResources.put(utils.getBaseName(appJarLr), appJarLr);
  for (LocalResource lr : localizedResources) {
    commonLocalResources.put(utils.getBaseName(lr), lr);
  }
  if (llapMode) {
    // localize llap client jars
    addJarLRByClass(LlapTaskSchedulerService.class, commonLocalResources);
    addJarLRByClass(LlapProtocolClientImpl.class, commonLocalResources);
    addJarLRByClass(LlapProtocolClientProxy.class, commonLocalResources);
    addJarLRByClassName("org.apache.hadoop.registry.client.api.RegistryOperations", commonLocalResources);
  }
  // Create environment for AM.
  Map<String, String> amEnv = new HashMap<String, String>();
  MRHelpers.updateEnvBasedOnMRAMEnv(conf, amEnv);
  // and finally we're ready to create and start the session
  // generate basic tez config
  final TezConfiguration tezConfig = new TezConfiguration(conf);
  // set up the staging directory to use
  tezConfig.set(TezConfiguration.TEZ_AM_STAGING_DIR, tezScratchDir.toUri().toString());
  conf.stripHiddenConfigurations(tezConfig);
  ServicePluginsDescriptor servicePluginsDescriptor;
  Credentials llapCredentials = null;
  if (llapMode) {
    if (UserGroupInformation.isSecurityEnabled()) {
      llapCredentials = new Credentials();
      llapCredentials.addToken(LlapTokenIdentifier.KIND_NAME, getLlapToken(user, tezConfig));
    }
    // TODO Change this to not serialize the entire Configuration - minor.
    UserPayload servicePluginPayload = TezUtils.createUserPayloadFromConf(tezConfig);
    // we need plugins to handle llap and uber mode
    servicePluginsDescriptor = ServicePluginsDescriptor.create(true,
        new TaskSchedulerDescriptor[] {
            TaskSchedulerDescriptor.create(LLAP_SERVICE, LLAP_SCHEDULER).setUserPayload(servicePluginPayload) },
        new ContainerLauncherDescriptor[] {
            ContainerLauncherDescriptor.create(LLAP_SERVICE, LLAP_LAUNCHER) },
        new TaskCommunicatorDescriptor[] {
            TaskCommunicatorDescriptor.create(LLAP_SERVICE, LLAP_TASK_COMMUNICATOR).setUserPayload(servicePluginPayload) });
  } else {
    servicePluginsDescriptor = ServicePluginsDescriptor.create(true);
  }
  // container prewarming. tell the am how many containers we need
  if (HiveConf.getBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED)) {
    int n = HiveConf.getIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS);
    n = Math.max(tezConfig.getInt(TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS,
        TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS_DEFAULT), n);
    tezConfig.setInt(TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS, n);
  }
  setupSessionAcls(tezConfig, conf);
  final TezClient session = TezClient.newBuilder("HIVE-" + sessionId, tezConfig)
      .setIsSession(true)
      .setLocalResources(commonLocalResources)
      .setCredentials(llapCredentials)
      .setServicePluginDescriptor(servicePluginsDescriptor)
      .build();
  LOG.info("Opening new Tez Session (id: " + sessionId + ", scratch dir: " + tezScratchDir + ")");
  TezJobMonitor.initShutdownHook();
  if (!isAsync) {
    startSessionAndContainers(session, conf, commonLocalResources, tezConfig, false);
    this.session = session;
  } else {
    FutureTask<TezClient> sessionFuture = new FutureTask<>(new Callable<TezClient>() {
      @Override
      public TezClient call() throws Exception {
        try {
          return startSessionAndContainers(session, conf, commonLocalResources, tezConfig, true);
        } catch (Throwable t) {
          LOG.error("Failed to start Tez session", t);
          throw (t instanceof Exception) ? (Exception) t : new Exception(t);
        }
      }
    });
    new Thread(sessionFuture, "Tez session start thread").start();
    // We assume here nobody will try to get session before open() returns.
    this.console = console;
    this.sessionFuture = sessionFuture;
  }
}
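
In the async branch, the TezClient is only available once sessionFuture completes, so a caller must resolve the future before submitting a DAG. A minimal sketch of how that resolution might look, using a hypothetical waitForSession helper (not part of the excerpt above) that rethrows startup failures as TezException:

// Hypothetical helper illustrating how the async sessionFuture might be consumed.
// Requires java.util.concurrent.ExecutionException to be imported.
private TezClient waitForSession() throws TezException {
  try {
    if (session == null && sessionFuture != null) {
      // Blocks until startSessionAndContainers(...) finishes on the start thread.
      session = sessionFuture.get();
    }
    return session;
  } catch (InterruptedException ie) {
    Thread.currentThread().interrupt();
    throw new TezException("Interrupted while waiting for Tez session to start", ie);
  } catch (ExecutionException ee) {
    throw new TezException("Failed to start Tez session", ee.getCause());
  }
}

Because call() above wraps any Throwable in an Exception, the ExecutionException's cause carries the original startup failure, which is what the sketch forwards to the caller.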