Use of org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.State in the Apache Hive project.
Class LlapStatusServiceDriver, method main.
/**
 * Command-line entry point for the LLAP status check.
 *
 * <p>Flow:
 * <ol>
 *   <li>Create the driver and parse {@code args}. On any failure (or when no options are
 *       produced) the driver is closed if it was created and the JVM exits with the
 *       corresponding exit code.</li>
 *   <li>Query the status via {@code statusServiceDriver.run(...)}. Without watch mode this is
 *       done once. In watch mode it is repeated up to {@code watchTimeout / refreshInterval}
 *       times, sleeping {@code refreshInterval} ms between attempts, until the state is
 *       RUNNING_PARTIAL or RUNNING_ALL and the live/desired instance ratio reaches the
 *       configured running-nodes threshold.</li>
 *   <li>Write the final status as JSON to the configured output file, or to standard output
 *       when no output file is set, then exit with the last exit code.</li>
 * </ol>
 *
 * <p>This method always terminates the JVM through {@link System#exit(int)}.
 *
 * @param args command-line arguments, handed to {@code parseOptions}
 */
public static void main(String[] args) {
  LOG.info("LLAP status invoked with arguments = {}", Arrays.toString(args));
  int ret = ExitCode.SUCCESS.getInt();
  Clock clock = new SystemClock();
  long lastSummaryLogTime = -1;
  LlapStatusServiceDriver statusServiceDriver = null;
  LlapStatusOptions options = null;
  try {
    statusServiceDriver = new LlapStatusServiceDriver();
    options = statusServiceDriver.parseOptions(args);
  } catch (Throwable t) {
    // Do not touch statusServiceDriver here: it is still null when the constructor itself
    // threw. The null-guarded close below always runs on this path because options is null.
    logError(t);
    ret = exitCodeForThrowable(t);
  }
  if (ret != 0 || options == null) {
    // Failure / help
    if (statusServiceDriver != null) {
      statusServiceDriver.close();
    }
    System.exit(ret);
  }

  boolean firstAttempt = true;
  final long refreshInterval = options.getRefreshIntervalMs();
  final boolean watchMode = options.isWatchMode();
  final long watchTimeout = options.getWatchTimeoutMs();
  // Break out of the loop fast if watchMode is disabled: a single attempt only.
  long numAttempts = watchMode ? watchTimeout / refreshInterval : 1;
  LlapStatusHelpers.State launchingState = null;
  LlapStatusHelpers.State currentState = null;
  boolean desiredStateAttained = false;
  final float runningNodesThreshold = options.getRunningNodesThreshold();

  // NOTE(review): when no output file is configured, os is System.out and try-with-resources
  // closes it before the catch/finally clauses below execute - confirm this is intended.
  try (OutputStream os = options.getOutputFile() == null
          ? System.out
          : new BufferedOutputStream(new FileOutputStream(options.getOutputFile()));
      PrintWriter pw = new PrintWriter(os)) {
    LOG.info("Configured refresh interval: {}s. Watch timeout: {}s. Attempts remaining: {}."
            + " Watch mode: {}. Running nodes threshold: {}.",
        TimeUnit.SECONDS.convert(refreshInterval, TimeUnit.MILLISECONDS),
        TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS),
        numAttempts, watchMode, new DecimalFormat("#.###").format(runningNodesThreshold));

    while (numAttempts > 0) {
      if (!firstAttempt) {
        if (watchMode) {
          try {
            Thread.sleep(refreshInterval);
          } catch (InterruptedException ignored) {
            // Deliberately ignored: an interrupted sleep only shortens the wait before the
            // next status query.
          }
        } else {
          // reported once, so break
          break;
        }
      } else {
        firstAttempt = false;
      }

      ret = statusServiceDriver.run(options, watchMode ? watchTimeout : 0);
      currentState = statusServiceDriver.appStatusBuilder.getState();
      try {
        lastSummaryLogTime = LlapStatusServiceDriver.maybeLogSummary(clock, lastSummaryLogTime,
            statusServiceDriver, watchMode, watchTimeout, launchingState);
      } catch (Exception e) {
        // Summary logging is best-effort and must not abort the status check.
        LOG.warn("Failed to log summary", e);
      }

      if (ret == ExitCode.SUCCESS.getInt()) {
        if (watchMode) {
          // slider has started llap application, now if for some reason state changes to
          // COMPLETE then fail fast
          if (launchingState == null && LAUNCHING_STATES.contains(currentState)) {
            launchingState = currentState;
          }
          if (currentState.equals(State.COMPLETE)) {
            if (launchingState != null || options.isLaunched()) {
              LOG.warn("COMPLETE state reached while waiting for RUNNING state. Failing.");
              System.err.println("Final diagnostics: " + statusServiceDriver.appStatusBuilder.getDiagnostics());
              // NOTE(review): ret still holds the SUCCESS code at this point, so the process
              // exits with success despite the warning - confirm this is intended.
              break;
            } else {
              LOG.info("Found a stopped application; assuming it was a previous attempt " + "and waiting for the next one. Omit the -l flag to avoid this.");
            }
          }
          if (!(currentState.equals(State.RUNNING_PARTIAL) || currentState.equals(State.RUNNING_ALL))) {
            // Not running yet: consume one attempt and poll again.
            if (LOG.isDebugEnabled()) {
              LOG.debug("Current state: {}. Desired state: {}. {}/{} instances.", currentState,
                  runningNodesThreshold == 1.0f ? State.RUNNING_ALL : State.RUNNING_PARTIAL,
                  statusServiceDriver.appStatusBuilder.getLiveInstances(),
                  statusServiceDriver.appStatusBuilder.getDesiredInstances());
            }
            numAttempts--;
            continue;
          }
          // we have reached RUNNING state, now check if running nodes threshold is met
          final int liveInstances = statusServiceDriver.appStatusBuilder.getLiveInstances();
          final int desiredInstances = statusServiceDriver.appStatusBuilder.getDesiredInstances();
          if (desiredInstances > 0) {
            final float ratio = (float) liveInstances / (float) desiredInstances;
            if (ratio < runningNodesThreshold) {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Waiting until running nodes threshold is reached. Current: {} Desired: {}." + " {}/{} instances.",
                    new DecimalFormat("#.###").format(ratio),
                    new DecimalFormat("#.###").format(runningNodesThreshold),
                    statusServiceDriver.appStatusBuilder.getLiveInstances(),
                    statusServiceDriver.appStatusBuilder.getDesiredInstances());
              }
              numAttempts--;
              continue;
            } else {
              desiredStateAttained = true;
              statusServiceDriver.appStatusBuilder.setRunningThresholdAchieved(true);
            }
          } else {
            // No desired instance count reported yet, so the ratio cannot be computed; retry.
            numAttempts--;
            continue;
          }
        }
      } else if (ret == ExitCode.YARN_ERROR.getInt() && watchMode) {
        LOG.warn("Watch mode enabled and got YARN error. Retrying..");
        numAttempts--;
        continue;
      } else if (ret == ExitCode.SLIDER_CLIENT_ERROR_CREATE_FAILED.getInt() && watchMode) {
        LOG.warn("Watch mode enabled and slider client creation failed. Retrying..");
        numAttempts--;
        continue;
      } else if (ret == ExitCode.SLIDER_CLIENT_ERROR_OTHER.getInt() && watchMode) {
        LOG.warn("Watch mode enabled and got slider client error. Retrying..");
        numAttempts--;
        continue;
      } else if (ret == ExitCode.LLAP_REGISTRY_ERROR.getInt() && watchMode) {
        LOG.warn("Watch mode enabled and got LLAP registry error. Retrying..");
        numAttempts--;
        continue;
      }
      // Desired state attained, watch mode disabled, or a non-retryable exit code.
      break;
    }

    // Log final state to CONSOLE_LOGGER
    LlapStatusServiceDriver.maybeLogSummary(clock, 0L, statusServiceDriver, watchMode, watchTimeout, launchingState);
    CONSOLE_LOGGER.info("\n\n\n");
    // print current state before exiting
    statusServiceDriver.outputJson(pw);
    os.flush();
    pw.flush();
    if (numAttempts == 0 && watchMode && !desiredStateAttained) {
      LOG.warn("Watch timeout {}s exhausted before desired state RUNNING is attained.",
          TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS));
    }
  } catch (Throwable t) {
    logError(t);
    ret = exitCodeForThrowable(t);
  } finally {
    LOG.info("LLAP status finished");
    statusServiceDriver.close();
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Completed processing - exiting with " + ret);
  }
  System.exit(ret);
}

/**
 * Maps a failure to the process exit code: the exit code carried by a
 * {@code LlapStatusCliException}, or {@code ExitCode.INTERNAL_ERROR} for any other throwable.
 *
 * @param t the failure that was caught
 * @return the integer exit code to terminate with
 */
private static int exitCodeForThrowable(Throwable t) {
  if (t instanceof LlapStatusCliException) {
    return ((LlapStatusCliException) t).getExitCode().getInt();
  }
  return ExitCode.INTERNAL_ERROR.getInt();
}
Aggregations