Search in sources :

Example 1 with State

Use of org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.State in the Apache Hive project.

The main method of the class LlapStatusServiceDriver.

/**
 * Command-line entry point: parses the status options, polls the LLAP application state
 * (repeatedly when watch mode is enabled), writes the final status as JSON to the configured
 * output file or to standard output, and terminates the JVM with the resulting exit code.
 *
 * <p>In watch mode the loop retries, sleeping for the refresh interval between attempts, until
 * the application is in a RUNNING state with at least the configured fraction of live
 * instances, until a COMPLETE state is seen after launching, or until the attempts derived
 * from the watch timeout are used up. Without watch mode the status is reported once.
 *
 * <p>This method always ends by calling {@link System#exit(int)}.
 *
 * @param args command-line arguments, handed to {@code parseOptions}
 */
public static void main(String[] args) {
    LOG.info("LLAP status invoked with arguments = {}", Arrays.toString(args));
    int ret = ExitCode.SUCCESS.getInt();
    Clock clock = new SystemClock();
    long lastSummaryLogTime = -1;
    LlapStatusServiceDriver statusServiceDriver = null;
    LlapStatusOptions options = null;
    try {
        statusServiceDriver = new LlapStatusServiceDriver();
        options = statusServiceDriver.parseOptions(args);
    } catch (Throwable t) {
        // Do not touch statusServiceDriver here: it is still null when the constructor itself
        // threw. The driver (if any) is closed exactly once in the failure branch below, which
        // is always taken on this path because options is still null.
        logError(t);
        ret = exitCodeForThrowable(t);
    }
    if (ret != 0 || options == null) {
        // Failure / help
        if (statusServiceDriver != null) {
            statusServiceDriver.close();
        }
        System.exit(ret);
    }
    boolean firstAttempt = true;
    final long refreshInterval = options.getRefreshIntervalMs();
    final boolean watchMode = options.isWatchMode();
    final long watchTimeout = options.getWatchTimeoutMs();
    // Only one attempt is made when watchMode is disabled, so the division is only performed
    // when its result is actually used.
    long numAttempts = watchMode ? watchTimeout / refreshInterval : 1;
    LlapStatusHelpers.State launchingState = null;
    LlapStatusHelpers.State currentState = null;
    boolean desiredStateAttained = false;
    final float runningNodesThreshold = options.getRunningNodesThreshold();
    // NOTE(review): when no output file is configured, leaving this try-with-resources block
    // closes System.out, because the PrintWriter and the stream it wraps are both resources.
    try (OutputStream os = options.getOutputFile() == null ? System.out : new BufferedOutputStream(new FileOutputStream(options.getOutputFile()));
        PrintWriter pw = new PrintWriter(os)) {
        LOG.info("Configured refresh interval: {}s. Watch timeout: {}s. Attempts remaining: {}." + " Watch mode: {}. Running nodes threshold: {}.", TimeUnit.SECONDS.convert(refreshInterval, TimeUnit.MILLISECONDS), TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS), numAttempts, watchMode, new DecimalFormat("#.###").format(runningNodesThreshold));
        while (numAttempts > 0) {
            if (!firstAttempt) {
                if (watchMode) {
                    try {
                        Thread.sleep(refreshInterval);
                    } catch (InterruptedException ignored) {
                        // Deliberately ignored: an interrupted sleep just triggers the next
                        // status check earlier than planned.
                    }
                } else {
                    // reported once, so break
                    break;
                }
            } else {
                firstAttempt = false;
            }
            ret = statusServiceDriver.run(options, watchMode ? watchTimeout : 0);
            currentState = statusServiceDriver.appStatusBuilder.getState();
            try {
                lastSummaryLogTime = LlapStatusServiceDriver.maybeLogSummary(clock, lastSummaryLogTime, statusServiceDriver, watchMode, watchTimeout, launchingState);
            } catch (Exception e) {
                // A failed summary must not abort the status check itself.
                LOG.warn("Failed to log summary", e);
            }
            if (ret == ExitCode.SUCCESS.getInt()) {
                if (watchMode) {
                    // slider has started llap application, now if for some reason state changes to COMPLETE then fail fast
                    if (launchingState == null && LAUNCHING_STATES.contains(currentState)) {
                        // Remember the first launching state that was observed.
                        launchingState = currentState;
                    }
                    if (currentState.equals(State.COMPLETE)) {
                        if (launchingState != null || options.isLaunched()) {
                            LOG.warn("COMPLETE state reached while waiting for RUNNING state. Failing.");
                            System.err.println("Final diagnostics: " + statusServiceDriver.appStatusBuilder.getDiagnostics());
                            // NOTE(review): ret is not changed here, so the exit code stays
                            // whatever run() returned for this attempt.
                            break;
                        } else {
                            LOG.info("Found a stopped application; assuming it was a previous attempt " + "and waiting for the next one. Omit the -l flag to avoid this.");
                        }
                    }
                    if (!(currentState.equals(State.RUNNING_PARTIAL) || currentState.equals(State.RUNNING_ALL))) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Current state: {}. Desired state: {}. {}/{} instances.", currentState, runningNodesThreshold == 1.0f ? State.RUNNING_ALL : State.RUNNING_PARTIAL, statusServiceDriver.appStatusBuilder.getLiveInstances(), statusServiceDriver.appStatusBuilder.getDesiredInstances());
                        }
                        numAttempts--;
                        continue;
                    }
                    // we have reached RUNNING state, now check if running nodes threshold is met
                    final int liveInstances = statusServiceDriver.appStatusBuilder.getLiveInstances();
                    final int desiredInstances = statusServiceDriver.appStatusBuilder.getDesiredInstances();
                    if (desiredInstances > 0) {
                        final float ratio = (float) liveInstances / (float) desiredInstances;
                        if (ratio < runningNodesThreshold) {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Waiting until running nodes threshold is reached. Current: {} Desired: {}." + " {}/{} instances.", new DecimalFormat("#.###").format(ratio), new DecimalFormat("#.###").format(runningNodesThreshold), statusServiceDriver.appStatusBuilder.getLiveInstances(), statusServiceDriver.appStatusBuilder.getDesiredInstances());
                            }
                            numAttempts--;
                            continue;
                        } else {
                            desiredStateAttained = true;
                            statusServiceDriver.appStatusBuilder.setRunningThresholdAchieved(true);
                        }
                    } else {
                        // No desired instances reported yet: a ratio cannot be computed, retry.
                        numAttempts--;
                        continue;
                    }
                }
            } else if (ret == ExitCode.YARN_ERROR.getInt() && watchMode) {
                LOG.warn("Watch mode enabled and got YARN error. Retrying..");
                numAttempts--;
                continue;
            } else if (ret == ExitCode.SLIDER_CLIENT_ERROR_CREATE_FAILED.getInt() && watchMode) {
                LOG.warn("Watch mode enabled and slider client creation failed. Retrying..");
                numAttempts--;
                continue;
            } else if (ret == ExitCode.SLIDER_CLIENT_ERROR_OTHER.getInt() && watchMode) {
                LOG.warn("Watch mode enabled and got slider client error. Retrying..");
                numAttempts--;
                continue;
            } else if (ret == ExitCode.LLAP_REGISTRY_ERROR.getInt() && watchMode) {
                LOG.warn("Watch mode enabled and got LLAP registry error. Retrying..");
                numAttempts--;
                continue;
            }
            // Either the desired state was reached, watch mode is off, or the error is not
            // one of the retried ones.
            break;
        }
        // Log final state to CONSOLE_LOGGER
        LlapStatusServiceDriver.maybeLogSummary(clock, 0L, statusServiceDriver, watchMode, watchTimeout, launchingState);
        CONSOLE_LOGGER.info("\n\n\n");
        // print current state before exiting
        statusServiceDriver.outputJson(pw);
        // Flush the writer first so its buffered characters reach the stream, then the stream.
        pw.flush();
        os.flush();
        if (numAttempts == 0 && watchMode && !desiredStateAttained) {
            LOG.warn("Watch timeout {}s exhausted before desired state RUNNING is attained.", TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS));
        }
    } catch (Throwable t) {
        logError(t);
        ret = exitCodeForThrowable(t);
    } finally {
        LOG.info("LLAP status finished");
        statusServiceDriver.close();
    }
    LOG.debug("Completed processing - exiting with {}", ret);
    System.exit(ret);
}

/**
 * Maps a failure to the process exit code: the code carried by a
 * {@code LlapStatusCliException}, or the internal-error code for any other throwable.
 */
private static int exitCodeForThrowable(Throwable t) {
    if (t instanceof LlapStatusCliException) {
        return ((LlapStatusCliException) t).getExitCode().getInt();
    }
    return ExitCode.INTERNAL_ERROR.getInt();
}
Also used : State(org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.State) SystemClock(org.apache.hadoop.yarn.util.SystemClock) LlapStatusOptions(org.apache.hadoop.hive.llap.cli.LlapStatusOptionsProcessor.LlapStatusOptions) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) DecimalFormat(java.text.DecimalFormat) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Clock(org.apache.hadoop.yarn.util.Clock) URISyntaxException(java.net.URISyntaxException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) SliderException(org.apache.slider.core.exceptions.SliderException) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) LlapStatusHelpers(org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers) PrintWriter(java.io.PrintWriter)

Aggregations

BufferedOutputStream (java.io.BufferedOutputStream)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 PrintWriter (java.io.PrintWriter)1 URISyntaxException (java.net.URISyntaxException)1 DecimalFormat (java.text.DecimalFormat)1 LlapStatusOptions (org.apache.hadoop.hive.llap.cli.LlapStatusOptionsProcessor.LlapStatusOptions)1 LlapStatusHelpers (org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers)1 State (org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.State)1 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)1 Clock (org.apache.hadoop.yarn.util.Clock)1 SystemClock (org.apache.hadoop.yarn.util.SystemClock)1 SliderException (org.apache.slider.core.exceptions.SliderException)1