Search in sources :

Example 1 with LlapInstance

use of org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance in project hive by apache.

the class LlapStatusServiceDriver method processAppDiagnostics.

/**
 * Folds the information carried by the application diagnostics into the status builder.
 *
 * <p>The final diagnostics message is always recorded. Every reported container is then
 * registered either as a running instance (container is live and the app has not completed)
 * or as a completed instance (container is stopped, or the app has completed). Any other
 * container state is only logged.
 *
 * @param appStatusBuilder builder that accumulates the application status
 * @param appDiagnostics diagnostics whose final message and container list are read
 * @param appComplete whether the application has already completed
 */
private static void processAppDiagnostics(AppStatusBuilder appStatusBuilder, ApplicationDiagnostics appDiagnostics, boolean appComplete) {
    // For a running app the final message should be empty.
    String diagnosticsMessage = appDiagnostics.getFinalMessage();
    Collection<ContainerInformation> reportedContainers = appDiagnostics.getContainers();
    appStatusBuilder.setDiagnostics(diagnosticsMessage);

    if (reportedContainers == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("ContainerInfos is null");
        }
        return;
    }

    for (ContainerInformation container : reportedContainers) {
        if (!appComplete && container.getState() == StateValues.STATE_LIVE) {
            // Reuse the entry the builder already tracks for this container, if there is one.
            LlapInstance running = appStatusBuilder.removeAndGetCompletedLlapInstanceForContainer(container.getContainerId());
            if (running == null) {
                // New launch. Not available during slider status, but available now.
                running = new LlapInstance(container.getHost(), container.getContainerId());
            }
            running.setLogUrl(container.getLogLink());
            appStatusBuilder.addNewRunningLlapInstance(running);
        } else if (appComplete || container.getState() == StateValues.STATE_STOPPED) {
            LlapInstance completed = new LlapInstance(container.getHost(), container.getContainerId());
            completed.setLogUrl(container.getLogLink());
            // The exit status is only recorded once the app is complete and the code is valid.
            if (appComplete && container.getExitCode() != ContainerExitStatus.INVALID) {
                completed.setYarnContainerExitStatus(container.getExitCode());
            }
            completed.setDiagnostics(container.getDiagnostics());
            appStatusBuilder.addNewCompleteLlapInstance(completed);
        } else {
            LOG.warn("Unexpected containerstate={}, for container={}", container.getState(), container);
        }
    }
}
Also used : LlapInstance(org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance) ContainerInformation(org.apache.slider.api.types.ContainerInformation)

Example 2 with LlapInstance

use of org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance in project hive by apache.

the class LlapStatusServiceDriver method populateAppStatusFromLlapRegistry.

/**
 * Enriches the running instances tracked by the builder with the details published in the
 * LLAP registry. Must be invoked after Slider status, and also after slider-diagnostics.
 *
 * <p>The registry client is created on first use. Instances found in the registry that the
 * builder also tracks as running get their ports and URLs filled in and become the new set of
 * running instances; registry entries the builder does not know about are only counted.
 *
 * @param appStatusBuilder builder that accumulates the application status
 * @param watchTimeoutMs timeout, in milliseconds, passed to the registry when fetching instances
 * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible
 * @throws LlapStatusCliException if the registry client cannot be created or queried
 */
private ExitCode populateAppStatusFromLlapRegistry(AppStatusBuilder appStatusBuilder, long watchTimeoutMs) throws LlapStatusCliException {
    if (llapRegistry == null) {
        try {
            llapRegistry = LlapRegistryService.getClient(llapRegistryConf);
        } catch (Exception e) {
            throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to create llap registry client", e);
        }
    }

    Collection<LlapServiceInstance> registeredInstances;
    try {
        registeredInstances = llapRegistry.getInstances(watchTimeoutMs).getAll();
    } catch (Exception e) {
        throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e);
    }

    // Nothing registered yet: report the app as still launching.
    if (registeredInstances == null || registeredInstances.isEmpty()) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("No information found in the LLAP registry");
        }
        appStatusBuilder.setLiveInstances(0);
        appStatusBuilder.setState(State.LAUNCHING);
        appStatusBuilder.clearRunningLlapInstances();
        return ExitCode.SUCCESS;
    }

    // Instances known by both slider and llap.
    List<LlapInstance> confirmedInstances = new LinkedList<>();
    // Container ids present in the registry that the builder does not track as running.
    List<String> registryOnlyContainerIds = new LinkedList<>();
    for (LlapServiceInstance registered : registeredInstances) {
        String containerId = registered.getProperties().get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
        LlapInstance tracked = appStatusBuilder.removeAndGetRunningLlapInstanceForContainer(containerId);
        if (tracked == null) {
            // This likely indicates that an instance has recently restarted (the old instance
            // has not been unregistered), and the new instance has not registered yet.
            // It will not be added back, since its services are not up yet.
            registryOnlyContainerIds.add(containerId);
            continue;
        }
        tracked.setMgmtPort(registered.getManagementPort());
        tracked.setRpcPort(registered.getRpcPort());
        tracked.setShufflePort(registered.getShufflePort());
        tracked.setWebUrl(registered.getServicesAddress());
        tracked.setStatusUrl(registered.getServicesAddress() + "/status");
        confirmedInstances.add(tracked);
    }

    int confirmedCount = confirmedInstances.size();
    appStatusBuilder.setLiveInstances(confirmedCount);
    appStatusBuilder.setLaunchingInstances(registryOnlyContainerIds.size());

    if (confirmedCount < appStatusBuilder.getDesiredInstances()) {
        appStatusBuilder.setState(confirmedCount > 0 ? State.RUNNING_PARTIAL : State.LAUNCHING);
    } else {
        appStatusBuilder.setState(State.RUNNING_ALL);
        if (confirmedCount > appStatusBuilder.getDesiredInstances()) {
            LOG.warn("Found more entries in LLAP registry, as compared to desired entries");
        }
    }

    // Debug only
    if (appStatusBuilder.allRunningInstances().size() > 0) {
        // Still tracked as running but absent from the registry: containers likely to come up soon.
        LOG.debug("Potential instances starting up: {}", appStatusBuilder.allRunningInstances());
    }
    if (!registryOnlyContainerIds.isEmpty()) {
        // Old containers which are likely shutting down, or new containers which
        // launched between slider-status/slider-diagnostics. Skip for this iteration.
        LOG.debug("Instances likely to shutdown soon: {}", registryOnlyContainerIds);
    }

    appStatusBuilder.clearAndAddPreviouslyKnownRunningInstances(confirmedInstances);
    return ExitCode.SUCCESS;
}
Also used : LlapServiceInstance(org.apache.hadoop.hive.llap.registry.LlapServiceInstance) LlapInstance(org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance) URISyntaxException(java.net.URISyntaxException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) SliderException(org.apache.slider.core.exceptions.SliderException) IOException(java.io.IOException) LinkedList(java.util.LinkedList)

Example 3 with LlapInstance

use of org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance in project hive by apache.

the class LlapStatusServiceDriver method constructCompletedContainerDiagnostics.

/**
 * Builds a human-readable summary of completed containers, one line per container.
 *
 * <p>Each line starts with a tab and a label: containers whose YARN exit status is
 * KILLED_EXCEEDED_PMEM or KILLED_EXCEEDED_VMEM are labelled as killed for exceeding memory
 * limits, every other container as failed. The label is followed by the container id and
 * its log URL. Lines are separated by a newline, with no trailing newline.
 *
 * @param completedInstances completed instances to describe; may be null or empty
 * @return the summary, or an empty string when there is nothing to report
 */
private static String constructCompletedContainerDiagnostics(List<LlapInstance> completedInstances) {
    if (completedInstances == null || completedInstances.isEmpty()) {
        return "";
    }
    // TODO HIVE-15865 Ideally sort these by completion time, once that is available.
    StringBuilder report = new StringBuilder();
    String lineSeparator = "";
    for (LlapInstance completed : completedInstances) {
        // Empty before the first entry, a newline before every later one.
        report.append(lineSeparator);
        lineSeparator = "\n";

        boolean killedForMemory = completed.getYarnContainerExitStatus() == ContainerExitStatus.KILLED_EXCEEDED_PMEM
            || completed.getYarnContainerExitStatus() == ContainerExitStatus.KILLED_EXCEEDED_VMEM;
        // TODO HIVE-15865 Handle additional reasons like OS launch failed (Slider needs to give this info)
        report.append(killedForMemory
            ? "\tKILLED container (by YARN for exceeding memory limits): "
            : "\tFAILED container: ");

        report.append(" ").append(completed.getContainerId());
        report.append(", Logs at: ").append(completed.getLogUrl());
    }
    return report.toString();
}
Also used : LlapInstance(org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance)

Example 4 with LlapInstance

use of org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance in project hive by apache.

the class LlapStatusServiceDriver method populateAppStatusFromSliderStatus.

/**
 * Populates information from SliderStatus.
 *
 * <p>Fetches the cluster description for the application from Slider and copies the
 * configuration paths, start time, AM details and desired/live container counts for LLAP into
 * the builder. Every live LLAP container listed in the cluster status is registered with the
 * builder as a running instance.
 *
 * @param appName name of the application whose cluster description is requested
 * @param sliderClient client used to fetch the cluster description
 * @param appStatusBuilder builder that accumulates the application status
 * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible
 * @throws LlapStatusCliException if the cluster description cannot be fetched, or if the
 *         statistics, the LLAP statistics, or the desired/live container counts are missing
 */
private ExitCode populateAppStatusFromSliderStatus(String appName, SliderClient sliderClient, AppStatusBuilder appStatusBuilder) throws LlapStatusCliException {
    ClusterDescription clusterDescription;
    try {
        clusterDescription = sliderClient.getClusterDescription(appName);
    } catch (SliderException e) {
        throw new LlapStatusCliException(ExitCode.SLIDER_CLIENT_ERROR_OTHER, "Failed to get cluster description from slider. SliderErrorCode=" + e.getExitCode(), e);
    } catch (Exception e) {
        throw new LlapStatusCliException(ExitCode.SLIDER_CLIENT_ERROR_OTHER, "Failed to get cluster description from slider", e);
    }
    if (clusterDescription == null) {
        LOG.info("Slider ClusterDescription not available");
        // ClusterDescription should always be present.
        return ExitCode.SLIDER_CLIENT_ERROR_OTHER;
    }

    // Process the Cluster Status returned by slider.
    appStatusBuilder.setOriginalConfigurationPath(clusterDescription.originConfigurationPath);
    appStatusBuilder.setGeneratedConfigurationPath(clusterDescription.generatedConfigurationPath);
    appStatusBuilder.setAppStartTime(clusterDescription.createTime);
    // Finish populating AMInfo
    appStatusBuilder.maybeCreateAndGetAmInfo().setAmWebUrl(clusterDescription.getInfo(StatusKeys.INFO_AM_WEB_URL));
    appStatusBuilder.maybeCreateAndGetAmInfo().setHostname(clusterDescription.getInfo(StatusKeys.INFO_AM_HOSTNAME));
    appStatusBuilder.maybeCreateAndGetAmInfo().setContainerId(clusterDescription.getInfo(StatusKeys.INFO_AM_CONTAINER_ID));

    if (clusterDescription.statistics == null) {
        // Error since statistics should always exist.
        throw new LlapStatusCliException(ExitCode.SLIDER_CLIENT_ERROR_OTHER, "Failed to get statistics");
    }
    Map<String, Integer> llapStats = clusterDescription.statistics.get(LLAP_KEY);
    if (llapStats == null) {
        // Error since LLAP should always exist.
        throw new LlapStatusCliException(ExitCode.SLIDER_CLIENT_ERROR_OTHER, "Failed to get statistics for LLAP");
    }
    // Keep the counts boxed until they are checked: unboxing a missing entry straight into an
    // int would fail with a NullPointerException instead of a reportable CLI error.
    Integer desiredContainers = llapStats.get(StatusKeys.STATISTICS_CONTAINERS_DESIRED);
    Integer liveContainers = llapStats.get(StatusKeys.STATISTICS_CONTAINERS_LIVE);
    if (desiredContainers == null || liveContainers == null) {
        throw new LlapStatusCliException(ExitCode.SLIDER_CLIENT_ERROR_OTHER, "Failed to get desired/live container counts from LLAP statistics");
    }
    appStatusBuilder.setDesiredInstances(desiredContainers);
    appStatusBuilder.setLiveInstances(liveContainers);
    // TODO HIVE-13454 Use some information from here such as containers.start.failed
    // and containers.failed.recently to provide an estimate of whether this app is healthy or not.

    // Register every live LLAP container listed in the slider cluster status as running.
    if (clusterDescription.status != null) {
        Object liveObject = clusterDescription.status.get(ClusterDescriptionKeys.KEY_CLUSTER_LIVE);
        if (liveObject != null) {
            // The cast relies on the nested-map layout this code expects for the live section;
            // it cannot be checked at runtime. NOTE(review): confirm against Slider's status format.
            @SuppressWarnings("unchecked")
            Map<String, Map<String, Map<String, Object>>> liveEntity = (Map<String, Map<String, Map<String, Object>>>) liveObject;
            Map<String, Map<String, Object>> llapEntity = liveEntity.get(LLAP_KEY);
            // A missing LLAP entry is not a problem. Nothing has come up yet.
            if (llapEntity != null) {
                for (Map.Entry<String, Map<String, Object>> containerEntry : llapEntity.entrySet()) {
                    String containerIdString = containerEntry.getKey();
                    Map<String, Object> containerParams = containerEntry.getValue();
                    String host = (String) containerParams.get("host");
                    LlapInstance llapInstance = new LlapInstance(host, containerIdString);
                    appStatusBuilder.addNewRunningLlapInstance(llapInstance);
                }
            }
        }
    }
    return ExitCode.SUCCESS;
}
Also used : SliderException(org.apache.slider.core.exceptions.SliderException) LlapInstance(org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance) URISyntaxException(java.net.URISyntaxException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) SliderException(org.apache.slider.core.exceptions.SliderException) IOException(java.io.IOException) ClusterDescription(org.apache.slider.api.ClusterDescription) Map(java.util.Map)

Aggregations

LlapInstance (org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance)4 IOException (java.io.IOException)2 URISyntaxException (java.net.URISyntaxException)2 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)2 SliderException (org.apache.slider.core.exceptions.SliderException)2 LinkedList (java.util.LinkedList)1 Map (java.util.Map)1 LlapServiceInstance (org.apache.hadoop.hive.llap.registry.LlapServiceInstance)1 ClusterDescription (org.apache.slider.api.ClusterDescription)1 ContainerInformation (org.apache.slider.api.types.ContainerInformation)1