Search in sources:

Example 11 with LlapServiceInstance

use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in project hive by apache.

the class LlapTaskCommunicator method constructLogUrl.

/**
 * Builds the LLAP log URL for a task attempt, based on the registry entry of the daemon
 * that is listening on the given node id.
 *
 * <p>The daemon is located by asking the service registry for all instances on the node's
 * host and taking the first one whose RPC port equals the node's port. The container id and
 * NodeManager address are then read from that instance's properties.
 *
 * @param attemptID the task attempt the URL is built for
 * @param containerNodeId host/port identifying the daemon; may be null
 * @param isDone forwarded unchanged to {@code constructLlapLogUrl}
 * @return the value produced by {@code constructLlapLogUrl}, or null when
 *         {@code timelineServerUri} or {@code containerNodeId} is null, the registry lookup
 *         fails, no instance matches the port, or either required property is blank
 */
private String constructLogUrl(final TezTaskAttemptID attemptID, final NodeId containerNodeId, final boolean isDone) {
    if (timelineServerUri == null || containerNodeId == null) {
        return null;
    }
    Set<LlapServiceInstance> candidates;
    try {
        candidates = serviceRegistry.getInstances().getByHost(containerNodeId.getHost());
    } catch (IOException e) {
        // A missing log url is not worth failing the job over - warn and give up.
        LOG.warn("Unable to find instance for yarnNodeId={} to construct the log url. Exception message={}", containerNodeId, e.getMessage());
        return null;
    }
    if (candidates == null) {
        return null;
    }
    // Matching on host + rpc port is the best available; once NodeId includes fragmentId
    // this becomes a lot more reliable.
    for (LlapServiceInstance candidate : candidates) {
        if (candidate.getRpcPort() != containerNodeId.getPort()) {
            continue;
        }
        // Only the first instance with a matching port is considered, whatever its properties hold.
        String containerId = candidate.getProperties().get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
        String nmAddress = candidate.getProperties().get(ConfVars.LLAP_DAEMON_NM_ADDRESS.varname);
        if (StringUtils.isBlank(containerId) || StringUtils.isBlank(nmAddress)) {
            return null;
        }
        return constructLlapLogUrl(attemptID, containerId, isDone, nmAddress);
    }
    return null;
}
Also used : LlapServiceInstance(org.apache.hadoop.hive.llap.registry.LlapServiceInstance) IOException(java.io.IOException) ByteString(com.google.protobuf.ByteString)

Example 12 with LlapServiceInstance

use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in project hive by apache.

the class LlapTaskSchedulerService method getTotalResources.

/**
 * Adds up the memory and virtual cores reported by every instance in
 * {@code activeInstances}. The instances are walked while the read lock is held; the
 * resulting {@code Resource} is created after the lock has been released.
 *
 * @return a {@code Resource} holding the summed memory and virtual cores
 */
@Override
public Resource getTotalResources() {
    int totalMemory = 0;
    int totalVcores = 0;
    readLock.lock();
    try {
        int instanceCount = 0;
        for (LlapServiceInstance instance : activeInstances.getAll()) {
            final Resource capacity = instance.getResource();
            totalMemory += capacity.getMemory();
            totalVcores += capacity.getVirtualCores();
            instanceCount++;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("GetTotalResources: numInstancesFound={}, totalMem={}, totalVcores={}", instanceCount, totalMemory, totalVcores);
        }
    } finally {
        readLock.unlock();
    }
    return Resource.newInstance(totalMemory, totalVcores);
}
Also used : Resource(org.apache.hadoop.yarn.api.records.Resource) LlapServiceInstance(org.apache.hadoop.hive.llap.registry.LlapServiceInstance)

Example 13 with LlapServiceInstance

use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in project hive by apache.

the class LlapTaskSchedulerService method start.

/**
 * Starts the scheduler service.
 *
 * <p>The plugin endpoint, when present, is started first and outside the write lock.
 * Everything else runs while the write lock is held, in this order: a delayed logging task
 * is scheduled, the node-enabler, delayed-task-scheduler and scheduler callables are
 * submitted to their executors, the registry is started and queried, a node is added for
 * every instance it returns, and finally the AM registry (when present) is started and this
 * AM is registered with it.
 *
 * @throws IOException declared by the signature; which of the calls below raise it is not
 *         visible from this method alone
 */
@Override
public void start() throws IOException {
    if (pluginEndpoint != null) {
        pluginEndpoint.start();
    }
    writeLock.lock();
    try {
        // Scheduled with a 10000 ms delay: logs the current dag stats, but only if a dag is running.
        scheduledLoggingExecutor.schedule(new Callable<Void>() {

            @Override
            public Void call() throws Exception {
                // dagRunning / dagStats are read under the read lock.
                readLock.lock();
                try {
                    if (dagRunning) {
                        LOG.info("Stats for current dag: {}", dagStats);
                    }
                } finally {
                    readLock.unlock();
                }
                return null;
            }
        }, 10000L, TimeUnit.MILLISECONDS);
        // Submit each background callable and attach a LoggingFutureCallback labelled with its thread name.
        nodeEnablerFuture = nodeEnabledExecutor.submit(nodeEnablerCallable);
        Futures.addCallback(nodeEnablerFuture, new LoggingFutureCallback("NodeEnablerThread", LOG));
        delayedTaskSchedulerFuture = delayedTaskSchedulerExecutor.submit(delayedTaskSchedulerCallable);
        Futures.addCallback(delayedTaskSchedulerFuture, new LoggingFutureCallback("DelayedTaskSchedulerThread", LOG));
        schedulerFuture = schedulerExecutor.submit(schedulerCallable);
        Futures.addCallback(schedulerFuture, new LoggingFutureCallback("SchedulerThread", LOG));
        // The state-change listener is registered after the registry is started and before
        // the initial set of instances is fetched.
        registry.start();
        registry.registerStateChangeListener(new NodeStateChangeListener());
        activeInstances = registry.getInstances();
        // Seed the scheduler with one NodeInfo per instance currently returned by the registry.
        for (LlapServiceInstance inst : activeInstances.getAll()) {
            addNode(new NodeInfo(inst, nodeBlacklistConf, clock, numSchedulableTasksPerNode, metrics), inst);
        }
        if (amRegistry != null) {
            amRegistry.start();
            // -1 is passed as the plugin port when there is no plugin endpoint.
            int pluginPort = pluginEndpoint != null ? pluginEndpoint.getActualPort() : -1;
            // NOTE(review): the meaning of the trailing 0 argument is not visible here - confirm against register().
            amRegistry.register(amPort, pluginPort, HiveConf.getVar(conf, ConfVars.HIVESESSIONID), serializedToken, jobIdForToken, 0);
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : LlapServiceInstance(org.apache.hadoop.hive.llap.registry.LlapServiceInstance) LoggingFutureCallback(org.apache.hadoop.hive.llap.tezplugins.scheduler.LoggingFutureCallback) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException)

Example 14 with LlapServiceInstance

use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in project hive by apache.

the class LlapStatusServiceDriver method populateAppStatusFromLlapRegistry.

/**
 * Enriches the running-container information in {@code appStatusBuilder} with what the
 * LLAP registry reports. Must be invoked after Slider status, and also after
 * slider-diagnostics.
 *
 * <p>The registry client is created on first use. Every registry entry is matched to a
 * running instance in the builder by container id; matched instances get their ports and
 * URLs filled in and are put back into the builder at the end, while unmatched container
 * ids are only counted as launching. The application state is derived from how the number
 * of matched instances compares to the desired instance count.
 *
 * @param appStatusBuilder the status being assembled; updated in place
 * @param watchTimeoutMs passed to the registry's {@code getInstances} call
 * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible
 * @throws LlapStatusCliException with {@code ExitCode.LLAP_REGISTRY_ERROR} when the registry
 *         client cannot be created or the instances cannot be fetched
 */
private ExitCode populateAppStatusFromLlapRegistry(AppStatusBuilder appStatusBuilder, long watchTimeoutMs) throws LlapStatusCliException {
    if (llapRegistry == null) {
        try {
            llapRegistry = LlapRegistryService.getClient(llapRegistryConf);
        } catch (Exception e) {
            throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to create llap registry client", e);
        }
    }

    Collection<LlapServiceInstance> registered;
    try {
        registered = llapRegistry.getInstances(watchTimeoutMs).getAll();
    } catch (Exception e) {
        throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e);
    }

    // Nothing registered: report zero live instances and a LAUNCHING state.
    if (registered == null || registered.isEmpty()) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("No information found in the LLAP registry");
        }
        appStatusBuilder.setLiveInstances(0);
        appStatusBuilder.setState(State.LAUNCHING);
        appStatusBuilder.clearRunningLlapInstances();
        return ExitCode.SUCCESS;
    }

    // Instances known by both slider and llap.
    List<LlapInstance> knownToBoth = new LinkedList<>();
    // Container ids present in the registry without a matching running instance in the builder.
    List<String> registryOnlyContainerIds = new LinkedList<>();
    for (LlapServiceInstance registeredInstance : registered) {
        String containerId = registeredInstance.getProperties().get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
        LlapInstance running = appStatusBuilder.removeAndGetRunningLlapInstanceForContainer(containerId);
        if (running == null) {
            // This likely indicates that an instance has recently restarted (the old instance
            // has not been unregistered), and the new instance has not registered yet.
            // It will not be added back, since its services are not up yet.
            registryOnlyContainerIds.add(containerId);
            continue;
        }
        running.setMgmtPort(registeredInstance.getManagementPort());
        running.setRpcPort(registeredInstance.getRpcPort());
        running.setShufflePort(registeredInstance.getShufflePort());
        running.setWebUrl(registeredInstance.getServicesAddress());
        running.setStatusUrl(registeredInstance.getServicesAddress() + "/status");
        knownToBoth.add(running);
    }

    int liveCount = knownToBoth.size();
    appStatusBuilder.setLiveInstances(liveCount);
    appStatusBuilder.setLaunchingInstances(registryOnlyContainerIds.size());

    if (liveCount < appStatusBuilder.getDesiredInstances()) {
        appStatusBuilder.setState(liveCount > 0 ? State.RUNNING_PARTIAL : State.LAUNCHING);
    } else {
        appStatusBuilder.setState(State.RUNNING_ALL);
        if (liveCount > appStatusBuilder.getDesiredInstances()) {
            LOG.warn("Found more entries in LLAP registry, as compared to desired entries");
        }
    }

    // Debug only: whatever is still in the builder was not matched by a registry entry -
    // containers likely to come up soon.
    if (appStatusBuilder.allRunningInstances().size() > 0) {
        LOG.debug("Potential instances starting up: {}", appStatusBuilder.allRunningInstances());
    }
    // Old containers which are likely shutting down, or new containers which launched
    // between slider-status/slider-diagnostics. Skipped for this iteration.
    if (registryOnlyContainerIds.size() > 0) {
        LOG.debug("Instances likely to shutdown soon: {}", registryOnlyContainerIds);
    }

    appStatusBuilder.clearAndAddPreviouslyKnownRunningInstances(knownToBoth);
    return ExitCode.SUCCESS;
}
Also used : LlapServiceInstance(org.apache.hadoop.hive.llap.registry.LlapServiceInstance) LlapInstance(org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance) URISyntaxException(java.net.URISyntaxException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) SliderException(org.apache.slider.core.exceptions.SliderException) IOException(java.io.IOException) LinkedList(java.util.LinkedList)

Aggregations

LlapServiceInstance (org.apache.hadoop.hive.llap.registry.LlapServiceInstance)14 IOException (java.io.IOException)7 ByteString (com.google.protobuf.ByteString)5 ArrayList (java.util.ArrayList)2 InactiveServiceInstance (org.apache.hadoop.hive.llap.registry.impl.InactiveServiceInstance)2 LlapRegistryService (org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService)2 LlapTokenIdentifier (org.apache.hadoop.hive.llap.security.LlapTokenIdentifier)2 Resource (org.apache.hadoop.yarn.api.records.Resource)2 ServiceException (com.google.protobuf.ServiceException)1 OutputStream (java.io.OutputStream)1 InetAddress (java.net.InetAddress)1 Socket (java.net.Socket)1 URISyntaxException (java.net.URISyntaxException)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1 TreeMap (java.util.TreeMap)1 ChildData (org.apache.curator.framework.recipes.cache.ChildData)1 LlapInstance (org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance)1 LlapOutputSocketInitMessage (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.LlapOutputSocketInitMessage)1 SignableVertexSpec (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec)1