Use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in the Apache Hive project.
Class LlapTaskCommunicator, method constructLogUrl.
/**
 * Builds the LLAP log URL for a task attempt, based on the registry entry of the daemon
 * that matches the supplied node.
 *
 * <p>This is best-effort: any condition that prevents the URL from being built yields
 * {@code null} rather than an exception.
 *
 * @param attemptID task attempt the URL is built for; forwarded to {@code constructLlapLogUrl}
 * @param containerNodeId node whose host and port identify the daemon; may be {@code null}
 * @param isDone forwarded unchanged to {@code constructLlapLogUrl}
 * @return the log URL, or {@code null} when {@code timelineServerUri} or
 *         {@code containerNodeId} is {@code null}, the registry lookup fails, no instance on
 *         the host has a matching RPC port, or the matching instance lacks a container id or
 *         NM address
 */
private String constructLogUrl(final TezTaskAttemptID attemptID, final NodeId containerNodeId, final boolean isDone) {
  if (timelineServerUri == null || containerNodeId == null) {
    return null;
  }

  Set<LlapServiceInstance> hostInstances;
  try {
    hostInstances = serviceRegistry.getInstances().getByHost(containerNodeId.getHost());
  } catch (IOException e) {
    // Not failing the job due to a failure constructing the log url
    LOG.warn("Unable to find instance for yarnNodeId={} to construct the log url. Exception message={}", containerNodeId, e.getMessage());
    return null;
  }
  if (hostInstances == null) {
    return null;
  }

  // Once NodeId includes fragmentId - this becomes a lot more reliable.
  for (LlapServiceInstance candidate : hostInstances) {
    if (candidate.getRpcPort() != containerNodeId.getPort()) {
      continue;
    }
    // Only the first instance with a matching RPC port is considered; its properties
    // decide the outcome and no further instances are examined.
    String containerIdString = candidate.getProperties().get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
    String nmNodeAddress = candidate.getProperties().get(ConfVars.LLAP_DAEMON_NM_ADDRESS.varname);
    if (StringUtils.isBlank(containerIdString) || StringUtils.isBlank(nmNodeAddress)) {
      return null;
    }
    return constructLlapLogUrl(attemptID, containerIdString, isDone, nmNodeAddress);
  }
  return null;
}
Use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in the Apache Hive project.
Class LlapTaskSchedulerService, method getTotalResources.
/**
 * Adds up the memory and virtual cores reported by every instance in
 * {@code activeInstances}. The iteration and the debug log both run while holding
 * {@code readLock}.
 *
 * @return a new {@code Resource} carrying the summed memory and vcores
 */
@Override
public Resource getTotalResources() {
  int totalMemory = 0;
  int totalVcores = 0;
  readLock.lock();
  try {
    int instanceCount = 0;
    for (LlapServiceInstance instance : activeInstances.getAll()) {
      Resource perInstance = instance.getResource();
      totalMemory += perInstance.getMemory();
      totalVcores += perInstance.getVirtualCores();
      instanceCount++;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("GetTotalResources: numInstancesFound={}, totalMem={}, totalVcores={}", instanceCount, totalMemory, totalVcores);
    }
  } finally {
    // Always release the lock, even if an instance lookup throws.
    readLock.unlock();
  }
  return Resource.newInstance(totalMemory, totalVcores);
}
Use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in the Apache Hive project.
Class LlapTaskSchedulerService, method start.
/**
 * Starts the scheduler service: starts the optional plugin endpoint, then, while holding
 * {@code writeLock}, schedules a DAG-stats logging task, submits the node-enabler,
 * delayed-task-scheduler and scheduler callables, starts the registry and adds a node for
 * every instance it currently reports, and finally (if an AM registry is configured)
 * starts it and registers this AM.
 *
 * @throws IOException declared by the signature; which of the calls below raise it is not
 *         visible in this method (presumably the registry calls; confirm)
 */
@Override
public void start() throws IOException {
// The plugin endpoint is optional and is started before the write lock is taken.
if (pluginEndpoint != null) {
pluginEndpoint.start();
}
writeLock.lock();
try {
// Schedule a task with a 10000 ms delay that logs the current DAG stats when a DAG is
// running. NOTE(review): if scheduledLoggingExecutor is a ScheduledExecutorService,
// schedule(...) runs the task once, not periodically; confirm that is intended.
scheduledLoggingExecutor.schedule(new Callable<Void>() {
@Override
public Void call() throws Exception {
// dagRunning and dagStats are read under the read lock.
readLock.lock();
try {
if (dagRunning) {
LOG.info("Stats for current dag: {}", dagStats);
}
} finally {
readLock.unlock();
}
return null;
}
}, 10000L, TimeUnit.MILLISECONDS);
// Submit each background callable to its executor and attach a LoggingFutureCallback
// labelled with the name passed as its first argument.
nodeEnablerFuture = nodeEnabledExecutor.submit(nodeEnablerCallable);
Futures.addCallback(nodeEnablerFuture, new LoggingFutureCallback("NodeEnablerThread", LOG));
delayedTaskSchedulerFuture = delayedTaskSchedulerExecutor.submit(delayedTaskSchedulerCallable);
Futures.addCallback(delayedTaskSchedulerFuture, new LoggingFutureCallback("DelayedTaskSchedulerThread", LOG));
schedulerFuture = schedulerExecutor.submit(schedulerCallable);
Futures.addCallback(schedulerFuture, new LoggingFutureCallback("SchedulerThread", LOG));
// Start the registry and register the state-change listener before fetching the
// instance set, then add a node for every instance the registry currently returns.
registry.start();
registry.registerStateChangeListener(new NodeStateChangeListener());
activeInstances = registry.getInstances();
for (LlapServiceInstance inst : activeInstances.getAll()) {
addNode(new NodeInfo(inst, nodeBlacklistConf, clock, numSchedulableTasksPerNode, metrics), inst);
}
// The AM registry is optional; when present, start it and register this AM.
if (amRegistry != null) {
amRegistry.start();
// -1 is passed as the plugin port when there is no plugin endpoint.
int pluginPort = pluginEndpoint != null ? pluginEndpoint.getActualPort() : -1;
amRegistry.register(amPort, pluginPort, HiveConf.getVar(conf, ConfVars.HIVESESSIONID), serializedToken, jobIdForToken, 0);
}
} finally {
// Release the write lock even if any start-up step above throws.
writeLock.unlock();
}
}
Use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in the Apache Hive project.
Class LlapStatusServiceDriver, method populateAppStatusFromLlapRegistry.
/**
 * Reconciles the running containers already recorded in {@code appStatusBuilder} with the
 * instances currently present in the LLAP registry, then updates the builder's live and
 * launching counts, its state, and its set of running instances.
 *
 * <p>Must be invoked after Slider status, and also after slider-diagnostics.
 *
 * @param appStatusBuilder builder whose running instances are matched, by container id,
 *        against the registry entries and which receives the resulting counts and state
 * @param watchTimeoutMs value passed to {@code llapRegistry.getInstances(...)}
 * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not
 *         possible
 * @throws LlapStatusCliException with {@code ExitCode.LLAP_REGISTRY_ERROR} when the registry
 *         client cannot be created or the instances cannot be fetched
 */
private ExitCode populateAppStatusFromLlapRegistry(AppStatusBuilder appStatusBuilder, long watchTimeoutMs) throws LlapStatusCliException {
  // The registry client is created lazily on first use.
  if (llapRegistry == null) {
    try {
      llapRegistry = LlapRegistryService.getClient(llapRegistryConf);
    } catch (Exception e) {
      throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to create llap registry client", e);
    }
  }

  Collection<LlapServiceInstance> registryEntries;
  try {
    registryEntries = llapRegistry.getInstances(watchTimeoutMs).getAll();
  } catch (Exception e) {
    throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e);
  }

  // Nothing registered yet: report zero live instances and a LAUNCHING state.
  if (registryEntries == null || registryEntries.isEmpty()) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("No information found in the LLAP registry");
    }
    appStatusBuilder.setLiveInstances(0);
    appStatusBuilder.setState(State.LAUNCHING);
    appStatusBuilder.clearRunningLlapInstances();
    return ExitCode.SUCCESS;
  }

  // Instances known to both slider and the LLAP registry.
  List<LlapInstance> confirmedInstances = new LinkedList<>();
  // Container ids present in the registry but not among the builder's running instances.
  List<String> registryOnlyContainerIds = new LinkedList<>();
  for (LlapServiceInstance registryEntry : registryEntries) {
    String containerId = registryEntry.getProperties().get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
    LlapInstance knownInstance = appStatusBuilder.removeAndGetRunningLlapInstanceForContainer(containerId);
    if (knownInstance == null) {
      // This likely indicates that an instance has recently restarted (the old instance
      // has not been unregistered) and the new instance has not registered yet. It is not
      // added back, since its services are not up yet.
      registryOnlyContainerIds.add(containerId);
      continue;
    }
    knownInstance.setMgmtPort(registryEntry.getManagementPort());
    knownInstance.setRpcPort(registryEntry.getRpcPort());
    knownInstance.setShufflePort(registryEntry.getShufflePort());
    knownInstance.setWebUrl(registryEntry.getServicesAddress());
    knownInstance.setStatusUrl(registryEntry.getServicesAddress() + "/status");
    confirmedInstances.add(knownInstance);
  }

  final int liveCount = confirmedInstances.size();
  appStatusBuilder.setLiveInstances(liveCount);
  appStatusBuilder.setLaunchingInstances(registryOnlyContainerIds.size());

  // Derive the overall state from the confirmed count versus the desired count.
  if (liveCount >= appStatusBuilder.getDesiredInstances()) {
    appStatusBuilder.setState(State.RUNNING_ALL);
    if (liveCount > appStatusBuilder.getDesiredInstances()) {
      LOG.warn("Found more entries in LLAP registry, as compared to desired entries");
    }
  } else if (liveCount > 0) {
    appStatusBuilder.setState(State.RUNNING_PARTIAL);
  } else {
    appStatusBuilder.setState(State.LAUNCHING);
  }

  // Debug only
  if (appStatusBuilder.allRunningInstances().size() > 0) {
    // Whatever is still left in the builder was not matched above: containers likely to
    // come up soon.
    LOG.debug("Potential instances starting up: {}", appStatusBuilder.allRunningInstances());
  }
  if (!registryOnlyContainerIds.isEmpty()) {
    // Old containers which are likely shutting down, or new containers which launched
    // between slider-status/slider-diagnostics. Skip for this iteration.
    LOG.debug("Instances likely to shutdown soon: {}", registryOnlyContainerIds);
  }

  appStatusBuilder.clearAndAddPreviouslyKnownRunningInstances(confirmedInstances);
  return ExitCode.SUCCESS;
}
Aggregations