Use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in the Apache Hive project.
Class LlapClusterResourceProcessor, method llapClusterCommandHandler.
/**
 * Handles the "llap cluster" command. Only the {@code info} option is supported.
 *
 * <p>For {@code info}, the command is first authorized; if authorization yields a response, that
 * response is returned as-is. Otherwise one tab-separated line is printed to {@code ss.out} per
 * instance found in the LLAP registry: application id (the literal {@code "null"} when absent),
 * worker identity, host, RPC port, the resource memory value multiplied by 1024 * 1024
 * (presumably MB converted to bytes -- confirm against the YARN Resource contract), and virtual cores.
 *
 * @param ss the current session; supplies the configuration, the output stream and the HS2 host
 * @param params the raw command arguments, parsed against {@code CLUSTER_OPTIONS}
 * @return the authorization error response if there was one, otherwise a response carrying the schema
 * @throws ParseException declared for argument parsing via {@code parseCommandArgs}
 * @throws CommandProcessorException if the option is unsupported or the instances cannot be listed
 */
private CommandProcessorResponse llapClusterCommandHandler(SessionState ss, String[] params) throws ParseException, CommandProcessorException {
  CommandLine args = parseCommandArgs(CLUSTER_OPTIONS, params);
  String hs2Host = null;
  if (ss.isHiveServerQuery()) {
    hs2Host = ss.getHiveServer2Host();
  }
  if (!args.hasOption("info")) {
    String usage = getUsageAsString();
    throw new CommandProcessorException("LLAP Cluster Processor Helper Failed: Unsupported sub-command option. " + usage);
  }

  List<String> fullCommand = Lists.newArrayList("llap", "cluster");
  fullCommand.addAll(Arrays.asList(params));
  CommandProcessorResponse authErrResp = CommandUtil.authorizeCommandAndServiceObject(ss, HiveOperationType.LLAP_CLUSTER_INFO, fullCommand, hs2Host);
  if (authErrResp != null) {
    // there was an authorization issue
    return authErrResp;
  }

  try {
    LlapRegistryService llapRegistryService = LlapRegistryService.getClient(ss.getConf());
    // Read the application id once; String.valueOf renders a missing id as "null", as before.
    Object applicationId = llapRegistryService.getApplicationId();
    String appId = String.valueOf(applicationId);
    for (LlapServiceInstance instance : llapRegistryService.getInstances().getAll()) {
      // getMemorySize() is the long-returning replacement for the deprecated getMemory().
      long memory = instance.getResource().getMemorySize() * 1024L * 1024L;
      ss.out.println(Joiner.on("\t").join(appId, instance.getWorkerIdentity(), instance.getHost(), instance.getRpcPort(), memory, instance.getResource().getVirtualCores()));
    }
    return new CommandProcessorResponse(getSchema(), null);
  } catch (Exception e) {
    LOG.error("Unable to list LLAP instances. err: ", e);
    // Keep the original exception as the cause so callers do not lose the stack trace.
    throw new CommandProcessorException("LLAP Cluster Processor Helper Failed: Unable to list LLAP instances. err: " + e.getMessage(), e);
  }
}
Use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in the Apache Hive project.
Class LlapClusterStateForCompile, method initClusterInfo.
/**
 * Refreshes the cached executor counts from the LLAP registry when {@code isUpdateNeeded()} says so.
 *
 * <p>This is best-effort: failures to create the registry client or to fetch the instances are
 * logged and reported through the return value rather than thrown.
 *
 * @return {@code true} if no update was needed or the update completed; {@code false} if the
 *         registry client could not be created or the instances could not be fetched
 */
public boolean initClusterInfo() {
  if (!isUpdateNeeded()) {
    return true;
  }
  synchronized (updateInfoLock) {
    // Final check while holding updateInfoLock; per the original author, nobody else
    // takes the write lock and updates at this point.
    if (!isUpdateNeeded()) {
      return true;
    }

    if (svc == null) {
      try {
        svc = LlapRegistryService.getClient(conf);
      } catch (Throwable t) {
        LOG.info("Cannot create the client; ignoring", t);
        // Best-effort only, so report failure instead of propagating.
        return false;
      }
    }

    ServiceInstanceSet<LlapServiceInstance> registered;
    try {
      registered = svc.getInstances(10);
    } catch (IOException e) {
      LOG.info("Cannot update cluster information; ignoring", e);
      // Do not wait for a cluster that has not started; best-effort only.
      return false;
    }

    int totalExecutors = 0;
    int unconfiguredNodes = 0;
    for (LlapServiceInstance instance : registered.getAll()) {
      // Not expected from getAll(), but skip inactive entries defensively.
      if (instance instanceof InactiveServiceInstance) {
        continue;
      }
      Map<String, String> properties = instance.getProperties();
      if (properties == null) {
        ++unconfiguredNodes;
      } else {
        try {
          int executorsOnNode = Integer.parseInt(properties.get(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname));
          totalExecutors += executorsOnNode;
          // The per-node values are only taken from the first instance that provides them.
          if (numExecutorsPerNode == -1) {
            numExecutorsPerNode = executorsOnNode;
          }
          if (memoryPerInstance == -1) {
            memoryPerInstance = instance.getResource().getMemorySize() * 1024L * 1024L;
          }
        } catch (NumberFormatException e) {
          // A missing or malformed executor count is treated as "no config" for this node.
          ++unconfiguredNodes;
        }
      }
    }

    noConfigNodeCount = unconfiguredNodes;
    executorCount = totalExecutors;
    lastClusterUpdateNs = System.nanoTime();
    return true;
  }
}
Use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in the Apache Hive project.
Class LlapStatusServiceDriver, method populateAppStatusFromLlapRegistry.
/**
 * Populate additional information for containers from the LLAP registry. Must be invoked
 * after Slider status. Also after slider-diagnostics.
 *
 * <p>Instances present in both the registry and the builder's running set are enriched with
 * their ports and URLs and counted as live. Registry entries with no matching running instance
 * are counted as launching. The application state is then derived from the live count versus
 * the desired count; when the desired count is not known, the state falls back to
 * RUNNING_PARTIAL or LAUNCHING instead of failing on a null value.
 *
 * @param appStatusBuilder the status builder to read running instances from and to update
 * @param watchTimeoutMs timeout passed to the registry when fetching instances
 * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible
 *         (this method returns ExitCode.SUCCESS whenever it returns normally)
 * @throws LlapStatusCliException if the registry client cannot be created or the instances
 *         cannot be fetched
 */
private ExitCode populateAppStatusFromLlapRegistry(AppStatusBuilder appStatusBuilder, long watchTimeoutMs) throws LlapStatusCliException {
  if (llapRegistry == null) {
    try {
      llapRegistry = LlapRegistryService.getClient(llapRegistryConf);
    } catch (Exception e) {
      throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to create llap registry client", e);
    }
  }
  Collection<LlapServiceInstance> serviceInstances;
  try {
    serviceInstances = llapRegistry.getInstances(watchTimeoutMs).getAll();
  } catch (Exception e) {
    throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e);
  }
  if (serviceInstances == null || serviceInstances.isEmpty()) {
    LOG.debug("No information found in the LLAP registry");
    appStatusBuilder.setLiveInstances(0);
    appStatusBuilder.setState(State.LAUNCHING);
    appStatusBuilder.clearRunningLlapInstances();
    return ExitCode.SUCCESS;
  } else {
    // Tracks instances known by both slider and llap.
    List<LlapInstance> validatedInstances = new LinkedList<>();
    List<String> llapExtraInstances = new LinkedList<>();
    for (LlapServiceInstance serviceInstance : serviceInstances) {
      String containerIdString = serviceInstance.getProperties().get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
      LlapInstance llapInstance = appStatusBuilder.removeAndGetRunningLlapInstanceForContainer(containerIdString);
      if (llapInstance != null) {
        llapInstance.setMgmtPort(serviceInstance.getManagementPort());
        llapInstance.setRpcPort(serviceInstance.getRpcPort());
        llapInstance.setShufflePort(serviceInstance.getShufflePort());
        llapInstance.setWebUrl(serviceInstance.getServicesAddress());
        llapInstance.setStatusUrl(serviceInstance.getServicesAddress() + "/status");
        validatedInstances.add(llapInstance);
      } else {
        // This likely indicates that an instance has recently restarted
        // (the old instance has not been unregistered), and the new instance has not registered yet.
        llapExtraInstances.add(containerIdString);
        // This instance will not be added back, since its services are not up yet.
      }
    }
    appStatusBuilder.setLiveInstances(validatedInstances.size());
    appStatusBuilder.setLaunchingInstances(llapExtraInstances.size());
    // Read the desired count once and guard against it being unset: comparing an int with a
    // null Integer would otherwise throw a NullPointerException during unboxing.
    Integer desiredInstances = appStatusBuilder.getDesiredInstances();
    if (desiredInstances != null && validatedInstances.size() >= desiredInstances) {
      appStatusBuilder.setState(State.RUNNING_ALL);
      if (validatedInstances.size() > desiredInstances) {
        LOG.warn("Found more entries in LLAP registry, as compared to desired entries");
      }
    } else {
      if (validatedInstances.size() > 0) {
        appStatusBuilder.setState(State.RUNNING_PARTIAL);
      } else {
        appStatusBuilder.setState(State.LAUNCHING);
      }
    }
    // Debug only
    if (appStatusBuilder.allRunningInstances().size() > 0) {
      // Containers likely to come up soon.
      LOG.debug("Potential instances starting up: {}", appStatusBuilder.allRunningInstances());
    }
    if (llapExtraInstances.size() > 0) {
      // Old containers which are likely shutting down, or new containers which
      // launched between slider-status/slider-diagnostics. Skip for this iteration.
      LOG.debug("Instances likely to shutdown soon: {}", llapExtraInstances);
    }
    appStatusBuilder.clearAndAddPreviouslyKnownRunningInstances(validatedInstances);
  }
  return ExitCode.SUCCESS;
}
Use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in the Apache Hive project.
Class LlapStatusServiceDriver, method populateAppStatusFromLlapRegistry.
/**
 * Enriches the application status with per-container details read from the LLAP registry.
 * Must be invoked after YARN Service status and diagnostics.
 *
 * <p>Registry entries that match a running instance in the builder are updated with ports and
 * URLs and counted as live; entries without a match are counted as launching. The overall
 * state is then set from the live count compared with the desired count, if one is set.
 *
 * @param appStatusBuilder the status builder to read running instances from and to update
 * @param watchTimeoutMs timeout passed to the registry when fetching instances
 * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible
 * @throws LlapStatusCliException if the registry client cannot be created or the instances
 *         cannot be fetched
 */
private ExitCode populateAppStatusFromLlapRegistry(AppStatusBuilder appStatusBuilder, long watchTimeoutMs) throws LlapStatusCliException {
  // Lazily create the registry client on first use.
  if (llapRegistry == null) {
    try {
      llapRegistry = LlapRegistryService.getClient(llapRegistryConf);
    } catch (Exception e) {
      throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to create llap registry client", e);
    }
  }

  Collection<LlapServiceInstance> registryEntries;
  try {
    registryEntries = llapRegistry.getInstances(watchTimeoutMs).getAll();
  } catch (Exception e) {
    throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e);
  }

  // Nothing registered yet: report zero live instances and a LAUNCHING state.
  if (registryEntries == null || registryEntries.isEmpty()) {
    LOG.debug("No information found in the LLAP registry");
    appStatusBuilder.setLiveInstances(0);
    appStatusBuilder.setState(State.LAUNCHING);
    appStatusBuilder.clearRunningLlapInstances();
    return ExitCode.SUCCESS;
  }

  // Instances known by both YARN Service and llap.
  List<LlapInstance> confirmed = new LinkedList<>();
  // Container ids present in the registry without a matching running instance.
  List<String> unmatchedContainerIds = new LinkedList<>();
  for (LlapServiceInstance entry : registryEntries) {
    String containerId = entry.getProperties().get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
    LlapInstance running = appStatusBuilder.removeAndGetRunningLlapInstanceForContainer(containerId);
    if (running == null) {
      // Likely a recently restarted instance: the old entry has not been unregistered and the
      // new one has not registered yet. It is not added back, since its services are not up.
      unmatchedContainerIds.add(containerId);
      continue;
    }
    running.setMgmtPort(entry.getManagementPort());
    running.setRpcPort(entry.getRpcPort());
    running.setShufflePort(entry.getShufflePort());
    running.setWebUrl(entry.getServicesAddress());
    running.setStatusUrl(entry.getServicesAddress() + "/status");
    confirmed.add(running);
  }

  appStatusBuilder.setLiveInstances(confirmed.size());
  appStatusBuilder.setLaunchingInstances(unmatchedContainerIds.size());

  if (appStatusBuilder.getDesiredInstances() != null && confirmed.size() >= appStatusBuilder.getDesiredInstances()) {
    appStatusBuilder.setState(State.RUNNING_ALL);
    if (confirmed.size() > appStatusBuilder.getDesiredInstances()) {
      LOG.warn("Found more entries in LLAP registry, as compared to desired entries");
    }
  } else if (!confirmed.isEmpty()) {
    appStatusBuilder.setState(State.RUNNING_PARTIAL);
  } else {
    appStatusBuilder.setState(State.LAUNCHING);
  }

  // Debug only
  if (appStatusBuilder.allRunningInstances().size() > 0) {
    // Whatever is still in the running set was not matched above; likely to come up soon.
    LOG.debug("Potential instances starting up: {}", appStatusBuilder.allRunningInstances());
  }
  if (!unmatchedContainerIds.isEmpty()) {
    // Old containers which are likely shutting down, or new containers which
    // launched between YARN Service status/diagnostics. Skip for this iteration.
    LOG.debug("Instances likely to shutdown soon: {}", unmatchedContainerIds);
  }

  appStatusBuilder.clearAndAddPreviouslyKnownRunningInstances(confirmed);
  return ExitCode.SUCCESS;
}
Use of org.apache.hadoop.hive.llap.registry.LlapServiceInstance in the Apache Hive project.
Class LlapTaskCommunicator, method constructLogUrl.
/**
 * Builds the log URL for a task attempt from the LLAP instance registered on the given node.
 *
 * <p>The instance is located by host and then by matching its RPC port against the node's port;
 * the first match is used. Failures never propagate: any problem results in {@code null}.
 *
 * @param attemptID the task attempt the URL is for
 * @param containerNodeId the node the attempt ran on; may be {@code null}
 * @param isDone passed through to {@code constructLlapLogUrl}
 * @return the log URL, or {@code null} if the timeline server URI or node id is missing, the
 *         registry lookup fails, no instance matches, or the matched instance lacks a container
 *         id or NM address
 */
private String constructLogUrl(final TezTaskAttemptID attemptID, final NodeId containerNodeId, final boolean isDone) {
  if (timelineServerUri == null || containerNodeId == null) {
    return null;
  }

  Set<LlapServiceInstance> instancesOnHost;
  try {
    instancesOnHost = serviceRegistry.getInstances().getByHost(containerNodeId.getHost());
  } catch (IOException e) {
    // Not failing the job due to a failure constructing the log url
    LOG.warn("Unable to find instance for yarnNodeId={} to construct the log url. Exception message={}", containerNodeId, e.getMessage());
    return null;
  }
  if (instancesOnHost == null) {
    return null;
  }

  // Once NodeId includes fragmentId - this becomes a lot more reliable.
  for (LlapServiceInstance candidate : instancesOnHost) {
    if (candidate.getRpcPort() != containerNodeId.getPort()) {
      continue;
    }
    // The first instance whose RPC port matches decides the outcome; later ones are not examined.
    String containerId = candidate.getProperties().get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
    String nmAddress = candidate.getProperties().get(ConfVars.LLAP_DAEMON_NM_ADDRESS.varname);
    if (StringUtils.isBlank(containerId) || StringUtils.isBlank(nmAddress)) {
      return null;
    }
    return constructLlapLogUrl(attemptID, containerId, isDone, nmAddress);
  }
  return null;
}
Aggregations