Use of org.apache.hadoop.hive.llap.registry.impl.InactiveServiceInstance in the Apache Hive project.
Class LlapTaskSchedulerService, method selectHost.
/**
* Chooses a node for the given task request, preferring the hosts the task asked for.
* <p>
* The whole selection runs under {@code readLock} and proceeds in stages:
* <ol>
* <li>If the request names hosts, return the first node on any requested host (in request order)
* whose {@code canAcceptTask()} is true.</li>
* <li>If {@code request.shouldDelayForLocality(now)} is true and at least one requested host is
* judged worth waiting for, return {@code SELECT_HOST_RESULT_DELAYED_LOCALITY}.</li>
* <li>Otherwise fall through to the non-local path: if no instances are known, return
* {@code SELECT_HOST_RESULT_DELAYED_RESOURCES}; if no hosts were requested, or the first requested
* host is not in the ordered instance list, delegate to {@code randomSelection}; else walk the
* ordered list circularly, starting just after the first requested host, and return the first node
* that can accept a task, or {@code SELECT_HOST_RESULT_DELAYED_RESOURCES} if there is none.</li>
* </ol>
*
* @param request the task being scheduled; its {@code requestedHosts} may be null or empty, which
*                means no locality was requested
* @return a result wrapping the chosen node, or one of the shared {@code SELECT_HOST_RESULT_DELAYED_*}
*         results, or whatever {@code randomSelection} returns on the random fallback paths
*/
private SelectHostResult selectHost(TaskInfo request) {
String[] requestedHosts = request.requestedHosts;
String requestedHostsDebugStr = Arrays.toString(requestedHosts);
if (LOG.isDebugEnabled()) {
LOG.debug("selectingHost for task={} on hosts={}", request.task, requestedHostsDebugStr);
}
long schedulerAttemptTime = clock.getTime();
// Read-lock. Not updating any stats at the moment.
readLock.lock();
try {
boolean shouldDelayForLocality = request.shouldDelayForLocality(schedulerAttemptTime);
LOG.debug("ShouldDelayForLocality={} for task={} on hosts={}", shouldDelayForLocality, request.task, requestedHostsDebugStr);
if (requestedHosts != null && requestedHosts.length > 0) {
// Zero-based position of the host currently being examined; only used in the log message below.
int prefHostCount = -1;
// Set to true when at least one busy requested host is considered worth waiting for.
boolean requestedHostsWillBecomeAvailable = false;
for (String host : requestedHosts) {
prefHostCount++;
// Pick the first host always. Weak attempt at cache affinity.
Set<LlapServiceInstance> instances = activeInstances.getByHost(host);
if (!instances.isEmpty()) {
for (LlapServiceInstance inst : instances) {
NodeInfo nodeInfo = instanceToNodeMap.get(inst.getWorkerIdentity());
if (nodeInfo != null) {
if (nodeInfo.canAcceptTask()) {
// Successfully scheduled.
LOG.info("Assigning {} when looking for {}." + " local=true FirstRequestedHost={}, #prefLocations={}", nodeInfo.toShortString(), host, (prefHostCount == 0), requestedHosts.length);
return new SelectHostResult(nodeInfo);
} else {
// The node cannot accept a task at the moment.
if (shouldDelayForLocality) {
// Perform some checks on whether the node will become available or not.
if (request.shouldForceLocality()) {
// Locality is forced for this request, so always treat the host as worth waiting for.
requestedHostsWillBecomeAvailable = true;
} else {
// Not worth waiting only when all three hold: the node's enable time is past the task's
// locality delay timeout, the node is disabled, and it had a communication failure.
if (nodeInfo.getEnableTime() > request.getLocalityDelayTimeout() && nodeInfo.isDisabled() && nodeInfo.hadCommFailure()) {
LOG.debug("Host={} will not become available within requested timeout", nodeInfo);
// This node will likely be activated after the task timeout expires.
} else {
// Worth waiting for the timeout.
requestedHostsWillBecomeAvailable = true;
}
}
}
}
} else {
LOG.warn("Null NodeInfo when attempting to get host with worker {}, and host {}", inst, host);
// Leave requestedHostWillBecomeAvailable as is. If some other host is found - delay,
// else ends up allocating to a random host immediately.
}
}
}
}
// Check if forcing the location is required.
if (shouldDelayForLocality) {
if (requestedHostsWillBecomeAvailable) {
if (LOG.isDebugEnabled()) {
LOG.debug("Delaying local allocation for [" + request.task + "] when trying to allocate on [" + requestedHostsDebugStr + "]" + ". ScheduleAttemptTime=" + schedulerAttemptTime + ", taskDelayTimeout=" + request.getLocalityDelayTimeout());
}
return SELECT_HOST_RESULT_DELAYED_LOCALITY;
} else {
// No requested host is worth waiting for: log and continue to the non-local path below.
if (LOG.isDebugEnabled()) {
LOG.debug("Skipping local allocation for [" + request.task + "] when trying to allocate on [" + requestedHostsDebugStr + "] since none of these hosts are part of the known list");
}
}
}
}
/* fall through - miss in locality or no locality-requested */
Collection<LlapServiceInstance> instances = activeInstances.getAllInstancesOrdered(true);
// allNodes gets exactly one entry per ordered instance; null marks an inactive instance or one
// without a NodeInfo (presumably so that list positions stay aligned with the consistent order).
List<NodeInfo> allNodes = new ArrayList<>(instances.size());
List<NodeInfo> activeNodesWithFreeSlots = new ArrayList<>();
for (LlapServiceInstance inst : instances) {
if (inst instanceof InactiveServiceInstance) {
allNodes.add(null);
} else {
NodeInfo nodeInfo = instanceToNodeMap.get(inst.getWorkerIdentity());
if (nodeInfo == null) {
allNodes.add(null);
} else {
allNodes.add(nodeInfo);
if (nodeInfo.canAcceptTask()) {
activeNodesWithFreeSlots.add(nodeInfo);
}
}
}
}
// No instances at all (not even placeholders): nothing can be scheduled right now.
if (allNodes.isEmpty()) {
return SELECT_HOST_RESULT_DELAYED_RESOURCES;
}
// no locality-requested, randomly pick a node containing free slots
if (requestedHosts == null || requestedHosts.length == 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("No-locality requested. Selecting a random host for task={}", request.task);
}
return randomSelection(activeNodesWithFreeSlots);
}
// miss in locality request, try picking consistent location with fallback to random selection
final String firstRequestedHost = requestedHosts[0];
// Index in allNodes of the first non-null node whose host equals the first requested host; -1 if none.
int requestedHostIdx = -1;
for (int i = 0; i < allNodes.size(); i++) {
NodeInfo nodeInfo = allNodes.get(i);
if (nodeInfo != null) {
if (nodeInfo.getHost().equals(firstRequestedHost)) {
requestedHostIdx = i;
break;
}
}
}
// TODO: At this point we don't know the slot number of the requested host, so can't rollover to next available
if (requestedHostIdx == -1) {
if (LOG.isDebugEnabled()) {
LOG.debug("Requested node [{}] in consistent order does not exist. Falling back to random selection for " + "request {}", firstRequestedHost, request);
}
return randomSelection(activeNodesWithFreeSlots);
}
// requested host is still alive but cannot accept task, pick the next available host in consistent order
// The walk starts at the slot right after the requested host and wraps around, so the requested
// host's own slot is visited last.
for (int i = 0; i < allNodes.size(); i++) {
NodeInfo nodeInfo = allNodes.get((i + requestedHostIdx + 1) % allNodes.size());
// next node in consistent order died or does not have free slots, rollover to next
if (nodeInfo == null || !nodeInfo.canAcceptTask()) {
continue;
} else {
if (LOG.isDebugEnabled()) {
// NOTE(review): requestedHosts is already known to be non-empty here (the no-locality case
// returned earlier), so the "null" branch of the ternary below cannot be taken.
LOG.debug("Assigning {} in consistent order when looking for first requested host, from #hosts={}," + " requestedHosts={}", nodeInfo.toShortString(), allNodes.size(), ((requestedHosts == null || requestedHosts.length == 0) ? "null" : requestedHostsDebugStr));
}
return new SelectHostResult(nodeInfo);
}
}
// Every slot was either a placeholder or a node that cannot accept a task.
return SELECT_HOST_RESULT_DELAYED_RESOURCES;
} finally {
readLock.unlock();
}
}
Use of org.apache.hadoop.hive.llap.registry.impl.InactiveServiceInstance in the Apache Hive project.
Class LlapClusterStateForCompile, method initClusterInfo.
/**
 * Refreshes the cached cluster figures (executor count, count of nodes without usable
 * configuration, executors per node, memory per instance) from the LLAP registry when
 * {@code isUpdateNeeded()} reports that a refresh is due.
 * <p>
 * The refresh is best-effort: failures to create the registry client or to fetch the instance
 * set are logged at INFO level and reported through the return value rather than thrown.
 *
 * @return {@code true} if no update was needed or the update completed; {@code false} if the
 *         registry client could not be created or the instances could not be fetched
 */
public boolean initClusterInfo() {
  // Check before taking the lock, so callers that need no refresh return immediately.
  if (!isUpdateNeeded()) {
    return true;
  }
  synchronized (updateInfoLock) {
    // Check again now that updateInfoLock is held, before doing the actual registry work.
    if (!isUpdateNeeded()) {
      return true;
    }

    // Create the registry client on first use.
    if (svc == null) {
      try {
        svc = LlapRegistryService.getClient(conf);
      } catch (Throwable t) {
        LOG.info("Cannot create the client; ignoring", t);
        // Best-effort only: report failure instead of propagating.
        return false;
      }
    }

    ServiceInstanceSet<LlapServiceInstance> registered;
    try {
      registered = svc.getInstances(10);
    } catch (IOException e) {
      LOG.info("Cannot update cluster information; ignoring", e);
      // Best-effort only: do not wait for a cluster that has not started.
      return false;
    }

    int totalExecutors = 0;
    int unconfiguredNodes = 0;
    for (LlapServiceInstance instance : registered.getAll()) {
      // Inactive placeholders are not expected from getAll(), but are skipped if present.
      if (instance instanceof InactiveServiceInstance) {
        continue;
      }
      Map<String, String> properties = instance.getProperties();
      if (properties == null) {
        unconfiguredNodes++;
        continue;
      }
      try {
        String executorsValue = properties.get(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname);
        int executorsOnNode = Integer.parseInt(executorsValue);
        totalExecutors += executorsOnNode;
        // The per-node figures are only filled in while they still hold the -1 sentinel.
        if (numExecutorsPerNode == -1) {
          numExecutorsPerNode = executorsOnNode;
        }
        if (memoryPerInstance == -1) {
          memoryPerInstance = instance.getResource().getMemorySize() * 1024L * 1024L;
        }
      } catch (NumberFormatException e) {
        // A missing or non-numeric executor count is counted as a node without configuration.
        unconfiguredNodes++;
      }
    }

    noConfigNodeCount = unconfiguredNodes;
    executorCount = totalExecutors;
    lastClusterUpdateNs = System.nanoTime();
    return true;
  }
}
Use of org.apache.hadoop.hive.llap.registry.impl.InactiveServiceInstance in the Apache Hive project.
Class LlapTaskSchedulerService, method getResourceAvailability.
/**
 * Computes the total resources of the known, non-inactive LLAP instances and groups the nodes
 * that can currently accept a task by host.
 * <p>
 * Every instance that has a {@code NodeInfo} contributes a key for its host to the returned map,
 * even when its list of accepting nodes stays empty. If no node at all can accept a task,
 * {@code isClusterCapacityFull} is set to {@code true}.
 *
 * @return a pair of (summed memory and vcores of non-inactive instances, insertion-ordered map
 *         from host to the nodes on that host that can currently accept a task)
 */
private Pair<Resource, Map<String, List<NodeInfo>>> getResourceAvailability() {
  int totalMemory = 0;
  int totalVcores = 0;
  int liveInstanceCount = 0;
  Map<String, List<NodeInfo>> hostToFreeNodes;

  readLock.lock();
  try {
    // maintain insertion order (needed for Next slot in locality miss)
    hostToFreeNodes = new LinkedHashMap<>(instanceToNodeMap.size());

    Collection<LlapServiceInstance> candidates;
    if (consistentSplits) {
      // Ordered view; might also include Inactive instances.
      candidates = activeInstances.getAllInstancesOrdered(true);
    } else {
      // Without consistent splits the ordering is not needed, as there is no cache benefit anyway.
      candidates = activeInstances.getAll();
    }

    boolean anyFreeSlot = false;
    for (LlapServiceInstance instance : candidates) {
      NodeInfo node = instanceToNodeMap.get(instance.getWorkerIdentity());
      if (node == null) {
        LOG.warn("Null NodeInfo when attempting to get available resources for " + instance.getWorkerIdentity());
        continue;
      }

      // Hosts exist in the map even for nodes that do not currently have resources.
      List<NodeInfo> freeNodesOnHost = hostToFreeNodes.computeIfAbsent(node.getHost(), host -> new ArrayList<>());
      if (instance instanceof InactiveServiceInstance) {
        continue;
      }

      Resource resource = instance.getResource();
      totalMemory += resource.getMemory();
      totalVcores += resource.getVirtualCores();
      liveInstanceCount++;
      if (node.canAcceptTask()) {
        anyFreeSlot = true;
        freeNodesOnHost.add(node);
      }
    }

    // Flag the cluster as full so that callers can bail out early when nothing can accept a task.
    if (!anyFreeSlot) {
      isClusterCapacityFull.set(true);
    }
  } finally {
    readLock.unlock();
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Available resources: numInstancesFound={}, totalMem={}, totalVcores={} availableHosts: {}", liveInstanceCount, totalMemory, totalVcores, hostToFreeNodes.size());
  }
  return new ImmutablePair<>(Resource.newInstance(totalMemory, totalVcores), hostToFreeNodes);
}
Use of org.apache.hadoop.hive.llap.registry.impl.InactiveServiceInstance in the Apache Hive project.
Class TestUtils, method testGetSplitLocationProvider.
/**
 * Checks that {@code Utils.getCustomSplitLocationProvider} copes with a mix of instance types.
 * <p>
 * The mocked instance set first holds an inactive instance, a dynamic instance with 10 enabled
 * executors and a dynamic instance with 0 enabled executors; the provider is then expected to
 * report only {@code ACTIVE}. The active dynamic instance is afterwards swapped for a fixed
 * registry instance, and the provider is expected to report only {@code FIXED}.
 */
@Test
public void testGetSplitLocationProvider() throws IOException, URISyntaxException {
  // Registry used only as the outer instance for the dynamic service instances.
  HiveConf zkConf = new HiveConf();
  zkConf.set(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM.varname, "localhost");
  LlapZookeeperRegistryImpl zkRegistry = new LlapZookeeperRegistryImpl("dyn", zkConf);

  // All IPC endpoints of the active daemon point at the same host and port.
  InetSocketAddress activeAddress = new InetSocketAddress(ACTIVE, 4000);
  Endpoint rpcEp = RegistryTypeUtils.ipcEndpoint("llap", activeAddress);
  Endpoint shuffleEp = RegistryTypeUtils.ipcEndpoint("shuffle", activeAddress);
  Endpoint managementEp = RegistryTypeUtils.ipcEndpoint("llapmng", activeAddress);
  Endpoint outputFormatEp = RegistryTypeUtils.ipcEndpoint("llapoutputformat", activeAddress);
  Endpoint servicesEp = RegistryTypeUtils.webEndpoint("services", new URI(ACTIVE + ":4000"));

  // Service record for the active instance.
  ServiceRecord activeRecord = new ServiceRecord();
  activeRecord.addInternalEndpoint(rpcEp);
  activeRecord.addInternalEndpoint(shuffleEp);
  activeRecord.addInternalEndpoint(managementEp);
  activeRecord.addInternalEndpoint(outputFormatEp);
  activeRecord.addExternalEndpoint(servicesEp);
  activeRecord.set(LlapRegistryService.LLAP_DAEMON_NUM_ENABLED_EXECUTORS, 10);
  activeRecord.set(HiveConf.ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, 100);

  // Copy of the active record, but with 0 enabled executors.
  ServiceRecord zeroExecutorRecord = new ServiceRecord(activeRecord);
  zeroExecutorRecord.set(LlapRegistryService.LLAP_DAEMON_NUM_ENABLED_EXECUTORS, 0);

  // Populate the instance list: one inactive, one active, one with 0 executors.
  List<LlapServiceInstance> serviceInstances = new ArrayList<>(3);
  LlapServiceInstance inactiveInstance = new InactiveServiceInstance(INACTIVE);
  serviceInstances.add(inactiveInstance);
  LlapZookeeperRegistryImpl.DynamicServiceInstance activeInstance =
      zkRegistry.new DynamicServiceInstance(activeRecord);
  serviceInstances.add(activeInstance);
  LlapZookeeperRegistryImpl.DynamicServiceInstance zeroExecutorInstance =
      zkRegistry.new DynamicServiceInstance(zeroExecutorRecord);
  zeroExecutorInstance.setHost(DISABLED);
  serviceInstances.add(zeroExecutorInstance);

  when(mockRegistry.getInstances()).thenReturn(mockInstanceSet);
  when(mockInstanceSet.getAllInstancesOrdered(anyBoolean())).thenReturn(serviceInstances);

  SplitLocationProvider locationProvider = Utils.getCustomSplitLocationProvider(mockRegistry, LOG);
  assertLocations((HostAffinitySplitLocationProvider) locationProvider, new String[] { ACTIVE });

  // Repeat with a fixed-registry instance in place of the active dynamic one.
  LlapFixedRegistryImpl fixedRegistry = new LlapFixedRegistryImpl("llap", new HiveConf());
  LlapServiceInstance fixedInstance = fixedRegistry.new FixedServiceInstance(FIXED);
  serviceInstances.remove(activeInstance);
  serviceInstances.add(fixedInstance);

  locationProvider = Utils.getCustomSplitLocationProvider(mockRegistry, LOG);
  assertLocations((HostAffinitySplitLocationProvider) locationProvider, new String[] { FIXED });
}
Aggregations