Use of org.apache.hadoop.hive.llap.registry.ServiceInstance in the Apache Hive project.
Class LlapTaskSchedulerService, method selectHost.
/**
* Chooses a node for the given task while holding the read lock.
*
* If the request names preferred hosts, they are tried in order and the first node that can
* accept a task is returned. When no preferred node can accept the task and
* request.shouldDelayForLocality(...) is true, SELECT_HOST_RESULT_DELAYED_LOCALITY is returned as
* long as at least one preferred node was judged worth waiting for. Otherwise the method falls
* through to the ordered list of all instances: with no preferred hosts it delegates to
* randomSelection(...) over the nodes that can currently accept a task; with preferred hosts it
* locates the first requested host in the ordered list and returns the first node after it (with
* wrap-around) that can accept a task, or delegates to randomSelection(...) if that host is not
* in the list.
*
* @param request the task to place; request.requestedHosts holds the preferred hosts, and a null
* or empty array means no locality was requested
* @return the selected node wrapped in a SelectHostResult, the result of randomSelection(...), or
* one of SELECT_HOST_RESULT_DELAYED_LOCALITY / SELECT_HOST_RESULT_DELAYED_RESOURCES when no node
* is picked here
*/
private SelectHostResult selectHost(TaskInfo request) {
String[] requestedHosts = request.requestedHosts;
// Rendered once up front and reused by every log statement below ("null" for a null array).
String requestedHostsDebugStr = Arrays.toString(requestedHosts);
if (LOG.isDebugEnabled()) {
LOG.debug("selectingHost for task={} on hosts={}", request.task, requestedHostsDebugStr);
}
long schedulerAttemptTime = clock.getTime();
// Read-lock. Not updating any stats at the moment.
readLock.lock();
try {
boolean shouldDelayForLocality = request.shouldDelayForLocality(schedulerAttemptTime);
LOG.debug("ShouldDelayForLocality={} for task={} on hosts={}", shouldDelayForLocality, request.task, requestedHostsDebugStr);
if (requestedHosts != null && requestedHosts.length > 0) {
// Starts at -1 and is bumped at the top of each iteration, so it is the 0-based index of the
// host currently being examined; only used for the "FirstRequestedHost" log field.
int prefHostCount = -1;
// Set when at least one preferred node is busy/disabled but considered worth waiting for.
boolean requestedHostsWillBecomeAvailable = false;
for (String host : requestedHosts) {
prefHostCount++;
// Pick the first host always. Weak attempt at cache affinity.
Set<ServiceInstance> instances = activeInstances.getByHost(host);
if (!instances.isEmpty()) {
for (ServiceInstance inst : instances) {
NodeInfo nodeInfo = instanceToNodeMap.get(inst.getWorkerIdentity());
if (nodeInfo != null) {
if (nodeInfo.canAcceptTask()) {
// Successfully scheduled.
LOG.info("Assigning {} when looking for {}." + " local=true FirstRequestedHost={}, #prefLocations={}", nodeInfo.toShortString(), host, (prefHostCount == 0), requestedHosts.length);
return new SelectHostResult(nodeInfo);
} else {
// The node cannot accept a task at the moment.
if (shouldDelayForLocality) {
// Perform some checks on whether the node will become available or not.
if (request.shouldForceLocality()) {
// Forced locality: always wait for the preferred node.
requestedHostsWillBecomeAvailable = true;
} else {
// Give up on this node only when all three hold: its enable time is past the request's
// locality delay timeout, it is disabled, and it had a communication failure.
if (nodeInfo.getEnableTime() > request.getLocalityDelayTimeout() && nodeInfo.isDisabled() && nodeInfo.hadCommFailure()) {
LOG.debug("Host={} will not become available within requested timeout", nodeInfo);
// This node will likely be activated after the task timeout expires.
} else {
// Worth waiting for the timeout.
requestedHostsWillBecomeAvailable = true;
}
}
}
}
} else {
LOG.warn("Null NodeInfo when attempting to get host with worker {}, and host {}", inst, host);
// Leave requestedHostWillBecomeAvailable as is. If some other host is found - delay,
// else ends up allocating to a random host immediately.
}
}
}
}
// Check if forcing the location is required.
if (shouldDelayForLocality) {
if (requestedHostsWillBecomeAvailable) {
if (LOG.isDebugEnabled()) {
LOG.debug("Delaying local allocation for [" + request.task + "] when trying to allocate on [" + requestedHostsDebugStr + "]" + ". ScheduleAttemptTime=" + schedulerAttemptTime + ", taskDelayTimeout=" + request.getLocalityDelayTimeout());
}
return SELECT_HOST_RESULT_DELAYED_LOCALITY;
} else {
// NOTE(review): this branch is also reached when a requested host is known but was judged
// unlikely to become available in time, so the "not part of the known list" wording in the
// message below does not cover every case.
if (LOG.isDebugEnabled()) {
LOG.debug("Skipping local allocation for [" + request.task + "] when trying to allocate on [" + requestedHostsDebugStr + "] since none of these hosts are part of the known list");
}
}
}
}
/* fall through - miss in locality or no locality-requested */
Collection<ServiceInstance> instances = activeInstances.getAllInstancesOrdered(true);
// allNodes gets exactly one entry per ordered instance (null for inactive or unknown ones), so
// indexes into allNodes correspond to positions in the ordered instance collection.
List<NodeInfo> allNodes = new ArrayList<>(instances.size());
List<NodeInfo> activeNodesWithFreeSlots = new ArrayList<>();
for (ServiceInstance inst : instances) {
if (inst instanceof InactiveServiceInstance) {
allNodes.add(null);
} else {
NodeInfo nodeInfo = instanceToNodeMap.get(inst.getWorkerIdentity());
if (nodeInfo == null) {
allNodes.add(null);
} else {
allNodes.add(nodeInfo);
if (nodeInfo.canAcceptTask()) {
activeNodesWithFreeSlots.add(nodeInfo);
}
}
}
}
// Empty only when the ordered instance collection itself was empty.
if (allNodes.isEmpty()) {
return SELECT_HOST_RESULT_DELAYED_RESOURCES;
}
// no locality-requested, randomly pick a node containing free slots
if (requestedHosts == null || requestedHosts.length == 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("No-locality requested. Selecting a random host for task={}", request.task);
}
return randomSelection(activeNodesWithFreeSlots);
}
// miss in locality request, try picking consistent location with fallback to random selection
final String firstRequestedHost = requestedHosts[0];
// Index of the first non-null node whose host equals the first requested host, or -1.
int requestedHostIdx = -1;
for (int i = 0; i < allNodes.size(); i++) {
NodeInfo nodeInfo = allNodes.get(i);
if (nodeInfo != null) {
if (nodeInfo.getHost().equals(firstRequestedHost)) {
requestedHostIdx = i;
break;
}
}
}
// TODO: At this point we don't know the slot number of the requested host, so can't rollover to next available
if (requestedHostIdx == -1) {
if (LOG.isDebugEnabled()) {
LOG.debug("Requested node [{}] in consistent order does not exist. Falling back to random selection for " + "request {}", firstRequestedHost, request);
}
return randomSelection(activeNodesWithFreeSlots);
}
// requested host is still alive but cannot accept task, pick the next available host in consistent order
// The walk starts at the slot after requestedHostIdx, wraps around via the modulo, and its
// final iteration lands back on requestedHostIdx itself.
for (int i = 0; i < allNodes.size(); i++) {
NodeInfo nodeInfo = allNodes.get((i + requestedHostIdx + 1) % allNodes.size());
// next node in consistent order died or does not have free slots, rollover to next
if (nodeInfo == null || !nodeInfo.canAcceptTask()) {
continue;
} else {
if (LOG.isDebugEnabled()) {
// NOTE(review): requestedHosts is known to be non-null and non-empty here (the no-locality
// case returned earlier), so the "null" alternative in the ternary below is never taken.
LOG.debug("Assigning {} in consistent order when looking for first requested host, from #hosts={}," + " requestedHosts={}", nodeInfo.toShortString(), allNodes.size(), ((requestedHosts == null || requestedHosts.length == 0) ? "null" : requestedHostsDebugStr));
}
return new SelectHostResult(nodeInfo);
}
}
// No node in the ordered list can accept a task right now.
return SELECT_HOST_RESULT_DELAYED_RESOURCES;
} finally {
readLock.unlock();
}
}
Use of org.apache.hadoop.hive.llap.registry.ServiceInstance in the Apache Hive project.
Class LlapTaskSchedulerService, method getAvailableResources.
/**
 * Adds up the memory and virtual cores reported by every active instance that has a known,
 * non-disabled node, and returns the totals as a single Resource.
 *
 * This is the "currently usable" counterpart to getTotalResources(), which is meant to also
 * cover instances that may only become available in a while.
 *
 * @return a Resource holding the summed memory and vcores; both are 0 when nothing qualifies
 */
@Override
public Resource getAvailableResources() {
  // need a state store eventually for current state & measure backoffs
  int totalMemory = 0;
  int totalVcores = 0;
  readLock.lock();
  try {
    int enabledInstanceCount = 0;
    for (ServiceInstance instance : activeInstances.getAll()) {
      NodeInfo node = instanceToNodeMap.get(instance.getWorkerIdentity());
      // Instances without a tracked node, or whose node is disabled, contribute nothing.
      if (node == null || node.isDisabled()) {
        continue;
      }
      Resource instanceResource = instance.getResource();
      totalMemory += instanceResource.getMemory();
      totalVcores += instanceResource.getVirtualCores();
      enabledInstanceCount++;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("GetAvailableResources: numInstancesFound={}, totalMem={}, totalVcores={}", enabledInstanceCount, totalMemory, totalVcores);
    }
  } finally {
    readLock.unlock();
  }
  return Resource.newInstance(totalMemory, totalVcores);
}
Use of org.apache.hadoop.hive.llap.registry.ServiceInstance in the Apache Hive project.
Class LlapBaseInputFormat, method getServiceInstanceForHost.
/**
 * Finds a registered service instance for the given host by querying the registry under three
 * names in turn: the resolved host name, the canonical host name, and the textual address.
 * Each later name is only computed and tried when the earlier lookup produced nothing.
 *
 * @param registryService registry whose instance set is searched
 * @param host host name or address to resolve and look up
 * @return the first instance selected by one of the lookups, or null if none of them yields one
 * @throws IOException if the host cannot be resolved or the registry lookup fails
 */
private ServiceInstance getServiceInstanceForHost(LlapRegistryService registryService, String host) throws IOException {
  final InetAddress resolved = InetAddress.getByName(host);
  final ServiceInstanceSet registered = registryService.getInstances();

  // The name used in the service registry may not match the host name we're using.
  // Try hostname/canonical hostname/host address
  final String hostName = resolved.getHostName();
  LOG.info("Searching service instance by hostname " + hostName);
  final ServiceInstance byHostName = selectServiceInstance(registered.getByHost(hostName));
  if (byHostName != null) {
    return byHostName;
  }

  final String canonicalName = resolved.getCanonicalHostName();
  LOG.info("Searching service instance by canonical hostname " + canonicalName);
  final ServiceInstance byCanonicalName = selectServiceInstance(registered.getByHost(canonicalName));
  if (byCanonicalName != null) {
    return byCanonicalName;
  }

  final String hostAddress = resolved.getHostAddress();
  LOG.info("Searching service instance by address " + hostAddress);
  // Last attempt: its outcome is the answer either way, including null when nothing matched.
  return selectServiceInstance(registered.getByHost(hostAddress));
}
Use of org.apache.hadoop.hive.llap.registry.ServiceInstance in the Apache Hive project.
Class LlapBaseInputFormat, method getRecordReader.
/**
 * Builds a record reader for one LLAP input split.
 *
 * The method looks up the service instance for the split, starts an umbilical client, submits
 * the work request to the instance's RPC port, then opens a socket to the instance's output
 * format port, writes the init message (fragment id plus optional token) and wraps the socket's
 * input stream in an LlapBaseRecordReader.
 *
 * If anything fails after the output socket has been opened, the socket is closed before the
 * exception is propagated so the connection is not leaked.
 *
 * @param split the split to read; must be an LlapInputSplit
 * @param job job configuration; LLAP_ZK_REGISTRY_USER is overwritten with the split's LLAP user
 * @param reporter not used by this method
 * @return a reader over the rows streamed back for this split
 * @throws IOException if no service instance is found, or on any I/O failure while submitting
 *     work or setting up the output socket
 */
@SuppressWarnings("unchecked")
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
  LlapInputSplit llapSplit = (LlapInputSplit) split;
  // Set conf to use LLAP user rather than current user for LLAP Zk registry.
  HiveConf.setVar(job, HiveConf.ConfVars.LLAP_ZK_REGISTRY_USER, llapSplit.getLlapUser());
  SubmitWorkInfo submitWorkInfo = SubmitWorkInfo.fromBytes(llapSplit.getPlanBytes());
  ServiceInstance serviceInstance = getServiceInstance(job, llapSplit);
  String host = serviceInstance.getHost();
  int llapSubmitPort = serviceInstance.getRpcPort();
  LOG.info("Found service instance for host " + host + " with rpc port " + llapSubmitPort + " and outputformat port " + serviceInstance.getOutputFormatPort());

  // Deserialize the LLAP token carried by the split, if there is one.
  byte[] llapTokenBytes = llapSplit.getTokenBytes();
  Token<LlapTokenIdentifier> llapToken = null;
  if (llapTokenBytes != null) {
    DataInputBuffer in = new DataInputBuffer();
    in.reset(llapTokenBytes, 0, llapTokenBytes.length);
    llapToken = new Token<LlapTokenIdentifier>();
    llapToken.readFields(in);
  }

  LlapRecordReaderTaskUmbilicalExternalResponder umbilicalResponder = new LlapRecordReaderTaskUmbilicalExternalResponder();
  LlapTaskUmbilicalExternalClient llapClient = new LlapTaskUmbilicalExternalClient(job, submitWorkInfo.getTokenIdentifier(), submitWorkInfo.getToken(), umbilicalResponder, llapToken);
  llapClient.init(job);
  llapClient.start();
  // NOTE(review): llapClient is started here but not stopped if a later step throws; its
  // shutdown API is not visible from this method, so that path is left unchanged.

  int attemptNum = 0;
  // Use task attempt number from conf if provided
  TaskAttemptID taskAttemptId = TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID));
  if (taskAttemptId != null) {
    attemptNum = taskAttemptId.getId();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Setting attempt number to " + attemptNum + " from task attempt ID in conf: " + job.get(MRJobConfig.TASK_ATTEMPT_ID));
    }
  }

  SubmitWorkRequestProto request = constructSubmitWorkRequestProto(submitWorkInfo, llapSplit.getSplitNum(), attemptNum, llapClient.getAddress(), submitWorkInfo.getToken(), llapSplit.getFragmentBytes(), llapSplit.getFragmentBytesSignature(), job);
  llapClient.submitWork(request, host, llapSubmitPort);

  Socket socket = new Socket(host, serviceInstance.getOutputFormatPort());
  try {
    LOG.debug("Socket connected");
    SignableVertexSpec vertex = SignableVertexSpec.parseFrom(submitWorkInfo.getVertexBinary());
    String fragmentId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber()).toString();

    // Identify this fragment (and pass the token, when present) as the first message on the socket.
    OutputStream socketStream = socket.getOutputStream();
    LlapOutputSocketInitMessage.Builder builder = LlapOutputSocketInitMessage.newBuilder().setFragmentId(fragmentId);
    if (llapSplit.getTokenBytes() != null) {
      builder.setToken(ByteString.copyFrom(llapSplit.getTokenBytes()));
    }
    builder.build().writeDelimitedTo(socketStream);
    socketStream.flush();
    LOG.info("Registered id: " + fragmentId);

    // The reader receives the socket as a Closeable alongside its input stream.
    @SuppressWarnings("rawtypes") LlapBaseRecordReader recordReader = new LlapBaseRecordReader(socket.getInputStream(), llapSplit.getSchema(), Text.class, job, llapClient, (java.io.Closeable) socket);
    umbilicalResponder.setRecordReader(recordReader);
    return recordReader;
  } catch (IOException | RuntimeException e) {
    // Setup failed after the connection was opened: nobody else will close this socket.
    try {
      socket.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
Use of org.apache.hadoop.hive.llap.registry.ServiceInstance in the Apache Hive project.
Class LlapBaseInputFormat, method getServiceInstance.
/**
 * Resolves the service instance that should serve the given split, using the first location
 * reported by the split as the target host.
 *
 * @param job configuration used to obtain the registry client
 * @param llapSplit split whose first location names the host to look up
 * @return the service instance registered for that host; never null
 * @throws IOException if the split reports no locations, or if no service instance is found in
 *     the registry for the first location
 */
private ServiceInstance getServiceInstance(JobConf job, LlapInputSplit llapSplit) throws IOException {
  LlapRegistryService registryService = LlapRegistryService.getClient(job);
  String[] locations = llapSplit.getLocations();
  // A split without locations would otherwise fail with an unchecked index/null error below.
  if (locations == null || locations.length == 0) {
    throw new IOException("No locations available in the input split; cannot look up a service instance");
  }
  String host = locations[0];
  ServiceInstance serviceInstance = getServiceInstanceForHost(registryService, host);
  if (serviceInstance == null) {
    throw new IOException("No service instances found for " + host + " in registry");
  }
  return serviceInstance;
}
Aggregations