Use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache: class LlapTaskCommunicator, method registerRunningTaskAttempt.
/**
 * Registers a task attempt that is starting in the given container and submits the
 * corresponding work fragment to the LLAP daemon that owns that container.
 * Flow: detect a DAG switch and reset per-DAG state, resolve the daemon host/port for
 * the container, register the attempt with the local trackers, build the
 * SubmitWorkRequestProto, and send it asynchronously; the callback translates the
 * daemon's response (or a send failure) into taskKilled/taskFailed notifications.
 */
@Override
public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec, Map<String, LocalResource> additionalResources, Credentials credentials, boolean credentialsChanged, int priority) {
super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials, credentialsChanged, priority);
// DAG-switch detection: if this attempt belongs to a DAG other than the one currently
// tracked (or none is tracked yet), re-derive the Hive query id and reset per-DAG state.
int dagId = taskSpec.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId();
if (currentQueryIdentifierProto == null || (dagId != currentQueryIdentifierProto.getDagIndex())) {
// TODO HiveQueryId extraction by parsing the Processor payload is ugly. This can be improved
// once TEZ-2672 is fixed.
String hiveQueryId;
try {
hiveQueryId = extractQueryId(taskSpec);
} catch (IOException e) {
throw new RuntimeException("Failed to extract query id from task spec: " + taskSpec, e);
}
Preconditions.checkNotNull(hiveQueryId, "Unexpected null query id");
resetCurrentDag(dagId, hiveQueryId);
}
// Resolve which LLAP daemon (host:port) this container maps to; without that mapping
// the task cannot be launched at all.
ContainerInfo containerInfo = getContainerInfo(containerId);
String host;
int port;
if (containerInfo != null) {
// Read host/port atomically under the ContainerInfo lock so we never see a
// half-updated endpoint. NOTE(review): presumably other threads mutate these
// fields under the same lock — confirm against ContainerInfo's other users.
synchronized (containerInfo) {
host = containerInfo.host;
port = containerInfo.port;
}
} else {
// TODO Handle this properly
throw new RuntimeException("ContainerInfo not found for container: " + containerId + ", while trying to launch task: " + taskSpec.getTaskAttemptID());
}
// Track the node and the attempt locally before talking to the daemon, so that later
// responses/errors can be routed back to the right entities.
LlapNodeId nodeId = LlapNodeId.getInstance(host, port);
registerKnownNode(nodeId);
entityTracker.registerTaskAttempt(containerId, taskSpec.getTaskAttemptID(), host, port);
// Remember every node used by the current query; dagComplete() iterates this set.
nodesForQuery.add(nodeId);
// Subscribe this node to upstream-vertex state updates for the task's inputs.
sourceStateTracker.registerTaskForStateUpdates(host, port, taskSpec.getInputs());
FragmentRuntimeInfo fragmentRuntimeInfo;
try {
fragmentRuntimeInfo = sourceStateTracker.getFragmentRuntimeInfo(taskSpec.getVertexName(), taskSpec.getTaskAttemptID().getTaskID().getId(), priority);
} catch (Exception e) {
// Log full context before rethrowing: this failure is otherwise hard to diagnose.
LOG.error("Error while trying to get runtimeFragmentInfo for fragmentId={}, containerId={}, currentQI={}, currentQueryId={}", taskSpec.getTaskAttemptID(), containerId, currentQueryIdentifierProto, currentHiveQueryId, e);
if (e instanceof RuntimeException) {
throw (RuntimeException) e;
} else {
throw new RuntimeException(e);
}
}
SubmitWorkRequestProto requestProto;
try {
requestProto = constructSubmitWorkRequest(containerId, taskSpec, fragmentRuntimeInfo, currentHiveQueryId);
} catch (IOException e) {
throw new RuntimeException("Failed to construct request", e);
}
// Have to register this up front right now. Otherwise, it's possible for the task to start
// sending out status/DONE/KILLED/FAILED messages before TAImpl knows how to handle them.
getContext().taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
// Asynchronous submission; all outcome handling happens in the callback below.
communicator.sendSubmitWork(requestProto, host, port, new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {
@Override
public void setResponse(SubmitWorkResponseProto response) {
if (response.hasSubmissionState()) {
LlapDaemonProtocolProtos.SubmissionStateProto ss = response.getSubmissionState();
// A busy daemon rejects the fragment; report the attempt as KILLED (retryable),
// not FAILED, since nothing went wrong with the task itself.
if (ss.equals(LlapDaemonProtocolProtos.SubmissionStateProto.REJECTED)) {
LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Service Busy");
getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.EXECUTOR_BUSY, "Service Busy");
return;
}
} else {
// This should never happen as server always returns a valid status on success
throw new RuntimeException("SubmissionState in response is expected!");
}
// Record the daemon-assigned node id so later node-level events can be correlated
// back to this attempt.
if (response.hasUniqueNodeId()) {
entityTracker.registerTaskSubmittedToNode(taskSpec.getTaskAttemptID(), response.getUniqueNodeId());
}
LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
}
@Override
public void indicateError(Throwable t) {
// Keep the original throwable for processSendError(); classification below is done
// on the unwrapped cause.
Throwable originalError = t;
if (t instanceof ServiceException) {
ServiceException se = (ServiceException) t;
t = se.getCause();
}
if (t instanceof RemoteException) {
// All others from the remote service cause the task to FAIL.
LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
processSendError(originalError);
getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.toString());
} else {
// Exception from the RPC layer - communication failure, consider as KILLED / service down.
if (t instanceof IOException) {
LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Communication Error");
processSendError(originalError);
getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.COMMUNICATION_ERROR, "Communication Error");
} else {
// Anything else is a FAIL.
LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
processSendError(originalError);
getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.getMessage());
}
}
}
});
}
Use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache: class LlapTaskCommunicator, method dagComplete.
/**
 * Notifies every LLAP node that ran fragments of the given DAG that the DAG has
 * completed, then clears the per-query node set. Notification is fire-and-forget;
 * a failure to reach an individual node is only logged.
 */
@Override
public void dagComplete(final int dagIdentifier) {
// The completion request is identical for every node, so build it exactly once.
final QueryCompleteRequestProto completeRequest = QueryCompleteRequestProto.newBuilder()
    .setQueryIdentifier(constructQueryIdentifierProto(dagIdentifier))
    .setDeleteDelay(deleteDelayOnDagComplete)
    .build();
for (final LlapNodeId node : nodesForQuery) {
LOG.info("Sending dagComplete message for {}, to {}", dagIdentifier, node);
communicator.sendQueryComplete(completeRequest, node.getHostname(), node.getPort(),
    new LlapProtocolClientProxy.ExecuteRequestCallback<LlapDaemonProtocolProtos.QueryCompleteResponseProto>() {
@Override
public void setResponse(LlapDaemonProtocolProtos.QueryCompleteResponseProto response) {
// Best-effort notification: nothing to do on success.
}
@Override
public void indicateError(Throwable t) {
LOG.warn("Failed to indicate dag complete dagId={} to node {}", dagIdentifier, node);
processSendError(t);
}
});
}
nodesForQuery.clear();
// TODO Ideally move some of the other cleanup code from resetCurrentDag over here
}
Use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache: class SourceStateTracker, method sourceStateUpdated.
/**
 * Handles a state-update notification for a source vertex and propagates the new
 * state to every LLAP node that registered interest in it.
 *
 * @param sourceName name of the source vertex whose state changed
 * @param sourceState the newly reported state of that vertex
 */
public synchronized void sourceStateUpdated(String sourceName, VertexState sourceState) {
SourceInfo info = getSourceInfo(sourceName);
// On success, snapshot the vertex's task counts alongside the state.
if (sourceState == VertexState.SUCCEEDED) {
info.numCompletedTasks = getVertexCompletedTaskCount(sourceName);
info.numTasks = getVertexTotalTaskCount(sourceName);
}
info.lastKnownState = sourceState;
// Fan the update out node by node; a send to a single node may fail.
for (LlapNodeId interestedNode : info.getInterestedNodes()) {
NodeInfo perNode = nodeInfoMap.get(interestedNode);
if (perNode.getLastKnownStateForSource(sourceName) == sourceState) {
// The node already knows this state; skip the redundant update.
continue;
}
perNode.setLastKnownStateForSource(sourceName, sourceState);
sendStateUpdateToNode(interestedNode, sourceName, sourceState);
}
}
Use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache: class LlapProtocolClientProxy, method sendSubmitWork.
/** Queues a submit-work request for asynchronous delivery to the daemon at {@code host}:{@code port}. */
public void sendSubmitWork(SubmitWorkRequestProto request, String host, int port, final ExecuteRequestCallback<SubmitWorkResponseProto> callback) {
final LlapNodeId target = LlapNodeId.getInstance(host, port);
requestManager.queueRequest(new SubmitWorkCallable(target, request, callback));
}
Use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache: class TestLlapDaemonProtocolClientProxy, method testMultipleNodes.
/**
 * Verifies that requests queued for two distinct nodes are both dispatched in a
 * single process() pass, with exactly one invocation recorded per node and nothing
 * skipped or disabled.
 *
 * Fixes: removed the duplicated currentLoopSkippedRequests assertion and unified the
 * assertion style (the method previously mixed static assertEquals with Assert.assertEquals).
 */
@Test(timeout = 5000)
public void testMultipleNodes() {
RequestManagerForTest requestManager = new RequestManagerForTest(1);
LlapNodeId nodeId1 = LlapNodeId.getInstance("host1", 1025);
LlapNodeId nodeId2 = LlapNodeId.getInstance("host2", 1025);
Message mockMessage = mock(Message.class);
LlapProtocolClientProxy.ExecuteRequestCallback mockExecuteRequestCallback = mock(LlapProtocolClientProxy.ExecuteRequestCallback.class);
// Request two messages
requestManager.queueRequest(new CallableRequestForTest(nodeId1, mockMessage, mockExecuteRequestCallback));
requestManager.queueRequest(new CallableRequestForTest(nodeId2, mockMessage, mockExecuteRequestCallback));
// Should go through in a single process call
requestManager.process();
assertEquals(2, requestManager.numSubmissionsCounters);
assertNotNull(requestManager.numInvocationsPerNode.get(nodeId1));
assertNotNull(requestManager.numInvocationsPerNode.get(nodeId2));
assertEquals(1, requestManager.numInvocationsPerNode.get(nodeId1).getValue().intValue());
assertEquals(1, requestManager.numInvocationsPerNode.get(nodeId2).getValue().intValue());
// Nothing should have been skipped, and no node should have been disabled.
assertEquals(0, requestManager.currentLoopSkippedRequests.size());
assertEquals(0, requestManager.currentLoopDisabledNodes.size());
}
Aggregations