use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo in project hive by apache.
the class LlapTaskCommunicator method registerRunningTaskAttempt.
/**
 * Registers a task attempt with this communicator and submits it to the LLAP daemon that
 * backs the given container.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Delegates to the superclass registration.</li>
 *   <li>If no query identifier is tracked yet, or the task's DAG id differs from the tracked
 *       DAG index, extracts the Hive query id from the task spec and resets the current DAG.</li>
 *   <li>Resolves host/port from the container info and registers the node, the task attempt
 *       and the task's inputs for source state updates.</li>
 *   <li>Builds the fragment runtime info and the submit-work request.</li>
 *   <li>Tells the context the task has started remotely, then sends the request with a
 *       callback that reports rejection, success or failure back to the context.</li>
 * </ol>
 *
 * @param containerId container the task attempt is assigned to
 * @param taskSpec specification of the task attempt to run
 * @param additionalResources passed through to the superclass
 * @param credentials passed through to the superclass
 * @param credentialsChanged passed through to the superclass
 * @param priority priority handed to the fragment runtime info computation
 * @throws RuntimeException if the query id cannot be extracted, no container info is known for
 *         {@code containerId}, the fragment runtime info cannot be computed, or the request
 *         cannot be constructed
 */
@Override
public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec,
    Map<String, LocalResource> additionalResources, Credentials credentials,
    boolean credentialsChanged, int priority) {
  super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials,
      credentialsChanged, priority);
  int dagId = taskSpec.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId();
  if (currentQueryIdentifierProto == null || (dagId != currentQueryIdentifierProto.getDagIndex())) {
    // TODO HiveQueryId extraction by parsing the Processor payload is ugly. This can be improved
    // once TEZ-2672 is fixed.
    String hiveQueryId;
    try {
      hiveQueryId = extractQueryId(taskSpec);
    } catch (IOException e) {
      throw new RuntimeException("Failed to extract query id from task spec: " + taskSpec, e);
    }
    Preconditions.checkNotNull(hiveQueryId, "Unexpected null query id");
    resetCurrentDag(dagId, hiveQueryId);
  }

  ContainerInfo containerInfo = getContainerInfo(containerId);
  String host;
  int port;
  if (containerInfo != null) {
    // Read host and port together under the container info's monitor.
    synchronized (containerInfo) {
      host = containerInfo.host;
      port = containerInfo.port;
    }
  } else {
    // TODO Handle this properly
    throw new RuntimeException("ContainerInfo not found for container: " + containerId
        + ", while trying to launch task: " + taskSpec.getTaskAttemptID());
  }

  LlapNodeId nodeId = LlapNodeId.getInstance(host, port);
  registerKnownNode(nodeId);
  entityTracker.registerTaskAttempt(containerId, taskSpec.getTaskAttemptID(), host, port);
  nodesForQuery.add(nodeId);
  sourceStateTracker.registerTaskForStateUpdates(host, port, taskSpec.getInputs());

  FragmentRuntimeInfo fragmentRuntimeInfo;
  try {
    fragmentRuntimeInfo = sourceStateTracker.getFragmentRuntimeInfo(taskSpec.getVertexName(),
        taskSpec.getTaskAttemptID().getTaskID().getId(), priority);
  } catch (Exception e) {
    LOG.error("Error while trying to get runtimeFragmentInfo for fragmentId={}, containerId={}, currentQI={}, currentQueryId={}",
        taskSpec.getTaskAttemptID(), containerId, currentQueryIdentifierProto, currentHiveQueryId, e);
    // Rethrow unchanged when already unchecked; wrap anything else.
    if (e instanceof RuntimeException) {
      throw (RuntimeException) e;
    } else {
      throw new RuntimeException(e);
    }
  }

  SubmitWorkRequestProto requestProto;
  try {
    requestProto = constructSubmitWorkRequest(containerId, taskSpec, fragmentRuntimeInfo, currentHiveQueryId);
  } catch (IOException e) {
    throw new RuntimeException("Failed to construct request", e);
  }

  // Have to register this up front right now. Otherwise, it's possible for the task to start
  // sending out status/DONE/KILLED/FAILED messages before TAImpl knows how to handle them.
  getContext().taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
  communicator.sendSubmitWork(requestProto, host, port,
      new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {
        @Override
        public void setResponse(SubmitWorkResponseProto response) {
          if (response.hasSubmissionState()) {
            LlapDaemonProtocolProtos.SubmissionStateProto ss = response.getSubmissionState();
            if (ss.equals(LlapDaemonProtocolProtos.SubmissionStateProto.REJECTED)) {
              LOG.info("Unable to run task: {} on containerId: {}, Service Busy",
                  taskSpec.getTaskAttemptID(), containerId);
              getContext().taskKilled(taskSpec.getTaskAttemptID(),
                  TaskAttemptEndReason.EXECUTOR_BUSY, "Service Busy");
              return;
            }
          } else {
            // This should never happen as server always returns a valid status on success
            throw new RuntimeException("SubmissionState in response is expected!");
          }
          if (response.hasUniqueNodeId()) {
            entityTracker.registerTaskSubmittedToNode(taskSpec.getTaskAttemptID(), response.getUniqueNodeId());
          }
          LOG.info("Successfully launched task: {}", taskSpec.getTaskAttemptID());
        }

        @Override
        public void indicateError(Throwable t) {
          Throwable originalError = t;
          // Unwrap the ServiceException only when it actually carries a cause. A cause-less
          // ServiceException would otherwise leave t == null and trigger a NullPointerException
          // in the final branch below instead of reporting the task as failed.
          if (t instanceof ServiceException && t.getCause() != null) {
            t = t.getCause();
          }
          if (t instanceof RemoteException) {
            // All others from the remote service cause the task to FAIL.
            LOG.info("Failed to run task: {} on containerId: {}",
                taskSpec.getTaskAttemptID(), containerId, t);
            processSendError(originalError);
            getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL,
                TaskAttemptEndReason.OTHER, t.toString());
          } else {
            // Exception from the RPC layer - communication failure, consider as KILLED / service down.
            if (t instanceof IOException) {
              LOG.info("Unable to run task: {} on containerId: {}, Communication Error",
                  taskSpec.getTaskAttemptID(), containerId);
              processSendError(originalError);
              getContext().taskKilled(taskSpec.getTaskAttemptID(),
                  TaskAttemptEndReason.COMMUNICATION_ERROR, "Communication Error");
            } else {
              // Anything else is a FAIL.
              LOG.info("Failed to run task: {} on containerId: {}",
                  taskSpec.getTaskAttemptID(), containerId, t);
              processSendError(originalError);
              getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL,
                  TaskAttemptEndReason.OTHER, t.getMessage());
            }
          }
        }
      });
}
use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo in project hive by apache.
the class SourceStateTracker method getFragmentRuntimeInfo.
// Assumes serialized DAGs within an AM, and a reset of structures after each DAG completes.
/**
 * Constructs the {@link FragmentRuntimeInfo} sent along with a fragment, for scheduling
 * within LLAP daemons. Also caches state based on state updates.
 *
 * <p>The vertex is first registered for vertex updates (if needed), then the completed and
 * total task counts for the vertex and its upstream vertices are computed and recorded
 * together with the DAG start time, the supplied priority, the first attempt start time and
 * the current wall-clock time as the current attempt start time.
 *
 * @param vertexName name of the vertex the fragment belongs to
 * @param fragmentNumber fragment number, used to look up the first attempt start time
 * @param priority value stored as the within-DAG priority
 * @return the populated runtime info
 */
public synchronized FragmentRuntimeInfo getFragmentRuntimeInfo(String vertexName, int fragmentNumber, int priority) {
  maybeRegisterForVertexUpdates(vertexName);

  // Out-parameters filled in by computeUpstreamTaskCounts.
  MutableInt upstreamTotal = new MutableInt(0);
  MutableInt upstreamCompleted = new MutableInt(0);
  computeUpstreamTaskCounts(upstreamCompleted, upstreamTotal, vertexName);

  return FragmentRuntimeInfo.newBuilder()
      .setNumSelfAndUpstreamCompletedTasks(upstreamCompleted.intValue())
      .setNumSelfAndUpstreamTasks(upstreamTotal.intValue())
      .setDagStartTime(taskCommunicatorContext.getDagStartTime())
      .setWithinDagPriority(priority)
      .setFirstAttemptStartTime(taskCommunicatorContext.getFirstAttemptStartTime(vertexName, fragmentNumber))
      .setCurrentAttemptStartTime(System.currentTimeMillis())
      .build();
}
use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo in project hive by apache.
the class ContainerRunnerImpl method stringifySubmitRequest.
/**
 * Renders a submit-work request and its vertex spec as a single-line, human-readable string
 * of {@code key=value} pairs.
 *
 * <p>The output covers AM host/port, fragment and attempt numbers, user, query and application
 * identifiers, container id, DAG/vertex/processor names, input/output/grouped-input counts and
 * details, and the fragment runtime info carried by the request.
 *
 * @param request the submit-work request to describe
 * @param vertex the vertex spec associated with the request
 * @return the formatted description
 */
public static String stringifySubmitRequest(SubmitWorkRequestProto request, SignableVertexSpec vertex) {
  StringBuilder sb = new StringBuilder();
  sb.append("am_details=").append(request.getAmHost()).append(":").append(request.getAmPort());
  sb.append(", taskInfo=").append(" fragment ").append(request.getFragmentNumber())
      .append(" attempt ").append(request.getAttemptNumber());
  sb.append(", user=").append(vertex.getUser());
  sb.append(", queryId=").append(vertex.getHiveQueryId());
  sb.append(", appIdString=").append(vertex.getQueryIdentifier().getApplicationIdString());
  sb.append(", appAttemptNum=").append(vertex.getQueryIdentifier().getAppAttemptNumber());
  sb.append(", containerIdString=").append(request.getContainerIdString());
  sb.append(", dagName=").append(vertex.getDagName());
  sb.append(", vertexName=").append(vertex.getVertexName());
  sb.append(", processor=").append(vertex.getProcessorDescriptor().getClassName());
  sb.append(", numInputs=").append(vertex.getInputSpecsCount());
  sb.append(", numOutputs=").append(vertex.getOutputSpecsCount());
  sb.append(", numGroupedInputs=").append(vertex.getGroupedInputSpecsCount());

  sb.append(", Inputs={");
  appendIoSpecs(sb, vertex.getInputSpecsList());
  sb.append("}");

  sb.append(", Outputs={");
  appendIoSpecs(sb, vertex.getOutputSpecsList());
  sb.append("}");

  sb.append(", GroupedInputs={");
  for (GroupInputSpecProto group : vertex.getGroupedInputSpecsList()) {
    // Each group is printed once, fully enclosed in its own braces. (The vertex list used to
    // be appended a second time after the closing brace, duplicating it in the output.)
    sb.append("{").append("groupName=").append(group.getGroupName())
        .append(", elements=").append(group.getGroupVerticesList()).append("}");
  }
  sb.append("}");

  FragmentRuntimeInfo fragmentRuntimeInfo = request.getFragmentRuntimeInfo();
  sb.append(", FragmentRuntimeInfo={");
  sb.append("taskCount=").append(fragmentRuntimeInfo.getNumSelfAndUpstreamTasks());
  sb.append(", completedTaskCount=").append(fragmentRuntimeInfo.getNumSelfAndUpstreamCompletedTasks());
  sb.append(", dagStartTime=").append(fragmentRuntimeInfo.getDagStartTime());
  sb.append(", firstAttemptStartTime=").append(fragmentRuntimeInfo.getFirstAttemptStartTime());
  sb.append(", currentAttemptStartTime=").append(fragmentRuntimeInfo.getCurrentAttemptStartTime());
  sb.append("}");
  return sb.toString();
}

/** Appends each IO spec as {@code {connectedVertex,descriptorClass,physicalEdgeCount}}. */
private static void appendIoSpecs(StringBuilder sb, Iterable<IOSpecProto> ioSpecs) {
  for (IOSpecProto ioSpec : ioSpecs) {
    sb.append("{").append(ioSpec.getConnectedVertexName())
        .append(",").append(ioSpec.getIoDescriptor().getClassName())
        .append(",").append(ioSpec.getPhysicalEdgeCount())
        .append("}");
  }
}
Aggregations