
Example 1 with FragmentRuntimeInfo

Use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo in project hive by apache.

From the class LlapTaskCommunicator, the method registerRunningTaskAttempt:

@Override
public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec, Map<String, LocalResource> additionalResources, Credentials credentials, boolean credentialsChanged, int priority) {
    super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials, credentialsChanged, priority);
    int dagId = taskSpec.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId();
    if (currentQueryIdentifierProto == null || (dagId != currentQueryIdentifierProto.getDagIndex())) {
        // TODO HiveQueryId extraction by parsing the Processor payload is ugly. This can be improved
        // once TEZ-2672 is fixed.
        String hiveQueryId;
        try {
            hiveQueryId = extractQueryId(taskSpec);
        } catch (IOException e) {
            throw new RuntimeException("Failed to extract query id from task spec: " + taskSpec, e);
        }
        Preconditions.checkNotNull(hiveQueryId, "Unexpected null query id");
        resetCurrentDag(dagId, hiveQueryId);
    }
    ContainerInfo containerInfo = getContainerInfo(containerId);
    String host;
    int port;
    if (containerInfo != null) {
        synchronized (containerInfo) {
            host = containerInfo.host;
            port = containerInfo.port;
        }
    } else {
        // TODO Handle this properly
        throw new RuntimeException("ContainerInfo not found for container: " + containerId + ", while trying to launch task: " + taskSpec.getTaskAttemptID());
    }
    LlapNodeId nodeId = LlapNodeId.getInstance(host, port);
    registerKnownNode(nodeId);
    entityTracker.registerTaskAttempt(containerId, taskSpec.getTaskAttemptID(), host, port);
    nodesForQuery.add(nodeId);
    sourceStateTracker.registerTaskForStateUpdates(host, port, taskSpec.getInputs());
    FragmentRuntimeInfo fragmentRuntimeInfo;
    try {
        fragmentRuntimeInfo = sourceStateTracker.getFragmentRuntimeInfo(taskSpec.getVertexName(), taskSpec.getTaskAttemptID().getTaskID().getId(), priority);
    } catch (Exception e) {
        LOG.error("Error while trying to get runtimeFragmentInfo for fragmentId={}, containerId={}, currentQI={}, currentQueryId={}", taskSpec.getTaskAttemptID(), containerId, currentQueryIdentifierProto, currentHiveQueryId, e);
        if (e instanceof RuntimeException) {
            throw (RuntimeException) e;
        } else {
            throw new RuntimeException(e);
        }
    }
    SubmitWorkRequestProto requestProto;
    try {
        requestProto = constructSubmitWorkRequest(containerId, taskSpec, fragmentRuntimeInfo, currentHiveQueryId);
    } catch (IOException e) {
        throw new RuntimeException("Failed to construct request", e);
    }
    // Have to register this up front right now. Otherwise, it's possible for the task to start
    // sending out status/DONE/KILLED/FAILED messages before TAImpl knows how to handle them.
    getContext().taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
    communicator.sendSubmitWork(requestProto, host, port, new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {

        @Override
        public void setResponse(SubmitWorkResponseProto response) {
            if (response.hasSubmissionState()) {
                LlapDaemonProtocolProtos.SubmissionStateProto ss = response.getSubmissionState();
                if (ss.equals(LlapDaemonProtocolProtos.SubmissionStateProto.REJECTED)) {
                    LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Service Busy");
                    getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.EXECUTOR_BUSY, "Service Busy");
                    return;
                }
            } else {
                // This should never happen, as the server always returns a valid status on success
                throw new RuntimeException("SubmissionState in response is expected!");
            }
            if (response.hasUniqueNodeId()) {
                entityTracker.registerTaskSubmittedToNode(taskSpec.getTaskAttemptID(), response.getUniqueNodeId());
            }
            LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
        }

        @Override
        public void indicateError(Throwable t) {
            Throwable originalError = t;
            if (t instanceof ServiceException) {
                ServiceException se = (ServiceException) t;
                t = se.getCause();
            }
            if (t instanceof RemoteException) {
                // An error reported by the remote service causes the task to FAIL.
                LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
                processSendError(originalError);
                getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.toString());
            } else {
                // Exception from the RPC layer - communication failure, consider as KILLED / service down.
                if (t instanceof IOException) {
                    LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Communication Error");
                    processSendError(originalError);
                    getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.COMMUNICATION_ERROR, "Communication Error");
                } else {
                    // Anything else is a FAIL.
                    LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
                    processSendError(originalError);
                    getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.getMessage());
                }
            }
        }
    });
}
Also used : FragmentRuntimeInfo(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) ServiceException(com.google.protobuf.ServiceException) RetriableException(org.apache.hadoop.ipc.RetriableException) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) RemoteException(org.apache.hadoop.ipc.RemoteException) TezException(org.apache.tez.dag.api.TezException) LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) SubmitWorkRequestProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) LlapProtocolClientProxy(org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy)
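
The indicateError callback above encodes a small decision table: unwrap a ServiceException to its cause, treat a RemoteException as a task FAIL, treat an IOException as a communication-level KILL, and FAIL on anything else. A minimal standalone sketch of that classification follows; the SendErrorClassifier class and SendErrorKind enum are hypothetical names introduced purely for illustration and are not part of Hive:

import java.io.IOException;
import com.google.protobuf.ServiceException;
import org.apache.hadoop.ipc.RemoteException;

// Hypothetical helper, not Hive code: mirrors the branching in indicateError
// so the FAIL-vs-KILL decision table is easy to read in isolation.
final class SendErrorClassifier {

    enum SendErrorKind { FAIL_REMOTE, KILL_COMMUNICATION, FAIL_OTHER }

    static SendErrorKind classify(Throwable t) {
        // The RPC proxy wraps remote problems in a ServiceException; unwrap it first.
        if (t instanceof ServiceException) {
            t = t.getCause();
        }
        if (t instanceof RemoteException) {
            // Raised on the remote service itself: the task attempt FAILs.
            return SendErrorKind.FAIL_REMOTE;
        }
        if (t instanceof IOException) {
            // Transport-level problem: treat as KILLED, the daemon may be down.
            return SendErrorKind.KILL_COMMUNICATION;
        }
        // Anything else is also a FAIL.
        return SendErrorKind.FAIL_OTHER;
    }
}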

Example 2 with FragmentRuntimeInfo

Use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo in project hive by apache.

From the class SourceStateTracker, the method getFragmentRuntimeInfo:

// Assumes serialized DAGs within an AM, and a reset of structures after each DAG completes.
/**
   * Constructs a FragmentRuntimeInfo for scheduling within LLAP daemons.
   * Also caches state based on state updates.
   * @param vertexName name of the vertex the fragment belongs to
   * @param fragmentNumber fragment (task) number within the vertex
   * @param priority within-DAG priority of the fragment
   * @return the populated FragmentRuntimeInfo
   */
public synchronized FragmentRuntimeInfo getFragmentRuntimeInfo(String vertexName, int fragmentNumber, int priority) {
    FragmentRuntimeInfo.Builder builder = FragmentRuntimeInfo.newBuilder();
    maybeRegisterForVertexUpdates(vertexName);
    MutableInt totalTaskCount = new MutableInt(0);
    MutableInt completedTaskCount = new MutableInt(0);
    computeUpstreamTaskCounts(completedTaskCount, totalTaskCount, vertexName);
    builder.setNumSelfAndUpstreamCompletedTasks(completedTaskCount.intValue());
    builder.setNumSelfAndUpstreamTasks(totalTaskCount.intValue());
    builder.setDagStartTime(taskCommunicatorContext.getDagStartTime());
    builder.setWithinDagPriority(priority);
    builder.setFirstAttemptStartTime(taskCommunicatorContext.getFirstAttemptStartTime(vertexName, fragmentNumber));
    builder.setCurrentAttemptStartTime(System.currentTimeMillis());
    return builder.build();
}
Also used : FragmentRuntimeInfo(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo) MutableInt(org.apache.commons.lang3.mutable.MutableInt)
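
FragmentRuntimeInfo is a generated protobuf message, so the same object can also be populated directly through its builder. A minimal sketch using only the setters that appear in getFragmentRuntimeInfo above; the method name and all counts and timestamps are invented for the example:

import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo;

// Illustrative only: the values below are made up.
static FragmentRuntimeInfo exampleRuntimeInfo() {
    long now = System.currentTimeMillis();
    return FragmentRuntimeInfo.newBuilder()
        // this vertex plus its upstream vertices have 10 tasks, 4 already done
        .setNumSelfAndUpstreamTasks(10)
        .setNumSelfAndUpstreamCompletedTasks(4)
        // the DAG started a minute ago
        .setDagStartTime(now - 60_000L)
        .setWithinDagPriority(1)
        .setFirstAttemptStartTime(now - 5_000L)
        .setCurrentAttemptStartTime(now)
        .build();
}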

Example 3 with FragmentRuntimeInfo

Use of org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo in project hive by apache.

From the class ContainerRunnerImpl, the method stringifySubmitRequest:

public static String stringifySubmitRequest(SubmitWorkRequestProto request, SignableVertexSpec vertex) {
    StringBuilder sb = new StringBuilder();
    sb.append("am_details=").append(request.getAmHost()).append(":").append(request.getAmPort());
    sb.append(", taskInfo=").append(" fragment ").append(request.getFragmentNumber()).append(" attempt ").append(request.getAttemptNumber());
    sb.append(", user=").append(vertex.getUser());
    sb.append(", queryId=").append(vertex.getHiveQueryId());
    sb.append(", appIdString=").append(vertex.getQueryIdentifier().getApplicationIdString());
    sb.append(", appAttemptNum=").append(vertex.getQueryIdentifier().getAppAttemptNumber());
    sb.append(", containerIdString=").append(request.getContainerIdString());
    sb.append(", dagName=").append(vertex.getDagName());
    sb.append(", vertexName=").append(vertex.getVertexName());
    sb.append(", processor=").append(vertex.getProcessorDescriptor().getClassName());
    sb.append(", numInputs=").append(vertex.getInputSpecsCount());
    sb.append(", numOutputs=").append(vertex.getOutputSpecsCount());
    sb.append(", numGroupedInputs=").append(vertex.getGroupedInputSpecsCount());
    sb.append(", Inputs={");
    if (vertex.getInputSpecsCount() > 0) {
        for (IOSpecProto ioSpec : vertex.getInputSpecsList()) {
            sb.append("{").append(ioSpec.getConnectedVertexName()).append(",").append(ioSpec.getIoDescriptor().getClassName()).append(",").append(ioSpec.getPhysicalEdgeCount()).append("}");
        }
    }
    sb.append("}");
    sb.append(", Outputs={");
    if (vertex.getOutputSpecsCount() > 0) {
        for (IOSpecProto ioSpec : vertex.getOutputSpecsList()) {
            sb.append("{").append(ioSpec.getConnectedVertexName()).append(",").append(ioSpec.getIoDescriptor().getClassName()).append(",").append(ioSpec.getPhysicalEdgeCount()).append("}");
        }
    }
    sb.append("}");
    sb.append(", GroupedInputs={");
    if (vertex.getGroupedInputSpecsCount() > 0) {
        for (GroupInputSpecProto group : vertex.getGroupedInputSpecsList()) {
            sb.append("{").append("groupName=").append(group.getGroupName()).append(", elements=").append(group.getGroupVerticesList()).append("}");
            sb.append(group.getGroupVerticesList());
        }
    }
    sb.append("}");
    FragmentRuntimeInfo fragmentRuntimeInfo = request.getFragmentRuntimeInfo();
    sb.append(", FragmentRuntimeInfo={");
    sb.append("taskCount=").append(fragmentRuntimeInfo.getNumSelfAndUpstreamTasks());
    sb.append(", completedTaskCount=").append(fragmentRuntimeInfo.getNumSelfAndUpstreamCompletedTasks());
    sb.append(", dagStartTime=").append(fragmentRuntimeInfo.getDagStartTime());
    sb.append(", firstAttemptStartTime=").append(fragmentRuntimeInfo.getFirstAttemptStartTime());
    sb.append(", currentAttemptStartTime=").append(fragmentRuntimeInfo.getCurrentAttemptStartTime());
    sb.append("}");
    return sb.toString();
}
Also used : IOSpecProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.IOSpecProto) FragmentRuntimeInfo(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo) GroupInputSpecProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.GroupInputSpecProto)
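
Per the javadoc in Example 2, these fields drive scheduling within LLAP daemons. A hedged sketch of how a consumer might derive a progress fraction from the message, using only the getters that appear in stringifySubmitRequest above; the helper itself is hypothetical, not Hive's scheduling code:

import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo;

// Hypothetical helper: fraction of self-and-upstream tasks already completed.
static double upstreamProgress(FragmentRuntimeInfo info) {
    int total = info.getNumSelfAndUpstreamTasks();
    if (total <= 0) {
        return 0.0; // nothing known yet; also avoids division by zero
    }
    return (double) info.getNumSelfAndUpstreamCompletedTasks() / total;
}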

Aggregations

FragmentRuntimeInfo (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo) 3
ByteString (com.google.protobuf.ByteString) 1
ServiceException (com.google.protobuf.ServiceException) 1
IOException (java.io.IOException) 1
MutableInt (org.apache.commons.lang3.mutable.MutableInt) 1
LlapNodeId (org.apache.hadoop.hive.llap.LlapNodeId) 1
GroupInputSpecProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.GroupInputSpecProto) 1
IOSpecProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.IOSpecProto) 1
SubmitWorkRequestProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto) 1
SubmitWorkResponseProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) 1
LlapProtocolClientProxy (org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy) 1
RemoteException (org.apache.hadoop.ipc.RemoteException) 1
RetriableException (org.apache.hadoop.ipc.RetriableException) 1
TezException (org.apache.tez.dag.api.TezException) 1
TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException) 1