Search in sources:

Example 1 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class LlapTaskCommunicator method registerRunningTaskAttempt.

/**
 * Registers a task attempt as running in the given container and submits it to the node
 * (host/port) recorded for that container.
 *
 * <p>Steps, in order: delegate to the superclass; switch the current DAG if the task belongs to a
 * DAG other than the one tracked in {@code currentQueryIdentifierProto}; resolve the container's
 * host and port; register the node and the attempt with the trackers; build the submit-work
 * request; tell the context the task has started; and finally send the request with a callback
 * that reports rejection or failure back to the context.
 *
 * @param containerId the container the attempt is assigned to
 * @param taskSpec the specification of the task attempt being launched
 * @param additionalResources passed through to the superclass unchanged
 * @param credentials passed through to the superclass unchanged
 * @param credentialsChanged passed through to the superclass unchanged
 * @param priority passed to the superclass and used when requesting the fragment runtime info
 * @throws RuntimeException if the query id cannot be extracted, no {@code ContainerInfo} is known
 *         for {@code containerId}, the fragment runtime info cannot be obtained, or the request
 *         cannot be constructed
 */
@Override
public void registerRunningTaskAttempt(final ContainerId containerId, final TaskSpec taskSpec, Map<String, LocalResource> additionalResources, Credentials credentials, boolean credentialsChanged, int priority) {
    super.registerRunningTaskAttempt(containerId, taskSpec, additionalResources, credentials, credentialsChanged, priority);
    int dagId = taskSpec.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId();
    // First task of a DAG that is not the currently tracked one: switch the tracked DAG.
    if (currentQueryIdentifierProto == null || (dagId != currentQueryIdentifierProto.getDagIndex())) {
        // TODO HiveQueryId extraction by parsing the Processor payload is ugly. This can be improved
        // once TEZ-2672 is fixed.
        String hiveQueryId;
        try {
            hiveQueryId = extractQueryId(taskSpec);
        } catch (IOException e) {
            throw new RuntimeException("Failed to extract query id from task spec: " + taskSpec, e);
        }
        Preconditions.checkNotNull(hiveQueryId, "Unexpected null query id");
        resetCurrentDag(dagId, hiveQueryId);
    }
    ContainerInfo containerInfo = getContainerInfo(containerId);
    String host;
    int port;
    if (containerInfo != null) {
        // Read host and port together under the ContainerInfo monitor.
        synchronized (containerInfo) {
            host = containerInfo.host;
            port = containerInfo.port;
        }
    } else {
        // TODO Handle this properly
        throw new RuntimeException("ContainerInfo not found for container: " + containerId + ", while trying to launch task: " + taskSpec.getTaskAttemptID());
    }
    LlapNodeId nodeId = LlapNodeId.getInstance(host, port);
    registerKnownNode(nodeId);
    entityTracker.registerTaskAttempt(containerId, taskSpec.getTaskAttemptID(), host, port);
    // Remember the node so that dagComplete can notify it later.
    nodesForQuery.add(nodeId);
    sourceStateTracker.registerTaskForStateUpdates(host, port, taskSpec.getInputs());
    FragmentRuntimeInfo fragmentRuntimeInfo;
    try {
        fragmentRuntimeInfo = sourceStateTracker.getFragmentRuntimeInfo(taskSpec.getVertexName(), taskSpec.getTaskAttemptID().getTaskID().getId(), priority);
    } catch (Exception e) {
        LOG.error("Error while trying to get runtimeFragmentInfo for fragmentId={}, containerId={}, currentQI={}, currentQueryId={}", taskSpec.getTaskAttemptID(), containerId, currentQueryIdentifierProto, currentHiveQueryId, e);
        // Rethrow unchecked exceptions as-is; wrap anything else.
        if (e instanceof RuntimeException) {
            throw (RuntimeException) e;
        } else {
            throw new RuntimeException(e);
        }
    }
    SubmitWorkRequestProto requestProto;
    try {
        requestProto = constructSubmitWorkRequest(containerId, taskSpec, fragmentRuntimeInfo, currentHiveQueryId);
    } catch (IOException e) {
        throw new RuntimeException("Failed to construct request", e);
    }
    // Have to register this up front right now. Otherwise, it's possible for the task to start
    // sending out status/DONE/KILLED/FAILED messages before TAImpl knows how to handle them.
    getContext().taskStartedRemotely(taskSpec.getTaskAttemptID(), containerId);
    communicator.sendSubmitWork(requestProto, host, port, new LlapProtocolClientProxy.ExecuteRequestCallback<SubmitWorkResponseProto>() {

        @Override
        public void setResponse(SubmitWorkResponseProto response) {
            if (response.hasSubmissionState()) {
                LlapDaemonProtocolProtos.SubmissionStateProto ss = response.getSubmissionState();
                if (ss.equals(LlapDaemonProtocolProtos.SubmissionStateProto.REJECTED)) {
                    // The service refused the work: report the attempt as killed, not failed.
                    LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Service Busy");
                    getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.EXECUTOR_BUSY, "Service Busy");
                    return;
                }
            } else {
                // This should never happen as server always returns a valid status on success
                throw new RuntimeException("SubmissionState in response is expected!");
            }
            if (response.hasUniqueNodeId()) {
                entityTracker.registerTaskSubmittedToNode(taskSpec.getTaskAttemptID(), response.getUniqueNodeId());
            }
            LOG.info("Successfully launched task: " + taskSpec.getTaskAttemptID());
        }

        @Override
        public void indicateError(Throwable t) {
            Throwable originalError = t;
            // Unwrap a ServiceException only when it actually carries a cause. A bare
            // ServiceException is classified as-is, so 't' is never null in the branches below
            // (a null 't' would otherwise throw from t.getMessage() and the task would never be
            // reported as failed).
            if (t instanceof ServiceException && t.getCause() != null) {
                t = t.getCause();
            }
            if (t instanceof RemoteException) {
                // All others from the remote service cause the task to FAIL.
                LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
                processSendError(originalError);
                getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.toString());
            } else {
                // Exception from the RPC layer - communication failure, consider as KILLED / service down.
                if (t instanceof IOException) {
                    LOG.info("Unable to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId + ", Communication Error");
                    processSendError(originalError);
                    getContext().taskKilled(taskSpec.getTaskAttemptID(), TaskAttemptEndReason.COMMUNICATION_ERROR, "Communication Error");
                } else {
                    // Anything else is a FAIL.
                    LOG.info("Failed to run task: " + taskSpec.getTaskAttemptID() + " on containerId: " + containerId, t);
                    processSendError(originalError);
                    getContext().taskFailed(taskSpec.getTaskAttemptID(), TaskFailureType.NON_FATAL, TaskAttemptEndReason.OTHER, t.getMessage());
                }
            }
        }
    });
}
Also used : FragmentRuntimeInfo(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.FragmentRuntimeInfo) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) ServiceException(com.google.protobuf.ServiceException) RetriableException(org.apache.hadoop.ipc.RetriableException) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) RemoteException(org.apache.hadoop.ipc.RemoteException) TezException(org.apache.tez.dag.api.TezException) LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) ServiceException(com.google.protobuf.ServiceException) SubmitWorkRequestProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto) SubmitWorkResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) LlapProtocolClientProxy(org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 2 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class LlapTaskCommunicator method dagComplete.

/**
 * Sends a query-complete request for the given DAG to every node currently held in
 * {@code nodesForQuery}, then clears {@code nodesForQuery}.
 *
 * @param dagIdentifier identifier of the DAG that has completed
 */
@Override
public void dagComplete(final int dagIdentifier) {
    final QueryIdentifierProto completedQuery = constructQueryIdentifierProto(dagIdentifier);
    final QueryCompleteRequestProto.Builder requestBuilder = QueryCompleteRequestProto.newBuilder();
    requestBuilder.setQueryIdentifier(completedQuery);
    requestBuilder.setDeleteDelay(deleteDelayOnDagComplete);
    final QueryCompleteRequestProto completeRequest = requestBuilder.build();

    // The same request object is sent to each node.
    for (final LlapNodeId targetNode : nodesForQuery) {
        LOG.info("Sending dagComplete message for {}, to {}", dagIdentifier, targetNode);
        final LlapProtocolClientProxy.ExecuteRequestCallback<LlapDaemonProtocolProtos.QueryCompleteResponseProto> onResult = new LlapProtocolClientProxy.ExecuteRequestCallback<LlapDaemonProtocolProtos.QueryCompleteResponseProto>() {

            @Override
            public void setResponse(LlapDaemonProtocolProtos.QueryCompleteResponseProto response) {
                // Nothing to do on success.
            }

            @Override
            public void indicateError(Throwable t) {
                LOG.warn("Failed to indicate dag complete dagId={} to node {}", dagIdentifier, targetNode);
                processSendError(t);
            }
        };
        communicator.sendQueryComplete(completeRequest, targetNode.getHostname(), targetNode.getPort(), onResult);
    }
    nodesForQuery.clear();
    // TODO Ideally move some of the other cleanup code from resetCurrentDag over here
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) LlapDaemonProtocolProtos(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos) QueryIdentifierProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto) LlapProtocolClientProxy(org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy) QueryCompleteRequestProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteRequestProto)

Example 3 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class SourceStateTracker method sourceStateUpdated.

/**
 * Handles a notification that the state of a source has changed.
 *
 * <p>Records the new state on the source's {@code SourceInfo} (refreshing its task counts when
 * the state is {@code SUCCEEDED}) and forwards the update to each interested node whose last
 * recorded state for this source differs from the new one.
 *
 * @param sourceName name of the source whose state changed
 * @param sourceState the new state of that source
 */
public synchronized void sourceStateUpdated(String sourceName, VertexState sourceState) {
    SourceInfo info = getSourceInfo(sourceName);
    // Task counts are only refreshed once the source has succeeded.
    if (VertexState.SUCCEEDED == sourceState) {
        info.numCompletedTasks = getVertexCompletedTaskCount(sourceName);
        info.numTasks = getVertexTotalTaskCount(sourceName);
    }
    info.lastKnownState = sourceState;
    // The last known state is tracked per node, since an update to a single node may fail.
    for (LlapNodeId interestedNode : info.getInterestedNodes()) {
        NodeInfo perNodeInfo = nodeInfoMap.get(interestedNode);
        if (perNodeInfo.getLastKnownStateForSource(sourceName) == sourceState) {
            // This node already has the state recorded; nothing to send.
            continue;
        }
        perNodeInfo.setLastKnownStateForSource(sourceName, sourceState);
        sendStateUpdateToNode(interestedNode, sourceName, sourceState);
    }
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) VertexState(org.apache.tez.dag.api.event.VertexState)

Example 4 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class LlapProtocolClientProxy method sendSubmitWork.

/**
 * Queues a submit-work request for the node identified by the given host and port.
 *
 * @param request the submit-work request to queue
 * @param host host name used to obtain the target {@code LlapNodeId}
 * @param port port used to obtain the target {@code LlapNodeId}
 * @param callback callback handed to the queued {@code SubmitWorkCallable}
 */
public void sendSubmitWork(SubmitWorkRequestProto request, String host, int port, final ExecuteRequestCallback<SubmitWorkResponseProto> callback) {
    requestManager.queueRequest(new SubmitWorkCallable(LlapNodeId.getInstance(host, port), request, callback));
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId)

Example 5 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class TestLlapDaemonProtocolClientProxy method testMultipleNodes.

/**
 * Verifies that two requests queued for two different nodes are both submitted by a single
 * {@code process()} call, with one invocation recorded per node and no skipped requests or
 * disabled nodes.
 */
@Test(timeout = 5000)
public void testMultipleNodes() {
    RequestManagerForTest requestManager = new RequestManagerForTest(1);
    LlapNodeId nodeId1 = LlapNodeId.getInstance("host1", 1025);
    LlapNodeId nodeId2 = LlapNodeId.getInstance("host2", 1025);
    Message mockMessage = mock(Message.class);
    LlapProtocolClientProxy.ExecuteRequestCallback mockExecuteRequestCallback = mock(LlapProtocolClientProxy.ExecuteRequestCallback.class);
    // Queue one request per node
    requestManager.queueRequest(new CallableRequestForTest(nodeId1, mockMessage, mockExecuteRequestCallback));
    requestManager.queueRequest(new CallableRequestForTest(nodeId2, mockMessage, mockExecuteRequestCallback));
    // Should go through in a single process call
    requestManager.process();
    assertEquals(2, requestManager.numSubmissionsCounters);
    // Each node must have been invoked exactly once
    assertNotNull(requestManager.numInvocationsPerNode.get(nodeId1));
    assertNotNull(requestManager.numInvocationsPerNode.get(nodeId2));
    assertEquals(1, requestManager.numInvocationsPerNode.get(nodeId1).getValue().intValue());
    assertEquals(1, requestManager.numInvocationsPerNode.get(nodeId2).getValue().intValue());
    // Nothing should have been skipped or disabled in this loop
    assertEquals(0, requestManager.currentLoopSkippedRequests.size());
    assertEquals(0, requestManager.currentLoopDisabledNodes.size());
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) Message(com.google.protobuf.Message) Test(org.junit.Test)

Aggregations

LlapNodeId (org.apache.hadoop.hive.llap.LlapNodeId)14 ByteString (com.google.protobuf.ByteString)3 LlapProtocolClientProxy (org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy)3 Message (com.google.protobuf.Message)2 QueryIdentifierProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto)2 SubmitWorkResponseProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto)2 VertexState (org.apache.tez.dag.api.event.VertexState)2 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)2 Test (org.junit.Test)2 BiMap (com.google.common.collect.BiMap)1 HashBiMap (com.google.common.collect.HashBiMap)1 ServiceException (com.google.protobuf.ServiceException)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 ConcurrentMap (java.util.concurrent.ConcurrentMap)1 Configuration (org.apache.hadoop.conf.Configuration)1 LlapTokenInfo (org.apache.hadoop.hive.llap.daemon.impl.LlapTokenChecker.LlapTokenInfo)1