Search in sources :

Example 6 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class TestLlapDaemonProtocolClientProxy method testSingleInvocationPerNode.

/**
 * Checks that a second request queued for a node is held back while the first
 * one for that node is still outstanding, and is submitted once
 * {@code requestFinished} is called for the node.
 */
@Test(timeout = 5000)
public void testSingleInvocationPerNode() {
    LlapNodeId host1 = LlapNodeId.getInstance("host1", 1025);
    Message message = mock(Message.class);
    LlapProtocolClientProxy.ExecuteRequestCallback callback = mock(LlapProtocolClientProxy.ExecuteRequestCallback.class);
    RequestManagerForTest manager = new RequestManagerForTest(1);

    // Phase 1: the first request for the host is submitted right away.
    manager.queueRequest(new CallableRequestForTest(host1, message, callback));
    manager.process();
    assertEquals(1, manager.numSubmissionsCounters);
    assertNotNull(manager.numInvocationsPerNode.get(host1));
    assertEquals(1, manager.numInvocationsPerNode.get(host1).getValue().intValue());
    assertEquals(0, manager.currentLoopSkippedRequests.size());

    // Phase 2: a second request for the same host is skipped because the first
    // has not completed; the host shows up as disabled for this loop.
    manager.queueRequest(new CallableRequestForTest(host1, message, callback));
    manager.process();
    assertEquals(1, manager.numSubmissionsCounters);
    assertNotNull(manager.numInvocationsPerNode.get(host1));
    assertEquals(1, manager.numInvocationsPerNode.get(host1).getValue().intValue());
    assertEquals(1, manager.currentLoopSkippedRequests.size());
    assertEquals(1, manager.currentLoopDisabledNodes.size());
    assertTrue(manager.currentLoopDisabledNodes.contains(host1));

    // Phase 3: finishing the first request lets the pending one go through.
    manager.requestFinished(host1);
    manager.process();
    assertEquals(2, manager.numSubmissionsCounters);
    assertNotNull(manager.numInvocationsPerNode.get(host1));
    assertEquals(2, manager.numInvocationsPerNode.get(host1).getValue().intValue());
    assertEquals(0, manager.currentLoopSkippedRequests.size());
    assertEquals(0, manager.currentLoopDisabledNodes.size());
    assertFalse(manager.currentLoopDisabledNodes.contains(host1));
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) Message(com.google.protobuf.Message) Test(org.junit.Test)

Example 7 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class LlapTaskCommunicator method nodePinged.

/**
 * Processes a ping from a node and marks the matching tasks and containers as alive.
 *
 * <p>For every container/attempt pair tracked for the node whose recorded unique node id
 * equals {@code uniqueId}, the container is reported alive; the attempt is reported alive
 * only if it is present in {@code tasks}. Tracked attempts that the node did not report are
 * collected and logged. If nothing is tracked for the node, a warning is logged, at most
 * once per 5000 ms.
 *
 * @param hostname host name of the pinging node
 * @param uniqueId unique id of the pinging node instance, compared against the id recorded
 *                 for each tracked attempt
 * @param port port of the pinging node
 * @param tasks the task attempts the node reports; each element is cast to
 *              {@code TezTaskAttemptID}
 */
void nodePinged(String hostname, String uniqueId, int port, TezAttemptArray tasks) {
    // TODO: do we ever need the port? we could just do away with nodeId altogether.
    LlapNodeId nodeId = LlapNodeId.getInstance(hostname, port);
    registerPingingNode(nodeId);
    BiMap<ContainerId, TezTaskAttemptID> biMap = entityTracker.getContainerAttemptMapForNode(nodeId);
    if (biMap != null) {
        HashSet<TezTaskAttemptID> attempts = new HashSet<>();
        for (Writable w : tasks.get()) {
            attempts.add((TezTaskAttemptID) w);
        }
        // Accumulate in a StringBuilder: repeated String concatenation would re-copy the
        // whole message for every missing attempt while the map's monitor is held.
        StringBuilder missingAttempts = new StringBuilder();
        synchronized (biMap) {
            for (Map.Entry<ContainerId, TezTaskAttemptID> entry : biMap.entrySet()) {
                // TODO: this is a stopgap fix. We really need to change all mappings by unique node ID,
                //       or at least (in this case) track the latest unique ID for LlapNode and retry all
                //       older-node tasks proactively. For now let the heartbeats fail them.
                TezTaskAttemptID attemptId = entry.getValue();
                String taskNodeId = entityTracker.getUniqueNodeId(attemptId);
                // Also, we prefer a missed heartbeat over a stuck query in case of discrepancy in ET.
                if (taskNodeId != null && taskNodeId.equals(uniqueId)) {
                    if (attempts.contains(attemptId)) {
                        getContext().taskAlive(attemptId);
                    } else {
                        missingAttempts.append(attemptId).append(", ");
                    }
                    getContext().containerAlive(entry.getKey());
                }
            }
        }
        if (missingAttempts.length() > 0) {
            LOG.info("The tasks we expected to be on the node are not there: " + missingAttempts);
        }
    } else {
        long currentTs = TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS);
        // Rate-limit the warning: only log if more than 5000 ms passed since the last one.
        if (currentTs > nodeNotFoundLogTime.get() + 5000L) {
            LOG.warn("Received ping from node without any registered tasks or containers: " + hostname + ":" + port + ". Could be caused by pre-emption by the AM," + " or a mismatched hostname. Enable debug logging for mismatched host names");
            nodeNotFoundLogTime.set(currentTs);
        }
    }
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) Writable(org.apache.hadoop.io.Writable) ByteString(com.google.protobuf.ByteString) Map(java.util.Map) BiMap(com.google.common.collect.BiMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashBiMap(com.google.common.collect.HashBiMap) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) HashSet(java.util.HashSet)

Example 8 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class LlapTaskCommunicator method sendTaskTerminated.

/**
 * Sends a terminate-fragment request for the given task attempt to the node it is
 * registered on. If no node is known for the attempt, the request is skipped and
 * only a log line is written.
 *
 * @param taskAttemptId the attempt whose fragment should be terminated
 * @param invokedByContainerEnd {@code true} if triggered by a container end, {@code false}
 *                              if triggered by a task end; used only for logging
 */
private void sendTaskTerminated(final TezTaskAttemptID taskAttemptId, boolean invokedByContainerEnd) {
    String invoker = invokedByContainerEnd ? "containerEnd" : "taskEnd";
    LOG.info("Attempting to send terminateRequest for fragment {} due to internal preemption invoked by {}", taskAttemptId.toString(), invoker);

    LlapNodeId nodeId = entityTracker.getNodeIdForTaskAttempt(taskAttemptId);
    if (nodeId == null) {
        // NodeId can be null if the task gets unregistered due to failure / being killed by the daemon itself
        LOG.info("Not sending terminate request for fragment {} since it's node is not known. Already unregistered", taskAttemptId.toString());
        return;
    }

    TerminateFragmentRequestProto request = TerminateFragmentRequestProto.newBuilder()
        .setQueryIdentifier(constructQueryIdentifierProto(taskAttemptId.getTaskID().getVertexID().getDAGId().getId()))
        .setFragmentIdentifierString(taskAttemptId.toString())
        .build();

    LlapProtocolClientProxy.ExecuteRequestCallback<TerminateFragmentResponseProto> terminateCallback = new LlapProtocolClientProxy.ExecuteRequestCallback<TerminateFragmentResponseProto>() {

        @Override
        public void setResponse(TerminateFragmentResponseProto response) {
            // Nothing to do when the request succeeds.
        }

        @Override
        public void indicateError(Throwable t) {
            LOG.warn("Failed to send terminate fragment request for {}", taskAttemptId.toString());
            processSendError(t);
        }
    };
    communicator.sendTerminateFragment(request, nodeId.getHostname(), nodeId.getPort(), terminateCallback);
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) TerminateFragmentResponseProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.TerminateFragmentResponseProto) LlapProtocolClientProxy(org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy) TerminateFragmentRequestProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.TerminateFragmentRequestProto)

Example 9 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class SourceStateTracker method registerTaskForStateUpdates.

/**
 * Registers a task for source state updates, so that updates for the sources the task
 * is interested in are sent to the node identified by {@code host} and {@code port}.
 * If a newly registered source is already known to have succeeded, that state is sent
 * to the node immediately.
 *
 * @param host host name of the node running the task
 * @param port port of the node running the task
 * @param inputSpecList input specs of the task, from which the sources of interest are derived
 */
public synchronized void registerTaskForStateUpdates(String host, int port, List<InputSpec> inputSpecList) {
    List<String> sourcesOfInterest = getSourceInterestList(inputSpecList);
    if (sourcesOfInterest == null || sourcesOfInterest.isEmpty()) {
        // Don't need to track anything for this task. No new notifications, etc.
        return;
    }

    // Set up the data structures, before any notifications come in.
    LlapNodeId nodeId = LlapNodeId.getInstance(host, port);
    NodeInfo nodeInfo = getNodeInfo(nodeId);
    for (String sourceName : sourcesOfInterest) {
        if (nodeInfo.getLastKnownStateForSource(sourceName) != null) {
            // Already registered to send updates to this node for the specific source.
            // Only the vertex-update registration below may still be needed.
            maybeRegisterForVertexUpdates(sourceName);
            continue;
        }
        // Not registered for this node yet: register, and push the state if it is successful.
        SourceInfo sourceInfo = getSourceInfo(sourceName);
        sourceInfo.addNode(nodeId);
        nodeInfo.addSource(sourceName, sourceInfo.lastKnownState);
        if (sourceInfo.lastKnownState == VertexState.SUCCEEDED) {
            sendStateUpdateToNode(nodeId, sourceName, sourceInfo.lastKnownState);
        }
        // Setup for actual notifications, if not already done for a previous task.
        maybeRegisterForVertexUpdates(sourceName);
    }
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) VertexState(org.apache.tez.dag.api.event.VertexState)

Example 10 with LlapNodeId

use of org.apache.hadoop.hive.llap.LlapNodeId in project hive by apache.

the class AMReporter method registerTask.

/**
 * Registers a task attempt for heartbeating to its application master. The first
 * registration for a query creates the {@code AMNodeInfo}, stores it under the query
 * identifier and adds it to the pending heartbeat queue; later registrations for the
 * same query only add the attempt to the existing entry.
 *
 * @param amLocation host of the application master
 * @param port port of the application master
 * @param umbilicalUser user passed to the newly created {@code AMNodeInfo}
 * @param jobToken job token passed to the newly created {@code AMNodeInfo}
 * @param queryIdentifier key under which the {@code AMNodeInfo} is tracked
 * @param attemptId the task attempt to register
 */
public void registerTask(String amLocation, int port, String umbilicalUser, Token<JobTokenIdentifier> jobToken, QueryIdentifier queryIdentifier, TezTaskAttemptID attemptId) {
    if (LOG.isTraceEnabled()) {
        String amAddress = amLocation + ":" + port;
        LOG.trace("Registering for heartbeat: {}, queryIdentifier={}, attemptId={}", amAddress, queryIdentifier, attemptId);
    }
    synchronized (knownAppMasters) {
        LlapNodeId amNodeId = LlapNodeId.getInstance(amLocation, port);
        AMNodeInfo nodeInfo = knownAppMasters.get(queryIdentifier);
        if (nodeInfo == null) {
            // First registration for this query: create the entry and queue it once.
            // AMNodeInfo will only be cleared when a queryComplete is received for this query, or
            // when we detect a failure on the AM side (failure to heartbeat), so one instance
            // has to stay in the queue till the query completes.
            nodeInfo = new AMNodeInfo(amNodeId, umbilicalUser, jobToken, queryIdentifier, retryPolicy, retryTimeout, socketFactory, conf);
            knownAppMasters.put(queryIdentifier, nodeInfo);
            nodeInfo.setNextHeartbeatTime(System.currentTimeMillis() + heartbeatInterval);
            pendingHeartbeatQueeu.add(nodeInfo);
        }
        nodeInfo.addTaskAttempt(attemptId);
    }
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId)

Aggregations

LlapNodeId (org.apache.hadoop.hive.llap.LlapNodeId) 14, ByteString (com.google.protobuf.ByteString) 3, LlapProtocolClientProxy (org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy) 3, Message (com.google.protobuf.Message) 2, QueryIdentifierProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto) 2, SubmitWorkResponseProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto) 2, VertexState (org.apache.tez.dag.api.event.VertexState) 2, TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID) 2, Test (org.junit.Test) 2, BiMap (com.google.common.collect.BiMap) 1, HashBiMap (com.google.common.collect.HashBiMap) 1, ServiceException (com.google.protobuf.ServiceException) 1, IOException (java.io.IOException) 1, HashMap (java.util.HashMap) 1, HashSet (java.util.HashSet) 1, Map (java.util.Map) 1, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 1, ConcurrentMap (java.util.concurrent.ConcurrentMap) 1, Configuration (org.apache.hadoop.conf.Configuration) 1, LlapTokenInfo (org.apache.hadoop.hive.llap.daemon.impl.LlapTokenChecker.LlapTokenInfo) 1