Search in sources :

Example 86 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class TestLlapTaskCommunicator method testEntityTracker1.

/**
 * Walks a single {@code LlapTaskCommunicator.EntityTracker} through three register/unregister
 * scenarios against one host/port pair and checks after each one that the tracker's lookups
 * return null and that its three internal maps are empty again.
 */
@Test(timeout = 30000)
public void testEntityTracker1() {
    LlapTaskCommunicator.EntityTracker tracker = new LlapTaskCommunicator.EntityTracker();
    String hostName = "host1";
    int nodePort = 1451;

    // Scenario 1: a container is registered and then unregistered; no task attempt is involved.
    ContainerId loneContainer = constructContainerId(101);
    tracker.registerContainer(loneContainer, hostName, nodePort);
    assertEquals(LlapNodeId.getInstance(hostName, nodePort), tracker.getNodeIdForContainer(loneContainer));
    tracker.unregisterContainer(loneContainer);
    assertNull(tracker.getContainerAttemptMapForNode(LlapNodeId.getInstance(hostName, nodePort)));
    assertNull(tracker.getNodeIdForContainer(loneContainer));
    assertEquals(0, tracker.nodeMap.size());
    assertEquals(0, tracker.attemptToNodeMap.size());
    assertEquals(0, tracker.containerToNodeMap.size());

    // Scenario 2: a task attempt is registered and then unregistered directly.
    ContainerId firstContainer = constructContainerId(1);
    TezTaskAttemptID firstAttempt = constructTaskAttemptId(1);
    tracker.registerTaskAttempt(firstContainer, firstAttempt, hostName, nodePort);
    assertEquals(LlapNodeId.getInstance(hostName, nodePort), tracker.getNodeIdForContainer(firstContainer));
    assertEquals(LlapNodeId.getInstance(hostName, nodePort), tracker.getNodeIdForTaskAttempt(firstAttempt));
    tracker.unregisterTaskAttempt(firstAttempt);
    assertNull(tracker.getContainerAttemptMapForNode(LlapNodeId.getInstance(hostName, nodePort)));
    assertNull(tracker.getNodeIdForContainer(firstContainer));
    assertNull(tracker.getNodeIdForTaskAttempt(firstAttempt));
    assertEquals(0, tracker.nodeMap.size());
    assertEquals(0, tracker.attemptToNodeMap.size());
    assertEquals(0, tracker.containerToNodeMap.size());

    // Scenario 3: a task attempt is registered, but only its container is unregistered.
    // The attempt is expected to disappear together with the container.
    ContainerId sharedContainer = constructContainerId(201);
    TezTaskAttemptID orphanedAttempt = constructTaskAttemptId(201);
    tracker.registerTaskAttempt(sharedContainer, orphanedAttempt, hostName, nodePort);
    assertEquals(LlapNodeId.getInstance(hostName, nodePort), tracker.getNodeIdForContainer(sharedContainer));
    assertEquals(LlapNodeId.getInstance(hostName, nodePort), tracker.getNodeIdForTaskAttempt(orphanedAttempt));
    tracker.unregisterContainer(sharedContainer);
    assertNull(tracker.getContainerAttemptMapForNode(LlapNodeId.getInstance(hostName, nodePort)));
    assertNull(tracker.getNodeIdForContainer(sharedContainer));
    assertNull(tracker.getNodeIdForTaskAttempt(orphanedAttempt));
    assertEquals(0, tracker.nodeMap.size());
    assertEquals(0, tracker.attemptToNodeMap.size());
    assertEquals(0, tracker.containerToNodeMap.size());

    // Unregistering the already-removed attempt must complete without throwing.
    tracker.unregisterTaskAttempt(orphanedAttempt);
}
Also used : ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 87 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class TaskSpecBuilder method constructTaskSpec.

/**
 * Builds a {@link TaskSpec} for one task of the named vertex.
 *
 * <p>The vertex must have exactly one input and exactly one output; otherwise
 * {@code Preconditions.checkState} fails. The resulting spec is addressed to attempt 0 of task
 * {@code index}, in vertex 0 of DAG 0 of {@code appId}.
 *
 * @param dag        DAG that contains the vertex and supplies the DAG name
 * @param vertexName name of the vertex to look up in {@code dag}
 * @param numSplits  passed through unchanged to the {@code TaskSpec} constructor
 * @param appId      application id from which the DAG id is derived
 * @param index      task index within the vertex
 * @return the assembled task specification
 */
public TaskSpec constructTaskSpec(DAG dag, String vertexName, int numSplits, ApplicationId appId, int index) {
    Vertex targetVertex = dag.getVertex(vertexName);
    ProcessorDescriptor processor = targetVertex.getProcessorDescriptor();
    List<RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>> rootInputs = targetVertex.getInputs();
    List<RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor>> leafOutputs = targetVertex.getOutputs();
    Preconditions.checkState(rootInputs.size() == 1);
    Preconditions.checkState(leafOutputs.size() == 1);

    // NOTE(review): the trailing 1 passed to InputSpec/OutputSpec is presumably an edge count - confirm.
    List<InputSpec> inSpecs = new ArrayList<>();
    for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> rootInput : rootInputs) {
        inSpecs.add(new InputSpec(rootInput.getName(), rootInput.getIODescriptor(), 1));
    }
    List<OutputSpec> outSpecs = new ArrayList<>();
    for (RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor> leafOutput : leafOutputs) {
        outSpecs.add(new OutputSpec(leafOutput.getName(), leafOutput.getIODescriptor(), 1));
    }

    // DAG 0 -> vertex 0 -> task <index> -> attempt 0.
    TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(
        TezTaskID.getInstance(TezVertexID.getInstance(TezDAGID.getInstance(appId, 0), 0), index), 0);
    return new TaskSpec(attemptId, dag.getName(), vertexName, numSplits, processor, inSpecs, outSpecs, null);
}
Also used : TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) ArrayList(java.util.ArrayList) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) TezTaskID(org.apache.tez.dag.records.TezTaskID) TezDAGID(org.apache.tez.dag.records.TezDAGID) TezVertexID(org.apache.tez.dag.records.TezVertexID) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 88 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class LlapTaskCommunicator method nodePinged.

/**
 * Processes a ping from an LLAP node that reports the task attempts it is currently running.
 *
 * <p>The node is first recorded via {@code registerPingingNode}. If the entity tracker knows of
 * containers/attempts on that node, every tracked attempt whose unique node id equals
 * {@code uniqueId} has its container marked alive; the attempt itself is marked alive only when
 * the node also listed it in {@code tasks}. Tracked attempts the node did not list are collected
 * and logged once at INFO. If nothing is tracked for the node, a warning is logged, rate-limited
 * through {@code nodeNotFoundLogTime}.
 *
 * @param hostname host name the node reported
 * @param uniqueId unique id of the node instance sending the ping
 * @param port     port the node reported
 * @param tasks    attempts the node claims to be running; each element is cast to
 *                 {@code TezTaskAttemptID}
 */
void nodePinged(String hostname, String uniqueId, int port, TezAttemptArray tasks) {
    // TODO: do we ever need the port? we could just do away with nodeId altogether.
    LlapNodeId nodeId = LlapNodeId.getInstance(hostname, port);
    registerPingingNode(nodeId);
    BiMap<ContainerId, TezTaskAttemptID> biMap = entityTracker.getContainerAttemptMapForNode(nodeId);
    if (biMap != null) {
        HashSet<TezTaskAttemptID> attempts = new HashSet<>();
        for (Writable w : tasks.get()) {
            attempts.add((TezTaskAttemptID) w);
        }
        // Accumulate in a builder: repeated String concatenation would re-copy the whole
        // message on every iteration while the biMap monitor is held.
        StringBuilder missing = new StringBuilder();
        synchronized (biMap) {
            for (Map.Entry<ContainerId, TezTaskAttemptID> entry : biMap.entrySet()) {
                // TODO: this is a stopgap fix. We really need to change all mappings by unique node ID,
                //       or at least (in this case) track the latest unique ID for LlapNode and retry all
                //       older-node tasks proactively. For now let the heartbeats fail them.
                TezTaskAttemptID attemptId = entry.getValue();
                String taskNodeId = entityTracker.getUniqueNodeId(attemptId);
                // Also, we prefer a missed heartbeat over a stuck query in case of discrepancy in ET.
                if (taskNodeId != null && taskNodeId.equals(uniqueId)) {
                    if (attempts.contains(attemptId)) {
                        getContext().taskAlive(attemptId);
                    } else {
                        missing.append(attemptId).append(", ");
                    }
                    // The container is reported alive even when its attempt was not listed by the node.
                    getContext().containerAlive(entry.getKey());
                }
            }
        }
        if (missing.length() > 0) {
            LOG.info("The tasks we expected to be on the node are not there: " + missing);
        }
    } else {
        // Rate-limit the warning: only log when more than 5000 ms have passed since the last one.
        long currentTs = TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS);
        if (currentTs > nodeNotFoundLogTime.get() + 5000L) {
            LOG.warn("Received ping from node without any registered tasks or containers: " + hostname + ":" + port + ". Could be caused by pre-emption by the AM," + " or a mismatched hostname. Enable debug logging for mismatched host names");
            nodeNotFoundLogTime.set(currentTs);
        }
    }
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) Writable(org.apache.hadoop.io.Writable) ByteString(com.google.protobuf.ByteString) Map(java.util.Map) BiMap(com.google.common.collect.BiMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashBiMap(com.google.common.collect.HashBiMap) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) HashSet(java.util.HashSet)

Example 89 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class LlapTaskUmbilicalExternalClient method updateHeartbeatInfo.

/**
 * Applies a node heartbeat to every task tracked in {@code pendingEvents} and
 * {@code registeredTasks}.
 *
 * <p>A tracked task is considered to belong to the heartbeating node when its host name, port and
 * unique node id all match the arguments. Matching tasks that the node listed in {@code tasks}
 * get their {@code lastHeartbeat} set to the current time; matching tasks the node did not list
 * are collected and logged once at INFO. A second INFO line is logged when no task was refreshed.
 *
 * @param hostname host name reported by the node
 * @param uniqueId unique id of the node instance sending the heartbeat
 * @param port     port reported by the node
 * @param tasks    attempts the node claims to be running; each element is cast to
 *                 {@code TezTaskAttemptID}
 */
private void updateHeartbeatInfo(String hostname, String uniqueId, int port, TezAttemptArray tasks) {
    HashSet<TezTaskAttemptID> attempts = new HashSet<>();
    for (Writable w : tasks.get()) {
        attempts.add((TezTaskAttemptID) w);
    }
    int updateCount = 0;
    StringBuilder missing = new StringBuilder();
    // Entries are looked up by key; the null checks skip a key whose mapping is gone by then.
    for (String key : pendingEvents.keySet()) {
        PendingEventData pendingEventData = pendingEvents.get(key);
        if (pendingEventData != null
            && refreshHeartbeatIfReported(pendingEventData.heartbeatInfo, hostname, uniqueId, port, attempts, missing)) {
            updateCount++;
        }
    }
    for (String key : registeredTasks.keySet()) {
        TaskHeartbeatInfo thi = registeredTasks.get(key);
        if (thi != null && refreshHeartbeatIfReported(thi, hostname, uniqueId, port, attempts, missing)) {
            updateCount++;
        }
    }
    if (missing.length() > 0) {
        LOG.info("The tasks we expected to be on the node are not there: " + missing);
    }
    if (updateCount == 0) {
        LOG.info("No tasks found for heartbeat from hostname " + hostname + ", port " + port);
    }
}

/**
 * Refreshes one task's heartbeat timestamp if the task belongs to the given node and the node
 * reported it; appends the task's attempt id to {@code missing} if it belongs to the node but
 * was not reported.
 *
 * @return {@code true} only when the heartbeat timestamp was refreshed
 */
private boolean refreshHeartbeatIfReported(TaskHeartbeatInfo thi, String hostname, String uniqueId, int port,
        HashSet<TezTaskAttemptID> attempts, StringBuilder missing) {
    String thiUniqueId = thi.uniqueNodeId;
    boolean sameNode = thi.hostname.equals(hostname) && thi.port == port
        && (thiUniqueId != null && thiUniqueId.equals(uniqueId));
    if (!sameNode) {
        return false;
    }
    TezTaskAttemptID ta = TezTaskAttemptID.fromString(thi.taskAttemptId);
    if (attempts.contains(ta)) {
        thi.lastHeartbeat.set(System.currentTimeMillis());
        return true;
    }
    missing.append(thi.taskAttemptId).append(", ");
    return false;
}
Also used : Writable(org.apache.hadoop.io.Writable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) HashSet(java.util.HashSet) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 90 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project tez by apache.

the class TestVertexImpl method testInputInitializerEventsMultipleSources.

/**
 * Verifies that vertex3's input initializer ("input1") receives InputInitializerEvents that
 * originate from two different source vertices (vertex1 and vertex2).
 *
 * <p>Flow of the test: one event is routed from vertex1's single task and three events from
 * vertex2's two tasks. After each batch the test checks what the initializer wrapper reports as
 * pending, then drives the source tasks to SUCCEEDED and checks how many events the custom
 * initializer has actually received (1 after vertex1, 4 after vertex2). Finally it checks that
 * every payload that was sent shows up in the events the initializer received.
 *
 * @throws Exception if any step of the vertex setup or event dispatch fails
 */
@Test(timeout = 10000)
public void testInputInitializerEventsMultipleSources() throws Exception {
    useCustomInitializer = true;
    customInitializer = new EventHandlingRootInputInitializer(null);
    EventHandlingRootInputInitializer initializer = (EventHandlingRootInputInitializer) customInitializer;
    // 4 = 1 event sent from vertex1 below + 3 events sent from vertex2 below.
    initializer.setNumExpectedEvents(4);
    setupPreDagCreation();
    dagPlan = createDAGPlanWithRunningInitializer4();
    setupPostDagCreation();
    VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1");
    VertexImplWithRunningInputInitializer v2 = (VertexImplWithRunningInputInitializer) vertices.get("vertex2");
    VertexImplWithRunningInputInitializer v3 = (VertexImplWithRunningInputInitializer) vertices.get("vertex3");
    initVertex(v1);
    startVertex(v1);
    dispatcher.await();
    // Starting vertex1 should trigger the downstream vertices: vertex2 runs, while vertex3
    // stays in INITIALIZING.
    Assert.assertEquals(VertexState.RUNNING, v1.getState());
    Assert.assertEquals(VertexState.RUNNING, v2.getState());
    Assert.assertEquals(VertexState.INITIALIZING, v3.getState());
    // Every payload sent to the initializer is recorded here and checked off at the end.
    List<ByteBuffer> expectedPayloads = new LinkedList<ByteBuffer>();
    // Generate an event from v1 to v3's InputInitializer.
    // Payload is three ints; in this test they are written as (1 or 2 for the source vertex,
    // task id, event index within the task).
    ByteBuffer payload = ByteBuffer.allocate(12).putInt(0, 1).putInt(4, 0).putInt(8, 0);
    expectedPayloads.add(payload);
    InputInitializerEvent event = InputInitializerEvent.create("vertex3", "input1", payload);
    // Create taskId and taskAttemptId for the single task that exists in vertex1
    TezTaskID t0_v1 = TezTaskID.getInstance(v1.getVertexId(), 0);
    TezTaskAttemptID ta0_t0_v1 = TezTaskAttemptID.getInstance(t0_v1, 0);
    TezEvent tezEvent = new TezEvent(event, new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex1", "vertex3", ta0_t0_v1));
    dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v1.getVertexId(), Collections.singletonList(tezEvent)));
    dispatcher.await();
    // The vertex itself should not hold on to the event, since the initializer is running.
    Assert.assertEquals(0, v3.pendingInitializerEvents.size());
    // The initializer wrapper, on the other hand, should hold the event as pending because the
    // source task has not been reported as succeeded yet. Verify that it is pending there.
    RootInputInitializerManager.InitializerWrapper initializerWrapper = v3.rootInputInitializerManager.getInitializerWrapper("input1");
    Assert.assertEquals(1, initializerWrapper.getFirstSuccessfulAttemptMap().size());
    Assert.assertEquals(1, initializerWrapper.getPendingEvents().get(v1.getName()).size());
    // Get all tasks of vertex1 to succeed.
    for (TezTaskID taskId : v1.getTasks().keySet()) {
        TezTaskAttemptID taskAttemptId = TezTaskAttemptID.getInstance(taskId, 0);
        v1.handle(new VertexEventTaskAttemptCompleted(taskAttemptId, TaskAttemptStateInternal.SUCCEEDED));
        v1.handle(new VertexEventTaskCompleted(taskId, TaskState.SUCCEEDED));
        dispatcher.await();
        v1.stateChangeNotifier.taskSucceeded(v1.getName(), taskId, taskAttemptId.getId());
    }
    dispatcher.await();
    // The single vertex1 event should now have reached the initializer.
    Assert.assertEquals(1, initializer.initializerEvents.size());
    // The event counts asserted below assume vertex2 has exactly 2 tasks.
    Assert.assertEquals(2, v2.getTotalTasks());
    // Generate events from v2 to v3's initializer. 1 from task 0, 2 from task 1
    for (Task task : v2.getTasks().values()) {
        TezTaskID taskId = task.getTaskId();
        TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(taskId, 0);
        // Task N sends N + 1 events.
        int numEventsFromTask = taskId.getId() + 1;
        for (int i = 0; i < numEventsFromTask; i++) {
            payload = ByteBuffer.allocate(12).putInt(0, 2).putInt(4, taskId.getId()).putInt(8, i);
            expectedPayloads.add(payload);
            InputInitializerEvent event2 = InputInitializerEvent.create("vertex3", "input1", payload);
            TezEvent tezEvent2 = new TezEvent(event2, new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex2", "vertex3", attemptId));
            dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(), Collections.singletonList(tezEvent2)));
            dispatcher.await();
        }
    }
    // Validate queueing of these events
    // Only v2 events pending
    Assert.assertEquals(1, initializerWrapper.getPendingEvents().keySet().size());
    // 3 events pending
    Assert.assertEquals(3, initializerWrapper.getPendingEvents().get(v2.getName()).size());
    // Get all tasks of vertex2 to succeed.
    for (TezTaskID taskId : v2.getTasks().keySet()) {
        TezTaskAttemptID taskAttemptId = TezTaskAttemptID.getInstance(taskId, 0);
        v2.handle(new VertexEventTaskAttemptCompleted(taskAttemptId, TaskAttemptStateInternal.SUCCEEDED));
        v2.handle(new VertexEventTaskCompleted(taskId, TaskState.SUCCEEDED));
        dispatcher.await();
        v2.stateChangeNotifier.taskSucceeded(v2.getName(), taskId, taskAttemptId.getId());
    }
    dispatcher.await();
    // Initializer would have run, and processed events.
    // Poll until vertex3 reaches RUNNING; the @Test timeout bounds this loop.
    while (v3.getState() != VertexState.RUNNING) {
        Thread.sleep(10);
    }
    Assert.assertEquals(VertexState.RUNNING, v3.getState());
    // All 4 events (1 from vertex1, 3 from vertex2) should have been delivered.
    Assert.assertEquals(4, initializer.initializerEvents.size());
    Assert.assertTrue(initializer.initComplete.get());
    Assert.assertEquals(2, initializerWrapper.getFirstSuccessfulAttemptMap().size());
    Assert.assertEquals(0, initializerWrapper.getPendingEvents().get(v1.getName()).size());
    // Check off each received payload; nothing that was sent may be left over.
    for (InputInitializerEvent initializerEvent : initializer.initializerEvents) {
        expectedPayloads.remove(initializerEvent.getUserPayload());
    }
    Assert.assertEquals(0, expectedPayloads.size());
}
Also used : TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) VertexEventTaskAttemptCompleted(org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) DataInputByteBuffer(org.apache.hadoop.io.DataInputByteBuffer) ByteBuffer(java.nio.ByteBuffer) LinkedList(java.util.LinkedList) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) PlanTaskLocationHint(org.apache.tez.dag.api.records.DAGProtos.PlanTaskLocationHint) InputInitializerEvent(org.apache.tez.runtime.api.events.InputInitializerEvent) RootInputInitializerManager(org.apache.tez.dag.app.dag.RootInputInitializerManager) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) VertexEventTaskCompleted(org.apache.tez.dag.app.dag.event.VertexEventTaskCompleted) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) VertexManagerPluginForTest(org.apache.tez.test.VertexManagerPluginForTest) Test(org.junit.Test) GraceShuffleVertexManagerForTest(org.apache.tez.test.GraceShuffleVertexManagerForTest) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest)

Aggregations

TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)167 Test (org.junit.Test)124 TezTaskID (org.apache.tez.dag.records.TezTaskID)61 TezVertexID (org.apache.tez.dag.records.TezVertexID)54 Container (org.apache.hadoop.yarn.api.records.Container)48 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)46 TezDAGID (org.apache.tez.dag.records.TezDAGID)43 Configuration (org.apache.hadoop.conf.Configuration)42 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)41 Priority (org.apache.hadoop.yarn.api.records.Priority)41 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)41 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)33 Resource (org.apache.hadoop.yarn.api.records.Resource)30 TaskCommunicatorManagerInterface (org.apache.tez.dag.app.TaskCommunicatorManagerInterface)28 EventMetaData (org.apache.tez.runtime.api.impl.EventMetaData)28 ClusterInfo (org.apache.tez.dag.app.ClusterInfo)27 ContainerHeartbeatHandler (org.apache.tez.dag.app.ContainerHeartbeatHandler)27 AMContainerMap (org.apache.tez.dag.app.rm.container.AMContainerMap)27 ContainerContextMatcher (org.apache.tez.dag.app.rm.container.ContainerContextMatcher)27 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)25