Search in sources :

Example 91 with Event

use of org.apache.hadoop.yarn.event.Event in project tez by apache.

the class TestAMNodeTracker method _testNodeSelfBlacklist.

/**
 * Drives the given tracker through a blacklist / ignore-blacklisting / re-blacklist cycle and
 * checks the node states and the events captured by {@code handler} after each step.
 *
 * <p>Sequence exercised:
 * <ol>
 *   <li>With a node count of 4, two distinct failed task attempts on node 1 are expected to
 *       move it to BLACKLISTED.</li>
 *   <li>Two failed task attempts on node 2 are then expected to leave nodes in FORCED_ACTIVE
 *       and to un-blacklist node 1.</li>
 *   <li>Raising the node count to 8 is expected to put nodes 1 and 2 into BLACKLISTED and
 *       return node 3 to ACTIVE.</li>
 * </ol>
 *
 * <p>Uses the {@code dispatcher} and {@code LOG} members declared outside this method.
 *
 * @param amNodeTracker tracker under test; it is stopped at the end of this method
 * @param handler       test handler whose {@code events} list is inspected and cleared here
 * @param schedulerId   scheduler id passed to every tracker call and event created here
 */
private void _testNodeSelfBlacklist(AMNodeTracker amNodeTracker, TestEventHandler handler, int schedulerId) {
    // Start with a node count of 4 and make four nodes known to the tracker.
    amNodeTracker.handle(new AMNodeEventNodeCountUpdated(4, schedulerId));
    NodeId nodeId = NodeId.newInstance("host1", 1234);
    NodeId nodeId2 = NodeId.newInstance("host2", 1234);
    NodeId nodeId3 = NodeId.newInstance("host3", 1234);
    NodeId nodeId4 = NodeId.newInstance("host4", 1234);
    amNodeTracker.nodeSeen(nodeId, schedulerId);
    amNodeTracker.nodeSeen(nodeId2, schedulerId);
    amNodeTracker.nodeSeen(nodeId3, schedulerId);
    amNodeTracker.nodeSeen(nodeId4, schedulerId);
    AMNodeImpl node = (AMNodeImpl) amNodeTracker.get(nodeId, schedulerId);
    // Allocate three containers on node 1.
    ContainerId cId1 = mock(ContainerId.class);
    ContainerId cId2 = mock(ContainerId.class);
    ContainerId cId3 = mock(ContainerId.class);
    amNodeTracker.handle(new AMNodeEventContainerAllocated(nodeId, schedulerId, cId1));
    amNodeTracker.handle(new AMNodeEventContainerAllocated(nodeId, schedulerId, cId2));
    amNodeTracker.handle(new AMNodeEventContainerAllocated(nodeId, schedulerId, cId3));
    assertEquals(3, node.containers.size());
    TezTaskAttemptID ta1 = mock(TezTaskAttemptID.class);
    TezTaskAttemptID ta2 = mock(TezTaskAttemptID.class);
    TezTaskAttemptID ta3 = mock(TezTaskAttemptID.class);
    // One success followed by one failure: the node is expected to stay ACTIVE.
    amNodeTracker.handle(new AMNodeEventTaskAttemptSucceeded(nodeId, schedulerId, cId1, ta1));
    assertEquals(1, node.numSuccessfulTAs);
    amNodeTracker.handle(new AMNodeEventTaskAttemptEnded(nodeId, schedulerId, cId2, ta2, true));
    assertEquals(1, node.numSuccessfulTAs);
    assertEquals(1, node.numFailedTAs);
    assertEquals(AMNodeState.ACTIVE, node.getState());
    // duplicate should not affect anything
    amNodeTracker.handle(new AMNodeEventTaskAttemptEnded(nodeId, schedulerId, cId2, ta2, true));
    assertEquals(1, node.numSuccessfulTAs);
    assertEquals(1, node.numFailedTAs);
    assertEquals(AMNodeState.ACTIVE, node.getState());
    // A second distinct failure on node 1 is expected to blacklist it.
    amNodeTracker.handle(new AMNodeEventTaskAttemptEnded(nodeId, schedulerId, cId3, ta3, true));
    // Wait on the dispatcher before inspecting the captured events.
    dispatcher.await();
    assertEquals(1, node.numSuccessfulTAs);
    assertEquals(2, node.numFailedTAs);
    assertEquals(AMNodeState.BLACKLISTED, node.getState());
    // Expected events, in order: C_NODE_FAILED for cId1, cId2 and cId3, then S_NODE_BLACKLISTED
    // for node 1.
    assertEquals(4, handler.events.size());
    assertEquals(AMContainerEventType.C_NODE_FAILED, handler.events.get(0).getType());
    assertEquals(cId1, ((AMContainerEventNodeFailed) handler.events.get(0)).getContainerId());
    assertEquals(AMContainerEventType.C_NODE_FAILED, handler.events.get(1).getType());
    assertEquals(cId2, ((AMContainerEventNodeFailed) handler.events.get(1)).getContainerId());
    assertEquals(AMContainerEventType.C_NODE_FAILED, handler.events.get(2).getType());
    assertEquals(cId3, ((AMContainerEventNodeFailed) handler.events.get(2)).getContainerId());
    assertEquals(AMSchedulerEventType.S_NODE_BLACKLISTED, handler.events.get(3).getType());
    assertEquals(node.getNodeId(), ((AMSchedulerEventNodeBlacklistUpdate) handler.events.get(3)).getNodeId());
    // Trigger one more node failure, which should cause BLACKLISTING to be disabled
    ContainerId cId4 = mock(ContainerId.class);
    ContainerId cId5 = mock(ContainerId.class);
    TezTaskAttemptID ta4 = mock(TezTaskAttemptID.class);
    TezTaskAttemptID ta5 = mock(TezTaskAttemptID.class);
    AMNodeImpl node2 = (AMNodeImpl) amNodeTracker.get(nodeId2, schedulerId);
    amNodeTracker.handle(new AMNodeEventContainerAllocated(nodeId2, schedulerId, cId4));
    amNodeTracker.handle(new AMNodeEventContainerAllocated(nodeId2, schedulerId, cId5));
    amNodeTracker.handle(new AMNodeEventTaskAttemptEnded(nodeId2, schedulerId, cId4, ta4, true));
    assertEquals(1, node2.numFailedTAs);
    assertEquals(AMNodeState.ACTIVE, node2.getState());
    // Discard events captured so far so that only those caused by the next failure are counted.
    handler.events.clear();
    amNodeTracker.handle(new AMNodeEventTaskAttemptEnded(nodeId2, schedulerId, cId5, ta5, true));
    dispatcher.await();
    assertEquals(2, node2.numFailedTAs);
    assertEquals(AMNodeState.FORCED_ACTIVE, node2.getState());
    AMNodeImpl node3 = (AMNodeImpl) amNodeTracker.get(nodeId3, schedulerId);
    assertEquals(AMNodeState.FORCED_ACTIVE, node3.getState());
    assertEquals(5, handler.events.size());
    // Blacklisting Disabled, the node causing this will not be blacklisted. The single node that
    // was blacklisted will be unblacklisted.
    int numIgnoreBlacklistingEnabledEvents = 0;
    int numUnblacklistedEvents = 0;
    // Only these two event types are allowed here; anything else fails the test.
    for (Event event : handler.events) {
        if (event.getType() == AMNodeEventType.N_IGNORE_BLACKLISTING_ENABLED) {
            numIgnoreBlacklistingEnabledEvents++;
        } else if (event.getType() == AMSchedulerEventType.S_NODE_UNBLACKLISTED) {
            numUnblacklistedEvents++;
        } else {
            fail("Unexpected event of type: " + event.getType());
        }
    }
    assertEquals(4, numIgnoreBlacklistingEnabledEvents);
    assertEquals(1, numUnblacklistedEvents);
    // drain all previous events
    dispatcher.await();
    // Increase the number of nodes. BLACKLISTING should be re-enabled.
    // Node 1 and Node 2 should go into BLACKLISTED state
    handler.events.clear();
    amNodeTracker.handle(new AMNodeEventNodeCountUpdated(8, schedulerId));
    dispatcher.await();
    LOG.info(("Completed waiting for dispatcher to process all pending events"));
    assertEquals(AMNodeState.BLACKLISTED, node.getState());
    assertEquals(AMNodeState.BLACKLISTED, node2.getState());
    assertEquals(AMNodeState.ACTIVE, node3.getState());
    assertEquals(8, handler.events.size());
    // Tally the captured events by type; index is used only for the log output below.
    int index = 0;
    int numIgnoreBlacklistingDisabledEvents = 0;
    int numBlacklistedEvents = 0;
    int numNodeFailedEvents = 0;
    for (Event event : handler.events) {
        LOG.info("Logging event: index:" + index++ + " type: " + event.getType());
        if (event.getType() == AMNodeEventType.N_IGNORE_BLACKLISTING_DISABLED) {
            numIgnoreBlacklistingDisabledEvents++;
        } else if (event.getType() == AMSchedulerEventType.S_NODE_BLACKLISTED) {
            numBlacklistedEvents++;
        } else if (event.getType() == AMContainerEventType.C_NODE_FAILED) {
            numNodeFailedEvents++;
            // Node2 is now blacklisted, so its containers (cId4, cId5) will be informed
            assertTrue(((AMContainerEventNodeFailed) event).getContainerId() == cId4 || ((AMContainerEventNodeFailed) event).getContainerId() == cId5);
        } else {
            fail("Unexpected event of type: " + event.getType());
        }
    }
    assertEquals(4, numIgnoreBlacklistingDisabledEvents);
    assertEquals(2, numBlacklistedEvents);
    assertEquals(2, numNodeFailedEvents);
    amNodeTracker.stop();
}
Also used : ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Event(org.apache.hadoop.yarn.event.Event) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 92 with Event

use of org.apache.hadoop.yarn.event.Event in project tez by apache.

the class AsyncDispatcher method createThread.

/**
 * Builds the task that runs the dispatch loop.
 *
 * <p>Each iteration records whether the event queue is empty, optionally wakes a thread
 * waiting on {@code waitForDrained}, then blocks for the next event and dispatches it.
 * The loop ends when {@code stopped} is set, when the running thread is interrupted, or
 * when the blocking take is interrupted.
 *
 * @return a new {@link Runnable} that runs the dispatch loop
 */
public Runnable createThread() {
    return new Runnable() {

        @Override
        public void run() {
            for (;;) {
                if (stopped || Thread.currentThread().isInterrupted()) {
                    return;
                }
                drained = eventQueue.isEmpty();
                // Only take the waitForDrained lock when blockNewEvents is set, so the normal
                // run of the loop avoids acquiring the lock and calling notify every time.
                if (blockNewEvents) {
                    synchronized (waitForDrained) {
                        if (drained) {
                            waitForDrained.notify();
                        }
                    }
                }
                Event next;
                try {
                    next = eventQueue.take();
                } catch (InterruptedException interrupted) {
                    // Only log the interrupt when the dispatcher has not been stopped.
                    if (!stopped) {
                        LOG.warn("AsyncDispatcher thread interrupted", interrupted);
                    }
                    return;
                }
                if (next == null) {
                    continue;
                }
                dispatch(next);
            }
        }
    };
}
Also used : Event(org.apache.hadoop.yarn.event.Event)

Example 93 with Event

use of org.apache.hadoop.yarn.event.Event in project tez by apache.

the class DrainDispatcher method getEventHandler.

/**
 * Returns a handler that wraps the superclass handler.
 *
 * <p>While holding {@code mutex}, the wrapper forwards each event to the superclass handler
 * and then sets {@code drained} to {@code false}.
 *
 * @return a wrapping {@link EventHandler} around {@code super.getEventHandler()}
 */
@SuppressWarnings("unchecked")
@Override
public EventHandler getEventHandler() {
    final EventHandler delegate = super.getEventHandler();
    EventHandler drainTrackingHandler = new EventHandler() {

        @Override
        public void handle(Event incoming) {
            synchronized (mutex) {
                // Forward first, then mark the dispatcher as not drained.
                delegate.handle(incoming);
                drained = false;
            }
        }
    };
    return drainTrackingHandler;
}
Also used : EventHandler(org.apache.hadoop.yarn.event.EventHandler) Event(org.apache.hadoop.yarn.event.Event)

Example 94 with Event

use of org.apache.hadoop.yarn.event.Event in project tez by apache.

the class AsyncDispatcherConcurrent method serviceStart.

/**
 * Starts the dispatcher: creates a fixed pool of {@code numThreads} daemon threads, adds one
 * event queue per thread, submits one {@code DispatchRunner} per queue, and then delegates to
 * the superclass start.
 *
 * @throws Exception declared by the overridden method; see {@code super.serviceStart()}
 */
@Override
protected void serviceStart() throws Exception {
    String threadNameFormat = "Dispatcher {" + this.name + "} #%d";
    ThreadFactoryBuilder factoryBuilder = new ThreadFactoryBuilder()
            .setDaemon(true)
            .setNameFormat(threadNameFormat);
    execService = Executors.newFixedThreadPool(numThreads, factoryBuilder.build());

    // Create every queue before any runner is submitted.
    for (int queueIndex = 0; queueIndex < numThreads; queueIndex++) {
        eventQueues.add(new LinkedBlockingQueue<Event>());
    }
    // Submit one runner per queue, matched by index.
    for (int queueIndex = 0; queueIndex < numThreads; queueIndex++) {
        DispatchRunner runner = new DispatchRunner(eventQueues.get(queueIndex));
        execService.execute(runner);
    }
    // start all the components
    super.serviceStart();
}
Also used : ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) Event(org.apache.hadoop.yarn.event.Event)

Aggregations

Event (org.apache.hadoop.yarn.event.Event)94 Test (org.junit.Test)79 Configuration (org.apache.hadoop.conf.Configuration)46 DAGHistoryEvent (org.apache.tez.dag.history.DAGHistoryEvent)35 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)32 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)29 TaskAttemptEvent (org.apache.tez.dag.app.dag.event.TaskAttemptEvent)26 InputReadErrorEvent (org.apache.tez.runtime.api.events.InputReadErrorEvent)26 EventHandler (org.apache.hadoop.yarn.event.EventHandler)25 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)24 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)24 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)23 TaskEvent (org.apache.tez.dag.app.dag.event.TaskEvent)23 SystemClock (org.apache.hadoop.yarn.util.SystemClock)21 TezTaskID (org.apache.tez.dag.records.TezTaskID)21 TaskStatusUpdateEvent (org.apache.tez.runtime.api.events.TaskStatusUpdateEvent)21 Resource (org.apache.hadoop.yarn.api.records.Resource)19 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)18 Dispatcher (org.apache.hadoop.yarn.event.Dispatcher)18 TaskCommunicatorManagerInterface (org.apache.tez.dag.app.TaskCommunicatorManagerInterface)18