
Example 41 with AsyncDispatcher

Use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.

The class TestCommitterEventHandler, method testCommitWindow.

@Test
public void testCommitWindow() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    dispatcher.init(conf);
    dispatcher.start();
    TestingJobEventHandler jeh = new TestingJobEventHandler();
    dispatcher.register(JobEventType.class, jeh);
    SystemClock clock = SystemClock.getInstance();
    AppContext appContext = mock(AppContext.class);
    ApplicationAttemptId attemptid = ApplicationAttemptId.fromString("appattempt_1234567890000_0001_0");
    when(appContext.getApplicationID()).thenReturn(attemptid.getApplicationId());
    when(appContext.getApplicationAttemptId()).thenReturn(attemptid);
    when(appContext.getEventHandler()).thenReturn(dispatcher.getEventHandler());
    when(appContext.getClock()).thenReturn(clock);
    OutputCommitter committer = mock(OutputCommitter.class);
    TestingRMHeartbeatHandler rmhh = new TestingRMHeartbeatHandler();
    CommitterEventHandler ceh = new CommitterEventHandler(appContext, committer, rmhh);
    ceh.init(conf);
    ceh.start();
    // verify that a commit attempted while RM heartbeats are stale does not go through
    ceh.handle(new CommitterJobCommitEvent(null, null));
    long timeToWaitMs = 5000;
    while (rmhh.getNumCallbacks() != 1 && timeToWaitMs > 0) {
        Thread.sleep(10);
        timeToWaitMs -= 10;
    }
    Assert.assertEquals("committer did not register a heartbeat callback", 1, rmhh.getNumCallbacks());
    verify(committer, never()).commitJob(any(JobContext.class));
    Assert.assertEquals("committer should not have committed", 0, jeh.numCommitCompletedEvents);
    // set a fresh heartbeat and verify commit completes
    rmhh.setLastHeartbeatTime(clock.getTime());
    timeToWaitMs = 5000;
    while (jeh.numCommitCompletedEvents != 1 && timeToWaitMs > 0) {
        Thread.sleep(10);
        timeToWaitMs -= 10;
    }
    Assert.assertEquals("committer did not complete commit after RM hearbeat", 1, jeh.numCommitCompletedEvents);
    verify(committer, times(1)).commitJob(any(JobContext.class));
    // Clean up so we can try to commit again (don't do this at home)
    cleanup();
    // try to commit again and verify it goes through since the heartbeat
    // is still fresh
    ceh.handle(new CommitterJobCommitEvent(null, null));
    timeToWaitMs = 5000;
    while (jeh.numCommitCompletedEvents != 2 && timeToWaitMs > 0) {
        Thread.sleep(10);
        timeToWaitMs -= 10;
    }
    Assert.assertEquals("committer did not commit", 2, jeh.numCommitCompletedEvents);
    verify(committer, times(2)).commitJob(any(JobContext.class));
    ceh.stop();
    dispatcher.stop();
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration), Configuration (org.apache.hadoop.conf.Configuration), AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher), SystemClock (org.apache.hadoop.yarn.util.SystemClock), AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext), ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId), JobContext (org.apache.hadoop.mapreduce.JobContext), Test (org.junit.Test)
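
Every test on this page drives the same AsyncDispatcher lifecycle: construct, init with a Configuration, start, register one handler per event-type enum, post events through getEventHandler(), and stop. A minimal self-contained sketch of that pattern follows; MyEventType, MyEvent, and the short sleep before stop() are illustrative additions, not part of the Hadoop tests.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.event.AbstractEvent;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.EventHandler;

public class DispatcherSketch {

    // Hypothetical event type and event, for illustration only.
    enum MyEventType { PING }

    static class MyEvent extends AbstractEvent<MyEventType> {
        MyEvent(MyEventType type) { super(type); }
    }

    public static void main(String[] args) throws Exception {
        AsyncDispatcher dispatcher = new AsyncDispatcher();
        dispatcher.init(new Configuration());
        dispatcher.start();
        // The handler runs on the dispatcher's single event-processing thread.
        dispatcher.register(MyEventType.class, new EventHandler<MyEvent>() {
            @Override
            public void handle(MyEvent event) {
                System.out.println("handled " + event.getType());
            }
        });
        dispatcher.getEventHandler().handle(new MyEvent(MyEventType.PING));
        // Crude wait so the dispatcher thread can deliver the event before stop().
        Thread.sleep(100);
        dispatcher.stop();
    }
}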

Example 42 with AsyncDispatcher

Use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.

The class TestJobImpl, method testKilledDuringSetup.

@Test(timeout = 20000)
public void testKilledDuringSetup() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    dispatcher.init(conf);
    dispatcher.start();
    OutputCommitter committer = new StubbedOutputCommitter() {

        @Override
        public synchronized void setupJob(JobContext jobContext) throws IOException {
            while (!Thread.interrupted()) {
                try {
                    wait();
                } catch (InterruptedException e) {
                }
            }
        }
    };
    CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
    commitHandler.init(conf);
    commitHandler.start();
    JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
    JobId jobId = job.getID();
    job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
    assertJobState(job, JobStateInternal.INITED);
    job.handle(new JobStartEvent(jobId));
    assertJobState(job, JobStateInternal.SETUP);
    job.handle(new JobEvent(job.getID(), JobEventType.JOB_KILL));
    assertJobState(job, JobStateInternal.KILLED);
    dispatcher.stop();
    commitHandler.stop();
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), Configuration (org.apache.hadoop.conf.Configuration), AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher), JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent), JobStartEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent), CommitterEventHandler (org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler), JobContext (org.apache.hadoop.mapreduce.JobContext), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId), Test (org.junit.Test)
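
One subtlety in the blocking stub above: wait() clears the thread's interrupt status when it throws InterruptedException, so after the empty catch the Thread.interrupted() guard no longer observes the interrupt directly. The safer general idiom restores the status in the catch so the loop condition can exit. A sketch of that variant (not the test's code):

@Override
public synchronized void setupJob(JobContext jobContext) throws IOException {
    while (!Thread.currentThread().isInterrupted()) {
        try {
            // Park until the committer thread is interrupted (e.g. on job kill).
            wait();
        } catch (InterruptedException e) {
            // Re-assert the interrupt status so the loop condition sees it.
            Thread.currentThread().interrupt();
        }
    }
}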

Example 43 with AsyncDispatcher

Use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.

The class TestMRApp, method testUpdatedNodes.

/**
   * Verifies that the AM re-runs maps that have run on bad nodes, that the AM
   * records all success/killed events so that reduces are notified about map
   * output status changes, and that the re-run information is preserved
   * across an AM restart.
   */
@Test
public void testUpdatedNodes() throws Exception {
    int runCount = 0;
    Dispatcher disp = Mockito.spy(new AsyncDispatcher());
    MRApp app = new MRAppWithHistory(2, 2, false, this.getClass().getName(), true, ++runCount, disp);
    Configuration conf = new Configuration();
    // reduces start after half of the maps have completed
    conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 0.5f);
    // uberization forces full slowstart (1.0), so disable that
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    ContainerAllocEventHandler handler = new ContainerAllocEventHandler();
    disp.register(ContainerAllocator.EventType.class, handler);
    final Job job1 = app.submit(conf);
    app.waitForState(job1, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 4, job1.getTasks().size());
    Iterator<Task> it = job1.getTasks().values().iterator();
    Task mapTask1 = it.next();
    Task mapTask2 = it.next();
    // all maps must be running
    app.waitForState(mapTask1, TaskState.RUNNING);
    app.waitForState(mapTask2, TaskState.RUNNING);
    TaskAttempt task1Attempt = mapTask1.getAttempts().values().iterator().next();
    TaskAttempt task2Attempt = mapTask2.getAttempts().values().iterator().next();
    NodeId node1 = task1Attempt.getNodeId();
    NodeId node2 = task2Attempt.getNodeId();
    Assert.assertEquals(node1, node2);
    // send the done signal to the task
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt.getID(), TaskAttemptEventType.TA_DONE));
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task2Attempt.getID(), TaskAttemptEventType.TA_DONE));
    // all maps must be succeeded
    app.waitForState(mapTask1, TaskState.SUCCEEDED);
    app.waitForState(mapTask2, TaskState.SUCCEEDED);
    final int checkIntervalMillis = 100;
    final int waitForMillis = 800;
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 2;
        }
    }, checkIntervalMillis, waitForMillis);
    TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 2 completion events for success", 2, events.length);
    // send updated nodes info
    ArrayList<NodeReport> updatedNodes = new ArrayList<NodeReport>();
    NodeReport nr = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(NodeReport.class);
    nr.setNodeId(node1);
    nr.setNodeState(NodeState.UNHEALTHY);
    updatedNodes.add(nr);
    app.getContext().getEventHandler().handle(new JobUpdatedNodesEvent(job1.getID(), updatedNodes));
    app.waitForState(task1Attempt, TaskAttemptState.KILLED);
    app.waitForState(task2Attempt, TaskAttemptState.KILLED);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 4;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job1.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 2 more completion events for killed", 4, events.length);
    // The 2 map task attempts killed above should be re-requested from the
    // container allocator with the previous attempt marked as failed. When
    // that happens, the allocator requests the container for this mapper from
    // the RM at a higher priority of 5 (i.e. a priority equivalent to that of
    // a fail-fast map).
    handler.waitForFailedMapContainerReqEvents(2);
    // all maps must be back to running
    app.waitForState(mapTask1, TaskState.RUNNING);
    app.waitForState(mapTask2, TaskState.RUNNING);
    Iterator<TaskAttempt> itr = mapTask1.getAttempts().values().iterator();
    itr.next();
    task1Attempt = itr.next();
    // send the done signal to the task
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt.getID(), TaskAttemptEventType.TA_DONE));
    // map1 must be succeeded. map2 must be running
    app.waitForState(mapTask1, TaskState.SUCCEEDED);
    app.waitForState(mapTask2, TaskState.RUNNING);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 5;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job1.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 1 more completion events for success", 5, events.length);
    // Crash the app again.
    app.stop();
    // rerun
    // in rerun the 1st map will be recovered from previous run
    app = new MRAppWithHistory(2, 2, false, this.getClass().getName(), false, ++runCount, (Dispatcher) new AsyncDispatcher());
    conf = new Configuration();
    conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    final Job job2 = app.submit(conf);
    app.waitForState(job2, JobState.RUNNING);
    Assert.assertEquals("No of tasks not correct", 4, job2.getTasks().size());
    it = job2.getTasks().values().iterator();
    mapTask1 = it.next();
    mapTask2 = it.next();
    Task reduceTask1 = it.next();
    Task reduceTask2 = it.next();
    // map 1 will be recovered, no need to send done
    app.waitForState(mapTask1, TaskState.SUCCEEDED);
    app.waitForState(mapTask2, TaskState.RUNNING);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 2;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job2.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 2 completion events for killed & success of map1", 2, events.length);
    task2Attempt = mapTask2.getAttempts().values().iterator().next();
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task2Attempt.getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(mapTask2, TaskState.SUCCEEDED);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 3;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job2.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 1 more completion events for success", 3, events.length);
    app.waitForState(reduceTask1, TaskState.RUNNING);
    app.waitForState(reduceTask2, TaskState.RUNNING);
    TaskAttempt task3Attempt = reduceTask1.getAttempts().values().iterator().next();
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task3Attempt.getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(reduceTask1, TaskState.SUCCEEDED);
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task3Attempt.getID(), TaskAttemptEventType.TA_KILL));
    app.waitForState(reduceTask1, TaskState.SUCCEEDED);
    TaskAttempt task4Attempt = reduceTask2.getAttempts().values().iterator().next();
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task4Attempt.getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(reduceTask2, TaskState.SUCCEEDED);
    waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
            return events.length == 5;
        }
    }, checkIntervalMillis, waitForMillis);
    events = job2.getTaskAttemptCompletionEvents(0, 100);
    Assert.assertEquals("Expecting 2 more completion events for reduce success", 5, events.length);
    // job succeeds
    app.waitForState(job2, JobState.SUCCEEDED);
}
Also used: Task (org.apache.hadoop.mapreduce.v2.app.job.Task), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent), AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher), Dispatcher (org.apache.hadoop.yarn.event.Dispatcher), TaskAttemptCompletionEvent (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent), ContainerAllocator (org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator), NodeId (org.apache.hadoop.yarn.api.records.NodeId), TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt), Job (org.apache.hadoop.mapreduce.v2.app.job.Job), JobUpdatedNodesEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobUpdatedNodesEvent), NodeReport (org.apache.hadoop.yarn.api.records.NodeReport), Test (org.junit.Test)
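
The waitFor(Supplier<Boolean>, checkIntervalMillis, waitForMillis) helper used throughout this test matches the shape of Hadoop's GenericTestUtils.waitFor. A self-contained sketch of that polling loop, assuming the Guava Supplier that the anonymous classes above implement:

import java.util.concurrent.TimeoutException;
import com.google.common.base.Supplier;

static void waitFor(Supplier<Boolean> condition, int checkIntervalMillis,
        int waitForMillis) throws InterruptedException, TimeoutException {
    long deadline = System.currentTimeMillis() + waitForMillis;
    // Poll until the condition holds or the deadline passes.
    while (!condition.get()) {
        if (System.currentTimeMillis() >= deadline) {
            throw new TimeoutException(
                "condition not met within " + waitForMillis + " ms");
        }
        Thread.sleep(checkIntervalMillis);
    }
}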

Example 44 with AsyncDispatcher

Use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.

The class TestJobImpl, method testRebootedDuringCommit.

@Test(timeout = 20000)
public void testRebootedDuringCommit() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    conf.setInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, 2);
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    dispatcher.init(conf);
    dispatcher.start();
    CyclicBarrier syncBarrier = new CyclicBarrier(2);
    OutputCommitter committer = new WaitingOutputCommitter(syncBarrier, true);
    CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
    commitHandler.init(conf);
    commitHandler.start();
    AppContext mockContext = mock(AppContext.class);
    when(mockContext.isLastAMRetry()).thenReturn(true);
    when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false);
    JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, mockContext);
    completeJobTasks(job);
    assertJobState(job, JobStateInternal.COMMITTING);
    syncBarrier.await();
    job.handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT));
    assertJobState(job, JobStateInternal.REBOOT);
    // The external state stays RUNNING until the AM has unregistered; once it
    // has, this last retry surfaces as ERROR.
    Assert.assertEquals(JobState.RUNNING, job.getState());
    when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true);
    Assert.assertEquals(JobState.ERROR, job.getState());
    dispatcher.stop();
    commitHandler.stop();
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), Configuration (org.apache.hadoop.conf.Configuration), AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher), JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent), AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext), CommitterEventHandler (org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler), CyclicBarrier (java.util.concurrent.CyclicBarrier), Test (org.junit.Test)
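
The test and the WaitingOutputCommitter rendezvous on the two-party CyclicBarrier: commitJob() parks at the barrier on the committer thread, so when the test's own syncBarrier.await() returns, the job is provably mid-commit and it is safe to inject JOB_AM_REBOOT. A minimal sketch of that handshake; the class and method names here are illustrative:

import java.util.concurrent.CyclicBarrier;

class BarrierHandshakeSketch {

    private final CyclicBarrier syncBarrier = new CyclicBarrier(2);

    // Runs on the committer thread: blocks until the test thread arrives too.
    void commitJob() throws Exception {
        syncBarrier.await();
        // The real WaitingOutputCommitter can keep blocking here so the job
        // stays in COMMITTING while the test fires events at it.
    }

    // Runs on the test thread.
    void testBody() throws Exception {
        syncBarrier.await(); // returns only once commitJob() has started
        // Safe point: the job is now known to be mid-commit.
    }
}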

Example 45 with AsyncDispatcher

Use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.

The class TestJobImpl, method testJobPriorityUpdate.

@Test
public void testJobPriorityUpdate() throws Exception {
    Configuration conf = new Configuration();
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    Priority submittedPriority = Priority.newInstance(5);
    AppContext mockContext = mock(AppContext.class);
    when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false);
    JobImpl job = createStubbedJob(conf, dispatcher, 2, mockContext);
    JobId jobId = job.getID();
    job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
    assertJobState(job, JobStateInternal.INITED);
    job.handle(new JobStartEvent(jobId));
    assertJobState(job, JobStateInternal.SETUP);
    // Update the job priority to 5 and verify the job report reflects it
    job.setJobPriority(submittedPriority);
    Assert.assertEquals(submittedPriority, job.getReport().getJobPriority());
    job.handle(new JobSetupCompletedEvent(jobId));
    assertJobState(job, JobStateInternal.RUNNING);
    // Update the job priority to 8 while running and verify the change takes effect
    Priority updatedPriority = Priority.newInstance(8);
    job.setJobPriority(updatedPriority);
    assertJobState(job, JobStateInternal.RUNNING);
    Priority jobPriority = job.getReport().getJobPriority();
    Assert.assertNotNull(jobPriority);
    // Verify the changed priority matches what was set on the job.
    Assert.assertEquals(updatedPriority, jobPriority);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher), JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent), JobSetupCompletedEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobSetupCompletedEvent), Priority (org.apache.hadoop.yarn.api.records.Priority), JobStartEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent), AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId), Test (org.junit.Test)
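
Because JobImpl's transitions can be triggered from other threads (the dispatcher or the committer handler), the assertJobState helper used in these tests cannot check the state once and return; it polls briefly before asserting. A sketch of what such a helper typically looks like; the retry count and sleep interval are illustrative:

private static void assertJobState(JobImpl job, JobStateInternal expected)
        throws InterruptedException {
    int rounds = 0;
    // Give the dispatcher thread time to run the pending state transitions.
    while (job.getInternalState() != expected && rounds++ < 250) {
        Thread.sleep(10);
    }
    Assert.assertEquals(expected, job.getInternalState());
}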

Aggregations

AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher): 51
Test (org.junit.Test): 32
Configuration (org.apache.hadoop.conf.Configuration): 28
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 20
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 13
Dispatcher (org.apache.hadoop.yarn.event.Dispatcher): 12
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 11
JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent): 11
CommitterEventHandler (org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler): 10
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 9
RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext): 9
NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent): 8
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 7
JobStartEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent): 7
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 7
RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode): 7
Before (org.junit.Before): 7
ArrayList (java.util.ArrayList): 6
JobContext (org.apache.hadoop.mapreduce.JobContext): 6
LocalDirsHandlerService (org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService): 6