Search in sources :

Example 1 with JobEvent

use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.

the class MRAppMaster method serviceStart.

@SuppressWarnings("unchecked")
@Override
protected void serviceStart() throws Exception {
    amInfos = new LinkedList<AMInfo>();
    completedTasksFromPreviousRun = new HashMap<TaskId, TaskInfo>();
    processRecovery();
    // Current an AMInfo for the current AM generation.
    AMInfo amInfo = MRBuilderUtils.newAMInfo(appAttemptID, startTime, containerID, nmHost, nmPort, nmHttpPort);
    // /////////////////// Create the job itself.
    job = createJob(getConfig(), forcedState, shutDownMessage);
    // Send out an MR AM inited event for all previous AMs.
    for (AMInfo info : amInfos) {
        dispatcher.getEventHandler().handle(new JobHistoryEvent(job.getID(), new AMStartedEvent(info.getAppAttemptId(), info.getStartTime(), info.getContainerId(), info.getNodeManagerHost(), info.getNodeManagerPort(), info.getNodeManagerHttpPort(), appSubmitTime)));
    }
    // Send out an MR AM inited event for this AM.
    dispatcher.getEventHandler().handle(new JobHistoryEvent(job.getID(), new AMStartedEvent(amInfo.getAppAttemptId(), amInfo.getStartTime(), amInfo.getContainerId(), amInfo.getNodeManagerHost(), amInfo.getNodeManagerPort(), amInfo.getNodeManagerHttpPort(), this.forcedState == null ? null : this.forcedState.toString(), appSubmitTime)));
    amInfos.add(amInfo);
    // metrics system init is really init & start.
    // It's more test friendly to put it here.
    DefaultMetricsSystem.initialize("MRAppMaster");
    boolean initFailed = false;
    if (!errorHappenedShutDown) {
        // create a job event for job intialization
        JobEvent initJobEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT);
        // Send init to the job (this does NOT trigger job execution)
        // This is a synchronous call, not an event through dispatcher. We want
        // job-init to be done completely here.
        jobEventDispatcher.handle(initJobEvent);
        // If job is still not initialized, an error happened during
        // initialization. Must complete starting all of the services so failure
        // events can be processed.
        initFailed = (((JobImpl) job).getInternalState() != JobStateInternal.INITED);
        if (job.isUber()) {
            speculatorEventDispatcher.disableSpeculation();
            LOG.info("MRAppMaster uberizing job " + job.getID() + " in local container (\"uber-AM\") on node " + nmHost + ":" + nmPort + ".");
        } else {
            // send init to speculator only for non-uber jobs. 
            // This won't yet start as dispatcher isn't started yet.
            dispatcher.getEventHandler().handle(new SpeculatorEvent(job.getID(), clock.getTime()));
            LOG.info("MRAppMaster launching normal, non-uberized, multi-container " + "job " + job.getID() + ".");
        }
        // Start ClientService here, since it's not initialized if
        // errorHappenedShutDown is true
        clientService.start();
    }
    //start all the components
    super.serviceStart();
    // finally set the job classloader
    MRApps.setClassLoader(jobClassLoader, getConfig());
    // set job classloader if configured
    Limits.init(getConfig());
    if (initFailed) {
        JobEvent initFailedEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT_FAILED);
        jobEventDispatcher.handle(initFailedEvent);
    } else {
        // All components have started, start the job.
        startJobs();
    }
}
Also used : AMInfo(org.apache.hadoop.mapreduce.v2.api.records.AMInfo) TaskInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo) AMStartedEvent(org.apache.hadoop.mapreduce.jobhistory.AMStartedEvent) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) SpeculatorEvent(org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent)

Example 2 with JobEvent

use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.

the class TestJobEndNotifier method testNotificationOnLastRetryUnregistrationFailure.

@Test
public void testNotificationOnLastRetryUnregistrationFailure() throws Exception {
    HttpServer2 server = startHttpServer();
    MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false, this.getClass().getName(), true, 2, false));
    // Currently, we will have isLastRetry always equals to false at beginning
    // of MRAppMaster, except staging area exists or commit already started at 
    // the beginning.
    // Now manually set isLastRetry to true and this should reset to false when
    // unregister failed.
    app.isLastAMRetry = true;
    doNothing().when(app).sysexit();
    JobConf conf = new JobConf();
    conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL, JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
    JobImpl job = (JobImpl) app.submit(conf);
    app.waitForState(job, JobState.RUNNING);
    app.getContext().getEventHandler().handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
    app.waitForInternalState(job, JobStateInternal.REBOOT);
    // Now shutdown. User should see FAILED state.
    // Unregistration fails: isLastAMRetry is recalculated, this is
    ///reboot will stop service internally, we don't need to shutdown twice
    app.waitForServiceToStop(10000);
    Assert.assertFalse(app.isLastAMRetry());
    // Since it's not last retry, JobEndServlet didn't called
    Assert.assertEquals(0, JobEndServlet.calledTimes);
    Assert.assertNull(JobEndServlet.requestUri);
    Assert.assertNull(JobEndServlet.foundJobState);
    server.stop();
}
Also used : JobImpl(org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) HttpServer2(org.apache.hadoop.http.HttpServer2) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)

Example 3 with JobEvent

use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.

the class TestKill method testKillTaskWait.

@Test
public void testKillTaskWait() throws Exception {
    final Dispatcher dispatcher = new AsyncDispatcher() {

        private TaskAttemptEvent cachedKillEvent;

        @Override
        protected void dispatch(Event event) {
            if (event instanceof TaskAttemptEvent) {
                TaskAttemptEvent killEvent = (TaskAttemptEvent) event;
                if (killEvent.getType() == TaskAttemptEventType.TA_KILL) {
                    TaskAttemptId taID = killEvent.getTaskAttemptID();
                    if (taID.getTaskId().getTaskType() == TaskType.REDUCE && taID.getTaskId().getId() == 0 && taID.getId() == 0) {
                        // Task is asking the reduce TA to kill itself. 'Create' a race
                        // condition. Make the task succeed and then inform the task that
                        // TA has succeeded. Once Task gets the TA succeeded event at
                        // KILL_WAIT, then relay the actual kill signal to TA
                        super.dispatch(new TaskAttemptEvent(taID, TaskAttemptEventType.TA_DONE));
                        super.dispatch(new TaskAttemptEvent(taID, TaskAttemptEventType.TA_CONTAINER_COMPLETED));
                        super.dispatch(new TaskTAttemptEvent(taID, TaskEventType.T_ATTEMPT_SUCCEEDED));
                        this.cachedKillEvent = killEvent;
                        return;
                    }
                }
            } else if (event instanceof TaskEvent) {
                TaskEvent taskEvent = (TaskEvent) event;
                if (taskEvent.getType() == TaskEventType.T_ATTEMPT_SUCCEEDED && this.cachedKillEvent != null) {
                    // When the TA comes and reports that it is done, send the
                    // cachedKillEvent
                    super.dispatch(this.cachedKillEvent);
                    return;
                }
            }
            super.dispatch(event);
        }
    };
    MRApp app = new MRApp(1, 1, false, this.getClass().getName(), true) {

        @Override
        public Dispatcher createDispatcher() {
            return dispatcher;
        }
    };
    Job job = app.submit(new Configuration());
    JobId jobId = app.getJobId();
    app.waitForState(job, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 2, job.getTasks().size());
    Iterator<Task> it = job.getTasks().values().iterator();
    Task mapTask = it.next();
    Task reduceTask = it.next();
    app.waitForState(mapTask, TaskState.RUNNING);
    app.waitForState(reduceTask, TaskState.RUNNING);
    TaskAttempt mapAttempt = mapTask.getAttempts().values().iterator().next();
    app.waitForState(mapAttempt, TaskAttemptState.RUNNING);
    TaskAttempt reduceAttempt = reduceTask.getAttempts().values().iterator().next();
    app.waitForState(reduceAttempt, TaskAttemptState.RUNNING);
    // Finish map
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(mapAttempt.getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(mapTask, TaskState.SUCCEEDED);
    // Now kill the job
    app.getContext().getEventHandler().handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    app.waitForInternalState((JobImpl) job, JobStateInternal.KILLED);
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskTAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) TaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) Event(org.apache.hadoop.yarn.event.Event) TaskTAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 4 with JobEvent

use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.

the class TestKill method testKillJob.

@Test
public void testKillJob() throws Exception {
    final CountDownLatch latch = new CountDownLatch(1);
    MRApp app = new BlockingMRApp(1, 0, latch);
    //this will start the job but job won't complete as task is
    //blocked
    Job job = app.submit(new Configuration());
    //wait and vailidate for Job to become RUNNING
    app.waitForInternalState((JobImpl) job, JobStateInternal.RUNNING);
    //send the kill signal to Job
    app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_KILL));
    //unblock Task
    latch.countDown();
    //wait and validate for Job to be KILLED
    app.waitForState(job, JobState.KILLED);
    // make sure all events are processed. The AM is stopped
    // only when all tasks and task attempts have been killed
    app.waitForState(Service.STATE.STOPPED);
    Map<TaskId, Task> tasks = job.getTasks();
    Assert.assertEquals("No of tasks is not correct", 1, tasks.size());
    Task task = tasks.values().iterator().next();
    Assert.assertEquals("Task state not correct", TaskState.KILLED, task.getReport().getTaskState());
    Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts();
    Assert.assertEquals("No of attempts is not correct", 1, attempts.size());
    Iterator<TaskAttempt> it = attempts.values().iterator();
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, it.next().getReport().getTaskAttemptState());
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) CountDownLatch(java.util.concurrent.CountDownLatch) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) Test(org.junit.Test)

Example 5 with JobEvent

use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.

the class TestKill method testKillTaskWaitKillJobAfterTA_DONE.

@Test
public void testKillTaskWaitKillJobAfterTA_DONE() throws Exception {
    CountDownLatch latch = new CountDownLatch(1);
    final Dispatcher dispatcher = new MyAsyncDispatch(latch, TaskAttemptEventType.TA_DONE);
    MRApp app = new MRApp(1, 1, false, this.getClass().getName(), true) {

        @Override
        public Dispatcher createDispatcher() {
            return dispatcher;
        }
    };
    Job job = app.submit(new Configuration());
    JobId jobId = app.getJobId();
    app.waitForState(job, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 2, job.getTasks().size());
    Iterator<Task> it = job.getTasks().values().iterator();
    Task mapTask = it.next();
    Task reduceTask = it.next();
    app.waitForState(mapTask, TaskState.RUNNING);
    app.waitForState(reduceTask, TaskState.RUNNING);
    TaskAttempt mapAttempt = mapTask.getAttempts().values().iterator().next();
    app.waitForState(mapAttempt, TaskAttemptState.RUNNING);
    TaskAttempt reduceAttempt = reduceTask.getAttempts().values().iterator().next();
    app.waitForState(reduceAttempt, TaskAttemptState.RUNNING);
    // The order in the dispatch event queue, from first to last
    // TA_DONE
    // JobEventType.JOB_KILL
    // TaskAttemptEventType.TA_CONTAINER_COMPLETED ( from TA_DONE handling )
    // TaskEventType.T_KILL ( from JobEventType.JOB_KILL handling )
    // TaskEventType.T_ATTEMPT_SUCCEEDED ( from TA_CONTAINER_COMPLETED handling )
    // Finish map
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(mapAttempt.getID(), TaskAttemptEventType.TA_DONE));
    // Now kill the job
    app.getContext().getEventHandler().handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    //unblock
    latch.countDown();
    app.waitForInternalState((JobImpl) job, JobStateInternal.KILLED);
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) CountDownLatch(java.util.concurrent.CountDownLatch) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Aggregations

JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent)33 Test (org.junit.Test)21 Configuration (org.apache.hadoop.conf.Configuration)19 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)16 AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher)13 CommitterEventHandler (org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler)12 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)9 JobStartEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent)9 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)7 Task (org.apache.hadoop.mapreduce.v2.app.job.Task)6 JobDiagnosticsUpdateEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent)6 JobContext (org.apache.hadoop.mapreduce.JobContext)5 AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext)5 IOException (java.io.IOException)4 TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)4 CountDownLatch (java.util.concurrent.CountDownLatch)3 CyclicBarrier (java.util.concurrent.CyclicBarrier)3 JobID (org.apache.hadoop.mapreduce.JobID)3 JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent)3 JobTokenSecretManager (org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager)3