Example 86 with Job

Use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

Class TestKill, method testKillTaskWait.

@Test
public void testKillTaskWait() throws Exception {
    final Dispatcher dispatcher = new AsyncDispatcher() {

        private TaskAttemptEvent cachedKillEvent;

        @Override
        protected void dispatch(Event event) {
            if (event instanceof TaskAttemptEvent) {
                TaskAttemptEvent killEvent = (TaskAttemptEvent) event;
                if (killEvent.getType() == TaskAttemptEventType.TA_KILL) {
                    TaskAttemptId taID = killEvent.getTaskAttemptID();
                    if (taID.getTaskId().getTaskType() == TaskType.REDUCE && taID.getTaskId().getId() == 0 && taID.getId() == 0) {
                        // Task is asking the reduce TA to kill itself. 'Create' a race
                        // condition. Make the task succeed and then inform the task that
                        // TA has succeeded. Once Task gets the TA succeeded event at
                        // KILL_WAIT, then relay the actual kill signal to TA
                        super.dispatch(new TaskAttemptEvent(taID, TaskAttemptEventType.TA_DONE));
                        super.dispatch(new TaskAttemptEvent(taID, TaskAttemptEventType.TA_CONTAINER_COMPLETED));
                        super.dispatch(new TaskTAttemptEvent(taID, TaskEventType.T_ATTEMPT_SUCCEEDED));
                        this.cachedKillEvent = killEvent;
                        return;
                    }
                }
            } else if (event instanceof TaskEvent) {
                TaskEvent taskEvent = (TaskEvent) event;
                if (taskEvent.getType() == TaskEventType.T_ATTEMPT_SUCCEEDED && this.cachedKillEvent != null) {
                    // When the TA reports that it is done, relay the
                    // cached kill event
                    super.dispatch(this.cachedKillEvent);
                    return;
                }
            }
            super.dispatch(event);
        }
    };
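    // MRApp(maps, reduces, autoComplete, testName, cleanOnStart): 1 map, 1 reduce,
    // attempts do not auto-complete; the meaning of the last two flags is an
    // assumption about the MRApp test harness constructor.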
    MRApp app = new MRApp(1, 1, false, this.getClass().getName(), true) {

        @Override
        public Dispatcher createDispatcher() {
            return dispatcher;
        }
    };
    Job job = app.submit(new Configuration());
    JobId jobId = app.getJobId();
    app.waitForState(job, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 2, job.getTasks().size());
    Iterator<Task> it = job.getTasks().values().iterator();
    Task mapTask = it.next();
    Task reduceTask = it.next();
    app.waitForState(mapTask, TaskState.RUNNING);
    app.waitForState(reduceTask, TaskState.RUNNING);
    TaskAttempt mapAttempt = mapTask.getAttempts().values().iterator().next();
    app.waitForState(mapAttempt, TaskAttemptState.RUNNING);
    TaskAttempt reduceAttempt = reduceTask.getAttempts().values().iterator().next();
    app.waitForState(reduceAttempt, TaskAttemptState.RUNNING);
    // Finish map
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(mapAttempt.getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(mapTask, TaskState.SUCCEEDED);
    // Now kill the job
    app.getContext().getEventHandler().handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    app.waitForInternalState((JobImpl) job, JobStateInternal.KILLED);
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskTAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) TaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent) Event(org.apache.hadoop.yarn.event.Event) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 87 with Job

Use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

Class TestKill, method testKillJob.

@Test
public void testKillJob() throws Exception {
    final CountDownLatch latch = new CountDownLatch(1);
    MRApp app = new BlockingMRApp(1, 0, latch);
    //this will start the job, but the job won't complete because the task is blocked
    Job job = app.submit(new Configuration());
    //wait and validate for the Job to become RUNNING
    app.waitForInternalState((JobImpl) job, JobStateInternal.RUNNING);
    //send the kill signal to Job
    app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_KILL));
    //unblock Task
    latch.countDown();
    //wait and validate for Job to be KILLED
    app.waitForState(job, JobState.KILLED);
    // make sure all events are processed. The AM is stopped
    // only when all tasks and task attempts have been killed
    app.waitForState(Service.STATE.STOPPED);
    Map<TaskId, Task> tasks = job.getTasks();
    Assert.assertEquals("No of tasks is not correct", 1, tasks.size());
    Task task = tasks.values().iterator().next();
    Assert.assertEquals("Task state not correct", TaskState.KILLED, task.getReport().getTaskState());
    Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts();
    Assert.assertEquals("No of attempts is not correct", 1, attempts.size());
    Iterator<TaskAttempt> it = attempts.values().iterator();
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, it.next().getReport().getTaskAttemptState());
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) CountDownLatch(java.util.concurrent.CountDownLatch) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) Test(org.junit.Test)
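
BlockingMRApp is defined elsewhere in TestKill and is not shown on this page; Example 89 below reuses it. A minimal sketch of what such a helper might look like, assuming MRApp exposes a protected attemptLaunched(TaskAttemptId) hook and a (maps, reduces, autoComplete, testName, cleanOnStart) constructor; the names match the calls above, but the body is an illustration of the pattern, not the project's actual source:

static class BlockingMRApp extends MRApp {

    private final CountDownLatch latch;

    BlockingMRApp(int maps, int reduces, CountDownLatch latch) {
        // testName and cleanOnStart values are assumptions
        super(maps, reduces, true, "testKill", true);
        this.latch = latch;
    }

    @Override
    protected void attemptLaunched(TaskAttemptId attemptID) {
        if (attemptID.getTaskId().getId() == 0 && attemptID.getId() == 0) {
            // Block the first attempt of the first task until the test
            // counts down the latch, keeping the job from completing.
            try {
                latch.await();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        } else {
            // Let every other attempt complete immediately.
            getContext().getEventHandler().handle(
                new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_DONE));
        }
    }
}

With this shape, latch.countDown() in the test releases the blocked attempt, after which the pending kill (Example 87) or the remaining attempts (Example 89) can be processed.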

Example 88 with Job

Use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

Class TestKill, method testKillTaskWaitKillJobAfterTA_DONE.

@Test
public void testKillTaskWaitKillJobAfterTA_DONE() throws Exception {
    CountDownLatch latch = new CountDownLatch(1);
    final Dispatcher dispatcher = new MyAsyncDispatch(latch, TaskAttemptEventType.TA_DONE);
    MRApp app = new MRApp(1, 1, false, this.getClass().getName(), true) {

        @Override
        public Dispatcher createDispatcher() {
            return dispatcher;
        }
    };
    Job job = app.submit(new Configuration());
    JobId jobId = app.getJobId();
    app.waitForState(job, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 2, job.getTasks().size());
    Iterator<Task> it = job.getTasks().values().iterator();
    Task mapTask = it.next();
    Task reduceTask = it.next();
    app.waitForState(mapTask, TaskState.RUNNING);
    app.waitForState(reduceTask, TaskState.RUNNING);
    TaskAttempt mapAttempt = mapTask.getAttempts().values().iterator().next();
    app.waitForState(mapAttempt, TaskAttemptState.RUNNING);
    TaskAttempt reduceAttempt = reduceTask.getAttempts().values().iterator().next();
    app.waitForState(reduceAttempt, TaskAttemptState.RUNNING);
    // The order in the dispatch event queue, from first to last
    // TA_DONE
    // JobEventType.JOB_KILL
    // TaskAttemptEventType.TA_CONTAINER_COMPLETED ( from TA_DONE handling )
    // TaskEventType.T_KILL ( from JobEventType.JOB_KILL handling )
    // TaskEventType.T_ATTEMPT_SUCCEEDED ( from TA_CONTAINER_COMPLETED handling )
    // Finish map
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(mapAttempt.getID(), TaskAttemptEventType.TA_DONE));
    // Now kill the job
    app.getContext().getEventHandler().handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    //unblock
    latch.countDown();
    app.waitForInternalState((JobImpl) job, JobStateInternal.KILLED);
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) CountDownLatch(java.util.concurrent.CountDownLatch) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)
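
MyAsyncDispatch is also defined elsewhere in TestKill; Example 90 below passes it a JobEventType instead of a TaskAttemptEventType. A plausible sketch, assuming it extends AsyncDispatcher and parks the dispatcher thread on the first event of the configured type until the test counts down the latch (an illustration of the pattern, not the project's actual source):

static class MyAsyncDispatch extends AsyncDispatcher {

    private final CountDownLatch latch;
    private TaskAttemptEventType attemptEventTypeToWait;
    private JobEventType jobEventTypeToWait;

    MyAsyncDispatch(CountDownLatch latch, TaskAttemptEventType attemptEventTypeToWait) {
        this.latch = latch;
        this.attemptEventTypeToWait = attemptEventTypeToWait;
    }

    MyAsyncDispatch(CountDownLatch latch, JobEventType jobEventTypeToWait) {
        this.latch = latch;
        this.jobEventTypeToWait = jobEventTypeToWait;
    }

    @Override
    protected void dispatch(Event event) {
        // Hold the configured event until the latch is released, so the test
        // can enqueue more events behind it and control the processing order.
        if (event instanceof TaskAttemptEvent
                && ((TaskAttemptEvent) event).getType() == attemptEventTypeToWait) {
            awaitLatch();
        } else if (event instanceof JobEvent
                && ((JobEvent) event).getType() == jobEventTypeToWait) {
            awaitLatch();
        }
        super.dispatch(event);
    }

    private void awaitLatch() {
        try {
            latch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}

Because the dispatcher thread, not the test thread, is blocked, the TA_DONE and JOB_KILL events sent afterwards queue up behind the held event in exactly the order the in-code comments of Examples 88 and 90 describe.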

Example 89 with Job

Use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

Class TestKill, method testKillTaskAttempt.

@Test
public void testKillTaskAttempt() throws Exception {
    final CountDownLatch latch = new CountDownLatch(1);
    MRApp app = new BlockingMRApp(2, 0, latch);
    //this will start the job, but the job won't complete because the Task is blocked
    Job job = app.submit(new Configuration());
    //wait and validate for the Job to become RUNNING
    app.waitForState(job, JobState.RUNNING);
    Map<TaskId, Task> tasks = job.getTasks();
    Assert.assertEquals("No of tasks is not correct", 2, tasks.size());
    Iterator<Task> it = tasks.values().iterator();
    Task task1 = it.next();
    Task task2 = it.next();
    //wait for tasks to reach the SCHEDULED state (the blocked attempts keep them from RUNNING)
    app.waitForState(task1, TaskState.SCHEDULED);
    app.waitForState(task2, TaskState.SCHEDULED);
    //send the kill signal to the first Task's attempt
    TaskAttempt attempt = task1.getAttempts().values().iterator().next();
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_KILL));
    //unblock
    latch.countDown();
    //wait and validate for the Job to become SUCCEEDED;
    //the job still succeeds despite the killed attempt
    app.waitForState(job, JobState.SUCCEEDED);
    //the first Task will have two attempts: the 1st is killed, the 2nd succeeds;
    //both Tasks and the Job succeed
    Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, task1.getReport().getTaskState());
    Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, task2.getReport().getTaskState());
    Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
    Assert.assertEquals("No of attempts is not correct", 2, attempts.size());
    Iterator<TaskAttempt> iter = attempts.values().iterator();
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, iter.next().getReport().getTaskAttemptState());
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, iter.next().getReport().getTaskAttemptState());
    attempts = task2.getAttempts();
    Assert.assertEquals("No of attempts is not correct", 1, attempts.size());
    iter = attempts.values().iterator();
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, iter.next().getReport().getTaskAttemptState());
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) CountDownLatch(java.util.concurrent.CountDownLatch) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) Test(org.junit.Test)

Example 90 with Job

Use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

Class TestKill, method testKillTaskWaitKillJobBeforeTA_DONE.

@Test
public void testKillTaskWaitKillJobBeforeTA_DONE() throws Exception {
    CountDownLatch latch = new CountDownLatch(1);
    final Dispatcher dispatcher = new MyAsyncDispatch(latch, JobEventType.JOB_KILL);
    MRApp app = new MRApp(1, 1, false, this.getClass().getName(), true) {

        @Override
        public Dispatcher createDispatcher() {
            return dispatcher;
        }
    };
    Job job = app.submit(new Configuration());
    JobId jobId = app.getJobId();
    app.waitForState(job, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 2, job.getTasks().size());
    Iterator<Task> it = job.getTasks().values().iterator();
    Task mapTask = it.next();
    Task reduceTask = it.next();
    app.waitForState(mapTask, TaskState.RUNNING);
    app.waitForState(reduceTask, TaskState.RUNNING);
    TaskAttempt mapAttempt = mapTask.getAttempts().values().iterator().next();
    app.waitForState(mapAttempt, TaskAttemptState.RUNNING);
    TaskAttempt reduceAttempt = reduceTask.getAttempts().values().iterator().next();
    app.waitForState(reduceAttempt, TaskAttemptState.RUNNING);
    // The order in the dispatch event queue, from first to last
    // JobEventType.JOB_KILL
    // TA_DONE
    // TaskEventType.T_KILL ( from JobEventType.JOB_KILL handling )
    // TaskAttemptEventType.TA_CONTAINER_COMPLETED ( from TA_DONE handling )
    // TaskAttemptEventType.TA_KILL ( from TaskEventType.T_KILL handling )
    // TaskEventType.T_ATTEMPT_SUCCEEDED ( from TA_CONTAINER_COMPLETED handling )
    // TaskEventType.T_ATTEMPT_KILLED ( from TA_KILL handling )
    // Now kill the job
    app.getContext().getEventHandler().handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    // Finish map
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(mapAttempt.getID(), TaskAttemptEventType.TA_DONE));
    //unblock
    latch.countDown();
    app.waitForInternalState((JobImpl) job, JobStateInternal.KILLED);
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) CountDownLatch(java.util.concurrent.CountDownLatch) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Aggregations (types most frequently used together with Job across all 291 examples)

Job (org.apache.hadoop.mapreduce.v2.app.job.Job): 291 uses
Test (org.junit.Test): 266 uses
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 221 uses
Configuration (org.apache.hadoop.conf.Configuration): 145 uses
Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 141 uses
ClientResponse (com.sun.jersey.api.client.ClientResponse): 110 uses
WebResource (com.sun.jersey.api.client.WebResource): 110 uses
JSONObject (org.codehaus.jettison.json.JSONObject): 90 uses
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 80 uses
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 49 uses
TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId): 49 uses
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 44 uses
IOException (java.io.IOException): 35 uses
Path (org.apache.hadoop.fs.Path): 31 uses
JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent): 30 uses
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 30 uses
AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext): 28 uses
TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent): 28 uses
DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher): 25 uses
Path (javax.ws.rs.Path): 23 uses