use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestTaskAttempt method verifyMillisCounters.
public void verifyMillisCounters(Resource containerResource, int minContainerSize) throws Exception {
Clock actualClock = SystemClock.getInstance();
ControlledClock clock = new ControlledClock(actualClock);
clock.setTime(10);
MRApp app = new MRApp(1, 1, false, "testSlotMillisCounterUpdate", true, clock);
app.setAllocatedContainerResource(containerResource);
Configuration conf = new Configuration();
conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, minContainerSize);
app.setClusterInfo(new ClusterInfo(Resource.newInstance(10240, 1)));
Job job = app.submit(conf);
app.waitForState(job, JobState.RUNNING);
Map<TaskId, Task> tasks = job.getTasks();
Assert.assertEquals("Num tasks is not correct", 2, tasks.size());
Iterator<Task> taskIter = tasks.values().iterator();
Task mTask = taskIter.next();
app.waitForState(mTask, TaskState.RUNNING);
Task rTask = taskIter.next();
app.waitForState(rTask, TaskState.RUNNING);
Map<TaskAttemptId, TaskAttempt> mAttempts = mTask.getAttempts();
Assert.assertEquals("Num attempts is not correct", 1, mAttempts.size());
Map<TaskAttemptId, TaskAttempt> rAttempts = rTask.getAttempts();
Assert.assertEquals("Num attempts is not correct", 1, rAttempts.size());
TaskAttempt mta = mAttempts.values().iterator().next();
TaskAttempt rta = rAttempts.values().iterator().next();
app.waitForState(mta, TaskAttemptState.RUNNING);
app.waitForState(rta, TaskAttemptState.RUNNING);
clock.setTime(11);
app.getContext().getEventHandler().handle(new TaskAttemptEvent(mta.getID(), TaskAttemptEventType.TA_DONE));
app.getContext().getEventHandler().handle(new TaskAttemptEvent(rta.getID(), TaskAttemptEventType.TA_DONE));
app.waitForState(job, JobState.SUCCEEDED);
Assert.assertEquals(mta.getFinishTime(), 11);
Assert.assertEquals(mta.getLaunchTime(), 10);
Assert.assertEquals(rta.getFinishTime(), 11);
Assert.assertEquals(rta.getLaunchTime(), 10);
Counters counters = job.getAllCounters();
int memoryMb = (int) containerResource.getMemorySize();
int vcores = containerResource.getVirtualCores();
Assert.assertEquals((int) Math.ceil((float) memoryMb / minContainerSize), counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS).getValue());
Assert.assertEquals((int) Math.ceil((float) memoryMb / minContainerSize), counters.findCounter(JobCounter.SLOTS_MILLIS_REDUCES).getValue());
Assert.assertEquals(1, counters.findCounter(JobCounter.MILLIS_MAPS).getValue());
Assert.assertEquals(1, counters.findCounter(JobCounter.MILLIS_REDUCES).getValue());
Assert.assertEquals(memoryMb, counters.findCounter(JobCounter.MB_MILLIS_MAPS).getValue());
Assert.assertEquals(memoryMb, counters.findCounter(JobCounter.MB_MILLIS_REDUCES).getValue());
Assert.assertEquals(vcores, counters.findCounter(JobCounter.VCORES_MILLIS_MAPS).getValue());
Assert.assertEquals(vcores, counters.findCounter(JobCounter.VCORES_MILLIS_REDUCES).getValue());
}
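A note on the arithmetic: the controlled clock advances from 10 to 11, so each attempt runs for exactly 1 ms. MILLIS_MAPS and MILLIS_REDUCES therefore equal 1, MB_MILLIS_* equal memoryMb * 1, and SLOTS_MILLIS_* reduce to ceil(memoryMb / minContainerSize). In the Hadoop source this helper is driven by a parameterless @Test; a minimal sketch of such a driver, with illustrative resource sizes (the exact values used upstream may differ), could look like:

@Test
public void testMillisCountersUpdate() throws Exception {
    // Each call exercises a different container size against the
    // scheduler's minimum allocation (values here are illustrative).
    verifyMillisCounters(Resource.newInstance(1024, 1), 512);
    verifyMillisCounters(Resource.newInstance(2048, 4), 1024);
    verifyMillisCounters(Resource.newInstance(10240, 8), 2048);
}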
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestMRApp method testUpdatedNodes.
/**
* The test verifies that the AM re-runs maps that have run on bad nodes. It
* also verifies that the AM records all success/killed events so that reduces
* are notified about map output status changes. It also verifies that the
* re-run information is preserved across AM restart
*/
@Test
public void testUpdatedNodes() throws Exception {
int runCount = 0;
Dispatcher disp = Mockito.spy(new AsyncDispatcher());
MRApp app = new MRAppWithHistory(2, 2, false, this.getClass().getName(), true, ++runCount, disp);
Configuration conf = new Configuration();
// reduces will start after half of the maps have completed
conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 0.5f);
// uberization forces full slowstart (1.0), so disable that
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
ContainerAllocEventHandler handler = new ContainerAllocEventHandler();
disp.register(ContainerAllocator.EventType.class, handler);
final Job job1 = app.submit(conf);
app.waitForState(job1, JobState.RUNNING);
Assert.assertEquals("Num tasks not correct", 4, job1.getTasks().size());
Iterator<Task> it = job1.getTasks().values().iterator();
Task mapTask1 = it.next();
Task mapTask2 = it.next();
// all maps must be running
app.waitForState(mapTask1, TaskState.RUNNING);
app.waitForState(mapTask2, TaskState.RUNNING);
TaskAttempt task1Attempt = mapTask1.getAttempts().values().iterator().next();
TaskAttempt task2Attempt = mapTask2.getAttempts().values().iterator().next();
NodeId node1 = task1Attempt.getNodeId();
NodeId node2 = task2Attempt.getNodeId();
Assert.assertEquals(node1, node2);
// send the done signal to the task
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt.getID(), TaskAttemptEventType.TA_DONE));
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task2Attempt.getID(), TaskAttemptEventType.TA_DONE));
// all maps must be succeeded
app.waitForState(mapTask1, TaskState.SUCCEEDED);
app.waitForState(mapTask2, TaskState.SUCCEEDED);
final int checkIntervalMillis = 100;
final int waitForMillis = 800;
waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
        TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
        return events.length == 2;
    }
}, checkIntervalMillis, waitForMillis);
TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 2 completion events for success", 2, events.length);
// send updated nodes info
ArrayList<NodeReport> updatedNodes = new ArrayList<NodeReport>();
NodeReport nr = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(NodeReport.class);
nr.setNodeId(node1);
nr.setNodeState(NodeState.UNHEALTHY);
updatedNodes.add(nr);
app.getContext().getEventHandler().handle(new JobUpdatedNodesEvent(job1.getID(), updatedNodes));
app.waitForState(task1Attempt, TaskAttemptState.KILLED);
app.waitForState(task2Attempt, TaskAttemptState.KILLED);
waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
        TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
        return events.length == 4;
    }
}, checkIntervalMillis, waitForMillis);
events = job1.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 2 more completion events for killed", 4, events.length);
// The 2 map task attempts killed above should be requested again from the
// container allocator, with the previous attempt marked as failed. When
// this happens, the allocator requests the container for the mapper from
// the RM at a higher priority of 5 (i.e. a priority equivalent to that of
// a fail-fast map).
handler.waitForFailedMapContainerReqEvents(2);
// all maps must be back to running
app.waitForState(mapTask1, TaskState.RUNNING);
app.waitForState(mapTask2, TaskState.RUNNING);
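// the first attempt was killed above; skip it and take the re-launched second attempt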
Iterator<TaskAttempt> itr = mapTask1.getAttempts().values().iterator();
itr.next();
task1Attempt = itr.next();
// send the done signal to the task
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt.getID(), TaskAttemptEventType.TA_DONE));
// map1 must be succeeded. map2 must be running
app.waitForState(mapTask1, TaskState.SUCCEEDED);
app.waitForState(mapTask2, TaskState.RUNNING);
waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
        TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
        return events.length == 5;
    }
}, checkIntervalMillis, waitForMillis);
events = job1.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 1 more completion events for success", 5, events.length);
// Crash the app.
app.stop();
// Rerun: the 1st map will be recovered from the previous run.
app = new MRAppWithHistory(2, 2, false, this.getClass().getName(), false, ++runCount, (Dispatcher) new AsyncDispatcher());
conf = new Configuration();
conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
final Job job2 = app.submit(conf);
app.waitForState(job2, JobState.RUNNING);
Assert.assertEquals("No of tasks not correct", 4, job2.getTasks().size());
it = job2.getTasks().values().iterator();
mapTask1 = it.next();
mapTask2 = it.next();
Task reduceTask1 = it.next();
Task reduceTask2 = it.next();
// map 1 will be recovered, no need to send done
app.waitForState(mapTask1, TaskState.SUCCEEDED);
app.waitForState(mapTask2, TaskState.RUNNING);
waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
        TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
        return events.length == 2;
    }
}, checkIntervalMillis, waitForMillis);
events = job2.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 2 completion events for killed & success of map1", 2, events.length);
task2Attempt = mapTask2.getAttempts().values().iterator().next();
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task2Attempt.getID(), TaskAttemptEventType.TA_DONE));
app.waitForState(mapTask2, TaskState.SUCCEEDED);
waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
        TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
        return events.length == 3;
    }
}, checkIntervalMillis, waitForMillis);
events = job2.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 1 more completion events for success", 3, events.length);
app.waitForState(reduceTask1, TaskState.RUNNING);
app.waitForState(reduceTask2, TaskState.RUNNING);
TaskAttempt task3Attempt = reduceTask1.getAttempts().values().iterator().next();
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task3Attempt.getID(), TaskAttemptEventType.TA_DONE));
app.waitForState(reduceTask1, TaskState.SUCCEEDED);
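// a kill signal arriving after the attempt has succeeded must not change
// the task's state: it stays SUCCEEDED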
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task3Attempt.getID(), TaskAttemptEventType.TA_KILL));
app.waitForState(reduceTask1, TaskState.SUCCEEDED);
TaskAttempt task4Attempt = reduceTask2.getAttempts().values().iterator().next();
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task4Attempt.getID(), TaskAttemptEventType.TA_DONE));
app.waitForState(reduceTask2, TaskState.SUCCEEDED);
waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
        TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
        return events.length == 5;
    }
}, checkIntervalMillis, waitForMillis);
events = job2.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 2 more completion events for reduce success", 5, events.length);
// job succeeds
app.waitForState(job2, JobState.SUCCEEDED);
}
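The repeated waitFor(Supplier, checkIntervalMillis, waitForMillis) calls above poll the job's completion-event list until it reaches an expected length or a deadline passes. The test's actual helper is not shown in this excerpt; a self-contained sketch with the same shape (an assumption, not Hadoop's utility) could be:

// A minimal polling helper matching the calls above. Hadoop's own test
// utilities provide an equivalent; this sketch is only illustrative.
// Supplier is com.google.common.base.Supplier, as used by the test;
// TimeoutException is java.util.concurrent.TimeoutException.
public static void waitFor(Supplier<Boolean> check, int checkEveryMillis, int waitForMillis)
        throws InterruptedException, TimeoutException {
    long deadline = System.currentTimeMillis() + waitForMillis;
    while (!check.get()) {
        if (System.currentTimeMillis() > deadline) {
            throw new TimeoutException("Timed out waiting for condition");
        }
        Thread.sleep(checkEveryMillis);
    }
}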
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestMRApp method testContainerPassThrough.
@Test
public void testContainerPassThrough() throws Exception {
MRApp app = new MRApp(0, 1, true, this.getClass().getName(), true) {
    @Override
    protected ContainerLauncher createContainerLauncher(AppContext context) {
        return new MockContainerLauncher() {
            @Override
            public void handle(ContainerLauncherEvent event) {
                if (event instanceof ContainerRemoteLaunchEvent) {
                    containerObtainedByContainerLauncher = ((ContainerRemoteLaunchEvent) event).getAllocatedContainer();
                }
                super.handle(event);
            }
        };
    }
};
Job job = app.submit(new Configuration());
app.waitForState(job, JobState.SUCCEEDED);
app.verifyCompleted();
Collection<Task> tasks = job.getTasks().values();
Collection<TaskAttempt> taskAttempts = tasks.iterator().next().getAttempts().values();
TaskAttemptImpl taskAttempt = (TaskAttemptImpl) taskAttempts.iterator().next();
// Container from RM should pass through to the launcher. Container object
// should be the same.
Assert.assertTrue(taskAttempt.container == containerObtainedByContainerLauncher);
}
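The final assertion checks reference identity: the launcher must receive the very Container object the RM allocated, not an equal copy. JUnit's Assert.assertSame performs the same check with a more informative failure message; an equivalent drop-in:

// Equivalent identity check with a clearer failure message than assertTrue(a == b)
Assert.assertSame("Container should pass through to the launcher unchanged",
    containerObtainedByContainerLauncher, taskAttempt.container);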
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestJobImpl method testFailAbortDoesntHang.
@Test(timeout = 10000)
public void testFailAbortDoesntHang() throws IOException {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
conf.set(MRJobConfig.MR_AM_COMMITTER_CANCEL_TIMEOUT_MS, "1000");
DrainDispatcher dispatcher = new DrainDispatcher();
dispatcher.init(conf);
dispatcher.start();
OutputCommitter committer = Mockito.mock(OutputCommitter.class);
CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
commitHandler.init(conf);
commitHandler.start();
//Job has only 1 mapper task. No reducers
conf.setInt(MRJobConfig.NUM_REDUCES, 0);
conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1);
JobImpl job = createRunningStubbedJob(conf, dispatcher, 1, null);
//Fail every task attempt so the job transitions through the FAIL_ABORT state
for (Task t : job.tasks.values()) {
    TaskImpl task = (TaskImpl) t;
    task.handle(new TaskEvent(task.getID(), TaskEventType.T_SCHEDULE));
    for (TaskAttempt ta : task.getAttempts().values()) {
        task.handle(new TaskTAttemptEvent(ta.getID(), TaskEventType.T_ATTEMPT_FAILED));
    }
}
dispatcher.await();
//Verify abortJob is called once and the job failed
Mockito.verify(committer, Mockito.timeout(2000).times(1)).abortJob((JobContext) Mockito.any(), (State) Mockito.any());
assertJobState(job, JobStateInternal.FAILED);
dispatcher.stop();
}
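Mockito.timeout(2000) makes the verification poll for up to 2 seconds, so the test passes as soon as abortJob is invoked on the committer's thread instead of racing it, and fails well within the @Test timeout if the abort path hangs. As a generic illustration of the pattern, with names invented for the example:

// Hypothetical async callback: verification waits for the call instead of racing it.
Runnable callback = Mockito.mock(Runnable.class);
new Thread(callback).start();
// passes as soon as run() is observed; fails after 2s without a call
Mockito.verify(callback, Mockito.timeout(2000).times(1)).run();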
use of org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt in project hadoop by apache.
the class TestKill method testKillTask.
@Test
public void testKillTask() throws Exception {
final CountDownLatch latch = new CountDownLatch(1);
MRApp app = new BlockingMRApp(2, 0, latch);
//this will start the job, but the job won't complete while the Tasks are blocked
Job job = app.submit(new Configuration());
//wait for the Job to become RUNNING and validate
app.waitForInternalState((JobImpl) job, JobStateInternal.RUNNING);
Map<TaskId, Task> tasks = job.getTasks();
Assert.assertEquals("No of tasks is not correct", 2, tasks.size());
Iterator<Task> it = tasks.values().iterator();
Task task1 = it.next();
Task task2 = it.next();
//send the kill signal to the first Task
app.getContext().getEventHandler().handle(new TaskEvent(task1.getID(), TaskEventType.T_KILL));
//unblock Task
latch.countDown();
//wait for the Job to become SUCCEEDED and validate
app.waitForState(job, JobState.SUCCEEDED);
//the first Task is killed, the second succeeds, and the Job as a whole succeeds
Assert.assertEquals("Task state not correct", TaskState.KILLED, task1.getReport().getTaskState());
Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, task2.getReport().getTaskState());
Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
Assert.assertEquals("No of attempts is not correct", 1, attempts.size());
Iterator<TaskAttempt> iter = attempts.values().iterator();
Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, iter.next().getReport().getTaskAttemptState());
attempts = task2.getAttempts();
Assert.assertEquals("No of attempts is not correct", 1, attempts.size());
iter = attempts.values().iterator();
Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, iter.next().getReport().getTaskAttemptState());
}
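BlockingMRApp is not shown in this excerpt. One plausible shape, assumed here rather than taken from the Hadoop source, is an MRApp whose attempts hold their completion until the latch opens, reusing the five-argument MRApp constructor seen earlier and the attemptLaunched hook that MRApp subclasses in Hadoop's tests commonly override (also an assumption for this sketch):

// An assumed sketch of BlockingMRApp, not Hadoop's actual implementation:
// completion of each launched attempt is deferred until the latch is released.
static class BlockingMRApp extends MRApp {
    private final CountDownLatch latch;

    BlockingMRApp(int maps, int reduces, CountDownLatch latch) {
        // autoComplete=true so the superclass sends TA_DONE once we let it run
        super(maps, reduces, true, "testKill", true);
        this.latch = latch;
    }

    @Override
    protected void attemptLaunched(final TaskAttemptId attemptID) {
        // wait on a separate thread so the dispatcher stays free to deliver kill events
        new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    latch.await();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
                // once released, fall back to MRApp's default (auto-complete) path
                BlockingMRApp.super.attemptLaunched(attemptID);
            }
        }).start();
    }
}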