use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.
the class TestCommitterEventHandler method testCommitWindow.
@Test
public void testCommitWindow() throws Exception {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
AsyncDispatcher dispatcher = new AsyncDispatcher();
dispatcher.init(conf);
dispatcher.start();
TestingJobEventHandler jeh = new TestingJobEventHandler();
dispatcher.register(JobEventType.class, jeh);
SystemClock clock = SystemClock.getInstance();
AppContext appContext = mock(AppContext.class);
ApplicationAttemptId attemptid = ApplicationAttemptId.fromString("appattempt_1234567890000_0001_0");
when(appContext.getApplicationID()).thenReturn(attemptid.getApplicationId());
when(appContext.getApplicationAttemptId()).thenReturn(attemptid);
when(appContext.getEventHandler()).thenReturn(dispatcher.getEventHandler());
when(appContext.getClock()).thenReturn(clock);
OutputCommitter committer = mock(OutputCommitter.class);
TestingRMHeartbeatHandler rmhh = new TestingRMHeartbeatHandler();
CommitterEventHandler ceh = new CommitterEventHandler(appContext, committer, rmhh);
ceh.init(conf);
ceh.start();
// verify trying to commit when RM heartbeats are stale does not commit
ceh.handle(new CommitterJobCommitEvent(null, null));
long timeToWaitMs = 5000;
while (rmhh.getNumCallbacks() != 1 && timeToWaitMs > 0) {
Thread.sleep(10);
timeToWaitMs -= 10;
}
Assert.assertEquals("committer did not register a heartbeat callback", 1, rmhh.getNumCallbacks());
verify(committer, never()).commitJob(any(JobContext.class));
Assert.assertEquals("committer should not have committed", 0, jeh.numCommitCompletedEvents);
// set a fresh heartbeat and verify commit completes
rmhh.setLastHeartbeatTime(clock.getTime());
timeToWaitMs = 5000;
while (jeh.numCommitCompletedEvents != 1 && timeToWaitMs > 0) {
Thread.sleep(10);
timeToWaitMs -= 10;
}
Assert.assertEquals("committer did not complete commit after RM hearbeat", 1, jeh.numCommitCompletedEvents);
verify(committer, times(1)).commitJob(any(JobContext.class));
//Clean up so we can try to commit again (Don't do this at home)
cleanup();
// try to commit again and verify it goes through since the heartbeat
// is still fresh
ceh.handle(new CommitterJobCommitEvent(null, null));
timeToWaitMs = 5000;
while (jeh.numCommitCompletedEvents != 2 && timeToWaitMs > 0) {
Thread.sleep(10);
timeToWaitMs -= 10;
}
Assert.assertEquals("committer did not commit", 2, jeh.numCommitCompletedEvents);
verify(committer, times(2)).commitJob(any(JobContext.class));
ceh.stop();
dispatcher.stop();
}
use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.
the class TestJobImpl method testKilledDuringSetup.
@Test(timeout = 20000)
public void testKilledDuringSetup() throws Exception {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
AsyncDispatcher dispatcher = new AsyncDispatcher();
dispatcher.init(conf);
dispatcher.start();
OutputCommitter committer = new StubbedOutputCommitter() {
@Override
public synchronized void setupJob(JobContext jobContext) throws IOException {
while (!Thread.interrupted()) {
try {
wait();
} catch (InterruptedException e) {
}
}
}
};
CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
commitHandler.init(conf);
commitHandler.start();
JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
JobId jobId = job.getID();
job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
assertJobState(job, JobStateInternal.INITED);
job.handle(new JobStartEvent(jobId));
assertJobState(job, JobStateInternal.SETUP);
job.handle(new JobEvent(job.getID(), JobEventType.JOB_KILL));
assertJobState(job, JobStateInternal.KILLED);
dispatcher.stop();
commitHandler.stop();
}
use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.
the class TestMRApp method testUpdatedNodes.
/**
* The test verifies that the AM re-runs maps that have run on bad nodes. It
* also verifies that the AM records all success/killed events so that reduces
* are notified about map output status changes. It also verifies that the
* re-run information is preserved across AM restart
*/
@Test
public void testUpdatedNodes() throws Exception {
int runCount = 0;
Dispatcher disp = Mockito.spy(new AsyncDispatcher());
MRApp app = new MRAppWithHistory(2, 2, false, this.getClass().getName(), true, ++runCount, disp);
Configuration conf = new Configuration();
// after half of the map completion, reduce will start
conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 0.5f);
// uberization forces full slowstart (1.0), so disable that
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
ContainerAllocEventHandler handler = new ContainerAllocEventHandler();
disp.register(ContainerAllocator.EventType.class, handler);
final Job job1 = app.submit(conf);
app.waitForState(job1, JobState.RUNNING);
Assert.assertEquals("Num tasks not correct", 4, job1.getTasks().size());
Iterator<Task> it = job1.getTasks().values().iterator();
Task mapTask1 = it.next();
Task mapTask2 = it.next();
// all maps must be running
app.waitForState(mapTask1, TaskState.RUNNING);
app.waitForState(mapTask2, TaskState.RUNNING);
TaskAttempt task1Attempt = mapTask1.getAttempts().values().iterator().next();
TaskAttempt task2Attempt = mapTask2.getAttempts().values().iterator().next();
NodeId node1 = task1Attempt.getNodeId();
NodeId node2 = task2Attempt.getNodeId();
Assert.assertEquals(node1, node2);
// send the done signal to the task
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt.getID(), TaskAttemptEventType.TA_DONE));
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task2Attempt.getID(), TaskAttemptEventType.TA_DONE));
// all maps must be succeeded
app.waitForState(mapTask1, TaskState.SUCCEEDED);
app.waitForState(mapTask2, TaskState.SUCCEEDED);
final int checkIntervalMillis = 100;
final int waitForMillis = 800;
waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
return events.length == 2;
}
}, checkIntervalMillis, waitForMillis);
TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 2 completion events for success", 2, events.length);
// send updated nodes info
ArrayList<NodeReport> updatedNodes = new ArrayList<NodeReport>();
NodeReport nr = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(NodeReport.class);
nr.setNodeId(node1);
nr.setNodeState(NodeState.UNHEALTHY);
updatedNodes.add(nr);
app.getContext().getEventHandler().handle(new JobUpdatedNodesEvent(job1.getID(), updatedNodes));
app.waitForState(task1Attempt, TaskAttemptState.KILLED);
app.waitForState(task2Attempt, TaskAttemptState.KILLED);
waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
return events.length == 4;
}
}, checkIntervalMillis, waitForMillis);
events = job1.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 2 more completion events for killed", 4, events.length);
// 2 map task attempts which were killed above should be requested from
// container allocator with the previous map task marked as failed. If
// this happens allocator will request the container for this mapper from
// RM at a higher priority of 5(i.e. with a priority equivalent to that of
// a fail fast map).
handler.waitForFailedMapContainerReqEvents(2);
// all maps must be back to running
app.waitForState(mapTask1, TaskState.RUNNING);
app.waitForState(mapTask2, TaskState.RUNNING);
Iterator<TaskAttempt> itr = mapTask1.getAttempts().values().iterator();
itr.next();
task1Attempt = itr.next();
// send the done signal to the task
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt.getID(), TaskAttemptEventType.TA_DONE));
// map1 must be succeeded. map2 must be running
app.waitForState(mapTask1, TaskState.SUCCEEDED);
app.waitForState(mapTask2, TaskState.RUNNING);
waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents(0, 100);
return events.length == 5;
}
}, checkIntervalMillis, waitForMillis);
events = job1.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 1 more completion events for success", 5, events.length);
// Crash the app again.
app.stop();
// rerun
// in rerun the 1st map will be recovered from previous run
app = new MRAppWithHistory(2, 2, false, this.getClass().getName(), false, ++runCount, (Dispatcher) new AsyncDispatcher());
conf = new Configuration();
conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
final Job job2 = app.submit(conf);
app.waitForState(job2, JobState.RUNNING);
Assert.assertEquals("No of tasks not correct", 4, job2.getTasks().size());
it = job2.getTasks().values().iterator();
mapTask1 = it.next();
mapTask2 = it.next();
Task reduceTask1 = it.next();
Task reduceTask2 = it.next();
// map 1 will be recovered, no need to send done
app.waitForState(mapTask1, TaskState.SUCCEEDED);
app.waitForState(mapTask2, TaskState.RUNNING);
waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
return events.length == 2;
}
}, checkIntervalMillis, waitForMillis);
events = job2.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 2 completion events for killed & success of map1", 2, events.length);
task2Attempt = mapTask2.getAttempts().values().iterator().next();
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task2Attempt.getID(), TaskAttemptEventType.TA_DONE));
app.waitForState(mapTask2, TaskState.SUCCEEDED);
waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
return events.length == 3;
}
}, checkIntervalMillis, waitForMillis);
events = job2.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 1 more completion events for success", 3, events.length);
app.waitForState(reduceTask1, TaskState.RUNNING);
app.waitForState(reduceTask2, TaskState.RUNNING);
TaskAttempt task3Attempt = reduceTask1.getAttempts().values().iterator().next();
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task3Attempt.getID(), TaskAttemptEventType.TA_DONE));
app.waitForState(reduceTask1, TaskState.SUCCEEDED);
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task3Attempt.getID(), TaskAttemptEventType.TA_KILL));
app.waitForState(reduceTask1, TaskState.SUCCEEDED);
TaskAttempt task4Attempt = reduceTask2.getAttempts().values().iterator().next();
app.getContext().getEventHandler().handle(new TaskAttemptEvent(task4Attempt.getID(), TaskAttemptEventType.TA_DONE));
app.waitForState(reduceTask2, TaskState.SUCCEEDED);
waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
TaskAttemptCompletionEvent[] events = job2.getTaskAttemptCompletionEvents(0, 100);
return events.length == 5;
}
}, checkIntervalMillis, waitForMillis);
events = job2.getTaskAttemptCompletionEvents(0, 100);
Assert.assertEquals("Expecting 2 more completion events for reduce success", 5, events.length);
// job succeeds
app.waitForState(job2, JobState.SUCCEEDED);
}
use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.
the class TestJobImpl method testRebootedDuringCommit.
@Test(timeout = 20000)
public void testRebootedDuringCommit() throws Exception {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
conf.setInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, 2);
AsyncDispatcher dispatcher = new AsyncDispatcher();
dispatcher.init(conf);
dispatcher.start();
CyclicBarrier syncBarrier = new CyclicBarrier(2);
OutputCommitter committer = new WaitingOutputCommitter(syncBarrier, true);
CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
commitHandler.init(conf);
commitHandler.start();
AppContext mockContext = mock(AppContext.class);
when(mockContext.isLastAMRetry()).thenReturn(true);
when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false);
JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, mockContext);
completeJobTasks(job);
assertJobState(job, JobStateInternal.COMMITTING);
syncBarrier.await();
job.handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT));
assertJobState(job, JobStateInternal.REBOOT);
// return the external state as ERROR since this is last retry.
Assert.assertEquals(JobState.RUNNING, job.getState());
when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true);
Assert.assertEquals(JobState.ERROR, job.getState());
dispatcher.stop();
commitHandler.stop();
}
use of org.apache.hadoop.yarn.event.AsyncDispatcher in project hadoop by apache.
the class TestJobImpl method testJobPriorityUpdate.
@Test
public void testJobPriorityUpdate() throws Exception {
Configuration conf = new Configuration();
AsyncDispatcher dispatcher = new AsyncDispatcher();
Priority submittedPriority = Priority.newInstance(5);
AppContext mockContext = mock(AppContext.class);
when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false);
JobImpl job = createStubbedJob(conf, dispatcher, 2, mockContext);
JobId jobId = job.getID();
job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
assertJobState(job, JobStateInternal.INITED);
job.handle(new JobStartEvent(jobId));
assertJobState(job, JobStateInternal.SETUP);
// Update priority of job to 5, and it will be updated
job.setJobPriority(submittedPriority);
Assert.assertEquals(submittedPriority, job.getReport().getJobPriority());
job.handle(new JobSetupCompletedEvent(jobId));
assertJobState(job, JobStateInternal.RUNNING);
// Update priority of job to 8, and see whether its updated
Priority updatedPriority = Priority.newInstance(8);
job.setJobPriority(updatedPriority);
assertJobState(job, JobStateInternal.RUNNING);
Priority jobPriority = job.getReport().getJobPriority();
Assert.assertNotNull(jobPriority);
// Verify whether changed priority is same as what is set in Job.
Assert.assertEquals(updatedPriority, jobPriority);
}
Aggregations