use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class TestRMContainerAllocator method testIgnoreBlacklisting.
/**
 * Exercises the allocator's "ignore blacklisting" switch as the cluster grows.
 *
 * <p>The configuration enables node blacklisting, allows a single task failure
 * per tracker, and sets the ignore-blacklisting threshold to 33 percent. Node
 * managers are then registered one at a time while container failures are
 * reported, and after every step the number of containers handed back by
 * {@code getContainerOnHost} is asserted. The "Known=/blacklisted=" comments on
 * each step record the expected state of the ignore switch at that point.
 *
 * <p>NOTE(review): the four integers passed to {@code getContainerOnHost} before
 * {@code rm} look like expected blacklist addition/removal counts for two
 * successive scheduling rounds - confirm against the helper's signature.
 */
@Test
public void testIgnoreBlacklisting() throws Exception {
LOG.info("Running testIgnoreBlacklisting");
Configuration conf = new Configuration();
conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, 33);
MyResourceManager rm = new MyResourceManager(conf);
rm.start();
DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
// Submit the application
RMApp app = rm.submitApp(1024);
dispatcher.await();
MockNM[] nodeManagers = new MockNM[10];
int nmNum = 0;
List<TaskAttemptContainerAssignedEvent> assigned = null;
// Java evaluates the array index on the left before the right-hand side, so
// slot k receives the node registered with index k and nmNum then becomes k+1.
nodeManagers[nmNum] = registerNodeManager(nmNum++, rm, dispatcher);
nodeManagers[0].nodeHeartbeat(true);
dispatcher.await();
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
rm.sendAMLaunched(appAttemptId);
dispatcher.await();
JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
// The job is mocked; only getReport() is stubbed, returning a RUNNING report.
Job mockJob = mock(Job.class);
when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
// Known=1, blacklisted=0, ignore should be false - assign first container
assigned = getContainerOnHost(jobId, 1, 1024, new String[] { "h1" }, nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
LOG.info("Failing container _1 on H1 (Node should be blacklisted and" + " ignore blacklisting enabled");
// Send a failure event for attempt 1 on h1 (h2 is failed later in this test)
ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h1", false);
allocator.sendFailure(f1);
// Test single node.
// Known=1, blacklisted=1, ignore should be true - assign 0
// Because makeRemoteRequest will not be aware of it until next call
// The current call will send blacklisted node "h1" to RM
assigned = getContainerOnHost(jobId, 2, 1024, new String[] { "h1" }, nodeManagers[0], dispatcher, allocator, 1, 0, 0, 1, rm);
Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
// Known=1, blacklisted=1, ignore should be true - assign 1
assigned = getContainerOnHost(jobId, 2, 1024, new String[] { "h1" }, nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
nodeManagers[nmNum] = registerNodeManager(nmNum++, rm, dispatcher);
// Known=2, blacklisted=1, ignore should be true - assign 1 anyway.
assigned = getContainerOnHost(jobId, 3, 1024, new String[] { "h2" }, nodeManagers[1], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
nodeManagers[nmNum] = registerNodeManager(nmNum++, rm, dispatcher);
// Known=3, blacklisted=1, ignore should be true - assign 1 anyway.
assigned = getContainerOnHost(jobId, 4, 1024, new String[] { "h3" }, nodeManagers[2], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
// Known=3, blacklisted=1, ignore should be true - assign 1
assigned = getContainerOnHost(jobId, 5, 1024, new String[] { "h1" }, nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
nodeManagers[nmNum] = registerNodeManager(nmNum++, rm, dispatcher);
// Known=4, blacklisted=1, ignore should be false - assign 1 anyway
assigned = getContainerOnHost(jobId, 6, 1024, new String[] { "h4" }, nodeManagers[3], dispatcher, allocator, 0, 0, 1, 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
// Test blacklisting re-enabled.
// Known=4, blacklisted=1, ignore should be false - no assignment on h1
assigned = getContainerOnHost(jobId, 7, 1024, new String[] { "h1" }, nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
// RMContainerRequestor would have created a replacement request.
// Blacklist h2
ContainerFailedEvent f2 = createFailEvent(jobId, 3, "h2", false);
allocator.sendFailure(f2);
// Test ignore blacklisting re-enabled
// Known=4, blacklisted=2, ignore should be true. Should assign 0
// container for the same reason above.
assigned = getContainerOnHost(jobId, 8, 1024, new String[] { "h1" }, nodeManagers[0], dispatcher, allocator, 1, 0, 0, 2, rm);
Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
// Known=4, blacklisted=2, ignore should be true. Should assign 2
// containers.
// Attempt id 8 is requested a second time here, as in the previous step.
assigned = getContainerOnHost(jobId, 8, 1024, new String[] { "h1" }, nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 2", 2, assigned.size());
// Known=4, blacklisted=2, ignore should be true.
assigned = getContainerOnHost(jobId, 9, 1024, new String[] { "h2" }, nodeManagers[1], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
// Test blacklist while ignore blacklisting enabled
ContainerFailedEvent f3 = createFailEvent(jobId, 4, "h3", false);
allocator.sendFailure(f3);
nodeManagers[nmNum] = registerNodeManager(nmNum++, rm, dispatcher);
// Known=5, blacklisted=3, ignore should be true.
assigned = getContainerOnHost(jobId, 10, 1024, new String[] { "h3" }, nodeManagers[2], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
// Assign on 5 more nodes - to re-enable blacklisting
// Each iteration registers the node at index 5 + i but passes
// nodeManagers[4 + i], i.e. the node registered immediately before it.
// NOTE(review): the requested host strings are "5".."9", not the "hN" form
// used above - confirm this is intended given how registerNodeManager names nodes.
for (int i = 0; i < 5; i++) {
nodeManagers[nmNum] = registerNodeManager(nmNum++, rm, dispatcher);
assigned = getContainerOnHost(jobId, 11 + i, 1024, new String[] { String.valueOf(5 + i) }, nodeManagers[4 + i], dispatcher, allocator, 0, 0, (i == 4 ? 3 : 0), 0, rm);
Assert.assertEquals("No of assignments must be 1", 1, assigned.size());
}
// Test h3 (blacklisted while ignoring blacklisting) is blacklisted.
assigned = getContainerOnHost(jobId, 20, 1024, new String[] { "h3" }, nodeManagers[2], dispatcher, allocator, 0, 0, 0, 0, rm);
Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
}
use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class TestRMContainerAllocator method checkAssignments.
/**
 * Verifies a batch of container assignments against the requests that
 * produced them.
 *
 * <p>Asserts that the assignment list is non-null, that it holds exactly one
 * entry per request, and that no container id appears twice. Each request is
 * then paired with the first assignment carrying the same task attempt id
 * ({@code null} when none matches) and handed to {@code checkAssignment}.
 *
 * @param requests the container requests that were submitted
 * @param assignments the assignment events returned for those requests
 * @param checkHostMatch forwarded unchanged to {@code checkAssignment}
 */
private void checkAssignments(ContainerRequestEvent[] requests, List<TaskAttemptContainerAssignedEvent> assignments, boolean checkHostMatch) {
  Assert.assertNotNull("Container not assigned", assignments);
  Assert.assertEquals("Assigned count not correct", requests.length, assignments.size());

  // Collapse the container ids into a set; a smaller set means duplicates.
  Set<ContainerId> distinctIds = new HashSet<ContainerId>();
  for (TaskAttemptContainerAssignedEvent event : assignments) {
    distinctIds.add(event.getContainer().getId());
  }
  Assert.assertEquals("Assigned containers must be different", assignments.size(), distinctIds.size());

  // Pair every request with the first assignment for the same attempt.
  for (ContainerRequestEvent request : requests) {
    TaskAttemptContainerAssignedEvent match = null;
    for (int i = 0; match == null && i < assignments.size(); i++) {
      TaskAttemptContainerAssignedEvent candidate = assignments.get(i);
      if (candidate.getTaskAttemptID().equals(request.getAttemptID())) {
        match = candidate;
      }
    }
    checkAssignment(request, match, checkHostMatch);
  }
}
use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class TestRMContainerAllocator method testBlackListedNodes.
/**
 * Verifies that containers are not assigned on blacklisted nodes.
 *
 * <p>Blacklisting is enabled with a single allowed failure per tracker, and the
 * ignore-blacklisting percentage is set to -1. Three requests are sent, one per
 * host (h1, h2, h3); failures are then reported on h1 and h2. The test asserts
 * that nothing is assigned while only h1 and h2 heartbeat, and that all three
 * containers land on h3 once it heartbeats.
 */
@Test
public void testBlackListedNodes() throws Exception {
  LOG.info("Running testBlackListedNodes");
  Configuration conf = new Configuration();
  conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true);
  conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1);
  conf.setInt(MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1);
  MyResourceManager rm = new MyResourceManager(conf);
  rm.start();
  DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();

  // Submit the application
  RMApp app = rm.submitApp(1024);
  dispatcher.await();

  MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
  amNodeManager.nodeHeartbeat(true);
  dispatcher.await();

  ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
  rm.sendAMLaunched(appAttemptId);
  dispatcher.await();

  JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
  // The job is mocked; only getReport() is stubbed, returning a RUNNING report.
  Job mockJob = mock(Job.class);
  when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
  MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);

  // add resources to scheduler
  MockNM nodeManager1 = rm.registerNode("h1:1234", 10240);
  MockNM nodeManager2 = rm.registerNode("h2:1234", 10240);
  MockNM nodeManager3 = rm.registerNode("h3:1234", 10240);
  dispatcher.await();

  // One request per host, all with the same memory size.
  ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
  allocator.sendRequest(event1);
  ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
  allocator.sendRequest(event2);
  ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h3" });
  allocator.sendRequest(event3);

  // this tells the scheduler about the requests
  // h1-h3 are registered but have not heartbeated yet, so no allocations
  List<TaskAttemptContainerAssignedEvent> assigned = allocator.schedule();
  dispatcher.await();
  Assert.assertEquals("No of assignments must be 0", 0, assigned.size());

  // Send events to blacklist nodes h1 and h2
  ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h1", false);
  allocator.sendFailure(f1);
  ContainerFailedEvent f2 = createFailEvent(jobId, 1, "h2", false);
  allocator.sendFailure(f2);

  // update resources in scheduler: heartbeat the two failed nodes
  nodeManager1.nodeHeartbeat(true);
  nodeManager2.nodeHeartbeat(true);
  dispatcher.await();

  assigned = allocator.schedule();
  Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
  dispatcher.await();
  // Same list re-checked after the dispatcher has drained.
  Assert.assertEquals("No of assignments must be 0", 0, assigned.size());
  assertBlacklistAdditionsAndRemovals(2, 0, rm);

  // mark h1/h2 as bad nodes
  nodeManager1.nodeHeartbeat(false);
  nodeManager2.nodeHeartbeat(false);
  dispatcher.await();

  assigned = allocator.schedule();
  dispatcher.await();
  assertBlacklistAdditionsAndRemovals(0, 0, rm);
  Assert.assertEquals("No of assignments must be 0", 0, assigned.size());

  // Heartbeat the only node that has seen no failure
  nodeManager3.nodeHeartbeat(true);
  dispatcher.await();
  assigned = allocator.schedule();
  dispatcher.await();
  assertBlacklistAdditionsAndRemovals(0, 0, rm);
  // assertEquals (rather than assertTrue on a comparison) reports the actual
  // value when the check fails.
  Assert.assertEquals("No of assignments must be 3", 3, assigned.size());

  // validate that all containers are assigned to h3
  for (TaskAttemptContainerAssignedEvent assig : assigned) {
    Assert.assertEquals("Assigned container host not correct", "h3", assig.getContainer().getNodeId().getHost());
  }
}
use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class LocalContainerAllocator method handle.
/**
 * Responds to a container request by fabricating a container locally.
 *
 * <p>Only {@code CONTAINER_REQ} events are acted on; any other event type is
 * ignored. For a request, a {@code Container} record is built that reuses this
 * allocator's container id number under the current application attempt, is
 * placed on this allocator's node host/port with a zero-sized resource and no
 * container token, and is then delivered to the event handler wrapped in a
 * {@link TaskAttemptContainerAssignedEvent}. For map task attempts a job
 * counter update is dispatched first.
 *
 * @param event the allocator event to process
 */
@SuppressWarnings("unchecked")
@Override
public void handle(ContainerAllocatorEvent event) {
  if (event.getType() != ContainerAllocator.EventType.CONTAINER_REQ) {
    return;
  }
  LOG.info("Processing the event " + event.toString());

  // Assign the same container ID as the AM
  ContainerId localContainerId = ContainerId.newContainerId(
      getContext().getApplicationAttemptId(), this.containerId.getContainerId());
  Container localContainer = recordFactory.newRecordInstance(Container.class);
  localContainer.setId(localContainerId);

  NodeId localNode = NodeId.newInstance(this.nmHost, this.nmPort);
  localContainer.setResource(Resource.newInstance(0, 0));
  localContainer.setNodeId(localNode);
  localContainer.setContainerToken(null);
  String httpAddress = this.nmHost + ":" + this.nmHttpPort;
  localContainer.setNodeHttpAddress(httpAddress);

  boolean isMapAttempt = event.getAttemptID().getTaskId().getTaskType() == TaskType.MAP;
  if (isMapAttempt) {
    JobCounterUpdateEvent counterUpdate =
        new JobCounterUpdateEvent(event.getAttemptID().getTaskId().getJobId());
    // TODO Setting OTHER_LOCAL_MAP for now.
    counterUpdate.addCounterUpdate(JobCounter.OTHER_LOCAL_MAPS, 1);
    eventHandler.handle(counterUpdate);
  }

  eventHandler.handle(
      new TaskAttemptContainerAssignedEvent(event.getAttemptID(), localContainer, applicationACLs));
}
use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent in project hadoop by apache.
the class TestTaskAttempt method testTooManyFetchFailureAfterKill.
/**
 * Checks that a map task attempt which succeeded and was then killed stays
 * KILLED when a late TA_TOO_MANY_FETCH_FAILURE event arrives, and that the
 * event handler records no internal error along the way.
 *
 * <p>The attempt is driven through schedule, container assignment, launch,
 * done and container-completed events, with the state asserted after the
 * SUCCEEDED, KILLED and post-fetch-failure steps.
 */
@Test
public void testTooManyFetchFailureAfterKill() throws Exception {
  ApplicationId appId = ApplicationId.newInstance(1, 2);
  ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
  JobId jobId = MRBuilderUtils.newJobId(appId, 1);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
  TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
  Path jobFile = mock(Path.class);

  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptListener taListener = mock(TaskAttemptListener.class);
  when(taListener.getAddress()).thenReturn(new InetSocketAddress("localhost", 0));

  JobConf jobConf = new JobConf();
  jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
  jobConf.setBoolean("fs.file.impl.disable.cache", true);
  jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
  jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");

  TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
  when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });

  AppContext appCtx = mock(AppContext.class);
  ClusterInfo clusterInfo = mock(ClusterInfo.class);
  // NOTE(review): this Resource mock is stubbed but never attached to
  // clusterInfo or passed anywhere in this method - confirm whether it is needed.
  Resource resource = mock(Resource.class);
  when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
  when(resource.getMemorySize()).thenReturn(1024L);
  setupTaskAttemptFinishingMonitor(eventHandler, jobConf, appCtx);

  TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1, splits, jobConf, taListener, mock(Token.class), new Credentials(), SystemClock.getInstance(), appCtx);

  NodeId nid = NodeId.newInstance("127.0.0.1", 0);
  ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
  Container container = mock(Container.class);
  when(container.getId()).thenReturn(contId);
  when(container.getNodeId()).thenReturn(nid);
  when(container.getNodeHttpAddress()).thenReturn("localhost:0");

  // Drive the attempt to completion.
  taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_SCHEDULE));
  taImpl.handle(new TaskAttemptContainerAssignedEvent(attemptId, container, mock(Map.class)));
  taImpl.handle(new TaskAttemptContainerLaunchedEvent(attemptId, 0));
  taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_DONE));
  taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_CONTAINER_COMPLETED));
  // assertEquals takes (message, expected, actual); the expected state goes
  // first so a failure reports the two values the right way round.
  assertEquals("Task attempt is not in succeeded state", TaskAttemptState.SUCCEEDED, taImpl.getState());

  // Kill after success, then deliver the late fetch-failure event.
  taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_KILL));
  assertEquals("Task attempt is not in KILLED state", TaskAttemptState.KILLED, taImpl.getState());
  taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_TOO_MANY_FETCH_FAILURE));
  assertEquals("Task attempt is not in KILLED state, still", TaskAttemptState.KILLED, taImpl.getState());
  assertFalse("InternalError occurred trying to handle TA_TOO_MANY_FETCH_FAILURE", eventHandler.internalError);
}
Aggregations