use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TaskAttemptImpl method recover.
@SuppressWarnings("unchecked")
public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo, OutputCommitter committer, boolean recoverOutput) {
ContainerId containerId = taInfo.getContainerId();
NodeId containerNodeId = NodeId.fromString(taInfo.getHostname() + ":" + taInfo.getPort());
String nodeHttpAddress = StringInterner.weakIntern(taInfo.getHostname() + ":" + taInfo.getHttpPort());
// Resource/Priority/Tokens are only needed while launching the container on
// an NM, these are already completed tasks, so setting them to null
container = Container.newInstance(containerId, containerNodeId, nodeHttpAddress, null, null, null);
computeRackAndLocality();
launchTime = taInfo.getStartTime();
finishTime = (taInfo.getFinishTime() != -1) ? taInfo.getFinishTime() : clock.getTime();
shufflePort = taInfo.getShufflePort();
trackerName = taInfo.getHostname();
httpPort = taInfo.getHttpPort();
sendLaunchedEvents();
reportedStatus.id = attemptId;
reportedStatus.progress = 1.0f;
reportedStatus.counters = taInfo.getCounters();
reportedStatus.stateString = taInfo.getState();
reportedStatus.phase = Phase.CLEANUP;
reportedStatus.mapFinishTime = taInfo.getMapFinishTime();
reportedStatus.shuffleFinishTime = taInfo.getShuffleFinishTime();
reportedStatus.sortFinishTime = taInfo.getSortFinishTime();
addDiagnosticInfo(taInfo.getError());
boolean needToClean = false;
String recoveredState = taInfo.getTaskStatus();
if (recoverOutput && TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
try {
committer.recoverTask(tac);
LOG.info("Recovered output from task attempt " + attemptId);
} catch (Exception e) {
LOG.error("Unable to recover task attempt " + attemptId, e);
LOG.info("Task attempt " + attemptId + " will be recovered as KILLED");
recoveredState = TaskAttemptState.KILLED.toString();
needToClean = true;
}
}
TaskAttemptStateInternal attemptState;
if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) {
attemptState = TaskAttemptStateInternal.SUCCEEDED;
reportedStatus.taskState = TaskAttemptState.SUCCEEDED;
eventHandler.handle(createJobCounterUpdateEventTASucceeded(this));
logAttemptFinishedEvent(attemptState);
} else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) {
attemptState = TaskAttemptStateInternal.FAILED;
reportedStatus.taskState = TaskAttemptState.FAILED;
eventHandler.handle(createJobCounterUpdateEventTAFailed(this, false));
TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.FAILED);
eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
} else {
if (!TaskAttemptState.KILLED.toString().equals(recoveredState)) {
if (String.valueOf(recoveredState).isEmpty()) {
LOG.info("TaskAttempt" + attemptId + " had not completed, recovering as KILLED");
} else {
LOG.warn("TaskAttempt " + attemptId + " found in unexpected state " + recoveredState + ", recovering as KILLED");
}
addDiagnosticInfo("Killed during application recovery");
needToClean = true;
}
attemptState = TaskAttemptStateInternal.KILLED;
reportedStatus.taskState = TaskAttemptState.KILLED;
eventHandler.handle(createJobCounterUpdateEventTAKilled(this, false));
TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.KILLED);
eventHandler.handle(new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce));
}
if (needToClean) {
TaskAttemptContext tac = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attemptId));
try {
committer.abortTask(tac);
} catch (Exception e) {
LOG.warn("Task cleanup failed for attempt " + attemptId, e);
}
}
return attemptState;
}
use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TestLocalContainerLauncher method createMockContainer.
private static Container createMockContainer() {
Container container = mock(Container.class);
NodeId nodeId = NodeId.newInstance("foo.bar.org", 1234);
when(container.getNodeId()).thenReturn(nodeId);
return container;
}
use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TestJobImpl method testUnusableNodeTransition.
@Test(timeout = 20000)
public void testUnusableNodeTransition() throws Exception {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
conf.setInt(MRJobConfig.NUM_REDUCES, 1);
DrainDispatcher dispatcher = new DrainDispatcher();
dispatcher.init(conf);
dispatcher.start();
CyclicBarrier syncBarrier = new CyclicBarrier(2);
OutputCommitter committer = new TestingOutputCommitter(syncBarrier, true);
CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
commitHandler.init(conf);
commitHandler.start();
final JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, null);
// add a special task event handler to put the task back to running in case
// of task rescheduling/killing
EventHandler<TaskAttemptEvent> taskAttemptEventHandler = new EventHandler<TaskAttemptEvent>() {
@Override
public void handle(TaskAttemptEvent event) {
if (event.getType() == TaskAttemptEventType.TA_KILL) {
job.decrementSucceededMapperCount();
}
}
};
dispatcher.register(TaskAttemptEventType.class, taskAttemptEventHandler);
// replace the tasks with spied versions to return the right attempts
Map<TaskId, Task> spiedTasks = new HashMap<TaskId, Task>();
List<NodeReport> nodeReports = new ArrayList<NodeReport>();
Map<NodeReport, TaskId> nodeReportsToTaskIds = new HashMap<NodeReport, TaskId>();
for (Map.Entry<TaskId, Task> e : job.tasks.entrySet()) {
TaskId taskId = e.getKey();
Task task = e.getValue();
if (taskId.getTaskType() == TaskType.MAP) {
// add an attempt to the task to simulate nodes
NodeId nodeId = mock(NodeId.class);
TaskAttempt attempt = mock(TaskAttempt.class);
when(attempt.getNodeId()).thenReturn(nodeId);
TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
when(attempt.getID()).thenReturn(attemptId);
// create a spied task
Task spied = spy(task);
doReturn(attempt).when(spied).getAttempt(any(TaskAttemptId.class));
spiedTasks.put(taskId, spied);
// create a NodeReport based on the node id
NodeReport report = mock(NodeReport.class);
when(report.getNodeState()).thenReturn(NodeState.UNHEALTHY);
when(report.getNodeId()).thenReturn(nodeId);
nodeReports.add(report);
nodeReportsToTaskIds.put(report, taskId);
}
}
// replace the tasks with the spied tasks
job.tasks.putAll(spiedTasks);
// complete all mappers first
for (TaskId taskId : job.tasks.keySet()) {
if (taskId.getTaskType() == TaskType.MAP) {
// generate a task attempt completed event first to populate the
// nodes-to-succeeded-attempts map
TaskAttemptCompletionEvent tce = Records.newRecord(TaskAttemptCompletionEvent.class);
TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
tce.setAttemptId(attemptId);
tce.setStatus(TaskAttemptCompletionEventStatus.SUCCEEDED);
job.handle(new JobTaskAttemptCompletedEvent(tce));
// complete the task itself
job.handle(new JobTaskEvent(taskId, TaskState.SUCCEEDED));
Assert.assertEquals(JobState.RUNNING, job.getState());
}
}
// add an event for a node transition
NodeReport firstMapperNodeReport = nodeReports.get(0);
NodeReport secondMapperNodeReport = nodeReports.get(1);
job.handle(new JobUpdatedNodesEvent(job.getID(), Collections.singletonList(firstMapperNodeReport)));
dispatcher.await();
// complete the reducer
for (TaskId taskId : job.tasks.keySet()) {
if (taskId.getTaskType() == TaskType.REDUCE) {
job.handle(new JobTaskEvent(taskId, TaskState.SUCCEEDED));
}
}
// add another event for a node transition for the other mapper
// this should not trigger rescheduling
job.handle(new JobUpdatedNodesEvent(job.getID(), Collections.singletonList(secondMapperNodeReport)));
// complete the first mapper that was rescheduled
TaskId firstMapper = nodeReportsToTaskIds.get(firstMapperNodeReport);
job.handle(new JobTaskEvent(firstMapper, TaskState.SUCCEEDED));
// verify the state is moving to committing
assertJobState(job, JobStateInternal.COMMITTING);
// let the committer complete and verify the job succeeds
syncBarrier.await();
assertJobState(job, JobStateInternal.SUCCEEDED);
dispatcher.stop();
commitHandler.stop();
}
use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TestTaskAttempt method testAppDiognosticEventOnNewTask.
@Test
public void testAppDiognosticEventOnNewTask() throws Exception {
ApplicationId appId = ApplicationId.newInstance(1, 2);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
JobId jobId = MRBuilderUtils.newJobId(appId, 1);
TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
Path jobFile = mock(Path.class);
MockEventHandler eventHandler = new MockEventHandler();
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
when(taListener.getAddress()).thenReturn(new InetSocketAddress("localhost", 0));
JobConf jobConf = new JobConf();
jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
jobConf.setBoolean("fs.file.impl.disable.cache", true);
jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");
TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });
AppContext appCtx = mock(AppContext.class);
ClusterInfo clusterInfo = mock(ClusterInfo.class);
Resource resource = mock(Resource.class);
when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
when(resource.getMemorySize()).thenReturn(1024L);
setupTaskAttemptFinishingMonitor(eventHandler, jobConf, appCtx);
TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1, splits, jobConf, taListener, new Token(), new Credentials(), SystemClock.getInstance(), appCtx);
NodeId nid = NodeId.newInstance("127.0.0.1", 0);
ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
Container container = mock(Container.class);
when(container.getId()).thenReturn(contId);
when(container.getNodeId()).thenReturn(nid);
when(container.getNodeHttpAddress()).thenReturn("localhost:0");
taImpl.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptId, "Task got killed"));
assertFalse("InternalError occurred trying to handle TA_DIAGNOSTICS_UPDATE on assigned task", eventHandler.internalError);
}
use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TestTaskAttempt method testContainerKillWhileRunning.
@Test
public void testContainerKillWhileRunning() throws Exception {
ApplicationId appId = ApplicationId.newInstance(1, 2);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
JobId jobId = MRBuilderUtils.newJobId(appId, 1);
TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
Path jobFile = mock(Path.class);
MockEventHandler eventHandler = new MockEventHandler();
TaskAttemptListener taListener = mock(TaskAttemptListener.class);
when(taListener.getAddress()).thenReturn(new InetSocketAddress("localhost", 0));
JobConf jobConf = new JobConf();
jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
jobConf.setBoolean("fs.file.impl.disable.cache", true);
jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");
TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });
AppContext appCtx = mock(AppContext.class);
ClusterInfo clusterInfo = mock(ClusterInfo.class);
Resource resource = mock(Resource.class);
when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
when(resource.getMemorySize()).thenReturn(1024L);
TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1, splits, jobConf, taListener, new Token(), new Credentials(), SystemClock.getInstance(), appCtx);
NodeId nid = NodeId.newInstance("127.0.0.2", 0);
ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
Container container = mock(Container.class);
when(container.getId()).thenReturn(contId);
when(container.getNodeId()).thenReturn(nid);
when(container.getNodeHttpAddress()).thenReturn("localhost:0");
taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_SCHEDULE));
taImpl.handle(new TaskAttemptContainerAssignedEvent(attemptId, container, mock(Map.class)));
taImpl.handle(new TaskAttemptContainerLaunchedEvent(attemptId, 0));
assertEquals("Task attempt is not in running state", taImpl.getState(), TaskAttemptState.RUNNING);
taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_KILL));
assertFalse("InternalError occurred trying to handle TA_KILL", eventHandler.internalError);
assertEquals("Task should be in KILL_CONTAINER_CLEANUP state", TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP, taImpl.getInternalState());
}
Aggregations