Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
In the class TestRecovery, the method recoveryChecker:
private void recoveryChecker(MapTaskImpl checkTask, TaskState finalState,
    Map<TaskAttemptID, TaskAttemptState> finalAttemptStates,
    ArgumentCaptor<Event> arg, List<EventType> expectedJobHistoryEvents,
    long expectedMapLaunches, long expectedFailedMaps) {
  assertEquals("Final State of Task", finalState, checkTask.getState());
  Map<TaskAttemptId, TaskAttempt> recoveredAttempts = checkTask.getAttempts();
  assertEquals("Expected Number of Task Attempts",
      finalAttemptStates.size(), recoveredAttempts.size());
  for (TaskAttemptID taID : finalAttemptStates.keySet()) {
    assertEquals("Expected Task Attempt State", finalAttemptStates.get(taID),
        recoveredAttempts.get(TypeConverter.toYarn(taID)).getState());
  }
  Iterator<Event> ie = arg.getAllValues().iterator();
  int eventNum = 0;
  long totalLaunchedMaps = 0;
  long totalFailedMaps = 0;
  boolean jobTaskEventReceived = false;
  while (ie.hasNext()) {
    Object current = ie.next();
    ++eventNum;
    LOG.info(eventNum + " " + current.getClass().getName());
    if (current instanceof JobHistoryEvent) {
      JobHistoryEvent jhe = (JobHistoryEvent) current;
      LOG.info(expectedJobHistoryEvents.get(0).toString() + " "
          + jhe.getHistoryEvent().getEventType().toString() + " " + jhe.getJobID());
      assertEquals(expectedJobHistoryEvents.get(0), jhe.getHistoryEvent().getEventType());
      expectedJobHistoryEvents.remove(0);
    } else if (current instanceof JobCounterUpdateEvent) {
      JobCounterUpdateEvent jcue = (JobCounterUpdateEvent) current;
      boolean containsUpdates = jcue.getCounterUpdates().size() > 0;
      // There are no updates in a JobCounterUpdateEvent emitted on
      // TaskAttempt recovery. Check that first.
      if (containsUpdates) {
        LOG.info("JobCounterUpdateEvent " + jcue.getCounterUpdates().get(0).getCounterKey()
            + " " + jcue.getCounterUpdates().get(0).getIncrementValue());
        if (jcue.getCounterUpdates().get(0).getCounterKey() == JobCounter.NUM_FAILED_MAPS) {
          totalFailedMaps += jcue.getCounterUpdates().get(0).getIncrementValue();
        } else if (jcue.getCounterUpdates().get(0).getCounterKey() == JobCounter.TOTAL_LAUNCHED_MAPS) {
          totalLaunchedMaps += jcue.getCounterUpdates().get(0).getIncrementValue();
        }
      }
    } else if (current instanceof JobTaskEvent) {
      JobTaskEvent jte = (JobTaskEvent) current;
      assertEquals(jte.getState(), finalState);
      jobTaskEventReceived = true;
    }
  }
  assertTrue(jobTaskEventReceived || (finalState == TaskState.RUNNING));
  assertEquals("Did not process all expected JobHistoryEvents",
      0, expectedJobHistoryEvents.size());
  assertEquals("Expected Map Launches", expectedMapLaunches, totalLaunchedMaps);
  assertEquals("Expected Failed Maps", expectedFailedMaps, totalFailedMaps);
}
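The checker above keys its expected states by the classic org.apache.hadoop.mapreduce.TaskAttemptID but reads recovered attempts back through the YARN-side TaskAttemptId, bridging the two with TypeConverter. A minimal sketch of that round trip (the job identifier and indices here are made up for illustration):

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;

public class TypeConverterSketch {
  public static void main(String[] args) {
    // A classic mapreduce-side ID: job "jt"/1, map task 0, first attempt.
    TaskAttemptID mrId = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
    // Convert to the YARN-side record, the key type of MapTaskImpl.getAttempts().
    TaskAttemptId yarnId = TypeConverter.toYarn(mrId);
    // And back again; the round trip preserves equality.
    TaskAttemptID back = TypeConverter.fromYarn(yarnId);
    System.out.println(mrId.equals(back)); // prints true
  }
}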
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
In the class JobHistoryParser, the method handleTaskAttemptStartedEvent:
private void handleTaskAttemptStartedEvent(TaskAttemptStartedEvent event) {
  TaskAttemptID attemptId = event.getTaskAttemptId();
  TaskInfo taskInfo = info.tasksMap.get(event.getTaskId());
  TaskAttemptInfo attemptInfo = new TaskAttemptInfo();
  attemptInfo.startTime = event.getStartTime();
  attemptInfo.attemptId = event.getTaskAttemptId();
  attemptInfo.httpPort = event.getHttpPort();
  attemptInfo.trackerName = StringInterner.weakIntern(event.getTrackerName());
  attemptInfo.taskType = event.getTaskType();
  attemptInfo.shufflePort = event.getShufflePort();
  attemptInfo.containerId = event.getContainerId();
  taskInfo.attemptsMap.put(attemptId, attemptInfo);
}
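This handler is one of many that JobHistoryParser replays while walking a job history file; the attempts map it fills is what callers eventually read back, keyed by TaskAttemptID. A sketch of driving the parser end to end (the history-file path is an assumption; supply your own .jhist file):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;

public class HistoryDump {
  public static void main(String[] args) throws IOException {
    Path historyFile = new Path(args[0]); // path to a .jhist file
    FileSystem fs = FileSystem.get(new Configuration());
    JobHistoryParser parser = new JobHistoryParser(fs, historyFile);
    // parse() replays events through handlers like the one above.
    JobInfo jobInfo = parser.parse();
    for (TaskInfo task : jobInfo.getAllTasks().values()) {
      for (TaskAttemptInfo attempt : task.getAllTaskAttempts().values()) {
        TaskAttemptID id = attempt.getAttemptId();
        System.out.println(id + " started at " + attempt.getStartTime()
            + " on " + attempt.getTrackerName());
      }
    }
  }
}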
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
In the class FileOutputCommitter, the method commitTask:
@Private
public void commitTask(TaskAttemptContext context, Path taskAttemptPath)
    throws IOException {
  TaskAttemptID attemptId = context.getTaskAttemptID();
  if (hasOutputPath()) {
    context.progress();
    if (taskAttemptPath == null) {
      taskAttemptPath = getTaskAttemptPath(context);
    }
    FileSystem fs = taskAttemptPath.getFileSystem(context.getConfiguration());
    FileStatus taskAttemptDirStatus;
    try {
      taskAttemptDirStatus = fs.getFileStatus(taskAttemptPath);
    } catch (FileNotFoundException e) {
      taskAttemptDirStatus = null;
    }
    if (taskAttemptDirStatus != null) {
      if (algorithmVersion == 1) {
        Path committedTaskPath = getCommittedTaskPath(context);
        if (fs.exists(committedTaskPath)) {
          if (!fs.delete(committedTaskPath, true)) {
            throw new IOException("Could not delete " + committedTaskPath);
          }
        }
        if (!fs.rename(taskAttemptPath, committedTaskPath)) {
          throw new IOException("Could not rename " + taskAttemptPath
              + " to " + committedTaskPath);
        }
        LOG.info("Saved output of task '" + attemptId + "' to " + committedTaskPath);
      } else {
        // directly merge everything from taskAttemptPath to output directory
        mergePaths(fs, taskAttemptDirStatus, outputPath);
        LOG.info("Saved output of task '" + attemptId + "' to " + outputPath);
      }
    } else {
      LOG.warn("No Output found for " + attemptId);
    }
  } else {
    LOG.warn("Output Path is null in commitTask()");
  }
}
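Which branch runs is decided by algorithmVersion, which FileOutputCommitter reads from the job configuration. A minimal sketch of selecting the merge-style commit (version 2) via the standard property key:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class CommitterVersionSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Version 1 renames the attempt dir to a committed-task dir in commitTask
    // and defers the final move to job commit; version 2 merges attempt output
    // straight into the output directory, as in the else branch above.
    conf.setInt("mapreduce.fileoutputcommitter.algorithm.version", 2);
    Job job = Job.getInstance(conf, "committer-version-demo");
    // ... set input/output paths, mapper and reducer as usual ...
  }
}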
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
In the class FileOutputCommitter, the method recoverTask:
@Override
public void recoverTask(TaskAttemptContext context) throws IOException {
  if (hasOutputPath()) {
    context.progress();
    TaskAttemptID attemptId = context.getTaskAttemptID();
    int previousAttempt = getAppAttemptId(context) - 1;
    if (previousAttempt < 0) {
      throw new IOException("Cannot recover task output for first attempt...");
    }
    Path previousCommittedTaskPath = getCommittedTaskPath(previousAttempt, context);
    FileSystem fs = previousCommittedTaskPath.getFileSystem(context.getConfiguration());
    if (LOG.isDebugEnabled()) {
      LOG.debug("Trying to recover task from " + previousCommittedTaskPath);
    }
    if (algorithmVersion == 1) {
      if (fs.exists(previousCommittedTaskPath)) {
        Path committedTaskPath = getCommittedTaskPath(context);
        if (!fs.delete(committedTaskPath, true) && fs.exists(committedTaskPath)) {
          throw new IOException("Could not delete " + committedTaskPath);
        }
        // Rename can fail if the parent directory does not yet exist.
        Path committedParent = committedTaskPath.getParent();
        fs.mkdirs(committedParent);
        if (!fs.rename(previousCommittedTaskPath, committedTaskPath)) {
          throw new IOException("Could not rename " + previousCommittedTaskPath
              + " to " + committedTaskPath);
        }
      } else {
        LOG.warn(attemptId + " had no output to recover.");
      }
    } else {
      // check if there is any output left in committedTaskPath
      try {
        FileStatus from = fs.getFileStatus(previousCommittedTaskPath);
        LOG.info("Recovering task for upgrading scenario, moving files from "
            + previousCommittedTaskPath + " to " + outputPath);
        mergePaths(fs, from, outputPath);
      } catch (FileNotFoundException ignored) {
      }
      LOG.info("Done recovering task " + attemptId);
    }
  } else {
    LOG.warn("Output Path is null in recoverTask()");
  }
}
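recoverTask is normally driven by a restarted MR application master rather than by user code: algorithm 1 keeps committed task output under a per-app-attempt tree, and the method renames output from the previous attempt's tree into the current one. A sketch, with the directory layout noted as an approximation of the Hadoop source:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;

public class RecoverySketch {
  // Approximate layout under the job output directory (algorithm 1):
  //   $output/_temporary/$appAttempt/_temporary/$taskAttemptID  <- in-progress attempt
  //   $output/_temporary/$appAttempt/$taskID                    <- committed task
  // recoverTask() looks in the ($appAttempt - 1) tree for a committed task and
  // renames it into the current app attempt's tree.
  static void recover(Path output, TaskAttemptContext context) throws Exception {
    FileOutputCommitter committer = new FileOutputCommitter(output, context);
    committer.recoverTask(context);
  }
}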
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
In the class LocalFetcher, the method run:
public void run() {
  // Create a worklist of task attempts to work over.
  Set<TaskAttemptID> maps = new HashSet<TaskAttemptID>();
  for (TaskAttemptID map : localMapFiles.keySet()) {
    maps.add(map);
  }
  while (maps.size() > 0) {
    try {
      // If merge is on, block.
      merger.waitForResource();
      metrics.threadBusy();
      // Copy as much as is possible.
      doCopy(maps);
      metrics.threadFree();
    } catch (InterruptedException ie) {
      // Swallow the interrupt and retry; the loop exits once the worklist is drained.
    } catch (Throwable t) {
      exceptionReporter.reportException(t);
    }
  }
}
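The loop terminates because, in the Hadoop source, doCopy removes each map from the set once its output has been fetched, so the while condition eventually fails. The defensive copy at the top can also be written as a single constructor call; a small sketch, assuming a localMapFiles map shaped like the fetcher's:

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class WorklistSketch {
  // Copy the key set so that draining the worklist does not
  // mutate localMapFiles itself (keySet() is a live view).
  static Set<TaskAttemptID> worklist(Map<TaskAttemptID, ?> localMapFiles) {
    return new HashSet<TaskAttemptID>(localMapFiles.keySet());
  }
}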