Example 51 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class TestRecovery method recoveryChecker.

private void recoveryChecker(MapTaskImpl checkTask, TaskState finalState, Map<TaskAttemptID, TaskAttemptState> finalAttemptStates, ArgumentCaptor<Event> arg, List<EventType> expectedJobHistoryEvents, long expectedMapLaunches, long expectedFailedMaps) {
    assertEquals("Final State of Task", finalState, checkTask.getState());
    Map<TaskAttemptId, TaskAttempt> recoveredAttempts = checkTask.getAttempts();
    assertEquals("Expected Number of Task Attempts", finalAttemptStates.size(), recoveredAttempts.size());
    for (TaskAttemptID taID : finalAttemptStates.keySet()) {
        assertEquals("Expected Task Attempt State", finalAttemptStates.get(taID), recoveredAttempts.get(TypeConverter.toYarn(taID)).getState());
    }
    Iterator<Event> ie = arg.getAllValues().iterator();
    int eventNum = 0;
    long totalLaunchedMaps = 0;
    long totalFailedMaps = 0;
    boolean jobTaskEventReceived = false;
    while (ie.hasNext()) {
        Object current = ie.next();
        ++eventNum;
        LOG.info(eventNum + " " + current.getClass().getName());
        if (current instanceof JobHistoryEvent) {
            JobHistoryEvent jhe = (JobHistoryEvent) current;
            LOG.info(expectedJobHistoryEvents.get(0).toString() + " " + jhe.getHistoryEvent().getEventType().toString() + " " + jhe.getJobID());
            assertEquals(expectedJobHistoryEvents.get(0), jhe.getHistoryEvent().getEventType());
            expectedJobHistoryEvents.remove(0);
        } else if (current instanceof JobCounterUpdateEvent) {
            JobCounterUpdateEvent jcue = (JobCounterUpdateEvent) current;
            boolean containsUpdates = jcue.getCounterUpdates().size() > 0;
            // Counter-update events generated during recovery may carry no
            // updates; only inspect events that actually contain them.
            if (containsUpdates) {
                LOG.info("JobCounterUpdateEvent " + jcue.getCounterUpdates().get(0).getCounterKey() + " " + jcue.getCounterUpdates().get(0).getIncrementValue());
                if (jcue.getCounterUpdates().get(0).getCounterKey() == JobCounter.NUM_FAILED_MAPS) {
                    totalFailedMaps += jcue.getCounterUpdates().get(0).getIncrementValue();
                } else if (jcue.getCounterUpdates().get(0).getCounterKey() == JobCounter.TOTAL_LAUNCHED_MAPS) {
                    totalLaunchedMaps += jcue.getCounterUpdates().get(0).getIncrementValue();
                }
            }
        } else if (current instanceof JobTaskEvent) {
            JobTaskEvent jte = (JobTaskEvent) current;
            assertEquals(finalState, jte.getState());
            jobTaskEventReceived = true;
        }
    }
    assertTrue(jobTaskEventReceived || (finalState == TaskState.RUNNING));
    assertEquals("Did not process all expected JobHistoryEvents", 0, expectedJobHistoryEvents.size());
    assertEquals("Expected Map Launches", expectedMapLaunches, totalLaunchedMaps);
    assertEquals("Expected Failed Maps", expectedFailedMaps, totalFailedMaps);
}
Also used : TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) JobCounterUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent) JobTaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent) TaskAttemptContainerLaunchedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent) Event(org.apache.hadoop.mapreduce.jobhistory.Event) TaskRecoverEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent) ContainerLauncherEvent(org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)
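
The lookups above hinge on TypeConverter.toYarn bridging the classic org.apache.hadoop.mapreduce.TaskAttemptID to the YARN v2 TaskAttemptId record. A minimal sketch of that round trip, using made-up job and task numbers:

private static void convertIds() {
    // Classic mapreduce ID: cluster timestamp "1234567890123" (illustrative),
    // job 1, map task 0, attempt 0.
    TaskAttemptID mapredId = new TaskAttemptID("1234567890123", 1, TaskType.MAP, 0, 0);
    // Convert to the YARN v2 record that the MR AppMaster uses internally...
    TaskAttemptId yarnId = TypeConverter.toYarn(mapredId);
    // ...and back again.
    TaskAttemptID roundTripped = TypeConverter.fromYarn(yarnId);
    assert mapredId.equals(roundTripped);
}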

Example 52 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class JobHistoryParser method handleTaskAttemptStartedEvent.

private void handleTaskAttemptStartedEvent(TaskAttemptStartedEvent event) {
    TaskAttemptID attemptId = event.getTaskAttemptId();
    TaskInfo taskInfo = info.tasksMap.get(event.getTaskId());
    TaskAttemptInfo attemptInfo = new TaskAttemptInfo();
    attemptInfo.startTime = event.getStartTime();
    attemptInfo.attemptId = attemptId;
    attemptInfo.httpPort = event.getHttpPort();
    attemptInfo.trackerName = StringInterner.weakIntern(event.getTrackerName());
    attemptInfo.taskType = event.getTaskType();
    attemptInfo.shufflePort = event.getShufflePort();
    attemptInfo.containerId = event.getContainerId();
    taskInfo.attemptsMap.put(attemptId, attemptInfo);
}
Also used : TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID)
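
This handler runs while JobHistoryParser.parse() replays events from a .jhist file; the parsed attempts then surface through the returned JobInfo. A hedged sketch of driving the parser, with a hypothetical history-file path:

private static void dumpAttempts(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    // Hypothetical history file; real paths come from the history server's done directory.
    Path historyFile = new Path("/tmp/job_1234567890123_0001.jhist");
    JobHistoryParser parser = new JobHistoryParser(fs, historyFile);
    JobHistoryParser.JobInfo jobInfo = parser.parse();
    for (JobHistoryParser.TaskInfo task : jobInfo.getAllTasks().values()) {
        for (Map.Entry<TaskAttemptID, JobHistoryParser.TaskAttemptInfo> e
                : task.getAllTaskAttempts().entrySet()) {
            System.out.println(e.getKey() + " started at " + e.getValue().getStartTime());
        }
    }
}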

Example 53 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class FileOutputCommitter method commitTask.

@Private
public void commitTask(TaskAttemptContext context, Path taskAttemptPath) throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    if (hasOutputPath()) {
        context.progress();
        if (taskAttemptPath == null) {
            taskAttemptPath = getTaskAttemptPath(context);
        }
        FileSystem fs = taskAttemptPath.getFileSystem(context.getConfiguration());
        FileStatus taskAttemptDirStatus;
        try {
            taskAttemptDirStatus = fs.getFileStatus(taskAttemptPath);
        } catch (FileNotFoundException e) {
            taskAttemptDirStatus = null;
        }
        if (taskAttemptDirStatus != null) {
            if (algorithmVersion == 1) {
                Path committedTaskPath = getCommittedTaskPath(context);
                if (fs.exists(committedTaskPath)) {
                    if (!fs.delete(committedTaskPath, true)) {
                        throw new IOException("Could not delete " + committedTaskPath);
                    }
                }
                if (!fs.rename(taskAttemptPath, committedTaskPath)) {
                    throw new IOException("Could not rename " + taskAttemptPath + " to " + committedTaskPath);
                }
                LOG.info("Saved output of task '" + attemptId + "' to " + committedTaskPath);
            } else {
                // directly merge everything from taskAttemptPath to output directory
                mergePaths(fs, taskAttemptDirStatus, outputPath);
                LOG.info("Saved output of task '" + attemptId + "' to " + outputPath);
            }
        } else {
            LOG.warn("No Output found for " + attemptId);
        }
    } else {
        LOG.warn("Output Path is null in commitTask()");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) FileSystem(org.apache.hadoop.fs.FileSystem) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) Private(org.apache.hadoop.classification.InterfaceAudience.Private)
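
For context, commitTask is one step of the commit protocol: the committer is constructed per attempt, setupTask runs before any output is written, and commitTask promotes the attempt directory once the attempt succeeds. A minimal sketch, with an illustrative output path and attempt ID:

private static void runCommitProtocol() throws IOException {
    Configuration conf = new Configuration();
    // Illustrative attempt ID in the canonical string form.
    TaskAttemptID attemptId = TaskAttemptID.forName("attempt_1234567890123_0001_m_000000_0");
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
    // Illustrative final output directory.
    Path output = new Path("/tmp/example-output");
    FileOutputCommitter committer = new FileOutputCommitter(output, context);
    committer.setupTask(context);
    // ... the task writes its files under committer.getTaskAttemptPath(context) ...
    if (committer.needsTaskCommit(context)) {
        committer.commitTask(context);
    }
}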

Example 54 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class FileOutputCommitter method recoverTask.

@Override
public void recoverTask(TaskAttemptContext context) throws IOException {
    if (hasOutputPath()) {
        context.progress();
        TaskAttemptID attemptId = context.getTaskAttemptID();
        int previousAttempt = getAppAttemptId(context) - 1;
        if (previousAttempt < 0) {
            throw new IOException("Cannot recover task output for first attempt...");
        }
        Path previousCommittedTaskPath = getCommittedTaskPath(previousAttempt, context);
        FileSystem fs = previousCommittedTaskPath.getFileSystem(context.getConfiguration());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Trying to recover task from " + previousCommittedTaskPath);
        }
        if (algorithmVersion == 1) {
            if (fs.exists(previousCommittedTaskPath)) {
                Path committedTaskPath = getCommittedTaskPath(context);
                if (!fs.delete(committedTaskPath, true) && fs.exists(committedTaskPath)) {
                    throw new IOException("Could not delete " + committedTaskPath);
                }
                //Rename can fail if the parent directory does not yet exist.
                Path committedParent = committedTaskPath.getParent();
                fs.mkdirs(committedParent);
                if (!fs.rename(previousCommittedTaskPath, committedTaskPath)) {
                    throw new IOException("Could not rename " + previousCommittedTaskPath + " to " + committedTaskPath);
                }
            } else {
                LOG.warn(attemptId + " had no output to recover.");
            }
        } else {
            // Algorithm v2: merge any output left in the previous attempt's
            // committed path directly into the final output directory.
            try {
                FileStatus from = fs.getFileStatus(previousCommittedTaskPath);
                LOG.info("Recovering task for upgrading scenario, moving files from " + previousCommittedTaskPath + " to " + outputPath);
                mergePaths(fs, from, outputPath);
            } catch (FileNotFoundException ignored) {
                // The previous attempt committed nothing; there is nothing to recover.
            }
            LOG.info("Done recovering task " + attemptId);
        }
    } else {
        LOG.warn("Output Path is null in recoverTask()");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) FileSystem(org.apache.hadoop.fs.FileSystem) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException)
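
The algorithmVersion branch above is driven by a job configuration key; a hedged sketch of opting into the v2 behavior, under which recovery reduces to merging any previously committed files straight into the output directory:

Configuration conf = new Configuration();
// Version 1 (the long-standing default) renames the attempt dir to a per-task
// committed dir at commit time; version 2 merges files directly into the output.
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 2);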

Example 55 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class LocalFetcher method run.

public void run() {
    // Create a worklist of task attempts to work over.
    Set<TaskAttemptID> maps = new HashSet<TaskAttemptID>(localMapFiles.keySet());
    while (maps.size() > 0) {
        try {
            // If merge is on, block
            merger.waitForResource();
            metrics.threadBusy();
            // Copy as much as is possible.
            doCopy(maps);
            metrics.threadFree();
        } catch (InterruptedException ie) {
            // Interrupted while waiting on the merger; loop around and retry.
        } catch (Throwable t) {
            exceptionReporter.reportException(t);
        }
    }
}
Also used : TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) HashSet(java.util.HashSet)
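
All of these examples key shuffle and commit state by TaskAttemptID, whose canonical string form round-trips cleanly through forName. A small sketch, using made-up numbers:

private static void roundTripId() {
    TaskAttemptID id = new TaskAttemptID("1234567890123", 1, TaskType.MAP, 5, 0);
    String s = id.toString();  // "attempt_1234567890123_0001_m_000005_0"
    TaskAttemptID parsed = TaskAttemptID.forName(s);
    assert id.equals(parsed);
    assert id.getTaskID().getId() == 5;  // task number within the job
}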

Aggregations

TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID) 78
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext) 35
Test (org.junit.Test) 34
Configuration (org.apache.hadoop.conf.Configuration) 28
Path (org.apache.hadoop.fs.Path) 25
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) 22
IOException (java.io.IOException) 19
JobID (org.apache.hadoop.mapreduce.JobID) 16
TaskID (org.apache.hadoop.mapreduce.TaskID) 15
File (java.io.File) 14
Job (org.apache.hadoop.mapreduce.Job) 14
ArrayList (java.util.ArrayList) 13
JobContext (org.apache.hadoop.mapreduce.JobContext) 12
LongWritable (org.apache.hadoop.io.LongWritable) 11
InputSplit (org.apache.hadoop.mapreduce.InputSplit) 10
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter) 10
FileSystem (org.apache.hadoop.fs.FileSystem) 9
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo) 8
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl) 8
HashMap (java.util.HashMap) 7