Search in sources :

Example 6 with TaskInfo

use of org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo in project hadoop by apache.

the class TestRecovery method testRecoveryTaskSuccessAllAttemptsFail.

@Test
public void testRecoveryTaskSuccessAllAttemptsFail() {
    LOG.info("--- START:  testRecoveryTaskSuccessAllAttemptsFail ---");
    long clusterTimestamp = System.currentTimeMillis();
    EventHandler mockEventHandler = mock(EventHandler.class);
    MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, mockEventHandler);
    TaskId taskId = recoverMapTask.getID();
    JobID jobID = new JobID(Long.toString(clusterTimestamp), 1);
    TaskID taskID = new TaskID(jobID, org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId());
    //Mock up the TaskAttempts
    Map<TaskAttemptID, TaskAttemptInfo> mockTaskAttempts = new HashMap<TaskAttemptID, TaskAttemptInfo>();
    TaskAttemptID taId1 = new TaskAttemptID(taskID, 2);
    TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, TaskAttemptState.FAILED);
    mockTaskAttempts.put(taId1, mockTAinfo1);
    TaskAttemptID taId2 = new TaskAttemptID(taskID, 1);
    TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, TaskAttemptState.FAILED);
    mockTaskAttempts.put(taId2, mockTAinfo2);
    OutputCommitter mockCommitter = mock(OutputCommitter.class);
    TaskInfo mockTaskInfo = mock(TaskInfo.class);
    when(mockTaskInfo.getTaskStatus()).thenReturn("SUCCEEDED");
    when(mockTaskInfo.getTaskId()).thenReturn(taskID);
    when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts);
    recoverMapTask.handle(new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true));
    ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
    verify(mockEventHandler, atLeast(1)).handle((org.apache.hadoop.yarn.event.Event) arg.capture());
    Map<TaskAttemptID, TaskAttemptState> finalAttemptStates = new HashMap<TaskAttemptID, TaskAttemptState>();
    finalAttemptStates.put(taId1, TaskAttemptState.FAILED);
    finalAttemptStates.put(taId2, TaskAttemptState.FAILED);
    // check for one new attempt launched since successful attempt not found
    TaskAttemptID taId3 = new TaskAttemptID(taskID, 2000);
    finalAttemptStates.put(taId3, TaskAttemptState.NEW);
    List<EventType> jobHistoryEvents = new ArrayList<EventType>();
    jobHistoryEvents.add(EventType.TASK_STARTED);
    jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED);
    jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED);
    jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED);
    jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED);
    recoveryChecker(recoverMapTask, TaskState.RUNNING, finalAttemptStates, arg, jobHistoryEvents, 2L, 2L);
}
Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskID(org.apache.hadoop.mapreduce.TaskID) HashMap(java.util.HashMap) TaskAttemptEventType(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType) EventType(org.apache.hadoop.mapreduce.jobhistory.EventType) TaskEventType(org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) ArrayList(java.util.ArrayList) EventHandler(org.apache.hadoop.yarn.event.EventHandler) JobHistoryEventHandler(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler) MapTaskImpl(org.apache.hadoop.mapreduce.v2.app.job.impl.MapTaskImpl) TaskInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo) TaskAttemptState(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState) TaskAttemptInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo) TaskAttemptContainerLaunchedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent) Event(org.apache.hadoop.mapreduce.jobhistory.Event) TaskRecoverEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent) JobTaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) JobCounterUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent) ContainerLauncherEvent(org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent) JobID(org.apache.hadoop.mapreduce.JobID) TaskRecoverEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent) Test(org.junit.Test)

Example 7 with TaskInfo

use of org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo in project hadoop by apache.

the class TaskImpl method recover.

/**
   * Recover a completed task from a previous application attempt
   * @param taskInfo recovered info about the task
   * @param recoverTaskOutput whether to recover task outputs
   * @return state of the task after recovery
   */
private TaskStateInternal recover(TaskInfo taskInfo, OutputCommitter committer, boolean recoverTaskOutput) {
    LOG.info("Recovering task " + taskId + " from prior app attempt, status was " + taskInfo.getTaskStatus());
    scheduledTime = taskInfo.getStartTime();
    sendTaskStartedEvent();
    Collection<TaskAttemptInfo> attemptInfos = taskInfo.getAllTaskAttempts().values();
    if (attemptInfos.size() > 0) {
        metrics.launchedTask(this);
    }
    // recover the attempts for this task in the order they finished
    // so task attempt completion events are ordered properly
    int savedNextAttemptNumber = nextAttemptNumber;
    ArrayList<TaskAttemptInfo> taInfos = new ArrayList<TaskAttemptInfo>(taskInfo.getAllTaskAttempts().values());
    Collections.sort(taInfos, TA_INFO_COMPARATOR);
    for (TaskAttemptInfo taInfo : taInfos) {
        nextAttemptNumber = taInfo.getAttemptId().getId();
        TaskAttemptImpl attempt = addAttempt(Avataar.VIRGIN);
        // handle the recovery inline so attempts complete before task does
        attempt.handle(new TaskAttemptRecoverEvent(attempt.getID(), taInfo, committer, recoverTaskOutput));
        finishedAttempts.add(attempt.getID());
        TaskAttemptCompletionEventStatus taces = null;
        TaskAttemptState attemptState = attempt.getState();
        switch(attemptState) {
            case FAILED:
                taces = TaskAttemptCompletionEventStatus.FAILED;
                break;
            case KILLED:
                taces = TaskAttemptCompletionEventStatus.KILLED;
                break;
            case SUCCEEDED:
                taces = TaskAttemptCompletionEventStatus.SUCCEEDED;
                break;
            default:
                throw new IllegalStateException("Unexpected attempt state during recovery: " + attemptState);
        }
        if (attemptState == TaskAttemptState.FAILED) {
            failedAttempts.add(attempt.getID());
            if (failedAttempts.size() >= maxAttempts) {
                taces = TaskAttemptCompletionEventStatus.TIPFAILED;
            }
        }
        // TODO: this shouldn't be necessary after MAPREDUCE-4330
        if (successfulAttempt == null) {
            handleTaskAttemptCompletion(attempt.getID(), taces);
            if (attemptState == TaskAttemptState.SUCCEEDED) {
                successfulAttempt = attempt.getID();
            }
        }
    }
    nextAttemptNumber = savedNextAttemptNumber;
    TaskStateInternal taskState = TaskStateInternal.valueOf(taskInfo.getTaskStatus());
    switch(taskState) {
        case SUCCEEDED:
            if (successfulAttempt != null) {
                sendTaskSucceededEvents();
            } else {
                LOG.info("Missing successful attempt for task " + taskId + ", recovering as RUNNING");
                // there must have been a fetch failure and the retry wasn't complete
                taskState = TaskStateInternal.RUNNING;
                metrics.runningTask(this);
                addAndScheduleAttempt(Avataar.VIRGIN);
            }
            break;
        case FAILED:
        case KILLED:
            {
                if (taskState == TaskStateInternal.KILLED && attemptInfos.size() == 0) {
                    metrics.endWaitingTask(this);
                }
                TaskFailedEvent tfe = new TaskFailedEvent(taskInfo.getTaskId(), taskInfo.getFinishTime(), taskInfo.getTaskType(), taskInfo.getError(), taskInfo.getTaskStatus(), taskInfo.getFailedDueToAttemptId(), taskInfo.getCounters());
                eventHandler.handle(new JobHistoryEvent(taskId.getJobId(), tfe));
                eventHandler.handle(new JobTaskEvent(taskId, getExternalState(taskState)));
                break;
            }
        default:
            throw new java.lang.AssertionError("Unexpected recovered task state: " + taskState);
    }
    return taskState;
}
Also used : TaskStateInternal(org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal) ArrayList(java.util.ArrayList) TaskAttemptCompletionEventStatus(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) TaskAttemptState(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState) JobTaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent) TaskAttemptInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo) TaskFailedEvent(org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent) TaskAttemptRecoverEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptRecoverEvent)

Example 8 with TaskInfo

use of org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo in project hadoop by apache.

the class TasksBlock method render.

@Override
protected void render(Block html) {
    if (app.getJob() == null) {
        html.h2($(TITLE));
        return;
    }
    TaskType type = null;
    String symbol = $(TASK_TYPE);
    if (!symbol.isEmpty()) {
        type = MRApps.taskType(symbol);
    }
    TBODY<TABLE<Hamlet>> tbody = html.table("#tasks").thead().tr().th("Task").th("Progress").th("Status").th("State").th("Start Time").th("Finish Time").th("Elapsed Time")._()._().tbody();
    StringBuilder tasksTableData = new StringBuilder("[\n");
    for (Task task : app.getJob().getTasks().values()) {
        if (type != null && task.getType() != type) {
            continue;
        }
        String taskStateStr = $(TASK_STATE);
        if (taskStateStr == null || taskStateStr.trim().equals("")) {
            taskStateStr = "ALL";
        }
        if (!taskStateStr.equalsIgnoreCase("ALL")) {
            try {
                // get stateUI enum
                MRApps.TaskStateUI stateUI = MRApps.taskState(taskStateStr);
                if (!stateUI.correspondsTo(task.getState())) {
                    continue;
                }
            } catch (IllegalArgumentException e) {
                // not supported state, ignore
                continue;
            }
        }
        TaskInfo info = new TaskInfo(task);
        String tid = info.getId();
        String pct = StringUtils.format("%.2f", info.getProgress());
        tasksTableData.append("[\"<a href='").append(url("task", tid)).append("'>").append(tid).append("</a>\",\"").append("<br title='").append(pct).append("'> <div class='").append(C_PROGRESSBAR).append("' title='").append(join(pct, '%')).append("'> ").append("<div class='").append(C_PROGRESSBAR_VALUE).append("' style='").append(join("width:", pct, '%')).append("'> </div> </div>\",\"").append(StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(info.getStatus()))).append("\",\"").append(info.getState()).append("\",\"").append(info.getStartTime()).append("\",\"").append(info.getFinishTime()).append("\",\"").append(info.getElapsedTime()).append("\"],\n");
    }
    //Remove the last comma and close off the array of arrays
    if (tasksTableData.charAt(tasksTableData.length() - 2) == ',') {
        tasksTableData.delete(tasksTableData.length() - 2, tasksTableData.length() - 1);
    }
    tasksTableData.append("]");
    html.script().$type("text/javascript")._("var tasksTableData=" + tasksTableData)._();
    tbody._()._();
}
Also used : TABLE(org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE) MRApps(org.apache.hadoop.mapreduce.v2.util.MRApps) TaskInfo(org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskType(org.apache.hadoop.mapreduce.v2.api.records.TaskType)

Example 9 with TaskInfo

use of org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo in project hadoop by apache.

the class AMWebServices method getJobTasks.

@GET
@Path("/jobs/{jobid}/tasks")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public TasksInfo getJobTasks(@Context HttpServletRequest hsr, @PathParam("jobid") String jid, @QueryParam("type") String type) {
    init();
    Job job = getJobFromJobIdString(jid, appCtx);
    checkAccess(job, hsr);
    TasksInfo allTasks = new TasksInfo();
    for (Task task : job.getTasks().values()) {
        TaskType ttype = null;
        if (type != null && !type.isEmpty()) {
            try {
                ttype = MRApps.taskType(type);
            } catch (YarnRuntimeException e) {
                throw new BadRequestException("tasktype must be either m or r");
            }
        }
        if (ttype != null && task.getType() != ttype) {
            continue;
        }
        allTasks.add(new TaskInfo(task));
    }
    return allTasks;
}
Also used : YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) TaskInfo(org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskType(org.apache.hadoop.mapreduce.v2.api.records.TaskType) BadRequestException(org.apache.hadoop.yarn.webapp.BadRequestException) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) TasksInfo(org.apache.hadoop.mapreduce.v2.app.webapp.dao.TasksInfo) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 10 with TaskInfo

use of org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo in project hadoop by apache.

the class AMWebServices method getJobTask.

@GET
@Path("/jobs/{jobid}/tasks/{taskid}")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public TaskInfo getJobTask(@Context HttpServletRequest hsr, @PathParam("jobid") String jid, @PathParam("taskid") String tid) {
    init();
    Job job = getJobFromJobIdString(jid, appCtx);
    checkAccess(job, hsr);
    Task task = getTaskFromTaskIdString(tid, job);
    return new TaskInfo(task);
}
Also used : TaskInfo(org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Aggregations

TaskInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo)12 TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo)10 HashMap (java.util.HashMap)9 TaskID (org.apache.hadoop.mapreduce.TaskID)9 TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId)9 Task (org.apache.hadoop.mapreduce.v2.app.job.Task)9 Test (org.junit.Test)8 TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID)7 JobHistoryEvent (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent)7 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)7 ArrayList (java.util.ArrayList)6 TaskAttemptState (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState)6 JobTaskEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent)6 TaskInfo (org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo)6 JobID (org.apache.hadoop.mapreduce.JobID)5 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)5 Event (org.apache.hadoop.mapreduce.jobhistory.Event)5 EventType (org.apache.hadoop.mapreduce.jobhistory.EventType)5 JobHistoryEventHandler (org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler)5 JobCounterUpdateEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent)5