Search in sources:

Example 6 with TaskAttemptStatus

Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.

The class TestSpeculativeExecutionWithMRApp, method createTaskAttemptStatus.

private TaskAttemptStatus createTaskAttemptStatus(TaskAttemptId id, float progress, TaskAttemptState state) {
    TaskAttemptStatus status = new TaskAttemptStatus();
    status.id = id;
    status.progress = progress;
    status.taskState = state;
    return status;
}
Also used : TaskAttemptStatus(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus)
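
A helper like this keeps speculation tests terse: fabricate a status record, then feed it to the app exactly as a live heartbeat would arrive. The snippet below is a minimal usage sketch, not lifted verbatim from the test; taskAttempt and appEventHandler are assumed to come from the surrounding test setup.

// Fabricate a half-finished heartbeat for a running attempt and deliver
// it through the app's event handler, the same path a real task
// heartbeat takes.
TaskAttemptStatus status =
    createTaskAttemptStatus(taskAttempt.getID(), 0.5f, TaskAttemptState.RUNNING);
appEventHandler.handle(new TaskAttemptStatusUpdateEvent(status.id, status));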

Example 7 with TaskAttemptStatus

Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.

The class TaskAttemptListenerImpl, method statusUpdate.

@Override
public AMFeedback statusUpdate(TaskAttemptID taskAttemptID, TaskStatus taskStatus) throws IOException, InterruptedException {
    org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptID = TypeConverter.toYarn(taskAttemptID);
    AMFeedback feedback = new AMFeedback();
    feedback.setTaskFound(true);
    // Propagating preemption to the task if TASK_PREEMPTION is enabled
    if (getConfig().getBoolean(MRJobConfig.TASK_PREEMPTION, false) && preemptionPolicy.isPreempted(yarnAttemptID)) {
        feedback.setPreemption(true);
        LOG.info("Setting preemption bit for task: " + yarnAttemptID + " of type " + yarnAttemptID.getTaskId().getTaskType());
    }
    if (taskStatus == null) {
        // We are using statusUpdate only as a simple ping
        if (LOG.isDebugEnabled()) {
            LOG.debug("Ping from " + taskAttemptID.toString());
        }
        return feedback;
    }
    // If we are here, there is an actual status update to be processed
    taskHeartbeatHandler.progressing(yarnAttemptID);
    TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
    taskAttemptStatus.id = yarnAttemptID;
    // Task sends the updated progress to the TT.
    taskAttemptStatus.progress = taskStatus.getProgress();
    LOG.info("Progress of TaskAttempt " + taskAttemptID + " is : " + taskStatus.getProgress());
    // Task sends the updated state-string to the TT.
    taskAttemptStatus.stateString = taskStatus.getStateString();
    // Task sends the updated phase to the TT.
    taskAttemptStatus.phase = TypeConverter.toYarn(taskStatus.getPhase());
    // Counters are updated by the task. Convert counters into new format as
    // that is the primary storage format inside the AM to avoid multiple
    // conversions and unnecessary heap usage.
    taskAttemptStatus.counters = new org.apache.hadoop.mapreduce.Counters(taskStatus.getCounters());
    // Map Finish time set by the task (map only)
    if (taskStatus.getIsMap() && taskStatus.getMapFinishTime() != 0) {
        taskAttemptStatus.mapFinishTime = taskStatus.getMapFinishTime();
    }
    // Shuffle Finish time set by the task (reduce only).
    if (!taskStatus.getIsMap() && taskStatus.getShuffleFinishTime() != 0) {
        taskAttemptStatus.shuffleFinishTime = taskStatus.getShuffleFinishTime();
    }
    // Sort finish time set by the task (reduce only).
    if (!taskStatus.getIsMap() && taskStatus.getSortFinishTime() != 0) {
        taskAttemptStatus.sortFinishTime = taskStatus.getSortFinishTime();
    }
    // Set the fetch failures
    if (taskStatus.getFetchFailedMaps() != null && taskStatus.getFetchFailedMaps().size() > 0) {
        taskAttemptStatus.fetchFailedMaps = new ArrayList<org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId>();
        for (TaskAttemptID failedMapId : taskStatus.getFetchFailedMaps()) {
            taskAttemptStatus.fetchFailedMaps.add(TypeConverter.toYarn(failedMapId));
        }
    }
    // Task sends the information about the nextRecordRange to the TT
    //    TODO: The following are not needed here, but needed to be set somewhere inside AppMaster.
    //    taskStatus.getRunState(); // Set by the TT/JT. Transform into a state TODO
    //    taskStatus.getStartTime(); // Used to be set by the TaskTracker. This should be set by getTask().
    //    taskStatus.getFinishTime(); // Used to be set by TT/JT. Should be set when task finishes
    //    // This was used by TT to do counter updates only once every minute. So this
    //    // isn't ever changed by the Task itself.
    //    taskStatus.getIncludeCounters();
    context.getEventHandler().handle(new TaskAttemptStatusUpdateEvent(taskAttemptStatus.id, taskAttemptStatus));
    return feedback;
}
Also used : TaskAttemptStatus(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus) TaskAttemptStatusUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent)
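
From the task's side, the null-TaskStatus branch above makes statusUpdate double as a liveness probe. A hedged caller sketch follows, assuming umbilical is a TaskUmbilicalProtocol proxy bound to this listener (the variable name is illustrative):

// Heartbeat without a payload: a null TaskStatus is treated as a
// simple ping by the listener above.
AMFeedback feedback = umbilical.statusUpdate(taskAttemptID, null);
if (!feedback.getTaskFound()) {
    // The AM no longer tracks this attempt, so the task should exit.
}
if (feedback.getPreemption()) {
    // The AM has asked this attempt to yield; begin an orderly stop.
}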

Example 8 with TaskAttemptStatus

Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.

The class StartEndTimesBase, method updateAttempt.

@Override
public void updateAttempt(TaskAttemptStatus status, long timestamp) {
    TaskAttemptId attemptID = status.id;
    TaskId taskID = attemptID.getTaskId();
    JobId jobID = taskID.getJobId();
    Job job = context.getJob(jobID);
    if (job == null) {
        return;
    }
    Task task = job.getTask(taskID);
    if (task == null) {
        return;
    }
    Long boxedStart = startTimes.get(attemptID);
    long start = boxedStart == null ? Long.MIN_VALUE : boxedStart;
    TaskAttempt taskAttempt = task.getAttempt(attemptID);
    if (taskAttempt.getState() == TaskAttemptState.SUCCEEDED) {
        boolean isNew = false;
        // Is this a new success?
        synchronized (doneTasks) {
            if (!doneTasks.contains(task)) {
                doneTasks.add(task);
                isNew = true;
            }
        }
        // A task can be reported as succeeded more than once (for example,
        //  when a speculative attempt also finishes); we only count the first one.
        if (isNew) {
            long finish = timestamp;
            if (start > 1L && finish > 1L && start <= finish) {
                long duration = finish - start;
                DataStatistics statistics = dataStatisticsForTask(taskID);
                if (statistics != null) {
                    statistics.add(duration);
                }
            }
        }
    }
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId)
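
The startTimes map consulted above is filled in when an attempt first registers with the estimator. In StartEndTimesBase that registration amounts to the sketch below (shown for context; the authoritative body lives in the class itself):

@Override
public void enrollAttempt(TaskAttemptStatus status, long timestamp) {
    // Record when we first heard about the attempt, so updateAttempt can
    // later turn a SUCCEEDED report into a duration sample.
    startTimes.put(status.id, timestamp);
}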

Example 9 with TaskAttemptStatus

Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.

The class DefaultSpeculator, method speculationValue.

/*   *************************************************************    */
// This is the code section that runs periodically and adds speculations for
//  those jobs that need them.
// This can return a few magic values for tasks that shouldn't speculate:
//  returns ON_SCHEDULE if thresholdRuntime(taskID) says that we should not
//     consider speculating this task
//  returns ALREADY_SPECULATING if another attempt is already running; this
//     takes priority over the other values
//  returns TOO_NEW if our companion task hasn't gotten any information yet
//  returns TOO_LATE_TO_SPECULATE if a fresh attempt could not finish any
//     sooner than the running one
//  returns PROGRESS_IS_GOOD if the task is sailing through
//  returns NOT_RUNNING if the task is not running
//
// All of these values are negative.  Any value that should be allowed to
//  speculate is 0 or positive.
private long speculationValue(TaskId taskID, long now) {
    Job job = context.getJob(taskID.getJobId());
    Task task = job.getTask(taskID);
    Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
    long acceptableRuntime = Long.MIN_VALUE;
    long result = Long.MIN_VALUE;
    if (!mayHaveSpeculated.contains(taskID)) {
        acceptableRuntime = estimator.thresholdRuntime(taskID);
        if (acceptableRuntime == Long.MAX_VALUE) {
            return ON_SCHEDULE;
        }
    }
    TaskAttemptId runningTaskAttemptID = null;
    int numberRunningAttempts = 0;
    for (TaskAttempt taskAttempt : attempts.values()) {
        if (taskAttempt.getState() == TaskAttemptState.RUNNING || taskAttempt.getState() == TaskAttemptState.STARTING) {
            if (++numberRunningAttempts > 1) {
                return ALREADY_SPECULATING;
            }
            runningTaskAttemptID = taskAttempt.getID();
            long estimatedRunTime = estimator.estimatedRuntime(runningTaskAttemptID);
            long taskAttemptStartTime = estimator.attemptEnrolledTime(runningTaskAttemptID);
            if (taskAttemptStartTime > now) {
                // This background process ran before we could process the task
                //  attempt status change that chronicles the attempt start
                return TOO_NEW;
            }
            long estimatedEndTime = estimatedRunTime + taskAttemptStartTime;
            long estimatedReplacementEndTime = now + estimator.estimatedNewAttemptRuntime(taskID);
            float progress = taskAttempt.getProgress();
            TaskAttemptHistoryStatistics data = runningTaskAttemptStatistics.get(runningTaskAttemptID);
            if (data == null) {
                runningTaskAttemptStatistics.put(runningTaskAttemptID, new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now));
            } else {
                if (estimatedRunTime == data.getEstimatedRunTime() && progress == data.getProgress()) {
                    // Previous stats are the same as the current stats
                    if (data.notHeartbeatedInAWhile(now)) {
                        // Stats have stagnated for a while, simulate heart-beat.
                        TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
                        taskAttemptStatus.id = runningTaskAttemptID;
                        taskAttemptStatus.progress = progress;
                        taskAttemptStatus.taskState = taskAttempt.getState();
                        // Now simulate the heart-beat
                        handleAttempt(taskAttemptStatus);
                    }
                } else {
                    // Stats have changed - update our data structure
                    data.setEstimatedRunTime(estimatedRunTime);
                    data.setProgress(progress);
                    data.resetHeartBeatTime(now);
                }
            }
            if (estimatedEndTime < now) {
                return PROGRESS_IS_GOOD;
            }
            if (estimatedReplacementEndTime >= estimatedEndTime) {
                return TOO_LATE_TO_SPECULATE;
            }
            result = estimatedEndTime - estimatedReplacementEndTime;
        }
    }
    // If we are here, there's at most one running task attempt.
    if (numberRunningAttempts == 0) {
        return NOT_RUNNING;
    }
    if (acceptableRuntime == Long.MIN_VALUE) {
        acceptableRuntime = estimator.thresholdRuntime(taskID);
        if (acceptableRuntime == Long.MAX_VALUE) {
            return ON_SCHEDULE;
        }
    }
    return result;
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptStatus(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job)
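
For context, DefaultSpeculator's periodic scan consumes these values by keeping only the most profitable candidate per task type. The loop below is a simplified, hedged sketch of that consumption; the real code also enforces per-job speculation caps, and addSpeculativeAttempt stands in for whatever actually launches the extra attempt:

// All sentinel values returned above are negative, so tasks that must
// not speculate lose the comparison naturally; per the contract above,
// any value of 0 or more is eligible.
long bestSpeculationValue = Long.MIN_VALUE;
TaskId bestTaskID = null;
for (Map.Entry<TaskId, Task> entry : job.getTasks(type).entrySet()) {
    long candidate = speculationValue(entry.getKey(), now);
    if (candidate > bestSpeculationValue) {
        bestSpeculationValue = candidate;
        bestTaskID = entry.getKey();
    }
}
if (bestTaskID != null && bestSpeculationValue >= 0) {
    addSpeculativeAttempt(bestTaskID);
}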

Example 10 with TaskAttemptStatus

Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.

The class TestMRClientService, method test.

@Test
public void test() throws Exception {
    MRAppWithClientService app = new MRAppWithClientService(1, 0, false);
    Configuration conf = new Configuration();
    Job job = app.submit(conf);
    app.waitForState(job, JobState.RUNNING);
    Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size());
    Iterator<Task> it = job.getTasks().values().iterator();
    Task task = it.next();
    app.waitForState(task, TaskState.RUNNING);
    TaskAttempt attempt = task.getAttempts().values().iterator().next();
    app.waitForState(attempt, TaskAttemptState.RUNNING);
    // send the diagnostic
    String diagnostic1 = "Diagnostic1";
    String diagnostic2 = "Diagnostic2";
    app.getContext().getEventHandler().handle(new TaskAttemptDiagnosticsUpdateEvent(attempt.getID(), diagnostic1));
    // build the status update
    TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
    taskAttemptStatus.id = attempt.getID();
    taskAttemptStatus.progress = 0.5f;
    taskAttemptStatus.stateString = "RUNNING";
    taskAttemptStatus.taskState = TaskAttemptState.RUNNING;
    taskAttemptStatus.phase = Phase.MAP;
    // send the status update
    app.getContext().getEventHandler().handle(new TaskAttemptStatusUpdateEvent(attempt.getID(), taskAttemptStatus));
    // Verify that all objects are fully populated by invoking RPCs.
    YarnRPC rpc = YarnRPC.create(conf);
    MRClientProtocol proxy = (MRClientProtocol) rpc.getProxy(MRClientProtocol.class, app.clientService.getBindAddress(), conf);
    GetCountersRequest gcRequest = recordFactory.newRecordInstance(GetCountersRequest.class);
    gcRequest.setJobId(job.getID());
    Assert.assertNotNull("Counters is null", proxy.getCounters(gcRequest).getCounters());
    GetJobReportRequest gjrRequest = recordFactory.newRecordInstance(GetJobReportRequest.class);
    gjrRequest.setJobId(job.getID());
    JobReport jr = proxy.getJobReport(gjrRequest).getJobReport();
    verifyJobReport(jr);
    GetTaskAttemptCompletionEventsRequest gtaceRequest = recordFactory.newRecordInstance(GetTaskAttemptCompletionEventsRequest.class);
    gtaceRequest.setJobId(job.getID());
    gtaceRequest.setFromEventId(0);
    gtaceRequest.setMaxEvents(10);
    Assert.assertNotNull("TaskCompletionEvents is null", proxy.getTaskAttemptCompletionEvents(gtaceRequest).getCompletionEventList());
    GetDiagnosticsRequest gdRequest = recordFactory.newRecordInstance(GetDiagnosticsRequest.class);
    gdRequest.setTaskAttemptId(attempt.getID());
    Assert.assertNotNull("Diagnostics is null", proxy.getDiagnostics(gdRequest).getDiagnosticsList());
    GetTaskAttemptReportRequest gtarRequest = recordFactory.newRecordInstance(GetTaskAttemptReportRequest.class);
    gtarRequest.setTaskAttemptId(attempt.getID());
    TaskAttemptReport tar = proxy.getTaskAttemptReport(gtarRequest).getTaskAttemptReport();
    verifyTaskAttemptReport(tar);
    GetTaskReportRequest gtrRequest = recordFactory.newRecordInstance(GetTaskReportRequest.class);
    gtrRequest.setTaskId(task.getID());
    Assert.assertNotNull("TaskReport is null", proxy.getTaskReport(gtrRequest).getTaskReport());
    GetTaskReportsRequest gtreportsRequest = recordFactory.newRecordInstance(GetTaskReportsRequest.class);
    gtreportsRequest.setJobId(job.getID());
    gtreportsRequest.setTaskType(TaskType.MAP);
    Assert.assertNotNull("TaskReports for map is null", proxy.getTaskReports(gtreportsRequest).getTaskReportList());
    gtreportsRequest = recordFactory.newRecordInstance(GetTaskReportsRequest.class);
    gtreportsRequest.setJobId(job.getID());
    gtreportsRequest.setTaskType(TaskType.REDUCE);
    Assert.assertNotNull("TaskReports for reduce is null", proxy.getTaskReports(gtreportsRequest).getTaskReportList());
    List<String> diag = proxy.getDiagnostics(gdRequest).getDiagnosticsList();
    Assert.assertEquals("Num diagnostics not correct", 1, diag.size());
    Assert.assertEquals("Diag 1 not correct", diagnostic1, diag.get(0).toString());
    TaskReport taskReport = proxy.getTaskReport(gtrRequest).getTaskReport();
    Assert.assertEquals("Num diagnostics not correct", 1, taskReport.getDiagnosticsCount());
    // Send the done signal to the task
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task.getAttempts().values().iterator().next().getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(job, JobState.SUCCEEDED);
    // For an invalid job id, an IOException should be thrown
    gtreportsRequest = recordFactory.newRecordInstance(GetTaskReportsRequest.class);
    gtreportsRequest.setJobId(TypeConverter.toYarn(JobID.forName("job_1415730144495_0001")));
    gtreportsRequest.setTaskType(TaskType.REDUCE);
    try {
        proxy.getTaskReports(gtreportsRequest);
        fail("IOException not thrown for invalid job id");
    } catch (IOException e) {
        // Expected
    }
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) GetTaskAttemptReportRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskAttemptReportRequest) MRClientProtocol(org.apache.hadoop.mapreduce.v2.api.MRClientProtocol) JobReport(org.apache.hadoop.mapreduce.v2.api.records.JobReport) GetTaskAttemptCompletionEventsRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskAttemptCompletionEventsRequest) GetTaskReportRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskReportRequest) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) GetDiagnosticsRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDiagnosticsRequest) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) GetCountersRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetCountersRequest) TaskReport(org.apache.hadoop.mapreduce.v2.api.records.TaskReport) TaskAttemptDiagnosticsUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent) TaskAttemptStatus(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) YarnRPC(org.apache.hadoop.yarn.ipc.YarnRPC) IOException(java.io.IOException) GetJobReportRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest) TaskAttemptStatusUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent) TaskAttemptReport(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport) GetTaskReportsRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskReportsRequest) Test(org.junit.Test)
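
The verifyJobReport and verifyTaskAttemptReport helpers called above are not shown on this page. Hypothetical minimal versions appear below, just to illustrate the kind of assertions involved; the real helpers in TestMRClientService check considerably more fields.

private void verifyJobReport(JobReport jr) {
    // Sanity checks only: the report exists and the job is still running.
    Assert.assertNotNull("JobReport is null", jr);
    Assert.assertEquals("Job is not running", JobState.RUNNING, jr.getJobState());
}

private void verifyTaskAttemptReport(TaskAttemptReport tar) {
    // Sanity checks only: the report exists and the attempt is running.
    Assert.assertNotNull("TaskAttemptReport is null", tar);
    Assert.assertEquals("Attempt is not running", TaskAttemptState.RUNNING,
        tar.getTaskAttemptState());
}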

Aggregations

Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 8
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 7
Job (org.apache.hadoop.mapreduce.v2.app.job.Job): 7
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 7
TaskAttemptStatus (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus): 7
TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId): 5
Configuration (org.apache.hadoop.conf.Configuration): 4
TaskAttemptStatusUpdateEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent): 4
TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent): 3
ControlledClock (org.apache.hadoop.yarn.util.ControlledClock): 3
Test (org.junit.Test): 3
Map (java.util.Map): 2
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 2
MRApp (org.apache.hadoop.mapreduce.v2.app.MRApp): 2
EventHandler (org.apache.hadoop.yarn.event.EventHandler): 2
Clock (org.apache.hadoop.yarn.util.Clock): 2
SystemClock (org.apache.hadoop.yarn.util.SystemClock): 2
IOException (java.io.IOException): 1
LinkedList (java.util.LinkedList): 1
Random (java.util.Random): 1