Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.
The class TestSpeculativeExecutionWithMRApp, method createTaskAttemptStatus.
private TaskAttemptStatus createTaskAttemptStatus(TaskAttemptId id,
    float progress, TaskAttemptState state) {
  TaskAttemptStatus status = new TaskAttemptStatus();
  status.id = id;
  status.progress = progress;
  status.taskState = state;
  return status;
}
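
For orientation, a hedged sketch of how a helper like this is typically exercised in the test: the fabricated status is wrapped in a TaskAttemptStatusUpdateEvent and dispatched through the app's event handler, mirroring the dispatch that statusUpdate performs below. The attempt and app variables are assumed to come from the surrounding test setup.

// Hypothetical usage inside the test (attempt and app are assumed from the test setup):
TaskAttemptStatus status =
    createTaskAttemptStatus(attempt.getID(), 0.8f, TaskAttemptState.RUNNING);
app.getContext().getEventHandler().handle(
    new TaskAttemptStatusUpdateEvent(status.id, status));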
Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.
The class TaskAttemptListenerImpl, method statusUpdate.
@Override
public AMFeedback statusUpdate(TaskAttemptID taskAttemptID,
    TaskStatus taskStatus) throws IOException, InterruptedException {
  org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptID =
      TypeConverter.toYarn(taskAttemptID);
  AMFeedback feedback = new AMFeedback();
  feedback.setTaskFound(true);
  // Propagate preemption to the task if TASK_PREEMPTION is enabled
  if (getConfig().getBoolean(MRJobConfig.TASK_PREEMPTION, false)
      && preemptionPolicy.isPreempted(yarnAttemptID)) {
    feedback.setPreemption(true);
    LOG.info("Setting preemption bit for task: " + yarnAttemptID
        + " of type " + yarnAttemptID.getTaskId().getTaskType());
  }
  if (taskStatus == null) {
    // We are using statusUpdate only as a simple ping
    if (LOG.isDebugEnabled()) {
      LOG.debug("Ping from " + taskAttemptID.toString());
    }
    return feedback;
  }
  // If we are here, there is an actual status update to be processed
  taskHeartbeatHandler.progressing(yarnAttemptID);
  TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
  taskAttemptStatus.id = yarnAttemptID;
  // Task sends the updated progress to the TT.
  taskAttemptStatus.progress = taskStatus.getProgress();
  LOG.info("Progress of TaskAttempt " + taskAttemptID + " is : "
      + taskStatus.getProgress());
  // Task sends the updated state-string to the TT.
  taskAttemptStatus.stateString = taskStatus.getStateString();
  // Task sends the updated phase to the TT.
  taskAttemptStatus.phase = TypeConverter.toYarn(taskStatus.getPhase());
  // Counters are updated by the task. Convert counters into new format as
  // that is the primary storage format inside the AM to avoid multiple
  // conversions and unnecessary heap usage.
  taskAttemptStatus.counters =
      new org.apache.hadoop.mapreduce.Counters(taskStatus.getCounters());
  // Map finish time set by the task (map only)
  if (taskStatus.getIsMap() && taskStatus.getMapFinishTime() != 0) {
    taskAttemptStatus.mapFinishTime = taskStatus.getMapFinishTime();
  }
  // Shuffle finish time set by the task (reduce only)
  if (!taskStatus.getIsMap() && taskStatus.getShuffleFinishTime() != 0) {
    taskAttemptStatus.shuffleFinishTime = taskStatus.getShuffleFinishTime();
  }
  // Sort finish time set by the task (reduce only)
  if (!taskStatus.getIsMap() && taskStatus.getSortFinishTime() != 0) {
    taskAttemptStatus.sortFinishTime = taskStatus.getSortFinishTime();
  }
  // Set the fetch failures
  if (taskStatus.getFetchFailedMaps() != null
      && taskStatus.getFetchFailedMaps().size() > 0) {
    taskAttemptStatus.fetchFailedMaps =
        new ArrayList<org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId>();
    for (TaskAttemptID failedMapId : taskStatus.getFetchFailedMaps()) {
      taskAttemptStatus.fetchFailedMaps.add(TypeConverter.toYarn(failedMapId));
    }
  }
  // Task sends the information about the nextRecordRange to the TT
  // TODO: The following are not needed here, but need to be set somewhere
  // inside the AppMaster:
  //   taskStatus.getRunState();   // Set by the TT/JT. Transform into a state. TODO
  //   taskStatus.getStartTime();  // Used to be set by the TaskTracker. Should be set by getTask().
  //   taskStatus.getFinishTime(); // Used to be set by TT/JT. Should be set when the task finishes.
  //   taskStatus.getIncludeCounters(); // Was used by the TT to do counter updates only once every
  //                                    // minute, so it isn't ever changed by the Task itself.
  context.getEventHandler().handle(
      new TaskAttemptStatusUpdateEvent(taskAttemptStatus.id, taskAttemptStatus));
  return feedback;
}
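
Note that a null taskStatus doubles as a liveness ping. A minimal task-side sketch of both paths over the umbilical protocol, assuming a TaskUmbilicalProtocol proxy named umbilical and that the AMFeedback getters mirror the setters used above:

// Hypothetical task-side calls (umbilical is an assumed TaskUmbilicalProtocol proxy):
AMFeedback ping = umbilical.statusUpdate(taskAttemptID, null); // simple ping, no payload
if (!ping.getTaskFound()) {
  // The AM no longer tracks this attempt; the task should shut down.
}
AMFeedback ack = umbilical.statusUpdate(taskAttemptID, myTaskStatus); // real update
if (ack.getPreemption()) {
  // The AM has asked this attempt to yield its container.
}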
Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.
The class StartEndTimesBase, method updateAttempt.
@Override
public void updateAttempt(TaskAttemptStatus status, long timestamp) {
  TaskAttemptId attemptID = status.id;
  TaskId taskID = attemptID.getTaskId();
  JobId jobID = taskID.getJobId();
  Job job = context.getJob(jobID);
  if (job == null) {
    return;
  }
  Task task = job.getTask(taskID);
  if (task == null) {
    return;
  }
  Long boxedStart = startTimes.get(attemptID);
  long start = boxedStart == null ? Long.MIN_VALUE : boxedStart;
  TaskAttempt taskAttempt = task.getAttempt(attemptID);
  if (taskAttempt.getState() == TaskAttemptState.SUCCEEDED) {
    boolean isNew = false;
    // Is this a new success?
    synchronized (doneTasks) {
      if (!doneTasks.contains(task)) {
        doneTasks.add(task);
        isNew = true;
      }
    }
    // Note that if a task completes twice [because of a previous speculation
    // and a race, or a success followed by loss of the machine with the
    // local data] we only count the first one.
    if (isNew) {
      long finish = timestamp;
      if (start > 1L && finish > 1L && start <= finish) {
        long duration = finish - start;
        DataStatistics statistics = dataStatisticsForTask(taskID);
        if (statistics != null) {
          statistics.add(duration);
        }
      }
    }
  }
}
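
The DataStatistics object that accumulates these durations is essentially a running mean/variance tracker. A simplified sketch of the idea, under the assumption that only add(), mean(), and var() are needed (this is a stand-in, not the real DataStatistics class, which also supports updating a sample in place):

// Minimal running-statistics sketch (simplified stand-in for illustration):
class RunningStats {
  private long count;
  private double sum;        // sum of samples
  private double sumSquares; // sum of squared samples

  synchronized void add(double sample) {
    count++;
    sum += sample;
    sumSquares += sample * sample;
  }

  synchronized double mean() {
    return count == 0 ? 0.0 : sum / count;
  }

  synchronized double var() {
    // E[x^2] - E[x]^2, clamped at zero against floating-point error
    double m = mean();
    return count == 0 ? 0.0 : Math.max(0.0, sumSquares / count - m * m);
  }
}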
Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.
The class DefaultSpeculator, method speculationValue.
/* ************************************************************* */
// This is the code section that runs periodically and adds speculations for
// those jobs that need them.
//
// It can return a few magic values for tasks that shouldn't speculate:
//
// returns ON_SCHEDULE if thresholdRuntime(taskID) says that we should not
//     consider speculating this task
// returns ALREADY_SPECULATING if the task is already being speculated.
//     This check has priority.
// returns TOO_NEW if our companion task hasn't gotten any information yet
// returns PROGRESS_IS_GOOD if the task is sailing through
// returns NOT_RUNNING if the task is not running
//
// All of these values are negative. Any value that should be allowed to
// speculate is 0 or positive.
private long speculationValue(TaskId taskID, long now) {
  Job job = context.getJob(taskID.getJobId());
  Task task = job.getTask(taskID);
  Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
  long acceptableRuntime = Long.MIN_VALUE;
  long result = Long.MIN_VALUE;
  if (!mayHaveSpeculated.contains(taskID)) {
    acceptableRuntime = estimator.thresholdRuntime(taskID);
    if (acceptableRuntime == Long.MAX_VALUE) {
      return ON_SCHEDULE;
    }
  }
  TaskAttemptId runningTaskAttemptID = null;
  int numberRunningAttempts = 0;
  for (TaskAttempt taskAttempt : attempts.values()) {
    if (taskAttempt.getState() == TaskAttemptState.RUNNING
        || taskAttempt.getState() == TaskAttemptState.STARTING) {
      if (++numberRunningAttempts > 1) {
        return ALREADY_SPECULATING;
      }
      runningTaskAttemptID = taskAttempt.getID();
      long estimatedRunTime = estimator.estimatedRuntime(runningTaskAttemptID);
      long taskAttemptStartTime =
          estimator.attemptEnrolledTime(runningTaskAttemptID);
      if (taskAttemptStartTime > now) {
        // This background process ran before we could process the task
        // attempt status change that chronicles the attempt start
        return TOO_NEW;
      }
      long estimatedEndTime = estimatedRunTime + taskAttemptStartTime;
      long estimatedReplacementEndTime =
          now + estimator.estimatedNewAttemptRuntime(taskID);
      float progress = taskAttempt.getProgress();
      TaskAttemptHistoryStatistics data =
          runningTaskAttemptStatistics.get(runningTaskAttemptID);
      if (data == null) {
        runningTaskAttemptStatistics.put(runningTaskAttemptID,
            new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now));
      } else {
        if (estimatedRunTime == data.getEstimatedRunTime()
            && progress == data.getProgress()) {
          // Previous stats are the same as the new stats
          if (data.notHeartbeatedInAWhile(now)) {
            // Stats have stagnated for a while, simulate a heartbeat.
            TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
            taskAttemptStatus.id = runningTaskAttemptID;
            taskAttemptStatus.progress = progress;
            taskAttemptStatus.taskState = taskAttempt.getState();
            // Now simulate the heartbeat
            handleAttempt(taskAttemptStatus);
          }
        } else {
          // Stats have changed - update our data structure
          data.setEstimatedRunTime(estimatedRunTime);
          data.setProgress(progress);
          data.resetHeartBeatTime(now);
        }
      }
      if (estimatedEndTime < now) {
        return PROGRESS_IS_GOOD;
      }
      if (estimatedReplacementEndTime >= estimatedEndTime) {
        return TOO_LATE_TO_SPECULATE;
      }
      result = estimatedEndTime - estimatedReplacementEndTime;
    }
  }
  // If we are here, there's at most one running task attempt.
  if (numberRunningAttempts == 0) {
    return NOT_RUNNING;
  }
  if (acceptableRuntime == Long.MIN_VALUE) {
    acceptableRuntime = estimator.thresholdRuntime(taskID);
    if (acceptableRuntime == Long.MAX_VALUE) {
      return ON_SCHEDULE;
    }
  }
  return result;
}
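
To make the return-value arithmetic concrete, a worked example with invented numbers:

// Hypothetical numbers (all invented) walking through the final branch:
long now = 100_000L;                  // current time, ms
long taskAttemptStartTime = 40_000L;  // when the running attempt enrolled
long estimatedRunTime = 90_000L;      // estimator's projected total runtime
long estimatedEndTime = estimatedRunTime + taskAttemptStartTime; // 130_000
long estimatedReplacementEndTime = now + 20_000L; // assuming a 20 s new-attempt estimate
// estimatedEndTime (130_000) is not < now (100_000), so not PROGRESS_IS_GOOD;
// the replacement would finish earlier (120_000 < 130_000), so not TOO_LATE;
// speculationValue returns 130_000 - 120_000 = 10_000, a positive value,
// meaning the task is eligible to speculate.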
Use of org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus in project hadoop by apache.
The class TestMRClientService, method test.
@Test
public void test() throws Exception {
  MRAppWithClientService app = new MRAppWithClientService(1, 0, false);
  Configuration conf = new Configuration();
  Job job = app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size());
  Iterator<Task> it = job.getTasks().values().iterator();
  Task task = it.next();
  app.waitForState(task, TaskState.RUNNING);
  TaskAttempt attempt = task.getAttempts().values().iterator().next();
  app.waitForState(attempt, TaskAttemptState.RUNNING);
  // Send the diagnostic
  String diagnostic1 = "Diagnostic1";
  String diagnostic2 = "Diagnostic2";
  app.getContext().getEventHandler().handle(
      new TaskAttemptDiagnosticsUpdateEvent(attempt.getID(), diagnostic1));
  // Build the status update
  TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
  taskAttemptStatus.id = attempt.getID();
  taskAttemptStatus.progress = 0.5f;
  taskAttemptStatus.stateString = "RUNNING";
  taskAttemptStatus.taskState = TaskAttemptState.RUNNING;
  taskAttemptStatus.phase = Phase.MAP;
  // Send the status update
  app.getContext().getEventHandler().handle(
      new TaskAttemptStatusUpdateEvent(attempt.getID(), taskAttemptStatus));
  // Verify that all objects are fully populated by invoking RPCs.
  YarnRPC rpc = YarnRPC.create(conf);
  MRClientProtocol proxy = (MRClientProtocol) rpc.getProxy(
      MRClientProtocol.class, app.clientService.getBindAddress(), conf);
  GetCountersRequest gcRequest =
      recordFactory.newRecordInstance(GetCountersRequest.class);
  gcRequest.setJobId(job.getID());
  Assert.assertNotNull("Counters is null",
      proxy.getCounters(gcRequest).getCounters());
  GetJobReportRequest gjrRequest =
      recordFactory.newRecordInstance(GetJobReportRequest.class);
  gjrRequest.setJobId(job.getID());
  JobReport jr = proxy.getJobReport(gjrRequest).getJobReport();
  verifyJobReport(jr);
  GetTaskAttemptCompletionEventsRequest gtaceRequest =
      recordFactory.newRecordInstance(GetTaskAttemptCompletionEventsRequest.class);
  gtaceRequest.setJobId(job.getID());
  gtaceRequest.setFromEventId(0);
  gtaceRequest.setMaxEvents(10);
  Assert.assertNotNull("TaskCompletionEvents is null",
      proxy.getTaskAttemptCompletionEvents(gtaceRequest).getCompletionEventList());
  GetDiagnosticsRequest gdRequest =
      recordFactory.newRecordInstance(GetDiagnosticsRequest.class);
  gdRequest.setTaskAttemptId(attempt.getID());
  Assert.assertNotNull("Diagnostics is null",
      proxy.getDiagnostics(gdRequest).getDiagnosticsList());
  GetTaskAttemptReportRequest gtarRequest =
      recordFactory.newRecordInstance(GetTaskAttemptReportRequest.class);
  gtarRequest.setTaskAttemptId(attempt.getID());
  TaskAttemptReport tar =
      proxy.getTaskAttemptReport(gtarRequest).getTaskAttemptReport();
  verifyTaskAttemptReport(tar);
  GetTaskReportRequest gtrRequest =
      recordFactory.newRecordInstance(GetTaskReportRequest.class);
  gtrRequest.setTaskId(task.getID());
  Assert.assertNotNull("TaskReport is null",
      proxy.getTaskReport(gtrRequest).getTaskReport());
  GetTaskReportsRequest gtreportsRequest =
      recordFactory.newRecordInstance(GetTaskReportsRequest.class);
  gtreportsRequest.setJobId(job.getID());
  gtreportsRequest.setTaskType(TaskType.MAP);
  Assert.assertNotNull("TaskReports for map is null",
      proxy.getTaskReports(gtreportsRequest).getTaskReportList());
  gtreportsRequest = recordFactory.newRecordInstance(GetTaskReportsRequest.class);
  gtreportsRequest.setJobId(job.getID());
  gtreportsRequest.setTaskType(TaskType.REDUCE);
  Assert.assertNotNull("TaskReports for reduce is null",
      proxy.getTaskReports(gtreportsRequest).getTaskReportList());
  List<String> diag = proxy.getDiagnostics(gdRequest).getDiagnosticsList();
  Assert.assertEquals("Num diagnostics not correct", 1, diag.size());
  Assert.assertEquals("Diag 1 not correct", diagnostic1, diag.get(0).toString());
  TaskReport taskReport = proxy.getTaskReport(gtrRequest).getTaskReport();
  Assert.assertEquals("Num diagnostics not correct", 1,
      taskReport.getDiagnosticsCount());
  // Send the done signal to the task
  app.getContext().getEventHandler().handle(new TaskAttemptEvent(
      task.getAttempts().values().iterator().next().getID(),
      TaskAttemptEventType.TA_DONE));
  app.waitForState(job, JobState.SUCCEEDED);
  // For an invalid job id, an IOException should be thrown
  gtreportsRequest = recordFactory.newRecordInstance(GetTaskReportsRequest.class);
  gtreportsRequest.setJobId(
      TypeConverter.toYarn(JobID.forName("job_1415730144495_0001")));
  gtreportsRequest.setTaskType(TaskType.REDUCE);
  try {
    proxy.getTaskReports(gtreportsRequest);
    fail("IOException not thrown for invalid job id");
  } catch (IOException e) {
    // Expected
  }
}
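
The helpers verifyJobReport and verifyTaskAttemptReport are defined elsewhere in the test class and are not shown here. As a hedged illustration only, a sketch of the kind of assertions such a helper might make (the exact checks in the real test are assumptions):

// Hypothetical sketch of a report-verification helper:
private void verifyTaskAttemptReport(TaskAttemptReport tar) {
  Assert.assertEquals("TaskAttempt state not correct",
      TaskAttemptState.RUNNING, tar.getTaskAttemptState());
  Assert.assertNotNull("TaskAttemptId is null", tar.getTaskAttemptId());
  Assert.assertEquals("Phase not propagated from the status update",
      Phase.MAP, tar.getPhase());
}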