use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class TaskImpl method selectBestAttempt.
/**
 * Picks the task attempt with the highest reported progress, skipping every
 * attempt whose state is FAILED or KILLED.
 *
 * <p>If no eligible attempt reports progress above zero, the first eligible
 * attempt in iteration order is returned. Always called inside the read lock.
 *
 * @return the best eligible attempt, or {@code null} if there is none
 */
private TaskAttempt selectBestAttempt() {
  TaskAttempt best = null;
  float bestProgress = 0f;
  for (TaskAttempt candidate : attempts.values()) {
    switch (candidate.getState()) {
      case FAILED:
      case KILLED:
        // Failed and killed attempts never qualify.
        break;
      default:
        if (best == null) {
          // First eligible attempt seen: use it as the fallback answer.
          best = candidate;
        }
        float candidateProgress = candidate.getProgress();
        if (candidateProgress > bestProgress) {
          best = candidate;
          bestProgress = candidateProgress;
        }
        break;
    }
  }
  return best;
}
use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class TaskSchedulerManager method handleTASucceeded.
/**
 * Handles a successfully finished task attempt: when the event carries a
 * container id, the container and its node are notified of the success; the
 * attempt is then de-allocated from the task scheduler named by the event.
 *
 * @param event the scheduler event describing the ended attempt
 */
private void handleTASucceeded(AMSchedulerEventTAEnded event) {
  final TaskAttempt succeededAttempt = event.getAttempt();
  final ContainerId containerId = event.getUsedContainerId();

  // Container and node notifications are only sent when the event carries a
  // container id.
  if (event.getUsedContainerId() != null) {
    sendEvent(new AMContainerEventTASucceeded(containerId, event.getAttemptID()));
    sendEvent(new AMNodeEventTaskAttemptSucceeded(
        appContext.getAllContainers().get(containerId).getContainer().getNodeId(),
        event.getSchedulerId(),
        containerId,
        event.getAttemptID()));
  }

  final boolean containerWasAllocated;
  try {
    containerWasAllocated = taskSchedulers[event.getSchedulerId()]
        .deallocateTask(succeededAttempt, true, null, event.getDiagnostics());
  } catch (Exception e) {
    // Any scheduler failure is logged, reported as a fatal task scheduler
    // service error, and ends handling of this event.
    final String msg = "Error in TaskScheduler for handling Task De-allocation"
        + ", eventType=" + event.getType()
        + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext)
        + ", taskAttemptId=" + succeededAttempt.getID();
    LOG.error(msg, e);
    sendEvent(new DAGAppMasterEventUserServiceFatalError(
        DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR, msg, e));
    return;
  }

  if (containerWasAllocated) {
    return;
  }
  LOG.error("De-allocated successful task: " + succeededAttempt.getID()
      + ", but TaskScheduler reported no container assigned to task");
}
use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class LegacySpeculator method speculationValue.
/* ************************************************************* */
// This is the code section that runs periodically and adds speculations for
// those jobs that need them.
// This can return a few magic values for tasks that shouldn't speculate:
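// returns ON_SCHEDULE if thresholdRuntime(taskID) says that we should not
// consider speculating this task (or, when shouldUseTimeout is true, if
// the running attempt has not timed out yet)
// returns ALREADY_SPECULATING if that is true. This has priority.
// returns TOO_NEW if our companion task hasn't gotten any information
// returns PROGRESS_IS_GOOD if the task is sailing through
// returns TOO_LATE_TO_SPECULATE if a replacement attempt started now is not
// estimated to finish before the running attempt
// returns NOT_RUNNING if the task is not running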
//
// All of these values are negative. Any value that should be allowed to
// speculate is 0 or positive.
//
// If shouldUseTimeout is true, we will use timeout to decide on
// speculation instead of the task statistics. This can be useful, for
// example for single task vertices for which there are no tasks to compare
// with
/**
 * Computes the speculation value of a task at time {@code now}.
 *
 * @param task the task to evaluate
 * @param now the current time; compared against the estimator's enrolled time
 *     of the running attempt
 * @param shouldUseTimeout when true, the decision is based on taskTimeout
 *     instead of the estimator's runtime statistics
 * @return one of NOT_RUNNING, ON_SCHEDULE, ALREADY_SPECULATING, TOO_NEW,
 *     PROGRESS_IS_GOOD or TOO_LATE_TO_SPECULATE when the task should not
 *     speculate; otherwise {@code Long.MAX_VALUE} for a timed-out attempt in
 *     timeout mode, or the estimated end time of the running attempt minus
 *     the estimated end time of a replacement started now
 */
private long speculationValue(Task task, long now, boolean shouldUseTimeout) {
Map<TezTaskAttemptID, TaskAttempt> attempts = task.getAttempts();
TezTaskID taskID = task.getTaskId();
// Long.MIN_VALUE marks "threshold not fetched yet"; it is checked again
// after the loop.
long acceptableRuntime = Long.MIN_VALUE;
long result = Long.MIN_VALUE;
// short circuit completed tasks. no need to spend time on them
if (task.getState() == TaskState.SUCCEEDED) {
return NOT_RUNNING;
}
// Ask the estimator for the threshold up front only for tasks that are not
// in mayHaveSpeculated, and only when statistics (not the timeout) are used.
if (!mayHaveSpeculated.contains(taskID) && !shouldUseTimeout) {
acceptableRuntime = estimator.thresholdRuntime(taskID);
if (acceptableRuntime == Long.MAX_VALUE) {
return ON_SCHEDULE;
}
}
TezTaskAttemptID runningTaskAttemptID = null;
int numberRunningAttempts = 0;
for (TaskAttempt taskAttempt : attempts.values()) {
// Only RUNNING or STARTING attempts are considered.
if (taskAttempt.getState() == TaskAttemptState.RUNNING || taskAttempt.getState() == TaskAttemptState.STARTING) {
// A second running/starting attempt means a speculation already exists.
if (++numberRunningAttempts > 1) {
return ALREADY_SPECULATING;
}
runningTaskAttemptID = taskAttempt.getID();
long taskAttemptStartTime = estimator.attemptEnrolledTime(runningTaskAttemptID);
if (taskAttemptStartTime > now) {
// The enrolled time is later than 'now'; presumably this pass ran before
// the attempt status change that chronicles the attempt start was
// processed.
return TOO_NEW;
}
if (shouldUseTimeout) {
if ((now - taskAttemptStartTime) > taskTimeout) {
// If the task has timed out, then we want to schedule a speculation
// immediately. However we cannot return immediately since we may
// already have a speculation running.
result = Long.MAX_VALUE;
} else {
// Task has not timed out so we are good
return ON_SCHEDULE;
}
} else {
long estimatedRunTime = estimator.estimatedRuntime(runningTaskAttemptID);
long estimatedEndTime = estimatedRunTime + taskAttemptStartTime;
long estimatedReplacementEndTime = now + estimator.newAttemptEstimatedRuntime();
float progress = taskAttempt.getProgress();
// Track the last seen estimate/progress per attempt so stagnation can be
// detected on later passes.
TaskAttemptHistoryStatistics data = runningTaskAttemptStatistics.get(runningTaskAttemptID);
if (data == null) {
// First observation of this attempt: record the current stats.
runningTaskAttemptStatistics.put(runningTaskAttemptID, new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now));
} else {
if (estimatedRunTime == data.getEstimatedRunTime() && progress == data.getProgress()) {
// Previous stats are the same as the current stats
if (data.notHeartbeatedInAWhile(now)) {
// Stats have stagnated for a while, so simulate a heart-beat by issuing
// a status update with the attempt's current state and the clock time.
statusUpdate(taskAttempt.getID(), taskAttempt.getState(), clock.getTime());
}
} else {
// Stats have changed - update our data structure
data.setEstimatedRunTime(estimatedRunTime);
data.setProgress(progress);
data.resetHeartBeatTime(now);
}
}
// The estimated end time of the running attempt is already behind 'now'.
if (estimatedEndTime < now) {
return PROGRESS_IS_GOOD;
}
// A replacement started now is not estimated to finish before the running
// attempt does.
if (estimatedReplacementEndTime >= estimatedEndTime) {
return TOO_LATE_TO_SPECULATE;
}
// How much earlier a replacement is estimated to finish than the running
// attempt.
result = estimatedEndTime - estimatedReplacementEndTime;
}
}
}
// If we are here, there's at most one running or starting task attempt.
if (numberRunningAttempts == 0) {
return NOT_RUNNING;
}
// The threshold was not fetched before the loop (e.g. the task is in
// mayHaveSpeculated): fetch it now and honour the ON_SCHEDULE verdict.
if ((acceptableRuntime == Long.MIN_VALUE) && !shouldUseTimeout) {
acceptableRuntime = estimator.thresholdRuntime(taskID);
if (acceptableRuntime == Long.MAX_VALUE) {
return ON_SCHEDULE;
}
}
return result;
}
use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class LegacyTaskRuntimeEstimator method updateAttempt.
/**
 * Processes a status update for a task attempt and, if the attempt is
 * currently RUNNING, refreshes its runtime estimate and the variance of that
 * estimate.
 *
 * <p>The estimate is the time elapsed since the recorded start divided by the
 * attempt's progress (floored at 0.0001). When no positive start time is
 * recorded, or {@code timestamp} is not after it, both values are set to -1.
 *
 * @param attemptID id of the attempt being updated
 * @param state state reported with the update; forwarded to the superclass
 *     (the RUNNING check below uses the attempt's own current state)
 * @param timestamp time of the update, compared against the recorded start
 */
@Override
public void updateAttempt(TezTaskAttemptID attemptID, TaskAttemptState state, long timestamp) {
  super.updateAttempt(attemptID, state, timestamp);

  Task task = vertex.getTask(attemptID.getTaskID());
  if (task == null) {
    return;
  }
  TaskAttempt taskAttempt = task.getAttempt(attemptID);
  if (taskAttempt == null) {
    return;
  }

  float progress = taskAttempt.getProgress();
  Long boxedStart = startTimes.get(attemptID);
  // Long.MIN_VALUE stands for "no start time recorded" and fails the
  // start > 0 check below.
  long start = boxedStart == null ? Long.MIN_VALUE : boxedStart;

  if (taskAttempt.getState() == TaskAttemptState.RUNNING) {
    // Look up (or create) the holders for this attempt. Both maps are handled
    // the same way: putIfAbsent followed by a re-read, so that an entry added
    // between the first lookup and the insert is picked up instead of leaving
    // the local reference null and silently dropping the estimate.
    AtomicLong estimateContainer = attemptRuntimeEstimates.get(taskAttempt);
    if (estimateContainer == null) {
      attemptRuntimeEstimates.putIfAbsent(taskAttempt, new AtomicLong());
      estimateContainer = attemptRuntimeEstimates.get(taskAttempt);
    }
    AtomicLong estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt);
    if (estimateVarianceContainer == null) {
      attemptRuntimeEstimateVariances.putIfAbsent(taskAttempt, new AtomicLong());
      estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt);
    }

    long estimate = -1;
    long varianceEstimate = -1;
    // Only estimate once a positive start time is known and the update is
    // later than it; otherwise both values stay at -1.
    if (start > 0 && timestamp > start) {
      estimate = (long) ((timestamp - start) / Math.max(0.0001, progress));
      varianceEstimate = (long) (estimate * progress / 10);
    }

    if (estimateContainer != null) {
      estimateContainer.set(estimate);
    }
    if (estimateVarianceContainer != null) {
      estimateVarianceContainer.set(varianceEstimate);
    }
  }
}
use of org.apache.tez.dag.app.dag.TaskAttempt in project tez by apache.
the class TestDAGScheduler method testConcurrencyLimit.
/**
 * Exercises vertex concurrency limits on {@code DAGSchedulerNaturalOrder}:
 * a limit of 0 is not effective (every request is scheduled), while a limit
 * of 2 schedules two attempts, buffers further requests, and releases the
 * buffered ones in request order as running attempts complete.
 */
@Test(timeout = 5000)
public void testConcurrencyLimit() {
  MockEventHandler eventHandler = new MockEventHandler();
  DAG dag = mock(DAG.class);
  when(dag.getTotalVertices()).thenReturn(2);
  TezVertexID vId0 = TezVertexID.fromString("vertex_1436907267600_195589_1_00");
  TezVertexID vId1 = TezVertexID.fromString("vertex_1436907267600_195589_1_01");
  TezTaskID tId0 = TezTaskID.getInstance(vId0, 0);
  TezTaskID tId1 = TezTaskID.getInstance(vId1, 0);

  Vertex vertex = mock(Vertex.class);
  when(dag.getVertex((TezVertexID) any())).thenReturn(vertex);
  when(vertex.getDistanceFromRoot()).thenReturn(0);
  when(vertex.getVertexId()).thenReturn(vId0);

  DAGScheduler scheduler = new DAGSchedulerNaturalOrder(dag, eventHandler);

  // A limit of 0 is not effective: each of the three requests is scheduled.
  scheduler.addVertexConcurrencyLimit(vId0, 0);
  for (int attemptNumber = 0; attemptNumber < 3; attemptNumber++) {
    TaskAttempt unlimitedAttempt = mock(TaskAttempt.class);
    when(unlimitedAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId0, attemptNumber));
    scheduler.scheduleTask(
        new DAGEventSchedulerUpdate(DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, unlimitedAttempt));
    Assert.assertEquals(attemptNumber + 1, eventHandler.events.size());
  }
  eventHandler.events.clear();

  List<TaskAttempt> mockAttempts = Lists.newArrayList();
  int completed = 0;
  int requested = 0;
  int scheduled = 0;

  // A limit of 2 is effective.
  scheduler.addVertexConcurrencyLimit(vId1, 2);

  // The first two requests are within the limit and are scheduled in order.
  for (int i = 0; i < 2; i++) {
    TaskAttempt attempt = mock(TaskAttempt.class);
    mockAttempts.add(attempt);
    when(attempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++));
    scheduler.scheduleTask(
        new DAGEventSchedulerUpdate(DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, attempt));
    Assert.assertEquals(scheduled + 1, eventHandler.events.size());
    Assert.assertEquals(
        mockAttempts.get(scheduled).getID(),
        eventHandler.events.get(scheduled).getTaskAttemptID());
    scheduled++;
  }

  // The next two requests exceed the limit and are buffered.
  for (int i = 0; i < 2; i++) {
    TaskAttempt attempt = mock(TaskAttempt.class);
    mockAttempts.add(attempt);
    when(attempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++));
    scheduler.scheduleTask(
        new DAGEventSchedulerUpdate(DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, attempt));
    Assert.assertEquals(scheduled, eventHandler.events.size());
  }

  // Each of the next two completions releases one buffered attempt, in order.
  for (int i = 0; i < 2; i++) {
    scheduler.taskCompleted(
        new DAGEventSchedulerUpdate(
            DAGEventSchedulerUpdate.UpdateType.TA_COMPLETED, mockAttempts.get(completed++)));
    Assert.assertEquals(scheduled + 1, eventHandler.events.size());
    Assert.assertEquals(
        mockAttempts.get(scheduled).getID(),
        eventHandler.events.get(scheduled).getTaskAttemptID());
    scheduled++;
  }

  // Nothing is buffered any more, so another completion schedules nothing.
  scheduler.taskCompleted(
      new DAGEventSchedulerUpdate(
          DAGEventSchedulerUpdate.UpdateType.TA_COMPLETED, mockAttempts.get(completed++)));
  Assert.assertEquals(scheduled, eventHandler.events.size());

  // With capacity available again, a new request is scheduled right away.
  TaskAttempt lateAttempt = mock(TaskAttempt.class);
  mockAttempts.add(lateAttempt);
  when(lateAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++));
  scheduler.scheduleTask(
      new DAGEventSchedulerUpdate(DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, lateAttempt));
  Assert.assertEquals(scheduled + 1, eventHandler.events.size());
  Assert.assertEquals(
      mockAttempts.get(scheduled).getID(),
      eventHandler.events.get(scheduled).getTaskAttemptID());
  scheduled++;
}
Aggregations