Use of org.apache.tez.dag.app.dag.event.VertexEventSourceTaskAttemptCompleted in the Apache Tez project.
Class VertexImpl, method checkTasksForCompletion.
/**
 * Decides what state the vertex should move to after a task completion (triggered by
 * task_complete).
 *
 * <p>While {@code completedTaskCount} differs from the number of tasks, the vertex is left alone
 * and its current internal state is returned. Once they match, final statistics are gathered and
 * the vertex either succeeds (all tasks succeeded, or the failed share is within
 * {@code maxFailuresPercent}, and no termination cause is registered) or is finished through
 * {@code finishWithTerminationCause}.
 *
 * <p>When the vertex succeeds despite failed tasks, every target vertex is sent a SUCCEEDED
 * completion event plus the edge's empty events for the highest-id attempt of each failed task.
 *
 * @param vertex the vertex whose task counters are evaluated
 * @return the state produced by the commit/finish/termination path, or the current internal
 *         state when the vertex has not finished yet
 */
static VertexState checkTasksForCompletion(final VertexImpl vertex) {
  // Single log line per call: lets the completion count of a vertex be grepped quickly, without
  // having to count attempts and account for re-tries.
  LOG.info("Task Completion: " + constructCheckTasksForCompletionLog(vertex));

  // An over-count is only reported here; processing continues.
  if (vertex.completedTaskCount > vertex.tasks.size()) {
    LOG.error("task completion accounting issue: completedTaskCount > nTasks:" + constructCheckTasksForCompletionLog(vertex));
  }

  // Guard: tasks still outstanding, so the vertex keeps its current state.
  if (vertex.completedTaskCount != vertex.tasks.size()) {
    return vertex.getInternalState();
  }

  // Finished - gather stats before deciding on the outcome.
  vertex.finalStatistics = vertex.constructStatistics();

  final boolean everyTaskSucceeded = vertex.succeededTaskCount == vertex.numTasks;
  final boolean failuresWithinLimit =
      (vertex.succeededTaskCount + vertex.failedTaskCount == vertex.numTasks)
          && (vertex.failedTaskCount * 100 <= vertex.maxFailuresPercent * vertex.numTasks);

  // Success requires an acceptable task outcome AND no registered termination cause.
  if (vertex.terminationCause != null || !(everyTaskSucceeded || failuresWithinLimit)) {
    return finishWithTerminationCause(vertex);
  }

  if (everyTaskSucceeded) {
    LOG.info("All tasks have succeeded, vertex:" + vertex.logIdentifier);
  } else {
    // NOTE(review): "less that" looks like a typo for "less than"; the wording is kept as-is
    // because it is emitted at runtime (log line and vertex diagnostics).
    final String ratioAndThreshold = "(" + vertex.failedTaskCount + "/" + vertex.numTasks
        + ") is less that the threshold of " + vertex.maxFailuresPercent;
    LOG.info("All tasks in the vertex " + vertex.logIdentifier
        + " have completed and the percentage of failed tasks (failed/total) " + ratioAndThreshold);
    vertex.addDiagnostic(
        "Vertex succeeded as percentage of failed tasks (failed/total) " + ratioAndThreshold);
    vertex.logSuccessDiagnostics = true;

    for (Task candidate : vertex.tasks.values()) {
      if (candidate.getState().equals(TaskState.FAILED)) {
        // The last attempt of the failed task is the one marked as successful.
        final TezTaskAttemptID newestAttempt = pickLatestAttemptId(candidate);
        LOG.info("Succeeding failed task attempt:" + newestAttempt);
        routeSyntheticSuccessDownstream(vertex, newestAttempt);
      }
    }
  }

  // Start the commit if this call is the one that flips the committed flag; otherwise no vertex
  // committing is needed and the vertex just finishes.
  if (vertex.commitVertexOutputs && !vertex.committed.getAndSet(true)) {
    return commitOrFinish(vertex);
  }
  return vertex.finished(VertexState.SUCCEEDED);
}

/** Returns the attempt id with the greatest {@code getId()} for the task, or null if it has none. */
private static TezTaskAttemptID pickLatestAttemptId(Task task) {
  TezTaskAttemptID newest = null;
  for (TezTaskAttemptID current : task.getAttempts().keySet()) {
    if (newest == null || current.getId() > newest.getId()) {
      newest = current;
    }
  }
  return newest;
}

/** Sends a SUCCEEDED completion event and the edge's empty events for the attempt to every target vertex. */
private static void routeSyntheticSuccessDownstream(VertexImpl vertex, TezTaskAttemptID attemptId) {
  for (Map.Entry<Vertex, Edge> target : vertex.targetVertices.entrySet()) {
    final Vertex downstream = target.getKey();
    final Edge link = target.getValue();
    try {
      final List<TezEvent> emptyEvents = link.generateEmptyEventsForAttempt(attemptId);
      // Downstream vertices need a SUCCEEDED completion event for each failed task so that their
      // num bipartite count is correct.
      final VertexEventTaskAttemptCompleted succeededEvent =
          new VertexEventTaskAttemptCompleted(attemptId, TaskAttemptStateInternal.SUCCEEDED);
      vertex.eventHandler.handle(
          new VertexEventSourceTaskAttemptCompleted(downstream.getVertexId(), succeededEvent));
      vertex.eventHandler.handle(new VertexEventRouteEvent(downstream.getVertexId(), emptyEvents));
    } catch (Exception e) {
      throw new TezUncheckedException(e);
    }
  }
}
Aggregations