Search in sources :

Example 1 with VertexEventSourceTaskAttemptCompleted

use of org.apache.tez.dag.app.dag.event.VertexEventSourceTaskAttemptCompleted in project tez by apache.

the class VertexImpl method checkTasksForCompletion.

// triggered by task_complete
static VertexState checkTasksForCompletion(final VertexImpl vertex) {
    // this log helps quickly count the completion count for a vertex.
    // grepping and counting for attempts and handling re-tries is time consuming
    LOG.info("Task Completion: " + constructCheckTasksForCompletionLog(vertex));
    // check for vertex failure first
    if (vertex.completedTaskCount > vertex.tasks.size()) {
        LOG.error("task completion accounting issue: completedTaskCount > nTasks:" + constructCheckTasksForCompletionLog(vertex));
    }
    if (vertex.completedTaskCount == vertex.tasks.size()) {
        // finished - gather stats
        vertex.finalStatistics = vertex.constructStatistics();
        // Only succeed if tasks complete successfully and no terminationCause is registered or if failures are below configured threshold.
        boolean vertexSucceeded = vertex.succeededTaskCount == vertex.numTasks;
        boolean vertexFailuresBelowThreshold = (vertex.succeededTaskCount + vertex.failedTaskCount == vertex.numTasks) && (vertex.failedTaskCount * 100 <= vertex.maxFailuresPercent * vertex.numTasks);
        if ((vertexSucceeded || vertexFailuresBelowThreshold) && vertex.terminationCause == null) {
            if (vertexSucceeded) {
                LOG.info("All tasks have succeeded, vertex:" + vertex.logIdentifier);
            } else {
                LOG.info("All tasks in the vertex " + vertex.logIdentifier + " have completed and the percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
                vertex.addDiagnostic("Vertex succeeded as percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
                vertex.logSuccessDiagnostics = true;
                for (Task task : vertex.tasks.values()) {
                    if (!task.getState().equals(TaskState.FAILED)) {
                        continue;
                    }
                    // Find the last attempt and mark that as successful
                    Iterator<TezTaskAttemptID> attempts = task.getAttempts().keySet().iterator();
                    TezTaskAttemptID lastAttempt = null;
                    while (attempts.hasNext()) {
                        TezTaskAttemptID attempt = attempts.next();
                        if (lastAttempt == null || attempt.getId() > lastAttempt.getId()) {
                            lastAttempt = attempt;
                        }
                    }
                    LOG.info("Succeeding failed task attempt:" + lastAttempt);
                    for (Map.Entry<Vertex, Edge> vertexEdge : vertex.targetVertices.entrySet()) {
                        Vertex destVertex = vertexEdge.getKey();
                        Edge edge = vertexEdge.getValue();
                        try {
                            List<TezEvent> tezEvents = edge.generateEmptyEventsForAttempt(lastAttempt);
                            // Downstream vertices need to receive a SUCCEEDED completion event for each failed task to ensure num bipartite count is correct
                            VertexEventTaskAttemptCompleted completionEvent = new VertexEventTaskAttemptCompleted(lastAttempt, TaskAttemptStateInternal.SUCCEEDED);
                            // Notify all target vertices
                            vertex.eventHandler.handle(new VertexEventSourceTaskAttemptCompleted(destVertex.getVertexId(), completionEvent));
                            vertex.eventHandler.handle(new VertexEventRouteEvent(destVertex.getVertexId(), tezEvents));
                        } catch (Exception e) {
                            throw new TezUncheckedException(e);
                        }
                    }
                }
            }
            if (vertex.commitVertexOutputs && !vertex.committed.getAndSet(true)) {
                // start commit if there're commits or just finish if no commits
                return commitOrFinish(vertex);
            } else {
                // just finish because no vertex committing needed
                return vertex.finished(VertexState.SUCCEEDED);
            }
        }
        return finishWithTerminationCause(vertex);
    }
    // return the current state, Vertex not finished yet
    return vertex.getInternalState();
}
Also used : VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) VertexEventTaskAttemptCompleted(org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) InvalidStateTransitonException(org.apache.hadoop.yarn.state.InvalidStateTransitonException) LimitExceededException(org.apache.tez.common.counters.LimitExceededException) TezException(org.apache.tez.dag.api.TezException) VertexEventSourceTaskAttemptCompleted(org.apache.tez.dag.app.dag.event.VertexEventSourceTaskAttemptCompleted) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Aggregations

IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 InvalidStateTransitonException (org.apache.hadoop.yarn.state.InvalidStateTransitonException)1 LimitExceededException (org.apache.tez.common.counters.LimitExceededException)1 TezException (org.apache.tez.dag.api.TezException)1 TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException)1 Task (org.apache.tez.dag.app.dag.Task)1 Vertex (org.apache.tez.dag.app.dag.Vertex)1 TaskEventScheduleTask (org.apache.tez.dag.app.dag.event.TaskEventScheduleTask)1 VertexEventRecoverVertex (org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex)1 VertexEventRouteEvent (org.apache.tez.dag.app.dag.event.VertexEventRouteEvent)1 VertexEventSourceTaskAttemptCompleted (org.apache.tez.dag.app.dag.event.VertexEventSourceTaskAttemptCompleted)1 VertexEventTaskAttemptCompleted (org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted)1 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)1 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)1