Search in sources :

Example 1 with Vertex

use of org.apache.tez.dag.app.dag.Vertex in project tez by apache.

the class VertexImpl method checkTasksForCompletion.

/**
 * Decides what state the vertex should move to after a task completion (triggered by
 * task_complete).
 *
 * <p>While tasks are still outstanding the current internal state is returned unchanged. Once
 * every task is accounted for, final statistics are captured and the vertex either succeeds
 * (possibly via a commit) or is finished with its termination cause.
 *
 * @param vertex the vertex whose task counters are evaluated
 * @return the state the vertex is in after this evaluation
 */
static VertexState checkTasksForCompletion(final VertexImpl vertex) {
    // A single line per invocation makes it cheap to count completions for a vertex from the
    // log; reconstructing the count from attempt and retry entries is time consuming.
    LOG.info("Task Completion: " + constructCheckTasksForCompletionLog(vertex));
    final int totalTasks = vertex.tasks.size();
    if (vertex.completedTaskCount > totalTasks) {
        // More completions than tasks: the accounting is off. Only reported, not acted upon.
        LOG.error("task completion accounting issue: completedTaskCount > nTasks:" + constructCheckTasksForCompletionLog(vertex));
    }
    if (vertex.completedTaskCount != totalTasks) {
        // Vertex not finished yet: hand back whatever state it is currently in.
        return vertex.getInternalState();
    }
    // Every task is accounted for - gather stats.
    vertex.finalStatistics = vertex.constructStatistics();
    final boolean allSucceeded = vertex.succeededTaskCount == vertex.numTasks;
    final boolean failuresWithinThreshold = (vertex.succeededTaskCount + vertex.failedTaskCount == vertex.numTasks) && (vertex.failedTaskCount * 100 <= vertex.maxFailuresPercent * vertex.numTasks);
    // Success requires a clean (or tolerably failed) run AND no registered termination cause.
    if (vertex.terminationCause != null || !(allSucceeded || failuresWithinThreshold)) {
        return finishWithTerminationCause(vertex);
    }
    if (allSucceeded) {
        LOG.info("All tasks have succeeded, vertex:" + vertex.logIdentifier);
    } else {
        LOG.info("All tasks in the vertex " + vertex.logIdentifier + " have completed and the percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
        vertex.addDiagnostic("Vertex succeeded as percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
        vertex.logSuccessDiagnostics = true;
        for (Task task : vertex.tasks.values()) {
            if (task.getState().equals(TaskState.FAILED)) {
                // Pick the attempt with the highest id; it is the one reported as successful.
                TezTaskAttemptID newestAttempt = null;
                for (TezTaskAttemptID candidate : task.getAttempts().keySet()) {
                    if (newestAttempt == null || candidate.getId() > newestAttempt.getId()) {
                        newestAttempt = candidate;
                    }
                }
                LOG.info("Succeeding failed task attempt:" + newestAttempt);
                // Notify all target vertices.
                for (Map.Entry<Vertex, Edge> target : vertex.targetVertices.entrySet()) {
                    final Vertex downstream = target.getKey();
                    final Edge outEdge = target.getValue();
                    try {
                        List<TezEvent> emptyEvents = outEdge.generateEmptyEventsForAttempt(newestAttempt);
                        // Downstream vertices must see a SUCCEEDED completion for each failed
                        // task so that their bipartite source count stays correct.
                        VertexEventTaskAttemptCompleted succeeded = new VertexEventTaskAttemptCompleted(newestAttempt, TaskAttemptStateInternal.SUCCEEDED);
                        vertex.eventHandler.handle(new VertexEventSourceTaskAttemptCompleted(downstream.getVertexId(), succeeded));
                        vertex.eventHandler.handle(new VertexEventRouteEvent(downstream.getVertexId(), emptyEvents));
                    } catch (Exception e) {
                        throw new TezUncheckedException(e);
                    }
                }
            }
        }
    }
    if (vertex.commitVertexOutputs && !vertex.committed.getAndSet(true)) {
        // First caller to flip the committed flag: start the commit, or finish if there is
        // nothing to commit.
        return commitOrFinish(vertex);
    }
    // No vertex-level commit needed (or it was already claimed): just finish.
    return vertex.finished(VertexState.SUCCEEDED);
}
Also used : VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) VertexEventTaskAttemptCompleted(org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) InvalidStateTransitonException(org.apache.hadoop.yarn.state.InvalidStateTransitonException) LimitExceededException(org.apache.tez.common.counters.LimitExceededException) TezException(org.apache.tez.dag.api.TezException) VertexEventSourceTaskAttemptCompleted(org.apache.tez.dag.app.dag.event.VertexEventSourceTaskAttemptCompleted) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 2 with Vertex

use of org.apache.tez.dag.app.dag.Vertex in project tez by apache.

the class VertexImpl method setParallelismWrapper.

/**
 * Applies a new parallelism (task count) to this vertex, optionally together with new source
 * edge properties and root input spec updates. All work after argument validation runs under
 * the vertex write lock.
 *
 * <p>Two paths exist:
 * <ul>
 *   <li>{@code numTasks == -1}: parallelism has not been set yet. The vertex must be in
 *       {@code INITIALIZING}; edge properties and root input specs are applied, tasks are
 *       created, and {@code V_READY_TO_INIT} is sent if {@code canInitVertex()} holds.</li>
 *   <li>otherwise: an existing parallelism is being changed (or only edge properties are being
 *       replaced). Source edges buffer events while tasks are added/removed and edge
 *       properties are swapped.</li>
 * </ul>
 *
 * @param parallelism new number of tasks; must be {@code >= 0}
 * @param vertexLocationHint location hint handed to {@code setVertexLocationHint}; discarded
 *        (reset to {@code null}) on the second path when the parallelism actually changes
 * @param sourceEdgeProperties new edge properties keyed by the string passed to
 *        {@code DAG.getVertex} (presumably the source vertex name - confirm); may be
 *        {@code null}, except on the second path when {@code parallelism} equals the current
 *        value
 * @param rootInputSpecUpdates updates keyed by root input name; only allowed on the
 *        {@code numTasks == -1} path, must be {@code null} otherwise
 * @param fromVertexManager only used to decide whether the "reconfiguration planned" check
 *        below is enforced
 * @throws TezUncheckedException if tasks have already been scheduled, if the vertex is not
 *         {@code INITIALIZING} on the first path, or if updating an edge property fails
 * @throws AMUserCodeException declared by this method; which callee raises it is not visible
 *         here
 */
private void setParallelismWrapper(int parallelism, VertexLocationHint vertexLocationHint, Map<String, EdgeProperty> sourceEdgeProperties, Map<String, InputSpecUpdate> rootInputSpecUpdates, boolean fromVertexManager) throws AMUserCodeException {
    Preconditions.checkArgument(parallelism >= 0, "Parallelism must be >=0. Value: " + parallelism + " for vertex: " + logIdentifier);
    writeLock.lock();
    // Set before any further validation, so the flag is true even if the call is rejected below.
    this.setParallelismCalledFlag = true;
    try {
        // disallow changing things after a vertex has started
        if (!tasksNotYetScheduled) {
            String msg = "setParallelism cannot be called after scheduling tasks. Vertex: " + getLogIdentifier();
            LOG.info(msg);
            throw new TezUncheckedException(msg);
        }
        if (fromVertexManager && canInitVertex()) {
            // vertex is fully defined. setParallelism has been called. VertexManager should have
            // informed us about this. Otherwise we would have notified listeners that we are fully
            // defined before we are actually fully defined
            Preconditions.checkState(vertexToBeReconfiguredByManager, "Vertex is fully configured but still" + " the reconfiguration API has been called. VertexManager must notify the framework using " + " context.vertexReconfigurationPlanned() before re-configuring the vertex." + " vertexId=" + logIdentifier);
        }
        // Input initializer/Vertex Manager/1-1 split expected to set parallelism.
        if (numTasks == -1) {
            // First path: parallelism is being set for the first time.
            if (getState() != VertexState.INITIALIZING) {
                throw new TezUncheckedException("Vertex state is not Initializing. Value: " + getState() + " for vertex: " + logIdentifier);
            }
            if (sourceEdgeProperties != null) {
                for (Map.Entry<String, EdgeProperty> entry : sourceEdgeProperties.entrySet()) {
                    LOG.info("Replacing edge manager for source:" + entry.getKey() + " destination: " + getLogIdentifier());
                    Vertex sourceVertex = appContext.getCurrentDAG().getVertex(entry.getKey());
                    // NOTE(review): edge is not null-checked. If sourceVertices has no mapping for
                    // sourceVertex, the NPE from setEdgeProperty is caught below and the handler then
                    // dereferences edge again while building its message - confirm this cannot happen.
                    Edge edge = sourceVertices.get(sourceVertex);
                    try {
                        edge.setEdgeProperty(entry.getValue());
                    } catch (Exception e) {
                        throw new TezUncheckedException("Fail to update EdgeProperty for Edge," + "sourceVertex:" + edge.getSourceVertexName() + "destinationVertex:" + edge.getDestinationVertexName(), e);
                    }
                }
            }
            if (rootInputSpecUpdates != null) {
                LOG.info("Got updated RootInputsSpecs: " + rootInputSpecUpdates.toString());
                // Sanity check for correct number of updates.
                // Each update must either apply to all work units or carry exactly one entry per task.
                for (Entry<String, InputSpecUpdate> rootInputSpecUpdateEntry : rootInputSpecUpdates.entrySet()) {
                    Preconditions.checkState(rootInputSpecUpdateEntry.getValue().isForAllWorkUnits() || (rootInputSpecUpdateEntry.getValue().getAllNumPhysicalInputs() != null && rootInputSpecUpdateEntry.getValue().getAllNumPhysicalInputs().size() == parallelism), "Not enough input spec updates for root input named " + rootInputSpecUpdateEntry.getKey());
                }
                this.rootInputSpecs.putAll(rootInputSpecUpdates);
            }
            // oldNumTasks is -1 on this path; it is reported to listeners alongside the new value.
            int oldNumTasks = numTasks;
            this.numTasks = parallelism;
            stateChangeNotifier.stateChanged(vertexId, new VertexStateUpdateParallelismUpdated(vertexName, numTasks, oldNumTasks));
            this.createTasks();
            setVertexLocationHint(vertexLocationHint);
            LOG.info("Vertex " + getLogIdentifier() + " parallelism set to " + parallelism);
            if (canInitVertex()) {
                getEventHandler().handle(new VertexEvent(getVertexId(), VertexEventType.V_READY_TO_INIT));
            }
        } else {
            // Second path: a parallelism was already set and is being changed or re-confirmed.
            // This is an artificial restriction since there's no way of knowing whether a VertexManager
            // will attempt to update root input specs. When parallelism has not been initialized, the
            // Vertex will not be in started state so it's safe to update the specifications.
            // TODO TEZ-937 - add a mechanism to query vertex managers, or for VMs to indicate readiness
            // for a vertex to start.
            Preconditions.checkState(rootInputSpecUpdates == null, "Root Input specs can only be updated when the vertex is configured with -1 tasks");
            int oldNumTasks = numTasks;
            // start buffering incoming events so that we can re-route existing events
            // NOTE(review): the matching stopEventBuffering() loop at the end of this branch is not in
            // a finally block, so it is skipped if anything in between throws - confirm that is intended.
            for (Edge edge : sourceVertices.values()) {
                edge.startEventBuffering();
            }
            if (parallelism == numTasks) {
                LOG.info("setParallelism same as current value: " + parallelism + " for vertex: " + logIdentifier);
                // With an unchanged task count the call is only meaningful if edge properties are supplied.
                Preconditions.checkArgument(sourceEdgeProperties != null, "Source edge managers or RootInputSpecs must be set when not changing parallelism");
            } else {
                LOG.info("Resetting vertex location hints due to change in parallelism for vertex: " + logIdentifier);
                // The caller-supplied hint is dropped; setVertexLocationHint below receives null.
                vertexLocationHint = null;
                if (parallelism > numTasks) {
                    addTasks((parallelism));
                } else if (parallelism < numTasks) {
                    removeTasks(parallelism);
                }
            }
            // addTasks/removeTasks are expected to have updated numTasks; this verifies it.
            Preconditions.checkState(this.numTasks == parallelism, getLogIdentifier());
            // set new vertex location hints
            setVertexLocationHint(vertexLocationHint);
            LOG.info("Vertex " + getLogIdentifier() + " parallelism set to " + parallelism + " from " + oldNumTasks);
            // notify listeners
            stateChangeNotifier.stateChanged(vertexId, new VertexStateUpdateParallelismUpdated(vertexName, numTasks, oldNumTasks));
            // Only evaluated when the JVM runs with assertions enabled.
            assert tasks.size() == numTasks;
            // set new edge managers
            if (sourceEdgeProperties != null) {
                for (Map.Entry<String, EdgeProperty> entry : sourceEdgeProperties.entrySet()) {
                    LOG.info("Replacing edge manager for source:" + entry.getKey() + " destination: " + getLogIdentifier());
                    Vertex sourceVertex = appContext.getCurrentDAG().getVertex(entry.getKey());
                    Edge edge = sourceVertices.get(sourceVertex);
                    try {
                        edge.setEdgeProperty(entry.getValue());
                    } catch (Exception e) {
                        // Unlike the first path, the failure is wrapped without naming the edge.
                        throw new TezUncheckedException(e);
                    }
                }
            }
            // stop buffering events
            for (Edge edge : sourceVertices.values()) {
                edge.stopEventBuffering();
            }
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : VertexStateUpdateParallelismUpdated(org.apache.tez.dag.api.event.VertexStateUpdateParallelismUpdated) VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) InputSpecUpdate(org.apache.tez.runtime.api.InputSpecUpdate) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) InvalidStateTransitonException(org.apache.hadoop.yarn.state.InvalidStateTransitonException) LimitExceededException(org.apache.tez.common.counters.LimitExceededException) TezException(org.apache.tez.dag.api.TezException) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) VertexEvent(org.apache.tez.dag.app.dag.event.VertexEvent) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 3 with Vertex

use of org.apache.tez.dag.app.dag.Vertex in project tez by apache.

the class VertexImpl method logVertexConfigurationDoneEvent.

/**
 * Sends a {@code VertexConfigurationDoneEvent} describing the current vertex configuration
 * (task count, location hints, source edge properties, root input specs and whether
 * setParallelism was called) to the history handler.
 *
 * <p>Nothing is sent when recovery data is present and reports that init should be skipped.
 */
void logVertexConfigurationDoneEvent() {
    if (recoveryData != null && recoveryData.shouldSkipInit()) {
        return;
    }
    // Snapshot each incoming edge's property, keyed by the name of its source vertex.
    Map<String, EdgeProperty> edgePropertiesBySource = new HashMap<String, EdgeProperty>();
    for (Map.Entry<Vertex, Edge> incoming : this.sourceVertices.entrySet()) {
        Vertex source = incoming.getKey();
        Edge sourceEdge = incoming.getValue();
        edgePropertiesBySource.put(source.getName(), sourceEdge.getEdgeProperty());
    }
    // The location hint is only materialised when task location hints exist.
    VertexLocationHint locationHint = null;
    if (taskLocationHints != null) {
        locationHint = VertexLocationHint.create(Lists.newArrayList(taskLocationHints));
    }
    VertexConfigurationDoneEvent configurationDone = new VertexConfigurationDoneEvent(vertexId, clock.getTime(), numTasks, locationHint, edgePropertiesBySource, rootInputSpecs, setParallelismCalledFlag);
    DAGHistoryEvent historyEvent = new DAGHistoryEvent(getDAGId(), configurationDone);
    this.appContext.getHistoryHandler().handle(historyEvent);
}
Also used : VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 4 with Vertex

use of org.apache.tez.dag.app.dag.Vertex in project tez by apache.

the class VertexImpl method getInputSpecList.

/**
 * Builds the input specs for one task of this vertex: first one spec per root input, then
 * one spec per input vertex.
 *
 * <p>Only the root-input part runs under this vertex's read lock; the locking strategy
 * mirrors getOutputSpecList().
 *
 * @param taskIndex index of the task the specs are built for
 * @return a new list holding root input specs followed by input-vertex specs
 * @throws AMUserCodeException declared by this method; the raising callee is not visible here
 */
@Override
public List<InputSpec> getInputSpecList(int taskIndex) throws AMUserCodeException {
    List<InputSpec> specs;
    readLock.lock();
    try {
        int inputVertexCount = this.getInputVerticesCount();
        int rootInputCount = 0;
        if (rootInputDescriptors != null) {
            rootInputCount = rootInputDescriptors.size();
        }
        specs = new ArrayList<InputSpec>(inputVertexCount + rootInputCount);
        if (rootInputDescriptors != null) {
            for (Entry<String, RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>> rootInput : rootInputDescriptors.entrySet()) {
                String inputName = rootInput.getKey();
                specs.add(new InputSpec(inputName, rootInput.getValue().getIODescriptor(), rootInputSpecs.get(inputName).getNumPhysicalInputsForWorkUnit(taskIndex)));
            }
        }
    } finally {
        readLock.unlock();
    }
    // Deliberately outside this vertex's read lock. An input vertex may be in the middle of
    // setParallelism under its own write lock, so each spec is requested from that vertex,
    // which is expected to guard the read itself for a consistent view (refer TEZ-2251).
    for (Vertex inputVertex : getInputVertices().keySet()) {
        // TODO DAGAM This should be based on the edge type.
        specs.add(((VertexImpl) inputVertex).getDestinationSpecFor(this, taskIndex));
    }
    return specs;
}
Also used : RootInputLeafOutput(org.apache.tez.dag.api.RootInputLeafOutput) VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) GroupInputSpec(org.apache.tez.runtime.api.impl.GroupInputSpec) InputSpec(org.apache.tez.runtime.api.impl.InputSpec)

Example 5 with Vertex

use of org.apache.tez.dag.app.dag.Vertex in project tez by apache.

the class VertexImpl method getOutputSpecList.

/**
 * Builds the output specs for one task of this vertex: the additional output specs first,
 * followed by one spec per target vertex.
 *
 * <p>Locking (ref TEZ-3297): the read lock is held only while copying the additional output
 * specs, never for the whole method. Holding it across the calls into target vertices would
 * nest locks and can deadlock in corner cases, for example:
 * <ol>
 *   <li>thread#1: a downstream vertex is processing setParallelism and holds its write
 *       lock;</li>
 *   <li>thread#2: this vertex acquires its read lock;</li>
 *   <li>thread#3: the central dispatcher handles an event for this vertex and requests its
 *       write lock;</li>
 *   <li>thread#1 then needs this vertex's read lock to set edges, but queues behind the
 *       write request from thread#3;</li>
 *   <li>thread#2 then needs the downstream vertex's read lock, which is blocked by the write
 *       lock held in thread#1;</li>
 *   <li>thread#3 stays blocked by thread#2's read lock on this vertex.</li>
 * </ol>
 *
 * @param taskIndex index of the task the specs are built for
 * @return a new list holding additional output specs followed by target-vertex specs
 * @throws AMUserCodeException declared by this method; the raising callee is not visible here
 */
@Override
public List<OutputSpec> getOutputSpecList(int taskIndex) throws AMUserCodeException {
    List<OutputSpec> specs;
    readLock.lock();
    try {
        int expectedSize = this.getOutputVerticesCount() + this.additionalOutputSpecs.size();
        specs = new ArrayList<OutputSpec>(expectedSize);
        specs.addAll(additionalOutputSpecs);
    } finally {
        readLock.unlock();
    }
    // A target vertex may be in the middle of setParallelism (which could change numTasks)
    // under its write lock, so each spec is requested from the target vertex itself, which is
    // expected to guard the read for a consistent view (refer TEZ-2251).
    for (Vertex target : targetVertices.keySet()) {
        specs.add(((VertexImpl) target).getSourceSpecFor(this, taskIndex));
    }
    return specs;
}
Also used : VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec)

Aggregations

Vertex (org.apache.tez.dag.app.dag.Vertex)80 Test (org.junit.Test)31 TezVertexID (org.apache.tez.dag.records.TezVertexID)23 DAG (org.apache.tez.dag.app.dag.DAG)22 VertexEventRecoverVertex (org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex)17 HashMap (java.util.HashMap)15 StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)15 PlanTaskLocationHint (org.apache.tez.dag.api.records.DAGProtos.PlanTaskLocationHint)13 Map (java.util.Map)12 TezTaskID (org.apache.tez.dag.records.TezTaskID)11 VertexEventTaskCompleted (org.apache.tez.dag.app.dag.event.VertexEventTaskCompleted)9 LinkedHashMap (java.util.LinkedHashMap)8 VertexLocationHint (org.apache.tez.dag.api.VertexLocationHint)8 ArrayList (java.util.ArrayList)7 EventHandler (org.apache.hadoop.yarn.event.EventHandler)7 Task (org.apache.tez.dag.app.dag.Task)7 EdgeProperty (org.apache.tez.dag.api.EdgeProperty)6 TaskAttemptEventSchedule (org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule)6 OutputCommitter (org.apache.tez.runtime.api.OutputCommitter)6 TreeMap (java.util.TreeMap)5