Search in sources :

Example 1 with TezEvent

use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.

the class TaskImpl method getTaskAttemptTezEvents.

/**
 * Returns the slice of events queued for this task's attempts that begins at
 * {@code fromEventId} and contains at most {@code maxEvents} entries.
 *
 * <p>When nothing has been queued at or beyond {@code fromEventId}, the shared
 * {@code EMPTY_TASK_ATTEMPT_TEZ_EVENTS} instance is returned; otherwise the result is a
 * freshly allocated list. The whole lookup runs while holding {@code readLock}.
 *
 * @param attemptID   attempt asking for events; must be a known attempt of this task
 * @param fromEventId index of the first event the caller has not seen yet
 * @param maxEvents   upper bound on the number of events returned by this call
 * @return the requested events, in queue order
 * @throws TezUncheckedException if {@code attemptID} is not an attempt of this task
 */
@Override
public ArrayList<TezEvent> getTaskAttemptTezEvents(TezTaskAttemptID attemptID, int fromEventId, int maxEvents) {
    readLock.lock();
    try {
        if (!attempts.containsKey(attemptID)) {
            throw new TezUncheckedException("Unknown TA: " + attemptID + " asking for events from task:" + getTaskId());
        }
        final int queuedCount = tezEventsForTaskAttempts.size();
        if (queuedCount <= fromEventId) {
            // Nothing new for this caller.
            return EMPTY_TASK_ATTEMPT_TEZ_EVENTS;
        }
        final int sliceLength = Math.min(maxEvents, queuedCount - fromEventId);
        final int toEventId = fromEventId + sliceLength;
        // The events themselves are deliberately handed out unmodified, so no per-event copies
        // are needed. Setting e.g. the taskAttemptId into the TezEvent destination metadata
        // would require copying each TezEvent, modifying the copy and sending that on the RPC.
        // That matters because TezEvents are only routed in the AM and not copied during
        // routing: a broadcast edge sends the same event object to all consumers (as it
        // should). If copies were created, re-routing events on parallelism changes would be
        // difficult; the events would have to be buffered in the Vertex until the parallelism
        // was set and only then routed.
        final ArrayList<TezEvent> slice = new ArrayList<TezEvent>(tezEventsForTaskAttempts.subList(fromEventId, toEventId));
        LOG.info("TaskAttempt:" + attemptID + " sent events: (" + fromEventId + "-" + toEventId + ").");
        return slice;
    } finally {
        readLock.unlock();
    }
}
Also used : TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint)

Example 2 with TezEvent

use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.

the class VertexImpl method checkTasksForCompletion.

/**
 * Decides what state the vertex should move to after a task completion (triggered by
 * task_complete).
 *
 * <p>While tasks are still outstanding the current internal state is returned unchanged. Once
 * every task has completed, the final statistics are gathered and the vertex either succeeds
 * (possibly after a commit), or is finished with its termination cause.
 *
 * @param vertex the vertex whose completion counters are inspected
 * @return the state the vertex ends up in
 */
static VertexState checkTasksForCompletion(final VertexImpl vertex) {
    // A single summary line per call makes it quick to count completions for a vertex;
    // grepping and counting attempts while accounting for re-tries is time consuming.
    LOG.info("Task Completion: " + constructCheckTasksForCompletionLog(vertex));
    // check for vertex failure first
    if (vertex.completedTaskCount > vertex.tasks.size()) {
        LOG.error("task completion accounting issue: completedTaskCount > nTasks:" + constructCheckTasksForCompletionLog(vertex));
    }
    if (vertex.completedTaskCount != vertex.tasks.size()) {
        // Vertex not finished yet: report the current state.
        return vertex.getInternalState();
    }

    // finished - gather stats
    vertex.finalStatistics = vertex.constructStatistics();

    // Success requires that no terminationCause is registered and that either every task
    // succeeded or the failures stay within the configured threshold.
    final boolean allSucceeded = vertex.succeededTaskCount == vertex.numTasks;
    final boolean everyTaskAccountedFor = vertex.succeededTaskCount + vertex.failedTaskCount == vertex.numTasks;
    final boolean failuresWithinThreshold = everyTaskAccountedFor && (vertex.failedTaskCount * 100 <= vertex.maxFailuresPercent * vertex.numTasks);
    if (vertex.terminationCause != null || !(allSucceeded || failuresWithinThreshold)) {
        return finishWithTerminationCause(vertex);
    }

    if (allSucceeded) {
        LOG.info("All tasks have succeeded, vertex:" + vertex.logIdentifier);
    } else {
        final String failureRatio = "(failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent;
        LOG.info("All tasks in the vertex " + vertex.logIdentifier + " have completed and the percentage of failed tasks " + failureRatio);
        vertex.addDiagnostic("Vertex succeeded as percentage of failed tasks " + failureRatio);
        vertex.logSuccessDiagnostics = true;
        propagateSuccessForFailedTasks(vertex);
    }

    // Start the commit if one is needed and has not been started yet; otherwise just finish.
    final boolean commitNeeded = vertex.commitVertexOutputs && !vertex.committed.getAndSet(true);
    return commitNeeded ? commitOrFinish(vertex) : vertex.finished(VertexState.SUCCEEDED);
}

/**
 * For every FAILED task of the vertex, picks the attempt with the highest id and tells each
 * target vertex that this attempt SUCCEEDED, followed by the empty events generated for it.
 */
private static void propagateSuccessForFailedTasks(final VertexImpl vertex) {
    for (Task task : vertex.tasks.values()) {
        if (task.getState().equals(TaskState.FAILED)) {
            // Find the last attempt and mark that as successful
            TezTaskAttemptID newestAttempt = null;
            for (TezTaskAttemptID candidate : task.getAttempts().keySet()) {
                if (newestAttempt == null || candidate.getId() > newestAttempt.getId()) {
                    newestAttempt = candidate;
                }
            }
            LOG.info("Succeeding failed task attempt:" + newestAttempt);
            for (Map.Entry<Vertex, Edge> target : vertex.targetVertices.entrySet()) {
                final Vertex downstream = target.getKey();
                final Edge outgoingEdge = target.getValue();
                try {
                    final List<TezEvent> emptyEvents = outgoingEdge.generateEmptyEventsForAttempt(newestAttempt);
                    // Downstream vertices need to receive a SUCCEEDED completion event for each
                    // failed task to ensure num bipartite count is correct
                    final VertexEventTaskAttemptCompleted succeeded = new VertexEventTaskAttemptCompleted(newestAttempt, TaskAttemptStateInternal.SUCCEEDED);
                    // Notify the target vertex first, then route the empty events to it.
                    vertex.eventHandler.handle(new VertexEventSourceTaskAttemptCompleted(downstream.getVertexId(), succeeded));
                    vertex.eventHandler.handle(new VertexEventRouteEvent(downstream.getVertexId(), emptyEvents));
                } catch (Exception e) {
                    throw new TezUncheckedException(e);
                }
            }
        }
    }
}
Also used : VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) VertexEventTaskAttemptCompleted(org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) InvalidStateTransitonException(org.apache.hadoop.yarn.state.InvalidStateTransitonException) LimitExceededException(org.apache.tez.common.counters.LimitExceededException) TezException(org.apache.tez.dag.api.TezException) VertexEventSourceTaskAttemptCompleted(org.apache.tez.dag.app.dag.event.VertexEventSourceTaskAttemptCompleted) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 3 with TezEvent

use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.

the class VertexImpl method getTaskAttemptTezEvents.

/**
 * Collects the next batch of events to send to a task attempt.
 *
 * <p>The batch starts with the events returned by the owning task for
 * {@code preRoutedFromEventId}, and is then topped up, to at most {@code maxEvents} entries in
 * total, with events taken from {@code onDemandRouteEvents} starting at {@code fromEventId}.
 * The on-demand part runs while holding {@code onDemandRouteEventsReadLock}.
 *
 * <p>If routing throws an {@code AMUserCodeException}, the error is logged, a
 * {@code VertexEventManagerUserCodeError} is sent to the event handler, the on-demand cursor is
 * reset to {@code fromEventId} and the batch is emptied.
 *
 * @param attemptID            attempt asking for events
 * @param fromEventId          index into {@code onDemandRouteEvents} to resume from
 * @param preRoutedFromEventId index passed to the task to resume its own event list from
 * @param maxEvents            upper bound on the number of events placed in the batch
 * @return the batch together with the next on-demand index and the next pre-routed index
 */
@Override
public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID, int fromEventId, int preRoutedFromEventId, int maxEvents) {
    Task task = getTask(attemptID.getTaskID());
    ArrayList<TezEvent> events = task.getTaskAttemptTezEvents(attemptID, preRoutedFromEventId, maxEvents);
    // the pre-routed cursor advances by however many events the task handed back
    int nextPreRoutedFromEventId = preRoutedFromEventId + events.size();
    int nextFromEventId = fromEventId;
    onDemandRouteEventsReadLock.lock();
    try {
        int currEventCount = onDemandRouteEvents.size();
        try {
            if (currEventCount > fromEventId) {
                // Never append to the shared empty list: grow the task's list if it returned
                // one of its own, otherwise allocate a fresh list for this call.
                if (events != TaskImpl.EMPTY_TASK_ATTEMPT_TEZ_EVENTS) {
                    events.ensureCapacity(maxEvents);
                } else {
                    events = Lists.newArrayListWithCapacity(maxEvents);
                }
                // remembered so that only on-demand events are counted in the log line below
                int numPreRoutedEvents = events.size();
                int taskIndex = attemptID.getTaskID().getId();
                Preconditions.checkState(taskIndex < tasks.size(), "Invalid task index for TA: " + attemptID + " vertex: " + getLogIdentifier());
                // true until the first data-movement style event has been processed
                boolean isFirstEvent = true;
                // set as soon as any obsolete event has been skipped in this call
                boolean firstEventObsoleted = false;
                for (nextFromEventId = fromEventId; nextFromEventId < currEventCount; ++nextFromEventId) {
                    boolean earlyExit = false;
                    // batch is full; nextFromEventId stays on the first unprocessed event
                    if (events.size() == maxEvents) {
                        break;
                    }
                    EventInfo eventInfo = onDemandRouteEvents.get(nextFromEventId);
                    if (eventInfo.isObsolete) {
                        // ignore obsolete events
                        firstEventObsoleted = true;
                        continue;
                    }
                    TezEvent tezEvent = eventInfo.tezEvent;
                    switch(tezEvent.getEventType()) {
                        case INPUT_FAILED_EVENT:
                        case DATA_MOVEMENT_EVENT:
                        case COMPOSITE_DATA_MOVEMENT_EVENT:
                            {
                                int srcTaskIndex = eventInfo.eventTaskIndex;
                                Edge srcEdge = eventInfo.eventEdge;
                                PendingEventRouteMetadata pendingRoute = null;
                                if (isFirstEvent) {
                                    // the first event is the one that can have pending routes because its expanded
                                    // events had not been completely sent in the last round.
                                    isFirstEvent = false;
                                    pendingRoute = srcEdge.removePendingEvents(attemptID);
                                    if (pendingRoute != null) {
                                        // A pending route that belongs to a different event is only
                                        // expected when the event it was recorded for has been
                                        // obsoleted
                                        if (tezEvent != pendingRoute.getTezEvent()) {
                                            Preconditions.checkState(firstEventObsoleted);
                                            // pending routes can be ignored for obsoleted events
                                            pendingRoute = null;
                                        }
                                    }
                                }
                                if (!srcEdge.maybeAddTezEventForDestinationTask(tezEvent, attemptID, srcTaskIndex, events, maxEvents, pendingRoute)) {
                                    // not enough space left for this iteration events.
                                    // Exit and start from here next time
                                    earlyExit = true;
                                }
                            }
                            break;
                        case ROOT_INPUT_DATA_INFORMATION_EVENT:
                            {
                                // root input events are only delivered to the task they target
                                InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent.getEvent();
                                if (riEvent.getTargetIndex() == taskIndex) {
                                    events.add(tezEvent);
                                }
                            }
                            break;
                        default:
                            throw new TezUncheckedException("Unexpected event type for task: " + tezEvent.getEventType());
                    }
                    // break before the loop increment so the same event is revisited next call
                    if (earlyExit) {
                        break;
                    }
                }
                int numEventsSent = events.size() - numPreRoutedEvents;
                if (numEventsSent > 0) {
                    StringBuilder builder = new StringBuilder();
                    builder.append("Sending ").append(attemptID).append(" ").append(numEventsSent).append(" events [").append(fromEventId).append(",").append(nextFromEventId).append(") total ").append(currEventCount).append(" ").append(getLogIdentifier());
                    LOG.info(builder.toString());
                }
            }
        } catch (AMUserCodeException e) {
            String msg = "Exception in " + e.getSource() + ", vertex=" + getLogIdentifier();
            LOG.error(msg, e);
            eventHandler.handle(new VertexEventManagerUserCodeError(getVertexId(), e));
            // discard this round: rewind the on-demand cursor and send nothing
            // NOTE(review): this also drops the pre-routed events while nextPreRoutedFromEventId
            // has already been advanced past them - confirm that is intended.
            nextFromEventId = fromEventId;
            events.clear();
        }
    } finally {
        onDemandRouteEventsReadLock.unlock();
    }
    if (!events.isEmpty()) {
        // scan backwards for the most recent event of one of the types listed below
        for (int i = (events.size() - 1); i >= 0; --i) {
            TezEvent lastEvent = events.get(i);
            // record the last event sent by the AM to the task
            EventType lastEventType = lastEvent.getEventType();
            // if the following changes then critical path logic/recording may need revision
            if (lastEventType == EventType.COMPOSITE_DATA_MOVEMENT_EVENT || lastEventType == EventType.COMPOSITE_ROUTED_DATA_MOVEMENT_EVENT || lastEventType == EventType.DATA_MOVEMENT_EVENT || lastEventType == EventType.ROOT_INPUT_DATA_INFORMATION_EVENT) {
                task.getAttempt(attemptID).setLastEventSent(lastEvent);
                break;
            }
        }
    }
    return new TaskAttemptEventInfo(nextFromEventId, events, nextPreRoutedFromEventId);
}
Also used : TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) TaskAttemptEventInfo(org.apache.tez.dag.app.TaskAttemptEventInfo) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) DAGEventType(org.apache.tez.dag.app.dag.event.DAGEventType) EventType(org.apache.tez.runtime.api.impl.EventType) VertexEventType(org.apache.tez.dag.app.dag.event.VertexEventType) TaskEventType(org.apache.tez.dag.app.dag.event.TaskEventType) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) PendingEventRouteMetadata(org.apache.tez.dag.app.dag.impl.Edge.PendingEventRouteMetadata) VertexEventManagerUserCodeError(org.apache.tez.dag.app.dag.event.VertexEventManagerUserCodeError) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptEventInfo(org.apache.tez.dag.app.TaskAttemptEventInfo) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 4 with TezEvent

use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.

the class Edge method sendDmEventOrIfEventToTasks.

/**
 * Sends a data-movement or input-failed event to each destination task listed in
 * {@code taskAndInputIndices}, once per input index given for that task.
 *
 * <p>The outgoing event is rebuilt for the target input index and carries this edge's
 * {@code destinationMetaInfo}. One outgoing event object is created per distinct input index
 * and reused for every destination task that shares that index.
 *
 * @param tezEvent             the source event; its payload type must match {@code isDataMovementEvent}
 * @param srcTaskIndex         index of the producing task, used only in the error message
 * @param isDataMovementEvent  {@code true} to treat the payload as a {@code DataMovementEvent},
 *                             {@code false} to treat it as an {@code InputFailedEvent}
 * @param taskAndInputIndices  destination task index mapped to the input indices to address
 * @throws TezUncheckedException if a destination task index resolves to no task
 */
void sendDmEventOrIfEventToTasks(TezEvent tezEvent, int srcTaskIndex, boolean isDataMovementEvent, Map<Integer, List<Integer>> taskAndInputIndices) {
    Preconditions.checkState(edgeManager != null, "Edge Manager must be initialized by this time");
    final Event sourcePayload = tezEvent.getEvent();
    // Outgoing events differ only by input index, so they are cached under that key.
    final Map<Integer, TezEvent> outgoingByInputIndex = Maps.newHashMap();
    for (Map.Entry<Integer, List<Integer>> routing : taskAndInputIndices.entrySet()) {
        final int destTaskIndex = routing.getKey();
        for (Integer inputIndex : routing.getValue()) {
            TezEvent outgoing = outgoingByInputIndex.get(inputIndex);
            if (outgoing == null) {
                final Event rewritten;
                if (!isDataMovementEvent) {
                    final InputFailedEvent failed = (InputFailedEvent) sourcePayload;
                    rewritten = InputFailedEvent.create(inputIndex, failed.getVersion());
                } else {
                    final DataMovementEvent moved = (DataMovementEvent) sourcePayload;
                    rewritten = DataMovementEvent.create(moved.getSourceIndex(), inputIndex, moved.getVersion(), moved.getUserPayload());
                }
                outgoing = new TezEvent(rewritten, tezEvent.getSourceInfo(), tezEvent.getEventReceivedTime());
                outgoing.setDestinationInfo(destinationMetaInfo);
                outgoingByInputIndex.put(inputIndex, outgoing);
            }
            final Task destTask = destinationVertex.getTask(destTaskIndex);
            if (destTask == null) {
                throw new TezUncheckedException("Unexpected null task." + " sourceVertex=" + sourceVertex.getLogIdentifier() + " srcTaskIndex = " + srcTaskIndex + " destVertex=" + destinationVertex.getLogIdentifier() + " destTaskIndex=" + destTaskIndex + " destNumTasks=" + destinationVertex.getTotalTasks() + " edgeManager=" + edgeManager.getClass().getName());
            }
            sendEventToTask(destTask, outgoing);
        }
    }
}
Also used : InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) Task(org.apache.tez.dag.app.dag.Task) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) CompositeRoutedDataMovementEvent(org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) CompositeRoutedDataMovementEvent(org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent) InputReadErrorEvent(org.apache.tez.runtime.api.events.InputReadErrorEvent) Event(org.apache.tez.runtime.api.Event) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) ArrayList(java.util.ArrayList) List(java.util.List) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) ConcurrentMap(java.util.concurrent.ConcurrentMap) Map(java.util.Map)

Example 5 with TezEvent

use of org.apache.tez.runtime.api.impl.TezEvent in project tez by apache.

the class Edge method generateEmptyEventsForAttempt.

/**
 * Builds the events that {@code ShuffleUtils.generateEventsForNonStartedOutput} produces for
 * the given attempt on this edge and wraps each one in a {@link TezEvent} whose source
 * metadata names the source vertex, the destination vertex and the attempt.
 *
 * <p>Only edge sources whose class name starts with {@code org.apache.tez} are accepted.
 *
 * @param attempt the task attempt the events are generated on behalf of
 * @return one {@link TezEvent} per generated event, in generation order
 * @throws TezException if the edge source is not an {@code org.apache.tez} output, or if
 *                      generating the events fails (the original failure is kept as the cause)
 */
public List<TezEvent> generateEmptyEventsForAttempt(TezTaskAttemptID attempt) throws Exception {
    String outputClassName = edgeProperty.getEdgeSource().getClassName();
    if (!outputClassName.startsWith("org.apache.tez")) {
        throw new TezException("Only org.apache.tez outputs are allowed for max percent failure feature. Disallowed Output: " + outputClassName);
    }
    List<Event> events = new ArrayList<>();
    Deflater deflater = TezCommonUtils.newBestCompressionDeflater();
    try {
        ShuffleUtils.generateEventsForNonStartedOutput(events, edgeManager.getNumDestinationConsumerTasks(attempt.getTaskID().getId()), null, false, true, deflater);
    } catch (Exception e) {
        throw new TezException(e);
    } finally {
        // A Deflater holds native memory until end() is called; this method created it and
        // does not hand it to anyone else afterwards, so release it on every path instead of
        // leaving it to the garbage collector.
        // NOTE(review): assumes generateEventsForNonStartedOutput does not keep a reference to
        // the deflater beyond the call - confirm.
        deflater.end();
    }
    EventMetaData sourceInfo = new EventMetaData(EventMetaData.EventProducerConsumerType.INPUT, sourceVertex.getName(), getDestinationVertexName(), attempt);
    List<TezEvent> tezEvents = new ArrayList<>(events.size());
    for (Event e : events) {
        tezEvents.add(new TezEvent(e, sourceInfo));
    }
    return tezEvents;
}
Also used : TezException(org.apache.tez.dag.api.TezException) Deflater(java.util.zip.Deflater) ArrayList(java.util.ArrayList) InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) CompositeRoutedDataMovementEvent(org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent) InputReadErrorEvent(org.apache.tez.runtime.api.events.InputReadErrorEvent) Event(org.apache.tez.runtime.api.Event) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) TezException(org.apache.tez.dag.api.TezException) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData)

Aggregations

TezEvent (org.apache.tez.runtime.api.impl.TezEvent)78 Test (org.junit.Test)50 EventMetaData (org.apache.tez.runtime.api.impl.EventMetaData)48 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)34 VertexEventRouteEvent (org.apache.tez.dag.app.dag.event.VertexEventRouteEvent)31 StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)27 TezTaskID (org.apache.tez.dag.records.TezTaskID)27 EdgeManagerForTest (org.apache.tez.test.EdgeManagerForTest)19 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)17 GraceShuffleVertexManagerForTest (org.apache.tez.test.GraceShuffleVertexManagerForTest)16 VertexManagerPluginForTest (org.apache.tez.test.VertexManagerPluginForTest)16 ArrayList (java.util.ArrayList)13 TezVertexID (org.apache.tez.dag.records.TezVertexID)12 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)11 Task (org.apache.tez.dag.app.dag.Task)11 InputReadErrorEvent (org.apache.tez.runtime.api.events.InputReadErrorEvent)11 VertexInitializedEvent (org.apache.tez.dag.history.events.VertexInitializedEvent)10 InputInitializerEvent (org.apache.tez.runtime.api.events.InputInitializerEvent)10 LinkedList (java.util.LinkedList)9 NodeId (org.apache.hadoop.yarn.api.records.NodeId)9