Search in sources :

Example 1 with Task

use of org.apache.tez.dag.app.dag.Task in project tez by apache.

the class VertexImpl method removeTasks.

/**
 * Shrinks this vertex to {@code newNumTasks} tasks by dropping the trailing
 * entries of the task map.
 *
 * <p>Every task visited, kept or removed, must still be in
 * {@link TaskState#NEW}; the first one that is not aborts the operation with a
 * {@link TezUncheckedException}. Tasks already removed before that point stay
 * removed.
 *
 * @param newNumTasks the number of tasks to keep; must be strictly smaller
 *     than the current {@code numTasks}
 */
private void removeTasks(int newNumTasks) {
    Preconditions.checkArgument(newNumTasks < this.numTasks, getLogIdentifier());
    // Deliberately typed as LinkedHashMap: if the field's type ever changes this
    // stops compiling, which is wanted because the logic relies on iteration order.
    LinkedHashMap<TezTaskID, Task> orderedTasks = this.tasks;
    int position = 0;
    for (Iterator<Map.Entry<TezTaskID, Task>> cursor = orderedTasks.entrySet().iterator(); cursor.hasNext(); ) {
        position++;
        Map.Entry<TezTaskID, Task> current = cursor.next();
        if (current.getValue().getState() != TaskState.NEW) {
            String msg = "All tasks must be in initial state when changing parallelism" + " for vertex: " + getLogIdentifier();
            LOG.warn(msg);
            throw new TezUncheckedException(msg);
        }
        // Only entries past the first newNumTasks are dropped, so the surviving
        // tasks keep their index numbers.
        if (position > newNumTasks) {
            LOG.debug("Removing task: {}", current.getKey());
            cursor.remove();
            this.numTasks--;
        }
    }
}
Also used : TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) Entry(java.util.Map.Entry) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint)

Example 2 with Task

use of org.apache.tez.dag.app.dag.Task in project tez by apache.

the class VertexImpl method checkTasksForCompletion.

/**
 * Decides the vertex state after a task completion (triggered by task_complete).
 *
 * <p>While fewer tasks have completed than exist in the vertex, the current
 * internal state is returned unchanged. Once all tasks have completed, final
 * statistics are gathered and the vertex either succeeds (all tasks succeeded,
 * or failures are within {@code maxFailuresPercent}, and no termination cause
 * is registered) or is finished via its termination cause.
 *
 * @param vertex the vertex whose task counters are evaluated
 * @return the resulting vertex state
 */
static VertexState checkTasksForCompletion(final VertexImpl vertex) {
    // One line per call so completions can be counted with a simple grep instead
    // of reconstructing them from attempts and retries.
    LOG.info("Task Completion: " + constructCheckTasksForCompletionLog(vertex));
    final int totalTasks = vertex.tasks.size();
    if (vertex.completedTaskCount > totalTasks) {
        LOG.error("task completion accounting issue: completedTaskCount > nTasks:" + constructCheckTasksForCompletionLog(vertex));
    }
    if (vertex.completedTaskCount != totalTasks) {
        // Vertex not finished yet: report the current state.
        return vertex.getInternalState();
    }

    // All tasks have completed - gather stats.
    vertex.finalStatistics = vertex.constructStatistics();
    final boolean allSucceeded = vertex.succeededTaskCount == vertex.numTasks;
    final boolean everyTaskSucceededOrFailed = vertex.succeededTaskCount + vertex.failedTaskCount == vertex.numTasks;
    final boolean failuresWithinLimit = vertex.failedTaskCount * 100 <= vertex.maxFailuresPercent * vertex.numTasks;
    final boolean toleratedFailures = everyTaskSucceededOrFailed && failuresWithinLimit;

    // Success requires a clean or tolerated outcome AND no registered termination cause.
    if (vertex.terminationCause != null || !(allSucceeded || toleratedFailures)) {
        return finishWithTerminationCause(vertex);
    }

    if (allSucceeded) {
        LOG.info("All tasks have succeeded, vertex:" + vertex.logIdentifier);
    } else {
        LOG.info("All tasks in the vertex " + vertex.logIdentifier + " have completed and the percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
        vertex.addDiagnostic("Vertex succeeded as percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent);
        vertex.logSuccessDiagnostics = true;
        for (Task candidate : vertex.tasks.values()) {
            if (candidate.getState().equals(TaskState.FAILED)) {
                // Pick the attempt with the highest id and treat it as the successful one.
                TezTaskAttemptID newestAttempt = null;
                for (TezTaskAttemptID attemptId : candidate.getAttempts().keySet()) {
                    if (newestAttempt == null || attemptId.getId() > newestAttempt.getId()) {
                        newestAttempt = attemptId;
                    }
                }
                LOG.info("Succeeding failed task attempt:" + newestAttempt);
                for (Map.Entry<Vertex, Edge> target : vertex.targetVertices.entrySet()) {
                    Vertex downstream = target.getKey();
                    Edge outgoingEdge = target.getValue();
                    try {
                        List<TezEvent> emptyEvents = outgoingEdge.generateEmptyEventsForAttempt(newestAttempt);
                        // Downstream vertices need a SUCCEEDED completion event for each
                        // failed task so that their bipartite source count stays correct.
                        VertexEventTaskAttemptCompleted succeeded = new VertexEventTaskAttemptCompleted(newestAttempt, TaskAttemptStateInternal.SUCCEEDED);
                        vertex.eventHandler.handle(new VertexEventSourceTaskAttemptCompleted(downstream.getVertexId(), succeeded));
                        vertex.eventHandler.handle(new VertexEventRouteEvent(downstream.getVertexId(), emptyEvents));
                    } catch (Exception e) {
                        throw new TezUncheckedException(e);
                    }
                }
            }
        }
    }

    if (vertex.commitVertexOutputs && !vertex.committed.getAndSet(true)) {
        // Start the commit if there are commits, or just finish if there are none.
        return commitOrFinish(vertex);
    }
    // No vertex-level commit needed: finish right away.
    return vertex.finished(VertexState.SUCCEEDED);
}
Also used : VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) VertexEventTaskAttemptCompleted(org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted) VertexEventRouteEvent(org.apache.tez.dag.app.dag.event.VertexEventRouteEvent) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) InvalidStateTransitonException(org.apache.hadoop.yarn.state.InvalidStateTransitonException) LimitExceededException(org.apache.tez.common.counters.LimitExceededException) TezException(org.apache.tez.dag.api.TezException) VertexEventSourceTaskAttemptCompleted(org.apache.tez.dag.app.dag.event.VertexEventSourceTaskAttemptCompleted) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 3 with Task

use of org.apache.tez.dag.app.dag.Task in project tez by apache.

the class VertexImpl method computeProgress.

/**
 * Recomputes {@code progress} as the sum of all task progress values divided
 * by {@code numTasks}. When {@code numTasks} is zero the raw sum is stored
 * without dividing. The computation runs while holding the read lock.
 */
private void computeProgress() {
    this.readLock.lock();
    try {
        float total = 0f;
        for (Task each : this.tasks.values()) {
            total += each.getProgress();
        }
        // Guard against dividing by zero for a vertex with no tasks.
        this.progress = (this.numTasks != 0) ? total / this.numTasks : total;
    } finally {
        this.readLock.unlock();
    }
}
Also used : TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task)

Example 4 with Task

use of org.apache.tez.dag.app.dag.Task in project tez by apache.

the class VertexImpl method getTaskAttemptTezEvents.

/**
 * Gathers the next batch of events to hand to a task attempt.
 *
 * <p>The batch starts with whatever the owning task returns from
 * {@code getTaskAttemptTezEvents} (the "pre-routed" events). If
 * {@code onDemandRouteEvents} holds entries at or beyond {@code fromEventId},
 * those are then walked in order under the on-demand read lock and routed into
 * the same list until the list holds {@code maxEvents} entries, an edge reports
 * that it has no room left, or the entries are exhausted.
 *
 * <p>If routing throws {@link AMUserCodeException}, the error is logged and
 * reported through a {@link VertexEventManagerUserCodeError}, the event list is
 * cleared and the returned on-demand index is reset to {@code fromEventId}.
 *
 * @param attemptID the task attempt the events are for
 * @param fromEventId index into {@code onDemandRouteEvents} to resume from
 * @param preRoutedFromEventId offset forwarded to the task's own event lookup
 * @param maxEvents size at which the on-demand loop stops adding events
 * @return the collected events together with the next on-demand index and the
 *     next pre-routed index
 * @throws TezUncheckedException if an on-demand event has an unexpected type
 */
@Override
public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID, int fromEventId, int preRoutedFromEventId, int maxEvents) {
    Task task = getTask(attemptID.getTaskID());
    ArrayList<TezEvent> events = task.getTaskAttemptTezEvents(attemptID, preRoutedFromEventId, maxEvents);
    int nextPreRoutedFromEventId = preRoutedFromEventId + events.size();
    // stays at fromEventId unless the on-demand loop below advances it
    int nextFromEventId = fromEventId;
    onDemandRouteEventsReadLock.lock();
    try {
        int currEventCount = onDemandRouteEvents.size();
        try {
            if (currEventCount > fromEventId) {
                // Identity check against the shared empty-list constant: a fresh list is
                // allocated in that case, presumably so the shared instance is never
                // appended to - TODO confirm against TaskImpl.
                if (events != TaskImpl.EMPTY_TASK_ATTEMPT_TEZ_EVENTS) {
                    events.ensureCapacity(maxEvents);
                } else {
                    events = Lists.newArrayListWithCapacity(maxEvents);
                }
                // remembered so that only the events added by this loop are counted in the log below
                int numPreRoutedEvents = events.size();
                int taskIndex = attemptID.getTaskID().getId();
                Preconditions.checkState(taskIndex < tasks.size(), "Invalid task index for TA: " + attemptID + " vertex: " + getLogIdentifier());
                boolean isFirstEvent = true;
                boolean firstEventObsoleted = false;
                for (nextFromEventId = fromEventId; nextFromEventId < currEventCount; ++nextFromEventId) {
                    boolean earlyExit = false;
                    // batch is full; nextFromEventId is left pointing at the first unprocessed event
                    if (events.size() == maxEvents) {
                        break;
                    }
                    EventInfo eventInfo = onDemandRouteEvents.get(nextFromEventId);
                    if (eventInfo.isObsolete) {
                        // ignore obsolete events
                        firstEventObsoleted = true;
                        continue;
                    }
                    TezEvent tezEvent = eventInfo.tezEvent;
                    switch(tezEvent.getEventType()) {
                        case INPUT_FAILED_EVENT:
                        case DATA_MOVEMENT_EVENT:
                        case COMPOSITE_DATA_MOVEMENT_EVENT:
                            {
                                int srcTaskIndex = eventInfo.eventTaskIndex;
                                Edge srcEdge = eventInfo.eventEdge;
                                PendingEventRouteMetadata pendingRoute = null;
                                if (isFirstEvent) {
                                    // the first event is the one that can have pending routes because its expanded
                                    // events had not been completely sent in the last round.
                                    isFirstEvent = false;
                                    pendingRoute = srcEdge.removePendingEvents(attemptID);
                                    if (pendingRoute != null) {
                                        // The pending route refers to a different event than the one being
                                        // processed; this is only accepted when an obsolete event was
                                        // skipped earlier in this loop.
                                        if (tezEvent != pendingRoute.getTezEvent()) {
                                            Preconditions.checkState(firstEventObsoleted);
                                            // pending routes can be ignored for obsoleted events
                                            pendingRoute = null;
                                        }
                                    }
                                }
                                if (!srcEdge.maybeAddTezEventForDestinationTask(tezEvent, attemptID, srcTaskIndex, events, maxEvents, pendingRoute)) {
                                    // not enough space left for this iteration events.
                                    // Exit and start from here next time
                                    earlyExit = true;
                                }
                            }
                            break;
                        case ROOT_INPUT_DATA_INFORMATION_EVENT:
                            {
                                // delivered only to the task whose index matches the event's target index
                                InputDataInformationEvent riEvent = (InputDataInformationEvent) tezEvent.getEvent();
                                if (riEvent.getTargetIndex() == taskIndex) {
                                    events.add(tezEvent);
                                }
                            }
                            break;
                        default:
                            throw new TezUncheckedException("Unexpected event type for task: " + tezEvent.getEventType());
                    }
                    // break without incrementing, so the same event is revisited on the next call
                    if (earlyExit) {
                        break;
                    }
                }
                int numEventsSent = events.size() - numPreRoutedEvents;
                if (numEventsSent > 0) {
                    StringBuilder builder = new StringBuilder();
                    builder.append("Sending ").append(attemptID).append(" ").append(numEventsSent).append(" events [").append(fromEventId).append(",").append(nextFromEventId).append(") total ").append(currEventCount).append(" ").append(getLogIdentifier());
                    LOG.info(builder.toString());
                }
            }
        } catch (AMUserCodeException e) {
            String msg = "Exception in " + e.getSource() + ", vertex=" + getLogIdentifier();
            LOG.error(msg, e);
            eventHandler.handle(new VertexEventManagerUserCodeError(getVertexId(), e));
            // Roll back the on-demand position and drop everything collected so far.
            // NOTE(review): events.clear() also discards the pre-routed events, yet
            // nextPreRoutedFromEventId is not reset here - confirm this is intended.
            nextFromEventId = fromEventId;
            events.clear();
        }
    } finally {
        onDemandRouteEventsReadLock.unlock();
    }
    if (!events.isEmpty()) {
        // Scan backwards for the most recent event of one of the listed types
        // and record it on the attempt.
        for (int i = (events.size() - 1); i >= 0; --i) {
            TezEvent lastEvent = events.get(i);
            // record the last event sent by the AM to the task
            EventType lastEventType = lastEvent.getEventType();
            // if the following changes then critical path logic/recording may need revision
            if (lastEventType == EventType.COMPOSITE_DATA_MOVEMENT_EVENT || lastEventType == EventType.COMPOSITE_ROUTED_DATA_MOVEMENT_EVENT || lastEventType == EventType.DATA_MOVEMENT_EVENT || lastEventType == EventType.ROOT_INPUT_DATA_INFORMATION_EVENT) {
                task.getAttempt(attemptID).setLastEventSent(lastEvent);
                break;
            }
        }
    }
    return new TaskAttemptEventInfo(nextFromEventId, events, nextPreRoutedFromEventId);
}
Also used : TaskEventScheduleTask(org.apache.tez.dag.app.dag.event.TaskEventScheduleTask) Task(org.apache.tez.dag.app.dag.Task) TaskAttemptEventInfo(org.apache.tez.dag.app.TaskAttemptEventInfo) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) DAGEventType(org.apache.tez.dag.app.dag.event.DAGEventType) EventType(org.apache.tez.runtime.api.impl.EventType) VertexEventType(org.apache.tez.dag.app.dag.event.VertexEventType) TaskEventType(org.apache.tez.dag.app.dag.event.TaskEventType) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) PendingEventRouteMetadata(org.apache.tez.dag.app.dag.impl.Edge.PendingEventRouteMetadata) VertexEventManagerUserCodeError(org.apache.tez.dag.app.dag.event.VertexEventManagerUserCodeError) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptEventInfo(org.apache.tez.dag.app.TaskAttemptEventInfo) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 5 with Task

use of org.apache.tez.dag.app.dag.Task in project tez by apache.

the class Edge method sendDmEventOrIfEventToTasks.

/**
 * Fans a data-movement or input-failed event out to destination tasks.
 *
 * <p>For each destination task index in {@code taskAndInputIndices}, one event
 * is sent per listed input index. The outgoing event for a given input index is
 * built once and reused for every destination task that lists that index.
 *
 * @param tezEvent the source event; its payload is cast to
 *     {@link DataMovementEvent} or {@link InputFailedEvent} according to
 *     {@code isDataMovementEvent}
 * @param srcTaskIndex index of the source task; used only in the error message
 * @param isDataMovementEvent {@code true} to emit data-movement events,
 *     {@code false} to emit input-failed events
 * @param taskAndInputIndices destination task index mapped to its target input
 *     indices
 * @throws TezUncheckedException if a destination task cannot be found
 */
void sendDmEventOrIfEventToTasks(TezEvent tezEvent, int srcTaskIndex, boolean isDataMovementEvent, Map<Integer, List<Integer>> taskAndInputIndices) {
    Preconditions.checkState(edgeManager != null, "Edge Manager must be initialized by this time");
    final Event sourcePayload = tezEvent.getEvent();
    // Outgoing events depend only on the input index, so they are cached by it.
    final Map<Integer, TezEvent> outgoingByInputIndex = Maps.newHashMap();
    for (Map.Entry<Integer, List<Integer>> route : taskAndInputIndices.entrySet()) {
        final int destTaskIndex = route.getKey();
        for (Integer inputIndex : route.getValue()) {
            TezEvent outgoing = outgoingByInputIndex.get(inputIndex);
            if (outgoing == null) {
                final Event rewritten;
                if (isDataMovementEvent) {
                    DataMovementEvent original = (DataMovementEvent) sourcePayload;
                    rewritten = DataMovementEvent.create(original.getSourceIndex(), inputIndex, original.getVersion(), original.getUserPayload());
                } else {
                    InputFailedEvent original = (InputFailedEvent) sourcePayload;
                    rewritten = InputFailedEvent.create(inputIndex, original.getVersion());
                }
                outgoing = new TezEvent(rewritten, tezEvent.getSourceInfo(), tezEvent.getEventReceivedTime());
                outgoing.setDestinationInfo(destinationMetaInfo);
                outgoingByInputIndex.put(inputIndex, outgoing);
            }
            final Task target = destinationVertex.getTask(destTaskIndex);
            if (target == null) {
                throw new TezUncheckedException("Unexpected null task." + " sourceVertex=" + sourceVertex.getLogIdentifier() + " srcTaskIndex = " + srcTaskIndex + " destVertex=" + destinationVertex.getLogIdentifier() + " destTaskIndex=" + destTaskIndex + " destNumTasks=" + destinationVertex.getTotalTasks() + " edgeManager=" + edgeManager.getClass().getName());
            }
            sendEventToTask(target, outgoing);
        }
    }
}
Also used : InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) Task(org.apache.tez.dag.app.dag.Task) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) CompositeRoutedDataMovementEvent(org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) InputFailedEvent(org.apache.tez.runtime.api.events.InputFailedEvent) CompositeRoutedDataMovementEvent(org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent) InputReadErrorEvent(org.apache.tez.runtime.api.events.InputReadErrorEvent) Event(org.apache.tez.runtime.api.Event) CompositeDataMovementEvent(org.apache.tez.runtime.api.events.CompositeDataMovementEvent) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) DataMovementEvent(org.apache.tez.runtime.api.events.DataMovementEvent) ArrayList(java.util.ArrayList) List(java.util.List) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) ConcurrentMap(java.util.concurrent.ConcurrentMap) Map(java.util.Map)

Aggregations

Task (org.apache.tez.dag.app.dag.Task)41 TezTaskID (org.apache.tez.dag.records.TezTaskID)15 Test (org.junit.Test)14 TaskEventScheduleTask (org.apache.tez.dag.app.dag.event.TaskEventScheduleTask)11 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)11 StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)9 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)8 TaskAttempt (org.apache.tez.dag.app.dag.TaskAttempt)7 Vertex (org.apache.tez.dag.app.dag.Vertex)7 VertexEventRouteEvent (org.apache.tez.dag.app.dag.event.VertexEventRouteEvent)7 EventMetaData (org.apache.tez.runtime.api.impl.EventMetaData)7 Map (java.util.Map)6 TaskLocationHint (org.apache.tez.dag.api.TaskLocationHint)6 ByteString (com.google.protobuf.ByteString)5 TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException)5 VertexLocationHint (org.apache.tez.dag.api.VertexLocationHint)5 TezVertexID (org.apache.tez.dag.records.TezVertexID)5 DataMovementEvent (org.apache.tez.runtime.api.events.DataMovementEvent)5 EdgeManagerForTest (org.apache.tez.test.EdgeManagerForTest)5 HashMap (java.util.HashMap)4